41 files changed, 2113 insertions, 773 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 58908f9d156a..f6ef00f4f90f 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -20,7 +20,6 @@ obj-$(CONFIG_SMP) += cpu.o spinlock.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
 obj-$(CONFIG_UID16) += uid16.o
 obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_OBSOLETE_INTERMODULE) += intermodule.o
 obj-$(CONFIG_KALLSYMS) += kallsyms.o
 obj-$(CONFIG_PM) += power/
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
diff --git a/kernel/acct.c b/kernel/acct.c
index b327f4d20104..368c4f03fe0e 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -75,7 +75,7 @@ int acct_parm[3] = {4, 2, 30};
 /*
  * External references and all of the globals.
  */
-static void do_acct_process(long, struct file *);
+static void do_acct_process(struct file *);
 
 /*
  * This structure is used so that all the data protected by lock
@@ -118,7 +118,7 @@ static int check_free_space(struct file *file)
 	spin_unlock(&acct_globals.lock);
 
 	/* May block */
-	if (vfs_statfs(file->f_dentry->d_inode->i_sb, &sbuf))
+	if (vfs_statfs(file->f_dentry, &sbuf))
 		return res;
 	suspend = sbuf.f_blocks * SUSPEND;
 	resume = sbuf.f_blocks * RESUME;
@@ -196,7 +196,7 @@ static void acct_file_reopen(struct file *file)
 	if (old_acct) {
 		mnt_unpin(old_acct->f_vfsmnt);
 		spin_unlock(&acct_globals.lock);
-		do_acct_process(0, old_acct);
+		do_acct_process(old_acct);
 		filp_close(old_acct, NULL);
 		spin_lock(&acct_globals.lock);
 	}
@@ -419,16 +419,15 @@ static u32 encode_float(u64 value)
 /*
  *  do_acct_process does all actual work. Caller holds the reference to file.
  */
-static void do_acct_process(long exitcode, struct file *file)
+static void do_acct_process(struct file *file)
 {
+	struct pacct_struct *pacct = &current->signal->pacct;
 	acct_t ac;
 	mm_segment_t fs;
-	unsigned long vsize;
 	unsigned long flim;
 	u64 elapsed;
 	u64 run_time;
 	struct timespec uptime;
-	unsigned long jiffies;
 
 	/*
 	 * First check to see if there is enough free_space to continue
@@ -469,12 +468,6 @@ static void do_acct_process(long exitcode, struct file *file)
 #endif
 	do_div(elapsed, AHZ);
 	ac.ac_btime = xtime.tv_sec - elapsed;
-	jiffies = cputime_to_jiffies(cputime_add(current->utime,
-						 current->signal->utime));
-	ac.ac_utime = encode_comp_t(jiffies_to_AHZ(jiffies));
-	jiffies = cputime_to_jiffies(cputime_add(current->stime,
-						 current->signal->stime));
-	ac.ac_stime = encode_comp_t(jiffies_to_AHZ(jiffies));
 	/* we really need to bite the bullet and change layout */
 	ac.ac_uid = current->uid;
 	ac.ac_gid = current->gid;
@@ -496,37 +489,18 @@ static void do_acct_process(long exitcode, struct file *file)
 		old_encode_dev(tty_devnum(current->signal->tty)) : 0;
 	read_unlock(&tasklist_lock);
 
-	ac.ac_flag = 0;
-	if (current->flags & PF_FORKNOEXEC)
-		ac.ac_flag |= AFORK;
-	if (current->flags & PF_SUPERPRIV)
-		ac.ac_flag |= ASU;
-	if (current->flags & PF_DUMPCORE)
-		ac.ac_flag |= ACORE;
-	if (current->flags & PF_SIGNALED)
-		ac.ac_flag |= AXSIG;
-
-	vsize = 0;
-	if (current->mm) {
-		struct vm_area_struct *vma;
-		down_read(&current->mm->mmap_sem);
-		vma = current->mm->mmap;
-		while (vma) {
-			vsize += vma->vm_end - vma->vm_start;
-			vma = vma->vm_next;
-		}
-		up_read(&current->mm->mmap_sem);
-	}
-	vsize = vsize / 1024;
-	ac.ac_mem = encode_comp_t(vsize);
+	spin_lock(&current->sighand->siglock);
+	ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime)));
+	ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime)));
+	ac.ac_flag = pacct->ac_flag;
+	ac.ac_mem = encode_comp_t(pacct->ac_mem);
+	ac.ac_minflt = encode_comp_t(pacct->ac_minflt);
+	ac.ac_majflt = encode_comp_t(pacct->ac_majflt);
+	ac.ac_exitcode = pacct->ac_exitcode;
+	spin_unlock(&current->sighand->siglock);
 	ac.ac_io = encode_comp_t(0 /* current->io_usage */);	/* %% */
 	ac.ac_rw = encode_comp_t(ac.ac_io / 1024);
-	ac.ac_minflt = encode_comp_t(current->signal->min_flt +
-				     current->min_flt);
-	ac.ac_majflt = encode_comp_t(current->signal->maj_flt +
-				     current->maj_flt);
 	ac.ac_swaps = encode_comp_t(0);
-	ac.ac_exitcode = exitcode;
 
 	/*
          * Kernel segment override to datasegment and write it
@@ -546,12 +520,63 @@ static void do_acct_process(long exitcode, struct file *file)
 }
 
 /**
+ * acct_init_pacct - initialize a new pacct_struct
+ */
+void acct_init_pacct(struct pacct_struct *pacct)
+{
+	memset(pacct, 0, sizeof(struct pacct_struct));
+	pacct->ac_utime = pacct->ac_stime = cputime_zero;
+}
+
+/**
+ * acct_collect - collect accounting information into pacct_struct
+ * @exitcode: task exit code
+ * @group_dead: not 0, if this thread is the last one in the process.
+ */
+void acct_collect(long exitcode, int group_dead)
+{
+	struct pacct_struct *pacct = &current->signal->pacct;
+	unsigned long vsize = 0;
+
+	if (group_dead && current->mm) {
+		struct vm_area_struct *vma;
+		down_read(&current->mm->mmap_sem);
+		vma = current->mm->mmap;
+		while (vma) {
+			vsize += vma->vm_end - vma->vm_start;
+			vma = vma->vm_next;
+		}
+		up_read(&current->mm->mmap_sem);
+	}
+
+	spin_lock_irq(&current->sighand->siglock);
+	if (group_dead)
+		pacct->ac_mem = vsize / 1024;
+	if (thread_group_leader(current)) {
+		pacct->ac_exitcode = exitcode;
+		if (current->flags & PF_FORKNOEXEC)
+			pacct->ac_flag |= AFORK;
+	}
+	if (current->flags & PF_SUPERPRIV)
+		pacct->ac_flag |= ASU;
+	if (current->flags & PF_DUMPCORE)
+		pacct->ac_flag |= ACORE;
+	if (current->flags & PF_SIGNALED)
+		pacct->ac_flag |= AXSIG;
+	pacct->ac_utime = cputime_add(pacct->ac_utime, current->utime);
+	pacct->ac_stime = cputime_add(pacct->ac_stime, current->stime);
+	pacct->ac_minflt += current->min_flt;
+	pacct->ac_majflt += current->maj_flt;
+	spin_unlock_irq(&current->sighand->siglock);
+}
+
+/**
  * acct_process - now just a wrapper around do_acct_process
  * @exitcode: task exit code
  *
  * handles process accounting for an exiting task
  */
-void acct_process(long exitcode)
+void acct_process()
 {
 	struct file *file = NULL;
 
@@ -570,7 +595,7 @@ void acct_process(long exitcode)
 	get_file(file);
 	spin_unlock(&acct_globals.lock);
 
-	do_acct_process(exitcode, file);
+	do_acct_process(file);
 	fput(file);
 }
 
@@ -599,9 +624,7 @@ void acct_update_integrals(struct task_struct *tsk)
  */
 void acct_clear_integrals(struct task_struct *tsk)
 {
-	if (tsk) {
-		tsk->acct_stimexpd = 0;
-		tsk->acct_rss_mem1 = 0;
-		tsk->acct_vm_mem1 = 0;
-	}
+	tsk->acct_stimexpd = 0;
+	tsk->acct_rss_mem1 = 0;
+	tsk->acct_vm_mem1 = 0;
 }
diff --git a/kernel/audit.c b/kernel/audit.c
index df57b493e1cb..7dfac7031bd7 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -56,6 +56,7 @@
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/selinux.h>
+#include <linux/inotify.h>
 
 #include "audit.h"
 
@@ -89,6 +90,7 @@ static int	audit_backlog_wait_overflow = 0;
 /* The identity of the user shutting down the audit system. */
 uid_t		audit_sig_uid = -1;
 pid_t		audit_sig_pid = -1;
+u32		audit_sig_sid = 0;
 
 /* Records can be lost in several ways:
    0) [suppressed in audit_alloc]
@@ -102,6 +104,12 @@ static atomic_t    audit_lost = ATOMIC_INIT(0);
 /* The netlink socket. */
 static struct sock *audit_sock;
 
+/* Inotify handle. */
+struct inotify_handle *audit_ih;
+
+/* Hash for inode-based rules */
+struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
+
 /* The audit_freelist is a list of pre-allocated audit buffers (if more
  * than AUDIT_MAXFREE are in use, the audit buffer is freed instead of
  * being placed on the freelist). */
@@ -114,10 +122,8 @@ static struct task_struct *kauditd_task;
 static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
 static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait);
 
-/* The netlink socket is only to be read by 1 CPU, which lets us assume
- * that list additions and deletions never happen simultaneously in
- * auditsc.c */
-DEFINE_MUTEX(audit_netlink_mutex);
+/* Serialize requests from userspace. */
+static DEFINE_MUTEX(audit_cmd_mutex);
 
 /* AUDIT_BUFSIZ is the size of the temporary buffer used for formatting
  * audit records.  Since printk uses a 1024 byte buffer, this buffer
@@ -250,7 +256,7 @@ static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid)
 			"audit_rate_limit=%d old=%d by auid=%u",
 			limit, old, loginuid);
 	audit_rate_limit = limit;
-	return old;
+	return 0;
 }
 
 static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid)
@@ -273,7 +279,7 @@ static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid)
 			"audit_backlog_limit=%d old=%d by auid=%u",
 			limit, old, loginuid);
 	audit_backlog_limit = limit;
-	return old;
+	return 0;
 }
 
 static int audit_set_enabled(int state, uid_t loginuid, u32 sid)
@@ -299,7 +305,7 @@ static int audit_set_enabled(int state, uid_t loginuid, u32 sid)
 			"audit_enabled=%d old=%d by auid=%u",
 			state, old, loginuid);
 	audit_enabled = state;
-	return old;
+	return 0;
 }
 
 static int audit_set_failure(int state, uid_t loginuid, u32 sid)
@@ -327,7 +333,7 @@ static int audit_set_failure(int state, uid_t loginuid, u32 sid)
 			"audit_failure=%d old=%d by auid=%u",
 			state, old, loginuid);
 	audit_failure = state;
-	return old;
+	return 0;
 }
 
 static int kauditd_thread(void *dummy)
@@ -363,9 +369,52 @@ static int kauditd_thread(void *dummy)
 			remove_wait_queue(&kauditd_wait, &wait);
 		}
 	}
+}
+
+int audit_send_list(void *_dest)
+{
+	struct audit_netlink_list *dest = _dest;
+	int pid = dest->pid;
+	struct sk_buff *skb;
+
+	/* wait for parent to finish and send an ACK */
+	mutex_lock(&audit_cmd_mutex);
+	mutex_unlock(&audit_cmd_mutex);
+
+	while ((skb = __skb_dequeue(&dest->q)) != NULL)
+		netlink_unicast(audit_sock, skb, pid, 0);
+
+	kfree(dest);
+
 	return 0;
 }
 
+struct sk_buff *audit_make_reply(int pid, int seq, int type, int done,
+				 int multi, void *payload, int size)
+{
+	struct sk_buff	*skb;
+	struct nlmsghdr	*nlh;
+	int		len = NLMSG_SPACE(size);
+	void		*data;
+	int		flags = multi ? NLM_F_MULTI : 0;
+	int		t     = done  ? NLMSG_DONE  : type;
+
+	skb = alloc_skb(len, GFP_KERNEL);
+	if (!skb)
+		return NULL;
+
+	nlh		 = NLMSG_PUT(skb, pid, seq, t, size);
+	nlh->nlmsg_flags = flags;
+	data		 = NLMSG_DATA(nlh);
+	memcpy(data, payload, size);
+	return skb;
+
+nlmsg_failure:			/* Used by NLMSG_PUT */
+	if (skb)
+		kfree_skb(skb);
+	return NULL;
+}
+
 /**
  * audit_send_reply - send an audit reply message via netlink
  * @pid: process id to send reply to
@@ -383,29 +432,13 @@ void audit_send_reply(int pid, int seq, int type, int done, int multi,
 		      void *payload, int size)
 {
 	struct sk_buff	*skb;
-	struct nlmsghdr	*nlh;
-	int		len = NLMSG_SPACE(size);
-	void		*data;
-	int		flags = multi ? NLM_F_MULTI : 0;
-	int		t     = done  ? NLMSG_DONE  : type;
-
-	skb = alloc_skb(len, GFP_KERNEL);
+	skb = audit_make_reply(pid, seq, type, done, multi, payload, size);
 	if (!skb)
 		return;
-
-	nlh		 = NLMSG_PUT(skb, pid, seq, t, size);
-	nlh->nlmsg_flags = flags;
-	data		 = NLMSG_DATA(nlh);
-	memcpy(data, payload, size);
-
 	/* Ignore failure. It'll only happen if the sender goes away,
 	   because our timeout is set to infinite. */
 	netlink_unicast(audit_sock, skb, pid, 0);
 	return;
-
-nlmsg_failure:			/* Used by NLMSG_PUT */
-	if (skb)
-		kfree_skb(skb);
 }
 
 /*
@@ -451,7 +484,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	struct audit_buffer	*ab;
 	u16			msg_type = nlh->nlmsg_type;
 	uid_t			loginuid; /* loginuid of sender */
-	struct audit_sig_info   sig_data;
+	struct audit_sig_info   *sig_data;
+	char			*ctx;
+	u32			len;
 
 	err = audit_netlink_ok(NETLINK_CB(skb).eff_cap, msg_type);
 	if (err)
@@ -503,12 +538,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		if (status_get->mask & AUDIT_STATUS_PID) {
 			int old   = audit_pid;
 			if (sid) {
-				char *ctx = NULL;
-				u32 len;
-				int rc;
-				if ((rc = selinux_ctxid_to_string(
+				if ((err = selinux_ctxid_to_string(
 						sid, &ctx, &len)))
-					return rc;
+					return err;
 				else
 					audit_log(NULL, GFP_KERNEL,
 						AUDIT_CONFIG_CHANGE,
@@ -523,10 +555,10 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 			audit_pid = status_get->pid;
 		}
 		if (status_get->mask & AUDIT_STATUS_RATE_LIMIT)
-			audit_set_rate_limit(status_get->rate_limit,
+			err = audit_set_rate_limit(status_get->rate_limit,
 							 loginuid, sid);
 		if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT)
-			audit_set_backlog_limit(status_get->backlog_limit,
+			err = audit_set_backlog_limit(status_get->backlog_limit,
 							loginuid, sid);
 		break;
 	case AUDIT_USER:
@@ -544,8 +576,6 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 						 "user pid=%d uid=%u auid=%u",
 						 pid, uid, loginuid);
 				if (sid) {
-					char *ctx = NULL;
-					u32 len;
 					if (selinux_ctxid_to_string(
 							sid, &ctx, &len)) {
 						audit_log_format(ab, 
@@ -584,10 +614,21 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 					   loginuid, sid);
 		break;
 	case AUDIT_SIGNAL_INFO:
-		sig_data.uid = audit_sig_uid;
-		sig_data.pid = audit_sig_pid;
+		err = selinux_ctxid_to_string(audit_sig_sid, &ctx, &len);
+		if (err)
+			return err;
+		sig_data = kmalloc(sizeof(*sig_data) + len, GFP_KERNEL);
+		if (!sig_data) {
+			kfree(ctx);
+			return -ENOMEM;
+		}
+		sig_data->uid = audit_sig_uid;
+		sig_data->pid = audit_sig_pid;
+		memcpy(sig_data->ctx, ctx, len);
+		kfree(ctx);
 		audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_SIGNAL_INFO, 
-				0, 0, &sig_data, sizeof(sig_data));
+				0, 0, sig_data, sizeof(*sig_data) + len);
+		kfree(sig_data);
 		break;
 	default:
 		err = -EINVAL;
@@ -629,20 +670,30 @@ static void audit_receive(struct sock *sk, int length)
 	struct sk_buff  *skb;
 	unsigned int qlen;
 
-	mutex_lock(&audit_netlink_mutex);
+	mutex_lock(&audit_cmd_mutex);
 
 	for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
 		skb = skb_dequeue(&sk->sk_receive_queue);
 		audit_receive_skb(skb);
 		kfree_skb(skb);
 	}
-	mutex_unlock(&audit_netlink_mutex);
+	mutex_unlock(&audit_cmd_mutex);
 }
 
+#ifdef CONFIG_AUDITSYSCALL
+static const struct inotify_operations audit_inotify_ops = {
+	.handle_event	= audit_handle_ievent,
+	.destroy_watch	= audit_free_parent,
+};
+#endif
 
 /* Initialize audit support at boot time. */
 static int __init audit_init(void)
 {
+#ifdef CONFIG_AUDITSYSCALL
+	int i;
+#endif
+
 	printk(KERN_INFO "audit: initializing netlink socket (%s)\n",
 	       audit_default ? "enabled" : "disabled");
 	audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive,
@@ -661,6 +712,16 @@ static int __init audit_init(void)
 	selinux_audit_set_callback(&selinux_audit_rule_update);
 
 	audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized");
+
+#ifdef CONFIG_AUDITSYSCALL
+	audit_ih = inotify_init(&audit_inotify_ops);
+	if (IS_ERR(audit_ih))
+		audit_panic("cannot initialize inotify handle");
+
+	for (i = 0; i < AUDIT_INODE_BUCKETS; i++)
+		INIT_LIST_HEAD(&audit_inode_hash[i]);
+#endif
+
 	return 0;
 }
 __initcall(audit_init);
@@ -690,10 +751,12 @@ static void audit_buffer_free(struct audit_buffer *ab)
 		kfree_skb(ab->skb);
 
 	spin_lock_irqsave(&audit_freelist_lock, flags);
-	if (++audit_freelist_count > AUDIT_MAXFREE)
+	if (audit_freelist_count > AUDIT_MAXFREE)
 		kfree(ab);
-	else
+	else {
+		audit_freelist_count++;
 		list_add(&ab->list, &audit_freelist);
+	}
 	spin_unlock_irqrestore(&audit_freelist_lock, flags);
 }
 
@@ -988,28 +1051,76 @@ void audit_log_hex(struct audit_buffer *ab, const unsigned char *buf,
 	skb_put(skb, len << 1); /* new string is twice the old string */
 }
 
+/*
+ * Format a string of no more than slen characters into the audit buffer,
+ * enclosed in quote marks.
+ */
+static void audit_log_n_string(struct audit_buffer *ab, size_t slen,
+			       const char *string)
+{
+	int avail, new_len;
+	unsigned char *ptr;
+	struct sk_buff *skb;
+
+	BUG_ON(!ab->skb);
+	skb = ab->skb;
+	avail = skb_tailroom(skb);
+	new_len = slen + 3;	/* enclosing quotes + null terminator */
+	if (new_len > avail) {
+		avail = audit_expand(ab, new_len);
+		if (!avail)
+			return;
+	}
+	ptr = skb->tail;
+	*ptr++ = '"';
+	memcpy(ptr, string, slen);
+	ptr += slen;
+	*ptr++ = '"';
+	*ptr = 0;
+	skb_put(skb, slen + 2);	/* don't include null terminator */
+}
+
 /**
- * audit_log_unstrustedstring - log a string that may contain random characters
+ * audit_log_n_unstrustedstring - log a string that may contain random characters
  * @ab: audit_buffer
+ * @len: lenth of string (not including trailing null)
  * @string: string to be logged
  *
  * This code will escape a string that is passed to it if the string
  * contains a control character, unprintable character, double quote mark,
  * or a space. Unescaped strings will start and end with a double quote mark.
  * Strings that are escaped are printed in hex (2 digits per char).
+ *
+ * The caller specifies the number of characters in the string to log, which may
+ * or may not be the entire string.
  */
-void audit_log_untrustedstring(struct audit_buffer *ab, const char *string)
+const char *audit_log_n_untrustedstring(struct audit_buffer *ab, size_t len,
+					const char *string)
 {
 	const unsigned char *p = string;
 
 	while (*p) {
 		if (*p == '"' || *p < 0x21 || *p > 0x7f) {
-			audit_log_hex(ab, string, strlen(string));
-			return;
+			audit_log_hex(ab, string, len);
+			return string + len + 1;
 		}
 		p++;
 	}
-	audit_log_format(ab, "\"%s\"", string);
+	audit_log_n_string(ab, len, string);
+	return p + 1;
+}
+
+/**
+ * audit_log_unstrustedstring - log a string that may contain random characters
+ * @ab: audit_buffer
+ * @string: string to be logged
+ *
+ * Same as audit_log_n_unstrustedstring(), except that strlen is used to
+ * determine string length.
+ */
+const char *audit_log_untrustedstring(struct audit_buffer *ab, const char *string)
+{
+	return audit_log_n_untrustedstring(ab, strlen(string), string);
 }
 
 /* This is a helper-function to print the escaped d_path */
diff --git a/kernel/audit.h b/kernel/audit.h
index 6f733920fd32..8323e4132a33 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -19,9 +19,9 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
-#include <linux/mutex.h>
 #include <linux/fs.h>
 #include <linux/audit.h>
+#include <linux/skbuff.h>
 
 /* 0 = no checking
    1 = put_count checking
@@ -53,6 +53,18 @@ enum audit_state {
 };
 
 /* Rule lists */
+struct audit_parent;
+
+struct audit_watch {
+	atomic_t		count;	/* reference count */
+	char			*path;	/* insertion path */
+	dev_t			dev;	/* associated superblock device */
+	unsigned long		ino;	/* associated inode number */
+	struct audit_parent	*parent; /* associated parent */
+	struct list_head	wlist;	/* entry in parent->watches list */
+	struct list_head	rules;	/* associated rules */
+};
+
 struct audit_field {
 	u32				type;
 	u32				val;
@@ -70,6 +82,9 @@ struct audit_krule {
 	u32			buflen; /* for data alloc on list rules */
 	u32			field_count;
 	struct audit_field	*fields;
+	struct audit_field	*inode_f; /* quick access to an inode field */
+	struct audit_watch	*watch;	/* associated watch */
+	struct list_head	rlist;	/* entry in audit_watch.rules list */
 };
 
 struct audit_entry {
@@ -78,15 +93,53 @@ struct audit_entry {
 	struct audit_krule	rule;
 };
 
-
 extern int audit_pid;
-extern int audit_comparator(const u32 left, const u32 op, const u32 right);
 
+#define AUDIT_INODE_BUCKETS	32
+extern struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
+
+static inline int audit_hash_ino(u32 ino)
+{
+	return (ino & (AUDIT_INODE_BUCKETS-1));
+}
+
+extern int audit_comparator(const u32 left, const u32 op, const u32 right);
+extern int audit_compare_dname_path(const char *dname, const char *path,
+				    int *dirlen);
+extern struct sk_buff *	    audit_make_reply(int pid, int seq, int type,
+					     int done, int multi,
+					     void *payload, int size);
 extern void		    audit_send_reply(int pid, int seq, int type,
 					     int done, int multi,
 					     void *payload, int size);
 extern void		    audit_log_lost(const char *message);
 extern void		    audit_panic(const char *message);
-extern struct mutex audit_netlink_mutex;
 
+struct audit_netlink_list {
+	int pid;
+	struct sk_buff_head q;
+};
+
+int audit_send_list(void *);
+
+struct inotify_watch;
+extern void audit_free_parent(struct inotify_watch *);
+extern void audit_handle_ievent(struct inotify_watch *, u32, u32, u32,
+				const char *, struct inode *);
 extern int selinux_audit_rule_update(void);
+
+#ifdef CONFIG_AUDITSYSCALL
+extern void __audit_signal_info(int sig, struct task_struct *t);
+static inline void audit_signal_info(int sig, struct task_struct *t)
+{
+	if (unlikely(audit_pid && t->tgid == audit_pid))
+		__audit_signal_info(sig, t);
+}
+extern enum audit_state audit_filter_inodes(struct task_struct *,
+					    struct audit_context *);
+extern void audit_set_auditable(struct audit_context *);
+#else
+#define audit_signal_info(s,t)
+#define audit_filter_inodes(t,c) AUDIT_DISABLED
+#define audit_set_auditable(c)
+#endif
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 7c134906d689..4c99d2c586ed 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -22,13 +22,59 @@
 #include <linux/kernel.h>
 #include <linux/audit.h>
 #include <linux/kthread.h>
+#include <linux/mutex.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
 #include <linux/netlink.h>
+#include <linux/sched.h>
+#include <linux/inotify.h>
 #include <linux/selinux.h>
 #include "audit.h"
 
-/* There are three lists of rules -- one to search at task creation
- * time, one to search at syscall entry time, and another to search at
- * syscall exit time. */
+/*
+ * Locking model:
+ *
+ * audit_filter_mutex:
+ * 		Synchronizes writes and blocking reads of audit's filterlist
+ * 		data.  Rcu is used to traverse the filterlist and access
+ * 		contents of structs audit_entry, audit_watch and opaque
+ * 		selinux rules during filtering.  If modified, these structures
+ * 		must be copied and replace their counterparts in the filterlist.
+ * 		An audit_parent struct is not accessed during filtering, so may
+ * 		be written directly provided audit_filter_mutex is held.
+ */
+
+/*
+ * Reference counting:
+ *
+ * audit_parent: lifetime is from audit_init_parent() to receipt of an IN_IGNORED
+ * 	event.  Each audit_watch holds a reference to its associated parent.
+ *
+ * audit_watch: if added to lists, lifetime is from audit_init_watch() to
+ * 	audit_remove_watch().  Additionally, an audit_watch may exist
+ * 	temporarily to assist in searching existing filter data.  Each
+ * 	audit_krule holds a reference to its associated watch.
+ */
+
+struct audit_parent {
+	struct list_head	ilist;	/* entry in inotify registration list */
+	struct list_head	watches; /* associated watches */
+	struct inotify_watch	wdata;	/* inotify watch data */
+	unsigned		flags;	/* status flags */
+};
+
+/*
+ * audit_parent status flags:
+ *
+ * AUDIT_PARENT_INVALID - set anytime rules/watches are auto-removed due to
+ * a filesystem event to ensure we're adding audit watches to a valid parent.
+ * Technically not needed for IN_DELETE_SELF or IN_UNMOUNT events, as we cannot
+ * receive them while we have nameidata, but must be used for IN_MOVE_SELF which
+ * we can receive while holding nameidata.
+ */
+#define AUDIT_PARENT_INVALID	0x001
+
+/* Audit filter lists, defined in <linux/audit.h> */
 struct list_head audit_filter_list[AUDIT_NR_FILTERS] = {
 	LIST_HEAD_INIT(audit_filter_list[0]),
 	LIST_HEAD_INIT(audit_filter_list[1]),
@@ -41,9 +87,53 @@ struct list_head audit_filter_list[AUDIT_NR_FILTERS] = {
 #endif
 };
 
+static DEFINE_MUTEX(audit_filter_mutex);
+
+/* Inotify handle */
+extern struct inotify_handle *audit_ih;
+
+/* Inotify events we care about. */
+#define AUDIT_IN_WATCH IN_MOVE|IN_CREATE|IN_DELETE|IN_DELETE_SELF|IN_MOVE_SELF
+
+void audit_free_parent(struct inotify_watch *i_watch)
+{
+	struct audit_parent *parent;
+
+	parent = container_of(i_watch, struct audit_parent, wdata);
+	WARN_ON(!list_empty(&parent->watches));
+	kfree(parent);
+}
+
+static inline void audit_get_watch(struct audit_watch *watch)
+{
+	atomic_inc(&watch->count);
+}
+
+static void audit_put_watch(struct audit_watch *watch)
+{
+	if (atomic_dec_and_test(&watch->count)) {
+		WARN_ON(watch->parent);
+		WARN_ON(!list_empty(&watch->rules));
+		kfree(watch->path);
+		kfree(watch);
+	}
+}
+
+static void audit_remove_watch(struct audit_watch *watch)
+{
+	list_del(&watch->wlist);
+	put_inotify_watch(&watch->parent->wdata);
+	watch->parent = NULL;
+	audit_put_watch(watch); /* match initial get */
+}
+
 static inline void audit_free_rule(struct audit_entry *e)
 {
 	int i;
+
+	/* some rules don't have associated watches */
+	if (e->rule.watch)
+		audit_put_watch(e->rule.watch);
 	if (e->rule.fields)
 		for (i = 0; i < e->rule.field_count; i++) {
 			struct audit_field *f = &e->rule.fields[i];
@@ -60,6 +150,50 @@ static inline void audit_free_rule_rcu(struct rcu_head *head)
 	audit_free_rule(e);
 }
 
+/* Initialize a parent watch entry. */
+static struct audit_parent *audit_init_parent(struct nameidata *ndp)
+{
+	struct audit_parent *parent;
+	s32 wd;
+
+	parent = kzalloc(sizeof(*parent), GFP_KERNEL);
+	if (unlikely(!parent))
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&parent->watches);
+	parent->flags = 0;
+
+	inotify_init_watch(&parent->wdata);
+	/* grab a ref so inotify watch hangs around until we take audit_filter_mutex */
+	get_inotify_watch(&parent->wdata);
+	wd = inotify_add_watch(audit_ih, &parent->wdata, ndp->dentry->d_inode,
+			       AUDIT_IN_WATCH);
+	if (wd < 0) {
+		audit_free_parent(&parent->wdata);
+		return ERR_PTR(wd);
+	}
+
+	return parent;
+}
+
+/* Initialize a watch entry. */
+static struct audit_watch *audit_init_watch(char *path)
+{
+	struct audit_watch *watch;
+
+	watch = kzalloc(sizeof(*watch), GFP_KERNEL);
+	if (unlikely(!watch))
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&watch->rules);
+	atomic_set(&watch->count, 1);
+	watch->path = path;
+	watch->dev = (dev_t)-1;
+	watch->ino = (unsigned long)-1;
+
+	return watch;
+}
+
 /* Initialize an audit filterlist entry. */
 static inline struct audit_entry *audit_init_entry(u32 field_count)
 {
@@ -107,6 +241,43 @@ static char *audit_unpack_string(void **bufp, size_t *remain, size_t len)
 	return str;
 }
 
+/* Translate an inode field to kernel respresentation. */
+static inline int audit_to_inode(struct audit_krule *krule,
+				 struct audit_field *f)
+{
+	if (krule->listnr != AUDIT_FILTER_EXIT ||
+	    krule->watch || krule->inode_f)
+		return -EINVAL;
+
+	krule->inode_f = f;
+	return 0;
+}
+
+/* Translate a watch string to kernel respresentation. */
+static int audit_to_watch(struct audit_krule *krule, char *path, int len,
+			  u32 op)
+{
+	struct audit_watch *watch;
+
+	if (!audit_ih)
+		return -EOPNOTSUPP;
+
+	if (path[0] != '/' || path[len-1] == '/' ||
+	    krule->listnr != AUDIT_FILTER_EXIT ||
+	    op & ~AUDIT_EQUAL ||
+	    krule->inode_f || krule->watch) /* 1 inode # per rule, for hash */
+		return -EINVAL;
+
+	watch = audit_init_watch(path);
+	if (unlikely(IS_ERR(watch)))
+		return PTR_ERR(watch);
+
+	audit_get_watch(watch);
+	krule->watch = watch;
+
+	return 0;
+}
+
 /* Common user-space to kernel rule translation. */
 static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule)
 {
@@ -128,8 +299,11 @@ static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule)
 #endif
 		;
 	}
-	if (rule->action != AUDIT_NEVER && rule->action != AUDIT_POSSIBLE &&
-	    rule->action != AUDIT_ALWAYS)
+	if (unlikely(rule->action == AUDIT_POSSIBLE)) {
+		printk(KERN_ERR "AUDIT_POSSIBLE is deprecated\n");
+		goto exit_err;
+	}
+	if (rule->action != AUDIT_NEVER && rule->action != AUDIT_ALWAYS)
 		goto exit_err;
 	if (rule->field_count > AUDIT_MAX_FIELDS)
 		goto exit_err;
@@ -158,6 +332,7 @@ exit_err:
 static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 {
 	struct audit_entry *entry;
+	struct audit_field *f;
 	int err = 0;
 	int i;
 
@@ -172,14 +347,37 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 		f->type = rule->fields[i] & ~(AUDIT_NEGATE|AUDIT_OPERATORS);
 		f->val = rule->values[i];
 
-		if (f->type & AUDIT_UNUSED_BITS ||
-		    f->type == AUDIT_SE_USER ||
-		    f->type == AUDIT_SE_ROLE ||
-		    f->type == AUDIT_SE_TYPE ||
-		    f->type == AUDIT_SE_SEN ||
-		    f->type == AUDIT_SE_CLR) {
-			err = -EINVAL;
+		err = -EINVAL;
+		switch(f->type) {
+		default:
 			goto exit_free;
+		case AUDIT_PID:
+		case AUDIT_UID:
+		case AUDIT_EUID:
+		case AUDIT_SUID:
+		case AUDIT_FSUID:
+		case AUDIT_GID:
+		case AUDIT_EGID:
+		case AUDIT_SGID:
+		case AUDIT_FSGID:
+		case AUDIT_LOGINUID:
+		case AUDIT_PERS:
+		case AUDIT_ARCH:
+		case AUDIT_MSGTYPE:
+		case AUDIT_DEVMAJOR:
+		case AUDIT_DEVMINOR:
+		case AUDIT_EXIT:
+		case AUDIT_SUCCESS:
+		case AUDIT_ARG0:
+		case AUDIT_ARG1:
+		case AUDIT_ARG2:
+		case AUDIT_ARG3:
+			break;
+		case AUDIT_INODE:
+			err = audit_to_inode(&entry->rule, f);
+			if (err)
+				goto exit_free;
+			break;
 		}
 
 		entry->rule.vers_ops = (f->op & AUDIT_OPERATORS) ? 2 : 1;
@@ -196,6 +394,18 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 		}
 	}
 
+	f = entry->rule.inode_f;
+	if (f) {
+		switch(f->op) {
+		case AUDIT_NOT_EQUAL:
+			entry->rule.inode_f = NULL;
+		case AUDIT_EQUAL:
+			break;
+		default:
+			goto exit_free;
+		}
+	}
+
 exit_nofree:
 	return entry;
 
@@ -210,6 +420,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 {
 	int err = 0;
 	struct audit_entry *entry;
+	struct audit_field *f;
 	void *bufp;
 	size_t remain = datasz - sizeof(struct audit_rule_data);
 	int i;
@@ -235,6 +446,29 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 		f->se_str = NULL;
 		f->se_rule = NULL;
 		switch(f->type) {
+		case AUDIT_PID:
+		case AUDIT_UID:
+		case AUDIT_EUID:
+		case AUDIT_SUID:
+		case AUDIT_FSUID:
+		case AUDIT_GID:
+		case AUDIT_EGID:
+		case AUDIT_SGID:
+		case AUDIT_FSGID:
+		case AUDIT_LOGINUID:
+		case AUDIT_PERS:
+		case AUDIT_ARCH:
+		case AUDIT_MSGTYPE:
+		case AUDIT_PPID:
+		case AUDIT_DEVMAJOR:
+		case AUDIT_DEVMINOR:
+		case AUDIT_EXIT:
+		case AUDIT_SUCCESS:
+		case AUDIT_ARG0:
+		case AUDIT_ARG1:
+		case AUDIT_ARG2:
+		case AUDIT_ARG3:
+			break;
 		case AUDIT_SE_USER:
 		case AUDIT_SE_ROLE:
 		case AUDIT_SE_TYPE:
@@ -260,6 +494,37 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 			} else
 				f->se_str = str;
 			break;
+		case AUDIT_WATCH:
+			str = audit_unpack_string(&bufp, &remain, f->val);
+			if (IS_ERR(str))
+				goto exit_free;
+			entry->rule.buflen += f->val;
+
+			err = audit_to_watch(&entry->rule, str, f->val, f->op);
+			if (err) {
+				kfree(str);
+				goto exit_free;
+			}
+			break;
+		case AUDIT_INODE:
+			err = audit_to_inode(&entry->rule, f);
+			if (err)
+				goto exit_free;
+			break;
+		default:
+			goto exit_free;
+		}
+	}
+
+	f = entry->rule.inode_f;
+	if (f) {
+		switch(f->op) {
+		case AUDIT_NOT_EQUAL:
+			entry->rule.inode_f = NULL;
+		case AUDIT_EQUAL:
+			break;
+		default:
+			goto exit_free;
 		}
 	}
 
@@ -291,7 +556,7 @@ static struct audit_rule *audit_krule_to_rule(struct audit_krule *krule)
 
 	rule = kmalloc(sizeof(*rule), GFP_KERNEL);
 	if (unlikely(!rule))
-		return ERR_PTR(-ENOMEM);
+		return NULL;
 	memset(rule, 0, sizeof(*rule));
 
 	rule->flags = krule->flags | krule->listnr;
@@ -322,7 +587,7 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
 
 	data = kmalloc(sizeof(*data) + krule->buflen, GFP_KERNEL);
 	if (unlikely(!data))
-		return ERR_PTR(-ENOMEM);
+		return NULL;
 	memset(data, 0, sizeof(*data));
 
 	data->flags = krule->flags | krule->listnr;
@@ -343,6 +608,10 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
 			data->buflen += data->values[i] =
 				audit_pack_string(&bufp, f->se_str);
 			break;
+		case AUDIT_WATCH:
+			data->buflen += data->values[i] =
+				audit_pack_string(&bufp, krule->watch->path);
+			break;
 		default:
 			data->values[i] = f->val;
 		}
@@ -378,6 +647,10 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
 			if (strcmp(a->fields[i].se_str, b->fields[i].se_str))
 				return 1;
 			break;
+		case AUDIT_WATCH:
+			if (strcmp(a->watch->path, b->watch->path))
+				return 1;
+			break;
 		default:
 			if (a->fields[i].val != b->fields[i].val)
 				return 1;
@@ -391,6 +664,32 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
 	return 0;
 }
 
+/* Duplicate the given audit watch.  The new watch's rules list is initialized
+ * to an empty list and wlist is undefined. */
+static struct audit_watch *audit_dupe_watch(struct audit_watch *old)
+{
+	char *path;
+	struct audit_watch *new;
+
+	path = kstrdup(old->path, GFP_KERNEL);
+	if (unlikely(!path))
+		return ERR_PTR(-ENOMEM);
+
+	new = audit_init_watch(path);
+	if (unlikely(IS_ERR(new))) {
+		kfree(path);
+		goto out;
+	}
+
+	new->dev = old->dev;
+	new->ino = old->ino;
+	get_inotify_watch(&old->parent->wdata);
+	new->parent = old->parent;
+
+out:
+	return new;
+}
+
 /* Duplicate selinux field information.  The se_rule is opaque, so must be
  * re-initialized. */
 static inline int audit_dupe_selinux_field(struct audit_field *df,
@@ -422,8 +721,11 @@ static inline int audit_dupe_selinux_field(struct audit_field *df,
 /* Duplicate an audit rule.  This will be a deep copy with the exception
  * of the watch - that pointer is carried over.  The selinux specific fields
  * will be updated in the copy.  The point is to be able to replace the old
- * rule with the new rule in the filterlist, then free the old rule. */
-static struct audit_entry *audit_dupe_rule(struct audit_krule *old)
+ * rule with the new rule in the filterlist, then free the old rule.
+ * The rlist element is undefined; list manipulations are handled apart from
+ * the initial copy. */
+static struct audit_entry *audit_dupe_rule(struct audit_krule *old,
+					   struct audit_watch *watch)
 {
 	u32 fcount = old->field_count;
 	struct audit_entry *entry;
@@ -442,6 +744,8 @@ static struct audit_entry *audit_dupe_rule(struct audit_krule *old)
 	for (i = 0; i < AUDIT_BITMASK_SIZE; i++)
 		new->mask[i] = old->mask[i];
 	new->buflen = old->buflen;
+	new->inode_f = old->inode_f;
+	new->watch = NULL;
 	new->field_count = old->field_count;
 	memcpy(new->fields, old->fields, sizeof(struct audit_field) * fcount);
 
@@ -463,68 +767,409 @@ static struct audit_entry *audit_dupe_rule(struct audit_krule *old)
 		}
 	}
 
+	if (watch) {
+		audit_get_watch(watch);
+		new->watch = watch;
+	}
+
 	return entry;
 }
 
-/* Add rule to given filterlist if not a duplicate.  Protected by
- * audit_netlink_mutex. */
+/* Update inode info in audit rules based on filesystem event. */
+static void audit_update_watch(struct audit_parent *parent,
+			       const char *dname, dev_t dev,
+			       unsigned long ino, unsigned invalidating)
+{
+	struct audit_watch *owatch, *nwatch, *nextw;
+	struct audit_krule *r, *nextr;
+	struct audit_entry *oentry, *nentry;
+	struct audit_buffer *ab;
+
+	mutex_lock(&audit_filter_mutex);
+	list_for_each_entry_safe(owatch, nextw, &parent->watches, wlist) {
+		if (audit_compare_dname_path(dname, owatch->path, NULL))
+			continue;
+
+		/* If the update involves invalidating rules, do the inode-based
+		 * filtering now, so we don't omit records. */
+		if (invalidating &&
+		    audit_filter_inodes(current, current->audit_context) == AUDIT_RECORD_CONTEXT)
+			audit_set_auditable(current->audit_context);
+
+		nwatch = audit_dupe_watch(owatch);
+		if (unlikely(IS_ERR(nwatch))) {
+			mutex_unlock(&audit_filter_mutex);
+			audit_panic("error updating watch, skipping");
+			return;
+		}
+		nwatch->dev = dev;
+		nwatch->ino = ino;
+
+		list_for_each_entry_safe(r, nextr, &owatch->rules, rlist) {
+
+			oentry = container_of(r, struct audit_entry, rule);
+			list_del(&oentry->rule.rlist);
+			list_del_rcu(&oentry->list);
+
+			nentry = audit_dupe_rule(&oentry->rule, nwatch);
+			if (unlikely(IS_ERR(nentry)))
+				audit_panic("error updating watch, removing");
+			else {
+				int h = audit_hash_ino((u32)ino);
+				list_add(&nentry->rule.rlist, &nwatch->rules);
+				list_add_rcu(&nentry->list, &audit_inode_hash[h]);
+			}
+
+			call_rcu(&oentry->rcu, audit_free_rule_rcu);
+		}
+
+		ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
+		audit_log_format(ab, "audit updated rules specifying watch=");
+		audit_log_untrustedstring(ab, owatch->path);
+		audit_log_format(ab, " with dev=%u ino=%lu\n", dev, ino);
+		audit_log_end(ab);
+
+		audit_remove_watch(owatch);
+		goto add_watch_to_parent; /* event applies to a single watch */
+	}
+	mutex_unlock(&audit_filter_mutex);
+	return;
+
+add_watch_to_parent:
+	list_add(&nwatch->wlist, &parent->watches);
+	mutex_unlock(&audit_filter_mutex);
+	return;
+}
+
+/* Remove all watches & rules associated with a parent that is going away. */
+static void audit_remove_parent_watches(struct audit_parent *parent)
+{
+	struct audit_watch *w, *nextw;
+	struct audit_krule *r, *nextr;
+	struct audit_entry *e;
+
+	mutex_lock(&audit_filter_mutex);
+	parent->flags |= AUDIT_PARENT_INVALID;
+	list_for_each_entry_safe(w, nextw, &parent->watches, wlist) {
+		list_for_each_entry_safe(r, nextr, &w->rules, rlist) {
+			e = container_of(r, struct audit_entry, rule);
+			list_del(&r->rlist);
+			list_del_rcu(&e->list);
+			call_rcu(&e->rcu, audit_free_rule_rcu);
+
+			audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+				 "audit implicitly removed rule from list=%d\n",
+				  AUDIT_FILTER_EXIT);
+		}
+		audit_remove_watch(w);
+	}
+	mutex_unlock(&audit_filter_mutex);
+}
+
+/* Unregister inotify watches for parents on in_list.
+ * Generates an IN_IGNORED event. */
+static void audit_inotify_unregister(struct list_head *in_list)
+{
+	struct audit_parent *p, *n;
+
+	list_for_each_entry_safe(p, n, in_list, ilist) {
+		list_del(&p->ilist);
+		inotify_rm_watch(audit_ih, &p->wdata);
+		/* the put matching the get in audit_do_del_rule() */
+		put_inotify_watch(&p->wdata);
+	}
+}
+
+/* Find an existing audit rule.
+ * Caller must hold audit_filter_mutex to prevent stale rule data. */
+static struct audit_entry *audit_find_rule(struct audit_entry *entry,
+					   struct list_head *list)
+{
+	struct audit_entry *e, *found = NULL;
+	int h;
+
+	if (entry->rule.watch) {
+		/* we don't know the inode number, so must walk entire hash */
+		for (h = 0; h < AUDIT_INODE_BUCKETS; h++) {
+			list = &audit_inode_hash[h];
+			list_for_each_entry(e, list, list)
+				if (!audit_compare_rule(&entry->rule, &e->rule)) {
+					found = e;
+					goto out;
+				}
+		}
+		goto out;
+	}
+
+	list_for_each_entry(e, list, list)
+		if (!audit_compare_rule(&entry->rule, &e->rule)) {
+			found = e;
+			goto out;
+		}
+
+out:
+	return found;
+}
+
+/* Get path information necessary for adding watches. */
+static int audit_get_nd(char *path, struct nameidata **ndp,
+			struct nameidata **ndw)
+{
+	struct nameidata *ndparent, *ndwatch;
+	int err;
+
+	ndparent = kmalloc(sizeof(*ndparent), GFP_KERNEL);
+	if (unlikely(!ndparent))
+		return -ENOMEM;
+
+	ndwatch = kmalloc(sizeof(*ndwatch), GFP_KERNEL);
+	if (unlikely(!ndwatch)) {
+		kfree(ndparent);
+		return -ENOMEM;
+	}
+
+	err = path_lookup(path, LOOKUP_PARENT, ndparent);
+	if (err) {
+		kfree(ndparent);
+		kfree(ndwatch);
+		return err;
+	}
+
+	err = path_lookup(path, 0, ndwatch);
+	if (err) {
+		kfree(ndwatch);
+		ndwatch = NULL;
+	}
+
+	*ndp = ndparent;
+	*ndw = ndwatch;
+
+	return 0;
+}
+
+/* Release resources used for watch path information. */
+static void audit_put_nd(struct nameidata *ndp, struct nameidata *ndw)
+{
+	if (ndp) {
+		path_release(ndp);
+		kfree(ndp);
+	}
+	if (ndw) {
+		path_release(ndw);
+		kfree(ndw);
+	}
+}
+
+/* Associate the given rule with an existing parent inotify_watch.
+ * Caller must hold audit_filter_mutex. */
+static void audit_add_to_parent(struct audit_krule *krule,
+				struct audit_parent *parent)
+{
+	struct audit_watch *w, *watch = krule->watch;
+	int watch_found = 0;
+
+	list_for_each_entry(w, &parent->watches, wlist) {
+		if (strcmp(watch->path, w->path))
+			continue;
+
+		watch_found = 1;
+
+		/* put krule's and initial refs to temporary watch */
+		audit_put_watch(watch);
+		audit_put_watch(watch);
+
+		audit_get_watch(w);
+		krule->watch = watch = w;
+		break;
+	}
+
+	if (!watch_found) {
+		get_inotify_watch(&parent->wdata);
+		watch->parent = parent;
+
+		list_add(&watch->wlist, &parent->watches);
+	}
+	list_add(&krule->rlist, &watch->rules);
+}
+
+/* Find a matching watch entry, or add this one.
+ * Caller must hold audit_filter_mutex. */
+static int audit_add_watch(struct audit_krule *krule, struct nameidata *ndp,
+			   struct nameidata *ndw)
+{
+	struct audit_watch *watch = krule->watch;
+	struct inotify_watch *i_watch;
+	struct audit_parent *parent;
+	int ret = 0;
+
+	/* update watch filter fields */
+	if (ndw) {
+		watch->dev = ndw->dentry->d_inode->i_sb->s_dev;
+		watch->ino = ndw->dentry->d_inode->i_ino;
+	}
+
+	/* The audit_filter_mutex must not be held during inotify calls because
+	 * we hold it during inotify event callback processing.  If an existing
+	 * inotify watch is found, inotify_find_watch() grabs a reference before
+	 * returning.
+	 */
+	mutex_unlock(&audit_filter_mutex);
+
+	if (inotify_find_watch(audit_ih, ndp->dentry->d_inode, &i_watch) < 0) {
+		parent = audit_init_parent(ndp);
+		if (IS_ERR(parent)) {
+			/* caller expects mutex locked */
+			mutex_lock(&audit_filter_mutex);
+			return PTR_ERR(parent);
+		}
+	} else
+		parent = container_of(i_watch, struct audit_parent, wdata);
+
+	mutex_lock(&audit_filter_mutex);
+
+	/* parent was moved before we took audit_filter_mutex */
+	if (parent->flags & AUDIT_PARENT_INVALID)
+		ret = -ENOENT;
+	else
+		audit_add_to_parent(krule, parent);
+
+	/* match get in audit_init_parent or inotify_find_watch */
+	put_inotify_watch(&parent->wdata);
+	return ret;
+}
+
+/* Add rule to given filterlist if not a duplicate. */
 static inline int audit_add_rule(struct audit_entry *entry,
-				  struct list_head *list)
+				 struct list_head *list)
 {
 	struct audit_entry *e;
+	struct audit_field *inode_f = entry->rule.inode_f;
+	struct audit_watch *watch = entry->rule.watch;
+	struct nameidata *ndp, *ndw;
+	int h, err, putnd_needed = 0;
+
+	if (inode_f) {
+		h = audit_hash_ino(inode_f->val);
+		list = &audit_inode_hash[h];
+	}
 
-	/* Do not use the _rcu iterator here, since this is the only
-	 * addition routine. */
-	list_for_each_entry(e, list, list) {
-		if (!audit_compare_rule(&entry->rule, &e->rule))
-			return -EEXIST;
+	mutex_lock(&audit_filter_mutex);
+	e = audit_find_rule(entry, list);
+	mutex_unlock(&audit_filter_mutex);
+	if (e) {
+		err = -EEXIST;
+		goto error;
+	}
+
+	/* Avoid calling path_lookup under audit_filter_mutex. */
+	if (watch) {
+		err = audit_get_nd(watch->path, &ndp, &ndw);
+		if (err)
+			goto error;
+		putnd_needed = 1;
+	}
+
+	mutex_lock(&audit_filter_mutex);
+	if (watch) {
+		/* audit_filter_mutex is dropped and re-taken during this call */
+		err = audit_add_watch(&entry->rule, ndp, ndw);
+		if (err) {
+			mutex_unlock(&audit_filter_mutex);
+			goto error;
+		}
+		h = audit_hash_ino((u32)watch->ino);
+		list = &audit_inode_hash[h];
 	}
 
 	if (entry->rule.flags & AUDIT_FILTER_PREPEND) {
 		list_add_rcu(&entry->list, list);
+		entry->rule.flags &= ~AUDIT_FILTER_PREPEND;
 	} else {
 		list_add_tail_rcu(&entry->list, list);
 	}
+	mutex_unlock(&audit_filter_mutex);
 
-	return 0;
+	if (putnd_needed)
+		audit_put_nd(ndp, ndw);
+
+ 	return 0;
+
+error:
+	if (putnd_needed)
+		audit_put_nd(ndp, ndw);
+	if (watch)
+		audit_put_watch(watch); /* tmp watch, matches initial get */
+	return err;
 }
 
-/* Remove an existing rule from filterlist.  Protected by
- * audit_netlink_mutex. */
+/* Remove an existing rule from filterlist. */
 static inline int audit_del_rule(struct audit_entry *entry,
 				 struct list_head *list)
 {
 	struct audit_entry  *e;
+	struct audit_field *inode_f = entry->rule.inode_f;
+	struct audit_watch *watch, *tmp_watch = entry->rule.watch;
+	LIST_HEAD(inotify_list);
+	int h, ret = 0;
+
+	if (inode_f) {
+		h = audit_hash_ino(inode_f->val);
+		list = &audit_inode_hash[h];
+	}
 
-	/* Do not use the _rcu iterator here, since this is the only
-	 * deletion routine. */
-	list_for_each_entry(e, list, list) {
-		if (!audit_compare_rule(&entry->rule, &e->rule)) {
-			list_del_rcu(&e->list);
-			call_rcu(&e->rcu, audit_free_rule_rcu);
-			return 0;
+	mutex_lock(&audit_filter_mutex);
+	e = audit_find_rule(entry, list);
+	if (!e) {
+		mutex_unlock(&audit_filter_mutex);
+		ret = -ENOENT;
+		goto out;
+	}
+
+	watch = e->rule.watch;
+	if (watch) {
+		struct audit_parent *parent = watch->parent;
+
+		list_del(&e->rule.rlist);
+
+		if (list_empty(&watch->rules)) {
+			audit_remove_watch(watch);
+
+			if (list_empty(&parent->watches)) {
+				/* Put parent on the inotify un-registration
+				 * list.  Grab a reference before releasing
+				 * audit_filter_mutex, to be released in
+				 * audit_inotify_unregister(). */
+				list_add(&parent->ilist, &inotify_list);
+				get_inotify_watch(&parent->wdata);
+			}
 		}
 	}
-	return -ENOENT;		/* No matching rule */
+
+	list_del_rcu(&e->list);
+	call_rcu(&e->rcu, audit_free_rule_rcu);
+
+	mutex_unlock(&audit_filter_mutex);
+
+	if (!list_empty(&inotify_list))
+		audit_inotify_unregister(&inotify_list);
+
+out:
+	if (tmp_watch)
+		audit_put_watch(tmp_watch); /* match initial get */
+
+	return ret;
 }
 
 /* List rules using struct audit_rule.  Exists for backward
  * compatibility with userspace. */
-static int audit_list(void *_dest)
+static void audit_list(int pid, int seq, struct sk_buff_head *q)
 {
-	int pid, seq;
-	int *dest = _dest;
+	struct sk_buff *skb;
 	struct audit_entry *entry;
 	int i;
 
-	pid = dest[0];
-	seq = dest[1];
-	kfree(dest);
-
-	mutex_lock(&audit_netlink_mutex);
-
-	/* The *_rcu iterators not needed here because we are
-	   always called with audit_netlink_mutex held. */
+	/* This is a blocking read, so use audit_filter_mutex instead of rcu
+	 * iterator to sync with list writers. */
 	for (i=0; i<AUDIT_NR_FILTERS; i++) {
 		list_for_each_entry(entry, &audit_filter_list[i], list) {
 			struct audit_rule *rule;
@@ -532,33 +1177,41 @@ static int audit_list(void *_dest)
 			rule = audit_krule_to_rule(&entry->rule);
 			if (unlikely(!rule))
 				break;
-			audit_send_reply(pid, seq, AUDIT_LIST, 0, 1,
+			skb = audit_make_reply(pid, seq, AUDIT_LIST, 0, 1,
 					 rule, sizeof(*rule));
+			if (skb)
+				skb_queue_tail(q, skb);
 			kfree(rule);
 		}
 	}
-	audit_send_reply(pid, seq, AUDIT_LIST, 1, 1, NULL, 0);
-	
-	mutex_unlock(&audit_netlink_mutex);
-	return 0;
+	for (i = 0; i < AUDIT_INODE_BUCKETS; i++) {
+		list_for_each_entry(entry, &audit_inode_hash[i], list) {
+			struct audit_rule *rule;
+
+			rule = audit_krule_to_rule(&entry->rule);
+			if (unlikely(!rule))
+				break;
+			skb = audit_make_reply(pid, seq, AUDIT_LIST, 0, 1,
+					 rule, sizeof(*rule));
+			if (skb)
+				skb_queue_tail(q, skb);
+			kfree(rule);
+		}
+	}
+	skb = audit_make_reply(pid, seq, AUDIT_LIST, 1, 1, NULL, 0);
+	if (skb)
+		skb_queue_tail(q, skb);
 }
 
 /* List rules using struct audit_rule_data. */
-static int audit_list_rules(void *_dest)
+static void audit_list_rules(int pid, int seq, struct sk_buff_head *q)
 {
-	int pid, seq;
-	int *dest = _dest;
+	struct sk_buff *skb;
 	struct audit_entry *e;
 	int i;
 
-	pid = dest[0];
-	seq = dest[1];
-	kfree(dest);
-
-	mutex_lock(&audit_netlink_mutex);
-
-	/* The *_rcu iterators not needed here because we are
-	   always called with audit_netlink_mutex held. */
+	/* This is a blocking read, so use audit_filter_mutex instead of rcu
+	 * iterator to sync with list writers. */
 	for (i=0; i<AUDIT_NR_FILTERS; i++) {
 		list_for_each_entry(e, &audit_filter_list[i], list) {
 			struct audit_rule_data *data;
@@ -566,15 +1219,30 @@ static int audit_list_rules(void *_dest)
 			data = audit_krule_to_data(&e->rule);
 			if (unlikely(!data))
 				break;
-			audit_send_reply(pid, seq, AUDIT_LIST_RULES, 0, 1,
-					 data, sizeof(*data));
+			skb = audit_make_reply(pid, seq, AUDIT_LIST_RULES, 0, 1,
+					 data, sizeof(*data) + data->buflen);
+			if (skb)
+				skb_queue_tail(q, skb);
 			kfree(data);
 		}
 	}
-	audit_send_reply(pid, seq, AUDIT_LIST_RULES, 1, 1, NULL, 0);
+	for (i=0; i< AUDIT_INODE_BUCKETS; i++) {
+		list_for_each_entry(e, &audit_inode_hash[i], list) {
+			struct audit_rule_data *data;
 
-	mutex_unlock(&audit_netlink_mutex);
-	return 0;
+			data = audit_krule_to_data(&e->rule);
+			if (unlikely(!data))
+				break;
+			skb = audit_make_reply(pid, seq, AUDIT_LIST_RULES, 0, 1,
+					 data, sizeof(*data) + data->buflen);
+			if (skb)
+				skb_queue_tail(q, skb);
+			kfree(data);
+		}
+	}
+	skb = audit_make_reply(pid, seq, AUDIT_LIST_RULES, 1, 1, NULL, 0);
+	if (skb)
+		skb_queue_tail(q, skb);
 }
 
 /**
@@ -592,7 +1260,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
 			 size_t datasz, uid_t loginuid, u32 sid)
 {
 	struct task_struct *tsk;
-	int *dest;
+	struct audit_netlink_list *dest;
 	int err = 0;
 	struct audit_entry *entry;
 
@@ -605,18 +1273,22 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
 		 * happen if we're actually running in the context of auditctl
 		 * trying to _send_ the stuff */
 		 
-		dest = kmalloc(2 * sizeof(int), GFP_KERNEL);
+		dest = kmalloc(sizeof(struct audit_netlink_list), GFP_KERNEL);
 		if (!dest)
 			return -ENOMEM;
-		dest[0] = pid;
-		dest[1] = seq;
+		dest->pid = pid;
+		skb_queue_head_init(&dest->q);
 
+		mutex_lock(&audit_filter_mutex);
 		if (type == AUDIT_LIST)
-			tsk = kthread_run(audit_list, dest, "audit_list");
+			audit_list(pid, seq, &dest->q);
 		else
-			tsk = kthread_run(audit_list_rules, dest,
-					  "audit_list_rules");
+			audit_list_rules(pid, seq, &dest->q);
+		mutex_unlock(&audit_filter_mutex);
+
+		tsk = kthread_run(audit_send_list, dest, "audit_send_list");
 		if (IS_ERR(tsk)) {
+			skb_queue_purge(&dest->q);
 			kfree(dest);
 			err = PTR_ERR(tsk);
 		}
@@ -632,6 +1304,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
 
 		err = audit_add_rule(entry,
 				     &audit_filter_list[entry->rule.listnr]);
+
 		if (sid) {
 			char *ctx = NULL;
 			u32 len;
@@ -712,7 +1385,43 @@ int audit_comparator(const u32 left, const u32 op, const u32 right)
 	return 0;
 }
 
+/* Compare given dentry name with last component in given path,
+ * return of 0 indicates a match. */
+int audit_compare_dname_path(const char *dname, const char *path,
+			     int *dirlen)
+{
+	int dlen, plen;
+	const char *p;
 
+	if (!dname || !path)
+		return 1;
+
+	dlen = strlen(dname);
+	plen = strlen(path);
+	if (plen < dlen)
+		return 1;
+
+	/* disregard trailing slashes */
+	p = path + plen - 1;
+	while ((*p == '/') && (p > path))
+		p--;
+
+	/* find last path component */
+	p = p - dlen + 1;
+	if (p < path)
+		return 1;
+	else if (p > path) {
+		if (*--p != '/')
+			return 1;
+		else
+			p++;
+	}
+
+	/* return length of path's directory component */
+	if (dirlen)
+		*dirlen = p - path;
+	return strncmp(p, dname, dlen);
+}
 
 static int audit_filter_user_rules(struct netlink_skb_parms *cb,
 				   struct audit_krule *rule,
@@ -744,7 +1453,6 @@ static int audit_filter_user_rules(struct netlink_skb_parms *cb,
 	}
 	switch (rule->action) {
 	case AUDIT_NEVER:    *state = AUDIT_DISABLED;	    break;
-	case AUDIT_POSSIBLE: *state = AUDIT_BUILD_CONTEXT;  break;
 	case AUDIT_ALWAYS:   *state = AUDIT_RECORD_CONTEXT; break;
 	}
 	return 1;
@@ -826,32 +1534,65 @@ static inline int audit_rule_has_selinux(struct audit_krule *rule)
 int selinux_audit_rule_update(void)
 {
 	struct audit_entry *entry, *n, *nentry;
+	struct audit_watch *watch;
 	int i, err = 0;
 
-	/* audit_netlink_mutex synchronizes the writers */
-	mutex_lock(&audit_netlink_mutex);
+	/* audit_filter_mutex synchronizes the writers */
+	mutex_lock(&audit_filter_mutex);
 
 	for (i = 0; i < AUDIT_NR_FILTERS; i++) {
 		list_for_each_entry_safe(entry, n, &audit_filter_list[i], list) {
 			if (!audit_rule_has_selinux(&entry->rule))
 				continue;
 
-			nentry = audit_dupe_rule(&entry->rule);
+			watch = entry->rule.watch;
+			nentry = audit_dupe_rule(&entry->rule, watch);
 			if (unlikely(IS_ERR(nentry))) {
 				/* save the first error encountered for the
 				 * return value */
 				if (!err)
 					err = PTR_ERR(nentry);
 				audit_panic("error updating selinux filters");
+				if (watch)
+					list_del(&entry->rule.rlist);
 				list_del_rcu(&entry->list);
 			} else {
+				if (watch) {
+					list_add(&nentry->rule.rlist,
+						 &watch->rules);
+					list_del(&entry->rule.rlist);
+				}
 				list_replace_rcu(&entry->list, &nentry->list);
 			}
 			call_rcu(&entry->rcu, audit_free_rule_rcu);
 		}
 	}
 
-	mutex_unlock(&audit_netlink_mutex);
+	mutex_unlock(&audit_filter_mutex);
 
 	return err;
 }
+
+/* Update watch data in audit rules based on inotify events. */
+void audit_handle_ievent(struct inotify_watch *i_watch, u32 wd, u32 mask,
+			 u32 cookie, const char *dname, struct inode *inode)
+{
+	struct audit_parent *parent;
+
+	parent = container_of(i_watch, struct audit_parent, wdata);
+
+	if (mask & (IN_CREATE|IN_MOVED_TO) && inode)
+		audit_update_watch(parent, dname, inode->i_sb->s_dev,
+				   inode->i_ino, 0);
+	else if (mask & (IN_DELETE|IN_MOVED_FROM))
+		audit_update_watch(parent, dname, (dev_t)-1, (unsigned long)-1, 1);
+	/* inotify automatically removes the watch and sends IN_IGNORED */
+	else if (mask & (IN_DELETE_SELF|IN_UNMOUNT))
+		audit_remove_parent_watches(parent);
+	/* inotify does not remove the watch, so remove it manually */
+	else if(mask & IN_MOVE_SELF) {
+		audit_remove_parent_watches(parent);
+		inotify_remove_watch_locked(audit_ih, i_watch);
+	} else if (mask & IN_IGNORED)
+		put_inotify_watch(i_watch);
+}
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 1c03a4ed1b27..9ebd96fda295 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -3,7 +3,7 @@
  *
  * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
  * Copyright 2005 Hewlett-Packard Development Company, L.P.
- * Copyright (C) 2005 IBM Corporation
+ * Copyright (C) 2005, 2006 IBM Corporation
  * All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify
@@ -29,6 +29,9 @@
  * this file -- see entry.S) is based on a GPL'd patch written by
  * okir@suse.de and Copyright 2003 SuSE Linux AG.
  *
+ * POSIX message queue support added by George Wilson <ltcgcw@us.ibm.com>,
+ * 2006.
+ *
  * The support of additional filter rules compares (>, <, >=, <=) was
  * added by Dustin Kirkland <dustin.kirkland@us.ibm.com>, 2005.
  *
@@ -49,6 +52,7 @@
 #include <linux/module.h>
 #include <linux/mount.h>
 #include <linux/socket.h>
+#include <linux/mqueue.h>
 #include <linux/audit.h>
 #include <linux/personality.h>
 #include <linux/time.h>
@@ -59,6 +63,8 @@
 #include <linux/list.h>
 #include <linux/tty.h>
 #include <linux/selinux.h>
+#include <linux/binfmts.h>
+#include <linux/syscalls.h>
 
 #include "audit.h"
 
@@ -76,6 +82,9 @@ extern int audit_enabled;
  * path_lookup. */
 #define AUDIT_NAMES_RESERVED 7
 
+/* Indicates that audit should log the full pathname. */
+#define AUDIT_NAME_FULL -1
+
 /* When fs/namei.c:getname() is called, we store the pointer in name and
  * we don't let putname() free it (instead we free all of the saved
  * pointers at syscall exit time).
@@ -83,8 +92,9 @@ extern int audit_enabled;
  * Further, in fs/namei.c:path_lookup() we store the inode and device. */
 struct audit_names {
 	const char	*name;
+	int		name_len;	/* number of name's characters to log */
+	unsigned	name_put;	/* call __putname() for this name */
 	unsigned long	ino;
-	unsigned long	pino;
 	dev_t		dev;
 	umode_t		mode;
 	uid_t		uid;
@@ -100,6 +110,33 @@ struct audit_aux_data {
 
 #define AUDIT_AUX_IPCPERM	0
 
+struct audit_aux_data_mq_open {
+	struct audit_aux_data	d;
+	int			oflag;
+	mode_t			mode;
+	struct mq_attr		attr;
+};
+
+struct audit_aux_data_mq_sendrecv {
+	struct audit_aux_data	d;
+	mqd_t			mqdes;
+	size_t			msg_len;
+	unsigned int		msg_prio;
+	struct timespec		abs_timeout;
+};
+
+struct audit_aux_data_mq_notify {
+	struct audit_aux_data	d;
+	mqd_t			mqdes;
+	struct sigevent 	notification;
+};
+
+struct audit_aux_data_mq_getsetattr {
+	struct audit_aux_data	d;
+	mqd_t			mqdes;
+	struct mq_attr 		mqstat;
+};
+
 struct audit_aux_data_ipcctl {
 	struct audit_aux_data	d;
 	struct ipc_perm		p;
@@ -110,6 +147,13 @@ struct audit_aux_data_ipcctl {
 	u32			osid;
 };
 
+struct audit_aux_data_execve {
+	struct audit_aux_data	d;
+	int argc;
+	int envc;
+	char mem[0];
+};
+
 struct audit_aux_data_socketcall {
 	struct audit_aux_data	d;
 	int			nargs;
@@ -148,7 +192,7 @@ struct audit_context {
 	struct audit_aux_data *aux;
 
 				/* Save things to print about task_struct */
-	pid_t		    pid;
+	pid_t		    pid, ppid;
 	uid_t		    uid, euid, suid, fsuid;
 	gid_t		    gid, egid, sgid, fsgid;
 	unsigned long	    personality;
@@ -160,12 +204,13 @@ struct audit_context {
 #endif
 };
 
-
+/* Determine if any context name data matches a rule's watch data */
 /* Compare a task_struct with an audit_rule.  Return 1 on match, 0
  * otherwise. */
 static int audit_filter_rules(struct task_struct *tsk,
 			      struct audit_krule *rule,
 			      struct audit_context *ctx,
+			      struct audit_names *name,
 			      enum audit_state *state)
 {
 	int i, j, need_sid = 1;
@@ -179,6 +224,10 @@ static int audit_filter_rules(struct task_struct *tsk,
 		case AUDIT_PID:
 			result = audit_comparator(tsk->pid, f->op, f->val);
 			break;
+		case AUDIT_PPID:
+			if (ctx)
+				result = audit_comparator(ctx->ppid, f->op, f->val);
+			break;
 		case AUDIT_UID:
 			result = audit_comparator(tsk->uid, f->op, f->val);
 			break;
@@ -224,7 +273,10 @@ static int audit_filter_rules(struct task_struct *tsk,
 			}
 			break;
 		case AUDIT_DEVMAJOR:
-			if (ctx) {
+			if (name)
+				result = audit_comparator(MAJOR(name->dev),
+							  f->op, f->val);
+			else if (ctx) {
 				for (j = 0; j < ctx->name_count; j++) {
 					if (audit_comparator(MAJOR(ctx->names[j].dev),	f->op, f->val)) {
 						++result;
@@ -234,7 +286,10 @@ static int audit_filter_rules(struct task_struct *tsk,
 			}
 			break;
 		case AUDIT_DEVMINOR:
-			if (ctx) {
+			if (name)
+				result = audit_comparator(MINOR(name->dev),
+							  f->op, f->val);
+			else if (ctx) {
 				for (j = 0; j < ctx->name_count; j++) {
 					if (audit_comparator(MINOR(ctx->names[j].dev), f->op, f->val)) {
 						++result;
@@ -244,16 +299,22 @@ static int audit_filter_rules(struct task_struct *tsk,
 			}
 			break;
 		case AUDIT_INODE:
-			if (ctx) {
+			if (name)
+				result = (name->ino == f->val);
+			else if (ctx) {
 				for (j = 0; j < ctx->name_count; j++) {
-					if (audit_comparator(ctx->names[j].ino, f->op, f->val) ||
-					    audit_comparator(ctx->names[j].pino, f->op, f->val)) {
+					if (audit_comparator(ctx->names[j].ino, f->op, f->val)) {
 						++result;
 						break;
 					}
 				}
 			}
 			break;
+		case AUDIT_WATCH:
+			if (name && rule->watch->ino != (unsigned long)-1)
+				result = (name->dev == rule->watch->dev &&
+					  name->ino == rule->watch->ino);
+			break;
 		case AUDIT_LOGINUID:
 			result = 0;
 			if (ctx)
@@ -294,7 +355,6 @@ static int audit_filter_rules(struct task_struct *tsk,
 	}
 	switch (rule->action) {
 	case AUDIT_NEVER:    *state = AUDIT_DISABLED;	    break;
-	case AUDIT_POSSIBLE: *state = AUDIT_BUILD_CONTEXT;  break;
 	case AUDIT_ALWAYS:   *state = AUDIT_RECORD_CONTEXT; break;
 	}
 	return 1;
@@ -311,7 +371,7 @@ static enum audit_state audit_filter_task(struct task_struct *tsk)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_TASK], list) {
-		if (audit_filter_rules(tsk, &e->rule, NULL, &state)) {
+		if (audit_filter_rules(tsk, &e->rule, NULL, NULL, &state)) {
 			rcu_read_unlock();
 			return state;
 		}
@@ -341,8 +401,47 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,
 		int bit  = AUDIT_BIT(ctx->major);
 
 		list_for_each_entry_rcu(e, list, list) {
-			if ((e->rule.mask[word] & bit) == bit
-					&& audit_filter_rules(tsk, &e->rule, ctx, &state)) {
+			if ((e->rule.mask[word] & bit) == bit &&
+			    audit_filter_rules(tsk, &e->rule, ctx, NULL,
+					       &state)) {
+				rcu_read_unlock();
+				return state;
+			}
+		}
+	}
+	rcu_read_unlock();
+	return AUDIT_BUILD_CONTEXT;
+}
+
+/* At syscall exit time, this filter is called if any audit_names[] have been
+ * collected during syscall processing.  We only check rules in sublists at hash
+ * buckets applicable to the inode numbers in audit_names[].
+ * Regarding audit_state, same rules apply as for audit_filter_syscall().
+ */
+enum audit_state audit_filter_inodes(struct task_struct *tsk,
+				     struct audit_context *ctx)
+{
+	int i;
+	struct audit_entry *e;
+	enum audit_state state;
+
+	if (audit_pid && tsk->tgid == audit_pid)
+		return AUDIT_DISABLED;
+
+	rcu_read_lock();
+	for (i = 0; i < ctx->name_count; i++) {
+		int word = AUDIT_WORD(ctx->major);
+		int bit  = AUDIT_BIT(ctx->major);
+		struct audit_names *n = &ctx->names[i];
+		int h = audit_hash_ino((u32)n->ino);
+		struct list_head *list = &audit_inode_hash[h];
+
+		if (list_empty(list))
+			continue;
+
+		list_for_each_entry_rcu(e, list, list) {
+			if ((e->rule.mask[word] & bit) == bit &&
+			    audit_filter_rules(tsk, &e->rule, ctx, n, &state)) {
 				rcu_read_unlock();
 				return state;
 			}
@@ -352,6 +451,11 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,
 	return AUDIT_BUILD_CONTEXT;
 }
 
+void audit_set_auditable(struct audit_context *ctx)
+{
+	ctx->auditable = 1;
+}
+
 static inline struct audit_context *audit_get_context(struct task_struct *tsk,
 						      int return_valid,
 						      int return_code)
@@ -365,12 +469,22 @@ static inline struct audit_context *audit_get_context(struct task_struct *tsk,
 
 	if (context->in_syscall && !context->auditable) {
 		enum audit_state state;
+
 		state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_EXIT]);
+		if (state == AUDIT_RECORD_CONTEXT) {
+			context->auditable = 1;
+			goto get_context;
+		}
+
+		state = audit_filter_inodes(tsk, context);
 		if (state == AUDIT_RECORD_CONTEXT)
 			context->auditable = 1;
+
 	}
 
+get_context:
 	context->pid = tsk->pid;
+	context->ppid = sys_getppid();	/* sic.  tsk == current in all cases */
 	context->uid = tsk->uid;
 	context->gid = tsk->gid;
 	context->euid = tsk->euid;
@@ -413,7 +527,7 @@ static inline void audit_free_names(struct audit_context *context)
 #endif
 
 	for (i = 0; i < context->name_count; i++) {
-		if (context->names[i].name)
+		if (context->names[i].name && context->names[i].name_put)
 			__putname(context->names[i].name);
 	}
 	context->name_count = 0;
@@ -606,7 +720,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 		tty = "(none)";
 	audit_log_format(ab,
 		  " a0=%lx a1=%lx a2=%lx a3=%lx items=%d"
-		  " pid=%d auid=%u uid=%u gid=%u"
+		  " ppid=%d pid=%d auid=%u uid=%u gid=%u"
 		  " euid=%u suid=%u fsuid=%u"
 		  " egid=%u sgid=%u fsgid=%u tty=%s",
 		  context->argv[0],
@@ -614,6 +728,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 		  context->argv[2],
 		  context->argv[3],
 		  context->name_count,
+		  context->ppid,
 		  context->pid,
 		  context->loginuid,
 		  context->uid,
@@ -630,11 +745,48 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 			continue; /* audit_panic has been called */
 
 		switch (aux->type) {
+		case AUDIT_MQ_OPEN: {
+			struct audit_aux_data_mq_open *axi = (void *)aux;
+			audit_log_format(ab,
+				"oflag=0x%x mode=%#o mq_flags=0x%lx mq_maxmsg=%ld "
+				"mq_msgsize=%ld mq_curmsgs=%ld",
+				axi->oflag, axi->mode, axi->attr.mq_flags,
+				axi->attr.mq_maxmsg, axi->attr.mq_msgsize,
+				axi->attr.mq_curmsgs);
+			break; }
+
+		case AUDIT_MQ_SENDRECV: {
+			struct audit_aux_data_mq_sendrecv *axi = (void *)aux;
+			audit_log_format(ab,
+				"mqdes=%d msg_len=%zd msg_prio=%u "
+				"abs_timeout_sec=%ld abs_timeout_nsec=%ld",
+				axi->mqdes, axi->msg_len, axi->msg_prio,
+				axi->abs_timeout.tv_sec, axi->abs_timeout.tv_nsec);
+			break; }
+
+		case AUDIT_MQ_NOTIFY: {
+			struct audit_aux_data_mq_notify *axi = (void *)aux;
+			audit_log_format(ab,
+				"mqdes=%d sigev_signo=%d",
+				axi->mqdes,
+				axi->notification.sigev_signo);
+			break; }
+
+		case AUDIT_MQ_GETSETATTR: {
+			struct audit_aux_data_mq_getsetattr *axi = (void *)aux;
+			audit_log_format(ab,
+				"mqdes=%d mq_flags=0x%lx mq_maxmsg=%ld mq_msgsize=%ld "
+				"mq_curmsgs=%ld ",
+				axi->mqdes,
+				axi->mqstat.mq_flags, axi->mqstat.mq_maxmsg,
+				axi->mqstat.mq_msgsize, axi->mqstat.mq_curmsgs);
+			break; }
+
 		case AUDIT_IPC: {
 			struct audit_aux_data_ipcctl *axi = (void *)aux;
 			audit_log_format(ab, 
-				 " qbytes=%lx iuid=%u igid=%u mode=%x",
-				 axi->qbytes, axi->uid, axi->gid, axi->mode);
+				 "ouid=%u ogid=%u mode=%x",
+				 axi->uid, axi->gid, axi->mode);
 			if (axi->osid != 0) {
 				char *ctx = NULL;
 				u32 len;
@@ -652,19 +804,18 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 		case AUDIT_IPC_SET_PERM: {
 			struct audit_aux_data_ipcctl *axi = (void *)aux;
 			audit_log_format(ab,
-				" new qbytes=%lx new iuid=%u new igid=%u new mode=%x",
+				"qbytes=%lx ouid=%u ogid=%u mode=%x",
 				axi->qbytes, axi->uid, axi->gid, axi->mode);
-			if (axi->osid != 0) {
-				char *ctx = NULL;
-				u32 len;
-				if (selinux_ctxid_to_string(
-						axi->osid, &ctx, &len)) {
-					audit_log_format(ab, " osid=%u",
-							axi->osid);
-					call_panic = 1;
-				} else
-					audit_log_format(ab, " obj=%s", ctx);
-				kfree(ctx);
+			break; }
+
+		case AUDIT_EXECVE: {
+			struct audit_aux_data_execve *axi = (void *)aux;
+			int i;
+			const char *p;
+			for (i = 0, p = axi->mem; i < axi->argc; i++) {
+				audit_log_format(ab, "a%d=", i);
+				p = audit_log_untrustedstring(ab, p);
+				audit_log_format(ab, "\n");
 			}
 			break; }
 
@@ -700,8 +851,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 		}
 	}
 	for (i = 0; i < context->name_count; i++) {
-		unsigned long ino  = context->names[i].ino;
-		unsigned long pino = context->names[i].pino;
+		struct audit_names *n = &context->names[i];
 
 		ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH);
 		if (!ab)
@@ -709,33 +859,47 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 
 		audit_log_format(ab, "item=%d", i);
 
-		audit_log_format(ab, " name=");
-		if (context->names[i].name)
-			audit_log_untrustedstring(ab, context->names[i].name);
-		else
-			audit_log_format(ab, "(null)");
-
-		if (pino != (unsigned long)-1)
-			audit_log_format(ab, " parent=%lu",  pino);
-		if (ino != (unsigned long)-1)
-			audit_log_format(ab, " inode=%lu",  ino);
-		if ((pino != (unsigned long)-1) || (ino != (unsigned long)-1))
-			audit_log_format(ab, " dev=%02x:%02x mode=%#o" 
-					 " ouid=%u ogid=%u rdev=%02x:%02x", 
-					 MAJOR(context->names[i].dev), 
-					 MINOR(context->names[i].dev), 
-					 context->names[i].mode, 
-					 context->names[i].uid, 
-					 context->names[i].gid, 
-					 MAJOR(context->names[i].rdev), 
-					 MINOR(context->names[i].rdev));
-		if (context->names[i].osid != 0) {
+		if (n->name) {
+			switch(n->name_len) {
+			case AUDIT_NAME_FULL:
+				/* log the full path */
+				audit_log_format(ab, " name=");
+				audit_log_untrustedstring(ab, n->name);
+				break;
+			case 0:
+				/* name was specified as a relative path and the
+				 * directory component is the cwd */
+				audit_log_d_path(ab, " name=", context->pwd,
+						 context->pwdmnt);
+				break;
+			default:
+				/* log the name's directory component */
+				audit_log_format(ab, " name=");
+				audit_log_n_untrustedstring(ab, n->name_len,
+							    n->name);
+			}
+		} else
+			audit_log_format(ab, " name=(null)");
+
+		if (n->ino != (unsigned long)-1) {
+			audit_log_format(ab, " inode=%lu"
+					 " dev=%02x:%02x mode=%#o"
+					 " ouid=%u ogid=%u rdev=%02x:%02x",
+					 n->ino,
+					 MAJOR(n->dev),
+					 MINOR(n->dev),
+					 n->mode,
+					 n->uid,
+					 n->gid,
+					 MAJOR(n->rdev),
+					 MINOR(n->rdev));
+		}
+		if (n->osid != 0) {
 			char *ctx = NULL;
 			u32 len;
 			if (selinux_ctxid_to_string(
-				context->names[i].osid, &ctx, &len)) {
-				audit_log_format(ab, " osid=%u",
-						context->names[i].osid);
+				n->osid, &ctx, &len)) {
+				audit_log_format(ab, " osid=%u", n->osid);
 				call_panic = 2;
 			} else
 				audit_log_format(ab, " obj=%s", ctx);
@@ -908,11 +1072,11 @@ void audit_syscall_exit(int valid, long return_code)
  * Add a name to the list of audit names for this context.
  * Called from fs/namei.c:getname().
  */
-void audit_getname(const char *name)
+void __audit_getname(const char *name)
 {
 	struct audit_context *context = current->audit_context;
 
-	if (!context || IS_ERR(name) || !name)
+	if (IS_ERR(name) || !name)
 		return;
 
 	if (!context->in_syscall) {
@@ -925,6 +1089,8 @@ void audit_getname(const char *name)
 	}
 	BUG_ON(context->name_count >= AUDIT_NAMES);
 	context->names[context->name_count].name = name;
+	context->names[context->name_count].name_len = AUDIT_NAME_FULL;
+	context->names[context->name_count].name_put = 1;
 	context->names[context->name_count].ino  = (unsigned long)-1;
 	++context->name_count;
 	if (!context->pwd) {
@@ -991,11 +1157,10 @@ static void audit_inode_context(int idx, const struct inode *inode)
  * audit_inode - store the inode and device from a lookup
  * @name: name being audited
  * @inode: inode being audited
- * @flags: lookup flags (as used in path_lookup())
  *
  * Called from fs/namei.c:path_lookup().
  */
-void __audit_inode(const char *name, const struct inode *inode, unsigned flags)
+void __audit_inode(const char *name, const struct inode *inode)
 {
 	int idx;
 	struct audit_context *context = current->audit_context;
@@ -1021,20 +1186,13 @@ void __audit_inode(const char *name, const struct inode *inode, unsigned flags)
 		++context->ino_count;
 #endif
 	}
+	context->names[idx].ino   = inode->i_ino;
 	context->names[idx].dev	  = inode->i_sb->s_dev;
 	context->names[idx].mode  = inode->i_mode;
 	context->names[idx].uid   = inode->i_uid;
 	context->names[idx].gid   = inode->i_gid;
 	context->names[idx].rdev  = inode->i_rdev;
 	audit_inode_context(idx, inode);
-	if ((flags & LOOKUP_PARENT) && (strcmp(name, "/") != 0) && 
-	    (strcmp(name, ".") != 0)) {
-		context->names[idx].ino   = (unsigned long)-1;
-		context->names[idx].pino  = inode->i_ino;
-	} else {
-		context->names[idx].ino   = inode->i_ino;
-		context->names[idx].pino  = (unsigned long)-1;
-	}
 }
 
 /**
@@ -1056,51 +1214,40 @@ void __audit_inode_child(const char *dname, const struct inode *inode,
 {
 	int idx;
 	struct audit_context *context = current->audit_context;
+	const char *found_name = NULL;
+	int dirlen = 0;
 
 	if (!context->in_syscall)
 		return;
 
 	/* determine matching parent */
-	if (dname)
-		for (idx = 0; idx < context->name_count; idx++)
-			if (context->names[idx].pino == pino) {
-				const char *n;
-				const char *name = context->names[idx].name;
-				int dlen = strlen(dname);
-				int nlen = name ? strlen(name) : 0;
-
-				if (nlen < dlen)
-					continue;
-				
-				/* disregard trailing slashes */
-				n = name + nlen - 1;
-				while ((*n == '/') && (n > name))
-					n--;
-
-				/* find last path component */
-				n = n - dlen + 1;
-				if (n < name)
-					continue;
-				else if (n > name) {
-					if (*--n != '/')
-						continue;
-					else
-						n++;
-				}
-
-				if (strncmp(n, dname, dlen) == 0)
-					goto update_context;
+	if (!dname)
+		goto update_context;
+	for (idx = 0; idx < context->name_count; idx++)
+		if (context->names[idx].ino == pino) {
+			const char *name = context->names[idx].name;
+
+			if (!name)
+				continue;
+
+			if (audit_compare_dname_path(dname, name, &dirlen) == 0) {
+				context->names[idx].name_len = dirlen;
+				found_name = name;
+				break;
 			}
+		}
 
-	/* catch-all in case match not found */
+update_context:
 	idx = context->name_count++;
-	context->names[idx].name  = NULL;
-	context->names[idx].pino  = pino;
 #if AUDIT_DEBUG
 	context->ino_count++;
 #endif
+	/* Re-use the name belonging to the slot for a matching parent directory.
+	 * All names for this context are relinquished in audit_free_names() */
+	context->names[idx].name = found_name;
+	context->names[idx].name_len = AUDIT_NAME_FULL;
+	context->names[idx].name_put = 0;	/* don't call __putname() */
 
-update_context:
 	if (inode) {
 		context->names[idx].ino   = inode->i_ino;
 		context->names[idx].dev	  = inode->i_sb->s_dev;
@@ -1109,7 +1256,8 @@ update_context:
 		context->names[idx].gid   = inode->i_gid;
 		context->names[idx].rdev  = inode->i_rdev;
 		audit_inode_context(idx, inode);
-	}
+	} else
+		context->names[idx].ino   = (unsigned long)-1;
 }
 
 /**
@@ -1142,18 +1290,23 @@ void auditsc_get_stamp(struct audit_context *ctx,
  */
 int audit_set_loginuid(struct task_struct *task, uid_t loginuid)
 {
-	if (task->audit_context) {
-		struct audit_buffer *ab;
-
-		ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
-		if (ab) {
-			audit_log_format(ab, "login pid=%d uid=%u "
-				"old auid=%u new auid=%u",
-				task->pid, task->uid, 
-				task->audit_context->loginuid, loginuid);
-			audit_log_end(ab);
+	struct audit_context *context = task->audit_context;
+
+	if (context) {
+		/* Only log if audit is enabled */
+		if (context->in_syscall) {
+			struct audit_buffer *ab;
+
+			ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
+			if (ab) {
+				audit_log_format(ab, "login pid=%d uid=%u "
+					"old auid=%u new auid=%u",
+					task->pid, task->uid,
+					context->loginuid, loginuid);
+				audit_log_end(ab);
+			}
 		}
-		task->audit_context->loginuid = loginuid;
+		context->loginuid = loginuid;
 	}
 	return 0;
 }
@@ -1170,16 +1323,193 @@ uid_t audit_get_loginuid(struct audit_context *ctx)
 }
 
 /**
- * audit_ipc_obj - record audit data for ipc object
- * @ipcp: ipc permissions
+ * __audit_mq_open - record audit data for a POSIX MQ open
+ * @oflag: open flag
+ * @mode: mode bits
+ * @u_attr: queue attributes
  *
  * Returns 0 for success or NULL context or < 0 on error.
  */
-int audit_ipc_obj(struct kern_ipc_perm *ipcp)
+int __audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr)
 {
-	struct audit_aux_data_ipcctl *ax;
+	struct audit_aux_data_mq_open *ax;
+	struct audit_context *context = current->audit_context;
+
+	if (!audit_enabled)
+		return 0;
+
+	if (likely(!context))
+		return 0;
+
+	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
+	if (!ax)
+		return -ENOMEM;
+
+	if (u_attr != NULL) {
+		if (copy_from_user(&ax->attr, u_attr, sizeof(ax->attr))) {
+			kfree(ax);
+			return -EFAULT;
+		}
+	} else
+		memset(&ax->attr, 0, sizeof(ax->attr));
+
+	ax->oflag = oflag;
+	ax->mode = mode;
+
+	ax->d.type = AUDIT_MQ_OPEN;
+	ax->d.next = context->aux;
+	context->aux = (void *)ax;
+	return 0;
+}
+
+/**
+ * __audit_mq_timedsend - record audit data for a POSIX MQ timed send
+ * @mqdes: MQ descriptor
+ * @msg_len: Message length
+ * @msg_prio: Message priority
+ * @abs_timeout: Message timeout in absolute time
+ *
+ * Returns 0 for success or NULL context or < 0 on error.
+ */
+int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_prio,
+			const struct timespec __user *u_abs_timeout)
+{
+	struct audit_aux_data_mq_sendrecv *ax;
+	struct audit_context *context = current->audit_context;
+
+	if (!audit_enabled)
+		return 0;
+
+	if (likely(!context))
+		return 0;
+
+	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
+	if (!ax)
+		return -ENOMEM;
+
+	if (u_abs_timeout != NULL) {
+		if (copy_from_user(&ax->abs_timeout, u_abs_timeout, sizeof(ax->abs_timeout))) {
+			kfree(ax);
+			return -EFAULT;
+		}
+	} else
+		memset(&ax->abs_timeout, 0, sizeof(ax->abs_timeout));
+
+	ax->mqdes = mqdes;
+	ax->msg_len = msg_len;
+	ax->msg_prio = msg_prio;
+
+	ax->d.type = AUDIT_MQ_SENDRECV;
+	ax->d.next = context->aux;
+	context->aux = (void *)ax;
+	return 0;
+}
+
+/**
+ * __audit_mq_timedreceive - record audit data for a POSIX MQ timed receive
+ * @mqdes: MQ descriptor
+ * @msg_len: Message length
+ * @msg_prio: Message priority
+ * @abs_timeout: Message timeout in absolute time
+ *
+ * Returns 0 for success or NULL context or < 0 on error.
+ */
+int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len,
+				unsigned int __user *u_msg_prio,
+				const struct timespec __user *u_abs_timeout)
+{
+	struct audit_aux_data_mq_sendrecv *ax;
+	struct audit_context *context = current->audit_context;
+
+	if (!audit_enabled)
+		return 0;
+
+	if (likely(!context))
+		return 0;
+
+	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
+	if (!ax)
+		return -ENOMEM;
+
+	if (u_msg_prio != NULL) {
+		if (get_user(ax->msg_prio, u_msg_prio)) {
+			kfree(ax);
+			return -EFAULT;
+		}
+	} else
+		ax->msg_prio = 0;
+
+	if (u_abs_timeout != NULL) {
+		if (copy_from_user(&ax->abs_timeout, u_abs_timeout, sizeof(ax->abs_timeout))) {
+			kfree(ax);
+			return -EFAULT;
+		}
+	} else
+		memset(&ax->abs_timeout, 0, sizeof(ax->abs_timeout));
+
+	ax->mqdes = mqdes;
+	ax->msg_len = msg_len;
+
+	ax->d.type = AUDIT_MQ_SENDRECV;
+	ax->d.next = context->aux;
+	context->aux = (void *)ax;
+	return 0;
+}
+
+/**
+ * __audit_mq_notify - record audit data for a POSIX MQ notify
+ * @mqdes: MQ descriptor
+ * @u_notification: Notification event
+ *
+ * Returns 0 for success or NULL context or < 0 on error.
+ */
+
+int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification)
+{
+	struct audit_aux_data_mq_notify *ax;
+	struct audit_context *context = current->audit_context;
+
+	if (!audit_enabled)
+		return 0;
+
+	if (likely(!context))
+		return 0;
+
+	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
+	if (!ax)
+		return -ENOMEM;
+
+	if (u_notification != NULL) {
+		if (copy_from_user(&ax->notification, u_notification, sizeof(ax->notification))) {
+			kfree(ax);
+			return -EFAULT;
+		}
+	} else
+		memset(&ax->notification, 0, sizeof(ax->notification));
+
+	ax->mqdes = mqdes;
+
+	ax->d.type = AUDIT_MQ_NOTIFY;
+	ax->d.next = context->aux;
+	context->aux = (void *)ax;
+	return 0;
+}
+
+/**
+ * __audit_mq_getsetattr - record audit data for a POSIX MQ get/set attribute
+ * @mqdes: MQ descriptor
+ * @mqstat: MQ flags
+ *
+ * Returns 0 for success or NULL context or < 0 on error.
+ */
+int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat)
+{
+	struct audit_aux_data_mq_getsetattr *ax;
 	struct audit_context *context = current->audit_context;
 
+	if (!audit_enabled)
+		return 0;
+
 	if (likely(!context))
 		return 0;
 
@@ -1187,6 +1517,30 @@ int audit_ipc_obj(struct kern_ipc_perm *ipcp)
 	if (!ax)
 		return -ENOMEM;
 
+	ax->mqdes = mqdes;
+	ax->mqstat = *mqstat;
+
+	ax->d.type = AUDIT_MQ_GETSETATTR;
+	ax->d.next = context->aux;
+	context->aux = (void *)ax;
+	return 0;
+}
+
+/**
+ * audit_ipc_obj - record audit data for ipc object
+ * @ipcp: ipc permissions
+ *
+ * Returns 0 for success or NULL context or < 0 on error.
+ */
+int __audit_ipc_obj(struct kern_ipc_perm *ipcp)
+{
+	struct audit_aux_data_ipcctl *ax;
+	struct audit_context *context = current->audit_context;
+
+	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
+	if (!ax)
+		return -ENOMEM;
+
 	ax->uid = ipcp->uid;
 	ax->gid = ipcp->gid;
 	ax->mode = ipcp->mode;
@@ -1204,17 +1558,15 @@ int audit_ipc_obj(struct kern_ipc_perm *ipcp)
  * @uid: msgq user id
  * @gid: msgq group id
  * @mode: msgq mode (permissions)
+ * @ipcp: in-kernel IPC permissions
  *
  * Returns 0 for success or NULL context or < 0 on error.
  */
-int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, struct kern_ipc_perm *ipcp)
+int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode)
 {
 	struct audit_aux_data_ipcctl *ax;
 	struct audit_context *context = current->audit_context;
 
-	if (likely(!context))
-		return 0;
-
 	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
 	if (!ax)
 		return -ENOMEM;
@@ -1223,7 +1575,6 @@ int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode,
 	ax->uid = uid;
 	ax->gid = gid;
 	ax->mode = mode;
-	selinux_get_ipc_sid(ipcp, &ax->osid);
 
 	ax->d.type = AUDIT_IPC_SET_PERM;
 	ax->d.next = context->aux;
@@ -1231,6 +1582,39 @@ int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode,
 	return 0;
 }
 
+int audit_bprm(struct linux_binprm *bprm)
+{
+	struct audit_aux_data_execve *ax;
+	struct audit_context *context = current->audit_context;
+	unsigned long p, next;
+	void *to;
+
+	if (likely(!audit_enabled || !context))
+		return 0;
+
+	ax = kmalloc(sizeof(*ax) + PAGE_SIZE * MAX_ARG_PAGES - bprm->p,
+				GFP_KERNEL);
+	if (!ax)
+		return -ENOMEM;
+
+	ax->argc = bprm->argc;
+	ax->envc = bprm->envc;
+	for (p = bprm->p, to = ax->mem; p < MAX_ARG_PAGES*PAGE_SIZE; p = next) {
+		struct page *page = bprm->page[p / PAGE_SIZE];
+		void *kaddr = kmap(page);
+		next = (p + PAGE_SIZE) & ~(PAGE_SIZE - 1);
+		memcpy(to, kaddr + (p & (PAGE_SIZE - 1)), next - p);
+		to += next - p;
+		kunmap(page);
+	}
+
+	ax->d.type = AUDIT_EXECVE;
+	ax->d.next = context->aux;
+	context->aux = (void *)ax;
+	return 0;
+}
+
+
 /**
  * audit_socketcall - record audit data for sys_socketcall
  * @nargs: number of args
@@ -1325,19 +1709,20 @@ int audit_avc_path(struct dentry *dentry, struct vfsmount *mnt)
  * If the audit subsystem is being terminated, record the task (pid)
  * and uid that is doing that.
  */
-void audit_signal_info(int sig, struct task_struct *t)
+void __audit_signal_info(int sig, struct task_struct *t)
 {
 	extern pid_t audit_sig_pid;
 	extern uid_t audit_sig_uid;
-
-	if (unlikely(audit_pid && t->tgid == audit_pid)) {
-		if (sig == SIGTERM || sig == SIGHUP) {
-			struct audit_context *ctx = current->audit_context;
-			audit_sig_pid = current->pid;
-			if (ctx)
-				audit_sig_uid = ctx->loginuid;
-			else
-				audit_sig_uid = current->uid;
-		}
+	extern u32 audit_sig_sid;
+
+	if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1) {
+		struct task_struct *tsk = current;
+		struct audit_context *ctx = tsk->audit_context;
+		audit_sig_pid = tsk->pid;
+		if (ctx)
+			audit_sig_uid = ctx->loginuid;
+		else
+			audit_sig_uid = tsk->uid;
+		selinux_get_task_sid(tsk, &audit_sig_sid);
 	}
 }
diff --git a/kernel/compat.c b/kernel/compat.c
index c1601a84f8d8..126dee9530aa 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -21,6 +21,7 @@
 #include <linux/unistd.h>
 #include <linux/security.h>
 #include <linux/timex.h>
+#include <linux/migrate.h>
 
 #include <asm/uaccess.h>
 
@@ -729,17 +730,10 @@ void
 sigset_from_compat (sigset_t *set, compat_sigset_t *compat)
 {
 	switch (_NSIG_WORDS) {
-#if defined (__COMPAT_ENDIAN_SWAP__)
-	case 4: set->sig[3] = compat->sig[7] | (((long)compat->sig[6]) << 32 );
-	case 3: set->sig[2] = compat->sig[5] | (((long)compat->sig[4]) << 32 );
-	case 2: set->sig[1] = compat->sig[3] | (((long)compat->sig[2]) << 32 );
-	case 1: set->sig[0] = compat->sig[1] | (((long)compat->sig[0]) << 32 );
-#else
 	case 4: set->sig[3] = compat->sig[6] | (((long)compat->sig[7]) << 32 );
 	case 3: set->sig[2] = compat->sig[4] | (((long)compat->sig[5]) << 32 );
 	case 2: set->sig[1] = compat->sig[2] | (((long)compat->sig[3]) << 32 );
 	case 1: set->sig[0] = compat->sig[0] | (((long)compat->sig[1]) << 32 );
-#endif
 	}
 }
 
@@ -934,3 +928,25 @@ asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp)
 
 	return ret;
 }
+
+#ifdef CONFIG_NUMA
+asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_pages,
+		compat_uptr_t __user *pages32,
+		const int __user *nodes,
+		int __user *status,
+		int flags)
+{
+	const void __user * __user *pages;
+	int i;
+
+	pages = compat_alloc_user_space(nr_pages * sizeof(void *));
+	for (i = 0; i < nr_pages; i++) {
+		compat_uptr_t p;
+
+		if (get_user(p, pages32 + i) ||
+			put_user(compat_ptr(p), pages + i))
+			return -EFAULT;
+	}
+	return sys_move_pages(pid, nr_pages, pages, nodes, status, flags);
+}
+#endif
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index ab81fdd4572b..b602f73fb38d 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -41,6 +41,7 @@
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
 #include <linux/seq_file.h>
+#include <linux/security.h>
 #include <linux/slab.h>
 #include <linux/smp_lock.h>
 #include <linux/spinlock.h>
@@ -392,11 +393,11 @@ static int cpuset_fill_super(struct super_block *sb, void *unused_data,
 	return 0;
 }
 
-static struct super_block *cpuset_get_sb(struct file_system_type *fs_type,
-					int flags, const char *unused_dev_name,
-					void *data)
+static int cpuset_get_sb(struct file_system_type *fs_type,
+			 int flags, const char *unused_dev_name,
+			 void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, cpuset_fill_super);
+	return get_sb_single(fs_type, flags, data, cpuset_fill_super, mnt);
 }
 
 static struct file_system_type cpuset_fs_type = {
@@ -1177,6 +1178,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
 	cpumask_t cpus;
 	nodemask_t from, to;
 	struct mm_struct *mm;
+	int retval;
 
 	if (sscanf(pidbuf, "%d", &pid) != 1)
 		return -EIO;
@@ -1205,6 +1207,12 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
 		get_task_struct(tsk);
 	}
 
+	retval = security_task_setscheduler(tsk, 0, NULL);
+	if (retval) {
+		put_task_struct(tsk);
+		return retval;
+	}
+
 	mutex_lock(&callback_mutex);
 
 	task_lock(tsk);
diff --git a/kernel/exit.c b/kernel/exit.c
index e95b93282210..e76bd02e930e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -36,6 +36,7 @@
 #include <linux/compat.h>
 #include <linux/pipe_fs_i.h>
 #include <linux/audit.h> /* for audit_free() */
+#include <linux/resource.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -45,8 +46,6 @@
 extern void sem_exit (void);
 extern struct task_struct *child_reaper;
 
-int getrusage(struct task_struct *, int, struct rusage __user *);
-
 static void exit_mm(struct task_struct * tsk);
 
 static void __unhash_process(struct task_struct *p)
@@ -579,7 +578,7 @@ static void exit_mm(struct task_struct * tsk)
 		down_read(&mm->mmap_sem);
 	}
 	atomic_inc(&mm->mm_count);
-	if (mm != tsk->active_mm) BUG();
+	BUG_ON(mm != tsk->active_mm);
 	/* more a memory barrier than a real lock */
 	task_lock(tsk);
 	tsk->mm = NULL;
@@ -881,14 +880,6 @@ fastcall NORET_TYPE void do_exit(long code)
 
 	tsk->flags |= PF_EXITING;
 
-	/*
-	 * Make sure we don't try to process any timer firings
-	 * while we are already exiting.
-	 */
- 	tsk->it_virt_expires = cputime_zero;
- 	tsk->it_prof_expires = cputime_zero;
-	tsk->it_sched_expires = 0;
-
 	if (unlikely(in_atomic()))
 		printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
 				current->comm, current->pid,
@@ -903,11 +894,11 @@ fastcall NORET_TYPE void do_exit(long code)
 	if (group_dead) {
  		hrtimer_cancel(&tsk->signal->real_timer);
 		exit_itimers(tsk->signal);
-		acct_process(code);
 	}
+	acct_collect(code, group_dead);
 	if (unlikely(tsk->robust_list))
 		exit_robust_list(tsk);
-#ifdef CONFIG_COMPAT
+#if defined(CONFIG_FUTEX) && defined(CONFIG_COMPAT)
 	if (unlikely(tsk->compat_robust_list))
 		compat_exit_robust_list(tsk);
 #endif
@@ -915,6 +906,8 @@ fastcall NORET_TYPE void do_exit(long code)
 		audit_free(tsk);
 	exit_mm(tsk);
 
+	if (group_dead)
+		acct_process();
 	exit_sem(tsk);
 	__exit_files(tsk);
 	__exit_fs(tsk);
@@ -1538,8 +1531,7 @@ check_continued:
 		if (options & __WNOTHREAD)
 			break;
 		tsk = next_thread(tsk);
-		if (tsk->signal != current->signal)
-			BUG();
+		BUG_ON(tsk->signal != current->signal);
 	} while (tsk != current);
 
 	read_unlock(&tasklist_lock);
diff --git a/kernel/fork.c b/kernel/fork.c
index ac8100e3088a..dfd10cb370c3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -368,6 +368,8 @@ void fastcall __mmdrop(struct mm_struct *mm)
  */
 void mmput(struct mm_struct *mm)
 {
+	might_sleep();
+
 	if (atomic_dec_and_test(&mm->mm_users)) {
 		exit_aio(mm);
 		exit_mmap(mm);
@@ -623,6 +625,7 @@ out:
 /*
  * Allocate a new files structure and copy contents from the
  * passed in files structure.
+ * errorp will be valid only when the returned files_struct is NULL.
  */
 static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
 {
@@ -631,6 +634,7 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
 	int open_files, size, i, expand;
 	struct fdtable *old_fdt, *new_fdt;
 
+	*errorp = -ENOMEM;
 	newf = alloc_files();
 	if (!newf)
 		goto out;
@@ -744,7 +748,6 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
 	 * break this.
 	 */
 	tsk->files = NULL;
-	error = -ENOMEM;
 	newf = dup_fd(oldf, &error);
 	if (!newf)
 		goto out;
@@ -871,6 +874,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
 		tsk->it_prof_expires =
 			secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
 	}
+	acct_init_pacct(&sig->pacct);
 
 	return 0;
 }
diff --git a/kernel/futex.c b/kernel/futex.c
index 5699c512057b..e1a380c77a5a 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1056,11 +1056,11 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, int val,
 			(unsigned long)uaddr2, val2, val3);
 }
 
-static struct super_block *
-futexfs_get_sb(struct file_system_type *fs_type,
-	       int flags, const char *dev_name, void *data)
+static int futexfs_get_sb(struct file_system_type *fs_type,
+			  int flags, const char *dev_name, void *data,
+			  struct vfsmount *mnt)
 {
-	return get_sb_pseudo(fs_type, "futex", NULL, 0xBAD1DEA);
+	return get_sb_pseudo(fs_type, "futex", NULL, 0xBAD1DEA, mnt);
 }
 
 static struct file_system_type futex_fs_type = {
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index b7f0388bd71c..55601b3ce60e 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -98,7 +98,6 @@ static DEFINE_PER_CPU(struct hrtimer_base, hrtimer_bases[MAX_HRTIMER_BASES]) =
 
 /**
  * ktime_get_ts - get the monotonic clock in timespec format
- *
  * @ts:		pointer to timespec variable
  *
  * The function calculates the monotonic clock from the realtime
@@ -238,7 +237,6 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 # ifndef CONFIG_KTIME_SCALAR
 /**
  * ktime_add_ns - Add a scalar nanoseconds value to a ktime_t variable
- *
  * @kt:		addend
  * @nsec:	the scalar nsec value to add
  *
@@ -299,7 +297,6 @@ void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 
 /**
  * hrtimer_forward - forward the timer expiry
- *
  * @timer:	hrtimer to forward
  * @now:	forward past this time
  * @interval:	the interval to forward
@@ -393,7 +390,7 @@ static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_base *base)
 	if (base->first == &timer->node)
 		base->first = rb_next(&timer->node);
 	rb_erase(&timer->node, &base->active);
-	timer->node.rb_parent = HRTIMER_INACTIVE;
+	rb_set_parent(&timer->node, &timer->node);
 }
 
 /*
@@ -411,7 +408,6 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_base *base)
 
 /**
  * hrtimer_start - (re)start an relative timer on the current CPU
- *
  * @timer:	the timer to be added
  * @tim:	expiry time
  * @mode:	expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
@@ -456,17 +452,17 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(hrtimer_start);
 
 /**
  * hrtimer_try_to_cancel - try to deactivate a timer
- *
  * @timer:	hrtimer to stop
  *
  * Returns:
  *  0 when the timer was not active
  *  1 when the timer was active
  * -1 when the timer is currently excuting the callback function and
- *    can not be stopped
+ *    cannot be stopped
  */
 int hrtimer_try_to_cancel(struct hrtimer *timer)
 {
@@ -484,10 +480,10 @@ int hrtimer_try_to_cancel(struct hrtimer *timer)
 	return ret;
 
 }
+EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
 
 /**
  * hrtimer_cancel - cancel a timer and wait for the handler to finish.
- *
  * @timer:	the timer to be cancelled
  *
  * Returns:
@@ -504,10 +500,10 @@ int hrtimer_cancel(struct hrtimer *timer)
 		cpu_relax();
 	}
 }
+EXPORT_SYMBOL_GPL(hrtimer_cancel);
 
 /**
  * hrtimer_get_remaining - get remaining time for the timer
- *
  * @timer:	the timer to read
  */
 ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
@@ -522,6 +518,7 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
 
 	return rem;
 }
+EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
 
 #ifdef CONFIG_NO_IDLE_HZ
 /**
@@ -560,7 +557,6 @@ ktime_t hrtimer_get_next_event(void)
 
 /**
  * hrtimer_init - initialize a timer to the given clock
- *
  * @timer:	the timer to be initialized
  * @clock_id:	the clock to be used
  * @mode:	timer mode abs/rel
@@ -572,18 +568,18 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 
 	memset(timer, 0, sizeof(struct hrtimer));
 
-	bases = per_cpu(hrtimer_bases, raw_smp_processor_id());
+	bases = __raw_get_cpu_var(hrtimer_bases);
 
 	if (clock_id == CLOCK_REALTIME && mode != HRTIMER_ABS)
 		clock_id = CLOCK_MONOTONIC;
 
 	timer->base = &bases[clock_id];
-	timer->node.rb_parent = HRTIMER_INACTIVE;
+	rb_set_parent(&timer->node, &timer->node);
 }
+EXPORT_SYMBOL_GPL(hrtimer_init);
 
 /**
  * hrtimer_get_res - get the timer resolution for a clock
- *
  * @which_clock: which clock to query
  * @tp:		 pointer to timespec variable to store the resolution
  *
@@ -594,11 +590,12 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
 {
 	struct hrtimer_base *bases;
 
-	bases = per_cpu(hrtimer_bases, raw_smp_processor_id());
+	bases = __raw_get_cpu_var(hrtimer_bases);
 	*tp = ktime_to_timespec(bases[which_clock].resolution);
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(hrtimer_get_res);
 
 /*
  * Expire the per base hrtimer-queue:
diff --git a/kernel/intermodule.c b/kernel/intermodule.c
deleted file mode 100644
index 55b1e5b85db9..000000000000
--- a/kernel/intermodule.c
+++ /dev/null
@@ -1,184 +0,0 @@
-/* Deprecated, do not use.  Moved from module.c to here. --RR */
-
-/* Written by Keith Owens <kaos@ocs.com.au> Oct 2000 */
-#include <linux/module.h>
-#include <linux/kmod.h>
-#include <linux/spinlock.h>
-#include <linux/list.h>
-#include <linux/slab.h>
-
-/* inter_module functions are always available, even when the kernel is
- * compiled without modules.  Consumers of inter_module_xxx routines
- * will always work, even when both are built into the kernel, this
- * approach removes lots of #ifdefs in mainline code.
- */
-
-static struct list_head ime_list = LIST_HEAD_INIT(ime_list);
-static DEFINE_SPINLOCK(ime_lock);
-static int kmalloc_failed;
-
-struct inter_module_entry {
-	struct list_head list;
-	const char *im_name;
-	struct module *owner;
-	const void *userdata;
-};
-
-/**
- * inter_module_register - register a new set of inter module data.
- * @im_name: an arbitrary string to identify the data, must be unique
- * @owner: module that is registering the data, always use THIS_MODULE
- * @userdata: pointer to arbitrary userdata to be registered
- *
- * Description: Check that the im_name has not already been registered,
- * complain if it has.  For new data, add it to the inter_module_entry
- * list.
- */
-void inter_module_register(const char *im_name, struct module *owner, const void *userdata)
-{
-	struct list_head *tmp;
-	struct inter_module_entry *ime, *ime_new;
-
-	if (!(ime_new = kzalloc(sizeof(*ime), GFP_KERNEL))) {
-		/* Overloaded kernel, not fatal */
-		printk(KERN_ERR
-			"Aiee, inter_module_register: cannot kmalloc entry for '%s'\n",
-			im_name);
-		kmalloc_failed = 1;
-		return;
-	}
-	ime_new->im_name = im_name;
-	ime_new->owner = owner;
-	ime_new->userdata = userdata;
-
-	spin_lock(&ime_lock);
-	list_for_each(tmp, &ime_list) {
-		ime = list_entry(tmp, struct inter_module_entry, list);
-		if (strcmp(ime->im_name, im_name) == 0) {
-			spin_unlock(&ime_lock);
-			kfree(ime_new);
-			/* Program logic error, fatal */
-			printk(KERN_ERR "inter_module_register: duplicate im_name '%s'", im_name);
-			BUG();
-		}
-	}
-	list_add(&(ime_new->list), &ime_list);
-	spin_unlock(&ime_lock);
-}
-
-/**
- * inter_module_unregister - unregister a set of inter module data.
- * @im_name: an arbitrary string to identify the data, must be unique
- *
- * Description: Check that the im_name has been registered, complain if
- * it has not.  For existing data, remove it from the
- * inter_module_entry list.
- */
-void inter_module_unregister(const char *im_name)
-{
-	struct list_head *tmp;
-	struct inter_module_entry *ime;
-
-	spin_lock(&ime_lock);
-	list_for_each(tmp, &ime_list) {
-		ime = list_entry(tmp, struct inter_module_entry, list);
-		if (strcmp(ime->im_name, im_name) == 0) {
-			list_del(&(ime->list));
-			spin_unlock(&ime_lock);
-			kfree(ime);
-			return;
-		}
-	}
-	spin_unlock(&ime_lock);
-	if (kmalloc_failed) {
-		printk(KERN_ERR
-			"inter_module_unregister: no entry for '%s', "
-			"probably caused by previous kmalloc failure\n",
-			im_name);
-		return;
-	}
-	else {
-		/* Program logic error, fatal */
-		printk(KERN_ERR "inter_module_unregister: no entry for '%s'", im_name);
-		BUG();
-	}
-}
-
-/**
- * inter_module_get - return arbitrary userdata from another module.
- * @im_name: an arbitrary string to identify the data, must be unique
- *
- * Description: If the im_name has not been registered, return NULL.
- * Try to increment the use count on the owning module, if that fails
- * then return NULL.  Otherwise return the userdata.
- */
-static const void *inter_module_get(const char *im_name)
-{
-	struct list_head *tmp;
-	struct inter_module_entry *ime;
-	const void *result = NULL;
-
-	spin_lock(&ime_lock);
-	list_for_each(tmp, &ime_list) {
-		ime = list_entry(tmp, struct inter_module_entry, list);
-		if (strcmp(ime->im_name, im_name) == 0) {
-			if (try_module_get(ime->owner))
-				result = ime->userdata;
-			break;
-		}
-	}
-	spin_unlock(&ime_lock);
-	return(result);
-}
-
-/**
- * inter_module_get_request - im get with automatic request_module.
- * @im_name: an arbitrary string to identify the data, must be unique
- * @modname: module that is expected to register im_name
- *
- * Description: If inter_module_get fails, do request_module then retry.
- */
-const void *inter_module_get_request(const char *im_name, const char *modname)
-{
-	const void *result = inter_module_get(im_name);
-	if (!result) {
-		request_module("%s", modname);
-		result = inter_module_get(im_name);
-	}
-	return(result);
-}
-
-/**
- * inter_module_put - release use of data from another module.
- * @im_name: an arbitrary string to identify the data, must be unique
- *
- * Description: If the im_name has not been registered, complain,
- * otherwise decrement the use count on the owning module.
- */
-void inter_module_put(const char *im_name)
-{
-	struct list_head *tmp;
-	struct inter_module_entry *ime;
-
-	spin_lock(&ime_lock);
-	list_for_each(tmp, &ime_list) {
-		ime = list_entry(tmp, struct inter_module_entry, list);
-		if (strcmp(ime->im_name, im_name) == 0) {
-			if (ime->owner)
-				module_put(ime->owner);
-			spin_unlock(&ime_lock);
-			return;
-		}
-	}
-	spin_unlock(&ime_lock);
-	printk(KERN_ERR "inter_module_put: no entry for '%s'", im_name);
-	BUG();
-}
-
-EXPORT_SYMBOL(inter_module_register);
-EXPORT_SYMBOL(inter_module_unregister);
-EXPORT_SYMBOL(inter_module_get_request);
-EXPORT_SYMBOL(inter_module_put);
-
-MODULE_LICENSE("GPL");
-
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 51df337b37db..0f6530117105 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -76,10 +76,11 @@ irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs)
 /*
  * Have got an event to handle:
  */
-fastcall int handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
+fastcall irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
 				struct irqaction *action)
 {
-	int ret, retval = 0, status = 0;
+	irqreturn_t ret, retval = IRQ_NONE;
+	unsigned int status = 0;
 
 	if (!(action->flags & SA_INTERRUPT))
 		local_irq_enable();
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 134f9f2e0e39..a12d00eb5e7c 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -30,7 +30,7 @@ void move_native_irq(int irq)
 
 	desc->move_irq = 0;
 
-	if (likely(cpus_empty(pending_irq_cpumask[irq])))
+	if (unlikely(cpus_empty(pending_irq_cpumask[irq])))
 		return;
 
 	if (!desc->handler->set_affinity)
@@ -49,7 +49,7 @@ void move_native_irq(int irq)
 	 * cause some ioapics to mal-function.
 	 * Being paranoid i guess!
 	 */
-	if (unlikely(!cpus_empty(tmp))) {
+	if (likely(!cpus_empty(tmp))) {
 		if (likely(!(desc->status & IRQ_DISABLED)))
 			desc->handler->disable(irq);
 
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index d03b5eef8ce0..afacd6f585fa 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -24,6 +24,8 @@ static struct proc_dir_entry *smp_affinity_entry[NR_IRQS];
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
 {
+	set_balance_irq_affinity(irq, mask_val);
+
 	/*
 	 * Save these away for later use. Re-progam when the
 	 * interrupt is pending
@@ -33,6 +35,7 @@ void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
 #else
 void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
 {
+	set_balance_irq_affinity(irq, mask_val);
 	irq_affinity[irq] = mask_val;
 	irq_desc[irq].handler->set_affinity(irq, mask_val);
 }
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 7df9abd5ec86..b2fb3c18d06b 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -11,7 +11,7 @@
 #include <linux/kallsyms.h>
 #include <linux/interrupt.h>
 
-static int irqfixup;
+static int irqfixup __read_mostly;
 
 /*
  * Recovery handler for misrouted interrupts.
@@ -136,9 +136,9 @@ static void report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t actio
 void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret,
 			struct pt_regs *regs)
 {
-	if (action_ret != IRQ_HANDLED) {
+	if (unlikely(action_ret != IRQ_HANDLED)) {
 		desc->irqs_unhandled++;
-		if (action_ret != IRQ_NONE)
+		if (unlikely(action_ret != IRQ_NONE))
 			report_bad_irq(irq, desc, action_ret);
 	}
 
@@ -152,11 +152,11 @@ void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret,
 	}
 
 	desc->irq_count++;
-	if (desc->irq_count < 100000)
+	if (likely(desc->irq_count < 100000))
 		return;
 
 	desc->irq_count = 0;
-	if (desc->irqs_unhandled > 99900) {
+	if (unlikely(desc->irqs_unhandled > 99900)) {
 		/*
 		 * The interrupt is stuck
 		 */
@@ -171,7 +171,7 @@ void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret,
 	desc->irqs_unhandled = 0;
 }
 
-int noirqdebug;
+int noirqdebug __read_mostly;
 
 int __init noirqdebug_setup(char *str)
 {
diff --git a/kernel/kexec.c b/kernel/kexec.c
index bf39d28e4c0e..58f0f382597c 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -902,14 +902,14 @@ static int kimage_load_segment(struct kimage *image,
  * kexec does not sync, or unmount filesystems so if you need
  * that to happen you need to do that yourself.
  */
-struct kimage *kexec_image = NULL;
-static struct kimage *kexec_crash_image = NULL;
+struct kimage *kexec_image;
+struct kimage *kexec_crash_image;
 /*
  * A home grown binary mutex.
  * Nothing can wait so this mutex is safe to use
  * in interrupt context :)
  */
-static int kexec_lock = 0;
+static int kexec_lock;
 
 asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
 				struct kexec_segment __user *segments,
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index f119e098e67b..9e28478a17a5 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -14,6 +14,7 @@
 #include <linux/sysfs.h>
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/kexec.h>
 
 #define KERNEL_ATTR_RO(_name) \
 static struct subsys_attribute _name##_attr = __ATTR_RO(_name)
@@ -48,6 +49,20 @@ static ssize_t uevent_helper_store(struct subsystem *subsys, const char *page, s
 KERNEL_ATTR_RW(uevent_helper);
 #endif
 
+#ifdef CONFIG_KEXEC
+static ssize_t kexec_loaded_show(struct subsystem *subsys, char *page)
+{
+	return sprintf(page, "%d\n", !!kexec_image);
+}
+KERNEL_ATTR_RO(kexec_loaded);
+
+static ssize_t kexec_crash_loaded_show(struct subsystem *subsys, char *page)
+{
+	return sprintf(page, "%d\n", !!kexec_crash_image);
+}
+KERNEL_ATTR_RO(kexec_crash_loaded);
+#endif /* CONFIG_KEXEC */
+
 decl_subsys(kernel, NULL, NULL);
 EXPORT_SYMBOL_GPL(kernel_subsys);
 
@@ -56,6 +71,10 @@ static struct attribute * kernel_attrs[] = {
 	&uevent_seqnum_attr.attr,
 	&uevent_helper_attr.attr,
 #endif
+#ifdef CONFIG_KEXEC
+	&kexec_loaded_attr.attr,
+	&kexec_crash_loaded_attr.attr,
+#endif
 	NULL
 };
 
diff --git a/kernel/kthread.c b/kernel/kthread.c
index c5f3c6613b6d..24be714b04c7 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -45,6 +45,13 @@ struct kthread_stop_info
 static DEFINE_MUTEX(kthread_stop_lock);
 static struct kthread_stop_info kthread_stop_info;
 
+/**
+ * kthread_should_stop - should this kthread return now?
+ *
+ * When someone calls kthread_stop on your kthread, it will be woken
+ * and this will return true.  You should then return, and your return
+ * value will be passed through to kthread_stop().
+ */
 int kthread_should_stop(void)
 {
 	return (kthread_stop_info.k == current);
@@ -122,6 +129,25 @@ static void keventd_create_kthread(void *_create)
 	complete(&create->done);
 }
 
+/**
+ * kthread_create - create a kthread.
+ * @threadfn: the function to run until signal_pending(current).
+ * @data: data ptr for @threadfn.
+ * @namefmt: printf-style name for the thread.
+ *
+ * Description: This helper function creates and names a kernel
+ * thread.  The thread will be stopped: use wake_up_process() to start
+ * it.  See also kthread_run(), kthread_create_on_cpu().
+ *
+ * When woken, the thread will run @threadfn() with @data as its
+ * argument. @threadfn can either call do_exit() directly if it is a
+ * standalone thread for which noone will call kthread_stop(), or
+ * return when 'kthread_should_stop()' is true (which means
+ * kthread_stop() has been called).  The return value should be zero
+ * or a negative error number; it will be passed to kthread_stop().
+ *
+ * Returns a task_struct or ERR_PTR(-ENOMEM).
+ */
 struct task_struct *kthread_create(int (*threadfn)(void *data),
 				   void *data,
 				   const char namefmt[],
@@ -156,6 +182,15 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
 }
 EXPORT_SYMBOL(kthread_create);
 
+/**
+ * kthread_bind - bind a just-created kthread to a cpu.
+ * @k: thread created by kthread_create().
+ * @cpu: cpu (might not be online, must be possible) for @k to run on.
+ *
+ * Description: This function is equivalent to set_cpus_allowed(),
+ * except that @cpu doesn't need to be online, and the thread must be
+ * stopped (i.e., just returned from kthread_create().
+ */
 void kthread_bind(struct task_struct *k, unsigned int cpu)
 {
 	BUG_ON(k->state != TASK_INTERRUPTIBLE);
@@ -166,12 +201,36 @@ void kthread_bind(struct task_struct *k, unsigned int cpu)
 }
 EXPORT_SYMBOL(kthread_bind);
 
+/**
+ * kthread_stop - stop a thread created by kthread_create().
+ * @k: thread created by kthread_create().
+ *
+ * Sets kthread_should_stop() for @k to return true, wakes it, and
+ * waits for it to exit.  Your threadfn() must not call do_exit()
+ * itself if you use this function!  This can also be called after
+ * kthread_create() instead of calling wake_up_process(): the thread
+ * will exit without calling threadfn().
+ *
+ * Returns the result of threadfn(), or %-EINTR if wake_up_process()
+ * was never called.
+ */
 int kthread_stop(struct task_struct *k)
 {
 	return kthread_stop_sem(k, NULL);
 }
 EXPORT_SYMBOL(kthread_stop);
 
+/**
+ * kthread_stop_sem - stop a thread created by kthread_create().
+ * @k: thread created by kthread_create().
+ * @s: semaphore that @k waits on while idle.
+ *
+ * Does essentially the same thing as kthread_stop() above, but wakes
+ * @k by calling up(@s).
+ *
+ * Returns the result of threadfn(), or %-EINTR if wake_up_process()
+ * was never called.
+ */
 int kthread_stop_sem(struct task_struct *k, struct semaphore *s)
 {
 	int ret;
@@ -210,5 +269,5 @@ static __init int helper_init(void)
 
 	return 0;
 }
-core_initcall(helper_init);
 
+core_initcall(helper_init);
diff --git a/kernel/module.c b/kernel/module.c
index bbe04862e1b0..d75275de1c28 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1326,7 +1326,7 @@ int is_exported(const char *name, const struct module *mod)
 	if (!mod && lookup_symbol(name, __start___ksymtab, __stop___ksymtab))
 		return 1;
 	else
-		if (lookup_symbol(name, mod->syms, mod->syms + mod->num_syms))
+		if (mod && lookup_symbol(name, mod->syms, mod->syms + mod->num_syms))
 			return 1;
 		else
 			return 0;
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 520f6c59948d..d38d9ec3276c 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -555,9 +555,6 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
 	struct cpu_timer_list *next;
 	unsigned long i;
 
-	if (CPUCLOCK_PERTHREAD(timer->it_clock)	&& (p->flags & PF_EXITING))
-		return;
-
 	head = (CPUCLOCK_PERTHREAD(timer->it_clock) ?
 		p->cpu_timers : p->signal->cpu_timers);
 	head += CPUCLOCK_WHICH(timer->it_clock);
@@ -1173,6 +1170,9 @@ static void check_process_timers(struct task_struct *tsk,
 		}
 		t = tsk;
 		do {
+			if (unlikely(t->flags & PF_EXITING))
+				continue;
+
 			ticks = cputime_add(cputime_add(t->utime, t->stime),
 					    prof_left);
 			if (!cputime_eq(prof_expires, cputime_zero) &&
@@ -1193,11 +1193,7 @@ static void check_process_timers(struct task_struct *tsk,
 					      t->it_sched_expires > sched)) {
 				t->it_sched_expires = sched;
 			}
-
-			do {
-				t = next_thread(t);
-			} while (unlikely(t->flags & PF_EXITING));
-		} while (t != tsk);
+		} while ((t = next_thread(t)) != tsk);
 	}
 }
 
@@ -1289,30 +1285,30 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 
 #undef	UNEXPIRED
 
-	BUG_ON(tsk->exit_state);
-
 	/*
 	 * Double-check with locks held.
 	 */
 	read_lock(&tasklist_lock);
-	spin_lock(&tsk->sighand->siglock);
+	if (likely(tsk->signal != NULL)) {
+		spin_lock(&tsk->sighand->siglock);
 
-	/*
-	 * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N]
-	 * all the timers that are firing, and put them on the firing list.
-	 */
-	check_thread_timers(tsk, &firing);
-	check_process_timers(tsk, &firing);
+		/*
+		 * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N]
+		 * all the timers that are firing, and put them on the firing list.
+		 */
+		check_thread_timers(tsk, &firing);
+		check_process_timers(tsk, &firing);
 
-	/*
-	 * We must release these locks before taking any timer's lock.
-	 * There is a potential race with timer deletion here, as the
-	 * siglock now protects our private firing list.  We have set
-	 * the firing flag in each timer, so that a deletion attempt
-	 * that gets the timer lock before we do will give it up and
-	 * spin until we've taken care of that timer below.
-	 */
-	spin_unlock(&tsk->sighand->siglock);
+		/*
+		 * We must release these locks before taking any timer's lock.
+		 * There is a potential race with timer deletion here, as the
+		 * siglock now protects our private firing list.  We have set
+		 * the firing flag in each timer, so that a deletion attempt
+		 * that gets the timer lock before we do will give it up and
+		 * spin until we've taken care of that timer below.
+		 */
+		spin_unlock(&tsk->sighand->siglock);
+	}
 	read_unlock(&tasklist_lock);
 
 	/*
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index ce0dfb8f4a4e..fc311a4673a2 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -36,6 +36,15 @@ config PM_DEBUG
 	code. This is helpful when debugging and reporting various PM bugs, 
 	like suspend support.
 
+config PM_TRACE
+	bool "Suspend/resume event tracing"
+	depends on PM && PM_DEBUG && X86_32
+	default y
+	---help---
+	This enables some cheesy code to save the last PM event point in the
+	RTC across reboots, so that you can debug a machine that just hangs
+	during suspend (or more commonly, during resume).
+
 config SOFTWARE_SUSPEND
 	bool "Software Suspend"
 	depends on PM && SWAP && (X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP)
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 81d4d982f3f0..e13e74067845 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -231,7 +231,7 @@ static int software_resume(void)
 late_initcall(software_resume);
 
 
-static char * pm_disk_modes[] = {
+static const char * const pm_disk_modes[] = {
 	[PM_DISK_FIRMWARE]	= "firmware",
 	[PM_DISK_PLATFORM]	= "platform",
 	[PM_DISK_SHUTDOWN]	= "shutdown",
diff --git a/kernel/power/main.c b/kernel/power/main.c
index a6d9ef46009e..6d295c776794 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -15,7 +15,7 @@
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/pm.h>
-
+#include <linux/console.h>
 
 #include "power.h"
 
@@ -86,6 +86,7 @@ static int suspend_prepare(suspend_state_t state)
 			goto Thaw;
 	}
 
+	suspend_console();
 	if ((error = device_suspend(PMSG_SUSPEND))) {
 		printk(KERN_ERR "Some devices failed to suspend\n");
 		goto Finish;
@@ -133,6 +134,7 @@ int suspend_enter(suspend_state_t state)
 static void suspend_finish(suspend_state_t state)
 {
 	device_resume();
+	resume_console();
 	thaw_processes();
 	enable_nonboot_cpus();
 	if (pm_ops && pm_ops->finish)
@@ -143,7 +145,7 @@ static void suspend_finish(suspend_state_t state)
 
 
 
-static char *pm_states[PM_SUSPEND_MAX] = {
+static const char * const pm_states[PM_SUSPEND_MAX] = {
 	[PM_SUSPEND_STANDBY]	= "standby",
 	[PM_SUSPEND_MEM]	= "mem",
 #ifdef CONFIG_SOFTWARE_SUSPEND
@@ -260,7 +262,7 @@ static ssize_t state_show(struct subsystem * subsys, char * buf)
 static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n)
 {
 	suspend_state_t state = PM_SUSPEND_STANDBY;
-	char ** s;
+	const char * const *s;
 	char *p;
 	int error;
 	int len;
diff --git a/kernel/power/power.h b/kernel/power/power.h
index f06f12f21767..57a792982fb9 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -55,7 +55,7 @@ struct snapshot_handle {
 	unsigned int	page;
 	unsigned int	page_offset;
 	unsigned int	prev;
-	struct pbe	*pbe;
+	struct pbe	*pbe, *last_pbe;
 	void		*buffer;
 	unsigned int	buf_offset;
 };
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 3eeedbb13b78..24c96f354231 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -150,6 +150,10 @@ int restore_highmem(void)
 	}
 	return 0;
 }
+#else
+static inline unsigned int count_highmem_pages(void) {return 0;}
+static inline int save_highmem(void) {return 0;}
+static inline int restore_highmem(void) {return 0;}
 #endif
 
 static int pfn_is_nosave(unsigned long pfn)
@@ -293,62 +297,29 @@ static inline void create_pbe_list(struct pbe *pblist, unsigned int nr_pages)
 	}
 }
 
-/**
- *	On resume it is necessary to trace and eventually free the unsafe
- *	pages that have been allocated, because they are needed for I/O
- *	(on x86-64 we likely will "eat" these pages once again while
- *	creating the temporary page translation tables)
- */
-
-struct eaten_page {
-	struct eaten_page *next;
-	char padding[PAGE_SIZE - sizeof(void *)];
-};
-
-static struct eaten_page *eaten_pages = NULL;
-
-static void release_eaten_pages(void)
-{
-	struct eaten_page *p, *q;
-
-	p = eaten_pages;
-	while (p) {
-		q = p->next;
-		/* We don't want swsusp_free() to free this page again */
-		ClearPageNosave(virt_to_page(p));
-		free_page((unsigned long)p);
-		p = q;
-	}
-	eaten_pages = NULL;
-}
+static unsigned int unsafe_pages;
 
 /**
  *	@safe_needed - on resume, for storing the PBE list and the image,
  *	we can only use memory pages that do not conflict with the pages
- *	which had been used before suspend.
+ *	used before suspend.
  *
  *	The unsafe pages are marked with the PG_nosave_free flag
- *
- *	Allocated but unusable (ie eaten) memory pages should be marked
- *	so that swsusp_free() can release them
+ *	and we count them using unsafe_pages
  */
 
 static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed)
 {
 	void *res;
 
+	res = (void *)get_zeroed_page(gfp_mask);
 	if (safe_needed)
-		do {
+		while (res && PageNosaveFree(virt_to_page(res))) {
+			/* The page is unsafe, mark it for swsusp_free() */
+			SetPageNosave(virt_to_page(res));
+			unsafe_pages++;
 			res = (void *)get_zeroed_page(gfp_mask);
-			if (res && PageNosaveFree(virt_to_page(res))) {
-				/* This is for swsusp_free() */
-				SetPageNosave(virt_to_page(res));
-				((struct eaten_page *)res)->next = eaten_pages;
-				eaten_pages = res;
-			}
-		} while (res && PageNosaveFree(virt_to_page(res)));
-	else
-		res = (void *)get_zeroed_page(gfp_mask);
+		}
 	if (res) {
 		SetPageNosave(virt_to_page(res));
 		SetPageNosaveFree(virt_to_page(res));
@@ -374,7 +345,8 @@ unsigned long get_safe_page(gfp_t gfp_mask)
  *	On each page we set up a list of struct_pbe elements.
  */
 
-struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, int safe_needed)
+static struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask,
+				 int safe_needed)
 {
 	unsigned int num;
 	struct pbe *pblist, *pbe;
@@ -642,6 +614,8 @@ static int mark_unsafe_pages(struct pbe *pblist)
 			return -EFAULT;
 	}
 
+	unsafe_pages = 0;
+
 	return 0;
 }
 
@@ -719,42 +693,99 @@ static inline struct pbe *unpack_orig_addresses(unsigned long *buf,
 }
 
 /**
- *	create_image - use metadata contained in the PBE list
+ *	prepare_image - use metadata contained in the PBE list
  *	pointed to by pagedir_nosave to mark the pages that will
  *	be overwritten in the process of restoring the system
- *	memory state from the image and allocate memory for
- *	the image avoiding these pages
+ *	memory state from the image ("unsafe" pages) and allocate
+ *	memory for the image
+ *
+ *	The idea is to allocate the PBE list first and then
+ *	allocate as many pages as it's needed for the image data,
+ *	but not to assign these pages to the PBEs initially.
+ *	Instead, we just mark them as allocated and create a list
+ *	of "safe" which will be used later
  */
 
-static int create_image(struct snapshot_handle *handle)
+struct safe_page {
+	struct safe_page *next;
+	char padding[PAGE_SIZE - sizeof(void *)];
+};
+
+static struct safe_page *safe_pages;
+
+static int prepare_image(struct snapshot_handle *handle)
 {
 	int error = 0;
-	struct pbe *p, *pblist;
+	unsigned int nr_pages = nr_copy_pages;
+	struct pbe *p, *pblist = NULL;
 
 	p = pagedir_nosave;
 	error = mark_unsafe_pages(p);
 	if (!error) {
-		pblist = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 1);
+		pblist = alloc_pagedir(nr_pages, GFP_ATOMIC, 1);
 		if (pblist)
 			copy_page_backup_list(pblist, p);
 		free_pagedir(p, 0);
 		if (!pblist)
 			error = -ENOMEM;
 	}
-	if (!error)
-		error = alloc_data_pages(pblist, GFP_ATOMIC, 1);
+	safe_pages = NULL;
+	if (!error && nr_pages > unsafe_pages) {
+		nr_pages -= unsafe_pages;
+		while (nr_pages--) {
+			struct safe_page *ptr;
+
+			ptr = (struct safe_page *)get_zeroed_page(GFP_ATOMIC);
+			if (!ptr) {
+				error = -ENOMEM;
+				break;
+			}
+			if (!PageNosaveFree(virt_to_page(ptr))) {
+				/* The page is "safe", add it to the list */
+				ptr->next = safe_pages;
+				safe_pages = ptr;
+			}
+			/* Mark the page as allocated */
+			SetPageNosave(virt_to_page(ptr));
+			SetPageNosaveFree(virt_to_page(ptr));
+		}
+	}
 	if (!error) {
-		release_eaten_pages();
 		pagedir_nosave = pblist;
 	} else {
-		pagedir_nosave = NULL;
 		handle->pbe = NULL;
-		nr_copy_pages = 0;
-		nr_meta_pages = 0;
+		swsusp_free();
 	}
 	return error;
 }
 
+static void *get_buffer(struct snapshot_handle *handle)
+{
+	struct pbe *pbe = handle->pbe, *last = handle->last_pbe;
+	struct page *page = virt_to_page(pbe->orig_address);
+
+	if (PageNosave(page) && PageNosaveFree(page)) {
+		/*
+		 * We have allocated the "original" page frame and we can
+		 * use it directly to store the read page
+		 */
+		pbe->address = 0;
+		if (last && last->next)
+			last->next = NULL;
+		return (void *)pbe->orig_address;
+	}
+	/*
+	 * The "original" page frame has not been allocated and we have to
+	 * use a "safe" page frame to store the read page
+	 */
+	pbe->address = (unsigned long)safe_pages;
+	safe_pages = safe_pages->next;
+	if (last)
+		last->next = pbe;
+	handle->last_pbe = pbe;
+	return (void *)pbe->address;
+}
+
 /**
  *	snapshot_write_next - used for writing the system memory snapshot.
  *
@@ -799,15 +830,16 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
 		} else if (handle->prev <= nr_meta_pages) {
 			handle->pbe = unpack_orig_addresses(buffer, handle->pbe);
 			if (!handle->pbe) {
-				error = create_image(handle);
+				error = prepare_image(handle);
 				if (error)
 					return error;
 				handle->pbe = pagedir_nosave;
-				handle->buffer = (void *)handle->pbe->address;
+				handle->last_pbe = NULL;
+				handle->buffer = get_buffer(handle);
 			}
 		} else {
 			handle->pbe = handle->pbe->next;
-			handle->buffer = (void *)handle->pbe->address;
+			handle->buffer = get_buffer(handle);
 		}
 		handle->prev = handle->page;
 	}
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index c4016cbbd3e0..17f669c83012 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -67,9 +67,9 @@ unsigned int count_highmem_pages(void);
 int save_highmem(void);
 int restore_highmem(void);
 #else
-static int save_highmem(void) { return 0; }
-static int restore_highmem(void) { return 0; }
-static unsigned int count_highmem_pages(void) { return 0; }
+static inline int save_highmem(void) { return 0; }
+static inline int restore_highmem(void) { return 0; }
+static inline unsigned int count_highmem_pages(void) { return 0; }
 #endif
 
 /**
@@ -175,6 +175,12 @@ void free_all_swap_pages(int swap, struct bitmap_page *bitmap)
  */
 
 #define SHRINK_BITE	10000
+static inline unsigned long __shrink_memory(long tmp)
+{
+	if (tmp > SHRINK_BITE)
+		tmp = SHRINK_BITE;
+	return shrink_all_memory(tmp);
+}
 
 int swsusp_shrink_memory(void)
 {
@@ -192,15 +198,17 @@ int swsusp_shrink_memory(void)
 			PAGES_FOR_IO;
 		tmp = size;
 		for_each_zone (zone)
-			if (!is_highmem(zone))
+			if (!is_highmem(zone) && populated_zone(zone)) {
 				tmp -= zone->free_pages;
+				tmp += zone->lowmem_reserve[ZONE_NORMAL];
+			}
 		if (tmp > 0) {
-			tmp = shrink_all_memory(SHRINK_BITE);
+			tmp = __shrink_memory(tmp);
 			if (!tmp)
 				return -ENOMEM;
 			pages += tmp;
 		} else if (size > image_size / PAGE_SIZE) {
-			tmp = shrink_all_memory(SHRINK_BITE);
+			tmp = __shrink_memory(size - (image_size / PAGE_SIZE));
 			pages += tmp;
 		}
 		printk("\b%c", p[i++%4]);
diff --git a/kernel/printk.c b/kernel/printk.c
index c056f3324432..95b7fe17f124 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -24,6 +24,7 @@
 #include <linux/console.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/moduleparam.h>
 #include <linux/interrupt.h>			/* For in_interrupt() */
 #include <linux/config.h>
 #include <linux/delay.h>
@@ -67,6 +68,7 @@ EXPORT_SYMBOL(oops_in_progress);
  * driver system.
  */
 static DECLARE_MUTEX(console_sem);
+static DECLARE_MUTEX(secondary_console_sem);
 struct console *console_drivers;
 /*
  * This is used for debugging the mess that is the VT code by
@@ -76,7 +78,7 @@ struct console *console_drivers;
  * path in the console code where we end up in places I want
  * locked without the console sempahore held
  */
-static int console_locked;
+static int console_locked, console_suspended;
 
 /*
  * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars
@@ -326,7 +328,9 @@ static void __call_console_drivers(unsigned long start, unsigned long end)
 	struct console *con;
 
 	for (con = console_drivers; con; con = con->next) {
-		if ((con->flags & CON_ENABLED) && con->write)
+		if ((con->flags & CON_ENABLED) && con->write &&
+				(cpu_online(smp_processor_id()) ||
+				(con->flags & CON_ANYTIME)))
 			con->write(con, &LOG_BUF(start), end - start);
 	}
 }
@@ -436,6 +440,7 @@ static int printk_time = 1;
 #else
 static int printk_time = 0;
 #endif
+module_param(printk_time, int, S_IRUGO | S_IWUSR);
 
 static int __init printk_time_setup(char *str)
 {
@@ -452,6 +457,18 @@ __attribute__((weak)) unsigned long long printk_clock(void)
 	return sched_clock();
 }
 
+/* Check if we have any console registered that can be called early in boot. */
+static int have_callable_console(void)
+{
+	struct console *con;
+
+	for (con = console_drivers; con; con = con->next)
+		if (con->flags & CON_ANYTIME)
+			return 1;
+
+	return 0;
+}
+
 /**
  * printk - print a kernel message
  * @fmt: format string
@@ -565,27 +582,29 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 			log_level_unknown = 1;
 	}
 
-	if (!cpu_online(smp_processor_id())) {
+	if (!down_trylock(&console_sem)) {
 		/*
-		 * Some console drivers may assume that per-cpu resources have
-		 * been allocated.  So don't allow them to be called by this
-		 * CPU until it is officially up.  We shouldn't be calling into
-		 * random console drivers on a CPU which doesn't exist yet..
+		 * We own the drivers.  We can drop the spinlock and
+		 * let release_console_sem() print the text, maybe ...
 		 */
+		console_locked = 1;
 		printk_cpu = UINT_MAX;
 		spin_unlock_irqrestore(&logbuf_lock, flags);
-		goto out;
-	}
-	if (!down_trylock(&console_sem)) {
-		console_locked = 1;
+
 		/*
-		 * We own the drivers.  We can drop the spinlock and let
-		 * release_console_sem() print the text
+		 * Console drivers may assume that per-cpu resources have
+		 * been allocated. So unless they're explicitly marked as
+		 * being able to cope (CON_ANYTIME) don't call them until
+		 * this CPU is officially up.
 		 */
-		printk_cpu = UINT_MAX;
-		spin_unlock_irqrestore(&logbuf_lock, flags);
-		console_may_schedule = 0;
-		release_console_sem();
+		if (cpu_online(smp_processor_id()) || have_callable_console()) {
+			console_may_schedule = 0;
+			release_console_sem();
+		} else {
+			/* Release by hand to avoid flushing the buffer. */
+			console_locked = 0;
+			up(&console_sem);
+		}
 	} else {
 		/*
 		 * Someone else owns the drivers.  We drop the spinlock, which
@@ -595,7 +614,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 		printk_cpu = UINT_MAX;
 		spin_unlock_irqrestore(&logbuf_lock, flags);
 	}
-out:
+
 	preempt_enable();
 	return printed_len;
 }
@@ -698,6 +717,23 @@ int __init add_preferred_console(char *name, int idx, char *options)
 }
 
 /**
+ * suspend_console - suspend the console subsystem
+ *
+ * This disables printk() while we go into suspend states
+ */
+void suspend_console(void)
+{
+	acquire_console_sem();
+	console_suspended = 1;
+}
+
+void resume_console(void)
+{
+	console_suspended = 0;
+	release_console_sem();
+}
+
+/**
  * acquire_console_sem - lock the console system for exclusive use.
  *
  * Acquires a semaphore which guarantees that the caller has
@@ -708,6 +744,10 @@ int __init add_preferred_console(char *name, int idx, char *options)
 void acquire_console_sem(void)
 {
 	BUG_ON(in_interrupt());
+	if (console_suspended) {
+		down(&secondary_console_sem);
+		return;
+	}
 	down(&console_sem);
 	console_locked = 1;
 	console_may_schedule = 1;
@@ -750,6 +790,10 @@ void release_console_sem(void)
 	unsigned long _con_start, _log_end;
 	unsigned long wake_klogd = 0;
 
+	if (console_suspended) {
+		up(&secondary_console_sem);
+		return;
+	}
 	for ( ; ; ) {
 		spin_lock_irqsave(&logbuf_lock, flags);
 		wake_klogd |= log_start - log_end;
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 2058f88c7bbb..20e9710fc21c 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -612,14 +612,6 @@ void synchronize_rcu(void)
 	wait_for_completion(&rcu.completion);
 }
 
-/*
- * Deprecated, use synchronize_rcu() or synchronize_sched() instead.
- */
-void synchronize_kernel(void)
-{
-	synchronize_rcu();
-}
-
 module_param(blimit, int, 0);
 module_param(qhimark, int, 0);
 module_param(qlowmark, int, 0);
@@ -627,7 +619,6 @@ module_param(qlowmark, int, 0);
 module_param(rsinterval, int, 0);
 #endif
 EXPORT_SYMBOL_GPL(rcu_batches_completed);
-EXPORT_SYMBOL_GPL_FUTURE(call_rcu);	/* WARNING: GPL-only in April 2006. */
-EXPORT_SYMBOL_GPL_FUTURE(call_rcu_bh);	/* WARNING: GPL-only in April 2006. */
+EXPORT_SYMBOL_GPL(call_rcu);
+EXPORT_SYMBOL_GPL(call_rcu_bh);
 EXPORT_SYMBOL_GPL(synchronize_rcu);
-EXPORT_SYMBOL_GPL_FUTURE(synchronize_kernel); /* WARNING: GPL-only in April 2006. */
diff --git a/kernel/sched.c b/kernel/sched.c
index c13f1bd2df7d..f06d059edef5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3886,6 +3886,10 @@ long sched_setaffinity(pid_t pid, cpumask_t new_mask)
 			!capable(CAP_SYS_NICE))
 		goto out_unlock;
 
+	retval = security_task_setscheduler(p, 0, NULL);
+	if (retval)
+		goto out_unlock;
+
 	cpus_allowed = cpuset_cpus_allowed(p);
 	cpus_and(new_mask, new_mask, cpus_allowed);
 	retval = set_cpus_allowed(p, new_mask);
@@ -3954,7 +3958,10 @@ long sched_getaffinity(pid_t pid, cpumask_t *mask)
 	if (!p)
 		goto out_unlock;
 
-	retval = 0;
+	retval = security_task_getscheduler(p);
+	if (retval)
+		goto out_unlock;
+
 	cpus_and(*mask, p->cpus_allowed, cpu_online_map);
 
 out_unlock:
@@ -4046,6 +4053,9 @@ asmlinkage long sys_sched_yield(void)
 
 static inline void __cond_resched(void)
 {
+#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
+	__might_sleep(__FILE__, __LINE__);
+#endif
 	/*
 	 * The BKS might be reacquired before we have dropped
 	 * PREEMPT_ACTIVE, which could trigger a second
@@ -4142,7 +4152,7 @@ EXPORT_SYMBOL(yield);
  */
 void __sched io_schedule(void)
 {
-	struct runqueue *rq = &per_cpu(runqueues, raw_smp_processor_id());
+	struct runqueue *rq = &__raw_get_cpu_var(runqueues);
 
 	atomic_inc(&rq->nr_iowait);
 	schedule();
@@ -4153,7 +4163,7 @@ EXPORT_SYMBOL(io_schedule);
 
 long __sched io_schedule_timeout(long timeout)
 {
-	struct runqueue *rq = &per_cpu(runqueues, raw_smp_processor_id());
+	struct runqueue *rq = &__raw_get_cpu_var(runqueues);
 	long ret;
 
 	atomic_inc(&rq->nr_iowait);
@@ -4746,6 +4756,8 @@ static int migration_call(struct notifier_block *nfb, unsigned long action,
 		break;
 #ifdef CONFIG_HOTPLUG_CPU
 	case CPU_UP_CANCELED:
+		if (!cpu_rq(cpu)->migration_thread)
+			break;
 		/* Unbind it from offline cpu so it can run.  Fall thru. */
 		kthread_bind(cpu_rq(cpu)->migration_thread,
 			     any_online_cpu(cpu_online_map));
diff --git a/kernel/signal.c b/kernel/signal.c
index e5f8aea78ffe..1b3c921737e2 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -23,12 +23,12 @@
 #include <linux/syscalls.h>
 #include <linux/ptrace.h>
 #include <linux/signal.h>
-#include <linux/audit.h>
 #include <linux/capability.h>
 #include <asm/param.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/siginfo.h>
+#include "audit.h"	/* audit_signal_info() */
 
 /*
  * SLAB caches for signal bits.
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 336f92d64e2e..9e2f1c6e73d7 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -470,6 +470,8 @@ static int cpu_callback(struct notifier_block *nfb,
 		break;
 #ifdef CONFIG_HOTPLUG_CPU
 	case CPU_UP_CANCELED:
+		if (!per_cpu(ksoftirqd, hotcpu))
+			break;
 		/* Unbind so it can run.  Fall thru. */
 		kthread_bind(per_cpu(ksoftirqd, hotcpu),
 			     any_online_cpu(cpu_online_map));
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 14c7faf02909..b5c3b94e01ce 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -36,7 +36,7 @@ static struct notifier_block panic_block = {
 
 void touch_softlockup_watchdog(void)
 {
-	per_cpu(touch_timestamp, raw_smp_processor_id()) = jiffies;
+	__raw_get_cpu_var(touch_timestamp) = jiffies;
 }
 EXPORT_SYMBOL(touch_softlockup_watchdog);
 
@@ -127,6 +127,8 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		break;
 #ifdef CONFIG_HOTPLUG_CPU
 	case CPU_UP_CANCELED:
+		if (!per_cpu(watchdog_task, hotcpu))
+			break;
 		/* Unbind so it can run.  Fall thru. */
 		kthread_bind(per_cpu(watchdog_task, hotcpu),
 			     any_online_cpu(cpu_online_map));
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index dcfb5d731466..2c0aacc37c55 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -4,6 +4,7 @@
 #include <linux/cpu.h>
 #include <linux/err.h>
 #include <linux/syscalls.h>
+#include <linux/kthread.h>
 #include <asm/atomic.h>
 #include <asm/semaphore.h>
 #include <asm/uaccess.h>
@@ -25,13 +26,11 @@ static unsigned int stopmachine_num_threads;
 static atomic_t stopmachine_thread_ack;
 static DECLARE_MUTEX(stopmachine_mutex);
 
-static int stopmachine(void *cpu)
+static int stopmachine(void *unused)
 {
 	int irqs_disabled = 0;
 	int prepared = 0;
 
-	set_cpus_allowed(current, cpumask_of_cpu((int)(long)cpu));
-
 	/* Ack: we are alive */
 	smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
 	atomic_inc(&stopmachine_thread_ack);
@@ -85,7 +84,8 @@ static void stopmachine_set_state(enum stopmachine_state state)
 
 static int stop_machine(void)
 {
-	int i, ret = 0;
+	int ret = 0;
+	unsigned int i;
 	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
 
 	/* One high-prio thread per cpu.  We'll do this one. */
@@ -96,11 +96,16 @@ static int stop_machine(void)
 	stopmachine_state = STOPMACHINE_WAIT;
 
 	for_each_online_cpu(i) {
+		struct task_struct *tsk;
 		if (i == raw_smp_processor_id())
 			continue;
-		ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
-		if (ret < 0)
+		tsk = kthread_create(stopmachine, NULL, "stopmachine");
+		if (IS_ERR(tsk)) {
+			ret = PTR_ERR(tsk);
 			break;
+		}
+		kthread_bind(tsk, i);
+		wake_up_process(tsk);
 		stopmachine_num_threads++;
 	}
 
diff --git a/kernel/sys.c b/kernel/sys.c
index 0b6ec0e7936f..2d5179c67cec 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -13,7 +13,6 @@
 #include <linux/notifier.h>
 #include <linux/reboot.h>
 #include <linux/prctl.h>
-#include <linux/init.h>
 #include <linux/highuid.h>
 #include <linux/fs.h>
 #include <linux/kernel.h>
@@ -57,6 +56,12 @@
 #ifndef GET_FPEXC_CTL
 # define GET_FPEXC_CTL(a,b)	(-EINVAL)
 #endif
+#ifndef GET_ENDIAN
+# define GET_ENDIAN(a,b)	(-EINVAL)
+#endif
+#ifndef SET_ENDIAN
+# define SET_ENDIAN(a,b)	(-EINVAL)
+#endif
 
 /*
  * this is where the system-wide overflow UID and GID are defined, for
@@ -132,14 +137,15 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl,
 		unsigned long val, void *v)
 {
 	int ret = NOTIFY_DONE;
-	struct notifier_block *nb;
+	struct notifier_block *nb, *next_nb;
 
 	nb = rcu_dereference(*nl);
 	while (nb) {
+		next_nb = rcu_dereference(nb->next);
 		ret = nb->notifier_call(nb, val, v);
 		if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK)
 			break;
-		nb = rcu_dereference(nb->next);
+		nb = next_nb;
 	}
 	return ret;
 }
@@ -583,7 +589,7 @@ void emergency_restart(void)
 }
 EXPORT_SYMBOL_GPL(emergency_restart);
 
-void kernel_restart_prepare(char *cmd)
+static void kernel_restart_prepare(char *cmd)
 {
 	blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
 	system_state = SYSTEM_RESTART;
@@ -617,7 +623,7 @@ EXPORT_SYMBOL_GPL(kernel_restart);
  *	Move into place and start executing a preloaded standalone
  *	executable.  If nothing was preloaded return an error.
  */
-void kernel_kexec(void)
+static void kernel_kexec(void)
 {
 #ifdef CONFIG_KEXEC
 	struct kimage *image;
@@ -631,7 +637,6 @@ void kernel_kexec(void)
 	machine_kexec(image);
 #endif
 }
-EXPORT_SYMBOL_GPL(kernel_kexec);
 
 void kernel_shutdown_prepare(enum system_states state)
 {
@@ -1860,23 +1865,20 @@ out:
  * fields when reaping, so a sample either gets all the additions of a
  * given child after it's reaped, or none so this sample is before reaping.
  *
- * tasklist_lock locking optimisation:
- * If we are current and single threaded, we do not need to take the tasklist
- * lock or the siglock.  No one else can take our signal_struct away,
- * no one else can reap the children to update signal->c* counters, and
- * no one else can race with the signal-> fields.
- * If we do not take the tasklist_lock, the signal-> fields could be read
- * out of order while another thread was just exiting. So we place a
- * read memory barrier when we avoid the lock.  On the writer side,
- * write memory barrier is implied in  __exit_signal as __exit_signal releases
- * the siglock spinlock after updating the signal-> fields.
- *
- * We don't really need the siglock when we access the non c* fields
- * of the signal_struct (for RUSAGE_SELF) even in multithreaded
- * case, since we take the tasklist lock for read and the non c* signal->
- * fields are updated only in __exit_signal, which is called with
- * tasklist_lock taken for write, hence these two threads cannot execute
- * concurrently.
+ * Locking:
+ * We need to take the siglock for CHILDEREN, SELF and BOTH
+ * for  the cases current multithreaded, non-current single threaded
+ * non-current multithreaded.  Thread traversal is now safe with
+ * the siglock held.
+ * Strictly speaking, we donot need to take the siglock if we are current and
+ * single threaded,  as no one else can take our signal_struct away, no one
+ * else can  reap the  children to update signal->c* counters, and no one else
+ * can race with the signal-> fields. If we do not take any lock, the
+ * signal-> fields could be read out of order while another thread was just
+ * exiting. So we should  place a read memory barrier when we avoid the lock.
+ * On the writer side,  write memory barrier is implied in  __exit_signal
+ * as __exit_signal releases  the siglock spinlock after updating the signal->
+ * fields. But we don't do this yet to keep things simple.
  *
  */
 
@@ -1885,35 +1887,25 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 	struct task_struct *t;
 	unsigned long flags;
 	cputime_t utime, stime;
-	int need_lock = 0;
 
 	memset((char *) r, 0, sizeof *r);
 	utime = stime = cputime_zero;
 
-	if (p != current || !thread_group_empty(p))
-		need_lock = 1;
-
-	if (need_lock) {
-		read_lock(&tasklist_lock);
-		if (unlikely(!p->signal)) {
-			read_unlock(&tasklist_lock);
-			return;
-		}
-	} else
-		/* See locking comments above */
-		smp_rmb();
+	rcu_read_lock();
+	if (!lock_task_sighand(p, &flags)) {
+		rcu_read_unlock();
+		return;
+	}
 
 	switch (who) {
 		case RUSAGE_BOTH:
 		case RUSAGE_CHILDREN:
-			spin_lock_irqsave(&p->sighand->siglock, flags);
 			utime = p->signal->cutime;
 			stime = p->signal->cstime;
 			r->ru_nvcsw = p->signal->cnvcsw;
 			r->ru_nivcsw = p->signal->cnivcsw;
 			r->ru_minflt = p->signal->cmin_flt;
 			r->ru_majflt = p->signal->cmaj_flt;
-			spin_unlock_irqrestore(&p->sighand->siglock, flags);
 
 			if (who == RUSAGE_CHILDREN)
 				break;
@@ -1941,8 +1933,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 			BUG();
 	}
 
-	if (need_lock)
-		read_unlock(&tasklist_lock);
+	unlock_task_sighand(p, &flags);
+	rcu_read_unlock();
+
 	cputime_to_timeval(utime, &r->ru_utime);
 	cputime_to_timeval(stime, &r->ru_stime);
 }
@@ -2057,6 +2050,13 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
 				return -EFAULT;
 			return 0;
 		}
+		case PR_GET_ENDIAN:
+			error = GET_ENDIAN(current, arg2);
+			break;
+		case PR_SET_ENDIAN:
+			error = SET_ENDIAN(current, arg2);
+			break;
+
 		default:
 			error = -EINVAL;
 			break;
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 5433195040f1..6991bece67e8 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -87,6 +87,7 @@ cond_syscall(sys_inotify_init);
 cond_syscall(sys_inotify_add_watch);
 cond_syscall(sys_inotify_rm_watch);
 cond_syscall(sys_migrate_pages);
+cond_syscall(sys_move_pages);
 cond_syscall(sys_chown16);
 cond_syscall(sys_fchown16);
 cond_syscall(sys_getegid16);
@@ -132,3 +133,4 @@ cond_syscall(sys_mincore);
 cond_syscall(sys_madvise);
 cond_syscall(sys_mremap);
 cond_syscall(sys_remap_file_pages);
+cond_syscall(compat_sys_move_pages);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e82726faeeff..2c0e65819448 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -59,6 +59,7 @@ extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
 extern int C_A_D;
 extern int sysctl_overcommit_memory;
 extern int sysctl_overcommit_ratio;
+extern int sysctl_panic_on_oom;
 extern int max_threads;
 extern int sysrq_enabled;
 extern int core_uses_pid;
@@ -142,7 +143,6 @@ static struct ctl_table_header root_table_header =
 
 static ctl_table kern_table[];
 static ctl_table vm_table[];
-static ctl_table proc_table[];
 static ctl_table fs_table[];
 static ctl_table debug_table[];
 static ctl_table dev_table[];
@@ -150,7 +150,7 @@ extern ctl_table random_table[];
 #ifdef CONFIG_UNIX98_PTYS
 extern ctl_table pty_table[];
 #endif
-#ifdef CONFIG_INOTIFY
+#ifdef CONFIG_INOTIFY_USER
 extern ctl_table inotify_table[];
 #endif
 
@@ -202,12 +202,6 @@ static ctl_table root_table[] = {
 	},
 #endif
 	{
-		.ctl_name	= CTL_PROC,
-		.procname	= "proc",
-		.mode		= 0555,
-		.child		= proc_table,
-	},
-	{
 		.ctl_name	= CTL_FS,
 		.procname	= "fs",
 		.mode		= 0555,
@@ -398,7 +392,7 @@ static ctl_table kern_table[] = {
 		.strategy	= &sysctl_string,
 	},
 #endif
-#ifdef CONFIG_HOTPLUG
+#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
 	{
 		.ctl_name	= KERN_HOTPLUG,
 		.procname	= "hotplug",
@@ -702,6 +696,14 @@ static ctl_table vm_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 	{
+		.ctl_name	= VM_PANIC_ON_OOM,
+		.procname	= "panic_on_oom",
+		.data		= &sysctl_panic_on_oom,
+		.maxlen		= sizeof(sysctl_panic_on_oom),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
 		.ctl_name	= VM_OVERCOMMIT_RATIO,
 		.procname	= "overcommit_ratio",
 		.data		= &sysctl_overcommit_ratio,
@@ -918,10 +920,6 @@ static ctl_table vm_table[] = {
 	{ .ctl_name = 0 }
 };
 
-static ctl_table proc_table[] = {
-	{ .ctl_name = 0 }
-};
-
 static ctl_table fs_table[] = {
 	{
 		.ctl_name	= FS_NRINODE,
@@ -1028,7 +1026,7 @@ static ctl_table fs_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_doulongvec_minmax,
 	},
-#ifdef CONFIG_INOTIFY
+#ifdef CONFIG_INOTIFY_USER
 	{
 		.ctl_name	= FS_INOTIFY,
 		.procname	= "inotify",
diff --git a/kernel/timer.c b/kernel/timer.c
index 9e49deed468c..eb97371b87d8 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -146,7 +146,7 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
 void fastcall init_timer(struct timer_list *timer)
 {
 	timer->entry.next = NULL;
-	timer->base = per_cpu(tvec_bases, raw_smp_processor_id());
+	timer->base = __raw_get_cpu_var(tvec_bases);
 }
 EXPORT_SYMBOL(init_timer);
 
@@ -383,23 +383,19 @@ EXPORT_SYMBOL(del_timer_sync);
 static int cascade(tvec_base_t *base, tvec_t *tv, int index)
 {
 	/* cascade all the timers from tv up one level */
-	struct list_head *head, *curr;
+	struct timer_list *timer, *tmp;
+	struct list_head tv_list;
+
+	list_replace_init(tv->vec + index, &tv_list);
 
-	head = tv->vec + index;
-	curr = head->next;
 	/*
-	 * We are removing _all_ timers from the list, so we don't  have to
-	 * detach them individually, just clear the list afterwards.
+	 * We are removing _all_ timers from the list, so we
+	 * don't have to detach them individually.
 	 */
-	while (curr != head) {
-		struct timer_list *tmp;
-
-		tmp = list_entry(curr, struct timer_list, entry);
-		BUG_ON(tmp->base != base);
-		curr = curr->next;
-		internal_add_timer(base, tmp);
+	list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
+		BUG_ON(timer->base != base);
+		internal_add_timer(base, timer);
 	}
-	INIT_LIST_HEAD(head);
 
 	return index;
 }
@@ -419,10 +415,10 @@ static inline void __run_timers(tvec_base_t *base)
 
 	spin_lock_irq(&base->lock);
 	while (time_after_eq(jiffies, base->timer_jiffies)) {
-		struct list_head work_list = LIST_HEAD_INIT(work_list);
+		struct list_head work_list;
 		struct list_head *head = &work_list;
  		int index = base->timer_jiffies & TVR_MASK;
- 
+
 		/*
 		 * Cascade timers:
 		 */
@@ -431,8 +427,8 @@ static inline void __run_timers(tvec_base_t *base)
 				(!cascade(base, &base->tv3, INDEX(1))) &&
 					!cascade(base, &base->tv4, INDEX(2)))
 			cascade(base, &base->tv5, INDEX(3));
-		++base->timer_jiffies; 
-		list_splice_init(base->tv1.vec + index, &work_list);
+		++base->timer_jiffies;
+		list_replace_init(base->tv1.vec + index, &work_list);
 		while (!list_empty(head)) {
 			void (*fn)(unsigned long);
 			unsigned long data;
diff --git a/kernel/user.c b/kernel/user.c
index 2116642f42c6..6408c0424291 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -140,7 +140,7 @@ struct user_struct * alloc_uid(uid_t uid)
 		atomic_set(&new->processes, 0);
 		atomic_set(&new->files, 0);
 		atomic_set(&new->sigpending, 0);
-#ifdef CONFIG_INOTIFY
+#ifdef CONFIG_INOTIFY_USER
 		atomic_set(&new->inotify_watches, 0);
 		atomic_set(&new->inotify_devs, 0);
 #endif
@@ -148,7 +148,7 @@ struct user_struct * alloc_uid(uid_t uid)
 		new->mq_bytes = 0;
 		new->locked_shm = 0;
 
-		if (alloc_uid_keyring(new) < 0) {
+		if (alloc_uid_keyring(new, current) < 0) {
 			kmem_cache_free(uid_cachep, new);
 			return NULL;
 		}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 880fb415a8f6..565cf7a1febd 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -428,22 +428,34 @@ int schedule_delayed_work_on(int cpu,
 	return ret;
 }
 
-int schedule_on_each_cpu(void (*func) (void *info), void *info)
+/**
+ * schedule_on_each_cpu - call a function on each online CPU from keventd
+ * @func: the function to call
+ * @info: a pointer to pass to func()
+ *
+ * Returns zero on success.
+ * Returns -ve errno on failure.
+ *
+ * Appears to be racy against CPU hotplug.
+ *
+ * schedule_on_each_cpu() is very slow.
+ */
+int schedule_on_each_cpu(void (*func)(void *info), void *info)
 {
 	int cpu;
-	struct work_struct *work;
+	struct work_struct *works;
 
-	work = kmalloc(NR_CPUS * sizeof(struct work_struct), GFP_KERNEL);
-
-	if (!work)
+	works = alloc_percpu(struct work_struct);
+	if (!works)
 		return -ENOMEM;
+
 	for_each_online_cpu(cpu) {
-		INIT_WORK(work + cpu, func, info);
+		INIT_WORK(per_cpu_ptr(works, cpu), func, info);
 		__queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu),
-				work + cpu);
+				per_cpu_ptr(works, cpu));
 	}
 	flush_workqueue(keventd_wq);
-	kfree(work);
+	free_percpu(works);
 	return 0;
 }
 
@@ -531,11 +543,11 @@ int current_is_keventd(void)
 static void take_over_work(struct workqueue_struct *wq, unsigned int cpu)
 {
 	struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu);
-	LIST_HEAD(list);
+	struct list_head list;
 	struct work_struct *work;
 
 	spin_lock_irq(&cwq->lock);
-	list_splice_init(&cwq->worklist, &list);
+	list_replace_init(&cwq->worklist, &list);
 
 	while (!list_empty(&list)) {
 		printk("Taking work for %s\n", wq->name);
@@ -578,6 +590,8 @@ static int workqueue_cpu_callback(struct notifier_block *nfb,
 
 	case CPU_UP_CANCELED:
 		list_for_each_entry(wq, &workqueues, list) {
+			if (!per_cpu_ptr(wq->cpu_wq, hotcpu)->thread)
+				continue;
 			/* Unbind so it can run. */
 			kthread_bind(per_cpu_ptr(wq->cpu_wq, hotcpu)->thread,
 				     any_online_cpu(cpu_online_map));