diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/audit.h | 13 | ||||
-rw-r--r-- | kernel/auditfilter.c | 48 | ||||
-rw-r--r-- | kernel/auditsc.c | 311 | ||||
-rw-r--r-- | kernel/compat.c | 8 | ||||
-rw-r--r-- | kernel/exit.c | 22 | ||||
-rw-r--r-- | kernel/fork.c | 24 | ||||
-rw-r--r-- | kernel/pid.c | 11 | ||||
-rw-r--r-- | kernel/ptrace.c | 3 | ||||
-rw-r--r-- | kernel/signal.c | 32 | ||||
-rw-r--r-- | kernel/stop_machine.c | 4 | ||||
-rw-r--r-- | kernel/sys.c | 14 | ||||
-rw-r--r-- | kernel/sys_ni.c | 5 | ||||
-rw-r--r-- | kernel/timer.c | 10 |
13 files changed, 381 insertions, 124 deletions
diff --git a/kernel/audit.h b/kernel/audit.h index a3370232a390..815d6f5c04ee 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -83,6 +83,7 @@ struct audit_krule { u32 field_count; char *filterkey; /* ties events to rules */ struct audit_field *fields; + struct audit_field *arch_f; /* quick access to arch field */ struct audit_field *inode_f; /* quick access to an inode field */ struct audit_watch *watch; /* associated watch */ struct list_head rlist; /* entry in audit_watch.rules list */ @@ -131,17 +132,19 @@ extern void audit_handle_ievent(struct inotify_watch *, u32, u32, u32, extern int selinux_audit_rule_update(void); #ifdef CONFIG_AUDITSYSCALL -extern void __audit_signal_info(int sig, struct task_struct *t); -static inline void audit_signal_info(int sig, struct task_struct *t) +extern int __audit_signal_info(int sig, struct task_struct *t); +static inline int audit_signal_info(int sig, struct task_struct *t) { - if (unlikely(audit_pid && t->tgid == audit_pid)) - __audit_signal_info(sig, t); + if (unlikely((audit_pid && t->tgid == audit_pid) || + (audit_signals && !audit_dummy_context()))) + return __audit_signal_info(sig, t); + return 0; } extern enum audit_state audit_filter_inodes(struct task_struct *, struct audit_context *); extern void audit_set_auditable(struct audit_context *); #else -#define audit_signal_info(s,t) +#define audit_signal_info(s,t) AUDIT_DISABLED #define audit_filter_inodes(t,c) AUDIT_DISABLED #define audit_set_auditable(c) #endif diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 3749193aed8c..6c61263ff96d 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -311,6 +311,43 @@ int audit_match_class(int class, unsigned syscall) return classes[class][AUDIT_WORD(syscall)] & AUDIT_BIT(syscall); } +static inline int audit_match_class_bits(int class, u32 *mask) +{ + int i; + + if (classes[class]) { + for (i = 0; i < AUDIT_BITMASK_SIZE; i++) + if (mask[i] & classes[class][i]) + return 0; + } + return 1; +} + +static int 
audit_match_signal(struct audit_entry *entry) +{ + struct audit_field *arch = entry->rule.arch_f; + + if (!arch) { + /* When arch is unspecified, we must check both masks on biarch + * as syscall number alone is ambiguous. */ + return (audit_match_class_bits(AUDIT_CLASS_SIGNAL, + entry->rule.mask) && + audit_match_class_bits(AUDIT_CLASS_SIGNAL_32, + entry->rule.mask)); + } + + switch(audit_classify_arch(arch->val)) { + case 0: /* native */ + return (audit_match_class_bits(AUDIT_CLASS_SIGNAL, + entry->rule.mask)); + case 1: /* 32bit on biarch */ + return (audit_match_class_bits(AUDIT_CLASS_SIGNAL_32, + entry->rule.mask)); + default: + return 1; + } +} + /* Common user-space to kernel rule translation. */ static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule) { @@ -429,6 +466,7 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule) err = -EINVAL; goto exit_free; } + entry->rule.arch_f = f; break; case AUDIT_PERM: if (f->val & ~15) @@ -519,7 +557,6 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, case AUDIT_FSGID: case AUDIT_LOGINUID: case AUDIT_PERS: - case AUDIT_ARCH: case AUDIT_MSGTYPE: case AUDIT_PPID: case AUDIT_DEVMAJOR: @@ -531,6 +568,9 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, case AUDIT_ARG2: case AUDIT_ARG3: break; + case AUDIT_ARCH: + entry->rule.arch_f = f; + break; case AUDIT_SUBJ_USER: case AUDIT_SUBJ_ROLE: case AUDIT_SUBJ_TYPE: @@ -1221,6 +1261,9 @@ static inline int audit_add_rule(struct audit_entry *entry, #ifdef CONFIG_AUDITSYSCALL if (!dont_count) audit_n_rules++; + + if (!audit_match_signal(entry)) + audit_signals++; #endif mutex_unlock(&audit_filter_mutex); @@ -1294,6 +1337,9 @@ static inline int audit_del_rule(struct audit_entry *entry, #ifdef CONFIG_AUDITSYSCALL if (!dont_count) audit_n_rules--; + + if (!audit_match_signal(entry)) + audit_signals--; #endif mutex_unlock(&audit_filter_mutex); diff --git a/kernel/auditsc.c 
b/kernel/auditsc.c index 628c7ac590a0..e36481ed61b4 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -78,17 +78,15 @@ extern int audit_enabled; * for saving names from getname(). */ #define AUDIT_NAMES 20 -/* AUDIT_NAMES_RESERVED is the number of slots we reserve in the - * audit_context from being used for nameless inodes from - * path_lookup. */ -#define AUDIT_NAMES_RESERVED 7 - /* Indicates that audit should log the full pathname. */ #define AUDIT_NAME_FULL -1 /* number of audit rules */ int audit_n_rules; +/* determines whether we collect data for signals sent */ +int audit_signals; + /* When fs/namei.c:getname() is called, we store the pointer in name and * we don't let putname() free it (instead we free all of the saved * pointers at syscall exit time). @@ -114,6 +112,9 @@ struct audit_aux_data { #define AUDIT_AUX_IPCPERM 0 +/* Number of target pids per aux struct. */ +#define AUDIT_AUX_PIDS 16 + struct audit_aux_data_mq_open { struct audit_aux_data d; int oflag; @@ -181,6 +182,13 @@ struct audit_aux_data_path { struct vfsmount *mnt; }; +struct audit_aux_data_pids { + struct audit_aux_data d; + pid_t target_pid[AUDIT_AUX_PIDS]; + u32 target_sid[AUDIT_AUX_PIDS]; + int pid_count; +}; + /* The per-task audit context. 
*/ struct audit_context { int dummy; /* must be the first element */ @@ -201,6 +209,7 @@ struct audit_context { struct vfsmount * pwdmnt; struct audit_context *previous; /* For nested syscalls */ struct audit_aux_data *aux; + struct audit_aux_data *aux_pids; /* Save things to print about task_struct */ pid_t pid, ppid; @@ -209,6 +218,9 @@ struct audit_context { unsigned long personality; int arch; + pid_t target_pid; + u32 target_sid; + #if AUDIT_DEBUG int put_count; int ino_count; @@ -654,6 +666,10 @@ static inline void audit_free_aux(struct audit_context *context) context->aux = aux->next; kfree(aux); } + while ((aux = context->aux_pids)) { + context->aux_pids = aux->next; + kfree(aux); + } } static inline void audit_zero_context(struct audit_context *context, @@ -795,6 +811,29 @@ static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk audit_log_task_context(ab); } +static int audit_log_pid_context(struct audit_context *context, pid_t pid, + u32 sid) +{ + struct audit_buffer *ab; + char *s = NULL; + u32 len; + int rc = 0; + + ab = audit_log_start(context, GFP_KERNEL, AUDIT_OBJ_PID); + if (!ab) + return 1; + + if (selinux_sid_to_string(sid, &s, &len)) { + audit_log_format(ab, "opid=%d obj=(none)", pid); + rc = 1; + } else + audit_log_format(ab, "opid=%d obj=%s", pid, s); + audit_log_end(ab); + kfree(s); + + return rc; +} + static void audit_log_exit(struct audit_context *context, struct task_struct *tsk) { int i, call_panic = 0; @@ -973,6 +1012,21 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts audit_log_end(ab); } + for (aux = context->aux_pids; aux; aux = aux->next) { + struct audit_aux_data_pids *axs = (void *)aux; + int i; + + for (i = 0; i < axs->pid_count; i++) + if (audit_log_pid_context(context, axs->target_pid[i], + axs->target_sid[i])) + call_panic = 1; + } + + if (context->target_pid && + audit_log_pid_context(context, context->target_pid, + context->target_sid)) + call_panic = 1; + if 
(context->pwd && context->pwdmnt) { ab = audit_log_start(context, GFP_KERNEL, AUDIT_CWD); if (ab) { @@ -1193,6 +1247,10 @@ void audit_syscall_exit(int valid, long return_code) } else { audit_free_names(context); audit_free_aux(context); + context->aux = NULL; + context->aux_pids = NULL; + context->target_pid = 0; + context->target_sid = 0; kfree(context->filterkey); context->filterkey = NULL; tsk->audit_context = context; @@ -1226,6 +1284,7 @@ void __audit_getname(const char *name) context->names[context->name_count].name_len = AUDIT_NAME_FULL; context->names[context->name_count].name_put = 1; context->names[context->name_count].ino = (unsigned long)-1; + context->names[context->name_count].osid = 0; ++context->name_count; if (!context->pwd) { read_lock(&current->fs->lock); @@ -1279,6 +1338,28 @@ void audit_putname(const char *name) #endif } +static int audit_inc_name_count(struct audit_context *context, + const struct inode *inode) +{ + if (context->name_count >= AUDIT_NAMES) { + if (inode) + printk(KERN_DEBUG "name_count maxed, losing inode data: " + "dev=%02x:%02x, inode=%lu", + MAJOR(inode->i_sb->s_dev), + MINOR(inode->i_sb->s_dev), + inode->i_ino); + + else + printk(KERN_DEBUG "name_count maxed, losing inode data"); + return 1; + } + context->name_count++; +#if AUDIT_DEBUG + context->ino_count++; +#endif + return 0; +} + /* Copy inode data into an audit_names. */ static void audit_copy_inode(struct audit_names *name, const struct inode *inode) { @@ -1316,13 +1397,10 @@ void __audit_inode(const char *name, const struct inode *inode) else { /* FIXME: how much do we care about inodes that have no * associated name? 
*/ - if (context->name_count >= AUDIT_NAMES - AUDIT_NAMES_RESERVED) + if (audit_inc_name_count(context, inode)) return; - idx = context->name_count++; + idx = context->name_count - 1; context->names[idx].name = NULL; -#if AUDIT_DEBUG - ++context->ino_count; -#endif } audit_copy_inode(&context->names[idx], inode); } @@ -1346,7 +1424,7 @@ void __audit_inode_child(const char *dname, const struct inode *inode, { int idx; struct audit_context *context = current->audit_context; - const char *found_name = NULL; + const char *found_parent = NULL, *found_child = NULL; int dirlen = 0; if (!context->in_syscall) @@ -1354,88 +1432,73 @@ void __audit_inode_child(const char *dname, const struct inode *inode, /* determine matching parent */ if (!dname) - goto update_context; - for (idx = 0; idx < context->name_count; idx++) - if (context->names[idx].ino == parent->i_ino) { - const char *name = context->names[idx].name; + goto add_names; - if (!name) - continue; + /* parent is more likely, look for it first */ + for (idx = 0; idx < context->name_count; idx++) { + struct audit_names *n = &context->names[idx]; - if (audit_compare_dname_path(dname, name, &dirlen) == 0) { - context->names[idx].name_len = dirlen; - found_name = name; - break; - } + if (!n->name) + continue; + + if (n->ino == parent->i_ino && + !audit_compare_dname_path(dname, n->name, &dirlen)) { + n->name_len = dirlen; /* update parent data in place */ + found_parent = n->name; + goto add_names; } + } -update_context: - idx = context->name_count; - if (context->name_count == AUDIT_NAMES) { - printk(KERN_DEBUG "name_count maxed and losing %s\n", - found_name ?: "(null)"); - return; + /* no matching parent, look for matching child */ + for (idx = 0; idx < context->name_count; idx++) { + struct audit_names *n = &context->names[idx]; + + if (!n->name) + continue; + + /* strcmp() is the more likely scenario */ + if (!strcmp(dname, n->name) || + !audit_compare_dname_path(dname, n->name, &dirlen)) { + if (inode) + 
audit_copy_inode(n, inode); + else + n->ino = (unsigned long)-1; + found_child = n->name; + goto add_names; + } } - context->name_count++; -#if AUDIT_DEBUG - context->ino_count++; -#endif - /* Re-use the name belonging to the slot for a matching parent directory. - * All names for this context are relinquished in audit_free_names() */ - context->names[idx].name = found_name; - context->names[idx].name_len = AUDIT_NAME_FULL; - context->names[idx].name_put = 0; /* don't call __putname() */ - - if (!inode) - context->names[idx].ino = (unsigned long)-1; - else - audit_copy_inode(&context->names[idx], inode); - - /* A parent was not found in audit_names, so copy the inode data for the - * provided parent. */ - if (!found_name) { - idx = context->name_count; - if (context->name_count == AUDIT_NAMES) { - printk(KERN_DEBUG - "name_count maxed and losing parent inode data: dev=%02x:%02x, inode=%lu", - MAJOR(parent->i_sb->s_dev), - MINOR(parent->i_sb->s_dev), - parent->i_ino); + +add_names: + if (!found_parent) { + if (audit_inc_name_count(context, parent)) return; - } - context->name_count++; -#if AUDIT_DEBUG - context->ino_count++; -#endif + idx = context->name_count - 1; + context->names[idx].name = NULL; audit_copy_inode(&context->names[idx], parent); } -} -/** - * audit_inode_update - update inode info for last collected name - * @inode: inode being audited - * - * When open() is called on an existing object with the O_CREAT flag, the inode - * data audit initially collects is incorrect. This additional hook ensures - * audit has the inode data for the actual object to be opened. - */ -void __audit_inode_update(const struct inode *inode) -{ - struct audit_context *context = current->audit_context; - int idx; + if (!found_child) { + if (audit_inc_name_count(context, inode)) + return; + idx = context->name_count - 1; - if (!context->in_syscall || !inode) - return; + /* Re-use the name belonging to the slot for a matching parent + * directory. 
All names for this context are relinquished in + * audit_free_names() */ + if (found_parent) { + context->names[idx].name = found_parent; + context->names[idx].name_len = AUDIT_NAME_FULL; + /* don't call __putname() */ + context->names[idx].name_put = 0; + } else { + context->names[idx].name = NULL; + } - if (context->name_count == 0) { - context->name_count++; -#if AUDIT_DEBUG - context->ino_count++; -#endif + if (inode) + audit_copy_inode(&context->names[idx], inode); + else + context->names[idx].ino = (unsigned long)-1; } - idx = context->name_count - 1; - - audit_copy_inode(&context->names[idx], inode); } /** @@ -1880,6 +1943,14 @@ int audit_sockaddr(int len, void *a) return 0; } +void __audit_ptrace(struct task_struct *t) +{ + struct audit_context *context = current->audit_context; + + context->target_pid = t->pid; + selinux_get_task_sid(t, &context->target_sid); +} + /** * audit_avc_path - record the granting or denial of permissions * @dentry: dentry to record @@ -1918,15 +1989,17 @@ int audit_avc_path(struct dentry *dentry, struct vfsmount *mnt) * If the audit subsystem is being terminated, record the task (pid) * and uid that is doing that. 
*/ -void __audit_signal_info(int sig, struct task_struct *t) +int __audit_signal_info(int sig, struct task_struct *t) { + struct audit_aux_data_pids *axp; + struct task_struct *tsk = current; + struct audit_context *ctx = tsk->audit_context; extern pid_t audit_sig_pid; extern uid_t audit_sig_uid; extern u32 audit_sig_sid; - if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1) { - struct task_struct *tsk = current; - struct audit_context *ctx = tsk->audit_context; + if (audit_pid && t->tgid == audit_pid && + (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1)) { audit_sig_pid = tsk->pid; if (ctx) audit_sig_uid = ctx->loginuid; @@ -1934,4 +2007,72 @@ void __audit_signal_info(int sig, struct task_struct *t) audit_sig_uid = tsk->uid; selinux_get_task_sid(tsk, &audit_sig_sid); } + + if (!audit_signals) /* audit_context checked in wrapper */ + return 0; + + /* optimize the common case by putting first signal recipient directly + * in audit_context */ + if (!ctx->target_pid) { + ctx->target_pid = t->tgid; + selinux_get_task_sid(t, &ctx->target_sid); + return 0; + } + + axp = (void *)ctx->aux_pids; + if (!axp || axp->pid_count == AUDIT_AUX_PIDS) { + axp = kzalloc(sizeof(*axp), GFP_ATOMIC); + if (!axp) + return -ENOMEM; + + axp->d.type = AUDIT_OBJ_PID; + axp->d.next = ctx->aux_pids; + ctx->aux_pids = (void *)axp; + } + BUG_ON(axp->pid_count > AUDIT_AUX_PIDS); + + axp->target_pid[axp->pid_count] = t->tgid; + selinux_get_task_sid(t, &axp->target_sid[axp->pid_count]); + axp->pid_count++; + + return 0; +} + +/** + * audit_core_dumps - record information about processes that end abnormally + * @sig: signal value + * + * If a process ends with a core dump, something fishy is going on and we + * should record the event for investigation. 
+ */ +void audit_core_dumps(long signr) +{ + struct audit_buffer *ab; + u32 sid; + + if (!audit_enabled) + return; + + if (signr == SIGQUIT) /* don't care for those */ + return; + + ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND); + audit_log_format(ab, "auid=%u uid=%u gid=%u", + audit_get_loginuid(current->audit_context), + current->uid, current->gid); + selinux_get_task_sid(current, &sid); + if (sid) { + char *ctx = NULL; + u32 len; + + if (selinux_sid_to_string(sid, &ctx, &len)) + audit_log_format(ab, " ssid=%u", sid); + else + audit_log_format(ab, " subj=%s", ctx); + kfree(ctx); + } + audit_log_format(ab, " pid=%d comm=", current->pid); + audit_log_untrustedstring(ab, current->comm); + audit_log_format(ab, " sig=%ld", signr); + audit_log_end(ab); } diff --git a/kernel/compat.c b/kernel/compat.c index cebb4c28c039..3bae3742c2aa 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -475,8 +475,8 @@ asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid, unsigned int len, return min_length; } -static int get_compat_itimerspec(struct itimerspec *dst, - struct compat_itimerspec __user *src) +int get_compat_itimerspec(struct itimerspec *dst, + const struct compat_itimerspec __user *src) { if (get_compat_timespec(&dst->it_interval, &src->it_interval) || get_compat_timespec(&dst->it_value, &src->it_value)) @@ -484,8 +484,8 @@ static int get_compat_itimerspec(struct itimerspec *dst, return 0; } -static int put_compat_itimerspec(struct compat_itimerspec __user *dst, - struct itimerspec *src) +int put_compat_itimerspec(struct compat_itimerspec __user *dst, + const struct itimerspec *src) { if (put_compat_timespec(&src->it_interval, &dst->it_interval) || put_compat_timespec(&src->it_value, &dst->it_value)) diff --git a/kernel/exit.c b/kernel/exit.c index b0c6f0c3a2df..c6d14b8008dd 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -24,6 +24,7 @@ #include <linux/pid_namespace.h> #include <linux/ptrace.h> #include <linux/profile.h> +#include <linux/signalfd.h> 
#include <linux/mount.h> #include <linux/proc_fs.h> #include <linux/kthread.h> @@ -42,6 +43,7 @@ #include <linux/audit.h> /* for audit_free() */ #include <linux/resource.h> #include <linux/blkdev.h> +#include <linux/task_io_accounting_ops.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -82,6 +84,14 @@ static void __exit_signal(struct task_struct *tsk) sighand = rcu_dereference(tsk->sighand); spin_lock(&sighand->siglock); + /* + * Notify that this sighand has been detached. This must + * be called with the tsk->sighand lock held. Also, this + * access tsk->sighand internally, so it must be called + * before tsk->sighand is reset. + */ + signalfd_detach_locked(tsk); + posix_cpu_timers_exit(tsk); if (atomic_dec_and_test(&sig->count)) posix_cpu_timers_exit_group(tsk); @@ -113,6 +123,8 @@ static void __exit_signal(struct task_struct *tsk) sig->nvcsw += tsk->nvcsw; sig->nivcsw += tsk->nivcsw; sig->sched_time += tsk->sched_time; + sig->inblock += task_io_get_inblock(tsk); + sig->oublock += task_io_get_oublock(tsk); sig = NULL; /* Marker for below. 
*/ } @@ -299,12 +311,12 @@ void __set_special_pids(pid_t session, pid_t pgrp) if (process_session(curr) != session) { detach_pid(curr, PIDTYPE_SID); set_signal_session(curr->signal, session); - attach_pid(curr, PIDTYPE_SID, session); + attach_pid(curr, PIDTYPE_SID, find_pid(session)); } if (process_group(curr) != pgrp) { detach_pid(curr, PIDTYPE_PGID); curr->signal->pgrp = pgrp; - attach_pid(curr, PIDTYPE_PGID, pgrp); + attach_pid(curr, PIDTYPE_PGID, find_pid(pgrp)); } } @@ -1193,6 +1205,12 @@ static int wait_task_zombie(struct task_struct *p, int noreap, p->nvcsw + sig->nvcsw + sig->cnvcsw; psig->cnivcsw += p->nivcsw + sig->nivcsw + sig->cnivcsw; + psig->cinblock += + task_io_get_inblock(p) + + sig->inblock + sig->cinblock; + psig->coublock += + task_io_get_oublock(p) + + sig->oublock + sig->coublock; spin_unlock_irq(&p->parent->sighand->siglock); } diff --git a/kernel/fork.c b/kernel/fork.c index 5dd3979747f5..49530e40ea8b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -875,6 +875,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero; sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; + sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; sig->sched_time = 0; INIT_LIST_HEAD(&sig->cpu_timers[0]); INIT_LIST_HEAD(&sig->cpu_timers[1]); @@ -955,7 +956,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, unsigned long stack_size, int __user *parent_tidptr, int __user *child_tidptr, - int pid) + struct pid *pid) { int retval; struct task_struct *p = NULL; @@ -1022,7 +1023,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->did_exec = 0; delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ copy_flags(clone_flags, p); - p->pid = pid; + p->pid = pid_nr(pid); retval = -EFAULT; if (clone_flags & CLONE_PARENT_SETTID) if (put_user(p->pid, 
parent_tidptr)) @@ -1251,13 +1252,13 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->signal->tty = current->signal->tty; p->signal->pgrp = process_group(current); set_signal_session(p->signal, process_session(current)); - attach_pid(p, PIDTYPE_PGID, process_group(p)); - attach_pid(p, PIDTYPE_SID, process_session(p)); + attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); + attach_pid(p, PIDTYPE_SID, task_session(current)); list_add_tail_rcu(&p->tasks, &init_task.tasks); __get_cpu_var(process_counts)++; } - attach_pid(p, PIDTYPE_PID, p->pid); + attach_pid(p, PIDTYPE_PID, pid); nr_threads++; } @@ -1321,7 +1322,8 @@ struct task_struct * __cpuinit fork_idle(int cpu) struct task_struct *task; struct pt_regs regs; - task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0); + task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, + &init_struct_pid); if (!IS_ERR(task)) init_idle(task, cpu); @@ -1371,7 +1373,7 @@ long do_fork(unsigned long clone_flags, clone_flags |= CLONE_PTRACE; } - p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, nr); + p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid); /* * Do this prior waking up the new thread - the thread pointer * might get invalid after that point, if the thread exits quickly. 
@@ -1420,12 +1422,15 @@ long do_fork(unsigned long clone_flags, #define ARCH_MIN_MMSTRUCT_ALIGN 0 #endif -static void sighand_ctor(void *data, struct kmem_cache *cachep, unsigned long flags) +static void sighand_ctor(void *data, struct kmem_cache *cachep, + unsigned long flags) { struct sighand_struct *sighand = data; - if (flags & SLAB_CTOR_CONSTRUCTOR) + if (flags & SLAB_CTOR_CONSTRUCTOR) { spin_lock_init(&sighand->siglock); + INIT_LIST_HEAD(&sighand->signalfd_list); + } } void __init proc_caches_init(void) @@ -1451,7 +1456,6 @@ void __init proc_caches_init(void) SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); } - /* * Check constraints on flags passed to the unshare system call and * force unsharing of additional process context as appropriate. diff --git a/kernel/pid.c b/kernel/pid.c index d3ad724afa83..eb66bd2953ab 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -27,11 +27,13 @@ #include <linux/bootmem.h> #include <linux/hash.h> #include <linux/pid_namespace.h> +#include <linux/init_task.h> #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift) static struct hlist_head *pid_hash; static int pidhash_shift; static struct kmem_cache *pid_cachep; +struct pid init_struct_pid = INIT_STRUCT_PID; int pid_max = PID_MAX_DEFAULT; @@ -247,13 +249,16 @@ struct pid * fastcall find_pid(int nr) } EXPORT_SYMBOL_GPL(find_pid); -int fastcall attach_pid(struct task_struct *task, enum pid_type type, int nr) +/* + * attach_pid() must be called with the tasklist_lock write-held. 
+ */ +int fastcall attach_pid(struct task_struct *task, enum pid_type type, + struct pid *pid) { struct pid_link *link; - struct pid *pid; link = &task->pids[type]; - link->pid = pid = find_pid(nr); + link->pid = pid; hlist_add_head_rcu(&link->node, &pid->tasks[type]); return 0; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 4d50e06fd745..ad7949a589dd 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -18,6 +18,7 @@ #include <linux/ptrace.h> #include <linux/security.h> #include <linux/signal.h> +#include <linux/audit.h> #include <asm/pgtable.h> #include <asm/uaccess.h> @@ -161,6 +162,8 @@ int ptrace_attach(struct task_struct *task) { int retval; + audit_ptrace(task); + retval = -EPERM; if (task->pid <= 1) goto out; diff --git a/kernel/signal.c b/kernel/signal.c index 2ac3a668d9dd..364fc95bf97c 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -21,6 +21,7 @@ #include <linux/syscalls.h> #include <linux/ptrace.h> #include <linux/signal.h> +#include <linux/signalfd.h> #include <linux/capability.h> #include <linux/freezer.h> #include <linux/pid_namespace.h> @@ -113,8 +114,7 @@ void recalc_sigpending(void) /* Given the mask, find the first available signal that should be serviced. 
*/ -static int -next_signal(struct sigpending *pending, sigset_t *mask) +int next_signal(struct sigpending *pending, sigset_t *mask) { unsigned long i, *s, *m, x; int sig = 0; @@ -497,6 +497,11 @@ static int check_kill_permission(int sig, struct siginfo *info, int error = -EINVAL; if (!valid_signal(sig)) return error; + + error = audit_signal_info(sig, t); /* Let audit system see the signal */ + if (error) + return error; + error = -EPERM; if ((info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info))) && ((sig != SIGCONT) || @@ -506,10 +511,7 @@ static int check_kill_permission(int sig, struct siginfo *info, && !capable(CAP_KILL)) return error; - error = security_task_kill(t, info, sig, 0); - if (!error) - audit_signal_info(sig, t); /* Let audit system see the signal */ - return error; + return security_task_kill(t, info, sig, 0); } /* forward decl */ @@ -630,6 +632,12 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t, int ret = 0; /* + * Deliver the signal to listening signalfds. This must be called + * with the sighand lock held. + */ + signalfd_notify(t, sig); + + /* * fast-pathed signals for kernel-internal things like SIGSTOP * or SIGKILL. */ @@ -1280,6 +1288,11 @@ int send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) ret = 1; goto out; } + /* + * Deliver the signal to listening signalfds. This must be called + * with the sighand lock held. + */ + signalfd_notify(p, sig); list_add_tail(&q->list, &p->pending.list); sigaddset(&p->pending.signal, sig); @@ -1323,6 +1336,11 @@ send_group_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) q->info.si_overrun++; goto out; } + /* + * Deliver the signal to listening signalfds. This must be called + * with the sighand lock held. + */ + signalfd_notify(p, sig); /* * Put this signal on the shared-pending queue. 
@@ -1983,6 +2001,8 @@ int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from) /* * If you change siginfo_t structure, please be sure * this code is fixed accordingly. + * Please remember to update the signalfd_copyinfo() function + * inside fs/signalfd.c too, in case siginfo_t changes. * It should never copy any pad contained in the structure * to avoid security leaks, but must copy the generic * 3 ints plus the relevant union member. diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index daabb74ee0bc..fcee2a8e6da3 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -8,6 +8,8 @@ #include <linux/sched.h> #include <linux/stop_machine.h> #include <linux/syscalls.h> +#include <linux/interrupt.h> + #include <asm/atomic.h> #include <asm/semaphore.h> #include <asm/uaccess.h> @@ -45,6 +47,7 @@ static int stopmachine(void *cpu) if (stopmachine_state == STOPMACHINE_DISABLE_IRQ && !irqs_disabled) { local_irq_disable(); + hard_irq_disable(); irqs_disabled = 1; /* Ack: irqs disabled. */ smp_mb(); /* Must read state first. */ @@ -124,6 +127,7 @@ static int stop_machine(void) /* Make them disable irqs. 
*/ local_irq_disable(); + hard_irq_disable(); stopmachine_set_state(STOPMACHINE_DISABLE_IRQ); return 0; diff --git a/kernel/sys.c b/kernel/sys.c index cdb7e9457ba6..872271ccc384 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -14,6 +14,7 @@ #include <linux/prctl.h> #include <linux/highuid.h> #include <linux/fs.h> +#include <linux/resource.h> #include <linux/kernel.h> #include <linux/kexec.h> #include <linux/workqueue.h> @@ -29,6 +30,7 @@ #include <linux/signal.h> #include <linux/cn_proc.h> #include <linux/getcpu.h> +#include <linux/task_io_accounting_ops.h> #include <linux/compat.h> #include <linux/syscalls.h> @@ -658,7 +660,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval) int error = -EINVAL; struct pid *pgrp; - if (which > 2 || which < 0) + if (which > PRIO_USER || which < PRIO_PROCESS) goto out; /* normalize: avoid signed division (rounding problems) */ @@ -722,7 +724,7 @@ asmlinkage long sys_getpriority(int which, int who) long niceval, retval = -ESRCH; struct pid *pgrp; - if (which > 2 || which < 0) + if (which > PRIO_USER || which < PRIO_PROCESS) return -EINVAL; read_lock(&tasklist_lock); @@ -1486,7 +1488,7 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) if (process_group(p) != pgid) { detach_pid(p, PIDTYPE_PGID); p->signal->pgrp = pgid; - attach_pid(p, PIDTYPE_PGID, pgid); + attach_pid(p, PIDTYPE_PGID, find_pid(pgid)); } err = 0; @@ -2082,6 +2084,8 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) r->ru_nivcsw = p->signal->cnivcsw; r->ru_minflt = p->signal->cmin_flt; r->ru_majflt = p->signal->cmaj_flt; + r->ru_inblock = p->signal->cinblock; + r->ru_oublock = p->signal->coublock; if (who == RUSAGE_CHILDREN) break; @@ -2093,6 +2097,8 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) r->ru_nivcsw += p->signal->nivcsw; r->ru_minflt += p->signal->min_flt; r->ru_majflt += p->signal->maj_flt; + r->ru_inblock += p->signal->inblock; + r->ru_oublock += p->signal->oublock; t = p; do { 
utime = cputime_add(utime, t->utime); @@ -2101,6 +2107,8 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) r->ru_nivcsw += t->nivcsw; r->ru_minflt += t->min_flt; r->ru_majflt += t->maj_flt; + r->ru_inblock += task_io_get_inblock(t); + r->ru_oublock += task_io_get_oublock(t); t = next_thread(t); } while (t != p); break; diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index d7306d0f3dfc..b6d77a8a1ca9 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -141,3 +141,8 @@ cond_syscall(compat_sys_migrate_pages); cond_syscall(sys_bdflush); cond_syscall(sys_ioprio_set); cond_syscall(sys_ioprio_get); + +/* New file descriptors */ +cond_syscall(sys_signalfd); +cond_syscall(sys_timerfd); +cond_syscall(sys_eventfd); diff --git a/kernel/timer.c b/kernel/timer.c index 59a28b1752f8..a6c580ac084b 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -92,24 +92,24 @@ static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases; /* Functions below help us manage 'deferrable' flag */ static inline unsigned int tbase_get_deferrable(tvec_base_t *base) { - return (unsigned int)((unsigned long)base & TBASE_DEFERRABLE_FLAG); + return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG); } static inline tvec_base_t *tbase_get_base(tvec_base_t *base) { - return (tvec_base_t *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG); + return ((tvec_base_t *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG)); } static inline void timer_set_deferrable(struct timer_list *timer) { - timer->base = (tvec_base_t *)((unsigned long)timer->base | - TBASE_DEFERRABLE_FLAG); + timer->base = ((tvec_base_t *)((unsigned long)(timer->base) | + TBASE_DEFERRABLE_FLAG)); } static inline void timer_set_base(struct timer_list *timer, tvec_base_t *new_base) { - timer->base = (tvec_base_t *)((unsigned long)new_base | + timer->base = (tvec_base_t *)((unsigned long)(new_base) | tbase_get_deferrable(timer->base)); } |