diff options
Diffstat (limited to 'kernel')
50 files changed, 647 insertions, 223 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index d06467fc8f7c..eca595e2fd52 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -10,7 +10,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ - async.o range.o jump_label.o + async.o range.o obj-y += groups.o ifdef CONFIG_FUNCTION_TRACER @@ -107,6 +107,7 @@ obj-$(CONFIG_PERF_EVENTS) += events/ obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o obj-$(CONFIG_PADATA) += padata.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_JUMP_LABEL) += jump_label.o ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is diff --git a/kernel/audit.c b/kernel/audit.c index 52501b5d4902..0a1355ca3d79 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -43,7 +43,7 @@ #include <linux/init.h> #include <asm/types.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/slab.h> diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 00d79df03e76..ce4b054acee5 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -44,7 +44,7 @@ #include <linux/init.h> #include <asm/types.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <linux/fs.h> #include <linux/namei.h> #include <linux/mm.h> diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a63507b92ca4..1d2b6ceea95d 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -27,9 +27,11 @@ */ #include <linux/cgroup.h> +#include <linux/cred.h> #include <linux/ctype.h> #include <linux/errno.h> #include <linux/fs.h> +#include <linux/init_task.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/mm.h> @@ -59,7 +61,7 @@ #include <linux/poll.h> #include <linux/flex_array.h> /* used in cgroup_attach_proc */ -#include <asm/atomic.h> +#include <linux/atomic.h> static DEFINE_MUTEX(cgroup_mutex); @@ -1514,6 +1516,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, struct cgroup *root_cgrp = &root->top_cgroup; struct inode *inode; struct cgroupfs_root *existing_root; + const struct cred *cred; int i; BUG_ON(sb->s_root != NULL); @@ -1593,7 +1596,9 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, BUG_ON(!list_empty(&root_cgrp->children)); BUG_ON(root->number_of_cgroups != 1); + cred = override_creds(&init_cred); cgroup_populate_dir(root_cgrp); + revert_creds(cred); mutex_unlock(&cgroup_mutex); mutex_unlock(&inode->i_mutex); } else { diff --git a/kernel/compat.c b/kernel/compat.c index 18197ae2d465..e2435ee9993a 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -158,6 +158,7 @@ int put_compat_timespec(const struct timespec *ts, struct compat_timespec __user __put_user(ts->tv_sec, &cts->tv_sec) || __put_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0; } +EXPORT_SYMBOL_GPL(put_compat_timespec); static long compat_nanosleep_restart(struct restart_block *restart) { @@ -992,11 +993,8 @@ asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset, compat sigset_from_compat(&newset, &newset32); sigdelsetmask(&newset, sigmask(SIGKILL)|sigmask(SIGSTOP)); - spin_lock_irq(¤t->sighand->siglock); current->saved_sigmask = current->blocked; - current->blocked = newset; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); + set_current_blocked(&newset); current->state = TASK_INTERRUPTIBLE; schedule(); diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 9c9b7545c810..10131fdaff70 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -55,7 +55,7 @@ #include <linux/sort.h> #include <asm/uaccess.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <linux/mutex.h> #include <linux/workqueue.h> #include <linux/cgroup.h> @@ -2460,11 +2460,19 @@ static int cpuset_spread_node(int *rotor) int cpuset_mem_spread_node(void) { + if (current->cpuset_mem_spread_rotor == NUMA_NO_NODE) + current->cpuset_mem_spread_rotor = + node_random(¤t->mems_allowed); + return cpuset_spread_node(¤t->cpuset_mem_spread_rotor); } int cpuset_slab_spread_node(void) { + if (current->cpuset_slab_spread_rotor == NUMA_NO_NODE) + current->cpuset_slab_spread_rotor = + node_random(¤t->mems_allowed); + return cpuset_spread_node(¤t->cpuset_slab_spread_rotor); } diff --git a/kernel/cred.c b/kernel/cred.c index 174fa84eca30..8ef31f53c44c 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -508,10 +508,8 @@ int commit_creds(struct cred *new) key_fsgid_changed(task); /* do it - * - What if a process setreuid()'s and this brings the - * new uid over his NPROC rlimit? We can check this now - * cheaply with the new uid cache, so if it matters - * we should be checking for it. -DaveM + * RLIMIT_NPROC limits on user->processes have already been checked + * in set_user(). */ alter_cred_subscribers(new, 2); if (new->user != old->user) diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index bad6786dee88..0d7c08784efb 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -51,7 +51,7 @@ #include <asm/cacheflush.h> #include <asm/byteorder.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <asm/system.h> #include "debug_core.h" diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c index a11db956dd62..34872482315e 100644 --- a/kernel/debug/gdbstub.c +++ b/kernel/debug/gdbstub.c @@ -42,6 +42,8 @@ /* Our I/O buffers. */ static char remcom_in_buffer[BUFMAX]; static char remcom_out_buffer[BUFMAX]; +static int gdbstub_use_prev_in_buf; +static int gdbstub_prev_in_buf_pos; /* Storage for the registers, in GDB format. */ static unsigned long gdb_regs[(NUMREGBYTES + @@ -58,6 +60,13 @@ static int gdbstub_read_wait(void) int ret = -1; int i; + if (unlikely(gdbstub_use_prev_in_buf)) { + if (gdbstub_prev_in_buf_pos < gdbstub_use_prev_in_buf) + return remcom_in_buffer[gdbstub_prev_in_buf_pos++]; + else + gdbstub_use_prev_in_buf = 0; + } + /* poll any additional I/O interfaces that are defined */ while (ret < 0) for (i = 0; kdb_poll_funcs[i] != NULL; i++) { @@ -109,7 +118,6 @@ static void get_packet(char *buffer) buffer[count] = ch; count = count + 1; } - buffer[count] = 0; if (ch == '#') { xmitcsum = hex_to_bin(gdbstub_read_wait()) << 4; @@ -124,6 +132,7 @@ static void get_packet(char *buffer) if (dbg_io_ops->flush) dbg_io_ops->flush(); } + buffer[count] = 0; } while (checksum != xmitcsum); } @@ -1082,12 +1091,11 @@ int gdbstub_state(struct kgdb_state *ks, char *cmd) case 'c': strcpy(remcom_in_buffer, cmd); return 0; - case '?': - gdb_cmd_status(ks); - break; - case '\0': - strcpy(remcom_out_buffer, ""); - break; + case '$': + strcpy(remcom_in_buffer, cmd); + gdbstub_use_prev_in_buf = strlen(remcom_in_buffer); + gdbstub_prev_in_buf_pos = 0; + return 0; } dbg_io_ops->write_char('+'); put_packet(remcom_out_buffer); diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c index 2f62fe85f16a..7179eac7b41c 100644 --- a/kernel/debug/kdb/kdb_bt.c +++ b/kernel/debug/kdb/kdb_bt.c @@ -112,9 +112,8 @@ kdb_bt(int argc, const char **argv) unsigned long addr; long offset; - kdbgetintenv("BTARGS", &argcount); /* Arguments to print */ - kdbgetintenv("BTAPROMPT", &btaprompt); /* Prompt after each - * proc in bta */ + /* Prompt after each proc in bta */ + kdbgetintenv("BTAPROMPT", &btaprompt); if (strcmp(argv[0], "bta") == 0) { struct task_struct *g, *p; diff --git a/kernel/debug/kdb/kdb_cmds b/kernel/debug/kdb/kdb_cmds index 56c88e4db309..9834ad303ab6 100644 --- a/kernel/debug/kdb/kdb_cmds +++ b/kernel/debug/kdb/kdb_cmds @@ -18,16 +18,12 @@ defcmd dumpcommon "" "Common kdb debugging" endefcmd defcmd dumpall "" "First line debugging" - set BTSYMARG 1 - set BTARGS 9 pid R -dumpcommon -bta endefcmd defcmd dumpcpu "" "Same as dumpall but only tasks on cpus" - set BTSYMARG 1 - set BTARGS 9 pid R -dumpcommon -btc diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c index dd0b1b7dd02c..d9ca9aa481ec 100644 --- a/kernel/debug/kdb/kdb_debugger.c +++ b/kernel/debug/kdb/kdb_debugger.c @@ -30,6 +30,8 @@ EXPORT_SYMBOL_GPL(kdb_poll_funcs); int kdb_poll_idx = 1; EXPORT_SYMBOL_GPL(kdb_poll_idx); +static struct kgdb_state *kdb_ks; + int kdb_stub(struct kgdb_state *ks) { int error = 0; @@ -39,6 +41,7 @@ int kdb_stub(struct kgdb_state *ks) kdb_dbtrap_t db_result = KDB_DB_NOBPT; int i; + kdb_ks = ks; if (KDB_STATE(REENTRY)) { reason = KDB_REASON_SWITCH; KDB_STATE_CLEAR(REENTRY); @@ -123,20 +126,8 @@ int kdb_stub(struct kgdb_state *ks) KDB_STATE_CLEAR(PAGER); kdbnearsym_cleanup(); if (error == KDB_CMD_KGDB) { - if (KDB_STATE(DOING_KGDB) || KDB_STATE(DOING_KGDB2)) { - /* - * This inteface glue which allows kdb to transition in into - * the gdb stub. In order to do this the '?' or '' gdb serial - * packet response is processed here. And then control is - * passed to the gdbstub. - */ - if (KDB_STATE(DOING_KGDB)) - gdbstub_state(ks, "?"); - else - gdbstub_state(ks, ""); + if (KDB_STATE(DOING_KGDB)) KDB_STATE_CLEAR(DOING_KGDB); - KDB_STATE_CLEAR(DOING_KGDB2); - } return DBG_PASS_EVENT; } kdb_bp_install(ks->linux_regs); @@ -166,3 +157,7 @@ int kdb_stub(struct kgdb_state *ks) return kgdb_info[ks->cpu].ret_state; } +void kdb_gdb_state_pass(char *buf) +{ + gdbstub_state(kdb_ks, buf); +} diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index 96fdaac46a80..4802eb5840e1 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -31,15 +31,21 @@ char kdb_prompt_str[CMD_BUFLEN]; int kdb_trap_printk; -static void kgdb_transition_check(char *buffer) +static int kgdb_transition_check(char *buffer) { - int slen = strlen(buffer); - if (strncmp(buffer, "$?#3f", slen) != 0 && - strncmp(buffer, "$qSupported#37", slen) != 0 && - strncmp(buffer, "+$qSupported#37", slen) != 0) { + if (buffer[0] != '+' && buffer[0] != '$') { KDB_STATE_SET(KGDB_TRANS); kdb_printf("%s", buffer); + } else { + int slen = strlen(buffer); + if (slen > 3 && buffer[slen - 3] == '#') { + kdb_gdb_state_pass(buffer); + strcpy(buffer, "kgdb"); + KDB_STATE_SET(DOING_KGDB); + return 1; + } } + return 0; } static int kdb_read_get_key(char *buffer, size_t bufsize) @@ -251,6 +257,10 @@ poll_again: case 13: /* enter */ *lastchar++ = '\n'; *lastchar++ = '\0'; + if (!KDB_STATE(KGDB_TRANS)) { + KDB_STATE_SET(KGDB_TRANS); + kdb_printf("%s", buffer); + } kdb_printf("\n"); return buffer; case 4: /* Del */ @@ -382,22 +392,26 @@ poll_again: * printed characters if we think that * kgdb is connecting, until the check * fails */ - if (!KDB_STATE(KGDB_TRANS)) - kgdb_transition_check(buffer); - else + if (!KDB_STATE(KGDB_TRANS)) { + if (kgdb_transition_check(buffer)) + return buffer; + } else { kdb_printf("%c", key); + } } /* Special escape to kgdb */ if (lastchar - buffer >= 5 && strcmp(lastchar - 5, "$?#3f") == 0) { + kdb_gdb_state_pass(lastchar - 5); strcpy(buffer, "kgdb"); KDB_STATE_SET(DOING_KGDB); return buffer; } - if (lastchar - buffer >= 14 && - strcmp(lastchar - 14, "$qSupported#37") == 0) { + if (lastchar - buffer >= 11 && + strcmp(lastchar - 11, "$qSupported") == 0) { + kdb_gdb_state_pass(lastchar - 11); strcpy(buffer, "kgdb"); - KDB_STATE_SET(DOING_KGDB2); + KDB_STATE_SET(DOING_KGDB); return buffer; } } diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index be14779bcef6..63786e71a3cd 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -145,7 +145,6 @@ static char *__env[] = { #endif "RADIX=16", "MDCOUNT=8", /* lines of md output */ - "BTARGS=9", /* 9 possible args in bt */ KDB_PLATFORM_ENV, "DTABCOUNT=30", "NOSECT=1", @@ -172,6 +171,7 @@ static char *__env[] = { (char *)0, (char *)0, (char *)0, + (char *)0, }; static const int __nenv = (sizeof(__env) / sizeof(char *)); @@ -1386,7 +1386,7 @@ int kdb_main_loop(kdb_reason_t reason, kdb_reason_t reason2, int error, } if (result == KDB_CMD_KGDB) { - if (!(KDB_STATE(DOING_KGDB) || KDB_STATE(DOING_KGDB2))) + if (!KDB_STATE(DOING_KGDB)) kdb_printf("Entering please attach debugger " "or use $D#44+ or $3#33\n"); break; diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index 35d69ed1dfb5..e381d105b40b 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -21,7 +21,6 @@ #define KDB_CMD_SS (-1003) #define KDB_CMD_SSB (-1004) #define KDB_CMD_KGDB (-1005) -#define KDB_CMD_KGDB2 (-1006) /* Internal debug flags */ #define KDB_DEBUG_FLAG_BP 0x0002 /* Breakpoint subsystem debug */ @@ -146,7 +145,6 @@ extern int kdb_state; * keyboard on this cpu */ #define KDB_STATE_KEXEC 0x00040000 /* kexec issued */ #define KDB_STATE_DOING_KGDB 0x00080000 /* kgdb enter now issued */ -#define KDB_STATE_DOING_KGDB2 0x00100000 /* kgdb enter now issued */ #define KDB_STATE_KGDB_TRANS 0x00200000 /* Transition to kgdb */ #define KDB_STATE_ARCH 0xff000000 /* Reserved for arch * specific use */ @@ -218,6 +216,7 @@ extern void kdb_print_nameval(const char *name, unsigned long val); extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info); extern void kdb_meminfo_proc_show(void); extern char *kdb_getstr(char *, size_t, char *); +extern void kdb_gdb_state_pass(char *buf); /* Defines for kdb_symbol_print */ #define KDB_SP_SPACEB 0x0001 /* Space before string */ diff --git a/kernel/events/core.c b/kernel/events/core.c index b8785e26ee1c..0f857782d06f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -399,14 +399,54 @@ void perf_cgroup_switch(struct task_struct *task, int mode) local_irq_restore(flags); } -static inline void perf_cgroup_sched_out(struct task_struct *task) +static inline void perf_cgroup_sched_out(struct task_struct *task, + struct task_struct *next) { - perf_cgroup_switch(task, PERF_CGROUP_SWOUT); + struct perf_cgroup *cgrp1; + struct perf_cgroup *cgrp2 = NULL; + + /* + * we come here when we know perf_cgroup_events > 0 + */ + cgrp1 = perf_cgroup_from_task(task); + + /* + * next is NULL when called from perf_event_enable_on_exec() + * that will systematically cause a cgroup_switch() + */ + if (next) + cgrp2 = perf_cgroup_from_task(next); + + /* + * only schedule out current cgroup events if we know + * that we are switching to a different cgroup. Otherwise, + * do no touch the cgroup events. + */ + if (cgrp1 != cgrp2) + perf_cgroup_switch(task, PERF_CGROUP_SWOUT); } -static inline void perf_cgroup_sched_in(struct task_struct *task) +static inline void perf_cgroup_sched_in(struct task_struct *prev, + struct task_struct *task) { - perf_cgroup_switch(task, PERF_CGROUP_SWIN); + struct perf_cgroup *cgrp1; + struct perf_cgroup *cgrp2 = NULL; + + /* + * we come here when we know perf_cgroup_events > 0 + */ + cgrp1 = perf_cgroup_from_task(task); + + /* prev can never be NULL */ + cgrp2 = perf_cgroup_from_task(prev); + + /* + * only need to schedule in cgroup events if we are changing + * cgroup during ctxsw. Cgroup events were not scheduled + * out of ctxsw out if that was not the case. + */ + if (cgrp1 != cgrp2) + perf_cgroup_switch(task, PERF_CGROUP_SWIN); } static inline int perf_cgroup_connect(int fd, struct perf_event *event, @@ -518,11 +558,13 @@ static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx) { } -static inline void perf_cgroup_sched_out(struct task_struct *task) +static inline void perf_cgroup_sched_out(struct task_struct *task, + struct task_struct *next) { } -static inline void perf_cgroup_sched_in(struct task_struct *task) +static inline void perf_cgroup_sched_in(struct task_struct *prev, + struct task_struct *task) { } @@ -1988,7 +2030,7 @@ void __perf_event_task_sched_out(struct task_struct *task, * cgroup event are system-wide mode only */ if (atomic_read(&__get_cpu_var(perf_cgroup_events))) - perf_cgroup_sched_out(task); + perf_cgroup_sched_out(task, next); } static void task_ctx_sched_out(struct perf_event_context *ctx) @@ -2153,7 +2195,8 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, * accessing the event control register. If a NMI hits, then it will * keep the event running. */ -void __perf_event_task_sched_in(struct task_struct *task) +void __perf_event_task_sched_in(struct task_struct *prev, + struct task_struct *task) { struct perf_event_context *ctx; int ctxn; @@ -2171,7 +2214,7 @@ void __perf_event_task_sched_in(struct task_struct *task) * cgroup event are system-wide mode only */ if (atomic_read(&__get_cpu_var(perf_cgroup_events))) - perf_cgroup_sched_in(task); + perf_cgroup_sched_in(prev, task); } static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) @@ -2427,7 +2470,7 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx) * ctxswin cgroup events which are already scheduled * in. */ - perf_cgroup_sched_out(current); + perf_cgroup_sched_out(current, NULL); raw_spin_lock(&ctx->lock); task_ctx_sched_out(ctx); @@ -3353,8 +3396,8 @@ static int perf_event_index(struct perf_event *event) } static void calc_timer_values(struct perf_event *event, - u64 *running, - u64 *enabled) + u64 *enabled, + u64 *running) { u64 now, ctx_time; diff --git a/kernel/exit.c b/kernel/exit.c index 9ee58bb9e60f..2913b3509d42 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -980,6 +980,7 @@ NORET_TYPE void do_exit(long code) trace_sched_process_exit(tsk); exit_sem(tsk); + exit_shm(tsk); exit_files(tsk); exit_fs(tsk); check_stack_usage(); diff --git a/kernel/fork.c b/kernel/fork.c index 17bf7c8d6511..8e6b6f4fb272 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -80,7 +80,7 @@ * Protected counters by write_lock_irq(&tasklist_lock) */ unsigned long total_forks; /* Handle normal Linux uptimes. */ -int nr_threads; /* The idle threads do not count.. */ +int nr_threads; /* The idle threads do not count.. */ int max_threads; /* tunable limit on nr_threads */ @@ -232,7 +232,7 @@ void __init fork_init(unsigned long mempages) /* * we need to allow at least 20 threads to boot a system */ - if(max_threads < 20) + if (max_threads < 20) max_threads = 20; init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; @@ -268,7 +268,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) return NULL; } - err = arch_dup_task_struct(tsk, orig); + err = arch_dup_task_struct(tsk, orig); if (err) goto out; @@ -288,8 +288,11 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) tsk->stack_canary = get_random_int(); #endif - /* One for us, one for whoever does the "release_task()" (usually parent) */ - atomic_set(&tsk->usage,2); + /* + * One for us, one for whoever does the "release_task()" (usually + * parent) + */ + atomic_set(&tsk->usage, 2); #ifdef CONFIG_BLK_DEV_IO_TRACE tsk->btrace_seq = 0; #endif @@ -437,7 +440,7 @@ fail_nomem: goto out; } -static inline int mm_alloc_pgd(struct mm_struct * mm) +static inline int mm_alloc_pgd(struct mm_struct *mm) { mm->pgd = pgd_alloc(mm); if (unlikely(!mm->pgd)) @@ -445,7 +448,7 @@ static inline int mm_alloc_pgd(struct mm_struct * mm) return 0; } -static inline void mm_free_pgd(struct mm_struct * mm) +static inline void mm_free_pgd(struct mm_struct *mm) { pgd_free(mm, mm->pgd); } @@ -482,7 +485,7 @@ static void mm_init_aio(struct mm_struct *mm) #endif } -static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) +static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) { atomic_set(&mm->mm_users, 1); atomic_set(&mm->mm_count, 1); @@ -513,9 +516,9 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) /* * Allocate and initialize an mm_struct. */ -struct mm_struct * mm_alloc(void) +struct mm_struct *mm_alloc(void) { - struct mm_struct * mm; + struct mm_struct *mm; mm = allocate_mm(); if (!mm) @@ -583,7 +586,7 @@ void added_exe_file_vma(struct mm_struct *mm) void removed_exe_file_vma(struct mm_struct *mm) { mm->num_exe_file_vmas--; - if ((mm->num_exe_file_vmas == 0) && mm->exe_file){ + if ((mm->num_exe_file_vmas == 0) && mm->exe_file) { fput(mm->exe_file); mm->exe_file = NULL; } @@ -775,9 +778,9 @@ fail_nocontext: return NULL; } -static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) +static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) { - struct mm_struct * mm, *oldmm; + struct mm_struct *mm, *oldmm; int retval; tsk->min_flt = tsk->maj_flt = 0; @@ -844,7 +847,7 @@ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) return 0; } -static int copy_files(unsigned long clone_flags, struct task_struct * tsk) +static int copy_files(unsigned long clone_flags, struct task_struct *tsk) { struct files_struct *oldf, *newf; int error = 0; @@ -1108,6 +1111,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->real_cred->user != INIT_USER) goto bad_fork_free; } + current->flags &= ~PF_NPROC_EXCEEDED; retval = copy_creds(p, clone_flags); if (retval < 0) @@ -1166,13 +1170,17 @@ static struct task_struct *copy_process(unsigned long clone_flags, cgroup_fork(p); #ifdef CONFIG_NUMA p->mempolicy = mpol_dup(p->mempolicy); - if (IS_ERR(p->mempolicy)) { - retval = PTR_ERR(p->mempolicy); - p->mempolicy = NULL; - goto bad_fork_cleanup_cgroup; - } + if (IS_ERR(p->mempolicy)) { + retval = PTR_ERR(p->mempolicy); + p->mempolicy = NULL; + goto bad_fork_cleanup_cgroup; + } mpol_fix_fork_child_flag(p); #endif +#ifdef CONFIG_CPUSETS + p->cpuset_mem_spread_rotor = NUMA_NO_NODE; + p->cpuset_slab_spread_rotor = NUMA_NO_NODE; +#endif #ifdef CONFIG_TRACE_IRQFLAGS p->irq_events = 0; #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW @@ -1212,25 +1220,33 @@ static struct task_struct *copy_process(unsigned long clone_flags, retval = perf_event_init_task(p); if (retval) goto bad_fork_cleanup_policy; - - if ((retval = audit_alloc(p))) + retval = audit_alloc(p); + if (retval) goto bad_fork_cleanup_policy; /* copy all the process information */ - if ((retval = copy_semundo(clone_flags, p))) + retval = copy_semundo(clone_flags, p); + if (retval) goto bad_fork_cleanup_audit; - if ((retval = copy_files(clone_flags, p))) + retval = copy_files(clone_flags, p); + if (retval) goto bad_fork_cleanup_semundo; - if ((retval = copy_fs(clone_flags, p))) + retval = copy_fs(clone_flags, p); + if (retval) goto bad_fork_cleanup_files; - if ((retval = copy_sighand(clone_flags, p))) + retval = copy_sighand(clone_flags, p); + if (retval) goto bad_fork_cleanup_fs; - if ((retval = copy_signal(clone_flags, p))) + retval = copy_signal(clone_flags, p); + if (retval) goto bad_fork_cleanup_sighand; - if ((retval = copy_mm(clone_flags, p))) + retval = copy_mm(clone_flags, p); + if (retval) goto bad_fork_cleanup_signal; - if ((retval = copy_namespaces(clone_flags, p))) + retval = copy_namespaces(clone_flags, p); + if (retval) goto bad_fork_cleanup_mm; - if ((retval = copy_io(clone_flags, p))) + retval = copy_io(clone_flags, p); + if (retval) goto bad_fork_cleanup_namespaces; retval = copy_thread(clone_flags, stack_start, stack_size, p, regs); if (retval) @@ -1252,7 +1268,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, /* * Clear TID on mm_release()? */ - p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; + p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL; #ifdef CONFIG_BLOCK p->plug = NULL; #endif @@ -1320,7 +1336,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, * it's process group. * A fatal signal pending means that current will exit, so the new * thread can't slip out of an OOM kill (or normal SIGKILL). - */ + */ recalc_sigpending(); if (signal_pending(current)) { spin_unlock(¤t->sighand->siglock); @@ -1681,12 +1697,14 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) */ if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) do_sysvsem = 1; - if ((err = unshare_fs(unshare_flags, &new_fs))) + err = unshare_fs(unshare_flags, &new_fs); + if (err) goto bad_unshare_out; - if ((err = unshare_fd(unshare_flags, &new_fd))) + err = unshare_fd(unshare_flags, &new_fd); + if (err) goto bad_unshare_cleanup_fs; - if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, - new_fs))) + err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs); + if (err) goto bad_unshare_cleanup_fd; if (new_fs || new_fd || do_sysvsem || new_nsproxy) { diff --git a/kernel/futex.c b/kernel/futex.c index 557c9c7cdd88..1511dff0cfd6 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -218,6 +218,8 @@ static void drop_futex_key_refs(union futex_key *key) * @uaddr: virtual address of the futex * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED * @key: address where result is stored. + * @rw: mapping needs to be read/write (values: VERIFY_READ, + * VERIFY_WRITE) * * Returns a negative error code or 0 * The key words are stored in *key on success. @@ -229,12 +231,12 @@ static void drop_futex_key_refs(union futex_key *key) * lock_page() might sleep, the caller should not hold a spinlock. */ static int -get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) +get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) { unsigned long address = (unsigned long)uaddr; struct mm_struct *mm = current->mm; struct page *page, *page_head; - int err; + int err, ro = 0; /* * The futex address must be "naturally" aligned. @@ -262,8 +264,18 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) again: err = get_user_pages_fast(address, 1, 1, &page); + /* + * If write access is not required (eg. FUTEX_WAIT), try + * and get read-only access. + */ + if (err == -EFAULT && rw == VERIFY_READ) { + err = get_user_pages_fast(address, 1, 0, &page); + ro = 1; + } if (err < 0) return err; + else + err = 0; #ifdef CONFIG_TRANSPARENT_HUGEPAGE page_head = page; @@ -305,6 +317,13 @@ again: if (!page_head->mapping) { unlock_page(page_head); put_page(page_head); + /* + * ZERO_PAGE pages don't have a mapping. Avoid a busy loop + * trying to find one. RW mapping would have COW'd (and thus + * have a mapping) so this page is RO and won't ever change. + */ + if ((page_head == ZERO_PAGE(address))) + return -EFAULT; goto again; } @@ -316,6 +335,15 @@ again: * the object not the particular process. */ if (PageAnon(page_head)) { + /* + * A RO anonymous page will never change and thus doesn't make + * sense for futex operations. + */ + if (ro) { + err = -EFAULT; + goto out; + } + key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ key->private.mm = mm; key->private.address = address; @@ -327,9 +355,10 @@ again: get_futex_key_refs(key); +out: unlock_page(page_head); put_page(page_head); - return 0; + return err; } static inline void put_futex_key(union futex_key *key) @@ -940,7 +969,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) if (!bitset) return -EINVAL; - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key); + ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ); if (unlikely(ret != 0)) goto out; @@ -986,10 +1015,10 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, int ret, op_ret; retry: - ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1); + ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ); if (unlikely(ret != 0)) goto out; - ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); + ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE); if (unlikely(ret != 0)) goto out_put_key1; @@ -1243,10 +1272,11 @@ retry: pi_state = NULL; } - ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1); + ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ); if (unlikely(ret != 0)) goto out; - ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); + ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, + requeue_pi ? VERIFY_WRITE : VERIFY_READ); if (unlikely(ret != 0)) goto out_put_key1; @@ -1790,7 +1820,7 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, * while the syscall executes. */ retry: - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key); + ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ); if (unlikely(ret != 0)) return ret; @@ -1941,7 +1971,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect, } retry: - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key); + ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE); if (unlikely(ret != 0)) goto out; @@ -2060,7 +2090,7 @@ retry: if ((uval & FUTEX_TID_MASK) != vpid) return -EPERM; - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key); + ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE); if (unlikely(ret != 0)) goto out; @@ -2249,7 +2279,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, debug_rt_mutex_init_waiter(&rt_waiter); rt_waiter.task = NULL; - ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); + ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE); if (unlikely(ret != 0)) goto out; diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig index 5bf924d80b5c..a92028196cc1 100644 --- a/kernel/gcov/Kconfig +++ b/kernel/gcov/Kconfig @@ -3,7 +3,7 @@ menu "GCOV-based kernel profiling" config GCOV_KERNEL bool "Enable gcov-based kernel profiling" depends on DEBUG_FS - select CONSTRUCTORS + select CONSTRUCTORS if !UML default n ---help--- This option enables gcov-based code profiling (e.g. for code coverage diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index d1d051b38e0b..5a38bf4de641 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -52,6 +52,10 @@ config IRQ_EDGE_EOI_HANDLER config GENERIC_IRQ_CHIP bool +# Generic irq_domain hw <--> linux irq number translation +config IRQ_DOMAIN + bool + # Support forced irq threading config IRQ_FORCED_THREADING bool diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index 73290056cfb6..fff17381f0af 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile @@ -2,6 +2,7 @@ obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o obj-$(CONFIG_GENERIC_IRQ_CHIP) += generic-chip.o obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o +obj-$(CONFIG_IRQ_DOMAIN) += irqdomain.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o obj-$(CONFIG_PM_SLEEP) += pm.o diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index 3a2cab407b93..e38544dddb18 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -246,7 +246,7 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, gc->mask_cache = irq_reg_readl(gc->reg_base + ct->regs.mask); for (i = gc->irq_base; msk; msk >>= 1, i++) { - if (!msk & 0x01) + if (!(msk & 0x01)) continue; if (flags & IRQ_GC_INIT_NESTED_LOCK) @@ -301,7 +301,7 @@ void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk, raw_spin_unlock(&gc_lock); for (; msk; msk >>= 1, i++) { - if (!msk & 0x01) + if (!(msk & 0x01)) continue; /* Remove handler first. That will mask the irq line */ diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 4c60a50e66b2..039b889ea053 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -70,7 +70,8 @@ static inline void desc_smp_init(struct irq_desc *desc, int node) { } static inline int desc_node(struct irq_desc *desc) { return 0; } #endif -static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) +static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node, + struct module *owner) { int cpu; @@ -86,6 +87,7 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) desc->irq_count = 0; desc->irqs_unhandled = 0; desc->name = NULL; + desc->owner = owner; for_each_possible_cpu(cpu) *per_cpu_ptr(desc->kstat_irqs, cpu) = 0; desc_smp_init(desc, node); @@ -128,7 +130,7 @@ static void free_masks(struct irq_desc *desc) static inline void free_masks(struct irq_desc *desc) { } #endif -static struct irq_desc *alloc_desc(int irq, int node) +static struct irq_desc *alloc_desc(int irq, int node, struct module *owner) { struct irq_desc *desc; gfp_t gfp = GFP_KERNEL; @@ -147,7 +149,7 @@ static struct irq_desc *alloc_desc(int irq, int node) raw_spin_lock_init(&desc->lock); lockdep_set_class(&desc->lock, &irq_desc_lock_class); - desc_set_defaults(irq, desc, node); + desc_set_defaults(irq, desc, node, owner); return desc; @@ -173,13 +175,14 @@ static void free_desc(unsigned int irq) kfree(desc); } -static int alloc_descs(unsigned int start, unsigned int cnt, int node) +static int alloc_descs(unsigned int start, unsigned int cnt, int node, + struct module *owner) { struct irq_desc *desc; int i; for (i = 0; i < cnt; i++) { - desc = alloc_desc(start + i, node); + desc = alloc_desc(start + i, node, owner); if (!desc) goto err; mutex_lock(&sparse_irq_lock); @@ -227,7 +230,7 @@ int __init early_irq_init(void) nr_irqs = initcnt; for (i = 0; i < initcnt; i++) { - desc = alloc_desc(i, node); + desc = alloc_desc(i, node, NULL); set_bit(i, allocated_irqs); irq_insert_desc(i, desc); } @@ -261,7 +264,7 @@ int __init early_irq_init(void) alloc_masks(&desc[i], GFP_KERNEL, node); raw_spin_lock_init(&desc[i].lock); lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); - desc_set_defaults(i, &desc[i], node); + desc_set_defaults(i, &desc[i], node, NULL); } return arch_early_irq_init(); } @@ -276,8 +279,16 @@ static void free_desc(unsigned int irq) dynamic_irq_cleanup(irq); } -static inline int alloc_descs(unsigned int start, unsigned int cnt, int node) +static inline int alloc_descs(unsigned int start, unsigned int cnt, int node, + struct module *owner) { + u32 i; + + for (i = 0; i < cnt; i++) { + struct irq_desc *desc = irq_to_desc(start + i); + + desc->owner = owner; + } return start; } @@ -333,11 +344,13 @@ EXPORT_SYMBOL_GPL(irq_free_descs); * @from: Start the search from this irq number * @cnt: Number of consecutive irqs to allocate. * @node: Preferred node on which the irq descriptor should be allocated + * @owner: Owning module (can be NULL) * * Returns the first irq number or error code */ int __ref -irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node) +__irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, + struct module *owner) { int start, ret; @@ -366,13 +379,13 @@ irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node) bitmap_set(allocated_irqs, start, cnt); mutex_unlock(&sparse_irq_lock); - return alloc_descs(start, cnt, node); + return alloc_descs(start, cnt, node, owner); err: mutex_unlock(&sparse_irq_lock); return ret; } -EXPORT_SYMBOL_GPL(irq_alloc_descs); +EXPORT_SYMBOL_GPL(__irq_alloc_descs); /** * irq_reserve_irqs - mark irqs allocated @@ -440,7 +453,7 @@ void dynamic_irq_cleanup(unsigned int irq) unsigned long flags; raw_spin_lock_irqsave(&desc->lock, flags); - desc_set_defaults(irq, desc, desc_node(desc)); + desc_set_defaults(irq, desc, desc_node(desc), NULL); raw_spin_unlock_irqrestore(&desc->lock, flags); } diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c new file mode 100644 index 000000000000..d5828da3fd38 --- /dev/null +++ b/kernel/irq/irqdomain.c @@ -0,0 +1,180 @@ +#include <linux/irq.h> +#include <linux/irqdomain.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/slab.h> + +static LIST_HEAD(irq_domain_list); +static DEFINE_MUTEX(irq_domain_mutex); + +/** + * irq_domain_add() - Register an irq_domain + * @domain: ptr to initialized irq_domain structure + * + * Registers an irq_domain structure. The irq_domain must at a minimum be + * initialized with an ops structure pointer, and either a ->to_irq hook or + * a valid irq_base value. Everything else is optional. + */ +void irq_domain_add(struct irq_domain *domain) +{ + struct irq_data *d; + int hwirq; + + /* + * This assumes that the irq_domain owner has already allocated + * the irq_descs. This block will be removed when support for dynamic + * allocation of irq_descs is added to irq_domain. + */ + for (hwirq = 0; hwirq < domain->nr_irq; hwirq++) { + d = irq_get_irq_data(irq_domain_to_irq(domain, hwirq)); + if (d || d->domain) { + /* things are broken; just report, don't clean up */ + WARN(1, "error: irq_desc already assigned to a domain"); + return; + } + d->domain = domain; + d->hwirq = hwirq; + } + + mutex_lock(&irq_domain_mutex); + list_add(&domain->list, &irq_domain_list); + mutex_unlock(&irq_domain_mutex); +} + +/** + * irq_domain_del() - Unregister an irq_domain + * @domain: ptr to registered irq_domain. + */ +void irq_domain_del(struct irq_domain *domain) +{ + struct irq_data *d; + int hwirq; + + mutex_lock(&irq_domain_mutex); + list_del(&domain->list); + mutex_unlock(&irq_domain_mutex); + + /* Clear the irq_domain assignments */ + for (hwirq = 0; hwirq < domain->nr_irq; hwirq++) { + d = irq_get_irq_data(irq_domain_to_irq(domain, hwirq)); + d->domain = NULL; + } +} + +#if defined(CONFIG_OF_IRQ) +/** + * irq_create_of_mapping() - Map a linux irq number from a DT interrupt spec + * + * Used by the device tree interrupt mapping code to translate a device tree + * interrupt specifier to a valid linux irq number. Returns either a valid + * linux IRQ number or 0. + * + * When the caller no longer need the irq number returned by this function it + * should arrange to call irq_dispose_mapping(). + */ +unsigned int irq_create_of_mapping(struct device_node *controller, + const u32 *intspec, unsigned int intsize) +{ + struct irq_domain *domain; + unsigned long hwirq; + unsigned int irq, type; + int rc = -EINVAL; + + /* Find a domain which can translate the irq spec */ + mutex_lock(&irq_domain_mutex); + list_for_each_entry(domain, &irq_domain_list, list) { + if (!domain->ops->dt_translate) + continue; + rc = domain->ops->dt_translate(domain, controller, + intspec, intsize, &hwirq, &type); + if (rc == 0) + break; + } + mutex_unlock(&irq_domain_mutex); + + if (rc != 0) + return 0; + + irq = irq_domain_to_irq(domain, hwirq); + if (type != IRQ_TYPE_NONE) + irq_set_irq_type(irq, type); + pr_debug("%s: mapped hwirq=%i to irq=%i, flags=%x\n", + controller->full_name, (int)hwirq, irq, type); + return irq; +} +EXPORT_SYMBOL_GPL(irq_create_of_mapping); + +/** + * irq_dispose_mapping() - Discard a mapping created by irq_create_of_mapping() + * @irq: linux irq number to be discarded + * + * Calling this function indicates the caller no longer needs a reference to + * the linux irq number returned by a prior call to irq_create_of_mapping(). + */ +void irq_dispose_mapping(unsigned int irq) +{ + /* + * nothing yet; will be filled when support for dynamic allocation of + * irq_descs is added to irq_domain + */ +} +EXPORT_SYMBOL_GPL(irq_dispose_mapping); + +int irq_domain_simple_dt_translate(struct irq_domain *d, + struct device_node *controller, + const u32 *intspec, unsigned int intsize, + unsigned long *out_hwirq, unsigned int *out_type) +{ + if (d->of_node != controller) + return -EINVAL; + if (intsize < 1) + return -EINVAL; + + *out_hwirq = intspec[0]; + *out_type = IRQ_TYPE_NONE; + if (intsize > 1) + *out_type = intspec[1] & IRQ_TYPE_SENSE_MASK; + return 0; +} + +struct irq_domain_ops irq_domain_simple_ops = { + .dt_translate = irq_domain_simple_dt_translate, +}; +EXPORT_SYMBOL_GPL(irq_domain_simple_ops); + +/** + * irq_domain_create_simple() - Set up a 'simple' translation range + */ +void irq_domain_add_simple(struct device_node *controller, int irq_base) +{ + struct irq_domain *domain; + + domain = kzalloc(sizeof(*domain), GFP_KERNEL); + if (!domain) { + WARN_ON(1); + return; + } + + domain->irq_base = irq_base; + domain->of_node = of_node_get(controller); + domain->ops = &irq_domain_simple_ops; + irq_domain_add(domain); +} +EXPORT_SYMBOL_GPL(irq_domain_add_simple); + +void irq_domain_generate_simple(const struct of_device_id *match, + u64 phys_base, unsigned int irq_start) +{ + struct device_node *node; + pr_info("looking for phys_base=%llx, irq_start=%i\n", + (unsigned long long) phys_base, (int) irq_start); + node = of_find_matching_node_by_address(NULL, match, phys_base); + if (node) + irq_domain_add_simple(node, irq_start); + else + pr_info("no node found\n"); +} +EXPORT_SYMBOL_GPL(irq_domain_generate_simple); +#endif /* CONFIG_OF_IRQ */ diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 0a7840aeb0fb..9b956fa20308 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -883,6 +883,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) if (desc->irq_data.chip == &no_irq_chip) return -ENOSYS; + if (!try_module_get(desc->owner)) + return -ENODEV; /* * Some drivers like serial.c use request_irq() heavily, * so we have to be careful not to interfere with a @@ -906,8 +908,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) */ nested = irq_settings_is_nested_thread(desc); if (nested) { - if (!new->thread_fn) - return -EINVAL; + if (!new->thread_fn) { + ret = -EINVAL; + goto out_mput; + } /* * Replace the primary handler which was provided from * the driver for non nested interrupt handling by the @@ -929,8 +933,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) t = kthread_create(irq_thread, new, "irq/%d-%s", irq, new->name); - if (IS_ERR(t)) - return PTR_ERR(t); + if (IS_ERR(t)) { + ret = PTR_ERR(t); + goto out_mput; + } /* * We keep the reference to the task struct even if * the thread dies to avoid that the interrupt code @@ -1095,6 +1101,8 @@ out_thread: kthread_stop(t); put_task_struct(t); } +out_mput: + module_put(desc->owner); return ret; } @@ -1203,6 +1211,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) put_task_struct(action->thread); } + module_put(desc->owner); return action; } diff --git a/kernel/kmod.c b/kernel/kmod.c index 47613dfb7b28..ddc7644c1305 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -274,7 +274,7 @@ static void __call_usermodehelper(struct work_struct *work) * (used for preventing user land processes from being created after the user * land has been frozen during a system-wide hibernation or suspend operation). */ -static int usermodehelper_disabled; +static int usermodehelper_disabled = 1; /* Number of helpers running */ static atomic_t running_helpers = ATOMIC_INIT(0); diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 3956f5149e25..91d67ce3a8d5 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -2468,7 +2468,7 @@ mark_held_locks(struct task_struct *curr, enum mark_type mark) BUG_ON(usage_bit >= LOCK_USAGE_STATES); - if (hlock_class(hlock)->key == &__lockdep_no_validate__) + if (hlock_class(hlock)->key == __lockdep_no_validate__.subkeys) continue; if (!mark_lock(curr, hlock, usage_bit)) @@ -2485,23 +2485,9 @@ static void __trace_hardirqs_on_caller(unsigned long ip) { struct task_struct *curr = current; - if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled))) - return; - - if (unlikely(curr->hardirqs_enabled)) { - /* - * Neither irq nor preemption are disabled here - * so this is racy by nature but losing one hit - * in a stat is not a big deal. - */ - __debug_atomic_inc(redundant_hardirqs_on); - return; - } /* we'll do an OFF -> ON transition: */ curr->hardirqs_enabled = 1; - if (DEBUG_LOCKS_WARN_ON(current->hardirq_context)) - return; /* * We are going to turn hardirqs on, so set the * usage bit for all held locks: @@ -2529,9 +2515,25 @@ void trace_hardirqs_on_caller(unsigned long ip) if (unlikely(!debug_locks || current->lockdep_recursion)) return; + if (unlikely(current->hardirqs_enabled)) { + /* + * Neither irq nor preemption are disabled here + * so this is racy by nature but losing one hit + * in a stat is not a big deal. + */ + __debug_atomic_inc(redundant_hardirqs_on); + return; + } + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) return; + if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled))) + return; + + if (DEBUG_LOCKS_WARN_ON(current->hardirq_context)) + return; + current->lockdep_recursion = 1; __trace_hardirqs_on_caller(ip); current->lockdep_recursion = 0; @@ -2872,10 +2874,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, void lockdep_init_map(struct lockdep_map *lock, const char *name, struct lock_class_key *key, int subclass) { - int i; - - for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++) - lock->class_cache[i] = NULL; + memset(lock, 0, sizeof(*lock)); #ifdef CONFIG_LOCK_STAT lock->cpu = raw_smp_processor_id(); @@ -3112,7 +3111,13 @@ static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock) if (!class) class = look_up_lock_class(lock, 0); - if (DEBUG_LOCKS_WARN_ON(!class)) + /* + * If look_up_lock_class() failed to find a class, we're trying + * to test if we hold a lock that has never yet been acquired. + * Clearly if the lock hasn't been acquired _ever_, we're not + * holding it either, so report failure. + */ + if (!class) return 0; if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock)) diff --git a/kernel/panic.c b/kernel/panic.c index 69231670eb95..d7bb6974efb5 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -119,6 +119,8 @@ NORET_TYPE void panic(const char * fmt, ...) } mdelay(PANIC_TIMER_STEP); } + } + if (panic_timeout != 0) { /* * This will not be a clean reboot, with everything * shutting down. But if there is a chance of diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index b1914cb9095c..3744c594b19b 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -231,3 +231,7 @@ config PM_CLK config PM_GENERIC_DOMAINS bool depends on PM + +config PM_GENERIC_DOMAINS_RUNTIME + def_bool y + depends on PM_RUNTIME && PM_GENERIC_DOMAINS diff --git a/kernel/printk.c b/kernel/printk.c index 37dff3429adb..28a40d8171b8 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -318,8 +318,10 @@ static int check_syslog_permissions(int type, bool from_file) return 0; /* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */ if (capable(CAP_SYS_ADMIN)) { - WARN_ONCE(1, "Attempt to access syslog with CAP_SYS_ADMIN " - "but no CAP_SYSLOG (deprecated).\n"); + printk_once(KERN_WARNING "%s (%d): " + "Attempt to access syslog with CAP_SYS_ADMIN " + "but no CAP_SYSLOG (deprecated).\n", + current->comm, task_pid_nr(current)); return 0; } return -EPERM; @@ -1602,7 +1604,7 @@ static int __init printk_late_init(void) struct console *con; for_each_console(con) { - if (con->flags & CON_BOOT) { + if (!keep_bootcon && con->flags & CON_BOOT) { printk(KERN_INFO "turn off boot console %s%d\n", con->name, con->index); unregister_console(con); diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 7784bd216b6a..ddddb320be61 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -37,7 +37,7 @@ #include <linux/smp.h> #include <linux/interrupt.h> #include <linux/sched.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <linux/bitops.h> #include <linux/percpu.h> #include <linux/notifier.h> diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index ced72102adc2..98f51b13bb7e 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -33,7 +33,7 @@ #include <linux/rcupdate.h> #include <linux/interrupt.h> #include <linux/sched.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <linux/bitops.h> #include <linux/completion.h> #include <linux/moduleparam.h> diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 4e144876dc68..3b0c0986afc0 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -31,7 +31,7 @@ #include <linux/rcupdate.h> #include <linux/interrupt.h> #include <linux/sched.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <linux/bitops.h> #include <linux/module.h> #include <linux/completion.h> diff --git a/kernel/resource.c b/kernel/resource.c index 3ff40178dce7..3b3cedc52592 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -553,6 +553,27 @@ int allocate_resource(struct resource *root, struct resource *new, EXPORT_SYMBOL(allocate_resource); +/** + * lookup_resource - find an existing resource by a resource start address + * @root: root resource descriptor + * @start: resource start address + * + * Returns a pointer to the resource if found, NULL otherwise + */ +struct resource *lookup_resource(struct resource *root, resource_size_t start) +{ + struct resource *res; + + read_lock(&resource_lock); + for (res = root->child; res; res = res->sibling) { + if (res->start == start) + break; + } + read_unlock(&resource_lock); + + return res; +} + /* * Insert a resource into the resource tree. If successful, return NULL, * otherwise return the conflicting resource (compare to __request_resource()) diff --git a/kernel/rwsem.c b/kernel/rwsem.c index 176e5e56ffab..9f48f3d82e9b 100644 --- a/kernel/rwsem.c +++ b/kernel/rwsem.c @@ -11,7 +11,7 @@ #include <linux/rwsem.h> #include <asm/system.h> -#include <asm/atomic.h> +#include <linux/atomic.h> /* * lock for reading diff --git a/kernel/sched.c b/kernel/sched.c index ccacdbdecf45..ec5f472bc5b9 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3065,7 +3065,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW local_irq_disable(); #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ - perf_event_task_sched_in(current); + perf_event_task_sched_in(prev, current); #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW local_irq_enable(); #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ @@ -4279,9 +4279,9 @@ pick_next_task(struct rq *rq) } /* - * schedule() is the main scheduler function. + * __schedule() is the main scheduler function. */ -asmlinkage void __sched schedule(void) +static void __sched __schedule(void) { struct task_struct *prev, *next; unsigned long *switch_count; @@ -4322,16 +4322,6 @@ need_resched: if (to_wakeup) try_to_wake_up_local(to_wakeup); } - - /* - * If we are going to sleep and we have plugged IO - * queued, make sure to submit it to avoid deadlocks. - */ - if (blk_needs_flush_plug(prev)) { - raw_spin_unlock(&rq->lock); - blk_schedule_flush_plug(prev); - raw_spin_lock(&rq->lock); - } } switch_count = &prev->nvcsw; } @@ -4369,6 +4359,26 @@ need_resched: if (need_resched()) goto need_resched; } + +static inline void sched_submit_work(struct task_struct *tsk) +{ + if (!tsk->state) + return; + /* + * If we are going to sleep and we have plugged IO queued, + * make sure to submit it to avoid deadlocks. + */ + if (blk_needs_flush_plug(tsk)) + blk_schedule_flush_plug(tsk); +} + +asmlinkage void schedule(void) +{ + struct task_struct *tsk = current; + + sched_submit_work(tsk); + __schedule(); +} EXPORT_SYMBOL(schedule); #ifdef CONFIG_MUTEX_SPIN_ON_OWNER @@ -4435,7 +4445,7 @@ asmlinkage void __sched notrace preempt_schedule(void) do { add_preempt_count_notrace(PREEMPT_ACTIVE); - schedule(); + __schedule(); sub_preempt_count_notrace(PREEMPT_ACTIVE); /* @@ -4463,7 +4473,7 @@ asmlinkage void __sched preempt_schedule_irq(void) do { add_preempt_count(PREEMPT_ACTIVE); local_irq_enable(); - schedule(); + __schedule(); local_irq_disable(); sub_preempt_count(PREEMPT_ACTIVE); @@ -5588,7 +5598,7 @@ static inline int should_resched(void) static void __cond_resched(void) { add_preempt_count(PREEMPT_ACTIVE); - schedule(); + __schedule(); sub_preempt_count(PREEMPT_ACTIVE); } @@ -7443,6 +7453,7 @@ static void __sdt_free(const struct cpumask *cpu_map) struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j); if (sd && (sd->flags & SD_OVERLAP)) free_sched_groups(sd->groups, 0); + kfree(*per_cpu_ptr(sdd->sd, j)); kfree(*per_cpu_ptr(sdd->sg, j)); kfree(*per_cpu_ptr(sdd->sgp, j)); } diff --git a/kernel/signal.c b/kernel/signal.c index d7f70aed1cc0..291c9700be75 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3102,15 +3102,11 @@ SYSCALL_DEFINE0(sgetmask) SYSCALL_DEFINE1(ssetmask, int, newmask) { - int old; - - spin_lock_irq(¤t->sighand->siglock); - old = current->blocked.sig[0]; + int old = current->blocked.sig[0]; + sigset_t newset; - siginitset(¤t->blocked, newmask & ~(sigmask(SIGKILL)| - sigmask(SIGSTOP))); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); + siginitset(&newset, newmask & ~(sigmask(SIGKILL) | sigmask(SIGSTOP))); + set_current_blocked(&newset); return old; } @@ -3167,11 +3163,8 @@ SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize) return -EFAULT; sigdelsetmask(&newset, sigmask(SIGKILL)|sigmask(SIGSTOP)); - spin_lock_irq(¤t->sighand->siglock); current->saved_sigmask = current->blocked; - current->blocked = newset; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); + set_current_blocked(&newset); current->state = TASK_INTERRUPTIBLE; schedule(); diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index c1124752e1d3..ba5070ce5765 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -19,7 +19,7 @@ #include <linux/interrupt.h> #include <linux/kallsyms.h> -#include <asm/atomic.h> +#include <linux/atomic.h> /* * Structure to determine completion condition and record errors. May diff --git a/kernel/sys.c b/kernel/sys.c index a101ba36c444..18ee1d2f6474 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -37,6 +37,8 @@ #include <linux/fs_struct.h> #include <linux/gfp.h> #include <linux/syscore_ops.h> +#include <linux/version.h> +#include <linux/ctype.h> #include <linux/compat.h> #include <linux/syscalls.h> @@ -44,6 +46,8 @@ #include <linux/user_namespace.h> #include <linux/kmsg_dump.h> +/* Move somewhere else to avoid recompiling? */ +#include <generated/utsrelease.h> #include <asm/uaccess.h> #include <asm/io.h> @@ -621,11 +625,18 @@ static int set_user(struct cred *new) if (!new_user) return -EAGAIN; + /* + * We don't fail in case of NPROC limit excess here because too many + * poorly written programs don't check set*uid() return code, assuming + * it never fails if called by root. We may still enforce NPROC limit + * for programs doing set*uid()+execve() by harmlessly deferring the + * failure to the execve() stage. + */ if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) && - new_user != INIT_USER) { - free_uid(new_user); - return -EAGAIN; - } + new_user != INIT_USER) + current->flags |= PF_NPROC_EXCEEDED; + else + current->flags &= ~PF_NPROC_EXCEEDED; free_uid(new->user); new->user = new_user; @@ -1154,6 +1165,34 @@ DECLARE_RWSEM(uts_sem); #define override_architecture(name) 0 #endif +/* + * Work around broken programs that cannot handle "Linux 3.0". + * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40 + */ +static int override_release(char __user *release, int len) +{ + int ret = 0; + char buf[len]; + + if (current->personality & UNAME26) { + char *rest = UTS_RELEASE; + int ndots = 0; + unsigned v; + + while (*rest) { + if (*rest == '.' && ++ndots >= 3) + break; + if (!isdigit(*rest) && *rest != '.') + break; + rest++; + } + v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40; + snprintf(buf, len, "2.6.%u%s", v, rest); + ret = copy_to_user(release, buf, len); + } + return ret; +} + SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) { int errno = 0; @@ -1163,6 +1202,8 @@ SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) errno = -EFAULT; up_read(&uts_sem); + if (!errno && override_release(name->release, sizeof(name->release))) + errno = -EFAULT; if (!errno && override_architecture(name)) errno = -EFAULT; return errno; @@ -1184,6 +1225,8 @@ SYSCALL_DEFINE1(uname, struct old_utsname __user *, name) error = -EFAULT; up_read(&uts_sem); + if (!error && override_release(name->release, sizeof(name->release))) + error = -EFAULT; if (!error && override_architecture(name)) error = -EFAULT; return error; @@ -1218,6 +1261,8 @@ SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name) if (!error && override_architecture(name)) error = -EFAULT; + if (!error && override_release(name->release, sizeof(name->release))) + error = -EFAULT; return error ? -EFAULT : 0; } #endif diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 62cbc8877fef..a9a5de07c4f1 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -16,7 +16,6 @@ asmlinkage long sys_ni_syscall(void) return -ENOSYS; } -cond_syscall(sys_nfsservctl); cond_syscall(sys_quotactl); cond_syscall(sys32_quotactl); cond_syscall(sys_acct); diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 3b8e028b9601..e8bffbe2ba4b 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -1,6 +1,6 @@ #include <linux/stat.h> #include <linux/sysctl.h> -#include "../fs/xfs/linux-2.6/xfs_sysctl.h" +#include "../fs/xfs/xfs_sysctl.h" #include <linux/sunrpc/debug.h> #include <linux/string.h> #include <net/ip_vs.h> diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c index 4e4932a7b360..362da653813d 100644 --- a/kernel/sysctl_check.c +++ b/kernel/sysctl_check.c @@ -1,6 +1,6 @@ #include <linux/stat.h> #include <linux/sysctl.h> -#include "../fs/xfs/linux-2.6/xfs_sysctl.h" +#include "../fs/xfs/xfs_sysctl.h" #include <linux/sunrpc/debug.h> #include <linux/string.h> #include <net/ip_vs.h> diff --git a/kernel/taskstats.c b/kernel/taskstats.c index fc0f22005417..e19ce1454ee1 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -28,7 +28,7 @@ #include <linux/fs.h> #include <linux/file.h> #include <net/genetlink.h> -#include <asm/atomic.h> +#include <linux/atomic.h> /* * Maximum length of a cpumask that can be specified in @@ -291,30 +291,28 @@ static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd) if (!cpumask_subset(mask, cpu_possible_mask)) return -EINVAL; - s = NULL; if (isadd == REGISTER) { for_each_cpu(cpu, mask) { - if (!s) - s = kmalloc_node(sizeof(struct listener), - GFP_KERNEL, cpu_to_node(cpu)); + s = kmalloc_node(sizeof(struct listener), + GFP_KERNEL, cpu_to_node(cpu)); if (!s) goto cleanup; + s->pid = pid; - INIT_LIST_HEAD(&s->list); s->valid = 1; listeners = &per_cpu(listener_array, cpu); down_write(&listeners->sem); - list_for_each_entry_safe(s2, tmp, &listeners->list, list) { - if (s2->pid == pid) - goto next_cpu; + list_for_each_entry(s2, &listeners->list, list) { + if (s2->pid == pid && s2->valid) + goto exists; } list_add(&s->list, &listeners->list); s = NULL; -next_cpu: +exists: up_write(&listeners->sem); + kfree(s); /* nop if NULL */ } - kfree(s); return 0; } diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 59f369f98a04..ea5e1a928d5b 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -441,6 +441,8 @@ static int alarm_timer_create(struct k_itimer *new_timer) static void alarm_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) { + memset(cur_setting, 0, sizeof(struct itimerspec)); + cur_setting->it_interval = ktime_to_timespec(timr->it.alarmtimer.period); cur_setting->it_value = @@ -479,11 +481,17 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, if (!rtcdev) return -ENOTSUPP; - /* Save old values */ - old_setting->it_interval = - ktime_to_timespec(timr->it.alarmtimer.period); - old_setting->it_value = - ktime_to_timespec(timr->it.alarmtimer.node.expires); + /* + * XXX HACK! Currently we can DOS a system if the interval + * period on alarmtimers is too small. Cap the interval here + * to 100us and solve this properly in a future patch! -jstultz + */ + if ((new_setting->it_interval.tv_sec == 0) && + (new_setting->it_interval.tv_nsec < 100000)) + new_setting->it_interval.tv_nsec = 100000; + + if (old_setting) + alarm_timer_get(timr, old_setting); /* If the timer was already set, cancel it */ alarm_cancel(&timr->it.alarmtimer); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 2ad39e556cb4..cd3134510f3d 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -82,7 +82,7 @@ config EVENT_POWER_TRACING_DEPRECATED power:power_frequency This is for userspace compatibility and will vanish after 5 kernel iterations, - namely 2.6.41. + namely 3.1. config CONTEXT_SWITCH_TRACER bool diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 6957aa298dfa..7c910a5593a6 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -206,6 +206,8 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, what |= MASK_TC_BIT(rw, RAHEAD); what |= MASK_TC_BIT(rw, META); what |= MASK_TC_BIT(rw, DISCARD); + what |= MASK_TC_BIT(rw, FLUSH); + what |= MASK_TC_BIT(rw, FUA); pid = tsk->pid; if (act_log_check(bt, what, sector, pid)) @@ -1054,6 +1056,9 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) goto out; } + if (tc & BLK_TC_FLUSH) + rwbs[i++] = 'F'; + if (tc & BLK_TC_DISCARD) rwbs[i++] = 'D'; else if (tc & BLK_TC_WRITE) @@ -1063,10 +1068,10 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) else rwbs[i++] = 'N'; + if (tc & BLK_TC_FUA) + rwbs[i++] = 'F'; if (tc & BLK_TC_AHEAD) rwbs[i++] = 'A'; - if (tc & BLK_TC_BARRIER) - rwbs[i++] = 'B'; if (tc & BLK_TC_SYNC) rwbs[i++] = 'S'; if (tc & BLK_TC_META) @@ -1132,7 +1137,7 @@ typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act); static int blk_log_action_classic(struct trace_iterator *iter, const char *act) { - char rwbs[6]; + char rwbs[RWBS_LEN]; unsigned long long ts = iter->ts; unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC); unsigned secs = (unsigned long)ts; @@ -1148,7 +1153,7 @@ static int blk_log_action_classic(struct trace_iterator *iter, const char *act) static int blk_log_action(struct trace_iterator *iter, const char *act) { - char rwbs[6]; + char rwbs[RWBS_LEN]; const struct blk_io_trace *t = te_blk_io_trace(iter->ent); fill_rwbs(rwbs, t); @@ -1561,7 +1566,7 @@ static const struct { } mask_maps[] = { { BLK_TC_READ, "read" }, { BLK_TC_WRITE, "write" }, - { BLK_TC_BARRIER, "barrier" }, + { BLK_TC_FLUSH, "flush" }, { BLK_TC_SYNC, "sync" }, { BLK_TC_QUEUE, "queue" }, { BLK_TC_REQUEUE, "requeue" }, @@ -1573,6 +1578,7 @@ static const struct { { BLK_TC_META, "meta" }, { BLK_TC_DISCARD, "discard" }, { BLK_TC_DRV_DATA, "drv_data" }, + { BLK_TC_FUA, "fua" }, }; static int blk_trace_str2mask(const char *str) @@ -1788,6 +1794,9 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) { int i = 0; + if (rw & REQ_FLUSH) + rwbs[i++] = 'F'; + if (rw & WRITE) rwbs[i++] = 'W'; else if (rw & REQ_DISCARD) @@ -1797,6 +1806,8 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) else rwbs[i++] = 'N'; + if (rw & REQ_FUA) + rwbs[i++] = 'F'; if (rw & REQ_RAHEAD) rwbs[i++] = 'A'; if (rw & REQ_SYNC) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 3f381d0b20a8..616846bcfee5 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -2,7 +2,7 @@ #define _LINUX_KERNEL_TRACE_H #include <linux/fs.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <linux/sched.h> #include <linux/clocksource.h> #include <linux/ring_buffer.h> diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 017fa376505d..fd3c8aae55e5 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -12,7 +12,7 @@ #include <linux/slab.h> #include <linux/time.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include "trace.h" #include "trace_output.h" diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 25fb1b0e53fa..1783aabc6128 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2412,8 +2412,13 @@ reflush: for_each_cwq_cpu(cpu, wq) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + bool drained; - if (!cwq->nr_active && list_empty(&cwq->delayed_works)) + spin_lock_irq(&cwq->gcwq->lock); + drained = !cwq->nr_active && list_empty(&cwq->delayed_works); + spin_unlock_irq(&cwq->gcwq->lock); + + if (drained) continue; if (++flush_cnt == 10 || |