From a9862e0560866eadbc59b84867492004da436516 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 19 May 2009 22:49:07 +0200 Subject: Export add_timer_on for modules Needed in followon patch. Signed-off-by: Andi Kleen Signed-off-by: H. Peter Anvin Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- kernel/timer.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/timer.c b/kernel/timer.c index cffffad01c31..e2c47b82ac36 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -756,6 +756,7 @@ void add_timer_on(struct timer_list *timer, int cpu) wake_up_idle_cpu(cpu); spin_unlock_irqrestore(&base->lock, flags); } +EXPORT_SYMBOL_GPL(add_timer_on); /** * del_timer - deactive a timer. -- cgit v1.2.3 From ad6ccfad6f759a5d657dabe2071a8f2a503fcc84 Mon Sep 17 00:00:00 2001 From: Manish Katiyar Date: Tue, 12 May 2009 13:43:35 -0700 Subject: kernel/kallsyms.c: replace deprecated __initcall with device_initcall and fix whitespace Fix coding style whitespace issues and replace __initcall with device_initcall. Fixed multi-line comments as per coding style. Errors as reported by checkpatch.pl :- Before: total: 14 errors, 14 warnings, 487 lines checked After : total: 0 errors, 8 warnings, 507 lines checked Compile tested binary verified as :- Before: text data bss dec hex filename 2405 4 0 2409 969 kernel/kallsyms.o After : text data bss dec hex filename 2405 4 0 2409 969 kernel/kallsyms.o Signed-off-by: Manish Katiyar Signed-off-by: Andrew Morton Signed-off-by: Sam Ravnborg --- kernel/kallsyms.c | 134 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 78 insertions(+), 56 deletions(-) (limited to 'kernel') diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 374faf9bfdc7..3a29dbe7898e 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -30,12 +30,16 @@ #define all_var 0 #endif -/* These will be re-linked against their real values during the second link stage */ +/* + * These will be re-linked against their real values + * during the second link stage. + */ extern const unsigned long kallsyms_addresses[] __attribute__((weak)); extern const u8 kallsyms_names[] __attribute__((weak)); -/* tell the compiler that the count isn't in the small data section if the arch - * has one (eg: FRV) +/* + * Tell the compiler that the count isn't in the small data section if the arch + * has one (eg: FRV). */ extern const unsigned long kallsyms_num_syms __attribute__((weak, section(".rodata"))); @@ -75,31 +79,37 @@ static int is_ksym_addr(unsigned long addr) return is_kernel_text(addr) || is_kernel_inittext(addr); } -/* expand a compressed symbol data into the resulting uncompressed string, - given the offset to where the symbol is in the compressed stream */ +/* + * Expand a compressed symbol data into the resulting uncompressed string, + * given the offset to where the symbol is in the compressed stream. + */ static unsigned int kallsyms_expand_symbol(unsigned int off, char *result) { int len, skipped_first = 0; const u8 *tptr, *data; - /* get the compressed symbol length from the first symbol byte */ + /* Get the compressed symbol length from the first symbol byte. */ data = &kallsyms_names[off]; len = *data; data++; - /* update the offset to return the offset for the next symbol on - * the compressed stream */ + /* + * Update the offset to return the offset for the next symbol on + * the compressed stream. + */ off += len + 1; - /* for every byte on the compressed symbol data, copy the table - entry for that byte */ - while(len) { - tptr = &kallsyms_token_table[ kallsyms_token_index[*data] ]; + /* + * For every byte on the compressed symbol data, copy the table + * entry for that byte. + */ + while (len) { + tptr = &kallsyms_token_table[kallsyms_token_index[*data]]; data++; len--; while (*tptr) { - if(skipped_first) { + if (skipped_first) { *result = *tptr; result++; } else @@ -110,36 +120,46 @@ static unsigned int kallsyms_expand_symbol(unsigned int off, char *result) *result = '\0'; - /* return to offset to the next symbol */ + /* Return to offset to the next symbol. */ return off; } -/* get symbol type information. This is encoded as a single char at the - * begining of the symbol name */ +/* + * Get symbol type information. This is encoded as a single char at the + * beginning of the symbol name. + */ static char kallsyms_get_symbol_type(unsigned int off) { - /* get just the first code, look it up in the token table, and return the - * first char from this token */ - return kallsyms_token_table[ kallsyms_token_index[ kallsyms_names[off+1] ] ]; + /* + * Get just the first code, look it up in the token table, + * and return the first char from this token. + */ + return kallsyms_token_table[kallsyms_token_index[kallsyms_names[off + 1]]]; } -/* find the offset on the compressed stream given and index in the - * kallsyms array */ +/* + * Find the offset on the compressed stream given and index in the + * kallsyms array. + */ static unsigned int get_symbol_offset(unsigned long pos) { const u8 *name; int i; - /* use the closest marker we have. We have markers every 256 positions, - * so that should be close enough */ - name = &kallsyms_names[ kallsyms_markers[pos>>8] ]; + /* + * Use the closest marker we have. We have markers every 256 positions, + * so that should be close enough. + */ + name = &kallsyms_names[kallsyms_markers[pos >> 8]]; - /* sequentially scan all the symbols up to the point we're searching for. - * Every symbol is stored in a [][ bytes of data] format, so we - * just need to add the len to the current pointer for every symbol we - * wish to skip */ - for(i = 0; i < (pos&0xFF); i++) + /* + * Sequentially scan all the symbols up to the point we're searching + * for. Every symbol is stored in a [][ bytes of data] format, + * so we just need to add the len to the current pointer for every + * symbol we wish to skip. + */ + for (i = 0; i < (pos & 0xFF); i++) name = name + (*name) + 1; return name - kallsyms_names; @@ -190,7 +210,7 @@ static unsigned long get_symbol_pos(unsigned long addr, /* This kernel should never had been booted. */ BUG_ON(!kallsyms_addresses); - /* do a binary search on the sorted kallsyms_addresses array */ + /* Do a binary search on the sorted kallsyms_addresses array. */ low = 0; high = kallsyms_num_syms; @@ -203,15 +223,15 @@ static unsigned long get_symbol_pos(unsigned long addr, } /* - * search for the first aliased symbol. Aliased - * symbols are symbols with the same address + * Search for the first aliased symbol. Aliased + * symbols are symbols with the same address. */ while (low && kallsyms_addresses[low-1] == kallsyms_addresses[low]) --low; symbol_start = kallsyms_addresses[low]; - /* Search for next non-aliased symbol */ + /* Search for next non-aliased symbol. */ for (i = low + 1; i < kallsyms_num_syms; i++) { if (kallsyms_addresses[i] > symbol_start) { symbol_end = kallsyms_addresses[i]; @@ -219,7 +239,7 @@ static unsigned long get_symbol_pos(unsigned long addr, } } - /* if we found no next symbol, we use the end of the section */ + /* If we found no next symbol, we use the end of the section. */ if (!symbol_end) { if (is_kernel_inittext(addr)) symbol_end = (unsigned long)_einittext; @@ -252,10 +272,10 @@ int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, /* * Lookup an address - * - modname is set to NULL if it's in the kernel - * - we guarantee that the returned name is valid until we reschedule even if - * it resides in a module - * - we also guarantee that modname will be valid until rescheduled + * - modname is set to NULL if it's in the kernel. + * - We guarantee that the returned name is valid until we reschedule even if. + * It resides in a module. + * - We also guarantee that modname will be valid until rescheduled. */ const char *kallsyms_lookup(unsigned long addr, unsigned long *symbolsize, @@ -276,7 +296,7 @@ const char *kallsyms_lookup(unsigned long addr, return namebuf; } - /* see if it's in a module */ + /* See if it's in a module. */ return module_address_lookup(addr, symbolsize, offset, modname, namebuf); } @@ -294,7 +314,7 @@ int lookup_symbol_name(unsigned long addr, char *symname) kallsyms_expand_symbol(get_symbol_offset(pos), symname); return 0; } - /* see if it's in a module */ + /* See if it's in a module. */ return lookup_module_symbol_name(addr, symname); } @@ -313,7 +333,7 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size, modname[0] = '\0'; return 0; } - /* see if it's in a module */ + /* See if it's in a module. */ return lookup_module_symbol_attrs(addr, size, offset, modname, name); } @@ -342,6 +362,7 @@ int sprint_symbol(char *buffer, unsigned long address) return len; } +EXPORT_SYMBOL_GPL(sprint_symbol); /* Look up a kernel symbol and print it to the kernel messages. */ void __print_symbol(const char *fmt, unsigned long address) @@ -352,13 +373,13 @@ void __print_symbol(const char *fmt, unsigned long address) printk(fmt, buffer); } +EXPORT_SYMBOL(__print_symbol); /* To avoid using get_symbol_offset for every symbol, we carry prefix along. */ -struct kallsym_iter -{ +struct kallsym_iter { loff_t pos; unsigned long value; - unsigned int nameoff; /* If iterating in core kernel symbols */ + unsigned int nameoff; /* If iterating in core kernel symbols. */ char type; char name[KSYM_NAME_LEN]; char module_name[MODULE_NAME_LEN]; @@ -404,7 +425,7 @@ static int update_iter(struct kallsym_iter *iter, loff_t pos) iter->pos = pos; return get_ksymbol_mod(iter); } - + /* If we're not on the desired position, reset to new position. */ if (pos != iter->pos) reset_iter(iter, pos); @@ -439,23 +460,25 @@ static int s_show(struct seq_file *m, void *p) { struct kallsym_iter *iter = m->private; - /* Some debugging symbols have no name. Ignore them. */ + /* Some debugging symbols have no name. Ignore them. */ if (!iter->name[0]) return 0; if (iter->module_name[0]) { char type; - /* Label it "global" if it is exported, - * "local" if not exported. */ + /* + * Label it "global" if it is exported, + * "local" if not exported. + */ type = iter->exported ? toupper(iter->type) : tolower(iter->type); seq_printf(m, "%0*lx %c %s\t[%s]\n", - (int)(2*sizeof(void*)), + (int)(2 * sizeof(void *)), iter->value, type, iter->name, iter->module_name); } else seq_printf(m, "%0*lx %c %s\n", - (int)(2*sizeof(void*)), + (int)(2 * sizeof(void *)), iter->value, iter->type, iter->name); return 0; } @@ -469,9 +492,11 @@ static const struct seq_operations kallsyms_op = { static int kallsyms_open(struct inode *inode, struct file *file) { - /* We keep iterator in m->private, since normal case is to + /* + * We keep iterator in m->private, since normal case is to * s_start from where we left off, so we avoid doing - * using get_symbol_offset for every symbol */ + * using get_symbol_offset for every symbol. + */ struct kallsym_iter *iter; int ret; @@ -500,7 +525,4 @@ static int __init kallsyms_init(void) proc_create("kallsyms", 0444, NULL, &kallsyms_operations); return 0; } -__initcall(kallsyms_init); - -EXPORT_SYMBOL(__print_symbol); -EXPORT_SYMBOL_GPL(sprint_symbol); +device_initcall(kallsyms_init); -- cgit v1.2.3 From 28be225b23b115573e0ecc8ef9996f42a1652f74 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 12 Jun 2009 11:33:02 +0300 Subject: irq: slab alloc for default irq_affinity Ingo had [ 0.000000] ------------[ cut here ]------------ [ 0.000000] WARNING: at mm/bootmem.c:537 alloc_arch_preferred_bootmem+0x2b/0x71() [ 0.000000] Hardware name: System Product Name [ 0.000000] Modules linked in: [ 0.000000] Pid: 0, comm: swapper Tainted: G W 2.6.30-tip-03087-g0bb2618-dirty #52506 [ 0.000000] Call Trace: [ 0.000000] [<81032588>] warn_slowpath_common+0x60/0x90 [ 0.000000] [<810325c5>] warn_slowpath_null+0xd/0x10 [ 0.000000] [<819d1bc0>] alloc_arch_preferred_bootmem+0x2b/0x71 [ 0.000000] [<819d1c31>] ___alloc_bootmem_nopanic+0x2b/0x9a [ 0.000000] [<81050a0a>] ? lock_release+0xac/0xb2 [ 0.000000] [<819d1d4c>] ___alloc_bootmem+0xe/0x2d [ 0.000000] [<819d1e9f>] __alloc_bootmem+0xa/0xc [ 0.000000] [<819d7c63>] alloc_bootmem_cpumask_var+0x21/0x26 [ 0.000000] [<819d0cc8>] early_irq_init+0x15/0x10d [ 0.000000] [<819bb75a>] start_kernel+0x167/0x326 [ 0.000000] [<819bb06b>] __init_begin+0x6b/0x70 [ 0.000000] ---[ end trace 4eaa2a86a8e2da23 ]--- [ 0.000000] NR_IRQS:2304 nr_irqs:424 [ 0.000000] CPU 0 irqstacks, hard=821e6000 soft=821e7000 we need to update init_irq_default_affinity Signed-off-by: Yinghai Lu Signed-off-by: Pekka Enberg --- kernel/irq/handle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 104578541230..065205bdd920 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -45,7 +45,7 @@ void handle_bad_irq(unsigned int irq, struct irq_desc *desc) #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS) static void __init init_irq_default_affinity(void) { - alloc_bootmem_cpumask_var(&irq_default_affinity); + alloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT); cpumask_setall(irq_default_affinity); } #else -- cgit v1.2.3 From 9a71af2c3627b379b7c31917a7f6ee0d29bc559b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 12 Jun 2009 21:46:53 -0600 Subject: module_param: invbool should take a 'bool', not an 'int' It takes an 'int' for historical reasons, and there are only two users: simply switch it over to bool. The other user (uvesafb.c) will get a (harmless-on-x86) warning until the next patch is applied. Cc: Brad Douglas Cc: Michal Januszewski Signed-off-by: Rusty Russell --- kernel/params.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/params.c b/kernel/params.c index de273ec85bd2..023abbf5f89f 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -272,13 +272,13 @@ int param_set_invbool(const char *val, struct kernel_param *kp) dummy.arg = &boolval; ret = param_set_bool(val, &dummy); if (ret == 0) - *(int *)kp->arg = !boolval; + *(bool *)kp->arg = !boolval; return ret; } int param_get_invbool(char *buffer, struct kernel_param *kp) { - return sprintf(buffer, "%c", (*(int *)kp->arg) ? 'N' : 'Y'); + return sprintf(buffer, "%c", (*(bool *)kp->arg) ? 'N' : 'Y'); } /* We break the rule and mangle the string. */ -- cgit v1.2.3 From 45fcc70c0b6ee0c508e1fdb5fef735c3546803f4 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 12 Jun 2009 21:46:56 -0600 Subject: module_param: split perm field into flags and perm Impact: cleanup Rather than hack KPARAM_KMALLOCED into the perm field, separate it out. Since the perm field was 32 bits and only needs 16, we don't add bloat. Signed-off-by: Rusty Russell --- kernel/params.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/params.c b/kernel/params.c index 023abbf5f89f..b4660dc13dbc 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -24,9 +24,6 @@ #include #include -/* We abuse the high bits of "perm" to record whether we kmalloc'ed. */ -#define KPARAM_KMALLOCED 0x80000000 - #if 0 #define DEBUGP printk #else @@ -220,13 +217,13 @@ int param_set_charp(const char *val, struct kernel_param *kp) return -ENOSPC; } - if (kp->perm & KPARAM_KMALLOCED) + if (kp->flags & KPARAM_KMALLOCED) kfree(*(char **)kp->arg); /* This is a hack. We can't need to strdup in early boot, and we * don't need to; this mangled commandline is preserved. */ if (slab_is_available()) { - kp->perm |= KPARAM_KMALLOCED; + kp->flags |= KPARAM_KMALLOCED; *(char **)kp->arg = kstrdup(val, GFP_KERNEL); if (!kp->arg) return -ENOMEM; @@ -591,7 +588,7 @@ void destroy_params(const struct kernel_param *params, unsigned num) unsigned int i; for (i = 0; i < num; i++) - if (params[i].perm & KPARAM_KMALLOCED) + if (params[i].flags & KPARAM_KMALLOCED) kfree(*(char **)params[i].arg); } -- cgit v1.2.3 From fddd520122953550ec2c8b60e7ca0d0f0d115d97 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 12 Jun 2009 21:46:57 -0600 Subject: module_param: allow 'bool' module_params to be bool, not just int. Impact: API cleanup For historical reasons, 'bool' parameters must be an int, not a bool. But there are around 600 users, so a conversion seems like useless churn. So we use __same_type() to distinguish, and handle both cases. Signed-off-by: Rusty Russell --- kernel/params.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/params.c b/kernel/params.c index b4660dc13dbc..7f6912ced2ba 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -238,35 +238,54 @@ int param_get_charp(char *buffer, struct kernel_param *kp) return sprintf(buffer, "%s", *((char **)kp->arg)); } +/* Actually could be a bool or an int, for historical reasons. */ int param_set_bool(const char *val, struct kernel_param *kp) { + bool v; + /* No equals means "set"... */ if (!val) val = "1"; /* One of =[yYnN01] */ switch (val[0]) { case 'y': case 'Y': case '1': - *(int *)kp->arg = 1; - return 0; + v = true; + break; case 'n': case 'N': case '0': - *(int *)kp->arg = 0; - return 0; + v = false; + break; + default: + return -EINVAL; } - return -EINVAL; + + if (kp->flags & KPARAM_ISBOOL) + *(bool *)kp->arg = v; + else + *(int *)kp->arg = v; + return 0; } int param_get_bool(char *buffer, struct kernel_param *kp) { + bool val; + if (kp->flags & KPARAM_ISBOOL) + val = *(bool *)kp->arg; + else + val = *(int *)kp->arg; + /* Y and N chosen as being relatively non-coder friendly */ - return sprintf(buffer, "%c", (*(int *)kp->arg) ? 'Y' : 'N'); + return sprintf(buffer, "%c", val ? 'Y' : 'N'); } +/* This one must be bool. */ int param_set_invbool(const char *val, struct kernel_param *kp) { - int boolval, ret; + int ret; + bool boolval; struct kernel_param dummy; dummy.arg = &boolval; + dummy.flags = KPARAM_ISBOOL; ret = param_set_bool(val, &dummy); if (ret == 0) *(bool *)kp->arg = !boolval; -- cgit v1.2.3 From ad6561dffa17f17bb68d7207d422c26c381c4313 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 12 Jun 2009 21:47:03 -0600 Subject: module: trim exception table on init free. It's theoretically possible that there are exception table entries which point into the (freed) init text of modules. These could cause future problems if other modules get loaded into that memory and cause an exception as we'd see the wrong fixup. The only case I know of is kvm-intel.ko (when CONFIG_CC_OPTIMIZE_FOR_SIZE=n). Amerigo fixed this long-standing FIXME in the x86 version, but this patch is more general. This implements trim_init_extable(); most archs are simple since they use the standard lib/extable.c sort code. Alpha and IA64 use relative addresses in their fixups, so thier trimming is a slight variation. Sparc32 is unique; it doesn't seem to define ARCH_HAS_SORT_EXTABLE, yet it defines its own sort_extable() which overrides the one in lib. It doesn't sort, so we have to mark deleted entries instead of actually trimming them. Inspired-by: Amerigo Wang Signed-off-by: Rusty Russell Cc: linux-alpha@vger.kernel.org Cc: sparclinux@vger.kernel.org Cc: linux-ia64@vger.kernel.org --- kernel/module.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index 35f7de00bf0d..e4ab36ce7672 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2455,6 +2455,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, mutex_lock(&module_mutex); /* Drop initial reference. */ module_put(mod); + trim_init_extable(mod); module_free(mod, mod->module_init); mod->module_init = NULL; mod->init_size = 0; -- cgit v1.2.3 From 081fad86178ec0f64f32f1bd04cf4aad22714fb9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jun 2009 17:57:21 +0200 Subject: perf_counter: Remove PERF_TYPE_RAW special casing The PERF_TYPE_RAW special case seems superfluous these days. Remove it and add it to the switch() stmt like the others. [ Impact: cleanup ] Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- kernel/perf_counter.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index ef5d8a5b2453..663bbe015057 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -3570,12 +3570,8 @@ perf_counter_alloc(struct perf_counter_attr *attr, if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP)) goto done; - if (attr->type == PERF_TYPE_RAW) { - pmu = hw_perf_counter_init(counter); - goto done; - } - switch (attr->type) { + case PERF_TYPE_RAW: case PERF_TYPE_HARDWARE: case PERF_TYPE_HW_CACHE: pmu = hw_perf_counter_init(counter); -- cgit v1.2.3 From 974802eaa1afdc87e00821df7020a2b3c6fee623 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 12 Jun 2009 12:46:55 +0200 Subject: perf_counter: Add forward/backward attribute ABI compatibility Provide for means of extending the perf_counter_attr in a 'natural' way. We allow growing the structure by appending fields at the end by specifying the full structure size inside it. When a new kernel sees a smaller (old) structure, it will 0 pad the tail. When an old kernel sees a larger (new) structure, it will verify the tail consists of 0s, otherwise fail. If we fail due to a size-mismatch, we return -E2BIG and write the kernel's native attribe size back into the provided structure. Furthermore, add some attribute verification, so that we'll fail counter creation when unknown bits are present (PERF_SAMPLE, PERF_FORMAT, or in the __reserved fields). (This ABI detail is introduced while keeping the existing syscall ABI.) Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- kernel/perf_counter.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 86 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 663bbe015057..29b685f551aa 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -3584,6 +3584,9 @@ perf_counter_alloc(struct perf_counter_attr *attr, case PERF_TYPE_TRACEPOINT: pmu = tp_perf_counter_init(counter); break; + + default: + break; } done: err = 0; @@ -3610,6 +3613,85 @@ done: return counter; } +static int perf_copy_attr(struct perf_counter_attr __user *uattr, + struct perf_counter_attr *attr) +{ + int ret; + u32 size; + + if (!access_ok(VERIFY_WRITE, uattr, PERF_ATTR_SIZE_VER0)) + return -EFAULT; + + /* + * zero the full structure, so that a short copy will be nice. + */ + memset(attr, 0, sizeof(*attr)); + + ret = get_user(size, &uattr->size); + if (ret) + return ret; + + if (size > PAGE_SIZE) /* silly large */ + goto err_size; + + if (!size) /* abi compat */ + size = PERF_ATTR_SIZE_VER0; + + if (size < PERF_ATTR_SIZE_VER0) + goto err_size; + + /* + * If we're handed a bigger struct than we know of, + * ensure all the unknown bits are 0. + */ + if (size > sizeof(*attr)) { + unsigned long val; + unsigned long __user *addr; + unsigned long __user *end; + + addr = PTR_ALIGN((void __user *)uattr + sizeof(*attr), + sizeof(unsigned long)); + end = PTR_ALIGN((void __user *)uattr + size, + sizeof(unsigned long)); + + for (; addr < end; addr += sizeof(unsigned long)) { + ret = get_user(val, addr); + if (ret) + return ret; + if (val) + goto err_size; + } + } + + ret = copy_from_user(attr, uattr, size); + if (ret) + return -EFAULT; + + /* + * If the type exists, the corresponding creation will verify + * the attr->config. + */ + if (attr->type >= PERF_TYPE_MAX) + return -EINVAL; + + if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) + return -EINVAL; + + if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) + return -EINVAL; + + if (attr->read_format & ~(PERF_FORMAT_MAX-1)) + return -EINVAL; + +out: + return ret; + +err_size: + put_user(sizeof(*attr), &uattr->size); + ret = -E2BIG; + goto out; +} + /** * sys_perf_counter_open - open a performance counter, associate it to a task/cpu * @@ -3619,7 +3701,7 @@ done: * @group_fd: group leader counter fd */ SYSCALL_DEFINE5(perf_counter_open, - const struct perf_counter_attr __user *, attr_uptr, + struct perf_counter_attr __user *, attr_uptr, pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) { struct perf_counter *counter, *group_leader; @@ -3635,8 +3717,9 @@ SYSCALL_DEFINE5(perf_counter_open, if (flags) return -EINVAL; - if (copy_from_user(&attr, attr_uptr, sizeof(attr)) != 0) - return -EFAULT; + ret = perf_copy_attr(attr_uptr, &attr); + if (ret) + return ret; if (!attr.exclude_kernel) { if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) -- cgit v1.2.3 From b43e352139f51216a8c56b0bd5fc3d4e05c65619 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 12 Jun 2009 22:27:00 -0600 Subject: sched: export kick_process lguest needs kick_process: wake_up_process() does nothing if a process is running, which isn't sufficient (we need it in the kernel). And lguest support is usually modular. Signed-off-by: Rusty Russell Cc: Ingo Molnar --- kernel/sched.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index f04aa9664504..8ec9d13140be 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2192,6 +2192,7 @@ void kick_process(struct task_struct *p) smp_send_reschedule(cpu); preempt_enable(); } +EXPORT_SYMBOL_GPL(kick_process); /* * Return a low guess at the load of a migration-source cpu weighted -- cgit v1.2.3 From 1dc492a0a4470852cb451db1e00d580ce9fd7a28 Mon Sep 17 00:00:00 2001 From: Manish Katiyar Date: Sun, 22 Feb 2009 10:24:27 +0530 Subject: trivial: kernel/power/poweroff.c: whitespace fix Fix coding style whitespace fixes. Patch compile tested Before :- total: 1 errors, 0 warnings, 46 lines checked After total: 0 errors, 0 warnings, 46 lines checked Before :- text data bss dec hex filename 107 48 0 155 9b kernel/power/poweroff.o After text data bss dec hex filename 107 48 0 155 9b kernel/power/poweroff.o Signed-off-by: Manish Katiyar Signed-off-by: Jiri Kosina --- kernel/power/poweroff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c index 97890831e1b5..e8b337006276 100644 --- a/kernel/power/poweroff.c +++ b/kernel/power/poweroff.c @@ -34,7 +34,7 @@ static struct sysrq_key_op sysrq_poweroff_op = { .handler = handle_poweroff, .help_msg = "powerOff", .action_msg = "Power Off", - .enable_mask = SYSRQ_ENABLE_BOOT, + .enable_mask = SYSRQ_ENABLE_BOOT, }; static int pm_sysrq_init(void) -- cgit v1.2.3 From 3ac49a1c9928b4a242b3cb1d83bc1d5c9b8fcb50 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 4 Jun 2009 16:20:28 +0200 Subject: trivial: fix ETIMEOUT -> ETIMEDOUT typos fix ETIMEOUT -> ETIMEDOUT typos Signed-off-by: Jean Delvare Signed-off-by: Jiri Kosina --- kernel/rtmutex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 820c5af44f3e..fcd107a78c5a 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c @@ -902,7 +902,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); * Returns: * 0 on success * -EINTR when interrupted by a signal - * -ETIMEOUT when the timeout expired + * -ETIMEDOUT when the timeout expired * -EDEADLK when the lock would deadlock (when deadlock detection is on) */ int -- cgit v1.2.3 From e39a71ef80877f4e30d808af9acceec80f4d2f7c Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Fri, 15 May 2009 00:53:26 +0200 Subject: PM: Rename device_power_down/up() Rename the functions performing "_noirq" dev_pm_ops operations from device_power_down() and device_power_up() to device_suspend_noirq() and device_resume_noirq(). The new function names are chosen to show that the functions are responsible for calling the _noirq() versions to finalize the suspend/resume operation. The current function names do not perform power down/up anymore so the names may be misleading. Global function renames: - device_power_down() -> device_suspend_noirq() - device_power_up() -> device_resume_noirq() Static function renames: - suspend_device_noirq() -> __device_suspend_noirq() - resume_device_noirq() -> __device_resume_noirq() Signed-off-by: Magnus Damm Acked-by: Greg Kroah-Hartman Acked-by: Len Brown Signed-off-by: Rafael J. Wysocki --- kernel/kexec.c | 8 ++++---- kernel/power/disk.c | 16 ++++++++-------- kernel/power/main.c | 4 ++-- 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/kexec.c b/kernel/kexec.c index e4983770913b..5a3da87adae0 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1452,13 +1452,13 @@ int kernel_kexec(void) if (error) goto Resume_console; /* At this point, device_suspend() has been called, - * but *not* device_power_down(). We *must* - * device_power_down() now. Otherwise, drivers for + * but *not* device_suspend_noirq(). We *must* call + * device_suspend_noirq() now. Otherwise, drivers for * some devices (e.g. interrupt controllers) become * desynchronized with the actual state of the * hardware at resume time, and evil weirdness ensues. */ - error = device_power_down(PMSG_FREEZE); + error = device_suspend_noirq(PMSG_FREEZE); if (error) goto Resume_devices; error = disable_nonboot_cpus(); @@ -1486,7 +1486,7 @@ int kernel_kexec(void) local_irq_enable(); Enable_cpus: enable_nonboot_cpus(); - device_power_up(PMSG_RESTORE); + device_resume_noirq(PMSG_RESTORE); Resume_devices: device_resume(PMSG_RESTORE); Resume_console: diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 5cb080e7eebd..1c18bc894a2d 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -216,12 +216,12 @@ static int create_image(int platform_mode) return error; /* At this point, device_suspend() has been called, but *not* - * device_power_down(). We *must* call device_power_down() now. + * device_suspend_noirq(). We *must* call device_suspend_noirq() now. * Otherwise, drivers for some devices (e.g. interrupt controllers) * become desynchronized with the actual state of the hardware * at resume time, and evil weirdness ensues. */ - error = device_power_down(PMSG_FREEZE); + error = device_suspend_noirq(PMSG_FREEZE); if (error) { printk(KERN_ERR "PM: Some devices failed to power down, " "aborting hibernation\n"); @@ -262,7 +262,7 @@ static int create_image(int platform_mode) Power_up: sysdev_resume(); - /* NOTE: device_power_up() is just a resume() for devices + /* NOTE: device_resume_noirq() is just a resume() for devices * that suspended with irqs off ... no overall powerup. */ @@ -275,7 +275,7 @@ static int create_image(int platform_mode) Platform_finish: platform_finish(platform_mode); - device_power_up(in_suspend ? + device_resume_noirq(in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); return error; @@ -339,7 +339,7 @@ static int resume_target_kernel(bool platform_mode) { int error; - error = device_power_down(PMSG_QUIESCE); + error = device_suspend_noirq(PMSG_QUIESCE); if (error) { printk(KERN_ERR "PM: Some devices failed to power down, " "aborting resume\n"); @@ -394,7 +394,7 @@ static int resume_target_kernel(bool platform_mode) Cleanup: platform_restore_cleanup(platform_mode); - device_power_up(PMSG_RECOVER); + device_resume_noirq(PMSG_RECOVER); return error; } @@ -454,7 +454,7 @@ int hibernation_platform_enter(void) goto Resume_devices; } - error = device_power_down(PMSG_HIBERNATE); + error = device_suspend_noirq(PMSG_HIBERNATE); if (error) goto Resume_devices; @@ -479,7 +479,7 @@ int hibernation_platform_enter(void) Platofrm_finish: hibernation_ops->finish(); - device_power_up(PMSG_RESTORE); + device_suspend_noirq(PMSG_RESTORE); Resume_devices: entering_platform_hibernation = false; diff --git a/kernel/power/main.c b/kernel/power/main.c index 868028280d13..2f6638ee03c0 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -295,7 +295,7 @@ static int suspend_enter(suspend_state_t state) return error; } - error = device_power_down(PMSG_SUSPEND); + error = device_suspend_noirq(PMSG_SUSPEND); if (error) { printk(KERN_ERR "PM: Some devices failed to power down\n"); goto Platfrom_finish; @@ -335,7 +335,7 @@ static int suspend_enter(suspend_state_t state) suspend_ops->wake(); Power_up_devices: - device_power_up(PMSG_RESUME); + device_resume_noirq(PMSG_RESUME); Platfrom_finish: if (suspend_ops->finish) -- cgit v1.2.3 From d161630297a20802d01c55847bfcba85d2118a9f Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Sun, 24 May 2009 22:05:42 +0200 Subject: PM core: rename suspend and resume functions This patch (as1241) renames a bunch of functions in the PM core. Rather than go through a boring list of name changes, suffice it to say that in the end we have a bunch of pairs of functions: device_resume_noirq dpm_resume_noirq device_resume dpm_resume device_complete dpm_complete device_suspend_noirq dpm_suspend_noirq device_suspend dpm_suspend device_prepare dpm_prepare in which device_X does the X operation on a single device and dpm_X invokes device_X for all devices in the dpm_list. In addition, the old dpm_power_up and device_resume_noirq have been combined into a single function (dpm_resume_noirq). Lastly, dpm_suspend_start and dpm_resume_end are the renamed versions of the former top-level device_suspend and device_resume routines. Signed-off-by: Alan Stern Acked-by: Magnus Damm Signed-off-by: Rafael J. Wysocki --- kernel/kexec.c | 14 +++++++------- kernel/power/disk.c | 30 +++++++++++++++--------------- kernel/power/main.c | 8 ++++---- 3 files changed, 26 insertions(+), 26 deletions(-) (limited to 'kernel') diff --git a/kernel/kexec.c b/kernel/kexec.c index 5a3da87adae0..ae1c35201cc8 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1448,17 +1448,17 @@ int kernel_kexec(void) goto Restore_console; } suspend_console(); - error = device_suspend(PMSG_FREEZE); + error = dpm_suspend_start(PMSG_FREEZE); if (error) goto Resume_console; - /* At this point, device_suspend() has been called, - * but *not* device_suspend_noirq(). We *must* call - * device_suspend_noirq() now. Otherwise, drivers for + /* At this point, dpm_suspend_start() has been called, + * but *not* dpm_suspend_noirq(). We *must* call + * dpm_suspend_noirq() now. Otherwise, drivers for * some devices (e.g. interrupt controllers) become * desynchronized with the actual state of the * hardware at resume time, and evil weirdness ensues. */ - error = device_suspend_noirq(PMSG_FREEZE); + error = dpm_suspend_noirq(PMSG_FREEZE); if (error) goto Resume_devices; error = disable_nonboot_cpus(); @@ -1486,9 +1486,9 @@ int kernel_kexec(void) local_irq_enable(); Enable_cpus: enable_nonboot_cpus(); - device_resume_noirq(PMSG_RESTORE); + dpm_resume_noirq(PMSG_RESTORE); Resume_devices: - device_resume(PMSG_RESTORE); + dpm_resume_end(PMSG_RESTORE); Resume_console: resume_console(); thaw_processes(); diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 1c18bc894a2d..a9beba68b6c7 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -215,13 +215,13 @@ static int create_image(int platform_mode) if (error) return error; - /* At this point, device_suspend() has been called, but *not* - * device_suspend_noirq(). We *must* call device_suspend_noirq() now. + /* At this point, dpm_suspend_start() has been called, but *not* + * dpm_suspend_noirq(). We *must* call dpm_suspend_noirq() now. * Otherwise, drivers for some devices (e.g. interrupt controllers) * become desynchronized with the actual state of the hardware * at resume time, and evil weirdness ensues. */ - error = device_suspend_noirq(PMSG_FREEZE); + error = dpm_suspend_noirq(PMSG_FREEZE); if (error) { printk(KERN_ERR "PM: Some devices failed to power down, " "aborting hibernation\n"); @@ -262,7 +262,7 @@ static int create_image(int platform_mode) Power_up: sysdev_resume(); - /* NOTE: device_resume_noirq() is just a resume() for devices + /* NOTE: dpm_resume_noirq() is just a resume() for devices * that suspended with irqs off ... no overall powerup. */ @@ -275,7 +275,7 @@ static int create_image(int platform_mode) Platform_finish: platform_finish(platform_mode); - device_resume_noirq(in_suspend ? + dpm_resume_noirq(in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); return error; @@ -304,7 +304,7 @@ int hibernation_snapshot(int platform_mode) goto Close; suspend_console(); - error = device_suspend(PMSG_FREEZE); + error = dpm_suspend_start(PMSG_FREEZE); if (error) goto Recover_platform; @@ -315,7 +315,7 @@ int hibernation_snapshot(int platform_mode) /* Control returns here after successful restore */ Resume_devices: - device_resume(in_suspend ? + dpm_resume_end(in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); resume_console(); Close: @@ -339,7 +339,7 @@ static int resume_target_kernel(bool platform_mode) { int error; - error = device_suspend_noirq(PMSG_QUIESCE); + error = dpm_suspend_noirq(PMSG_QUIESCE); if (error) { printk(KERN_ERR "PM: Some devices failed to power down, " "aborting resume\n"); @@ -394,7 +394,7 @@ static int resume_target_kernel(bool platform_mode) Cleanup: platform_restore_cleanup(platform_mode); - device_resume_noirq(PMSG_RECOVER); + dpm_resume_noirq(PMSG_RECOVER); return error; } @@ -414,10 +414,10 @@ int hibernation_restore(int platform_mode) pm_prepare_console(); suspend_console(); - error = device_suspend(PMSG_QUIESCE); + error = dpm_suspend_start(PMSG_QUIESCE); if (!error) { error = resume_target_kernel(platform_mode); - device_resume(PMSG_RECOVER); + dpm_resume_end(PMSG_RECOVER); } resume_console(); pm_restore_console(); @@ -447,14 +447,14 @@ int hibernation_platform_enter(void) entering_platform_hibernation = true; suspend_console(); - error = device_suspend(PMSG_HIBERNATE); + error = dpm_suspend_start(PMSG_HIBERNATE); if (error) { if (hibernation_ops->recover) hibernation_ops->recover(); goto Resume_devices; } - error = device_suspend_noirq(PMSG_HIBERNATE); + error = dpm_suspend_noirq(PMSG_HIBERNATE); if (error) goto Resume_devices; @@ -479,11 +479,11 @@ int hibernation_platform_enter(void) Platofrm_finish: hibernation_ops->finish(); - device_suspend_noirq(PMSG_RESTORE); + dpm_suspend_noirq(PMSG_RESTORE); Resume_devices: entering_platform_hibernation = false; - device_resume(PMSG_RESTORE); + dpm_resume_end(PMSG_RESTORE); resume_console(); Close: diff --git a/kernel/power/main.c b/kernel/power/main.c index 2f6638ee03c0..46386b9f8dd1 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -295,7 +295,7 @@ static int suspend_enter(suspend_state_t state) return error; } - error = device_suspend_noirq(PMSG_SUSPEND); + error = dpm_suspend_noirq(PMSG_SUSPEND); if (error) { printk(KERN_ERR "PM: Some devices failed to power down\n"); goto Platfrom_finish; @@ -335,7 +335,7 @@ static int suspend_enter(suspend_state_t state) suspend_ops->wake(); Power_up_devices: - device_resume_noirq(PMSG_RESUME); + dpm_resume_noirq(PMSG_RESUME); Platfrom_finish: if (suspend_ops->finish) @@ -363,7 +363,7 @@ int suspend_devices_and_enter(suspend_state_t state) } suspend_console(); suspend_test_start(); - error = device_suspend(PMSG_SUSPEND); + error = dpm_suspend_start(PMSG_SUSPEND); if (error) { printk(KERN_ERR "PM: Some devices failed to suspend\n"); goto Recover_platform; @@ -376,7 +376,7 @@ int suspend_devices_and_enter(suspend_state_t state) Resume_devices: suspend_test_start(); - device_resume(PMSG_RESUME); + dpm_resume_end(PMSG_RESUME); suspend_test_finish("resume devices"); resume_console(); Close: -- cgit v1.2.3 From c6f37f12197ac3bd2e5a35f2f0e195ae63d437de Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 24 May 2009 22:16:31 +0200 Subject: PM/Suspend: Do not shrink memory before suspend Remove the shrinking of memory from the suspend-to-RAM code, where it is not really necessary. Signed-off-by: Rafael J. Wysocki Acked-by: Nigel Cunningham Acked-by: Wu Fengguang --- kernel/power/main.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) (limited to 'kernel') diff --git a/kernel/power/main.c b/kernel/power/main.c index 46386b9f8dd1..2a19f347bd8a 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -188,9 +188,6 @@ static void suspend_test_finish(const char *label) #endif -/* This is just an arbitrary number */ -#define FREE_PAGE_NUMBER (100) - static struct platform_suspend_ops *suspend_ops; /** @@ -226,7 +223,6 @@ int suspend_valid_only_mem(suspend_state_t state) static int suspend_prepare(void) { int error; - unsigned int free_pages; if (!suspend_ops || !suspend_ops->enter) return -EPERM; @@ -241,24 +237,10 @@ static int suspend_prepare(void) if (error) goto Finish; - if (suspend_freeze_processes()) { - error = -EAGAIN; - goto Thaw; - } - - free_pages = global_page_state(NR_FREE_PAGES); - if (free_pages < FREE_PAGE_NUMBER) { - pr_debug("PM: free some memory\n"); - shrink_all_memory(FREE_PAGE_NUMBER - free_pages); - if (nr_free_pages() < FREE_PAGE_NUMBER) { - error = -ENOMEM; - printk(KERN_ERR "PM: No enough memory\n"); - } - } + error = suspend_freeze_processes(); if (!error) return 0; - Thaw: suspend_thaw_processes(); usermodehelper_enable(); Finish: -- cgit v1.2.3 From fe419535d82724314bbf1244a0e740e4ea1bd3ae Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 11 Jun 2009 23:11:17 +0200 Subject: PM/Hibernate: Move memory shrinking to snapshot.c (rev. 2) A future patch is going to modify the memory shrinking code so that it will make memory allocations to free memory instead of using an artificial memory shrinking mechanism for that. For this purpose it is convenient to move swsusp_shrink_memory() from kernel/power/swsusp.c to kernel/power/snapshot.c, because the new memory-shrinking code is going to use things that are local to kernel/power/snapshot.c . [rev. 2: Make some functions static and remove their headers from kernel/power/power.h] Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Acked-by: Wu Fengguang --- kernel/power/power.h | 4 +-- kernel/power/snapshot.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++-- kernel/power/swsusp.c | 76 ---------------------------------------------- 3 files changed, 79 insertions(+), 81 deletions(-) (limited to 'kernel') diff --git a/kernel/power/power.h b/kernel/power/power.h index 46b5ec7a3afb..ec4dbdfb07b4 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -74,7 +74,7 @@ extern asmlinkage int swsusp_arch_resume(void); extern int create_basic_memory_bitmaps(void); extern void free_basic_memory_bitmaps(void); -extern unsigned int count_data_pages(void); +extern int swsusp_shrink_memory(void); /** * Auxiliary structure used for reading the snapshot image data and @@ -149,7 +149,6 @@ extern int swsusp_swap_in_use(void); /* kernel/power/disk.c */ extern int swsusp_check(void); -extern int swsusp_shrink_memory(void); extern void swsusp_free(void); extern int swsusp_read(unsigned int *flags_p); extern int swsusp_write(unsigned int flags); @@ -176,7 +175,6 @@ extern int pm_notifier_call_chain(unsigned long val); #endif #ifdef CONFIG_HIGHMEM -unsigned int count_highmem_pages(void); int restore_highmem(void); #else static inline unsigned int count_highmem_pages(void) { return 0; } diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 33e2e4a819f9..523a451b45d3 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -39,6 +39,14 @@ static int swsusp_page_is_free(struct page *); static void swsusp_set_page_forbidden(struct page *); static void swsusp_unset_page_forbidden(struct page *); +/* + * Preferred image size in bytes (tunable via /sys/power/image_size). + * When it is set to N, swsusp will do its best to ensure the image + * size will not exceed N bytes, but if that is impossible, it will + * try to create the smallest image possible. + */ +unsigned long image_size = 500 * 1024 * 1024; + /* List of PBEs needed for restoring the pages that were allocated before * the suspend and included in the suspend image, but have also been * allocated by the "resume" kernel, so their contents cannot be written @@ -840,7 +848,7 @@ static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) * pages. */ -unsigned int count_highmem_pages(void) +static unsigned int count_highmem_pages(void) { struct zone *zone; unsigned int n = 0; @@ -902,7 +910,7 @@ static struct page *saveable_page(struct zone *zone, unsigned long pfn) * pages. */ -unsigned int count_data_pages(void) +static unsigned int count_data_pages(void) { struct zone *zone; unsigned long pfn, max_zone_pfn; @@ -1058,6 +1066,74 @@ void swsusp_free(void) buffer = NULL; } +/** + * swsusp_shrink_memory - Try to free as much memory as needed + * + * ... but do not OOM-kill anyone + * + * Notice: all userland should be stopped before it is called, or + * livelock is possible. + */ + +#define SHRINK_BITE 10000 +static inline unsigned long __shrink_memory(long tmp) +{ + if (tmp > SHRINK_BITE) + tmp = SHRINK_BITE; + return shrink_all_memory(tmp); +} + +int swsusp_shrink_memory(void) +{ + long tmp; + struct zone *zone; + unsigned long pages = 0; + unsigned int i = 0; + char *p = "-\\|/"; + struct timeval start, stop; + + printk(KERN_INFO "PM: Shrinking memory... "); + do_gettimeofday(&start); + do { + long size, highmem_size; + + highmem_size = count_highmem_pages(); + size = count_data_pages() + PAGES_FOR_IO + SPARE_PAGES; + tmp = size; + size += highmem_size; + for_each_populated_zone(zone) { + tmp += snapshot_additional_pages(zone); + if (is_highmem(zone)) { + highmem_size -= + zone_page_state(zone, NR_FREE_PAGES); + } else { + tmp -= zone_page_state(zone, NR_FREE_PAGES); + tmp += zone->lowmem_reserve[ZONE_NORMAL]; + } + } + + if (highmem_size < 0) + highmem_size = 0; + + tmp += highmem_size; + if (tmp > 0) { + tmp = __shrink_memory(tmp); + if (!tmp) + return -ENOMEM; + pages += tmp; + } else if (size > image_size / PAGE_SIZE) { + tmp = __shrink_memory(size - (image_size / PAGE_SIZE)); + pages += tmp; + } + printk("\b%c", p[i++%4]); + } while (tmp > 0); + do_gettimeofday(&stop); + printk("\bdone (%lu pages freed)\n", pages); + swsusp_show_speed(&start, &stop, pages, "Freed"); + + return 0; +} + #ifdef CONFIG_HIGHMEM /** * count_pages_for_highmem - compute the number of non-highmem pages diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 78c35047586d..87b901cb3927 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -55,14 +55,6 @@ #include "power.h" -/* - * Preferred image size in bytes (tunable via /sys/power/image_size). - * When it is set to N, swsusp will do its best to ensure the image - * size will not exceed N bytes, but if that is impossible, it will - * try to create the smallest image possible. - */ -unsigned long image_size = 500 * 1024 * 1024; - int in_suspend __nosavedata = 0; /** @@ -195,74 +187,6 @@ void swsusp_show_speed(struct timeval *start, struct timeval *stop, kps / 1000, (kps % 1000) / 10); } -/** - * swsusp_shrink_memory - Try to free as much memory as needed - * - * ... but do not OOM-kill anyone - * - * Notice: all userland should be stopped before it is called, or - * livelock is possible. - */ - -#define SHRINK_BITE 10000 -static inline unsigned long __shrink_memory(long tmp) -{ - if (tmp > SHRINK_BITE) - tmp = SHRINK_BITE; - return shrink_all_memory(tmp); -} - -int swsusp_shrink_memory(void) -{ - long tmp; - struct zone *zone; - unsigned long pages = 0; - unsigned int i = 0; - char *p = "-\\|/"; - struct timeval start, stop; - - printk(KERN_INFO "PM: Shrinking memory... "); - do_gettimeofday(&start); - do { - long size, highmem_size; - - highmem_size = count_highmem_pages(); - size = count_data_pages() + PAGES_FOR_IO + SPARE_PAGES; - tmp = size; - size += highmem_size; - for_each_populated_zone(zone) { - tmp += snapshot_additional_pages(zone); - if (is_highmem(zone)) { - highmem_size -= - zone_page_state(zone, NR_FREE_PAGES); - } else { - tmp -= zone_page_state(zone, NR_FREE_PAGES); - tmp += zone->lowmem_reserve[ZONE_NORMAL]; - } - } - - if (highmem_size < 0) - highmem_size = 0; - - tmp += highmem_size; - if (tmp > 0) { - tmp = __shrink_memory(tmp); - if (!tmp) - return -ENOMEM; - pages += tmp; - } else if (size > image_size / PAGE_SIZE) { - tmp = __shrink_memory(size - (image_size / PAGE_SIZE)); - pages += tmp; - } - printk("\b%c", p[i++%4]); - } while (tmp > 0); - do_gettimeofday(&stop); - printk("\bdone (%lu pages freed)\n", pages); - swsusp_show_speed(&start, &stop, pages, "Freed"); - - return 0; -} - /* * Platforms, like ACPI, may want us to save some memory used by them during * hibernation and to restore the contents of this memory during the subsequent -- cgit v1.2.3 From a9d7052363a6e06bb623ed1876c56c7ca5b2c6d8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 10 Jun 2009 01:27:12 +0200 Subject: PM: Separate suspend to RAM functionality from core Move the suspend to RAM and standby code from kernel/power/main.c to two separate files, kernel/power/suspend.c containing the basic functions and kernel/power/suspend_test.c containing the automatic suspend test facility based on the RTC clock alarm. There are no changes in functionality related to these modifications. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek --- kernel/power/Makefile | 2 + kernel/power/main.c | 503 -------------------------------------------- kernel/power/power.h | 17 +- kernel/power/suspend.c | 300 ++++++++++++++++++++++++++ kernel/power/suspend_test.c | 187 ++++++++++++++++ 5 files changed, 505 insertions(+), 504 deletions(-) create mode 100644 kernel/power/suspend.c create mode 100644 kernel/power/suspend_test.c (limited to 'kernel') diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 720ea4f781bd..c4baf1b633c4 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -6,6 +6,8 @@ endif obj-$(CONFIG_PM) += main.o obj-$(CONFIG_PM_SLEEP) += console.o obj-$(CONFIG_FREEZER) += process.o +obj-$(CONFIG_SUSPEND) += suspend.o +obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o obj-$(CONFIG_HIBERNATION) += swsusp.o disk.o snapshot.o swap.o user.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o diff --git a/kernel/power/main.c b/kernel/power/main.c index 2a19f347bd8a..f710e36930cc 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -8,20 +8,9 @@ * */ -#include -#include #include #include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include #include "power.h" @@ -119,355 +108,6 @@ power_attr(pm_test); #endif /* CONFIG_PM_SLEEP */ -#ifdef CONFIG_SUSPEND - -static int suspend_test(int level) -{ -#ifdef CONFIG_PM_DEBUG - if (pm_test_level == level) { - printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n"); - mdelay(5000); - return 1; - } -#endif /* !CONFIG_PM_DEBUG */ - return 0; -} - -#ifdef CONFIG_PM_TEST_SUSPEND - -/* - * We test the system suspend code by setting an RTC wakealarm a short - * time in the future, then suspending. Suspending the devices won't - * normally take long ... some systems only need a few milliseconds. - * - * The time it takes is system-specific though, so when we test this - * during system bootup we allow a LOT of time. - */ -#define TEST_SUSPEND_SECONDS 5 - -static unsigned long suspend_test_start_time; - -static void suspend_test_start(void) -{ - /* FIXME Use better timebase than "jiffies", ideally a clocksource. - * What we want is a hardware counter that will work correctly even - * during the irqs-are-off stages of the suspend/resume cycle... - */ - suspend_test_start_time = jiffies; -} - -static void suspend_test_finish(const char *label) -{ - long nj = jiffies - suspend_test_start_time; - unsigned msec; - - msec = jiffies_to_msecs(abs(nj)); - pr_info("PM: %s took %d.%03d seconds\n", label, - msec / 1000, msec % 1000); - - /* Warning on suspend means the RTC alarm period needs to be - * larger -- the system was sooo slooowwww to suspend that the - * alarm (should have) fired before the system went to sleep! - * - * Warning on either suspend or resume also means the system - * has some performance issues. The stack dump of a WARN_ON - * is more likely to get the right attention than a printk... - */ - WARN(msec > (TEST_SUSPEND_SECONDS * 1000), "Component: %s\n", label); -} - -#else - -static void suspend_test_start(void) -{ -} - -static void suspend_test_finish(const char *label) -{ -} - -#endif - -static struct platform_suspend_ops *suspend_ops; - -/** - * suspend_set_ops - Set the global suspend method table. - * @ops: Pointer to ops structure. - */ - -void suspend_set_ops(struct platform_suspend_ops *ops) -{ - mutex_lock(&pm_mutex); - suspend_ops = ops; - mutex_unlock(&pm_mutex); -} - -/** - * suspend_valid_only_mem - generic memory-only valid callback - * - * Platform drivers that implement mem suspend only and only need - * to check for that in their .valid callback can use this instead - * of rolling their own .valid callback. - */ -int suspend_valid_only_mem(suspend_state_t state) -{ - return state == PM_SUSPEND_MEM; -} - -/** - * suspend_prepare - Do prep work before entering low-power state. - * - * This is common code that is called for each state that we're entering. - * Run suspend notifiers, allocate a console and stop all processes. - */ -static int suspend_prepare(void) -{ - int error; - - if (!suspend_ops || !suspend_ops->enter) - return -EPERM; - - pm_prepare_console(); - - error = pm_notifier_call_chain(PM_SUSPEND_PREPARE); - if (error) - goto Finish; - - error = usermodehelper_disable(); - if (error) - goto Finish; - - error = suspend_freeze_processes(); - if (!error) - return 0; - - suspend_thaw_processes(); - usermodehelper_enable(); - Finish: - pm_notifier_call_chain(PM_POST_SUSPEND); - pm_restore_console(); - return error; -} - -/* default implementation */ -void __attribute__ ((weak)) arch_suspend_disable_irqs(void) -{ - local_irq_disable(); -} - -/* default implementation */ -void __attribute__ ((weak)) arch_suspend_enable_irqs(void) -{ - local_irq_enable(); -} - -/** - * suspend_enter - enter the desired system sleep state. - * @state: state to enter - * - * This function should be called after devices have been suspended. - */ -static int suspend_enter(suspend_state_t state) -{ - int error; - - if (suspend_ops->prepare) { - error = suspend_ops->prepare(); - if (error) - return error; - } - - error = dpm_suspend_noirq(PMSG_SUSPEND); - if (error) { - printk(KERN_ERR "PM: Some devices failed to power down\n"); - goto Platfrom_finish; - } - - if (suspend_ops->prepare_late) { - error = suspend_ops->prepare_late(); - if (error) - goto Power_up_devices; - } - - if (suspend_test(TEST_PLATFORM)) - goto Platform_wake; - - error = disable_nonboot_cpus(); - if (error || suspend_test(TEST_CPUS)) - goto Enable_cpus; - - arch_suspend_disable_irqs(); - BUG_ON(!irqs_disabled()); - - error = sysdev_suspend(PMSG_SUSPEND); - if (!error) { - if (!suspend_test(TEST_CORE)) - error = suspend_ops->enter(state); - sysdev_resume(); - } - - arch_suspend_enable_irqs(); - BUG_ON(irqs_disabled()); - - Enable_cpus: - enable_nonboot_cpus(); - - Platform_wake: - if (suspend_ops->wake) - suspend_ops->wake(); - - Power_up_devices: - dpm_resume_noirq(PMSG_RESUME); - - Platfrom_finish: - if (suspend_ops->finish) - suspend_ops->finish(); - - return error; -} - -/** - * suspend_devices_and_enter - suspend devices and enter the desired system - * sleep state. - * @state: state to enter - */ -int suspend_devices_and_enter(suspend_state_t state) -{ - int error; - - if (!suspend_ops) - return -ENOSYS; - - if (suspend_ops->begin) { - error = suspend_ops->begin(state); - if (error) - goto Close; - } - suspend_console(); - suspend_test_start(); - error = dpm_suspend_start(PMSG_SUSPEND); - if (error) { - printk(KERN_ERR "PM: Some devices failed to suspend\n"); - goto Recover_platform; - } - suspend_test_finish("suspend devices"); - if (suspend_test(TEST_DEVICES)) - goto Recover_platform; - - suspend_enter(state); - - Resume_devices: - suspend_test_start(); - dpm_resume_end(PMSG_RESUME); - suspend_test_finish("resume devices"); - resume_console(); - Close: - if (suspend_ops->end) - suspend_ops->end(); - return error; - - Recover_platform: - if (suspend_ops->recover) - suspend_ops->recover(); - goto Resume_devices; -} - -/** - * suspend_finish - Do final work before exiting suspend sequence. - * - * Call platform code to clean up, restart processes, and free the - * console that we've allocated. This is not called for suspend-to-disk. - */ -static void suspend_finish(void) -{ - suspend_thaw_processes(); - usermodehelper_enable(); - pm_notifier_call_chain(PM_POST_SUSPEND); - pm_restore_console(); -} - - - - -static const char * const pm_states[PM_SUSPEND_MAX] = { - [PM_SUSPEND_STANDBY] = "standby", - [PM_SUSPEND_MEM] = "mem", -}; - -static inline int valid_state(suspend_state_t state) -{ - /* All states need lowlevel support and need to be valid - * to the lowlevel implementation, no valid callback - * implies that none are valid. */ - if (!suspend_ops || !suspend_ops->valid || !suspend_ops->valid(state)) - return 0; - return 1; -} - - -/** - * enter_state - Do common work of entering low-power state. - * @state: pm_state structure for state we're entering. - * - * Make sure we're the only ones trying to enter a sleep state. Fail - * if someone has beat us to it, since we don't want anything weird to - * happen when we wake up. - * Then, do the setup for suspend, enter the state, and cleaup (after - * we've woken up). - */ -static int enter_state(suspend_state_t state) -{ - int error; - - if (!valid_state(state)) - return -ENODEV; - - if (!mutex_trylock(&pm_mutex)) - return -EBUSY; - - printk(KERN_INFO "PM: Syncing filesystems ... "); - sys_sync(); - printk("done.\n"); - - pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]); - error = suspend_prepare(); - if (error) - goto Unlock; - - if (suspend_test(TEST_FREEZER)) - goto Finish; - - pr_debug("PM: Entering %s sleep\n", pm_states[state]); - error = suspend_devices_and_enter(state); - - Finish: - pr_debug("PM: Finishing wakeup.\n"); - suspend_finish(); - Unlock: - mutex_unlock(&pm_mutex); - return error; -} - - -/** - * pm_suspend - Externally visible function for suspending system. - * @state: Enumerated value of state to enter. - * - * Determine whether or not value is within range, get state - * structure, and enter (above). - */ - -int pm_suspend(suspend_state_t state) -{ - if (state > PM_SUSPEND_ON && state <= PM_SUSPEND_MAX) - return enter_state(state); - return -EINVAL; -} - -EXPORT_SYMBOL(pm_suspend); - -#endif /* CONFIG_SUSPEND */ - struct kobject *power_kobj; /** @@ -480,7 +120,6 @@ struct kobject *power_kobj; * store() accepts one of those strings, translates it into the * proper enumerated value, and initiates a suspend transition. */ - static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -578,7 +217,6 @@ static struct attribute_group attr_group = { .attrs = g, }; - static int __init pm_init(void) { power_kobj = kobject_create_and_add("power", NULL); @@ -588,144 +226,3 @@ static int __init pm_init(void) } core_initcall(pm_init); - - -#ifdef CONFIG_PM_TEST_SUSPEND - -#include - -/* - * To test system suspend, we need a hands-off mechanism to resume the - * system. RTCs wake alarms are a common self-contained mechanism. - */ - -static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state) -{ - static char err_readtime[] __initdata = - KERN_ERR "PM: can't read %s time, err %d\n"; - static char err_wakealarm [] __initdata = - KERN_ERR "PM: can't set %s wakealarm, err %d\n"; - static char err_suspend[] __initdata = - KERN_ERR "PM: suspend test failed, error %d\n"; - static char info_test[] __initdata = - KERN_INFO "PM: test RTC wakeup from '%s' suspend\n"; - - unsigned long now; - struct rtc_wkalrm alm; - int status; - - /* this may fail if the RTC hasn't been initialized */ - status = rtc_read_time(rtc, &alm.time); - if (status < 0) { - printk(err_readtime, dev_name(&rtc->dev), status); - return; - } - rtc_tm_to_time(&alm.time, &now); - - memset(&alm, 0, sizeof alm); - rtc_time_to_tm(now + TEST_SUSPEND_SECONDS, &alm.time); - alm.enabled = true; - - status = rtc_set_alarm(rtc, &alm); - if (status < 0) { - printk(err_wakealarm, dev_name(&rtc->dev), status); - return; - } - - if (state == PM_SUSPEND_MEM) { - printk(info_test, pm_states[state]); - status = pm_suspend(state); - if (status == -ENODEV) - state = PM_SUSPEND_STANDBY; - } - if (state == PM_SUSPEND_STANDBY) { - printk(info_test, pm_states[state]); - status = pm_suspend(state); - } - if (status < 0) - printk(err_suspend, status); - - /* Some platforms can't detect that the alarm triggered the - * wakeup, or (accordingly) disable it after it afterwards. - * It's supposed to give oneshot behavior; cope. - */ - alm.enabled = false; - rtc_set_alarm(rtc, &alm); -} - -static int __init has_wakealarm(struct device *dev, void *name_ptr) -{ - struct rtc_device *candidate = to_rtc_device(dev); - - if (!candidate->ops->set_alarm) - return 0; - if (!device_may_wakeup(candidate->dev.parent)) - return 0; - - *(const char **)name_ptr = dev_name(dev); - return 1; -} - -/* - * Kernel options like "test_suspend=mem" force suspend/resume sanity tests - * at startup time. They're normally disabled, for faster boot and because - * we can't know which states really work on this particular system. - */ -static suspend_state_t test_state __initdata = PM_SUSPEND_ON; - -static char warn_bad_state[] __initdata = - KERN_WARNING "PM: can't test '%s' suspend state\n"; - -static int __init setup_test_suspend(char *value) -{ - unsigned i; - - /* "=mem" ==> "mem" */ - value++; - for (i = 0; i < PM_SUSPEND_MAX; i++) { - if (!pm_states[i]) - continue; - if (strcmp(pm_states[i], value) != 0) - continue; - test_state = (__force suspend_state_t) i; - return 0; - } - printk(warn_bad_state, value); - return 0; -} -__setup("test_suspend", setup_test_suspend); - -static int __init test_suspend(void) -{ - static char warn_no_rtc[] __initdata = - KERN_WARNING "PM: no wakealarm-capable RTC driver is ready\n"; - - char *pony = NULL; - struct rtc_device *rtc = NULL; - - /* PM is initialized by now; is that state testable? */ - if (test_state == PM_SUSPEND_ON) - goto done; - if (!valid_state(test_state)) { - printk(warn_bad_state, pm_states[test_state]); - goto done; - } - - /* RTCs have initialized by now too ... can we use one? */ - class_find_device(rtc_class, NULL, &pony, has_wakealarm); - if (pony) - rtc = rtc_class_open(pony); - if (!rtc) { - printk(warn_no_rtc); - goto done; - } - - /* go for it */ - test_wakealarm(rtc, test_state); - rtc_class_close(rtc); -done: - return 0; -} -late_initcall(test_suspend); - -#endif /* CONFIG_PM_TEST_SUSPEND */ diff --git a/kernel/power/power.h b/kernel/power/power.h index ec4dbdfb07b4..2bd98d9fc19e 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -160,15 +160,30 @@ extern void swsusp_show_speed(struct timeval *, struct timeval *, unsigned int, char *); #ifdef CONFIG_SUSPEND -/* kernel/power/main.c */ +/* kernel/power/suspend.c */ +extern const char *const pm_states[]; + +extern bool valid_state(suspend_state_t state); extern int suspend_devices_and_enter(suspend_state_t state); +extern int enter_state(suspend_state_t state); #else /* !CONFIG_SUSPEND */ static inline int suspend_devices_and_enter(suspend_state_t state) { return -ENOSYS; } +static inline int enter_state(suspend_state_t state) { return -ENOSYS; } +static inline bool valid_state(suspend_state_t state) { return false; } #endif /* !CONFIG_SUSPEND */ +#ifdef CONFIG_PM_TEST_SUSPEND +/* kernel/power/suspend_test.c */ +extern void suspend_test_start(void); +extern void suspend_test_finish(const char *label); +#else /* !CONFIG_PM_TEST_SUSPEND */ +static inline void suspend_test_start(void) {} +static inline void suspend_test_finish(const char *label) {} +#endif /* !CONFIG_PM_TEST_SUSPEND */ + #ifdef CONFIG_PM_SLEEP /* kernel/power/main.c */ extern int pm_notifier_call_chain(unsigned long val); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c new file mode 100644 index 000000000000..6f10dfc2d3e9 --- /dev/null +++ b/kernel/power/suspend.c @@ -0,0 +1,300 @@ +/* + * kernel/power/suspend.c - Suspend to RAM and standby functionality. + * + * Copyright (c) 2003 Patrick Mochel + * Copyright (c) 2003 Open Source Development Lab + * Copyright (c) 2009 Rafael J. Wysocki , Novell Inc. + * + * This file is released under the GPLv2. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "power.h" + +const char *const pm_states[PM_SUSPEND_MAX] = { + [PM_SUSPEND_STANDBY] = "standby", + [PM_SUSPEND_MEM] = "mem", +}; + +static struct platform_suspend_ops *suspend_ops; + +/** + * suspend_set_ops - Set the global suspend method table. + * @ops: Pointer to ops structure. + */ +void suspend_set_ops(struct platform_suspend_ops *ops) +{ + mutex_lock(&pm_mutex); + suspend_ops = ops; + mutex_unlock(&pm_mutex); +} + +bool valid_state(suspend_state_t state) +{ + /* + * All states need lowlevel support and need to be valid to the lowlevel + * implementation, no valid callback implies that none are valid. + */ + return suspend_ops && suspend_ops->valid && suspend_ops->valid(state); +} + +/** + * suspend_valid_only_mem - generic memory-only valid callback + * + * Platform drivers that implement mem suspend only and only need + * to check for that in their .valid callback can use this instead + * of rolling their own .valid callback. + */ +int suspend_valid_only_mem(suspend_state_t state) +{ + return state == PM_SUSPEND_MEM; +} + +static int suspend_test(int level) +{ +#ifdef CONFIG_PM_DEBUG + if (pm_test_level == level) { + printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n"); + mdelay(5000); + return 1; + } +#endif /* !CONFIG_PM_DEBUG */ + return 0; +} + +/** + * suspend_prepare - Do prep work before entering low-power state. + * + * This is common code that is called for each state that we're entering. + * Run suspend notifiers, allocate a console and stop all processes. + */ +static int suspend_prepare(void) +{ + int error; + + if (!suspend_ops || !suspend_ops->enter) + return -EPERM; + + pm_prepare_console(); + + error = pm_notifier_call_chain(PM_SUSPEND_PREPARE); + if (error) + goto Finish; + + error = usermodehelper_disable(); + if (error) + goto Finish; + + error = suspend_freeze_processes(); + if (!error) + return 0; + + suspend_thaw_processes(); + usermodehelper_enable(); + Finish: + pm_notifier_call_chain(PM_POST_SUSPEND); + pm_restore_console(); + return error; +} + +/* default implementation */ +void __attribute__ ((weak)) arch_suspend_disable_irqs(void) +{ + local_irq_disable(); +} + +/* default implementation */ +void __attribute__ ((weak)) arch_suspend_enable_irqs(void) +{ + local_irq_enable(); +} + +/** + * suspend_enter - enter the desired system sleep state. + * @state: state to enter + * + * This function should be called after devices have been suspended. + */ +static int suspend_enter(suspend_state_t state) +{ + int error; + + if (suspend_ops->prepare) { + error = suspend_ops->prepare(); + if (error) + return error; + } + + error = dpm_suspend_noirq(PMSG_SUSPEND); + if (error) { + printk(KERN_ERR "PM: Some devices failed to power down\n"); + goto Platfrom_finish; + } + + if (suspend_ops->prepare_late) { + error = suspend_ops->prepare_late(); + if (error) + goto Power_up_devices; + } + + if (suspend_test(TEST_PLATFORM)) + goto Platform_wake; + + error = disable_nonboot_cpus(); + if (error || suspend_test(TEST_CPUS)) + goto Enable_cpus; + + arch_suspend_disable_irqs(); + BUG_ON(!irqs_disabled()); + + error = sysdev_suspend(PMSG_SUSPEND); + if (!error) { + if (!suspend_test(TEST_CORE)) + error = suspend_ops->enter(state); + sysdev_resume(); + } + + arch_suspend_enable_irqs(); + BUG_ON(irqs_disabled()); + + Enable_cpus: + enable_nonboot_cpus(); + + Platform_wake: + if (suspend_ops->wake) + suspend_ops->wake(); + + Power_up_devices: + dpm_resume_noirq(PMSG_RESUME); + + Platfrom_finish: + if (suspend_ops->finish) + suspend_ops->finish(); + + return error; +} + +/** + * suspend_devices_and_enter - suspend devices and enter the desired system + * sleep state. + * @state: state to enter + */ +int suspend_devices_and_enter(suspend_state_t state) +{ + int error; + + if (!suspend_ops) + return -ENOSYS; + + if (suspend_ops->begin) { + error = suspend_ops->begin(state); + if (error) + goto Close; + } + suspend_console(); + suspend_test_start(); + error = dpm_suspend_start(PMSG_SUSPEND); + if (error) { + printk(KERN_ERR "PM: Some devices failed to suspend\n"); + goto Recover_platform; + } + suspend_test_finish("suspend devices"); + if (suspend_test(TEST_DEVICES)) + goto Recover_platform; + + suspend_enter(state); + + Resume_devices: + suspend_test_start(); + dpm_resume_end(PMSG_RESUME); + suspend_test_finish("resume devices"); + resume_console(); + Close: + if (suspend_ops->end) + suspend_ops->end(); + return error; + + Recover_platform: + if (suspend_ops->recover) + suspend_ops->recover(); + goto Resume_devices; +} + +/** + * suspend_finish - Do final work before exiting suspend sequence. + * + * Call platform code to clean up, restart processes, and free the + * console that we've allocated. This is not called for suspend-to-disk. + */ +static void suspend_finish(void) +{ + suspend_thaw_processes(); + usermodehelper_enable(); + pm_notifier_call_chain(PM_POST_SUSPEND); + pm_restore_console(); +} + +/** + * enter_state - Do common work of entering low-power state. + * @state: pm_state structure for state we're entering. + * + * Make sure we're the only ones trying to enter a sleep state. Fail + * if someone has beat us to it, since we don't want anything weird to + * happen when we wake up. + * Then, do the setup for suspend, enter the state, and cleaup (after + * we've woken up). + */ +int enter_state(suspend_state_t state) +{ + int error; + + if (!valid_state(state)) + return -ENODEV; + + if (!mutex_trylock(&pm_mutex)) + return -EBUSY; + + printk(KERN_INFO "PM: Syncing filesystems ... "); + sys_sync(); + printk("done.\n"); + + pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]); + error = suspend_prepare(); + if (error) + goto Unlock; + + if (suspend_test(TEST_FREEZER)) + goto Finish; + + pr_debug("PM: Entering %s sleep\n", pm_states[state]); + error = suspend_devices_and_enter(state); + + Finish: + pr_debug("PM: Finishing wakeup.\n"); + suspend_finish(); + Unlock: + mutex_unlock(&pm_mutex); + return error; +} + +/** + * pm_suspend - Externally visible function for suspending system. + * @state: Enumerated value of state to enter. + * + * Determine whether or not value is within range, get state + * structure, and enter (above). + */ +int pm_suspend(suspend_state_t state) +{ + if (state > PM_SUSPEND_ON && state <= PM_SUSPEND_MAX) + return enter_state(state); + return -EINVAL; +} +EXPORT_SYMBOL(pm_suspend); diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c new file mode 100644 index 000000000000..17d8bb1acf9c --- /dev/null +++ b/kernel/power/suspend_test.c @@ -0,0 +1,187 @@ +/* + * kernel/power/suspend_test.c - Suspend to RAM and standby test facility. + * + * Copyright (c) 2009 Pavel Machek + * + * This file is released under the GPLv2. + */ + +#include +#include + +#include "power.h" + +/* + * We test the system suspend code by setting an RTC wakealarm a short + * time in the future, then suspending. Suspending the devices won't + * normally take long ... some systems only need a few milliseconds. + * + * The time it takes is system-specific though, so when we test this + * during system bootup we allow a LOT of time. + */ +#define TEST_SUSPEND_SECONDS 5 + +static unsigned long suspend_test_start_time; + +void suspend_test_start(void) +{ + /* FIXME Use better timebase than "jiffies", ideally a clocksource. + * What we want is a hardware counter that will work correctly even + * during the irqs-are-off stages of the suspend/resume cycle... + */ + suspend_test_start_time = jiffies; +} + +void suspend_test_finish(const char *label) +{ + long nj = jiffies - suspend_test_start_time; + unsigned msec; + + msec = jiffies_to_msecs(abs(nj)); + pr_info("PM: %s took %d.%03d seconds\n", label, + msec / 1000, msec % 1000); + + /* Warning on suspend means the RTC alarm period needs to be + * larger -- the system was sooo slooowwww to suspend that the + * alarm (should have) fired before the system went to sleep! + * + * Warning on either suspend or resume also means the system + * has some performance issues. The stack dump of a WARN_ON + * is more likely to get the right attention than a printk... + */ + WARN(msec > (TEST_SUSPEND_SECONDS * 1000), "Component: %s\n", label); +} + +/* + * To test system suspend, we need a hands-off mechanism to resume the + * system. RTCs wake alarms are a common self-contained mechanism. + */ + +static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state) +{ + static char err_readtime[] __initdata = + KERN_ERR "PM: can't read %s time, err %d\n"; + static char err_wakealarm [] __initdata = + KERN_ERR "PM: can't set %s wakealarm, err %d\n"; + static char err_suspend[] __initdata = + KERN_ERR "PM: suspend test failed, error %d\n"; + static char info_test[] __initdata = + KERN_INFO "PM: test RTC wakeup from '%s' suspend\n"; + + unsigned long now; + struct rtc_wkalrm alm; + int status; + + /* this may fail if the RTC hasn't been initialized */ + status = rtc_read_time(rtc, &alm.time); + if (status < 0) { + printk(err_readtime, dev_name(&rtc->dev), status); + return; + } + rtc_tm_to_time(&alm.time, &now); + + memset(&alm, 0, sizeof alm); + rtc_time_to_tm(now + TEST_SUSPEND_SECONDS, &alm.time); + alm.enabled = true; + + status = rtc_set_alarm(rtc, &alm); + if (status < 0) { + printk(err_wakealarm, dev_name(&rtc->dev), status); + return; + } + + if (state == PM_SUSPEND_MEM) { + printk(info_test, pm_states[state]); + status = pm_suspend(state); + if (status == -ENODEV) + state = PM_SUSPEND_STANDBY; + } + if (state == PM_SUSPEND_STANDBY) { + printk(info_test, pm_states[state]); + status = pm_suspend(state); + } + if (status < 0) + printk(err_suspend, status); + + /* Some platforms can't detect that the alarm triggered the + * wakeup, or (accordingly) disable it after it afterwards. + * It's supposed to give oneshot behavior; cope. + */ + alm.enabled = false; + rtc_set_alarm(rtc, &alm); +} + +static int __init has_wakealarm(struct device *dev, void *name_ptr) +{ + struct rtc_device *candidate = to_rtc_device(dev); + + if (!candidate->ops->set_alarm) + return 0; + if (!device_may_wakeup(candidate->dev.parent)) + return 0; + + *(const char **)name_ptr = dev_name(dev); + return 1; +} + +/* + * Kernel options like "test_suspend=mem" force suspend/resume sanity tests + * at startup time. They're normally disabled, for faster boot and because + * we can't know which states really work on this particular system. + */ +static suspend_state_t test_state __initdata = PM_SUSPEND_ON; + +static char warn_bad_state[] __initdata = + KERN_WARNING "PM: can't test '%s' suspend state\n"; + +static int __init setup_test_suspend(char *value) +{ + unsigned i; + + /* "=mem" ==> "mem" */ + value++; + for (i = 0; i < PM_SUSPEND_MAX; i++) { + if (!pm_states[i]) + continue; + if (strcmp(pm_states[i], value) != 0) + continue; + test_state = (__force suspend_state_t) i; + return 0; + } + printk(warn_bad_state, value); + return 0; +} +__setup("test_suspend", setup_test_suspend); + +static int __init test_suspend(void) +{ + static char warn_no_rtc[] __initdata = + KERN_WARNING "PM: no wakealarm-capable RTC driver is ready\n"; + + char *pony = NULL; + struct rtc_device *rtc = NULL; + + /* PM is initialized by now; is that state testable? */ + if (test_state == PM_SUSPEND_ON) + goto done; + if (!valid_state(test_state)) { + printk(warn_bad_state, pm_states[test_state]); + goto done; + } + + /* RTCs have initialized by now too ... can we use one? */ + class_find_device(rtc_class, NULL, &pony, has_wakealarm); + if (pony) + rtc = rtc_class_open(pony); + if (!rtc) { + printk(warn_no_rtc); + goto done; + } + + /* go for it */ + test_wakealarm(rtc, test_state); + rtc_class_close(rtc); +done: + return 0; +} +late_initcall(test_suspend); -- cgit v1.2.3 From 8b759b84c8b3c27ccc8dd787294636297b3ebb40 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 10 Jun 2009 01:27:49 +0200 Subject: PM/Hibernate: Rename disk.c to hibernate.c Change the name of kernel/power/disk.c to kernel/power/hibernate.c in analogy with the file names introduced by the changes that separated the suspend to RAM and standby funtionality from the common PM functions. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek --- kernel/power/Makefile | 2 +- kernel/power/disk.c | 955 ----------------------------------------------- kernel/power/hibernate.c | 955 +++++++++++++++++++++++++++++++++++++++++++++++ kernel/power/power.h | 4 +- 4 files changed, 958 insertions(+), 958 deletions(-) delete mode 100644 kernel/power/disk.c create mode 100644 kernel/power/hibernate.c (limited to 'kernel') diff --git a/kernel/power/Makefile b/kernel/power/Makefile index c4baf1b633c4..eadb17fc8f5e 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -8,6 +8,6 @@ obj-$(CONFIG_PM_SLEEP) += console.o obj-$(CONFIG_FREEZER) += process.o obj-$(CONFIG_SUSPEND) += suspend.o obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o -obj-$(CONFIG_HIBERNATION) += swsusp.o disk.o snapshot.o swap.o user.o +obj-$(CONFIG_HIBERNATION) += swsusp.o hibernate.o snapshot.o swap.o user.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o diff --git a/kernel/power/disk.c b/kernel/power/disk.c deleted file mode 100644 index a9beba68b6c7..000000000000 --- a/kernel/power/disk.c +++ /dev/null @@ -1,955 +0,0 @@ -/* - * kernel/power/disk.c - Suspend-to-disk support. - * - * Copyright (c) 2003 Patrick Mochel - * Copyright (c) 2003 Open Source Development Lab - * Copyright (c) 2004 Pavel Machek - * - * This file is released under the GPLv2. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "power.h" - - -static int noresume = 0; -static char resume_file[256] = CONFIG_PM_STD_PARTITION; -dev_t swsusp_resume_device; -sector_t swsusp_resume_block; - -enum { - HIBERNATION_INVALID, - HIBERNATION_PLATFORM, - HIBERNATION_TEST, - HIBERNATION_TESTPROC, - HIBERNATION_SHUTDOWN, - HIBERNATION_REBOOT, - /* keep last */ - __HIBERNATION_AFTER_LAST -}; -#define HIBERNATION_MAX (__HIBERNATION_AFTER_LAST-1) -#define HIBERNATION_FIRST (HIBERNATION_INVALID + 1) - -static int hibernation_mode = HIBERNATION_SHUTDOWN; - -static struct platform_hibernation_ops *hibernation_ops; - -/** - * hibernation_set_ops - set the global hibernate operations - * @ops: the hibernation operations to use in subsequent hibernation transitions - */ - -void hibernation_set_ops(struct platform_hibernation_ops *ops) -{ - if (ops && !(ops->begin && ops->end && ops->pre_snapshot - && ops->prepare && ops->finish && ops->enter && ops->pre_restore - && ops->restore_cleanup)) { - WARN_ON(1); - return; - } - mutex_lock(&pm_mutex); - hibernation_ops = ops; - if (ops) - hibernation_mode = HIBERNATION_PLATFORM; - else if (hibernation_mode == HIBERNATION_PLATFORM) - hibernation_mode = HIBERNATION_SHUTDOWN; - - mutex_unlock(&pm_mutex); -} - -static bool entering_platform_hibernation; - -bool system_entering_hibernation(void) -{ - return entering_platform_hibernation; -} -EXPORT_SYMBOL(system_entering_hibernation); - -#ifdef CONFIG_PM_DEBUG -static void hibernation_debug_sleep(void) -{ - printk(KERN_INFO "hibernation debug: Waiting for 5 seconds.\n"); - mdelay(5000); -} - -static int hibernation_testmode(int mode) -{ - if (hibernation_mode == mode) { - hibernation_debug_sleep(); - return 1; - } - return 0; -} - -static int hibernation_test(int level) -{ - if (pm_test_level == level) { - hibernation_debug_sleep(); - return 1; - } - return 0; -} -#else /* !CONFIG_PM_DEBUG */ -static int hibernation_testmode(int mode) { return 0; } -static int hibernation_test(int level) { return 0; } -#endif /* !CONFIG_PM_DEBUG */ - -/** - * platform_begin - tell the platform driver that we're starting - * hibernation - */ - -static int platform_begin(int platform_mode) -{ - return (platform_mode && hibernation_ops) ? - hibernation_ops->begin() : 0; -} - -/** - * platform_end - tell the platform driver that we've entered the - * working state - */ - -static void platform_end(int platform_mode) -{ - if (platform_mode && hibernation_ops) - hibernation_ops->end(); -} - -/** - * platform_pre_snapshot - prepare the machine for hibernation using the - * platform driver if so configured and return an error code if it fails - */ - -static int platform_pre_snapshot(int platform_mode) -{ - return (platform_mode && hibernation_ops) ? - hibernation_ops->pre_snapshot() : 0; -} - -/** - * platform_leave - prepare the machine for switching to the normal mode - * of operation using the platform driver (called with interrupts disabled) - */ - -static void platform_leave(int platform_mode) -{ - if (platform_mode && hibernation_ops) - hibernation_ops->leave(); -} - -/** - * platform_finish - switch the machine to the normal mode of operation - * using the platform driver (must be called after platform_prepare()) - */ - -static void platform_finish(int platform_mode) -{ - if (platform_mode && hibernation_ops) - hibernation_ops->finish(); -} - -/** - * platform_pre_restore - prepare the platform for the restoration from a - * hibernation image. If the restore fails after this function has been - * called, platform_restore_cleanup() must be called. - */ - -static int platform_pre_restore(int platform_mode) -{ - return (platform_mode && hibernation_ops) ? - hibernation_ops->pre_restore() : 0; -} - -/** - * platform_restore_cleanup - switch the platform to the normal mode of - * operation after a failing restore. If platform_pre_restore() has been - * called before the failing restore, this function must be called too, - * regardless of the result of platform_pre_restore(). - */ - -static void platform_restore_cleanup(int platform_mode) -{ - if (platform_mode && hibernation_ops) - hibernation_ops->restore_cleanup(); -} - -/** - * platform_recover - recover the platform from a failure to suspend - * devices. - */ - -static void platform_recover(int platform_mode) -{ - if (platform_mode && hibernation_ops && hibernation_ops->recover) - hibernation_ops->recover(); -} - -/** - * create_image - freeze devices that need to be frozen with interrupts - * off, create the hibernation image and thaw those devices. Control - * reappears in this routine after a restore. - */ - -static int create_image(int platform_mode) -{ - int error; - - error = arch_prepare_suspend(); - if (error) - return error; - - /* At this point, dpm_suspend_start() has been called, but *not* - * dpm_suspend_noirq(). We *must* call dpm_suspend_noirq() now. - * Otherwise, drivers for some devices (e.g. interrupt controllers) - * become desynchronized with the actual state of the hardware - * at resume time, and evil weirdness ensues. - */ - error = dpm_suspend_noirq(PMSG_FREEZE); - if (error) { - printk(KERN_ERR "PM: Some devices failed to power down, " - "aborting hibernation\n"); - return error; - } - - error = platform_pre_snapshot(platform_mode); - if (error || hibernation_test(TEST_PLATFORM)) - goto Platform_finish; - - error = disable_nonboot_cpus(); - if (error || hibernation_test(TEST_CPUS) - || hibernation_testmode(HIBERNATION_TEST)) - goto Enable_cpus; - - local_irq_disable(); - - error = sysdev_suspend(PMSG_FREEZE); - if (error) { - printk(KERN_ERR "PM: Some system devices failed to power down, " - "aborting hibernation\n"); - goto Enable_irqs; - } - - if (hibernation_test(TEST_CORE)) - goto Power_up; - - in_suspend = 1; - save_processor_state(); - error = swsusp_arch_suspend(); - if (error) - printk(KERN_ERR "PM: Error %d creating hibernation image\n", - error); - /* Restore control flow magically appears here */ - restore_processor_state(); - if (!in_suspend) - platform_leave(platform_mode); - - Power_up: - sysdev_resume(); - /* NOTE: dpm_resume_noirq() is just a resume() for devices - * that suspended with irqs off ... no overall powerup. - */ - - Enable_irqs: - local_irq_enable(); - - Enable_cpus: - enable_nonboot_cpus(); - - Platform_finish: - platform_finish(platform_mode); - - dpm_resume_noirq(in_suspend ? - (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); - - return error; -} - -/** - * hibernation_snapshot - quiesce devices and create the hibernation - * snapshot image. - * @platform_mode - if set, use the platform driver, if available, to - * prepare the platform firmware for the power transition. - * - * Must be called with pm_mutex held - */ - -int hibernation_snapshot(int platform_mode) -{ - int error; - - error = platform_begin(platform_mode); - if (error) - return error; - - /* Free memory before shutting down devices. */ - error = swsusp_shrink_memory(); - if (error) - goto Close; - - suspend_console(); - error = dpm_suspend_start(PMSG_FREEZE); - if (error) - goto Recover_platform; - - if (hibernation_test(TEST_DEVICES)) - goto Recover_platform; - - error = create_image(platform_mode); - /* Control returns here after successful restore */ - - Resume_devices: - dpm_resume_end(in_suspend ? - (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); - resume_console(); - Close: - platform_end(platform_mode); - return error; - - Recover_platform: - platform_recover(platform_mode); - goto Resume_devices; -} - -/** - * resume_target_kernel - prepare devices that need to be suspended with - * interrupts off, restore the contents of highmem that have not been - * restored yet from the image and run the low level code that will restore - * the remaining contents of memory and switch to the just restored target - * kernel. - */ - -static int resume_target_kernel(bool platform_mode) -{ - int error; - - error = dpm_suspend_noirq(PMSG_QUIESCE); - if (error) { - printk(KERN_ERR "PM: Some devices failed to power down, " - "aborting resume\n"); - return error; - } - - error = platform_pre_restore(platform_mode); - if (error) - goto Cleanup; - - error = disable_nonboot_cpus(); - if (error) - goto Enable_cpus; - - local_irq_disable(); - - error = sysdev_suspend(PMSG_QUIESCE); - if (error) - goto Enable_irqs; - - /* We'll ignore saved state, but this gets preempt count (etc) right */ - save_processor_state(); - error = restore_highmem(); - if (!error) { - error = swsusp_arch_resume(); - /* - * The code below is only ever reached in case of a failure. - * Otherwise execution continues at place where - * swsusp_arch_suspend() was called - */ - BUG_ON(!error); - /* This call to restore_highmem() undos the previous one */ - restore_highmem(); - } - /* - * The only reason why swsusp_arch_resume() can fail is memory being - * very tight, so we have to free it as soon as we can to avoid - * subsequent failures - */ - swsusp_free(); - restore_processor_state(); - touch_softlockup_watchdog(); - - sysdev_resume(); - - Enable_irqs: - local_irq_enable(); - - Enable_cpus: - enable_nonboot_cpus(); - - Cleanup: - platform_restore_cleanup(platform_mode); - - dpm_resume_noirq(PMSG_RECOVER); - - return error; -} - -/** - * hibernation_restore - quiesce devices and restore the hibernation - * snapshot image. If successful, control returns in hibernation_snaphot() - * @platform_mode - if set, use the platform driver, if available, to - * prepare the platform firmware for the transition. - * - * Must be called with pm_mutex held - */ - -int hibernation_restore(int platform_mode) -{ - int error; - - pm_prepare_console(); - suspend_console(); - error = dpm_suspend_start(PMSG_QUIESCE); - if (!error) { - error = resume_target_kernel(platform_mode); - dpm_resume_end(PMSG_RECOVER); - } - resume_console(); - pm_restore_console(); - return error; -} - -/** - * hibernation_platform_enter - enter the hibernation state using the - * platform driver (if available) - */ - -int hibernation_platform_enter(void) -{ - int error; - - if (!hibernation_ops) - return -ENOSYS; - - /* - * We have cancelled the power transition by running - * hibernation_ops->finish() before saving the image, so we should let - * the firmware know that we're going to enter the sleep state after all - */ - error = hibernation_ops->begin(); - if (error) - goto Close; - - entering_platform_hibernation = true; - suspend_console(); - error = dpm_suspend_start(PMSG_HIBERNATE); - if (error) { - if (hibernation_ops->recover) - hibernation_ops->recover(); - goto Resume_devices; - } - - error = dpm_suspend_noirq(PMSG_HIBERNATE); - if (error) - goto Resume_devices; - - error = hibernation_ops->prepare(); - if (error) - goto Platofrm_finish; - - error = disable_nonboot_cpus(); - if (error) - goto Platofrm_finish; - - local_irq_disable(); - sysdev_suspend(PMSG_HIBERNATE); - hibernation_ops->enter(); - /* We should never get here */ - while (1); - - /* - * We don't need to reenable the nonboot CPUs or resume consoles, since - * the system is going to be halted anyway. - */ - Platofrm_finish: - hibernation_ops->finish(); - - dpm_suspend_noirq(PMSG_RESTORE); - - Resume_devices: - entering_platform_hibernation = false; - dpm_resume_end(PMSG_RESTORE); - resume_console(); - - Close: - hibernation_ops->end(); - - return error; -} - -/** - * power_down - Shut the machine down for hibernation. - * - * Use the platform driver, if configured so; otherwise try - * to power off or reboot. - */ - -static void power_down(void) -{ - switch (hibernation_mode) { - case HIBERNATION_TEST: - case HIBERNATION_TESTPROC: - break; - case HIBERNATION_REBOOT: - kernel_restart(NULL); - break; - case HIBERNATION_PLATFORM: - hibernation_platform_enter(); - case HIBERNATION_SHUTDOWN: - kernel_power_off(); - break; - } - kernel_halt(); - /* - * Valid image is on the disk, if we continue we risk serious data - * corruption after resume. - */ - printk(KERN_CRIT "PM: Please power down manually\n"); - while(1); -} - -static int prepare_processes(void) -{ - int error = 0; - - if (freeze_processes()) { - error = -EBUSY; - thaw_processes(); - } - return error; -} - -/** - * hibernate - The granpappy of the built-in hibernation management - */ - -int hibernate(void) -{ - int error; - - mutex_lock(&pm_mutex); - /* The snapshot device should not be opened while we're running */ - if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { - error = -EBUSY; - goto Unlock; - } - - pm_prepare_console(); - error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); - if (error) - goto Exit; - - error = usermodehelper_disable(); - if (error) - goto Exit; - - /* Allocate memory management structures */ - error = create_basic_memory_bitmaps(); - if (error) - goto Exit; - - printk(KERN_INFO "PM: Syncing filesystems ... "); - sys_sync(); - printk("done.\n"); - - error = prepare_processes(); - if (error) - goto Finish; - - if (hibernation_test(TEST_FREEZER)) - goto Thaw; - - if (hibernation_testmode(HIBERNATION_TESTPROC)) - goto Thaw; - - error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); - if (in_suspend && !error) { - unsigned int flags = 0; - - if (hibernation_mode == HIBERNATION_PLATFORM) - flags |= SF_PLATFORM_MODE; - pr_debug("PM: writing image.\n"); - error = swsusp_write(flags); - swsusp_free(); - if (!error) - power_down(); - } else { - pr_debug("PM: Image restored successfully.\n"); - swsusp_free(); - } - Thaw: - thaw_processes(); - Finish: - free_basic_memory_bitmaps(); - usermodehelper_enable(); - Exit: - pm_notifier_call_chain(PM_POST_HIBERNATION); - pm_restore_console(); - atomic_inc(&snapshot_device_available); - Unlock: - mutex_unlock(&pm_mutex); - return error; -} - - -/** - * software_resume - Resume from a saved image. - * - * Called as a late_initcall (so all devices are discovered and - * initialized), we call swsusp to see if we have a saved image or not. - * If so, we quiesce devices, the restore the saved image. We will - * return above (in hibernate() ) if everything goes well. - * Otherwise, we fail gracefully and return to the normally - * scheduled program. - * - */ - -static int software_resume(void) -{ - int error; - unsigned int flags; - - /* - * If the user said "noresume".. bail out early. - */ - if (noresume) - return 0; - - /* - * name_to_dev_t() below takes a sysfs buffer mutex when sysfs - * is configured into the kernel. Since the regular hibernate - * trigger path is via sysfs which takes a buffer mutex before - * calling hibernate functions (which take pm_mutex) this can - * cause lockdep to complain about a possible ABBA deadlock - * which cannot happen since we're in the boot code here and - * sysfs can't be invoked yet. Therefore, we use a subclass - * here to avoid lockdep complaining. - */ - mutex_lock_nested(&pm_mutex, SINGLE_DEPTH_NESTING); - - if (swsusp_resume_device) - goto Check_image; - - if (!strlen(resume_file)) { - error = -ENOENT; - goto Unlock; - } - - pr_debug("PM: Checking image partition %s\n", resume_file); - - /* Check if the device is there */ - swsusp_resume_device = name_to_dev_t(resume_file); - if (!swsusp_resume_device) { - /* - * Some device discovery might still be in progress; we need - * to wait for this to finish. - */ - wait_for_device_probe(); - /* - * We can't depend on SCSI devices being available after loading - * one of their modules until scsi_complete_async_scans() is - * called and the resume device usually is a SCSI one. - */ - scsi_complete_async_scans(); - - swsusp_resume_device = name_to_dev_t(resume_file); - if (!swsusp_resume_device) { - error = -ENODEV; - goto Unlock; - } - } - - Check_image: - pr_debug("PM: Resume from partition %d:%d\n", - MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); - - pr_debug("PM: Checking hibernation image.\n"); - error = swsusp_check(); - if (error) - goto Unlock; - - /* The snapshot device should not be opened while we're running */ - if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { - error = -EBUSY; - goto Unlock; - } - - pm_prepare_console(); - error = pm_notifier_call_chain(PM_RESTORE_PREPARE); - if (error) - goto Finish; - - error = usermodehelper_disable(); - if (error) - goto Finish; - - error = create_basic_memory_bitmaps(); - if (error) - goto Finish; - - pr_debug("PM: Preparing processes for restore.\n"); - error = prepare_processes(); - if (error) { - swsusp_close(FMODE_READ); - goto Done; - } - - pr_debug("PM: Reading hibernation image.\n"); - - error = swsusp_read(&flags); - if (!error) - hibernation_restore(flags & SF_PLATFORM_MODE); - - printk(KERN_ERR "PM: Restore failed, recovering.\n"); - swsusp_free(); - thaw_processes(); - Done: - free_basic_memory_bitmaps(); - usermodehelper_enable(); - Finish: - pm_notifier_call_chain(PM_POST_RESTORE); - pm_restore_console(); - atomic_inc(&snapshot_device_available); - /* For success case, the suspend path will release the lock */ - Unlock: - mutex_unlock(&pm_mutex); - pr_debug("PM: Resume from disk failed.\n"); - return error; -} - -late_initcall(software_resume); - - -static const char * const hibernation_modes[] = { - [HIBERNATION_PLATFORM] = "platform", - [HIBERNATION_SHUTDOWN] = "shutdown", - [HIBERNATION_REBOOT] = "reboot", - [HIBERNATION_TEST] = "test", - [HIBERNATION_TESTPROC] = "testproc", -}; - -/** - * disk - Control hibernation mode - * - * Suspend-to-disk can be handled in several ways. We have a few options - * for putting the system to sleep - using the platform driver (e.g. ACPI - * or other hibernation_ops), powering off the system or rebooting the - * system (for testing) as well as the two test modes. - * - * The system can support 'platform', and that is known a priori (and - * encoded by the presence of hibernation_ops). However, the user may - * choose 'shutdown' or 'reboot' as alternatives, as well as one fo the - * test modes, 'test' or 'testproc'. - * - * show() will display what the mode is currently set to. - * store() will accept one of - * - * 'platform' - * 'shutdown' - * 'reboot' - * 'test' - * 'testproc' - * - * It will only change to 'platform' if the system - * supports it (as determined by having hibernation_ops). - */ - -static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr, - char *buf) -{ - int i; - char *start = buf; - - for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) { - if (!hibernation_modes[i]) - continue; - switch (i) { - case HIBERNATION_SHUTDOWN: - case HIBERNATION_REBOOT: - case HIBERNATION_TEST: - case HIBERNATION_TESTPROC: - break; - case HIBERNATION_PLATFORM: - if (hibernation_ops) - break; - /* not a valid mode, continue with loop */ - continue; - } - if (i == hibernation_mode) - buf += sprintf(buf, "[%s] ", hibernation_modes[i]); - else - buf += sprintf(buf, "%s ", hibernation_modes[i]); - } - buf += sprintf(buf, "\n"); - return buf-start; -} - - -static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t n) -{ - int error = 0; - int i; - int len; - char *p; - int mode = HIBERNATION_INVALID; - - p = memchr(buf, '\n', n); - len = p ? p - buf : n; - - mutex_lock(&pm_mutex); - for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) { - if (len == strlen(hibernation_modes[i]) - && !strncmp(buf, hibernation_modes[i], len)) { - mode = i; - break; - } - } - if (mode != HIBERNATION_INVALID) { - switch (mode) { - case HIBERNATION_SHUTDOWN: - case HIBERNATION_REBOOT: - case HIBERNATION_TEST: - case HIBERNATION_TESTPROC: - hibernation_mode = mode; - break; - case HIBERNATION_PLATFORM: - if (hibernation_ops) - hibernation_mode = mode; - else - error = -EINVAL; - } - } else - error = -EINVAL; - - if (!error) - pr_debug("PM: Hibernation mode set to '%s'\n", - hibernation_modes[mode]); - mutex_unlock(&pm_mutex); - return error ? error : n; -} - -power_attr(disk); - -static ssize_t resume_show(struct kobject *kobj, struct kobj_attribute *attr, - char *buf) -{ - return sprintf(buf,"%d:%d\n", MAJOR(swsusp_resume_device), - MINOR(swsusp_resume_device)); -} - -static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t n) -{ - unsigned int maj, min; - dev_t res; - int ret = -EINVAL; - - if (sscanf(buf, "%u:%u", &maj, &min) != 2) - goto out; - - res = MKDEV(maj,min); - if (maj != MAJOR(res) || min != MINOR(res)) - goto out; - - mutex_lock(&pm_mutex); - swsusp_resume_device = res; - mutex_unlock(&pm_mutex); - printk(KERN_INFO "PM: Starting manual resume from disk\n"); - noresume = 0; - software_resume(); - ret = n; - out: - return ret; -} - -power_attr(resume); - -static ssize_t image_size_show(struct kobject *kobj, struct kobj_attribute *attr, - char *buf) -{ - return sprintf(buf, "%lu\n", image_size); -} - -static ssize_t image_size_store(struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t n) -{ - unsigned long size; - - if (sscanf(buf, "%lu", &size) == 1) { - image_size = size; - return n; - } - - return -EINVAL; -} - -power_attr(image_size); - -static struct attribute * g[] = { - &disk_attr.attr, - &resume_attr.attr, - &image_size_attr.attr, - NULL, -}; - - -static struct attribute_group attr_group = { - .attrs = g, -}; - - -static int __init pm_disk_init(void) -{ - return sysfs_create_group(power_kobj, &attr_group); -} - -core_initcall(pm_disk_init); - - -static int __init resume_setup(char *str) -{ - if (noresume) - return 1; - - strncpy( resume_file, str, 255 ); - return 1; -} - -static int __init resume_offset_setup(char *str) -{ - unsigned long long offset; - - if (noresume) - return 1; - - if (sscanf(str, "%llu", &offset) == 1) - swsusp_resume_block = offset; - - return 1; -} - -static int __init noresume_setup(char *str) -{ - noresume = 1; - return 1; -} - -__setup("noresume", noresume_setup); -__setup("resume_offset=", resume_offset_setup); -__setup("resume=", resume_setup); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c new file mode 100644 index 000000000000..81d2e7464893 --- /dev/null +++ b/kernel/power/hibernate.c @@ -0,0 +1,955 @@ +/* + * kernel/power/hibernate.c - Hibernation (a.k.a suspend-to-disk) support. + * + * Copyright (c) 2003 Patrick Mochel + * Copyright (c) 2003 Open Source Development Lab + * Copyright (c) 2004 Pavel Machek + * Copyright (c) 2009 Rafael J. Wysocki, Novell Inc. + * + * This file is released under the GPLv2. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "power.h" + + +static int noresume = 0; +static char resume_file[256] = CONFIG_PM_STD_PARTITION; +dev_t swsusp_resume_device; +sector_t swsusp_resume_block; + +enum { + HIBERNATION_INVALID, + HIBERNATION_PLATFORM, + HIBERNATION_TEST, + HIBERNATION_TESTPROC, + HIBERNATION_SHUTDOWN, + HIBERNATION_REBOOT, + /* keep last */ + __HIBERNATION_AFTER_LAST +}; +#define HIBERNATION_MAX (__HIBERNATION_AFTER_LAST-1) +#define HIBERNATION_FIRST (HIBERNATION_INVALID + 1) + +static int hibernation_mode = HIBERNATION_SHUTDOWN; + +static struct platform_hibernation_ops *hibernation_ops; + +/** + * hibernation_set_ops - set the global hibernate operations + * @ops: the hibernation operations to use in subsequent hibernation transitions + */ + +void hibernation_set_ops(struct platform_hibernation_ops *ops) +{ + if (ops && !(ops->begin && ops->end && ops->pre_snapshot + && ops->prepare && ops->finish && ops->enter && ops->pre_restore + && ops->restore_cleanup)) { + WARN_ON(1); + return; + } + mutex_lock(&pm_mutex); + hibernation_ops = ops; + if (ops) + hibernation_mode = HIBERNATION_PLATFORM; + else if (hibernation_mode == HIBERNATION_PLATFORM) + hibernation_mode = HIBERNATION_SHUTDOWN; + + mutex_unlock(&pm_mutex); +} + +static bool entering_platform_hibernation; + +bool system_entering_hibernation(void) +{ + return entering_platform_hibernation; +} +EXPORT_SYMBOL(system_entering_hibernation); + +#ifdef CONFIG_PM_DEBUG +static void hibernation_debug_sleep(void) +{ + printk(KERN_INFO "hibernation debug: Waiting for 5 seconds.\n"); + mdelay(5000); +} + +static int hibernation_testmode(int mode) +{ + if (hibernation_mode == mode) { + hibernation_debug_sleep(); + return 1; + } + return 0; +} + +static int hibernation_test(int level) +{ + if (pm_test_level == level) { + hibernation_debug_sleep(); + return 1; + } + return 0; +} +#else /* !CONFIG_PM_DEBUG */ +static int hibernation_testmode(int mode) { return 0; } +static int hibernation_test(int level) { return 0; } +#endif /* !CONFIG_PM_DEBUG */ + +/** + * platform_begin - tell the platform driver that we're starting + * hibernation + */ + +static int platform_begin(int platform_mode) +{ + return (platform_mode && hibernation_ops) ? + hibernation_ops->begin() : 0; +} + +/** + * platform_end - tell the platform driver that we've entered the + * working state + */ + +static void platform_end(int platform_mode) +{ + if (platform_mode && hibernation_ops) + hibernation_ops->end(); +} + +/** + * platform_pre_snapshot - prepare the machine for hibernation using the + * platform driver if so configured and return an error code if it fails + */ + +static int platform_pre_snapshot(int platform_mode) +{ + return (platform_mode && hibernation_ops) ? + hibernation_ops->pre_snapshot() : 0; +} + +/** + * platform_leave - prepare the machine for switching to the normal mode + * of operation using the platform driver (called with interrupts disabled) + */ + +static void platform_leave(int platform_mode) +{ + if (platform_mode && hibernation_ops) + hibernation_ops->leave(); +} + +/** + * platform_finish - switch the machine to the normal mode of operation + * using the platform driver (must be called after platform_prepare()) + */ + +static void platform_finish(int platform_mode) +{ + if (platform_mode && hibernation_ops) + hibernation_ops->finish(); +} + +/** + * platform_pre_restore - prepare the platform for the restoration from a + * hibernation image. If the restore fails after this function has been + * called, platform_restore_cleanup() must be called. + */ + +static int platform_pre_restore(int platform_mode) +{ + return (platform_mode && hibernation_ops) ? + hibernation_ops->pre_restore() : 0; +} + +/** + * platform_restore_cleanup - switch the platform to the normal mode of + * operation after a failing restore. If platform_pre_restore() has been + * called before the failing restore, this function must be called too, + * regardless of the result of platform_pre_restore(). + */ + +static void platform_restore_cleanup(int platform_mode) +{ + if (platform_mode && hibernation_ops) + hibernation_ops->restore_cleanup(); +} + +/** + * platform_recover - recover the platform from a failure to suspend + * devices. + */ + +static void platform_recover(int platform_mode) +{ + if (platform_mode && hibernation_ops && hibernation_ops->recover) + hibernation_ops->recover(); +} + +/** + * create_image - freeze devices that need to be frozen with interrupts + * off, create the hibernation image and thaw those devices. Control + * reappears in this routine after a restore. + */ + +static int create_image(int platform_mode) +{ + int error; + + error = arch_prepare_suspend(); + if (error) + return error; + + /* At this point, dpm_suspend_start() has been called, but *not* + * dpm_suspend_noirq(). We *must* call dpm_suspend_noirq() now. + * Otherwise, drivers for some devices (e.g. interrupt controllers) + * become desynchronized with the actual state of the hardware + * at resume time, and evil weirdness ensues. + */ + error = dpm_suspend_noirq(PMSG_FREEZE); + if (error) { + printk(KERN_ERR "PM: Some devices failed to power down, " + "aborting hibernation\n"); + return error; + } + + error = platform_pre_snapshot(platform_mode); + if (error || hibernation_test(TEST_PLATFORM)) + goto Platform_finish; + + error = disable_nonboot_cpus(); + if (error || hibernation_test(TEST_CPUS) + || hibernation_testmode(HIBERNATION_TEST)) + goto Enable_cpus; + + local_irq_disable(); + + error = sysdev_suspend(PMSG_FREEZE); + if (error) { + printk(KERN_ERR "PM: Some system devices failed to power down, " + "aborting hibernation\n"); + goto Enable_irqs; + } + + if (hibernation_test(TEST_CORE)) + goto Power_up; + + in_suspend = 1; + save_processor_state(); + error = swsusp_arch_suspend(); + if (error) + printk(KERN_ERR "PM: Error %d creating hibernation image\n", + error); + /* Restore control flow magically appears here */ + restore_processor_state(); + if (!in_suspend) + platform_leave(platform_mode); + + Power_up: + sysdev_resume(); + /* NOTE: dpm_resume_noirq() is just a resume() for devices + * that suspended with irqs off ... no overall powerup. + */ + + Enable_irqs: + local_irq_enable(); + + Enable_cpus: + enable_nonboot_cpus(); + + Platform_finish: + platform_finish(platform_mode); + + dpm_resume_noirq(in_suspend ? + (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); + + return error; +} + +/** + * hibernation_snapshot - quiesce devices and create the hibernation + * snapshot image. + * @platform_mode - if set, use the platform driver, if available, to + * prepare the platform firmware for the power transition. + * + * Must be called with pm_mutex held + */ + +int hibernation_snapshot(int platform_mode) +{ + int error; + + error = platform_begin(platform_mode); + if (error) + return error; + + /* Free memory before shutting down devices. */ + error = swsusp_shrink_memory(); + if (error) + goto Close; + + suspend_console(); + error = dpm_suspend_start(PMSG_FREEZE); + if (error) + goto Recover_platform; + + if (hibernation_test(TEST_DEVICES)) + goto Recover_platform; + + error = create_image(platform_mode); + /* Control returns here after successful restore */ + + Resume_devices: + dpm_resume_end(in_suspend ? + (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); + resume_console(); + Close: + platform_end(platform_mode); + return error; + + Recover_platform: + platform_recover(platform_mode); + goto Resume_devices; +} + +/** + * resume_target_kernel - prepare devices that need to be suspended with + * interrupts off, restore the contents of highmem that have not been + * restored yet from the image and run the low level code that will restore + * the remaining contents of memory and switch to the just restored target + * kernel. + */ + +static int resume_target_kernel(bool platform_mode) +{ + int error; + + error = dpm_suspend_noirq(PMSG_QUIESCE); + if (error) { + printk(KERN_ERR "PM: Some devices failed to power down, " + "aborting resume\n"); + return error; + } + + error = platform_pre_restore(platform_mode); + if (error) + goto Cleanup; + + error = disable_nonboot_cpus(); + if (error) + goto Enable_cpus; + + local_irq_disable(); + + error = sysdev_suspend(PMSG_QUIESCE); + if (error) + goto Enable_irqs; + + /* We'll ignore saved state, but this gets preempt count (etc) right */ + save_processor_state(); + error = restore_highmem(); + if (!error) { + error = swsusp_arch_resume(); + /* + * The code below is only ever reached in case of a failure. + * Otherwise execution continues at place where + * swsusp_arch_suspend() was called + */ + BUG_ON(!error); + /* This call to restore_highmem() undos the previous one */ + restore_highmem(); + } + /* + * The only reason why swsusp_arch_resume() can fail is memory being + * very tight, so we have to free it as soon as we can to avoid + * subsequent failures + */ + swsusp_free(); + restore_processor_state(); + touch_softlockup_watchdog(); + + sysdev_resume(); + + Enable_irqs: + local_irq_enable(); + + Enable_cpus: + enable_nonboot_cpus(); + + Cleanup: + platform_restore_cleanup(platform_mode); + + dpm_resume_noirq(PMSG_RECOVER); + + return error; +} + +/** + * hibernation_restore - quiesce devices and restore the hibernation + * snapshot image. If successful, control returns in hibernation_snaphot() + * @platform_mode - if set, use the platform driver, if available, to + * prepare the platform firmware for the transition. + * + * Must be called with pm_mutex held + */ + +int hibernation_restore(int platform_mode) +{ + int error; + + pm_prepare_console(); + suspend_console(); + error = dpm_suspend_start(PMSG_QUIESCE); + if (!error) { + error = resume_target_kernel(platform_mode); + dpm_resume_end(PMSG_RECOVER); + } + resume_console(); + pm_restore_console(); + return error; +} + +/** + * hibernation_platform_enter - enter the hibernation state using the + * platform driver (if available) + */ + +int hibernation_platform_enter(void) +{ + int error; + + if (!hibernation_ops) + return -ENOSYS; + + /* + * We have cancelled the power transition by running + * hibernation_ops->finish() before saving the image, so we should let + * the firmware know that we're going to enter the sleep state after all + */ + error = hibernation_ops->begin(); + if (error) + goto Close; + + entering_platform_hibernation = true; + suspend_console(); + error = dpm_suspend_start(PMSG_HIBERNATE); + if (error) { + if (hibernation_ops->recover) + hibernation_ops->recover(); + goto Resume_devices; + } + + error = dpm_suspend_noirq(PMSG_HIBERNATE); + if (error) + goto Resume_devices; + + error = hibernation_ops->prepare(); + if (error) + goto Platofrm_finish; + + error = disable_nonboot_cpus(); + if (error) + goto Platofrm_finish; + + local_irq_disable(); + sysdev_suspend(PMSG_HIBERNATE); + hibernation_ops->enter(); + /* We should never get here */ + while (1); + + /* + * We don't need to reenable the nonboot CPUs or resume consoles, since + * the system is going to be halted anyway. + */ + Platofrm_finish: + hibernation_ops->finish(); + + dpm_suspend_noirq(PMSG_RESTORE); + + Resume_devices: + entering_platform_hibernation = false; + dpm_resume_end(PMSG_RESTORE); + resume_console(); + + Close: + hibernation_ops->end(); + + return error; +} + +/** + * power_down - Shut the machine down for hibernation. + * + * Use the platform driver, if configured so; otherwise try + * to power off or reboot. + */ + +static void power_down(void) +{ + switch (hibernation_mode) { + case HIBERNATION_TEST: + case HIBERNATION_TESTPROC: + break; + case HIBERNATION_REBOOT: + kernel_restart(NULL); + break; + case HIBERNATION_PLATFORM: + hibernation_platform_enter(); + case HIBERNATION_SHUTDOWN: + kernel_power_off(); + break; + } + kernel_halt(); + /* + * Valid image is on the disk, if we continue we risk serious data + * corruption after resume. + */ + printk(KERN_CRIT "PM: Please power down manually\n"); + while(1); +} + +static int prepare_processes(void) +{ + int error = 0; + + if (freeze_processes()) { + error = -EBUSY; + thaw_processes(); + } + return error; +} + +/** + * hibernate - The granpappy of the built-in hibernation management + */ + +int hibernate(void) +{ + int error; + + mutex_lock(&pm_mutex); + /* The snapshot device should not be opened while we're running */ + if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { + error = -EBUSY; + goto Unlock; + } + + pm_prepare_console(); + error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); + if (error) + goto Exit; + + error = usermodehelper_disable(); + if (error) + goto Exit; + + /* Allocate memory management structures */ + error = create_basic_memory_bitmaps(); + if (error) + goto Exit; + + printk(KERN_INFO "PM: Syncing filesystems ... "); + sys_sync(); + printk("done.\n"); + + error = prepare_processes(); + if (error) + goto Finish; + + if (hibernation_test(TEST_FREEZER)) + goto Thaw; + + if (hibernation_testmode(HIBERNATION_TESTPROC)) + goto Thaw; + + error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); + if (in_suspend && !error) { + unsigned int flags = 0; + + if (hibernation_mode == HIBERNATION_PLATFORM) + flags |= SF_PLATFORM_MODE; + pr_debug("PM: writing image.\n"); + error = swsusp_write(flags); + swsusp_free(); + if (!error) + power_down(); + } else { + pr_debug("PM: Image restored successfully.\n"); + swsusp_free(); + } + Thaw: + thaw_processes(); + Finish: + free_basic_memory_bitmaps(); + usermodehelper_enable(); + Exit: + pm_notifier_call_chain(PM_POST_HIBERNATION); + pm_restore_console(); + atomic_inc(&snapshot_device_available); + Unlock: + mutex_unlock(&pm_mutex); + return error; +} + + +/** + * software_resume - Resume from a saved image. + * + * Called as a late_initcall (so all devices are discovered and + * initialized), we call swsusp to see if we have a saved image or not. + * If so, we quiesce devices, the restore the saved image. We will + * return above (in hibernate() ) if everything goes well. + * Otherwise, we fail gracefully and return to the normally + * scheduled program. + * + */ + +static int software_resume(void) +{ + int error; + unsigned int flags; + + /* + * If the user said "noresume".. bail out early. + */ + if (noresume) + return 0; + + /* + * name_to_dev_t() below takes a sysfs buffer mutex when sysfs + * is configured into the kernel. Since the regular hibernate + * trigger path is via sysfs which takes a buffer mutex before + * calling hibernate functions (which take pm_mutex) this can + * cause lockdep to complain about a possible ABBA deadlock + * which cannot happen since we're in the boot code here and + * sysfs can't be invoked yet. Therefore, we use a subclass + * here to avoid lockdep complaining. + */ + mutex_lock_nested(&pm_mutex, SINGLE_DEPTH_NESTING); + + if (swsusp_resume_device) + goto Check_image; + + if (!strlen(resume_file)) { + error = -ENOENT; + goto Unlock; + } + + pr_debug("PM: Checking image partition %s\n", resume_file); + + /* Check if the device is there */ + swsusp_resume_device = name_to_dev_t(resume_file); + if (!swsusp_resume_device) { + /* + * Some device discovery might still be in progress; we need + * to wait for this to finish. + */ + wait_for_device_probe(); + /* + * We can't depend on SCSI devices being available after loading + * one of their modules until scsi_complete_async_scans() is + * called and the resume device usually is a SCSI one. + */ + scsi_complete_async_scans(); + + swsusp_resume_device = name_to_dev_t(resume_file); + if (!swsusp_resume_device) { + error = -ENODEV; + goto Unlock; + } + } + + Check_image: + pr_debug("PM: Resume from partition %d:%d\n", + MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); + + pr_debug("PM: Checking hibernation image.\n"); + error = swsusp_check(); + if (error) + goto Unlock; + + /* The snapshot device should not be opened while we're running */ + if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { + error = -EBUSY; + goto Unlock; + } + + pm_prepare_console(); + error = pm_notifier_call_chain(PM_RESTORE_PREPARE); + if (error) + goto Finish; + + error = usermodehelper_disable(); + if (error) + goto Finish; + + error = create_basic_memory_bitmaps(); + if (error) + goto Finish; + + pr_debug("PM: Preparing processes for restore.\n"); + error = prepare_processes(); + if (error) { + swsusp_close(FMODE_READ); + goto Done; + } + + pr_debug("PM: Reading hibernation image.\n"); + + error = swsusp_read(&flags); + if (!error) + hibernation_restore(flags & SF_PLATFORM_MODE); + + printk(KERN_ERR "PM: Restore failed, recovering.\n"); + swsusp_free(); + thaw_processes(); + Done: + free_basic_memory_bitmaps(); + usermodehelper_enable(); + Finish: + pm_notifier_call_chain(PM_POST_RESTORE); + pm_restore_console(); + atomic_inc(&snapshot_device_available); + /* For success case, the suspend path will release the lock */ + Unlock: + mutex_unlock(&pm_mutex); + pr_debug("PM: Resume from disk failed.\n"); + return error; +} + +late_initcall(software_resume); + + +static const char * const hibernation_modes[] = { + [HIBERNATION_PLATFORM] = "platform", + [HIBERNATION_SHUTDOWN] = "shutdown", + [HIBERNATION_REBOOT] = "reboot", + [HIBERNATION_TEST] = "test", + [HIBERNATION_TESTPROC] = "testproc", +}; + +/** + * disk - Control hibernation mode + * + * Suspend-to-disk can be handled in several ways. We have a few options + * for putting the system to sleep - using the platform driver (e.g. ACPI + * or other hibernation_ops), powering off the system or rebooting the + * system (for testing) as well as the two test modes. + * + * The system can support 'platform', and that is known a priori (and + * encoded by the presence of hibernation_ops). However, the user may + * choose 'shutdown' or 'reboot' as alternatives, as well as one fo the + * test modes, 'test' or 'testproc'. + * + * show() will display what the mode is currently set to. + * store() will accept one of + * + * 'platform' + * 'shutdown' + * 'reboot' + * 'test' + * 'testproc' + * + * It will only change to 'platform' if the system + * supports it (as determined by having hibernation_ops). + */ + +static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + int i; + char *start = buf; + + for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) { + if (!hibernation_modes[i]) + continue; + switch (i) { + case HIBERNATION_SHUTDOWN: + case HIBERNATION_REBOOT: + case HIBERNATION_TEST: + case HIBERNATION_TESTPROC: + break; + case HIBERNATION_PLATFORM: + if (hibernation_ops) + break; + /* not a valid mode, continue with loop */ + continue; + } + if (i == hibernation_mode) + buf += sprintf(buf, "[%s] ", hibernation_modes[i]); + else + buf += sprintf(buf, "%s ", hibernation_modes[i]); + } + buf += sprintf(buf, "\n"); + return buf-start; +} + + +static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + int error = 0; + int i; + int len; + char *p; + int mode = HIBERNATION_INVALID; + + p = memchr(buf, '\n', n); + len = p ? p - buf : n; + + mutex_lock(&pm_mutex); + for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) { + if (len == strlen(hibernation_modes[i]) + && !strncmp(buf, hibernation_modes[i], len)) { + mode = i; + break; + } + } + if (mode != HIBERNATION_INVALID) { + switch (mode) { + case HIBERNATION_SHUTDOWN: + case HIBERNATION_REBOOT: + case HIBERNATION_TEST: + case HIBERNATION_TESTPROC: + hibernation_mode = mode; + break; + case HIBERNATION_PLATFORM: + if (hibernation_ops) + hibernation_mode = mode; + else + error = -EINVAL; + } + } else + error = -EINVAL; + + if (!error) + pr_debug("PM: Hibernation mode set to '%s'\n", + hibernation_modes[mode]); + mutex_unlock(&pm_mutex); + return error ? error : n; +} + +power_attr(disk); + +static ssize_t resume_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf,"%d:%d\n", MAJOR(swsusp_resume_device), + MINOR(swsusp_resume_device)); +} + +static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned int maj, min; + dev_t res; + int ret = -EINVAL; + + if (sscanf(buf, "%u:%u", &maj, &min) != 2) + goto out; + + res = MKDEV(maj,min); + if (maj != MAJOR(res) || min != MINOR(res)) + goto out; + + mutex_lock(&pm_mutex); + swsusp_resume_device = res; + mutex_unlock(&pm_mutex); + printk(KERN_INFO "PM: Starting manual resume from disk\n"); + noresume = 0; + software_resume(); + ret = n; + out: + return ret; +} + +power_attr(resume); + +static ssize_t image_size_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%lu\n", image_size); +} + +static ssize_t image_size_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned long size; + + if (sscanf(buf, "%lu", &size) == 1) { + image_size = size; + return n; + } + + return -EINVAL; +} + +power_attr(image_size); + +static struct attribute * g[] = { + &disk_attr.attr, + &resume_attr.attr, + &image_size_attr.attr, + NULL, +}; + + +static struct attribute_group attr_group = { + .attrs = g, +}; + + +static int __init pm_disk_init(void) +{ + return sysfs_create_group(power_kobj, &attr_group); +} + +core_initcall(pm_disk_init); + + +static int __init resume_setup(char *str) +{ + if (noresume) + return 1; + + strncpy( resume_file, str, 255 ); + return 1; +} + +static int __init resume_offset_setup(char *str) +{ + unsigned long long offset; + + if (noresume) + return 1; + + if (sscanf(str, "%llu", &offset) == 1) + swsusp_resume_block = offset; + + return 1; +} + +static int __init noresume_setup(char *str) +{ + noresume = 1; + return 1; +} + +__setup("noresume", noresume_setup); +__setup("resume_offset=", resume_offset_setup); +__setup("resume=", resume_setup); diff --git a/kernel/power/power.h b/kernel/power/power.h index 2bd98d9fc19e..26d5a26f82e3 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -45,7 +45,7 @@ static inline char *check_image_kernel(struct swsusp_info *info) */ #define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT) -/* kernel/power/disk.c */ +/* kernel/power/hibernate.c */ extern int hibernation_snapshot(int platform_mode); extern int hibernation_restore(int platform_mode); extern int hibernation_platform_enter(void); @@ -147,7 +147,7 @@ extern int swsusp_swap_in_use(void); */ #define SF_PLATFORM_MODE 1 -/* kernel/power/disk.c */ +/* kernel/power/hibernate.c */ extern int swsusp_check(void); extern void swsusp_free(void); extern int swsusp_read(unsigned int *flags_p); -- cgit v1.2.3 From fce2b111fae9151a53dabb36513b398d03337a19 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Wed, 10 Jun 2009 01:28:19 +0200 Subject: PM/Hibernate: Move NVS routines into a seperate file (v2). The *_nvs_* routines in swsusp.c make use of the io*map() functions, which are only provided for HAS_IOMEM, thus breaking compilation if HAS_IOMEM is not set. Fix this by moving the *_nvs_* routines into hibernate_nvs.c, which is only compiled if HAS_IOMEM is set. [rjw: Change the name of the new file to hibernate_nvs.c, add the license line to the header comment.] Signed-off-by: Cornelia Huck Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- kernel/power/Kconfig | 4 ++ kernel/power/Makefile | 1 + kernel/power/hibernate_nvs.c | 135 +++++++++++++++++++++++++++++++++++++++++++ kernel/power/swsusp.c | 122 -------------------------------------- 4 files changed, 140 insertions(+), 122 deletions(-) create mode 100644 kernel/power/hibernate_nvs.c (limited to 'kernel') diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 23bd4daeb96b..72067cbdb37f 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -116,9 +116,13 @@ config SUSPEND_FREEZER Turning OFF this setting is NOT recommended! If in doubt, say Y. +config HIBERNATION_NVS + bool + config HIBERNATION bool "Hibernation (aka 'suspend to disk')" depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE + select HIBERNATION_NVS if HAS_IOMEM ---help--- Enable the suspend to disk (STD) functionality, which is usually called "hibernation" in user interfaces. STD checkpoints the diff --git a/kernel/power/Makefile b/kernel/power/Makefile index eadb17fc8f5e..c3b81c30e5d5 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -9,5 +9,6 @@ obj-$(CONFIG_FREEZER) += process.o obj-$(CONFIG_SUSPEND) += suspend.o obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o obj-$(CONFIG_HIBERNATION) += swsusp.o hibernate.o snapshot.o swap.o user.o +obj-$(CONFIG_HIBERNATION_NVS) += hibernate_nvs.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o diff --git a/kernel/power/hibernate_nvs.c b/kernel/power/hibernate_nvs.c new file mode 100644 index 000000000000..39ac698ef836 --- /dev/null +++ b/kernel/power/hibernate_nvs.c @@ -0,0 +1,135 @@ +/* + * linux/kernel/power/hibernate_nvs.c - Routines for handling NVS memory + * + * Copyright (C) 2008,2009 Rafael J. Wysocki , Novell Inc. + * + * This file is released under the GPLv2. + */ + +#include +#include +#include +#include +#include + +/* + * Platforms, like ACPI, may want us to save some memory used by them during + * hibernation and to restore the contents of this memory during the subsequent + * resume. The code below implements a mechanism allowing us to do that. + */ + +struct nvs_page { + unsigned long phys_start; + unsigned int size; + void *kaddr; + void *data; + struct list_head node; +}; + +static LIST_HEAD(nvs_list); + +/** + * hibernate_nvs_register - register platform NVS memory region to save + * @start - physical address of the region + * @size - size of the region + * + * The NVS region need not be page-aligned (both ends) and we arrange + * things so that the data from page-aligned addresses in this region will + * be copied into separate RAM pages. + */ +int hibernate_nvs_register(unsigned long start, unsigned long size) +{ + struct nvs_page *entry, *next; + + while (size > 0) { + unsigned int nr_bytes; + + entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL); + if (!entry) + goto Error; + + list_add_tail(&entry->node, &nvs_list); + entry->phys_start = start; + nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK); + entry->size = (size < nr_bytes) ? size : nr_bytes; + + start += entry->size; + size -= entry->size; + } + return 0; + + Error: + list_for_each_entry_safe(entry, next, &nvs_list, node) { + list_del(&entry->node); + kfree(entry); + } + return -ENOMEM; +} + +/** + * hibernate_nvs_free - free data pages allocated for saving NVS regions + */ +void hibernate_nvs_free(void) +{ + struct nvs_page *entry; + + list_for_each_entry(entry, &nvs_list, node) + if (entry->data) { + free_page((unsigned long)entry->data); + entry->data = NULL; + if (entry->kaddr) { + iounmap(entry->kaddr); + entry->kaddr = NULL; + } + } +} + +/** + * hibernate_nvs_alloc - allocate memory necessary for saving NVS regions + */ +int hibernate_nvs_alloc(void) +{ + struct nvs_page *entry; + + list_for_each_entry(entry, &nvs_list, node) { + entry->data = (void *)__get_free_page(GFP_KERNEL); + if (!entry->data) { + hibernate_nvs_free(); + return -ENOMEM; + } + } + return 0; +} + +/** + * hibernate_nvs_save - save NVS memory regions + */ +void hibernate_nvs_save(void) +{ + struct nvs_page *entry; + + printk(KERN_INFO "PM: Saving platform NVS memory\n"); + + list_for_each_entry(entry, &nvs_list, node) + if (entry->data) { + entry->kaddr = ioremap(entry->phys_start, entry->size); + memcpy(entry->data, entry->kaddr, entry->size); + } +} + +/** + * hibernate_nvs_restore - restore NVS memory regions + * + * This function is going to be called with interrupts disabled, so it + * cannot iounmap the virtual addresses used to access the NVS region. + */ +void hibernate_nvs_restore(void) +{ + struct nvs_page *entry; + + printk(KERN_INFO "PM: Restoring platform NVS memory\n"); + + list_for_each_entry(entry, &nvs_list, node) + if (entry->data) + memcpy(entry->kaddr, entry->data, entry->size); +} diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 87b901cb3927..6a07f4dbf2f8 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -186,125 +186,3 @@ void swsusp_show_speed(struct timeval *start, struct timeval *stop, centisecs / 100, centisecs % 100, kps / 1000, (kps % 1000) / 10); } - -/* - * Platforms, like ACPI, may want us to save some memory used by them during - * hibernation and to restore the contents of this memory during the subsequent - * resume. The code below implements a mechanism allowing us to do that. - */ - -struct nvs_page { - unsigned long phys_start; - unsigned int size; - void *kaddr; - void *data; - struct list_head node; -}; - -static LIST_HEAD(nvs_list); - -/** - * hibernate_nvs_register - register platform NVS memory region to save - * @start - physical address of the region - * @size - size of the region - * - * The NVS region need not be page-aligned (both ends) and we arrange - * things so that the data from page-aligned addresses in this region will - * be copied into separate RAM pages. - */ -int hibernate_nvs_register(unsigned long start, unsigned long size) -{ - struct nvs_page *entry, *next; - - while (size > 0) { - unsigned int nr_bytes; - - entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL); - if (!entry) - goto Error; - - list_add_tail(&entry->node, &nvs_list); - entry->phys_start = start; - nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK); - entry->size = (size < nr_bytes) ? size : nr_bytes; - - start += entry->size; - size -= entry->size; - } - return 0; - - Error: - list_for_each_entry_safe(entry, next, &nvs_list, node) { - list_del(&entry->node); - kfree(entry); - } - return -ENOMEM; -} - -/** - * hibernate_nvs_free - free data pages allocated for saving NVS regions - */ -void hibernate_nvs_free(void) -{ - struct nvs_page *entry; - - list_for_each_entry(entry, &nvs_list, node) - if (entry->data) { - free_page((unsigned long)entry->data); - entry->data = NULL; - if (entry->kaddr) { - iounmap(entry->kaddr); - entry->kaddr = NULL; - } - } -} - -/** - * hibernate_nvs_alloc - allocate memory necessary for saving NVS regions - */ -int hibernate_nvs_alloc(void) -{ - struct nvs_page *entry; - - list_for_each_entry(entry, &nvs_list, node) { - entry->data = (void *)__get_free_page(GFP_KERNEL); - if (!entry->data) { - hibernate_nvs_free(); - return -ENOMEM; - } - } - return 0; -} - -/** - * hibernate_nvs_save - save NVS memory regions - */ -void hibernate_nvs_save(void) -{ - struct nvs_page *entry; - - printk(KERN_INFO "PM: Saving platform NVS memory\n"); - - list_for_each_entry(entry, &nvs_list, node) - if (entry->data) { - entry->kaddr = ioremap(entry->phys_start, entry->size); - memcpy(entry->data, entry->kaddr, entry->size); - } -} - -/** - * hibernate_nvs_restore - restore NVS memory regions - * - * This function is going to be called with interrupts disabled, so it - * cannot iounmap the virtual addresses used to access the NVS region. - */ -void hibernate_nvs_restore(void) -{ - struct nvs_page *entry; - - printk(KERN_INFO "PM: Restoring platform NVS memory\n"); - - list_for_each_entry(entry, &nvs_list, node) - if (entry->data) - memcpy(entry->kaddr, entry->data, entry->size); -} -- cgit v1.2.3