summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/Makefile2
-rw-r--r--arch/powerpc/kernel/asm-offsets.c1
-rw-r--r--arch/powerpc/kernel/cpu_setup_a2.S10
-rw-r--r--arch/powerpc/kernel/cputable.c27
-rw-r--r--arch/powerpc/kernel/crash.c220
-rw-r--r--arch/powerpc/kernel/crash_dump.c4
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S2
-rw-r--r--arch/powerpc/kernel/head_44x.S107
-rw-r--r--arch/powerpc/kernel/head_fsl_booke.S2
-rw-r--r--arch/powerpc/kernel/idle.c42
-rw-r--r--arch/powerpc/kernel/idle_power7.S4
-rw-r--r--arch/powerpc/kernel/irq.c28
-rw-r--r--arch/powerpc/kernel/machine_kexec.c5
-rw-r--r--arch/powerpc/kernel/pci-common.c78
-rw-r--r--arch/powerpc/kernel/pci_dn.c3
-rw-r--r--arch/powerpc/kernel/process.c22
-rw-r--r--arch/powerpc/kernel/prom.c20
-rw-r--r--arch/powerpc/kernel/prom_init.c37
-rw-r--r--arch/powerpc/kernel/reloc_32.S208
-rw-r--r--arch/powerpc/kernel/rtas_flash.c6
-rw-r--r--arch/powerpc/kernel/rtasd.c7
-rw-r--r--arch/powerpc/kernel/setup_64.c10
-rw-r--r--arch/powerpc/kernel/smp.c3
-rw-r--r--arch/powerpc/kernel/sysfs.c10
-rw-r--r--arch/powerpc/kernel/time.c105
-rw-r--r--arch/powerpc/kernel/traps.c173
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S8
27 files changed, 791 insertions, 353 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index ce4f7f179117..ee728e433aa2 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -85,6 +85,8 @@ extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o
extra-$(CONFIG_8xx) := head_8xx.o
extra-y += vmlinux.lds
+obj-$(CONFIG_RELOCATABLE_PPC32) += reloc_32.o
+
obj-$(CONFIG_PPC32) += entry_32.o setup_32.o
obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o
obj-$(CONFIG_KGDB) += kgdb.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 7c5324f1ec9c..04caee7d9bc1 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -208,6 +208,7 @@ int main(void)
DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));
DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save));
+ DEFINE(PACA_NAPSTATELOST, offsetof(struct paca_struct, nap_state_lost));
#endif /* CONFIG_PPC64 */
/* RTAS */
diff --git a/arch/powerpc/kernel/cpu_setup_a2.S b/arch/powerpc/kernel/cpu_setup_a2.S
index 7f818feaa7a5..ebc62f42a237 100644
--- a/arch/powerpc/kernel/cpu_setup_a2.S
+++ b/arch/powerpc/kernel/cpu_setup_a2.S
@@ -41,11 +41,16 @@ _GLOBAL(__setup_cpu_a2)
* core local but doing it always won't hurt
*/
-#ifdef CONFIG_PPC_WSP_COPRO
+#ifdef CONFIG_PPC_ICSWX
/* Make sure ACOP starts out as zero */
li r3,0
mtspr SPRN_ACOP,r3
+ /* Skip the following if we are in Guest mode */
+ mfmsr r3
+ andis. r0,r3,MSR_GS@h
+ bne _icswx_skip_guest
+
/* Enable icswx instruction */
mfspr r3,SPRN_A2_CCR2
ori r3,r3,A2_CCR2_ENABLE_ICSWX
@@ -54,7 +59,8 @@ _GLOBAL(__setup_cpu_a2)
/* Unmask all CTs in HACOP */
li r3,-1
mtspr SPRN_HACOP,r3
-#endif /* CONFIG_PPC_WSP_COPRO */
+_icswx_skip_guest:
+#endif /* CONFIG_PPC_ICSWX */
/* Enable doorbell */
mfspr r3,SPRN_A2_CCR2
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index edae5bb06f1f..81db9e2a8a20 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -1505,6 +1505,19 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_4xx,
.platform = "ppc405",
},
+ { /* APM8018X */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x7ff11432,
+ .cpu_name = "APM8018X",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
{ /* default match */
.pvr_mask = 0x00000000,
.pvr_value = 0x00000000,
@@ -1830,6 +1843,20 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_47x,
.platform = "ppc470",
},
+ { /* 476fpe */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x7ff50000,
+ .cpu_name = "476fpe",
+ .cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2,
+ .cpu_user_features = COMMON_USER_BOOKE |
+ PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_47x |
+ MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
+ .icache_bsize = 32,
+ .dcache_bsize = 128,
+ .machine_check = machine_check_47x,
+ .platform = "ppc470",
+ },
{ /* 476 iss */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00050000,
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index d879809d5c45..28be3452e67a 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -10,85 +10,85 @@
*
*/
-#undef DEBUG
-
#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/reboot.h>
#include <linux/kexec.h>
-#include <linux/bootmem.h>
#include <linux/export.h>
#include <linux/crash_dump.h>
#include <linux/delay.h>
-#include <linux/elf.h>
-#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/irq.h>
#include <linux/types.h>
-#include <linux/memblock.h>
#include <asm/processor.h>
#include <asm/machdep.h>
#include <asm/kexec.h>
#include <asm/kdump.h>
#include <asm/prom.h>
-#include <asm/firmware.h>
#include <asm/smp.h>
#include <asm/system.h>
#include <asm/setjmp.h>
-#ifdef DEBUG
-#include <asm/udbg.h>
-#define DBG(fmt...) udbg_printf(fmt)
-#else
-#define DBG(fmt...)
-#endif
+/*
+ * The primary CPU waits a while for all secondary CPUs to enter. This is to
+ * avoid sending an IPI if the secondary CPUs are entering
+ * crash_kexec_secondary on their own (eg via a system reset).
+ *
+ * The secondary timeout has to be longer than the primary. Both timeouts are
+ * in milliseconds.
+ */
+#define PRIMARY_TIMEOUT 500
+#define SECONDARY_TIMEOUT 1000
-/* This keeps a track of which one is crashing cpu. */
+#define IPI_TIMEOUT 10000
+#define REAL_MODE_TIMEOUT 10000
+
+/* This keeps a track of which one is the crashing cpu. */
int crashing_cpu = -1;
-static cpumask_t cpus_in_crash = CPU_MASK_NONE;
-cpumask_t cpus_in_sr = CPU_MASK_NONE;
+static atomic_t cpus_in_crash;
+static int time_to_dump;
#define CRASH_HANDLER_MAX 3
/* NULL terminated list of shutdown handles */
static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1];
static DEFINE_SPINLOCK(crash_handlers_lock);
+static unsigned long crash_shutdown_buf[JMP_BUF_LEN];
+static int crash_shutdown_cpu = -1;
+
+static int handle_fault(struct pt_regs *regs)
+{
+ if (crash_shutdown_cpu == smp_processor_id())
+ longjmp(crash_shutdown_buf, 1);
+ return 0;
+}
+
#ifdef CONFIG_SMP
-static atomic_t enter_on_soft_reset = ATOMIC_INIT(0);
void crash_ipi_callback(struct pt_regs *regs)
{
+ static cpumask_t cpus_state_saved = CPU_MASK_NONE;
+
int cpu = smp_processor_id();
if (!cpu_online(cpu))
return;
hard_irq_disable();
- if (!cpumask_test_cpu(cpu, &cpus_in_crash))
+ if (!cpumask_test_cpu(cpu, &cpus_state_saved)) {
crash_save_cpu(regs, cpu);
- cpumask_set_cpu(cpu, &cpus_in_crash);
-
- /*
- * Entered via soft-reset - could be the kdump
- * process is invoked using soft-reset or user activated
- * it if some CPU did not respond to an IPI.
- * For soft-reset, the secondary CPU can enter this func
- * twice. 1 - using IPI, and 2. soft-reset.
- * Tell the kexec CPU that entered via soft-reset and ready
- * to go down.
- */
- if (cpumask_test_cpu(cpu, &cpus_in_sr)) {
- cpumask_clear_cpu(cpu, &cpus_in_sr);
- atomic_inc(&enter_on_soft_reset);
+ cpumask_set_cpu(cpu, &cpus_state_saved);
}
+ atomic_inc(&cpus_in_crash);
+ smp_mb__after_atomic_inc();
+
/*
* Starting the kdump boot.
* This barrier is needed to make sure that all CPUs are stopped.
- * If not, soft-reset will be invoked to bring other CPUs.
*/
- while (!cpumask_test_cpu(crashing_cpu, &cpus_in_crash))
+ while (!time_to_dump)
cpu_relax();
if (ppc_md.kexec_cpu_down)
@@ -103,106 +103,99 @@ void crash_ipi_callback(struct pt_regs *regs)
/* NOTREACHED */
}
-/*
- * Wait until all CPUs are entered via soft-reset.
- */
-static void crash_soft_reset_check(int cpu)
-{
- unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
-
- cpumask_clear_cpu(cpu, &cpus_in_sr);
- while (atomic_read(&enter_on_soft_reset) != ncpus)
- cpu_relax();
-}
-
-
static void crash_kexec_prepare_cpus(int cpu)
{
unsigned int msecs;
-
unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
+ int tries = 0;
+ int (*old_handler)(struct pt_regs *regs);
+
+ printk(KERN_EMERG "Sending IPI to other CPUs\n");
crash_send_ipi(crash_ipi_callback);
smp_wmb();
+again:
/*
* FIXME: Until we will have the way to stop other CPUs reliably,
* the crash CPU will send an IPI and wait for other CPUs to
* respond.
- * Delay of at least 10 seconds.
*/
- printk(KERN_EMERG "Sending IPI to other cpus...\n");
- msecs = 10000;
- while ((cpumask_weight(&cpus_in_crash) < ncpus) && (--msecs > 0)) {
- cpu_relax();
+ msecs = IPI_TIMEOUT;
+ while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0))
mdelay(1);
- }
/* Would it be better to replace the trap vector here? */
+ if (atomic_read(&cpus_in_crash) >= ncpus) {
+ printk(KERN_EMERG "IPI complete\n");
+ return;
+ }
+
+ printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n",
+ ncpus - atomic_read(&cpus_in_crash));
+
/*
- * FIXME: In case if we do not get all CPUs, one possibility: ask the
- * user to do soft reset such that we get all.
- * Soft-reset will be used until better mechanism is implemented.
+ * If we have a panic timeout set then we can't wait indefinitely
+ * for someone to activate system reset. We also give up on the
+ * second time through if system reset fail to work.
*/
- if (cpumask_weight(&cpus_in_crash) < ncpus) {
- printk(KERN_EMERG "done waiting: %d cpu(s) not responding\n",
- ncpus - cpumask_weight(&cpus_in_crash));
- printk(KERN_EMERG "Activate soft-reset to stop other cpu(s)\n");
- cpumask_clear(&cpus_in_sr);
- atomic_set(&enter_on_soft_reset, 0);
- while (cpumask_weight(&cpus_in_crash) < ncpus)
- cpu_relax();
- }
+ if ((panic_timeout > 0) || (tries > 0))
+ return;
+
/*
- * Make sure all CPUs are entered via soft-reset if the kdump is
- * invoked using soft-reset.
+ * A system reset will cause all CPUs to take an 0x100 exception.
+ * The primary CPU returns here via setjmp, and the secondary
+ * CPUs reexecute the crash_kexec_secondary path.
*/
- if (cpumask_test_cpu(cpu, &cpus_in_sr))
- crash_soft_reset_check(cpu);
- /* Leave the IPI callback set */
+ old_handler = __debugger;
+ __debugger = handle_fault;
+ crash_shutdown_cpu = smp_processor_id();
+
+ if (setjmp(crash_shutdown_buf) == 0) {
+ printk(KERN_EMERG "Activate system reset (dumprestart) "
+ "to stop other cpu(s)\n");
+
+ /*
+ * A system reset will force all CPUs to execute the
+ * crash code again. We need to reset cpus_in_crash so we
+ * wait for everyone to do this.
+ */
+ atomic_set(&cpus_in_crash, 0);
+ smp_mb();
+
+ while (atomic_read(&cpus_in_crash) < ncpus)
+ cpu_relax();
+ }
+
+ crash_shutdown_cpu = -1;
+ __debugger = old_handler;
+
+ tries++;
+ goto again;
}
/*
- * This function will be called by secondary cpus or by kexec cpu
- * if soft-reset is activated to stop some CPUs.
+ * This function will be called by secondary cpus.
*/
void crash_kexec_secondary(struct pt_regs *regs)
{
- int cpu = smp_processor_id();
unsigned long flags;
- int msecs = 5;
+ int msecs = SECONDARY_TIMEOUT;
local_irq_save(flags);
- /* Wait 5ms if the kexec CPU is not entered yet. */
+
+ /* Wait for the primary crash CPU to signal its progress */
while (crashing_cpu < 0) {
if (--msecs < 0) {
- /*
- * Either kdump image is not loaded or
- * kdump process is not started - Probably xmon
- * exited using 'x'(exit and recover) or
- * kexec_should_crash() failed for all running tasks.
- */
- cpumask_clear_cpu(cpu, &cpus_in_sr);
+ /* No response, kdump image may not have been loaded */
local_irq_restore(flags);
return;
}
+
mdelay(1);
- cpu_relax();
- }
- if (cpu == crashing_cpu) {
- /*
- * Panic CPU will enter this func only via soft-reset.
- * Wait until all secondary CPUs entered and
- * then start kexec boot.
- */
- crash_soft_reset_check(cpu);
- cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
- if (ppc_md.kexec_cpu_down)
- ppc_md.kexec_cpu_down(1, 0);
- machine_kexec(kexec_crash_image);
- /* NOTREACHED */
}
+
crash_ipi_callback(regs);
}
@@ -211,7 +204,7 @@ void crash_kexec_secondary(struct pt_regs *regs)
static void crash_kexec_prepare_cpus(int cpu)
{
/*
- * move the secondarys to us so that we can copy
+ * move the secondaries to us so that we can copy
* the new kernel 0-0x100 safely
*
* do this if kexec in setup.c ?
@@ -225,7 +218,6 @@ static void crash_kexec_prepare_cpus(int cpu)
void crash_kexec_secondary(struct pt_regs *regs)
{
- cpumask_clear(&cpus_in_sr);
}
#endif /* CONFIG_SMP */
@@ -236,7 +228,7 @@ static void crash_kexec_wait_realmode(int cpu)
unsigned int msecs;
int i;
- msecs = 10000;
+ msecs = REAL_MODE_TIMEOUT;
for (i=0; i < nr_cpu_ids && msecs > 0; i++) {
if (i == cpu)
continue;
@@ -308,22 +300,11 @@ int crash_shutdown_unregister(crash_shutdown_t handler)
}
EXPORT_SYMBOL(crash_shutdown_unregister);
-static unsigned long crash_shutdown_buf[JMP_BUF_LEN];
-static int crash_shutdown_cpu = -1;
-
-static int handle_fault(struct pt_regs *regs)
-{
- if (crash_shutdown_cpu == smp_processor_id())
- longjmp(crash_shutdown_buf, 1);
- return 0;
-}
-
void default_machine_crash_shutdown(struct pt_regs *regs)
{
unsigned int i;
int (*old_handler)(struct pt_regs *regs);
-
/*
* This function is only called after the system
* has panicked or is otherwise in a critical state.
@@ -341,15 +322,26 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
* such that another IPI will not be sent.
*/
crashing_cpu = smp_processor_id();
- crash_save_cpu(regs, crashing_cpu);
+
+ /*
+ * If we came in via system reset, wait a while for the secondary
+ * CPUs to enter.
+ */
+ if (TRAP(regs) == 0x100)
+ mdelay(PRIMARY_TIMEOUT);
+
crash_kexec_prepare_cpus(crashing_cpu);
- cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
+
+ crash_save_cpu(regs, crashing_cpu);
+
+ time_to_dump = 1;
+
crash_kexec_wait_realmode(crashing_cpu);
machine_kexec_mask_interrupts();
/*
- * Call registered shutdown routines savely. Swap out
+ * Call registered shutdown routines safely. Swap out
* __debugger_fault_handler, and replace on exit.
*/
old_handler = __debugger_fault_handler;
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index 424afb6b8fba..b3ba5163eae2 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -28,7 +28,7 @@
#define DBG(fmt...)
#endif
-#ifndef CONFIG_RELOCATABLE
+#ifndef CONFIG_NONSTATIC_KERNEL
void __init reserve_kdump_trampoline(void)
{
memblock_reserve(0, KDUMP_RESERVE_LIMIT);
@@ -67,7 +67,7 @@ void __init setup_kdump_trampoline(void)
DBG(" <- setup_kdump_trampoline()\n");
}
-#endif /* CONFIG_RELOCATABLE */
+#endif /* CONFIG_NONSTATIC_KERNEL */
static int __init parse_savemaxmem(char *p)
{
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index cf9c69b9189c..d4be7bb3dbdf 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -65,7 +65,7 @@ BEGIN_FTR_SECTION
lbz r0,PACAPROCSTART(r13)
cmpwi r0,0x80
bne 1f
- li r0,0
+ li r0,1
stb r0,PACAPROCSTART(r13)
b kvm_start_guest
1:
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index b725dab0f88a..7dd2981bcc50 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -64,6 +64,35 @@ _ENTRY(_start);
mr r31,r3 /* save device tree ptr */
li r24,0 /* CPU number */
+#ifdef CONFIG_RELOCATABLE
+/*
+ * Relocate ourselves to the current runtime address.
+ * This is called only by the Boot CPU.
+ * "relocate" is called with our current runtime virutal
+ * address.
+ * r21 will be loaded with the physical runtime address of _stext
+ */
+ bl 0f /* Get our runtime address */
+0: mflr r21 /* Make it accessible */
+ addis r21,r21,(_stext - 0b)@ha
+ addi r21,r21,(_stext - 0b)@l /* Get our current runtime base */
+
+ /*
+ * We have the runtime (virutal) address of our base.
+ * We calculate our shift of offset from a 256M page.
+ * We could map the 256M page we belong to at PAGE_OFFSET and
+ * get going from there.
+ */
+ lis r4,KERNELBASE@h
+ ori r4,r4,KERNELBASE@l
+ rlwinm r6,r21,0,4,31 /* r6 = PHYS_START % 256M */
+ rlwinm r5,r4,0,4,31 /* r5 = KERNELBASE % 256M */
+ subf r3,r5,r6 /* r3 = r6 - r5 */
+ add r3,r4,r3 /* Required Virutal Address */
+
+ bl relocate
+#endif
+
bl init_cpu_state
/*
@@ -88,6 +117,65 @@ _ENTRY(_start);
#ifdef CONFIG_RELOCATABLE
/*
+ * Relocatable kernel support based on processing of dynamic
+ * relocation entries.
+ *
+ * r25 will contain RPN/ERPN for the start address of memory
+ * r21 will contain the current offset of _stext
+ */
+ lis r3,kernstart_addr@ha
+ la r3,kernstart_addr@l(r3)
+
+ /*
+ * Compute the kernstart_addr.
+ * kernstart_addr => (r6,r8)
+ * kernstart_addr & ~0xfffffff => (r6,r7)
+ */
+ rlwinm r6,r25,0,28,31 /* ERPN. Bits 32-35 of Address */
+ rlwinm r7,r25,0,0,3 /* RPN - assuming 256 MB page size */
+ rlwinm r8,r21,0,4,31 /* r8 = (_stext & 0xfffffff) */
+ or r8,r7,r8 /* Compute the lower 32bit of kernstart_addr */
+
+ /* Store kernstart_addr */
+ stw r6,0(r3) /* higher 32bit */
+ stw r8,4(r3) /* lower 32bit */
+
+ /*
+ * Compute the virt_phys_offset :
+ * virt_phys_offset = stext.run - kernstart_addr
+ *
+ * stext.run = (KERNELBASE & ~0xfffffff) + (kernstart_addr & 0xfffffff)
+ * When we relocate, we have :
+ *
+ * (kernstart_addr & 0xfffffff) = (stext.run & 0xfffffff)
+ *
+ * hence:
+ * virt_phys_offset = (KERNELBASE & ~0xfffffff) - (kernstart_addr & ~0xfffffff)
+ *
+ */
+
+ /* KERNELBASE&~0xfffffff => (r4,r5) */
+ li r4, 0 /* higer 32bit */
+ lis r5,KERNELBASE@h
+ rlwinm r5,r5,0,0,3 /* Align to 256M, lower 32bit */
+
+ /*
+ * 64bit subtraction.
+ */
+ subfc r5,r7,r5
+ subfe r4,r6,r4
+
+ /* Store virt_phys_offset */
+ lis r3,virt_phys_offset@ha
+ la r3,virt_phys_offset@l(r3)
+
+ stw r4,0(r3)
+ stw r5,4(r3)
+
+#elif defined(CONFIG_DYNAMIC_MEMSTART)
+ /*
+ * Mapping based, page aligned dynamic kernel loading.
+ *
* r25 will contain RPN/ERPN for the start address of memory
*
* Add the difference between KERNELBASE and PAGE_OFFSET to the
@@ -732,6 +820,8 @@ _GLOBAL(init_cpu_state)
/* We use the PVR to differenciate 44x cores from 476 */
mfspr r3,SPRN_PVR
srwi r3,r3,16
+ cmplwi cr0,r3,PVR_476FPE@h
+ beq head_start_47x
cmplwi cr0,r3,PVR_476@h
beq head_start_47x
cmplwi cr0,r3,PVR_476_ISS@h
@@ -800,12 +890,29 @@ skpinv: addi r4,r4,1 /* Increment */
/*
* Configure and load pinned entry into TLB slot 63.
*/
+#ifdef CONFIG_NONSTATIC_KERNEL
+ /*
+ * In case of a NONSTATIC_KERNEL we reuse the TLB XLAT
+ * entries of the initial mapping set by the boot loader.
+ * The XLAT entry is stored in r25
+ */
+
+ /* Read the XLAT entry for our current mapping */
+ tlbre r25,r23,PPC44x_TLB_XLAT
+
+ lis r3,KERNELBASE@h
+ ori r3,r3,KERNELBASE@l
+
+ /* Use our current RPN entry */
+ mr r4,r25
+#else
lis r3,PAGE_OFFSET@h
ori r3,r3,PAGE_OFFSET@l
/* Kernel is at the base of RAM */
li r4, 0 /* Load the kernel physical address */
+#endif
/* Load the kernel PID = 0 */
li r0,0
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 9f5d210ddf3f..d5d78c4ceef6 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -197,7 +197,7 @@ _ENTRY(__early_start)
bl early_init
-#ifdef CONFIG_RELOCATABLE
+#ifdef CONFIG_DYNAMIC_MEMSTART
lis r3,kernstart_addr@ha
la r3,kernstart_addr@l(r3)
#ifdef CONFIG_PHYS_64BIT
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index 39a2baa6ad58..7c66ce13da89 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -39,13 +39,23 @@
#define cpu_should_die() 0
#endif
+unsigned long cpuidle_disable = IDLE_NO_OVERRIDE;
+EXPORT_SYMBOL(cpuidle_disable);
+
static int __init powersave_off(char *arg)
{
ppc_md.power_save = NULL;
+ cpuidle_disable = IDLE_POWERSAVE_OFF;
return 0;
}
__setup("powersave=off", powersave_off);
+#if defined(CONFIG_PPC_PSERIES) && defined(CONFIG_TRACEPOINTS)
+static const bool idle_uses_rcu = 1;
+#else
+static const bool idle_uses_rcu;
+#endif
+
/*
* The body of the idle task.
*/
@@ -56,7 +66,10 @@ void cpu_idle(void)
set_thread_flag(TIF_POLLING_NRFLAG);
while (1) {
- tick_nohz_stop_sched_tick(1);
+ tick_nohz_idle_enter();
+ if (!idle_uses_rcu)
+ rcu_idle_enter();
+
while (!need_resched() && !cpu_should_die()) {
ppc64_runlatch_off();
@@ -93,7 +106,9 @@ void cpu_idle(void)
HMT_medium();
ppc64_runlatch_on();
- tick_nohz_restart_sched_tick();
+ if (!idle_uses_rcu)
+ rcu_idle_exit();
+ tick_nohz_idle_exit();
preempt_enable_no_resched();
if (cpu_should_die())
cpu_die();
@@ -102,6 +117,29 @@ void cpu_idle(void)
}
}
+
+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs come out of the old
+ * idle loop and start using the new idle loop.
+ * Required while changing idle handler on SMP systems.
+ * Caller must have changed idle handler to the new value before the call.
+ * This window may be larger on shared systems.
+ */
+void cpu_idle_wait(void)
+{
+ int cpu;
+ smp_mb();
+
+ /* kick all the CPUs so that they exit out of old idle routine */
+ get_online_cpus();
+ for_each_online_cpu(cpu) {
+ if (cpu != smp_processor_id())
+ smp_send_reschedule(cpu);
+ }
+ put_online_cpus();
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
int powersave_nap;
#ifdef CONFIG_SYSCTL
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index 3a70845a51c7..fcdff198da4b 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -54,6 +54,7 @@ _GLOBAL(power7_idle)
li r0,0
stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */
stb r0,PACAHARDIRQEN(r13)
+ stb r0,PACA_NAPSTATELOST(r13)
/* Continue saving state */
SAVE_GPR(2, r1)
@@ -86,6 +87,9 @@ _GLOBAL(power7_wakeup_loss)
rfid
_GLOBAL(power7_wakeup_noloss)
+ lbz r0,PACA_NAPSTATELOST(r13)
+ cmpwi r0,0
+ bne .power7_wakeup_loss
ld r1,PACAR1(r13)
ld r4,_MSR(r1)
ld r5,_NIP(r1)
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 5c3c46948d94..701d4aceb4f4 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -115,6 +115,15 @@ static inline notrace void set_soft_enabled(unsigned long enable)
: : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
}
+static inline notrace void decrementer_check_overflow(void)
+{
+ u64 now = get_tb_or_rtc();
+ u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
+
+ if (now >= *next_tb)
+ set_dec(1);
+}
+
notrace void arch_local_irq_restore(unsigned long en)
{
/*
@@ -164,24 +173,21 @@ notrace void arch_local_irq_restore(unsigned long en)
*/
local_paca->hard_enabled = en;
-#ifndef CONFIG_BOOKE
- /* On server, re-trigger the decrementer if it went negative since
- * some processors only trigger on edge transitions of the sign bit.
- *
- * BookE has a level sensitive decrementer (latches in TSR) so we
- * don't need that
+ /*
+ * Trigger the decrementer if we have a pending event. Some processors
+ * only trigger on edge transitions of the sign bit. We might also
+ * have disabled interrupts long enough that the decrementer wrapped
+ * to positive.
*/
- if ((int)mfspr(SPRN_DEC) < 0)
- mtspr(SPRN_DEC, 1);
-#endif /* CONFIG_BOOKE */
+ decrementer_check_overflow();
/*
* Force the delivery of pending soft-disabled interrupts on PS3.
* Any HV call will have this side effect.
*/
if (firmware_has_feature(FW_FEATURE_PS3_LV1)) {
- u64 tmp;
- lv1_get_version_info(&tmp);
+ u64 tmp, tmp2;
+ lv1_get_version_info(&tmp, &tmp2);
}
__hard_irq_enable();
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index 9ce1672afb59..c957b1202bdc 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -107,9 +107,6 @@ void __init reserve_crashkernel(void)
unsigned long long crash_size, crash_base;
int ret;
- /* this is necessary because of memblock_phys_mem_size() */
- memblock_analyze();
-
/* use common parsing */
ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
&crash_size, &crash_base);
@@ -128,7 +125,7 @@ void __init reserve_crashkernel(void)
crash_size = resource_size(&crashk_res);
-#ifndef CONFIG_RELOCATABLE
+#ifndef CONFIG_NONSTATIC_KERNEL
if (crashk_res.start != KDUMP_KERNELBASE)
printk("Crash kernel location must be 0x%x\n",
KDUMP_KERNELBASE);
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 458ed3bee663..fa4a573d6716 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -214,7 +214,7 @@ char __devinit *pcibios_setup(char *str)
* If the interrupt is used, then gets the interrupt line from the
* openfirmware and sets it in the pci_dev and pci_config line.
*/
-int pci_read_irq_line(struct pci_dev *pci_dev)
+static int pci_read_irq_line(struct pci_dev *pci_dev)
{
struct of_irq oirq;
unsigned int virq;
@@ -283,7 +283,6 @@ int pci_read_irq_line(struct pci_dev *pci_dev)
return 0;
}
-EXPORT_SYMBOL(pci_read_irq_line);
/*
* Platform support for /proc/bus/pci/X/Y mmap()s,
@@ -921,18 +920,22 @@ static void __devinit pcibios_fixup_resources(struct pci_dev *dev)
struct resource *res = dev->resource + i;
if (!res->flags)
continue;
- /* On platforms that have PCI_PROBE_ONLY set, we don't
- * consider 0 as an unassigned BAR value. It's technically
- * a valid value, but linux doesn't like it... so when we can
- * re-assign things, we do so, but if we can't, we keep it
- * around and hope for the best...
+
+ /* If we're going to re-assign everything, we mark all resources
+ * as unset (and 0-base them). In addition, we mark BARs starting
+ * at 0 as unset as well, except if PCI_PROBE_ONLY is also set
+ * since in that case, we don't want to re-assign anything
*/
- if (res->start == 0 && !pci_has_flag(PCI_PROBE_ONLY)) {
- pr_debug("PCI:%s Resource %d %016llx-%016llx [%x] is unassigned\n",
- pci_name(dev), i,
- (unsigned long long)res->start,
- (unsigned long long)res->end,
- (unsigned int)res->flags);
+ if (pci_has_flag(PCI_REASSIGN_ALL_RSRC) ||
+ (res->start == 0 && !pci_has_flag(PCI_PROBE_ONLY))) {
+ /* Only print message if not re-assigning */
+ if (!pci_has_flag(PCI_REASSIGN_ALL_RSRC))
+ pr_debug("PCI:%s Resource %d %016llx-%016llx [%x] "
+ "is unassigned\n",
+ pci_name(dev), i,
+ (unsigned long long)res->start,
+ (unsigned long long)res->end,
+ (unsigned int)res->flags);
res->end -= res->start;
res->start = 0;
res->flags |= IORESOURCE_UNSET;
@@ -1042,6 +1045,16 @@ static void __devinit pcibios_fixup_bridge(struct pci_bus *bus)
if (i >= 3 && bus->self->transparent)
continue;
+ /* If we are going to re-assign everything, mark the resource
+ * as unset and move it down to 0
+ */
+ if (pci_has_flag(PCI_REASSIGN_ALL_RSRC)) {
+ res->flags |= IORESOURCE_UNSET;
+ res->end -= res->start;
+ res->start = 0;
+ continue;
+ }
+
pr_debug("PCI:%s Bus rsrc %d %016llx-%016llx [%x] fixup...\n",
pci_name(dev), i,
(unsigned long long)res->start,\
@@ -1262,18 +1275,15 @@ void pcibios_allocate_bus_resources(struct pci_bus *bus)
pci_bus_for_each_resource(bus, res, i) {
if (!res || !res->flags || res->start > res->end || res->parent)
continue;
+
+ /* If the resource was left unset at this point, we clear it */
+ if (res->flags & IORESOURCE_UNSET)
+ goto clear_resource;
+
if (bus->parent == NULL)
pr = (res->flags & IORESOURCE_IO) ?
&ioport_resource : &iomem_resource;
else {
- /* Don't bother with non-root busses when
- * re-assigning all resources. We clear the
- * resource flags as if they were colliding
- * and as such ensure proper re-allocation
- * later.
- */
- if (pci_has_flag(PCI_REASSIGN_ALL_RSRC))
- goto clear_resource;
pr = pci_find_parent_resource(bus->self, res);
if (pr == res) {
/* this happens when the generic PCI
@@ -1304,9 +1314,9 @@ void pcibios_allocate_bus_resources(struct pci_bus *bus)
if (reparent_resources(pr, res) == 0)
continue;
}
- printk(KERN_WARNING "PCI: Cannot allocate resource region "
- "%d of PCI bridge %d, will remap\n", i, bus->number);
-clear_resource:
+ pr_warning("PCI: Cannot allocate resource region "
+ "%d of PCI bridge %d, will remap\n", i, bus->number);
+ clear_resource:
res->start = res->end = 0;
res->flags = 0;
}
@@ -1451,16 +1461,11 @@ void __init pcibios_resource_survey(void)
{
struct pci_bus *b;
- /* Allocate and assign resources. If we re-assign everything, then
- * we skip the allocate phase
- */
+ /* Allocate and assign resources */
list_for_each_entry(b, &pci_root_buses, node)
pcibios_allocate_bus_resources(b);
-
- if (!pci_has_flag(PCI_REASSIGN_ALL_RSRC)) {
- pcibios_allocate_resources(0);
- pcibios_allocate_resources(1);
- }
+ pcibios_allocate_resources(0);
+ pcibios_allocate_resources(1);
/* Before we start assigning unassigned resource, we try to reserve
* the low IO area and the VGA memory area if they intersect the
@@ -1732,6 +1737,12 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose)
if (mode == PCI_PROBE_NORMAL)
hose->last_busno = bus->subordinate = pci_scan_child_bus(bus);
+ /* Platform gets a chance to do some global fixups before
+ * we proceed to resource allocation
+ */
+ if (ppc_md.pcibios_fixup_phb)
+ ppc_md.pcibios_fixup_phb(hose);
+
/* Configure PCI Express settings */
if (bus && !pci_has_flag(PCI_PROBE_ONLY)) {
struct pci_bus *child;
@@ -1747,10 +1758,13 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose)
static void fixup_hide_host_resource_fsl(struct pci_dev *dev)
{
int i, class = dev->class >> 8;
+ /* When configured as agent, programing interface = 1 */
+ int prog_if = dev->class & 0xf;
if ((class == PCI_CLASS_PROCESSOR_POWERPC ||
class == PCI_CLASS_BRIDGE_OTHER) &&
(dev->hdr_type == PCI_HEADER_TYPE_NORMAL) &&
+ (prog_if == 0) &&
(dev->bus->parent == NULL)) {
for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
dev->resource[i].start = 0;
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 4e69deb89b37..dd9e4a04bf79 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -50,6 +50,9 @@ void * __devinit update_dn_pci_info(struct device_node *dn, void *data)
dn->data = pdn;
pdn->node = dn;
pdn->phb = phb;
+#ifdef CONFIG_PPC_POWERNV
+ pdn->pe_number = IODA_INVALID_PE;
+#endif
regs = of_get_property(dn, "reg", NULL);
if (regs) {
/* First register entry is addr (00BBSS00) */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 6457574c0b2f..ebe5766781aa 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -584,16 +584,32 @@ static struct regbit {
unsigned long bit;
const char *name;
} msr_bits[] = {
+#if defined(CONFIG_PPC64) && !defined(CONFIG_BOOKE)
+ {MSR_SF, "SF"},
+ {MSR_HV, "HV"},
+#endif
+ {MSR_VEC, "VEC"},
+ {MSR_VSX, "VSX"},
+#ifdef CONFIG_BOOKE
+ {MSR_CE, "CE"},
+#endif
{MSR_EE, "EE"},
{MSR_PR, "PR"},
{MSR_FP, "FP"},
- {MSR_VEC, "VEC"},
- {MSR_VSX, "VSX"},
{MSR_ME, "ME"},
- {MSR_CE, "CE"},
+#ifdef CONFIG_BOOKE
{MSR_DE, "DE"},
+#else
+ {MSR_SE, "SE"},
+ {MSR_BE, "BE"},
+#endif
{MSR_IR, "IR"},
{MSR_DR, "DR"},
+ {MSR_PMM, "PMM"},
+#ifndef CONFIG_BOOKE
+ {MSR_RI, "RI"},
+ {MSR_LE, "LE"},
+#endif
{0, NULL}
};
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index fa1235b0503b..abe405dab34d 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -733,8 +733,6 @@ void __init early_init_devtree(void *params)
of_scan_flat_dt(early_init_dt_scan_chosen_ppc, cmd_line);
/* Scan memory nodes and rebuild MEMBLOCKs */
- memblock_init();
-
of_scan_flat_dt(early_init_dt_scan_root, NULL);
of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
@@ -756,20 +754,14 @@ void __init early_init_devtree(void *params)
early_reserve_mem();
phyp_dump_reserve_mem();
- limit = memory_limit;
- if (! limit) {
- phys_addr_t memsize;
-
- /* Ensure that total memory size is page-aligned, because
- * otherwise mark_bootmem() gets upset. */
- memblock_analyze();
- memsize = memblock_phys_mem_size();
- if ((memsize & PAGE_MASK) != memsize)
- limit = memsize & PAGE_MASK;
- }
+ /*
+ * Ensure that total memory size is page-aligned, because otherwise
+ * mark_bootmem() gets upset.
+ */
+ limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE);
memblock_enforce_memory_limit(limit);
- memblock_analyze();
+ memblock_allow_resize();
memblock_dump_all();
DBG("Phys. mem: %llx\n", memblock_phys_mem_size());
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index cc584865b3df..eca626ea3f23 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -742,7 +742,7 @@ static unsigned char ibm_architecture_vec[] = {
W(0xffffffff), /* virt_base */
W(0xffffffff), /* virt_size */
W(0xffffffff), /* load_base */
- W(64), /* 64MB min RMA */
+ W(256), /* 256MB min RMA */
W(0xffffffff), /* full client load */
0, /* min RMA percentage of total RAM */
48, /* max log_2(hash table size) */
@@ -1224,14 +1224,6 @@ static void __init prom_init_mem(void)
RELOC(alloc_bottom) = PAGE_ALIGN((unsigned long)&RELOC(_end) + 0x4000);
- /* Check if we have an initrd after the kernel, if we do move our bottom
- * point to after it
- */
- if (RELOC(prom_initrd_start)) {
- if (RELOC(prom_initrd_end) > RELOC(alloc_bottom))
- RELOC(alloc_bottom) = PAGE_ALIGN(RELOC(prom_initrd_end));
- }
-
/*
* If prom_memory_limit is set we reduce the upper limits *except* for
* alloc_top_high. This must be the real top of RAM so we can put
@@ -1269,6 +1261,15 @@ static void __init prom_init_mem(void)
RELOC(alloc_top) = RELOC(rmo_top);
RELOC(alloc_top_high) = RELOC(ram_top);
+ /*
+ * Check if we have an initrd after the kernel but still inside
+ * the RMO. If we do move our bottom point to after it.
+ */
+ if (RELOC(prom_initrd_start) &&
+ RELOC(prom_initrd_start) < RELOC(rmo_top) &&
+ RELOC(prom_initrd_end) > RELOC(alloc_bottom))
+ RELOC(alloc_bottom) = PAGE_ALIGN(RELOC(prom_initrd_end));
+
prom_printf("memory layout at init:\n");
prom_printf(" memory_limit : %x (16 MB aligned)\n", RELOC(prom_memory_limit));
prom_printf(" alloc_bottom : %x\n", RELOC(alloc_bottom));
@@ -2079,7 +2080,7 @@ static void __init prom_check_displays(void)
/* Setup a usable color table when the appropriate
* method is available. Should update this to set-colors */
clut = RELOC(default_colors);
- for (i = 0; i < 32; i++, clut += 3)
+ for (i = 0; i < 16; i++, clut += 3)
if (prom_set_color(ih, i, clut[0], clut[1],
clut[2]) != 0)
break;
@@ -2844,7 +2845,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
RELOC(of_platform) = prom_find_machine_type();
prom_printf("Detected machine type: %x\n", RELOC(of_platform));
-#ifndef CONFIG_RELOCATABLE
+#ifndef CONFIG_NONSTATIC_KERNEL
/* Bail if this is a kdump kernel. */
if (PHYSICAL_START > 0)
prom_panic("Error: You can't boot a kdump kernel from OF!\n");
@@ -2969,9 +2970,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
/*
* in case stdin is USB and still active on IBM machines...
* Unfortunately quiesce crashes on some powermacs if we have
- * closed stdin already (in particular the powerbook 101).
+ * closed stdin already (in particular the powerbook 101). It
+ * appears that the OPAL version of OFW doesn't like it either.
*/
- if (RELOC(of_platform) != PLATFORM_POWERMAC)
+ if (RELOC(of_platform) != PLATFORM_POWERMAC &&
+ RELOC(of_platform) != PLATFORM_OPAL)
prom_close_stdin();
/*
@@ -2987,8 +2990,12 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
* is common to us and kexec
*/
hdr = RELOC(dt_header_start);
- prom_printf("returning from prom_init\n");
- prom_debug("->dt_header_start=0x%x\n", hdr);
+
+ /* Don't print anything after quiesce under OPAL, it crashes OFW */
+ if (RELOC(of_platform) != PLATFORM_OPAL) {
+ prom_printf("returning from prom_init\n");
+ prom_debug("->dt_header_start=0x%x\n", hdr);
+ }
#ifdef CONFIG_PPC32
reloc_got2(-offset);
diff --git a/arch/powerpc/kernel/reloc_32.S b/arch/powerpc/kernel/reloc_32.S
new file mode 100644
index 000000000000..ef46ba6e094f
--- /dev/null
+++ b/arch/powerpc/kernel/reloc_32.S
@@ -0,0 +1,208 @@
+/*
+ * Code to process dynamic relocations for PPC32.
+ *
+ * Copyrights (C) IBM Corporation, 2011.
+ * Author: Suzuki Poulose <suzuki@in.ibm.com>
+ *
+ * - Based on ppc64 code - reloc_64.S
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/ppc_asm.h>
+
+/* Dynamic section table entry tags */
+DT_RELA = 7 /* Tag for Elf32_Rela section */
+DT_RELASZ = 8 /* Size of the Rela relocs */
+DT_RELAENT = 9 /* Size of one Rela reloc entry */
+
+STN_UNDEF = 0 /* Undefined symbol index */
+STB_LOCAL = 0 /* Local binding for the symbol */
+
+R_PPC_ADDR16_LO = 4 /* Lower half of (S+A) */
+R_PPC_ADDR16_HI = 5 /* Upper half of (S+A) */
+R_PPC_ADDR16_HA = 6 /* High Adjusted (S+A) */
+R_PPC_RELATIVE = 22
+
+/*
+ * r3 = desired final address
+ */
+
+_GLOBAL(relocate)
+
+ mflr r0 /* Save our LR */
+ bl 0f /* Find our current runtime address */
+0: mflr r12 /* Make it accessible */
+ mtlr r0
+
+ lwz r11, (p_dyn - 0b)(r12)
+ add r11, r11, r12 /* runtime address of .dynamic section */
+ lwz r9, (p_rela - 0b)(r12)
+ add r9, r9, r12 /* runtime address of .rela.dyn section */
+ lwz r10, (p_st - 0b)(r12)
+ add r10, r10, r12 /* runtime address of _stext section */
+ lwz r13, (p_sym - 0b)(r12)
+ add r13, r13, r12 /* runtime address of .dynsym section */
+
+ /*
+ * Scan the dynamic section for RELA, RELASZ entries
+ */
+ li r6, 0
+ li r7, 0
+ li r8, 0
+1: lwz r5, 0(r11) /* ELF_Dyn.d_tag */
+ cmpwi r5, 0 /* End of ELF_Dyn[] */
+ beq eodyn
+ cmpwi r5, DT_RELA
+ bne relasz
+ lwz r7, 4(r11) /* r7 = rela.link */
+ b skip
+relasz:
+ cmpwi r5, DT_RELASZ
+ bne relaent
+ lwz r8, 4(r11) /* r8 = Total Rela relocs size */
+ b skip
+relaent:
+ cmpwi r5, DT_RELAENT
+ bne skip
+ lwz r6, 4(r11) /* r6 = Size of one Rela reloc */
+skip:
+ addi r11, r11, 8
+ b 1b
+eodyn: /* End of Dyn Table scan */
+
+ /* Check if we have found all the entries */
+ cmpwi r7, 0
+ beq done
+ cmpwi r8, 0
+ beq done
+ cmpwi r6, 0
+ beq done
+
+
+ /*
+ * Work out the current offset from the link time address of .rela
+ * section.
+ * cur_offset[r7] = rela.run[r9] - rela.link [r7]
+ * _stext.link[r12] = _stext.run[r10] - cur_offset[r7]
+ * final_offset[r3] = _stext.final[r3] - _stext.link[r12]
+ */
+ subf r7, r7, r9 /* cur_offset */
+ subf r12, r7, r10
+ subf r3, r12, r3 /* final_offset */
+
+ subf r8, r6, r8 /* relaz -= relaent */
+ /*
+ * Scan through the .rela table and process each entry
+ * r9 - points to the current .rela table entry
+ * r13 - points to the symbol table
+ */
+
+ /*
+ * Check if we have a relocation based on symbol
+ * r5 will hold the value of the symbol.
+ */
+applyrela:
+ lwz r4, 4(r9) /* r4 = rela.r_info */
+ srwi r5, r4, 8 /* ELF32_R_SYM(r_info) */
+ cmpwi r5, STN_UNDEF /* sym == STN_UNDEF ? */
+ beq get_type /* value = 0 */
+ /* Find the value of the symbol at index(r5) */
+ slwi r5, r5, 4 /* r5 = r5 * sizeof(Elf32_Sym) */
+ add r12, r13, r5 /* r12 = &__dyn_sym[Index] */
+
+ /*
+ * GNU ld has a bug, where dynamic relocs based on
+ * STB_LOCAL symbols, the value should be assumed
+ * to be zero. - Alan Modra
+ */
+ /* XXX: Do we need to check if we are using GNU ld ? */
+ lbz r5, 12(r12) /* r5 = dyn_sym[Index].st_info */
+ extrwi r5, r5, 4, 24 /* r5 = ELF32_ST_BIND(r5) */
+ cmpwi r5, STB_LOCAL /* st_value = 0, ld bug */
+ beq get_type /* We have r5 = 0 */
+ lwz r5, 4(r12) /* r5 = __dyn_sym[Index].st_value */
+
+get_type:
+ /* Load the relocation type to r4 */
+ extrwi r4, r4, 8, 24 /* r4 = ELF32_R_TYPE(r_info) = ((char*)r4)[3] */
+
+ /* R_PPC_RELATIVE */
+ cmpwi r4, R_PPC_RELATIVE
+ bne hi16
+ lwz r4, 0(r9) /* r_offset */
+ lwz r0, 8(r9) /* r_addend */
+ add r0, r0, r3 /* final addend */
+ stwx r0, r4, r7 /* memory[r4+r7]) = (u32)r0 */
+ b nxtrela /* continue */
+
+ /* R_PPC_ADDR16_HI */
+hi16:
+ cmpwi r4, R_PPC_ADDR16_HI
+ bne ha16
+ lwz r4, 0(r9) /* r_offset */
+ lwz r0, 8(r9) /* r_addend */
+ add r0, r0, r3
+ add r0, r0, r5 /* r0 = (S+A+Offset) */
+ extrwi r0, r0, 16, 0 /* r0 = (r0 >> 16) */
+ b store_half
+
+ /* R_PPC_ADDR16_HA */
+ha16:
+ cmpwi r4, R_PPC_ADDR16_HA
+ bne lo16
+ lwz r4, 0(r9) /* r_offset */
+ lwz r0, 8(r9) /* r_addend */
+ add r0, r0, r3
+ add r0, r0, r5 /* r0 = (S+A+Offset) */
+ extrwi r5, r0, 1, 16 /* Extract bit 16 */
+ extrwi r0, r0, 16, 0 /* r0 = (r0 >> 16) */
+ add r0, r0, r5 /* Add it to r0 */
+ b store_half
+
+ /* R_PPC_ADDR16_LO */
+lo16:
+ cmpwi r4, R_PPC_ADDR16_LO
+ bne nxtrela
+ lwz r4, 0(r9) /* r_offset */
+ lwz r0, 8(r9) /* r_addend */
+ add r0, r0, r3
+ add r0, r0, r5 /* r0 = (S+A+Offset) */
+ extrwi r0, r0, 16, 16 /* r0 &= 0xffff */
+ /* Fall through to */
+
+ /* Store half word */
+store_half:
+ sthx r0, r4, r7 /* memory[r4+r7] = (u16)r0 */
+
+nxtrela:
+ /*
+ * We have to flush the modified instructions to the
+ * main storage from the d-cache. And also, invalidate the
+ * cached instructions in i-cache which has been modified.
+ *
+ * We delay the sync / isync operation till the end, since
+ * we won't be executing the modified instructions until
+ * we return from here.
+ */
+ dcbst r4,r7
+ sync /* Ensure the data is flushed before icbi */
+ icbi r4,r7
+ cmpwi r8, 0 /* relasz = 0 ? */
+ ble done
+ add r9, r9, r6 /* move to next entry in the .rela table */
+ subf r8, r6, r8 /* relasz -= relaent */
+ b applyrela
+
+done:
+ sync /* Wait for the flush to finish */
+ isync /* Discard prefetched instructions */
+ blr
+
+p_dyn: .long __dynamic_start - 0b
+p_rela: .long __rela_dyn_start - 0b
+p_sym: .long __dynamic_symtab - 0b
+p_st: .long _stext - 0b
diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
index e037c7494fd8..4174b4b23246 100644
--- a/arch/powerpc/kernel/rtas_flash.c
+++ b/arch/powerpc/kernel/rtas_flash.c
@@ -568,6 +568,12 @@ static void rtas_flash_firmware(int reboot_type)
}
/*
+ * Just before starting the firmware flash, cancel the event scan work
+ * to avoid any soft lockup issues.
+ */
+ rtas_cancel_event_scan();
+
+ /*
* NOTE: the "first" block must be under 4GB, so we create
* an entry with no data blocks in the reserved buffer in
* the kernel data segment.
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 481ef064c8f1..1045ff49cc6d 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -472,6 +472,13 @@ static void start_event_scan(void)
&event_scan_work, event_scan_delay);
}
+/* Cancel the rtas event scan work */
+void rtas_cancel_event_scan(void)
+{
+ cancel_delayed_work_sync(&event_scan_work);
+}
+EXPORT_SYMBOL_GPL(rtas_cancel_event_scan);
+
static int __init rtas_init(void)
{
struct proc_dir_entry *entry;
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index fb9bb46e7e88..4cb8f1e9d044 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -35,6 +35,8 @@
#include <linux/pci.h>
#include <linux/lockdep.h>
#include <linux/memblock.h>
+#include <linux/hugetlb.h>
+
#include <asm/io.h>
#include <asm/kdump.h>
#include <asm/prom.h>
@@ -64,6 +66,7 @@
#include <asm/mmu_context.h>
#include <asm/code-patching.h>
#include <asm/kvm_ppc.h>
+#include <asm/hugetlb.h>
#include "setup.h"
@@ -217,6 +220,13 @@ void __init early_setup(unsigned long dt_ptr)
/* Initialize the hash table or TLB handling */
early_init_mmu();
+ /*
+ * Reserve any gigantic pages requested on the command line.
+ * memblock needs to have been initialized by the time this is
+ * called since this will reserve memory.
+ */
+ reserve_hugetlb_gpages();
+
DBG(" <- early_setup()\n");
}
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index d9a7a464ec51..46695febc09f 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -187,7 +187,8 @@ int smp_request_message_ipi(int virq, int msg)
return 1;
}
#endif
- err = request_irq(virq, smp_ipi_action[msg], IRQF_PERCPU,
+ err = request_irq(virq, smp_ipi_action[msg],
+ IRQF_PERCPU | IRQF_NO_THREAD,
smp_ipi_name[msg], 0);
WARN(err < 0, "unable to request_irq %d for %s (rc %d)\n",
virq, smp_ipi_name[msg], err);
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 5e7c1655f13a..883e74c0d1b3 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -18,6 +18,7 @@
#include <asm/machdep.h>
#include <asm/smp.h>
#include <asm/pmc.h>
+#include <asm/system.h>
#include "cacheinfo.h"
@@ -51,6 +52,7 @@ static ssize_t store_smt_snooze_delay(struct device *dev,
return -EINVAL;
per_cpu(smt_snooze_delay, cpu->dev.id) = snooze;
+ update_smt_snooze_delay(snooze);
return count;
}
@@ -177,11 +179,13 @@ SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
SYSFS_PMCSETUP(purr, SPRN_PURR);
SYSFS_PMCSETUP(spurr, SPRN_SPURR);
SYSFS_PMCSETUP(dscr, SPRN_DSCR);
+SYSFS_PMCSETUP(pir, SPRN_PIR);
static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
static DEVICE_ATTR(spurr, 0600, show_spurr, NULL);
static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr);
static DEVICE_ATTR(purr, 0600, show_purr, store_purr);
+static DEVICE_ATTR(pir, 0400, show_pir, NULL);
unsigned long dscr_default = 0;
EXPORT_SYMBOL(dscr_default);
@@ -391,6 +395,9 @@ static void __cpuinit register_cpu_online(unsigned int cpu)
if (cpu_has_feature(CPU_FTR_DSCR))
device_create_file(s, &dev_attr_dscr);
+
+ if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
+ device_create_file(s, &dev_attr_pir);
#endif /* CONFIG_PPC64 */
cacheinfo_cpu_online(cpu);
@@ -461,6 +468,9 @@ static void unregister_cpu_online(unsigned int cpu)
if (cpu_has_feature(CPU_FTR_DSCR))
device_remove_file(s, &dev_attr_dscr);
+
+ if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
+ device_remove_file(s, &dev_attr_pir);
#endif /* CONFIG_PPC64 */
cacheinfo_cpu_offline(cpu);
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 522bb1dfc353..567dd7c3ac2a 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -86,8 +86,6 @@ static struct clocksource clocksource_rtc = {
.rating = 400,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
.mask = CLOCKSOURCE_MASK(64),
- .shift = 22,
- .mult = 0, /* To be filled in */
.read = rtc_read,
};
@@ -97,8 +95,6 @@ static struct clocksource clocksource_timebase = {
.rating = 400,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
.mask = CLOCKSOURCE_MASK(64),
- .shift = 22,
- .mult = 0, /* To be filled in */
.read = timebase_read,
};
@@ -110,22 +106,16 @@ static void decrementer_set_mode(enum clock_event_mode mode,
struct clock_event_device *dev);
static struct clock_event_device decrementer_clockevent = {
- .name = "decrementer",
- .rating = 200,
- .shift = 0, /* To be filled in */
- .mult = 0, /* To be filled in */
- .irq = 0,
- .set_next_event = decrementer_set_next_event,
- .set_mode = decrementer_set_mode,
- .features = CLOCK_EVT_FEAT_ONESHOT,
+ .name = "decrementer",
+ .rating = 200,
+ .irq = 0,
+ .set_next_event = decrementer_set_next_event,
+ .set_mode = decrementer_set_mode,
+ .features = CLOCK_EVT_FEAT_ONESHOT,
};
-struct decrementer_clock {
- struct clock_event_device event;
- u64 next_tb;
-};
-
-static DEFINE_PER_CPU(struct decrementer_clock, decrementers);
+DEFINE_PER_CPU(u64, decrementers_next_tb);
+static DEFINE_PER_CPU(struct clock_event_device, decrementers);
#ifdef CONFIG_PPC_ISERIES
static unsigned long __initdata iSeries_recal_titan;
@@ -168,13 +158,13 @@ EXPORT_SYMBOL_GPL(ppc_tb_freq);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
/*
* Factors for converting from cputime_t (timebase ticks) to
- * jiffies, milliseconds, seconds, and clock_t (1/USER_HZ seconds).
+ * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds).
* These are all stored as 0.64 fixed-point binary fractions.
*/
u64 __cputime_jiffies_factor;
EXPORT_SYMBOL(__cputime_jiffies_factor);
-u64 __cputime_msec_factor;
-EXPORT_SYMBOL(__cputime_msec_factor);
+u64 __cputime_usec_factor;
+EXPORT_SYMBOL(__cputime_usec_factor);
u64 __cputime_sec_factor;
EXPORT_SYMBOL(__cputime_sec_factor);
u64 __cputime_clockt_factor;
@@ -192,8 +182,8 @@ static void calc_cputime_factors(void)
div128_by_32(HZ, 0, tb_ticks_per_sec, &res);
__cputime_jiffies_factor = res.result_low;
- div128_by_32(1000, 0, tb_ticks_per_sec, &res);
- __cputime_msec_factor = res.result_low;
+ div128_by_32(1000000, 0, tb_ticks_per_sec, &res);
+ __cputime_usec_factor = res.result_low;
div128_by_32(1, 0, tb_ticks_per_sec, &res);
__cputime_sec_factor = res.result_low;
div128_by_32(USER_HZ, 0, tb_ticks_per_sec, &res);
@@ -441,7 +431,7 @@ EXPORT_SYMBOL(profile_pc);
/*
* This function recalibrates the timebase based on the 49-bit time-of-day
* value in the Titan chip. The Titan is much more accurate than the value
- * returned by the service processor for the timebase frequency.
+ * returned by the service processor for the timebase frequency.
*/
static int __init iSeries_tb_recal(void)
@@ -576,9 +566,8 @@ void arch_irq_work_raise(void)
void timer_interrupt(struct pt_regs * regs)
{
struct pt_regs *old_regs;
- struct decrementer_clock *decrementer = &__get_cpu_var(decrementers);
- struct clock_event_device *evt = &decrementer->event;
- u64 now;
+ u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
+ struct clock_event_device *evt = &__get_cpu_var(decrementers);
/* Ensure a positive value is written to the decrementer, or else
* some CPUs will continue to take decrementer exceptions.
@@ -613,16 +602,9 @@ void timer_interrupt(struct pt_regs * regs)
get_lppaca()->int_dword.fields.decr_int = 0;
#endif
- now = get_tb_or_rtc();
- if (now >= decrementer->next_tb) {
- decrementer->next_tb = ~(u64)0;
- if (evt->event_handler)
- evt->event_handler(evt);
- } else {
- now = decrementer->next_tb - now;
- if (now <= DECREMENTER_MAX)
- set_dec((int)now);
- }
+ *next_tb = ~(u64)0;
+ if (evt->event_handler)
+ evt->event_handler(evt);
#ifdef CONFIG_PPC_ISERIES
if (firmware_has_feature(FW_FEATURE_ISERIES) && hvlpevent_is_pending())
@@ -650,9 +632,9 @@ static void generic_suspend_disable_irqs(void)
* with suspending.
*/
- set_dec(0x7fffffff);
+ set_dec(DECREMENTER_MAX);
local_irq_disable();
- set_dec(0x7fffffff);
+ set_dec(DECREMENTER_MAX);
}
static void generic_suspend_enable_irqs(void)
@@ -824,9 +806,8 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
++vdso_data->tb_update_count;
smp_mb();
- /* XXX this assumes clock->shift == 22 */
- /* 4611686018 ~= 2^(20+64-22) / 1e9 */
- new_tb_to_xs = (u64) mult * 4611686018ULL;
+ /* 19342813113834067 ~= 2^(20+64) / 1e9 */
+ new_tb_to_xs = (u64) mult * (19342813113834067ULL >> clock->shift);
new_stamp_xsec = (u64) wall_time->tv_nsec * XSEC_PER_SEC;
do_div(new_stamp_xsec, 1000000000);
new_stamp_xsec += (u64) wall_time->tv_sec * XSEC_PER_SEC;
@@ -877,9 +858,7 @@ static void __init clocksource_init(void)
else
clock = &clocksource_timebase;
- clock->mult = clocksource_hz2mult(tb_ticks_per_sec, clock->shift);
-
- if (clocksource_register(clock)) {
+ if (clocksource_register_hz(clock, tb_ticks_per_sec)) {
printk(KERN_ERR "clocksource: %s is already registered\n",
clock->name);
return;
@@ -892,7 +871,7 @@ static void __init clocksource_init(void)
static int decrementer_set_next_event(unsigned long evt,
struct clock_event_device *dev)
{
- __get_cpu_var(decrementers).next_tb = get_tb_or_rtc() + evt;
+ __get_cpu_var(decrementers_next_tb) = get_tb_or_rtc() + evt;
set_dec(evt);
return 0;
}
@@ -904,34 +883,9 @@ static void decrementer_set_mode(enum clock_event_mode mode,
decrementer_set_next_event(DECREMENTER_MAX, dev);
}
-static inline uint64_t div_sc64(unsigned long ticks, unsigned long nsec,
- int shift)
-{
- uint64_t tmp = ((uint64_t)ticks) << shift;
-
- do_div(tmp, nsec);
- return tmp;
-}
-
-static void __init setup_clockevent_multiplier(unsigned long hz)
-{
- u64 mult, shift = 32;
-
- while (1) {
- mult = div_sc64(hz, NSEC_PER_SEC, shift);
- if (mult && (mult >> 32UL) == 0UL)
- break;
-
- shift--;
- }
-
- decrementer_clockevent.shift = shift;
- decrementer_clockevent.mult = mult;
-}
-
static void register_decrementer_clockevent(int cpu)
{
- struct clock_event_device *dec = &per_cpu(decrementers, cpu).event;
+ struct clock_event_device *dec = &per_cpu(decrementers, cpu);
*dec = decrementer_clockevent;
dec->cpumask = cpumask_of(cpu);
@@ -946,7 +900,8 @@ static void __init init_decrementer_clockevent(void)
{
int cpu = smp_processor_id();
- setup_clockevent_multiplier(ppc_tb_freq);
+ clockevents_calc_mult_shift(&decrementer_clockevent, ppc_tb_freq, 4);
+
decrementer_clockevent.max_delta_ns =
clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent);
decrementer_clockevent.min_delta_ns =
@@ -1014,10 +969,10 @@ void __init time_init(void)
boot_tb = get_tb_or_rtc();
/* If platform provided a timezone (pmac), we correct the time */
- if (timezone_offset) {
+ if (timezone_offset) {
sys_tz.tz_minuteswest = -timezone_offset / 60;
sys_tz.tz_dsttime = 0;
- }
+ }
vdso_data->tb_update_count = 0;
vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 5459d148a0f6..c091527efd89 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -98,18 +98,14 @@ static void pmac_backlight_unblank(void)
static inline void pmac_backlight_unblank(void) { }
#endif
-int die(const char *str, struct pt_regs *regs, long err)
+static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+static int die_owner = -1;
+static unsigned int die_nest_count;
+static int die_counter;
+
+static unsigned __kprobes long oops_begin(struct pt_regs *regs)
{
- static struct {
- raw_spinlock_t lock;
- u32 lock_owner;
- int lock_owner_depth;
- } die = {
- .lock = __RAW_SPIN_LOCK_UNLOCKED(die.lock),
- .lock_owner = -1,
- .lock_owner_depth = 0
- };
- static int die_counter;
+ int cpu;
unsigned long flags;
if (debugger(regs))
@@ -117,66 +113,109 @@ int die(const char *str, struct pt_regs *regs, long err)
oops_enter();
- if (die.lock_owner != raw_smp_processor_id()) {
- console_verbose();
- raw_spin_lock_irqsave(&die.lock, flags);
- die.lock_owner = smp_processor_id();
- die.lock_owner_depth = 0;
- bust_spinlocks(1);
- if (machine_is(powermac))
- pmac_backlight_unblank();
- } else {
- local_save_flags(flags);
+ /* racy, but better than risking deadlock. */
+ raw_local_irq_save(flags);
+ cpu = smp_processor_id();
+ if (!arch_spin_trylock(&die_lock)) {
+ if (cpu == die_owner)
+ /* nested oops. should stop eventually */;
+ else
+ arch_spin_lock(&die_lock);
}
+ die_nest_count++;
+ die_owner = cpu;
+ console_verbose();
+ bust_spinlocks(1);
+ if (machine_is(powermac))
+ pmac_backlight_unblank();
+ return flags;
+}
- if (++die.lock_owner_depth < 3) {
- printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
-#ifdef CONFIG_PREEMPT
- printk("PREEMPT ");
-#endif
-#ifdef CONFIG_SMP
- printk("SMP NR_CPUS=%d ", NR_CPUS);
-#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk("DEBUG_PAGEALLOC ");
-#endif
-#ifdef CONFIG_NUMA
- printk("NUMA ");
-#endif
- printk("%s\n", ppc_md.name ? ppc_md.name : "");
+static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs,
+ int signr)
+{
+ bust_spinlocks(0);
+ die_owner = -1;
+ add_taint(TAINT_DIE);
+ die_nest_count--;
+ oops_exit();
+ printk("\n");
+ if (!die_nest_count)
+ /* Nest count reaches zero, release the lock. */
+ arch_spin_unlock(&die_lock);
+ raw_local_irq_restore(flags);
- if (notify_die(DIE_OOPS, str, regs, err, 255,
- SIGSEGV) == NOTIFY_STOP)
- return 1;
+ /*
+ * A system reset (0x100) is a request to dump, so we always send
+ * it through the crashdump code.
+ */
+ if (kexec_should_crash(current) || (TRAP(regs) == 0x100)) {
+ crash_kexec(regs);
- print_modules();
- show_regs(regs);
- } else {
- printk("Recursive die() failure, output suppressed\n");
+ /*
+ * We aren't the primary crash CPU. We need to send it
+ * to a holding pattern to avoid it ending up in the panic
+ * code.
+ */
+ crash_kexec_secondary(regs);
}
- bust_spinlocks(0);
- die.lock_owner = -1;
- add_taint(TAINT_DIE);
- raw_spin_unlock_irqrestore(&die.lock, flags);
+ if (!signr)
+ return;
- if (kexec_should_crash(current) ||
- kexec_sr_activated(smp_processor_id()))
- crash_kexec(regs);
- crash_kexec_secondary(regs);
+ /*
+ * While our oops output is serialised by a spinlock, output
+ * from panic() called below can race and corrupt it. If we
+ * know we are going to panic, delay for 1 second so we have a
+ * chance to get clean backtraces from all CPUs that are oopsing.
+ */
+ if (in_interrupt() || panic_on_oops || !current->pid ||
+ is_global_init(current)) {
+ mdelay(MSEC_PER_SEC);
+ }
if (in_interrupt())
panic("Fatal exception in interrupt");
-
if (panic_on_oops)
panic("Fatal exception");
+ do_exit(signr);
+}
- oops_exit();
- do_exit(err);
+static int __kprobes __die(const char *str, struct pt_regs *regs, long err)
+{
+ printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
+#ifdef CONFIG_PREEMPT
+ printk("PREEMPT ");
+#endif
+#ifdef CONFIG_SMP
+ printk("SMP NR_CPUS=%d ", NR_CPUS);
+#endif
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ printk("DEBUG_PAGEALLOC ");
+#endif
+#ifdef CONFIG_NUMA
+ printk("NUMA ");
+#endif
+ printk("%s\n", ppc_md.name ? ppc_md.name : "");
+
+ if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP)
+ return 1;
+
+ print_modules();
+ show_regs(regs);
return 0;
}
+void die(const char *str, struct pt_regs *regs, long err)
+{
+ unsigned long flags = oops_begin(regs);
+
+ if (__die(str, regs, err))
+ err = 0;
+ oops_end(flags, regs, err);
+}
+
void user_single_step_siginfo(struct task_struct *tsk,
struct pt_regs *regs, siginfo_t *info)
{
@@ -195,10 +234,11 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
"at %016lx nip %016lx lr %016lx code %x\n";
if (!user_mode(regs)) {
- if (die("Exception in kernel mode", regs, signr))
- return;
- } else if (show_unhandled_signals &&
- unhandled_signal(current, signr)) {
+ die("Exception in kernel mode", regs, signr);
+ return;
+ }
+
+ if (show_unhandled_signals && unhandled_signal(current, signr)) {
printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32,
current->comm, current->pid, signr,
addr, regs->nip, regs->link, code);
@@ -220,25 +260,8 @@ void system_reset_exception(struct pt_regs *regs)
return;
}
-#ifdef CONFIG_KEXEC
- cpumask_set_cpu(smp_processor_id(), &cpus_in_sr);
-#endif
-
die("System Reset", regs, SIGABRT);
- /*
- * Some CPUs when released from the debugger will execute this path.
- * These CPUs entered the debugger via a soft-reset. If the CPU was
- * hung before entering the debugger it will return to the hung
- * state when exiting this function. This causes a problem in
- * kdump since the hung CPU(s) will not respond to the IPI sent
- * from kdump. To prevent the problem we call crash_kexec_secondary()
- * here. If a kdump had not been initiated or we exit the debugger
- * with the "exit and recover" command (x) crash_kexec_secondary()
- * will return after 5ms and the CPU returns to its previous state.
- */
- crash_kexec_secondary(regs);
-
/* Must die if the interrupt is not recoverable */
if (!(regs->msr & MSR_RI))
panic("Unrecoverable System Reset");
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 920276c0f6a1..710a54005dfb 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -170,7 +170,13 @@ SECTIONS
}
#ifdef CONFIG_RELOCATABLE
. = ALIGN(8);
- .dynsym : AT(ADDR(.dynsym) - LOAD_OFFSET) { *(.dynsym) }
+ .dynsym : AT(ADDR(.dynsym) - LOAD_OFFSET)
+ {
+#ifdef CONFIG_RELOCATABLE_PPC32
+ __dynamic_symtab = .;
+#endif
+ *(.dynsym)
+ }
.dynstr : AT(ADDR(.dynstr) - LOAD_OFFSET) { *(.dynstr) }
.dynamic : AT(ADDR(.dynamic) - LOAD_OFFSET)
{