summaryrefslogtreecommitdiffstats
path: root/arch/ia64/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/ia64/kernel')
-rw-r--r--arch/ia64/kernel/Makefile1
-rw-r--r--arch/ia64/kernel/efi.c32
-rw-r--r--arch/ia64/kernel/entry.S6
-rw-r--r--arch/ia64/kernel/fsys.S4
-rw-r--r--arch/ia64/kernel/mca.c8
-rw-r--r--arch/ia64/kernel/minstate.h3
-rw-r--r--arch/ia64/kernel/module.c10
-rw-r--r--arch/ia64/kernel/perfmon.c175
-rw-r--r--arch/ia64/kernel/ptrace.c26
-rw-r--r--arch/ia64/kernel/setup.c3
-rw-r--r--arch/ia64/kernel/smpboot.c2
-rw-r--r--arch/ia64/kernel/sys_ia64.c7
-rw-r--r--arch/ia64/kernel/traps.c29
-rw-r--r--arch/ia64/kernel/uncached.c246
14 files changed, 509 insertions, 43 deletions
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index c1a02bbc252c..4c73d8ba2e3d 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_SMP) += smp.o smpboot.o domain.o
obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o
obj-$(CONFIG_IA64_CYCLONE) += cyclone.o
obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o
+obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o
mca_recovery-y += mca_drv.o mca_drv_asm.o
# The gate DSO image is built using a special linker script.
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 4a3b1aac43e7..179f230816ed 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -410,6 +410,38 @@ efi_memmap_walk (efi_freemem_callback_t callback, void *arg)
}
/*
+ * Walk the EFI memory map to pull out leftover pages in the lower
+ * memory regions which do not end up in the regular memory map and
+ * stick them into the uncached allocator
+ *
+ * The regular walk function is significantly more complex than the
+ * uncached walk which means it really doesn't make sense to try and
+ * marge the two.
+ */
+void __init
+efi_memmap_walk_uc (efi_freemem_callback_t callback)
+{
+ void *efi_map_start, *efi_map_end, *p;
+ efi_memory_desc_t *md;
+ u64 efi_desc_size, start, end;
+
+ efi_map_start = __va(ia64_boot_param->efi_memmap);
+ efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
+ efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+ for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+ md = p;
+ if (md->attribute == EFI_MEMORY_UC) {
+ start = PAGE_ALIGN(md->phys_addr);
+ end = PAGE_ALIGN((md->phys_addr+(md->num_pages << EFI_PAGE_SHIFT)) & PAGE_MASK);
+ if ((*callback)(start, end, NULL) < 0)
+ return;
+ }
+ }
+}
+
+
+/*
* Look for the PAL_CODE region reported by EFI and maps it using an
* ITR to enable safe PAL calls in virtual mode. See IA-64 Processor
* Abstraction Layer chapter 11 in ADAG
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 81c45d447394..b1d5d3d5276c 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1182,7 +1182,7 @@ ENTRY(notify_resume_user)
;;
(pNonSys) mov out2=0 // out2==0 => not a syscall
.fframe 16
- .spillpsp ar.unat, 16 // (note that offset is relative to psp+0x10!)
+ .spillsp ar.unat, 16
st8 [sp]=r9,-16 // allocate space for ar.unat and save it
st8 [out1]=loc1,-8 // save ar.pfs, out1=&sigscratch
.body
@@ -1208,7 +1208,7 @@ GLOBAL_ENTRY(sys_rt_sigsuspend)
adds out2=8,sp // out2=&sigscratch->ar_pfs
;;
.fframe 16
- .spillpsp ar.unat, 16 // (note that offset is relative to psp+0x10!)
+ .spillsp ar.unat, 16
st8 [sp]=r9,-16 // allocate space for ar.unat and save it
st8 [out2]=loc1,-8 // save ar.pfs, out2=&sigscratch
.body
@@ -1579,7 +1579,7 @@ sys_call_table:
data8 sys_keyctl
data8 sys_ni_syscall
data8 sys_ni_syscall // 1275
- data8 sys_ni_syscall
+ data8 sys_set_zone_reclaim
data8 sys_ni_syscall
data8 sys_ni_syscall
data8 sys_ni_syscall
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index 4f3cdef75797..962b6c4e32b5 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -460,9 +460,9 @@ EX(.fail_efault, ld8 r14=[r33]) // r14 <- *set
;;
st8 [r2]=r14 // update current->blocked with new mask
- cmpxchg4.acq r14=[r9],r18,ar.ccv // current->thread_info->flags <- r18
+ cmpxchg4.acq r8=[r9],r18,ar.ccv // current->thread_info->flags <- r18
;;
- cmp.ne p6,p0=r17,r14 // update failed?
+ cmp.ne p6,p0=r17,r8 // update failed?
(p6) br.cond.spnt.few 1b // yes -> retry
#ifdef CONFIG_SMP
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 4d6c7b8f667b..736e328b5e61 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1103,8 +1103,6 @@ ia64_mca_cpe_int_caller(int cpe_irq, void *arg, struct pt_regs *ptregs)
return IRQ_HANDLED;
}
-#endif /* CONFIG_ACPI */
-
/*
* ia64_mca_cpe_poll
*
@@ -1122,6 +1120,8 @@ ia64_mca_cpe_poll (unsigned long dummy)
platform_send_ipi(first_cpu(cpu_online_map), IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0);
}
+#endif /* CONFIG_ACPI */
+
/*
* C portion of the OS INIT handler
*
@@ -1390,8 +1390,7 @@ ia64_mca_init(void)
register_percpu_irq(IA64_MCA_WAKEUP_VECTOR, &mca_wkup_irqaction);
#ifdef CONFIG_ACPI
- /* Setup the CPEI/P vector and handler */
- cpe_vector = acpi_request_vector(ACPI_INTERRUPT_CPEI);
+ /* Setup the CPEI/P handler */
register_percpu_irq(IA64_CPEP_VECTOR, &mca_cpep_irqaction);
#endif
@@ -1436,6 +1435,7 @@ ia64_mca_late_init(void)
#ifdef CONFIG_ACPI
/* Setup the CPEI/P vector and handler */
+ cpe_vector = acpi_request_vector(ACPI_INTERRUPT_CPEI);
init_timer(&cpe_poll_timer);
cpe_poll_timer.function = ia64_mca_cpe_poll;
diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h
index 1dbc7b2497c9..f6d8a010d99b 100644
--- a/arch/ia64/kernel/minstate.h
+++ b/arch/ia64/kernel/minstate.h
@@ -41,7 +41,7 @@
(pKStk) addl r3=THIS_CPU(ia64_mca_data),r3;; \
(pKStk) ld8 r3 = [r3];; \
(pKStk) addl r3=IA64_MCA_CPU_INIT_STACK_OFFSET,r3;; \
-(pKStk) addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r3; \
+(pKStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r3; \
(pUStk) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \
(pUStk) addl r22=IA64_RBS_OFFSET,r1; /* compute base of register backing store */ \
;; \
@@ -50,7 +50,6 @@
(pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \
(pUStk) dep r22=-1,r22,61,3; /* compute kernel virtual addr of RBS */ \
;; \
-(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \
(pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \
;; \
(pUStk) mov r18=ar.bsp; \
diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c
index febc091c2f02..f1aca7cffd12 100644
--- a/arch/ia64/kernel/module.c
+++ b/arch/ia64/kernel/module.c
@@ -825,14 +825,16 @@ apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symind
* XXX Should have an arch-hook for running this after final section
* addresses have been selected...
*/
- /* See if gp can cover the entire core module: */
- uint64_t gp = (uint64_t) mod->module_core + MAX_LTOFF / 2;
- if (mod->core_size >= MAX_LTOFF)
+ uint64_t gp;
+ if (mod->core_size > MAX_LTOFF)
/*
* This takes advantage of fact that SHF_ARCH_SMALL gets allocated
* at the end of the module.
*/
- gp = (uint64_t) mod->module_core + mod->core_size - MAX_LTOFF / 2;
+ gp = mod->core_size - MAX_LTOFF / 2;
+ else
+ gp = mod->core_size / 2;
+ gp = (uint64_t) mod->module_core + ((gp + 7) & -8);
mod->arch.gp = gp;
DEBUGP("%s: placing gp at 0x%lx\n", __FUNCTION__, gp);
}
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 71c101601e3e..6407bff6bfd7 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -11,7 +11,7 @@
* Version Perfmon-2.x is a rewrite of perfmon-1.x
* by Stephane Eranian, Hewlett Packard Co.
*
- * Copyright (C) 1999-2003, 2005 Hewlett Packard Co
+ * Copyright (C) 1999-2005 Hewlett Packard Co
* Stephane Eranian <eranian@hpl.hp.com>
* David Mosberger-Tang <davidm@hpl.hp.com>
*
@@ -497,6 +497,9 @@ typedef struct {
static pfm_stats_t pfm_stats[NR_CPUS];
static pfm_session_t pfm_sessions; /* global sessions information */
+static spinlock_t pfm_alt_install_check = SPIN_LOCK_UNLOCKED;
+static pfm_intr_handler_desc_t *pfm_alt_intr_handler;
+
static struct proc_dir_entry *perfmon_dir;
static pfm_uuid_t pfm_null_uuid = {0,};
@@ -606,6 +609,7 @@ DEFINE_PER_CPU(unsigned long, pfm_syst_info);
DEFINE_PER_CPU(struct task_struct *, pmu_owner);
DEFINE_PER_CPU(pfm_context_t *, pmu_ctx);
DEFINE_PER_CPU(unsigned long, pmu_activation_number);
+EXPORT_PER_CPU_SYMBOL_GPL(pfm_syst_info);
/* forward declaration */
@@ -1325,7 +1329,7 @@ pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
error_conflict:
DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n",
pfm_sessions.pfs_sys_session[cpu]->pid,
- smp_processor_id()));
+ cpu));
abort:
UNLOCK_PFS(flags);
@@ -5555,26 +5559,32 @@ pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
int ret;
this_cpu = get_cpu();
- min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min;
- max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max;
+ if (likely(!pfm_alt_intr_handler)) {
+ min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min;
+ max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max;
- start_cycles = ia64_get_itc();
+ start_cycles = ia64_get_itc();
- ret = pfm_do_interrupt_handler(irq, arg, regs);
+ ret = pfm_do_interrupt_handler(irq, arg, regs);
- total_cycles = ia64_get_itc();
+ total_cycles = ia64_get_itc();
- /*
- * don't measure spurious interrupts
- */
- if (likely(ret == 0)) {
- total_cycles -= start_cycles;
+ /*
+ * don't measure spurious interrupts
+ */
+ if (likely(ret == 0)) {
+ total_cycles -= start_cycles;
- if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles;
- if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles;
+ if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles;
+ if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles;
- pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles;
+ pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles;
+ }
+ }
+ else {
+ (*pfm_alt_intr_handler->handler)(irq, arg, regs);
}
+
put_cpu_no_resched();
return IRQ_HANDLED;
}
@@ -6425,6 +6435,141 @@ static struct irqaction perfmon_irqaction = {
.name = "perfmon"
};
+static void
+pfm_alt_save_pmu_state(void *data)
+{
+ struct pt_regs *regs;
+
+ regs = ia64_task_regs(current);
+
+ DPRINT(("called\n"));
+
+ /*
+ * should not be necessary but
+ * let's take not risk
+ */
+ pfm_clear_psr_up();
+ pfm_clear_psr_pp();
+ ia64_psr(regs)->pp = 0;
+
+ /*
+ * This call is required
+ * May cause a spurious interrupt on some processors
+ */
+ pfm_freeze_pmu();
+
+ ia64_srlz_d();
+}
+
+void
+pfm_alt_restore_pmu_state(void *data)
+{
+ struct pt_regs *regs;
+
+ regs = ia64_task_regs(current);
+
+ DPRINT(("called\n"));
+
+ /*
+ * put PMU back in state expected
+ * by perfmon
+ */
+ pfm_clear_psr_up();
+ pfm_clear_psr_pp();
+ ia64_psr(regs)->pp = 0;
+
+ /*
+ * perfmon runs with PMU unfrozen at all times
+ */
+ pfm_unfreeze_pmu();
+
+ ia64_srlz_d();
+}
+
+int
+pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
+{
+ int ret, i;
+ int reserve_cpu;
+
+ /* some sanity checks */
+ if (hdl == NULL || hdl->handler == NULL) return -EINVAL;
+
+ /* do the easy test first */
+ if (pfm_alt_intr_handler) return -EBUSY;
+
+ /* one at a time in the install or remove, just fail the others */
+ if (!spin_trylock(&pfm_alt_install_check)) {
+ return -EBUSY;
+ }
+
+ /* reserve our session */
+ for_each_online_cpu(reserve_cpu) {
+ ret = pfm_reserve_session(NULL, 1, reserve_cpu);
+ if (ret) goto cleanup_reserve;
+ }
+
+ /* save the current system wide pmu states */
+ ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 0, 1);
+ if (ret) {
+ DPRINT(("on_each_cpu() failed: %d\n", ret));
+ goto cleanup_reserve;
+ }
+
+ /* officially change to the alternate interrupt handler */
+ pfm_alt_intr_handler = hdl;
+
+ spin_unlock(&pfm_alt_install_check);
+
+ return 0;
+
+cleanup_reserve:
+ for_each_online_cpu(i) {
+ /* don't unreserve more than we reserved */
+ if (i >= reserve_cpu) break;
+
+ pfm_unreserve_session(NULL, 1, i);
+ }
+
+ spin_unlock(&pfm_alt_install_check);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(pfm_install_alt_pmu_interrupt);
+
+int
+pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
+{
+ int i;
+ int ret;
+
+ if (hdl == NULL) return -EINVAL;
+
+ /* cannot remove someone else's handler! */
+ if (pfm_alt_intr_handler != hdl) return -EINVAL;
+
+ /* one at a time in the install or remove, just fail the others */
+ if (!spin_trylock(&pfm_alt_install_check)) {
+ return -EBUSY;
+ }
+
+ pfm_alt_intr_handler = NULL;
+
+ ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 0, 1);
+ if (ret) {
+ DPRINT(("on_each_cpu() failed: %d\n", ret));
+ }
+
+ for_each_online_cpu(i) {
+ pfm_unreserve_session(NULL, 1, i);
+ }
+
+ spin_unlock(&pfm_alt_install_check);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pfm_remove_alt_pmu_interrupt);
+
/*
* perfmon initialization routine, called from the initcall() table
*/
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 907464ee7273..575a8f657b31 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -635,11 +635,17 @@ ia64_flush_fph (struct task_struct *task)
{
struct ia64_psr *psr = ia64_psr(ia64_task_regs(task));
+ /*
+ * Prevent migrating this task while
+ * we're fiddling with the FPU state
+ */
+ preempt_disable();
if (ia64_is_local_fpu_owner(task) && psr->mfh) {
psr->mfh = 0;
task->thread.flags |= IA64_THREAD_FPH_VALID;
ia64_save_fpu(&task->thread.fph[0]);
}
+ preempt_enable();
}
/*
@@ -692,16 +698,30 @@ convert_to_non_syscall (struct task_struct *child, struct pt_regs *pt,
unsigned long cfm)
{
struct unw_frame_info info, prev_info;
- unsigned long ip, pr;
+ unsigned long ip, sp, pr;
unw_init_from_blocked_task(&info, child);
while (1) {
prev_info = info;
if (unw_unwind(&info) < 0)
return;
- if (unw_get_rp(&info, &ip) < 0)
+
+ unw_get_sp(&info, &sp);
+ if ((long)((unsigned long)child + IA64_STK_OFFSET - sp)
+ < IA64_PT_REGS_SIZE) {
+ dprintk("ptrace.%s: ran off the top of the kernel "
+ "stack\n", __FUNCTION__);
+ return;
+ }
+ if (unw_get_pr (&prev_info, &pr) < 0) {
+ unw_get_rp(&prev_info, &ip);
+ dprintk("ptrace.%s: failed to read "
+ "predicate register (ip=0x%lx)\n",
+ __FUNCTION__, ip);
return;
- if (ip < FIXADDR_USER_END)
+ }
+ if (unw_is_intr_frame(&info)
+ && (pr & (1UL << PRED_USER_STACK)))
break;
}
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index b7e6b4cb374b..d14692e0920a 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -720,7 +720,8 @@ cpu_init (void)
ia64_set_kr(IA64_KR_PT_BASE, __pa(ia64_imva(empty_zero_page)));
/*
- * Initialize default control register to defer all speculative faults. The
+ * Initialize default control register to defer speculative faults except
+ * for those arising from TLB misses, which are not deferred. The
* kernel MUST NOT depend on a particular setting of these bits (in other words,
* the kernel must have recovery code for all speculative accesses). Turn on
* dcr.lc as per recommendation by the architecture team. Most IA-32 apps
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 0d5ee57c9865..3865f088ffa2 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -624,7 +624,7 @@ static struct {
__u16 thread_id;
__u16 proc_fixed_addr;
__u8 valid;
-}mt_info[NR_CPUS] __devinit;
+} mt_info[NR_CPUS] __devinitdata;
#ifdef CONFIG_HOTPLUG_CPU
static inline void
diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
index a8cf6d8a509c..770fab37928e 100644
--- a/arch/ia64/kernel/sys_ia64.c
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -182,13 +182,6 @@ do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, un
}
}
- /*
- * A zero mmap always succeeds in Linux, independent of whether or not the
- * remaining arguments are valid.
- */
- if (len == 0)
- goto out;
-
/* Careful about overflows.. */
len = PAGE_ALIGN(len);
if (!len || len > TASK_SIZE) {
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index e82ad78081b3..1861173bd4f6 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -111,6 +111,24 @@ ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
siginfo_t siginfo;
int sig, code;
+ /* break.b always sets cr.iim to 0, which causes problems for
+ * debuggers. Get the real break number from the original instruction,
+ * but only for kernel code. User space break.b is left alone, to
+ * preserve the existing behaviour. All break codings have the same
+ * format, so there is no need to check the slot type.
+ */
+ if (break_num == 0 && !user_mode(regs)) {
+ struct ia64_psr *ipsr = ia64_psr(regs);
+ unsigned long *bundle = (unsigned long *)regs->cr_iip;
+ unsigned long slot;
+ switch (ipsr->ri) {
+ case 0: slot = (bundle[0] >> 5); break;
+ case 1: slot = (bundle[0] >> 46) | (bundle[1] << 18); break;
+ default: slot = (bundle[1] >> 23); break;
+ }
+ break_num = ((slot >> 36 & 1) << 20) | (slot >> 6 & 0xfffff);
+ }
+
/* SIGILL, SIGFPE, SIGSEGV, and SIGBUS want these field initialized: */
siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri);
siginfo.si_imm = break_num;
@@ -202,13 +220,21 @@ disabled_fph_fault (struct pt_regs *regs)
/* first, grant user-level access to fph partition: */
psr->dfh = 0;
+
+ /*
+ * Make sure that no other task gets in on this processor
+ * while we're claiming the FPU
+ */
+ preempt_disable();
#ifndef CONFIG_SMP
{
struct task_struct *fpu_owner
= (struct task_struct *)ia64_get_kr(IA64_KR_FPU_OWNER);
- if (ia64_is_local_fpu_owner(current))
+ if (ia64_is_local_fpu_owner(current)) {
+ preempt_enable_no_resched();
return;
+ }
if (fpu_owner)
ia64_flush_fph(fpu_owner);
@@ -226,6 +252,7 @@ disabled_fph_fault (struct pt_regs *regs)
*/
psr->mfh = 1;
}
+ preempt_enable_no_resched();
}
static inline int
diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c
new file mode 100644
index 000000000000..490dfc9ab47f
--- /dev/null
+++ b/arch/ia64/kernel/uncached.c
@@ -0,0 +1,246 @@
+/*
+ * Copyright (C) 2001-2005 Silicon Graphics, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * A simple uncached page allocator using the generic allocator. This
+ * allocator first utilizes the spare (spill) pages found in the EFI
+ * memmap and will then start converting cached pages to uncached ones
+ * at a granule at a time. Node awareness is implemented by having a
+ * pool of pages per node.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/efi.h>
+#include <linux/genalloc.h>
+#include <asm/page.h>
+#include <asm/pal.h>
+#include <asm/system.h>
+#include <asm/pgtable.h>
+#include <asm/atomic.h>
+#include <asm/tlbflush.h>
+#include <asm/sn/arch.h>
+
+#define DEBUG 0
+
+#if DEBUG
+#define dprintk printk
+#else
+#define dprintk(x...) do { } while (0)
+#endif
+
+void __init efi_memmap_walk_uc (efi_freemem_callback_t callback);
+
+#define MAX_UNCACHED_GRANULES 5
+static int allocated_granules;
+
+struct gen_pool *uncached_pool[MAX_NUMNODES];
+
+
+static void uncached_ipi_visibility(void *data)
+{
+ int status;
+
+ status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL);
+ if ((status != PAL_VISIBILITY_OK) &&
+ (status != PAL_VISIBILITY_OK_REMOTE_NEEDED))
+ printk(KERN_DEBUG "pal_prefetch_visibility() returns %i on "
+ "CPU %i\n", status, get_cpu());
+}
+
+
+static void uncached_ipi_mc_drain(void *data)
+{
+ int status;
+ status = ia64_pal_mc_drain();
+ if (status)
+ printk(KERN_WARNING "ia64_pal_mc_drain() failed with %i on "
+ "CPU %i\n", status, get_cpu());
+}
+
+
+static unsigned long
+uncached_get_new_chunk(struct gen_pool *poolp)
+{
+ struct page *page;
+ void *tmp;
+ int status, i;
+ unsigned long addr, node;
+
+ if (allocated_granules >= MAX_UNCACHED_GRANULES)
+ return 0;
+
+ node = poolp->private;
+ page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO,
+ IA64_GRANULE_SHIFT-PAGE_SHIFT);
+
+ dprintk(KERN_INFO "get_new_chunk page %p, addr %lx\n",
+ page, (unsigned long)(page-vmem_map) << PAGE_SHIFT);
+
+ /*
+ * Do magic if no mem on local node! XXX
+ */
+ if (!page)
+ return 0;
+ tmp = page_address(page);
+
+ /*
+ * There's a small race here where it's possible for someone to
+ * access the page through /dev/mem halfway through the conversion
+ * to uncached - not sure it's really worth bothering about
+ */
+ for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++)
+ SetPageUncached(&page[i]);
+
+ flush_tlb_kernel_range(tmp, tmp + IA64_GRANULE_SIZE);
+
+ status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL);
+
+ dprintk(KERN_INFO "pal_prefetch_visibility() returns %i on cpu %i\n",
+ status, get_cpu());
+
+ if (!status) {
+ status = smp_call_function(uncached_ipi_visibility, NULL, 0, 1);
+ if (status)
+ printk(KERN_WARNING "smp_call_function failed for "
+ "uncached_ipi_visibility! (%i)\n", status);
+ }
+
+ if (ia64_platform_is("sn2"))
+ sn_flush_all_caches((unsigned long)tmp, IA64_GRANULE_SIZE);
+ else
+ flush_icache_range((unsigned long)tmp,
+ (unsigned long)tmp+IA64_GRANULE_SIZE);
+
+ ia64_pal_mc_drain();
+ status = smp_call_function(uncached_ipi_mc_drain, NULL, 0, 1);
+ if (status)
+ printk(KERN_WARNING "smp_call_function failed for "
+ "uncached_ipi_mc_drain! (%i)\n", status);
+
+ addr = (unsigned long)tmp - PAGE_OFFSET + __IA64_UNCACHED_OFFSET;
+
+ allocated_granules++;
+ return addr;
+}
+
+
+/*
+ * uncached_alloc_page
+ *
+ * Allocate 1 uncached page. Allocates on the requested node. If no
+ * uncached pages are available on the requested node, roundrobin starting
+ * with higher nodes.
+ */
+unsigned long
+uncached_alloc_page(int nid)
+{
+ unsigned long maddr;
+
+ maddr = gen_pool_alloc(uncached_pool[nid], PAGE_SIZE);
+
+ dprintk(KERN_DEBUG "uncached_alloc_page returns %lx on node %i\n",
+ maddr, nid);
+
+ /*
+ * If no memory is availble on our local node, try the
+ * remaining nodes in the system.
+ */
+ if (!maddr) {
+ int i;
+
+ for (i = MAX_NUMNODES - 1; i >= 0; i--) {
+ if (i == nid || !node_online(i))
+ continue;
+ maddr = gen_pool_alloc(uncached_pool[i], PAGE_SIZE);
+ dprintk(KERN_DEBUG "uncached_alloc_page alternate search "
+ "returns %lx on node %i\n", maddr, i);
+ if (maddr) {
+ break;
+ }
+ }
+ }
+
+ return maddr;
+}
+EXPORT_SYMBOL(uncached_alloc_page);
+
+
+/*
+ * uncached_free_page
+ *
+ * Free a single uncached page.
+ */
+void
+uncached_free_page(unsigned long maddr)
+{
+ int node;
+
+ node = nasid_to_cnodeid(NASID_GET(maddr));
+
+ dprintk(KERN_DEBUG "uncached_free_page(%lx) on node %i\n", maddr, node);
+
+ if ((maddr & (0XFUL << 60)) != __IA64_UNCACHED_OFFSET)
+ panic("uncached_free_page invalid address %lx\n", maddr);
+
+ gen_pool_free(uncached_pool[node], maddr, PAGE_SIZE);
+}
+EXPORT_SYMBOL(uncached_free_page);
+
+
+/*
+ * uncached_build_memmap,
+ *
+ * Called at boot time to build a map of pages that can be used for
+ * memory special operations.
+ */
+static int __init
+uncached_build_memmap(unsigned long start, unsigned long end, void *arg)
+{
+ long length;
+ unsigned long vstart, vend;
+ int node;
+
+ length = end - start;
+ vstart = start + __IA64_UNCACHED_OFFSET;
+ vend = end + __IA64_UNCACHED_OFFSET;
+
+ dprintk(KERN_ERR "uncached_build_memmap(%lx %lx)\n", start, end);
+
+ memset((char *)vstart, 0, length);
+
+ node = nasid_to_cnodeid(NASID_GET(start));
+
+ for (; vstart < vend ; vstart += PAGE_SIZE) {
+ dprintk(KERN_INFO "sticking %lx into the pool!\n", vstart);
+ gen_pool_free(uncached_pool[node], vstart, PAGE_SIZE);
+ }
+
+ return 0;
+}
+
+
+static int __init uncached_init(void) {
+ int i;
+
+ for (i = 0; i < MAX_NUMNODES; i++) {
+ if (!node_online(i))
+ continue;
+ uncached_pool[i] = gen_pool_create(0, IA64_GRANULE_SHIFT,
+ &uncached_get_new_chunk, i);
+ }
+
+ efi_memmap_walk_uc(uncached_build_memmap);
+
+ return 0;
+}
+
+__initcall(uncached_init);