diff options
Diffstat (limited to 'arch/ia64/kernel')
-rw-r--r-- | arch/ia64/kernel/Makefile | 1 | ||||
-rw-r--r-- | arch/ia64/kernel/efi.c | 32 | ||||
-rw-r--r-- | arch/ia64/kernel/entry.S | 6 | ||||
-rw-r--r-- | arch/ia64/kernel/fsys.S | 4 | ||||
-rw-r--r-- | arch/ia64/kernel/mca.c | 8 | ||||
-rw-r--r-- | arch/ia64/kernel/minstate.h | 3 | ||||
-rw-r--r-- | arch/ia64/kernel/module.c | 10 | ||||
-rw-r--r-- | arch/ia64/kernel/perfmon.c | 175 | ||||
-rw-r--r-- | arch/ia64/kernel/ptrace.c | 26 | ||||
-rw-r--r-- | arch/ia64/kernel/setup.c | 3 | ||||
-rw-r--r-- | arch/ia64/kernel/smpboot.c | 2 | ||||
-rw-r--r-- | arch/ia64/kernel/sys_ia64.c | 7 | ||||
-rw-r--r-- | arch/ia64/kernel/traps.c | 29 | ||||
-rw-r--r-- | arch/ia64/kernel/uncached.c | 246 |
14 files changed, 509 insertions, 43 deletions
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index c1a02bbc252c..4c73d8ba2e3d 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_SMP) += smp.o smpboot.o domain.o obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o obj-$(CONFIG_IA64_CYCLONE) += cyclone.o obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o +obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o mca_recovery-y += mca_drv.o mca_drv_asm.o # The gate DSO image is built using a special linker script. diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index 4a3b1aac43e7..179f230816ed 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -410,6 +410,38 @@ efi_memmap_walk (efi_freemem_callback_t callback, void *arg) } /* + * Walk the EFI memory map to pull out leftover pages in the lower + * memory regions which do not end up in the regular memory map and + * stick them into the uncached allocator + * + * The regular walk function is significantly more complex than the + * uncached walk which means it really doesn't make sense to try and + * merge the two. + */ +void __init +efi_memmap_walk_uc (efi_freemem_callback_t callback) +{ + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size, start, end; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + if (md->attribute == EFI_MEMORY_UC) { + start = PAGE_ALIGN(md->phys_addr); + end = PAGE_ALIGN((md->phys_addr+(md->num_pages << EFI_PAGE_SHIFT)) & PAGE_MASK); + if ((*callback)(start, end, NULL) < 0) + return; + } + } +} + + +/* * Look for the PAL_CODE region reported by EFI and maps it using an * ITR to enable safe PAL calls in virtual mode. 
See IA-64 Processor * Abstraction Layer chapter 11 in ADAG diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 81c45d447394..b1d5d3d5276c 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1182,7 +1182,7 @@ ENTRY(notify_resume_user) ;; (pNonSys) mov out2=0 // out2==0 => not a syscall .fframe 16 - .spillpsp ar.unat, 16 // (note that offset is relative to psp+0x10!) + .spillsp ar.unat, 16 st8 [sp]=r9,-16 // allocate space for ar.unat and save it st8 [out1]=loc1,-8 // save ar.pfs, out1=&sigscratch .body @@ -1208,7 +1208,7 @@ GLOBAL_ENTRY(sys_rt_sigsuspend) adds out2=8,sp // out2=&sigscratch->ar_pfs ;; .fframe 16 - .spillpsp ar.unat, 16 // (note that offset is relative to psp+0x10!) + .spillsp ar.unat, 16 st8 [sp]=r9,-16 // allocate space for ar.unat and save it st8 [out2]=loc1,-8 // save ar.pfs, out2=&sigscratch .body @@ -1579,7 +1579,7 @@ sys_call_table: data8 sys_keyctl data8 sys_ni_syscall data8 sys_ni_syscall // 1275 - data8 sys_ni_syscall + data8 sys_set_zone_reclaim data8 sys_ni_syscall data8 sys_ni_syscall data8 sys_ni_syscall diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index 4f3cdef75797..962b6c4e32b5 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S @@ -460,9 +460,9 @@ EX(.fail_efault, ld8 r14=[r33]) // r14 <- *set ;; st8 [r2]=r14 // update current->blocked with new mask - cmpxchg4.acq r14=[r9],r18,ar.ccv // current->thread_info->flags <- r18 + cmpxchg4.acq r8=[r9],r18,ar.ccv // current->thread_info->flags <- r18 ;; - cmp.ne p6,p0=r17,r14 // update failed? + cmp.ne p6,p0=r17,r8 // update failed? 
(p6) br.cond.spnt.few 1b // yes -> retry #ifdef CONFIG_SMP diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 4d6c7b8f667b..736e328b5e61 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -1103,8 +1103,6 @@ ia64_mca_cpe_int_caller(int cpe_irq, void *arg, struct pt_regs *ptregs) return IRQ_HANDLED; } -#endif /* CONFIG_ACPI */ - /* * ia64_mca_cpe_poll * @@ -1122,6 +1120,8 @@ ia64_mca_cpe_poll (unsigned long dummy) platform_send_ipi(first_cpu(cpu_online_map), IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0); } +#endif /* CONFIG_ACPI */ + /* * C portion of the OS INIT handler * @@ -1390,8 +1390,7 @@ ia64_mca_init(void) register_percpu_irq(IA64_MCA_WAKEUP_VECTOR, &mca_wkup_irqaction); #ifdef CONFIG_ACPI - /* Setup the CPEI/P vector and handler */ - cpe_vector = acpi_request_vector(ACPI_INTERRUPT_CPEI); + /* Setup the CPEI/P handler */ register_percpu_irq(IA64_CPEP_VECTOR, &mca_cpep_irqaction); #endif @@ -1436,6 +1435,7 @@ ia64_mca_late_init(void) #ifdef CONFIG_ACPI /* Setup the CPEI/P vector and handler */ + cpe_vector = acpi_request_vector(ACPI_INTERRUPT_CPEI); init_timer(&cpe_poll_timer); cpe_poll_timer.function = ia64_mca_cpe_poll; diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h index 1dbc7b2497c9..f6d8a010d99b 100644 --- a/arch/ia64/kernel/minstate.h +++ b/arch/ia64/kernel/minstate.h @@ -41,7 +41,7 @@ (pKStk) addl r3=THIS_CPU(ia64_mca_data),r3;; \ (pKStk) ld8 r3 = [r3];; \ (pKStk) addl r3=IA64_MCA_CPU_INIT_STACK_OFFSET,r3;; \ -(pKStk) addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r3; \ +(pKStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r3; \ (pUStk) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ (pUStk) addl r22=IA64_RBS_OFFSET,r1; /* compute base of register backing store */ \ ;; \ @@ -50,7 +50,6 @@ (pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \ (pUStk) dep r22=-1,r22,61,3; /* compute kernel virtual addr of RBS */ \ ;; \ -(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp 
(r12) */ \ (pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \ ;; \ (pUStk) mov r18=ar.bsp; \ diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c index febc091c2f02..f1aca7cffd12 100644 --- a/arch/ia64/kernel/module.c +++ b/arch/ia64/kernel/module.c @@ -825,14 +825,16 @@ apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symind * XXX Should have an arch-hook for running this after final section * addresses have been selected... */ - /* See if gp can cover the entire core module: */ - uint64_t gp = (uint64_t) mod->module_core + MAX_LTOFF / 2; - if (mod->core_size >= MAX_LTOFF) + uint64_t gp; + if (mod->core_size > MAX_LTOFF) /* * This takes advantage of fact that SHF_ARCH_SMALL gets allocated * at the end of the module. */ - gp = (uint64_t) mod->module_core + mod->core_size - MAX_LTOFF / 2; + gp = mod->core_size - MAX_LTOFF / 2; + else + gp = mod->core_size / 2; + gp = (uint64_t) mod->module_core + ((gp + 7) & -8); mod->arch.gp = gp; DEBUGP("%s: placing gp at 0x%lx\n", __FUNCTION__, gp); } diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 71c101601e3e..6407bff6bfd7 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -11,7 +11,7 @@ * Version Perfmon-2.x is a rewrite of perfmon-1.x * by Stephane Eranian, Hewlett Packard Co. 
* - * Copyright (C) 1999-2003, 2005 Hewlett Packard Co + * Copyright (C) 1999-2005 Hewlett Packard Co * Stephane Eranian <eranian@hpl.hp.com> * David Mosberger-Tang <davidm@hpl.hp.com> * @@ -497,6 +497,9 @@ typedef struct { static pfm_stats_t pfm_stats[NR_CPUS]; static pfm_session_t pfm_sessions; /* global sessions information */ +static spinlock_t pfm_alt_install_check = SPIN_LOCK_UNLOCKED; +static pfm_intr_handler_desc_t *pfm_alt_intr_handler; + static struct proc_dir_entry *perfmon_dir; static pfm_uuid_t pfm_null_uuid = {0,}; @@ -606,6 +609,7 @@ DEFINE_PER_CPU(unsigned long, pfm_syst_info); DEFINE_PER_CPU(struct task_struct *, pmu_owner); DEFINE_PER_CPU(pfm_context_t *, pmu_ctx); DEFINE_PER_CPU(unsigned long, pmu_activation_number); +EXPORT_PER_CPU_SYMBOL_GPL(pfm_syst_info); /* forward declaration */ @@ -1325,7 +1329,7 @@ pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu) error_conflict: DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n", pfm_sessions.pfs_sys_session[cpu]->pid, - smp_processor_id())); + cpu)); abort: UNLOCK_PFS(flags); @@ -5555,26 +5559,32 @@ pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs) int ret; this_cpu = get_cpu(); - min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min; - max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max; + if (likely(!pfm_alt_intr_handler)) { + min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min; + max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max; - start_cycles = ia64_get_itc(); + start_cycles = ia64_get_itc(); - ret = pfm_do_interrupt_handler(irq, arg, regs); + ret = pfm_do_interrupt_handler(irq, arg, regs); - total_cycles = ia64_get_itc(); + total_cycles = ia64_get_itc(); - /* - * don't measure spurious interrupts - */ - if (likely(ret == 0)) { - total_cycles -= start_cycles; + /* + * don't measure spurious interrupts + */ + if (likely(ret == 0)) { + total_cycles -= start_cycles; - if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = 
total_cycles; - if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles; + if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles; + if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles; - pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles; + pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles; + } + } + else { + (*pfm_alt_intr_handler->handler)(irq, arg, regs); } + put_cpu_no_resched(); return IRQ_HANDLED; } @@ -6425,6 +6435,141 @@ static struct irqaction perfmon_irqaction = { .name = "perfmon" }; +static void +pfm_alt_save_pmu_state(void *data) +{ + struct pt_regs *regs; + + regs = ia64_task_regs(current); + + DPRINT(("called\n")); + + /* + * should not be necessary but + * let's take no risk + */ + pfm_clear_psr_up(); + pfm_clear_psr_pp(); + ia64_psr(regs)->pp = 0; + + /* + * This call is required + * May cause a spurious interrupt on some processors + */ + pfm_freeze_pmu(); + + ia64_srlz_d(); +} + +void +pfm_alt_restore_pmu_state(void *data) +{ + struct pt_regs *regs; + + regs = ia64_task_regs(current); + + DPRINT(("called\n")); + + /* + * put PMU back in state expected + * by perfmon + */ + pfm_clear_psr_up(); + pfm_clear_psr_pp(); + ia64_psr(regs)->pp = 0; + + /* + * perfmon runs with PMU unfrozen at all times + */ + pfm_unfreeze_pmu(); + + ia64_srlz_d(); +} + +int +pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) +{ + int ret, i; + int reserve_cpu; + + /* some sanity checks */ + if (hdl == NULL || hdl->handler == NULL) return -EINVAL; + + /* do the easy test first */ + if (pfm_alt_intr_handler) return -EBUSY; + + /* one at a time in the install or remove, just fail the others */ + if (!spin_trylock(&pfm_alt_install_check)) { + return -EBUSY; + } + + /* reserve our session */ + for_each_online_cpu(reserve_cpu) { + ret = pfm_reserve_session(NULL, 1, reserve_cpu); + if (ret) goto cleanup_reserve; + } + + /* save the current system wide pmu states */ + 
ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 0, 1); + if (ret) { + DPRINT(("on_each_cpu() failed: %d\n", ret)); + goto cleanup_reserve; + } + + /* officially change to the alternate interrupt handler */ + pfm_alt_intr_handler = hdl; + + spin_unlock(&pfm_alt_install_check); + + return 0; + +cleanup_reserve: + for_each_online_cpu(i) { + /* don't unreserve more than we reserved */ + if (i >= reserve_cpu) break; + + pfm_unreserve_session(NULL, 1, i); + } + + spin_unlock(&pfm_alt_install_check); + + return ret; +} +EXPORT_SYMBOL_GPL(pfm_install_alt_pmu_interrupt); + +int +pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) +{ + int i; + int ret; + + if (hdl == NULL) return -EINVAL; + + /* cannot remove someone else's handler! */ + if (pfm_alt_intr_handler != hdl) return -EINVAL; + + /* one at a time in the install or remove, just fail the others */ + if (!spin_trylock(&pfm_alt_install_check)) { + return -EBUSY; + } + + pfm_alt_intr_handler = NULL; + + ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 0, 1); + if (ret) { + DPRINT(("on_each_cpu() failed: %d\n", ret)); + } + + for_each_online_cpu(i) { + pfm_unreserve_session(NULL, 1, i); + } + + spin_unlock(&pfm_alt_install_check); + + return 0; +} +EXPORT_SYMBOL_GPL(pfm_remove_alt_pmu_interrupt); + /* * perfmon initialization routine, called from the initcall() table */ diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 907464ee7273..575a8f657b31 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -635,11 +635,17 @@ ia64_flush_fph (struct task_struct *task) { struct ia64_psr *psr = ia64_psr(ia64_task_regs(task)); + /* + * Prevent migrating this task while + * we're fiddling with the FPU state + */ + preempt_disable(); if (ia64_is_local_fpu_owner(task) && psr->mfh) { psr->mfh = 0; task->thread.flags |= IA64_THREAD_FPH_VALID; ia64_save_fpu(&task->thread.fph[0]); } + preempt_enable(); } /* @@ -692,16 +698,30 @@ convert_to_non_syscall (struct task_struct *child, struct 
pt_regs *pt, unsigned long cfm) { struct unw_frame_info info, prev_info; - unsigned long ip, pr; + unsigned long ip, sp, pr; unw_init_from_blocked_task(&info, child); while (1) { prev_info = info; if (unw_unwind(&info) < 0) return; - if (unw_get_rp(&info, &ip) < 0) + + unw_get_sp(&info, &sp); + if ((long)((unsigned long)child + IA64_STK_OFFSET - sp) + < IA64_PT_REGS_SIZE) { + dprintk("ptrace.%s: ran off the top of the kernel " + "stack\n", __FUNCTION__); + return; + } + if (unw_get_pr (&prev_info, &pr) < 0) { + unw_get_rp(&prev_info, &ip); + dprintk("ptrace.%s: failed to read " + "predicate register (ip=0x%lx)\n", + __FUNCTION__, ip); return; - if (ip < FIXADDR_USER_END) + } + if (unw_is_intr_frame(&info) + && (pr & (1UL << PRED_USER_STACK))) break; } diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index b7e6b4cb374b..d14692e0920a 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -720,7 +720,8 @@ cpu_init (void) ia64_set_kr(IA64_KR_PT_BASE, __pa(ia64_imva(empty_zero_page))); /* - * Initialize default control register to defer all speculative faults. The + * Initialize default control register to defer speculative faults except + * for those arising from TLB misses, which are not deferred. The * kernel MUST NOT depend on a particular setting of these bits (in other words, * the kernel must have recovery code for all speculative accesses). Turn on * dcr.lc as per recommendation by the architecture team. 
Most IA-32 apps diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 0d5ee57c9865..3865f088ffa2 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -624,7 +624,7 @@ static struct { __u16 thread_id; __u16 proc_fixed_addr; __u8 valid; -}mt_info[NR_CPUS] __devinit; +} mt_info[NR_CPUS] __devinitdata; #ifdef CONFIG_HOTPLUG_CPU static inline void diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index a8cf6d8a509c..770fab37928e 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -182,13 +182,6 @@ do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, un } } - /* - * A zero mmap always succeeds in Linux, independent of whether or not the - * remaining arguments are valid. - */ - if (len == 0) - goto out; - /* Careful about overflows.. */ len = PAGE_ALIGN(len); if (!len || len > TASK_SIZE) { diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index e82ad78081b3..1861173bd4f6 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -111,6 +111,24 @@ ia64_bad_break (unsigned long break_num, struct pt_regs *regs) siginfo_t siginfo; int sig, code; + /* break.b always sets cr.iim to 0, which causes problems for + * debuggers. Get the real break number from the original instruction, + * but only for kernel code. User space break.b is left alone, to + * preserve the existing behaviour. All break codings have the same + * format, so there is no need to check the slot type. 
+ */ + if (break_num == 0 && !user_mode(regs)) { + struct ia64_psr *ipsr = ia64_psr(regs); + unsigned long *bundle = (unsigned long *)regs->cr_iip; + unsigned long slot; + switch (ipsr->ri) { + case 0: slot = (bundle[0] >> 5); break; + case 1: slot = (bundle[0] >> 46) | (bundle[1] << 18); break; + default: slot = (bundle[1] >> 23); break; + } + break_num = ((slot >> 36 & 1) << 20) | (slot >> 6 & 0xfffff); + } + /* SIGILL, SIGFPE, SIGSEGV, and SIGBUS want these field initialized: */ siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri); siginfo.si_imm = break_num; @@ -202,13 +220,21 @@ disabled_fph_fault (struct pt_regs *regs) /* first, grant user-level access to fph partition: */ psr->dfh = 0; + + /* + * Make sure that no other task gets in on this processor + * while we're claiming the FPU + */ + preempt_disable(); #ifndef CONFIG_SMP { struct task_struct *fpu_owner = (struct task_struct *)ia64_get_kr(IA64_KR_FPU_OWNER); - if (ia64_is_local_fpu_owner(current)) + if (ia64_is_local_fpu_owner(current)) { + preempt_enable_no_resched(); return; + } if (fpu_owner) ia64_flush_fph(fpu_owner); @@ -226,6 +252,7 @@ disabled_fph_fault (struct pt_regs *regs) */ psr->mfh = 1; } + preempt_enable_no_resched(); } static inline int diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c new file mode 100644 index 000000000000..490dfc9ab47f --- /dev/null +++ b/arch/ia64/kernel/uncached.c @@ -0,0 +1,246 @@ +/* + * Copyright (C) 2001-2005 Silicon Graphics, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * A simple uncached page allocator using the generic allocator. This + * allocator first utilizes the spare (spill) pages found in the EFI + * memmap and will then start converting cached pages to uncached ones + * at a granule at a time. 
Node awareness is implemented by having a + * pool of pages per node. + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/efi.h> +#include <linux/genalloc.h> +#include <asm/page.h> +#include <asm/pal.h> +#include <asm/system.h> +#include <asm/pgtable.h> +#include <asm/atomic.h> +#include <asm/tlbflush.h> +#include <asm/sn/arch.h> + +#define DEBUG 0 + +#if DEBUG +#define dprintk printk +#else +#define dprintk(x...) do { } while (0) +#endif + +void __init efi_memmap_walk_uc (efi_freemem_callback_t callback); + +#define MAX_UNCACHED_GRANULES 5 +static int allocated_granules; + +struct gen_pool *uncached_pool[MAX_NUMNODES]; + + +static void uncached_ipi_visibility(void *data) +{ + int status; + + status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL); + if ((status != PAL_VISIBILITY_OK) && + (status != PAL_VISIBILITY_OK_REMOTE_NEEDED)) + printk(KERN_DEBUG "pal_prefetch_visibility() returns %i on " + "CPU %i\n", status, get_cpu()); +} + + +static void uncached_ipi_mc_drain(void *data) +{ + int status; + status = ia64_pal_mc_drain(); + if (status) + printk(KERN_WARNING "ia64_pal_mc_drain() failed with %i on " + "CPU %i\n", status, get_cpu()); +} + + +static unsigned long +uncached_get_new_chunk(struct gen_pool *poolp) +{ + struct page *page; + void *tmp; + int status, i; + unsigned long addr, node; + + if (allocated_granules >= MAX_UNCACHED_GRANULES) + return 0; + + node = poolp->private; + page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, + IA64_GRANULE_SHIFT-PAGE_SHIFT); + + dprintk(KERN_INFO "get_new_chunk page %p, addr %lx\n", + page, (unsigned long)(page-vmem_map) << PAGE_SHIFT); + + /* + * Do magic if no mem on local node! 
XXX + */ + if (!page) + return 0; + tmp = page_address(page); + + /* + * There's a small race here where it's possible for someone to + * access the page through /dev/mem halfway through the conversion + * to uncached - not sure it's really worth bothering about + */ + for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++) + SetPageUncached(&page[i]); + + flush_tlb_kernel_range(tmp, tmp + IA64_GRANULE_SIZE); + + status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL); + + dprintk(KERN_INFO "pal_prefetch_visibility() returns %i on cpu %i\n", + status, get_cpu()); + + if (!status) { + status = smp_call_function(uncached_ipi_visibility, NULL, 0, 1); + if (status) + printk(KERN_WARNING "smp_call_function failed for " + "uncached_ipi_visibility! (%i)\n", status); + } + + if (ia64_platform_is("sn2")) + sn_flush_all_caches((unsigned long)tmp, IA64_GRANULE_SIZE); + else + flush_icache_range((unsigned long)tmp, + (unsigned long)tmp+IA64_GRANULE_SIZE); + + ia64_pal_mc_drain(); + status = smp_call_function(uncached_ipi_mc_drain, NULL, 0, 1); + if (status) + printk(KERN_WARNING "smp_call_function failed for " + "uncached_ipi_mc_drain! (%i)\n", status); + + addr = (unsigned long)tmp - PAGE_OFFSET + __IA64_UNCACHED_OFFSET; + + allocated_granules++; + return addr; +} + + +/* + * uncached_alloc_page + * + * Allocate 1 uncached page. Allocates on the requested node. If no + * uncached pages are available on the requested node, roundrobin starting + * with higher nodes. + */ +unsigned long +uncached_alloc_page(int nid) +{ + unsigned long maddr; + + maddr = gen_pool_alloc(uncached_pool[nid], PAGE_SIZE); + + dprintk(KERN_DEBUG "uncached_alloc_page returns %lx on node %i\n", + maddr, nid); + + /* + * If no memory is available on our local node, try the + * remaining nodes in the system. 
+ */ + if (!maddr) { + int i; + + for (i = MAX_NUMNODES - 1; i >= 0; i--) { + if (i == nid || !node_online(i)) + continue; + maddr = gen_pool_alloc(uncached_pool[i], PAGE_SIZE); + dprintk(KERN_DEBUG "uncached_alloc_page alternate search " + "returns %lx on node %i\n", maddr, i); + if (maddr) { + break; + } + } + } + + return maddr; +} +EXPORT_SYMBOL(uncached_alloc_page); + + +/* + * uncached_free_page + * + * Free a single uncached page. + */ +void +uncached_free_page(unsigned long maddr) +{ + int node; + + node = nasid_to_cnodeid(NASID_GET(maddr)); + + dprintk(KERN_DEBUG "uncached_free_page(%lx) on node %i\n", maddr, node); + + if ((maddr & (0XFUL << 60)) != __IA64_UNCACHED_OFFSET) + panic("uncached_free_page invalid address %lx\n", maddr); + + gen_pool_free(uncached_pool[node], maddr, PAGE_SIZE); +} +EXPORT_SYMBOL(uncached_free_page); + + +/* + * uncached_build_memmap, + * + * Called at boot time to build a map of pages that can be used for + * memory special operations. + */ +static int __init +uncached_build_memmap(unsigned long start, unsigned long end, void *arg) +{ + long length; + unsigned long vstart, vend; + int node; + + length = end - start; + vstart = start + __IA64_UNCACHED_OFFSET; + vend = end + __IA64_UNCACHED_OFFSET; + + dprintk(KERN_ERR "uncached_build_memmap(%lx %lx)\n", start, end); + + memset((char *)vstart, 0, length); + + node = nasid_to_cnodeid(NASID_GET(start)); + + for (; vstart < vend ; vstart += PAGE_SIZE) { + dprintk(KERN_INFO "sticking %lx into the pool!\n", vstart); + gen_pool_free(uncached_pool[node], vstart, PAGE_SIZE); + } + + return 0; +} + + +static int __init uncached_init(void) { + int i; + + for (i = 0; i < MAX_NUMNODES; i++) { + if (!node_online(i)) + continue; + uncached_pool[i] = gen_pool_create(0, IA64_GRANULE_SHIFT, + &uncached_get_new_chunk, i); + } + + efi_memmap_walk_uc(uncached_build_memmap); + + return 0; +} + +__initcall(uncached_init); |