diff options
author | Linus Torvalds <torvalds@g5.osdl.org> | 2005-07-07 10:24:51 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-07-07 10:24:51 -0700 |
commit | 043d051615aa5da09a7e44f1edbb69798458e067 (patch) | |
tree | a0bfe7f6ed6efa4e0eb7f6b9891a0dc3f2fafe57 | |
parent | c101f3136cc98a003d0d16be6fab7d0d950581a6 (diff) | |
parent | 21517a57e838a1fbb7a54a8a77501024e77f83e0 (diff) | |
download | linux-043d051615aa5da09a7e44f1edbb69798458e067.tar.gz linux-043d051615aa5da09a7e44f1edbb69798458e067.tar.bz2 linux-043d051615aa5da09a7e44f1edbb69798458e067.zip |
Merge master.kernel.org:/pub/scm/linux/kernel/git/aegl/linux-2.6
31 files changed, 1255 insertions, 544 deletions
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index b2e2f6509eb0..e1fb68ddec26 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -17,6 +17,7 @@ obj-$(CONFIG_IA64_PALINFO) += palinfo.o obj-$(CONFIG_IOSAPIC) += iosapic.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_SMP) += smp.o smpboot.o domain.o +obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o obj-$(CONFIG_IA64_CYCLONE) += cyclone.o obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index cda06f88c66e..542256e98e60 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -640,9 +640,11 @@ acpi_boot_init (void) if (smp_boot_data.cpu_phys_id[cpu] != hard_smp_processor_id()) node_cpuid[i++].phys_id = smp_boot_data.cpu_phys_id[cpu]; } - build_cpu_to_node_map(); # endif #endif +#ifdef CONFIG_ACPI_NUMA + build_cpu_to_node_map(); +#endif /* Make boot-up look pretty */ printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus, total_cpus); return 0; diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c new file mode 100644 index 000000000000..a68ce6678092 --- /dev/null +++ b/arch/ia64/kernel/numa.c @@ -0,0 +1,57 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * ia64 kernel NUMA specific stuff + * + * Copyright (C) 2002 Erich Focht <efocht@ess.nec.de> + * Copyright (C) 2004 Silicon Graphics, Inc. + * Jesse Barnes <jbarnes@sgi.com> + */ +#include <linux/config.h> +#include <linux/topology.h> +#include <linux/module.h> +#include <asm/processor.h> +#include <asm/smp.h> + +u8 cpu_to_node_map[NR_CPUS] __cacheline_aligned; +EXPORT_SYMBOL(cpu_to_node_map); + +cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; + +/** + * build_cpu_to_node_map - setup cpu to node and node to cpumask arrays + * + * Build cpu to node mapping and initialize the per node cpu masks using + * info from the node_cpuid array handed to us by ACPI. + */ +void __init build_cpu_to_node_map(void) +{ + int cpu, i, node; + + for(node=0; node < MAX_NUMNODES; node++) + cpus_clear(node_to_cpu_mask[node]); + + for(cpu = 0; cpu < NR_CPUS; ++cpu) { + node = -1; + for (i = 0; i < NR_CPUS; ++i) + if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) { + node = node_cpuid[i].nid; + break; + } + cpu_to_node_map[cpu] = (node >= 0) ? node : 0; + if (node >= 0) + cpu_set(cpu, node_to_cpu_mask[node]); + } +} diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index edd9f07860b2..b8a0a7d257a9 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -143,6 +143,7 @@ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr) __copy_from_user(current->thread.fph, &sc->sc_fr[32], 96*16); psr->mfh = 0; /* drop signal handler's fph contents... */ + preempt_disable(); if (psr->dfh) ia64_drop_fpu(current); else { @@ -150,6 +151,7 @@ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr) __ia64_load_fpu(current->thread.fph); ia64_set_local_fpu_owner(current); } + preempt_enable(); } return err; } diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 623b0a546709..7d72c0d872b3 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -525,47 +525,6 @@ smp_build_cpu_map (void) } } -#ifdef CONFIG_NUMA - -/* on which node is each logical CPU (one cacheline even for 64 CPUs) */ -u8 cpu_to_node_map[NR_CPUS] __cacheline_aligned; -EXPORT_SYMBOL(cpu_to_node_map); -/* which logical CPUs are on which nodes */ -cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; - -/* - * Build cpu to node mapping and initialize the per node cpu masks. - */ -void __init -build_cpu_to_node_map (void) -{ - int cpu, i, node; - - for(node=0; node<MAX_NUMNODES; node++) - cpus_clear(node_to_cpu_mask[node]); - for(cpu = 0; cpu < NR_CPUS; ++cpu) { - /* - * All Itanium NUMA platforms I know use ACPI, so maybe we - * can drop this ifdef completely. [EF] - */ -#ifdef CONFIG_ACPI_NUMA - node = -1; - for (i = 0; i < NR_CPUS; ++i) - if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) { - node = node_cpuid[i].nid; - break; - } -#else -# error Fixme: Dunno how to build CPU-to-node map. -#endif - cpu_to_node_map[cpu] = (node >= 0) ? node : 0; - if (node >= 0) - cpu_set(cpu, node_to_cpu_mask[node]); - } -} - -#endif /* CONFIG_NUMA */ - /* * Cycle through the APs sending Wakeup IPIs to boot each. */ diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index e7e520d90f03..4440c8343fa4 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -90,14 +90,16 @@ die (const char *str, struct pt_regs *regs, long err) .lock_owner_depth = 0 }; static int die_counter; + int cpu = get_cpu(); - if (die.lock_owner != smp_processor_id()) { + if (die.lock_owner != cpu) { console_verbose(); spin_lock_irq(&die.lock); - die.lock_owner = smp_processor_id(); + die.lock_owner = cpu; die.lock_owner_depth = 0; bust_spinlocks(1); } + put_cpu(); if (++die.lock_owner_depth < 3) { printk("%s[%d]: %s %ld [%d]\n", diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index f3fd528ead3b..b5c90e548195 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -44,150 +44,7 @@ struct early_node_data { }; static struct early_node_data mem_data[MAX_NUMNODES] __initdata; - -/** - * reassign_cpu_only_nodes - called from find_memory to move CPU-only nodes to a memory node - * - * This function will move nodes with only CPUs (no memory) - * to a node with memory which is at the minimum numa_slit distance. - * Any reassigments will result in the compression of the nodes - * and renumbering the nid values where appropriate. - * The static declarations below are to avoid large stack size which - * makes the code not re-entrant. - */ -static void __init reassign_cpu_only_nodes(void) -{ - struct node_memblk_s *p; - int i, j, k, nnode, nid, cpu, cpunid, pxm; - u8 cslit, slit; - static DECLARE_BITMAP(nodes_with_mem, MAX_NUMNODES) __initdata; - static u8 numa_slit_fix[MAX_NUMNODES * MAX_NUMNODES] __initdata; - static int node_flip[MAX_NUMNODES] __initdata; - static int old_nid_map[NR_CPUS] __initdata; - - for (nnode = 0, p = &node_memblk[0]; p < &node_memblk[num_node_memblks]; p++) - if (!test_bit(p->nid, (void *) nodes_with_mem)) { - set_bit(p->nid, (void *) nodes_with_mem); - nnode++; - } - - /* - * All nids with memory. - */ - if (nnode == num_online_nodes()) - return; - - /* - * Change nids and attempt to migrate CPU-only nodes - * to the best numa_slit (closest neighbor) possible. - * For reassigned CPU nodes a nid can't be arrived at - * until after this loop because the target nid's new - * identity might not have been established yet. So - * new nid values are fabricated above num_online_nodes() and - * mapped back later to their true value. - */ - /* MCD - This code is a bit complicated, but may be unnecessary now. - * We can now handle much more interesting node-numbering. - * The old requirement that 0 <= nid <= numnodes <= MAX_NUMNODES - * and that there be no holes in the numbering 0..numnodes - * has become simply 0 <= nid <= MAX_NUMNODES. - */ - nid = 0; - for_each_online_node(i) { - if (test_bit(i, (void *) nodes_with_mem)) { - /* - * Save original nid value for numa_slit - * fixup and node_cpuid reassignments. - */ - node_flip[nid] = i; - - if (i == nid) { - nid++; - continue; - } - - for (p = &node_memblk[0]; p < &node_memblk[num_node_memblks]; p++) - if (p->nid == i) - p->nid = nid; - - cpunid = nid; - nid++; - } else - cpunid = MAX_NUMNODES; - - for (cpu = 0; cpu < NR_CPUS; cpu++) - if (node_cpuid[cpu].nid == i) { - /* - * For nodes not being reassigned just - * fix the cpu's nid and reverse pxm map - */ - if (cpunid < MAX_NUMNODES) { - pxm = nid_to_pxm_map[i]; - pxm_to_nid_map[pxm] = - node_cpuid[cpu].nid = cpunid; - continue; - } - - /* - * For nodes being reassigned, find best node by - * numa_slit information and then make a temporary - * nid value based on current nid and num_online_nodes(). - */ - slit = 0xff; - k = 2*num_online_nodes(); - for_each_online_node(j) { - if (i == j) - continue; - else if (test_bit(j, (void *) nodes_with_mem)) { - cslit = numa_slit[i * num_online_nodes() + j]; - if (cslit < slit) { - k = num_online_nodes() + j; - slit = cslit; - } - } - } - - /* save old nid map so we can update the pxm */ - old_nid_map[cpu] = node_cpuid[cpu].nid; - node_cpuid[cpu].nid = k; - } - } - - /* - * Fixup temporary nid values for CPU-only nodes. - */ - for (cpu = 0; cpu < NR_CPUS; cpu++) - if (node_cpuid[cpu].nid == (2*num_online_nodes())) { - pxm = nid_to_pxm_map[old_nid_map[cpu]]; - pxm_to_nid_map[pxm] = node_cpuid[cpu].nid = nnode - 1; - } else { - for (i = 0; i < nnode; i++) { - if (node_flip[i] != (node_cpuid[cpu].nid - num_online_nodes())) - continue; - - pxm = nid_to_pxm_map[old_nid_map[cpu]]; - pxm_to_nid_map[pxm] = node_cpuid[cpu].nid = i; - break; - } - } - - /* - * Fix numa_slit by compressing from larger - * nid array to reduced nid array. - */ - for (i = 0; i < nnode; i++) - for (j = 0; j < nnode; j++) - numa_slit_fix[i * nnode + j] = - numa_slit[node_flip[i] * num_online_nodes() + node_flip[j]]; - - memcpy(numa_slit, numa_slit_fix, sizeof (numa_slit)); - - nodes_clear(node_online_map); - for (i = 0; i < nnode; i++) - node_set_online(i); - - return; -} +static nodemask_t memory_less_mask __initdata; /* * To prevent cache aliasing effects, align per-node structures so that they @@ -233,44 +90,101 @@ static int __init build_node_maps(unsigned long start, unsigned long len, } /** - * early_nr_phys_cpus_node - return number of physical cpus on a given node + * early_nr_cpus_node - return number of cpus on a given node * @node: node to check * - * Count the number of physical cpus on @node. These are cpus that actually - * exist. We can't use nr_cpus_node() yet because + * Count the number of cpus on @node. We can't use nr_cpus_node() yet because * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been - * called yet. + * called yet. Note that node 0 will also count all non-existent cpus. */ -static int early_nr_phys_cpus_node(int node) +static int __init early_nr_cpus_node(int node) { int cpu, n = 0; for (cpu = 0; cpu < NR_CPUS; cpu++) if (node == node_cpuid[cpu].nid) - if ((cpu == 0) || node_cpuid[cpu].phys_id) - n++; + n++; return n; } +/** + * compute_pernodesize - compute size of pernode data + * @node: the node id. + */ +static unsigned long __init compute_pernodesize(int node) +{ + unsigned long pernodesize = 0, cpus; + + cpus = early_nr_cpus_node(node); + pernodesize += PERCPU_PAGE_SIZE * cpus; + pernodesize += node * L1_CACHE_BYTES; + pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t)); + pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data)); + pernodesize = PAGE_ALIGN(pernodesize); + return pernodesize; +} /** - * early_nr_cpus_node - return number of cpus on a given node - * @node: node to check + * per_cpu_node_setup - setup per-cpu areas on each node + * @cpu_data: per-cpu area on this node + * @node: node to setup * - * Count the number of cpus on @node. We can't use nr_cpus_node() yet because - * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been - * called yet. Note that node 0 will also count all non-existent cpus. + * Copy the static per-cpu data into the region we just set aside and then + * setup __per_cpu_offset for each CPU on this node. Return a pointer to + * the end of the area. */ -static int early_nr_cpus_node(int node) +static void *per_cpu_node_setup(void *cpu_data, int node) { - int cpu, n = 0; +#ifdef CONFIG_SMP + int cpu; - for (cpu = 0; cpu < NR_CPUS; cpu++) - if (node == node_cpuid[cpu].nid) - n++; + for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (node == node_cpuid[cpu].nid) { + memcpy(__va(cpu_data), __phys_per_cpu_start, + __per_cpu_end - __per_cpu_start); + __per_cpu_offset[cpu] = (char*)__va(cpu_data) - + __per_cpu_start; + cpu_data += PERCPU_PAGE_SIZE; + } + } +#endif + return cpu_data; +} - return n; +/** + * fill_pernode - initialize pernode data. + * @node: the node id. + * @pernode: physical address of pernode data + * @pernodesize: size of the pernode data + */ +static void __init fill_pernode(int node, unsigned long pernode, + unsigned long pernodesize) +{ + void *cpu_data; + int cpus = early_nr_cpus_node(node); + struct bootmem_data *bdp = &mem_data[node].bootmem_data; + + mem_data[node].pernode_addr = pernode; + mem_data[node].pernode_size = pernodesize; + memset(__va(pernode), 0, pernodesize); + + cpu_data = (void *)pernode; + pernode += PERCPU_PAGE_SIZE * cpus; + pernode += node * L1_CACHE_BYTES; + + mem_data[node].pgdat = __va(pernode); + pernode += L1_CACHE_ALIGN(sizeof(pg_data_t)); + + mem_data[node].node_data = __va(pernode); + pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data)); + + mem_data[node].pgdat->bdata = bdp; + pernode += L1_CACHE_ALIGN(sizeof(pg_data_t)); + + cpu_data = per_cpu_node_setup(cpu_data, node); + + return; } /** @@ -304,9 +218,8 @@ static int early_nr_cpus_node(int node) static int __init find_pernode_space(unsigned long start, unsigned long len, int node) { - unsigned long epfn, cpu, cpus, phys_cpus; + unsigned long epfn; unsigned long pernodesize = 0, pernode, pages, mapsize; - void *cpu_data; struct bootmem_data *bdp = &mem_data[node].bootmem_data; epfn = (start + len) >> PAGE_SHIFT; @@ -329,49 +242,12 @@ static int __init find_pernode_space(unsigned long start, unsigned long len, * Calculate total size needed, incl. what's necessary * for good alignment and alias prevention. */ - cpus = early_nr_cpus_node(node); - phys_cpus = early_nr_phys_cpus_node(node); - pernodesize += PERCPU_PAGE_SIZE * cpus; - pernodesize += node * L1_CACHE_BYTES; - pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t)); - pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data)); - pernodesize = PAGE_ALIGN(pernodesize); + pernodesize = compute_pernodesize(node); pernode = NODEDATA_ALIGN(start, node); /* Is this range big enough for what we want to store here? */ - if (start + len > (pernode + pernodesize + mapsize)) { - mem_data[node].pernode_addr = pernode; - mem_data[node].pernode_size = pernodesize; - memset(__va(pernode), 0, pernodesize); - - cpu_data = (void *)pernode; - pernode += PERCPU_PAGE_SIZE * cpus; - pernode += node * L1_CACHE_BYTES; - - mem_data[node].pgdat = __va(pernode); - pernode += L1_CACHE_ALIGN(sizeof(pg_data_t)); - - mem_data[node].node_data = __va(pernode); - pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data)); - - mem_data[node].pgdat->bdata = bdp; - pernode += L1_CACHE_ALIGN(sizeof(pg_data_t)); - - /* - * Copy the static per-cpu data into the region we - * just set aside and then setup __per_cpu_offset - * for each CPU on this node. - */ - for (cpu = 0; cpu < NR_CPUS; cpu++) { - if (node == node_cpuid[cpu].nid) { - memcpy(__va(cpu_data), __phys_per_cpu_start, - __per_cpu_end - __per_cpu_start); - __per_cpu_offset[cpu] = (char*)__va(cpu_data) - - __per_cpu_start; - cpu_data += PERCPU_PAGE_SIZE; - } - } - } + if (start + len > (pernode + pernodesize + mapsize)) + fill_pernode(node, pernode, pernodesize); return 0; } @@ -411,6 +287,9 @@ static void __init reserve_pernode_space(void) for_each_online_node(node) { pg_data_t *pdp = mem_data[node].pgdat; + if (node_isset(node, memory_less_mask)) + continue; + bdp = pdp->bdata; /* First the bootmem_map itself */ @@ -436,8 +315,8 @@ static void __init reserve_pernode_space(void) */ static void __init initialize_pernode_data(void) { - int cpu, node; pg_data_t *pgdat_list[MAX_NUMNODES]; + int cpu, node; for_each_online_node(node) pgdat_list[node] = mem_data[node].pgdat; @@ -447,12 +326,99 @@ static void __init initialize_pernode_data(void) memcpy(mem_data[node].node_data->pg_data_ptrs, pgdat_list, sizeof(pgdat_list)); } - +#ifdef CONFIG_SMP /* Set the node_data pointer for each per-cpu struct */ for (cpu = 0; cpu < NR_CPUS; cpu++) { node = node_cpuid[cpu].nid; per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data; } +#else + { + struct cpuinfo_ia64 *cpu0_cpu_info; + cpu = 0; + node = node_cpuid[cpu].nid; + cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start + + ((char *)&per_cpu__cpu_info - __per_cpu_start)); + cpu0_cpu_info->node_data = mem_data[node].node_data; + } +#endif /* CONFIG_SMP */ +} + +/** + * memory_less_node_alloc - * attempt to allocate memory on the best NUMA slit + * node but fall back to any other node when __alloc_bootmem_node fails + * for best. + * @nid: node id + * @pernodesize: size of this node's pernode data + * @align: alignment to use for this node's pernode data + */ +static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize, + unsigned long align) +{ + void *ptr = NULL; + u8 best = 0xff; + int bestnode = -1, node; + + for_each_online_node(node) { + if (node_isset(node, memory_less_mask)) + continue; + else if (node_distance(nid, node) < best) { + best = node_distance(nid, node); + bestnode = node; + } + } + + ptr = __alloc_bootmem_node(mem_data[bestnode].pgdat, + pernodesize, align, __pa(MAX_DMA_ADDRESS)); + + if (!ptr) + panic("NO memory for memory less node\n"); + return ptr; +} + +/** + * pgdat_insert - insert the pgdat into global pgdat_list + * @pgdat: the pgdat for a node. + */ +static void __init pgdat_insert(pg_data_t *pgdat) +{ + pg_data_t *prev = NULL, *next; + + for_each_pgdat(next) + if (pgdat->node_id < next->node_id) + break; + else + prev = next; + + if (prev) { + prev->pgdat_next = pgdat; + pgdat->pgdat_next = next; + } else { + pgdat->pgdat_next = pgdat_list; + pgdat_list = pgdat; + } + + return; +} + +/** + * memory_less_nodes - allocate and initialize CPU only nodes pernode + * information. + */ +static void __init memory_less_nodes(void) +{ + unsigned long pernodesize; + void *pernode; + int node; + + for_each_node_mask(node, memory_less_mask) { + pernodesize = compute_pernodesize(node); + pernode = memory_less_node_alloc(node, pernodesize, + (node) ? (node * PERCPU_PAGE_SIZE) : (1024*1024)); + fill_pernode(node, __pa(pernode), pernodesize); + } + + return; } /** @@ -472,16 +438,19 @@ void __init find_memory(void) node_set_online(0); } + nodes_or(memory_less_mask, memory_less_mask, node_online_map); min_low_pfn = -1; max_low_pfn = 0; - if (num_online_nodes() > 1) - reassign_cpu_only_nodes(); - /* These actually end up getting called by call_pernode_memory() */ efi_memmap_walk(filter_rsvd_memory, build_node_maps); efi_memmap_walk(filter_rsvd_memory, find_pernode_space); + for_each_online_node(node) + if (mem_data[node].bootmem_data.node_low_pfn) { + node_clear(node, memory_less_mask); + mem_data[node].min_pfn = ~0UL; + } /* * Initialize the boot memory maps in reverse order since that's * what the bootmem allocator expects @@ -492,17 +461,14 @@ void __init find_memory(void) if (!node_online(node)) continue; + else if (node_isset(node, memory_less_mask)) + continue; bdp = &mem_data[node].bootmem_data; pernode = mem_data[node].pernode_addr; pernodesize = mem_data[node].pernode_size; map = pernode + pernodesize; - /* Sanity check... */ - if (!pernode) - panic("pernode space for node %d " - "could not be allocated!", node); - init_bootmem_node(mem_data[node].pgdat, map>>PAGE_SHIFT, bdp->node_boot_start>>PAGE_SHIFT, @@ -512,6 +478,7 @@ void __init find_memory(void) efi_memmap_walk(filter_rsvd_memory, free_node_bootmem); reserve_pernode_space(); + memory_less_nodes(); initialize_pernode_data(); max_pfn = max_low_pfn; @@ -519,6 +486,7 @@ void __init find_memory(void) find_initrd(); } +#ifdef CONFIG_SMP /** * per_cpu_init - setup per-cpu variables * @@ -529,15 +497,15 @@ void *per_cpu_init(void) { int cpu; - if (smp_processor_id() == 0) { - for (cpu = 0; cpu < NR_CPUS; cpu++) { - per_cpu(local_per_cpu_offset, cpu) = - __per_cpu_offset[cpu]; - } - } + if (smp_processor_id() != 0) + return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; + + for (cpu = 0; cpu < NR_CPUS; cpu++) + per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; } +#endif /* CONFIG_SMP */ /** * show_mem - give short summary of memory stats @@ -680,12 +648,13 @@ void __init paging_init(void) max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT; - /* so min() will work in count_node_pages */ - for_each_online_node(node) - mem_data[node].min_pfn = ~0UL; - efi_memmap_walk(filter_rsvd_memory, count_node_pages); + vmalloc_end -= PAGE_ALIGN(max_low_pfn * sizeof(struct page)); + vmem_map = (struct page *) vmalloc_end; + efi_memmap_walk(create_mem_map_page_table, NULL); + printk("Virtual mem_map starts at 0x%p\n", vmem_map); + for_each_online_node(node) { memset(zones_size, 0, sizeof(zones_size)); memset(zholes_size, 0, sizeof(zholes_size)); @@ -719,15 +688,6 @@ void __init paging_init(void) mem_data[node].num_dma_physpages); } - if (node == 0) { - vmalloc_end -= - PAGE_ALIGN(max_low_pfn * sizeof(struct page)); - vmem_map = (struct page *) vmalloc_end; - - efi_memmap_walk(create_mem_map_page_table, NULL); - printk("Virtual mem_map starts at 0x%p\n", vmem_map); - } - pfn_offset = mem_data[node].min_pfn; NODE_DATA(node)->node_mem_map = vmem_map + pfn_offset; @@ -735,5 +695,11 @@ void __init paging_init(void) pfn_offset, zholes_size); } + /* + * Make memory less nodes become a member of the known nodes. + */ + for_each_node_mask(node, memory_less_mask) + pgdat_insert(mem_data[node].pgdat); + zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); } diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 4eb2f52b87a1..65f9958db9f0 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -597,7 +597,8 @@ mem_init (void) kclist_add(&kcore_kernel, _stext, _end - _stext); for_each_pgdat(pgdat) - totalram_pages += free_all_bootmem_node(pgdat); + if (pgdat->bdata->node_bootmem_map) + totalram_pages += free_all_bootmem_node(pgdat); reserved_pages = 0; efi_memmap_walk(count_reserved_pages, &reserved_pages); diff --git a/arch/ia64/sn/include/xtalk/hubdev.h b/arch/ia64/sn/include/xtalk/hubdev.h index 868e7ecae84b..580a1c0403a7 100644 --- a/arch/ia64/sn/include/xtalk/hubdev.h +++ b/arch/ia64/sn/include/xtalk/hubdev.h @@ -8,6 +8,8 @@ #ifndef _ASM_IA64_SN_XTALK_HUBDEV_H #define _ASM_IA64_SN_XTALK_HUBDEV_H +#include "xtalk/xwidgetdev.h" + #define HUB_WIDGET_ID_MAX 0xf #define DEV_PER_WIDGET (2*2*8) #define IIO_ITTE_WIDGET_BITS 4 /* size of widget field */ diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index 783eb4323847..a67f39e448cb 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -9,21 +9,28 @@ #include <linux/bootmem.h> #include <linux/nodemask.h> #include <asm/sn/types.h> -#include <asm/sn/sn_sal.h> #include <asm/sn/addrs.h> -#include <asm/sn/pcibus_provider_defs.h> -#include <asm/sn/pcidev.h> -#include "pci/pcibr_provider.h" -#include "xtalk/xwidgetdev.h" #include <asm/sn/geo.h> -#include "xtalk/hubdev.h" #include <asm/sn/io.h> +#include <asm/sn/pcibr_provider.h> +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/pcidev.h> #include <asm/sn/simulator.h> +#include <asm/sn/sn_sal.h> #include <asm/sn/tioca_provider.h> +#include "xtalk/hubdev.h" +#include "xtalk/xwidgetdev.h" -char master_baseio_wid; nasid_t master_nasid = INVALID_NASID; /* Partition Master */ +static struct list_head sn_sysdata_list; + +/* sysdata list struct */ +struct sysdata_el { + struct list_head entry; + void *sysdata; +}; + struct slab_info { struct hubdev_info hubdev; }; @@ -138,23 +145,6 @@ sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev, } /* - * sn_alloc_pci_sysdata() - This routine allocates a pci controller - * which is expected as the pci_dev and pci_bus sysdata by the Linux - * PCI infrastructure. - */ -static inline struct pci_controller *sn_alloc_pci_sysdata(void) -{ - struct pci_controller *pci_sysdata; - - pci_sysdata = kmalloc(sizeof(*pci_sysdata), GFP_KERNEL); - if (!pci_sysdata) - BUG(); - - memset(pci_sysdata, 0, sizeof(*pci_sysdata)); - return pci_sysdata; -} - -/* * sn_fixup_ionodes() - This routine initializes the HUB data strcuture for * each node in the system. */ @@ -221,22 +211,34 @@ static void sn_fixup_ionodes(void) } +void sn_pci_unfixup_slot(struct pci_dev *dev) +{ + struct pci_dev *host_pci_dev = SN_PCIDEV_INFO(dev)->host_pci_dev; + + sn_irq_unfixup(dev); + pci_dev_put(host_pci_dev); + pci_dev_put(dev); +} + /* * sn_pci_fixup_slot() - This routine sets up a slot's resources * consistent with the Linux PCI abstraction layer. Resources acquired * from our PCI provider include PIO maps to BAR space and interrupt * objects. */ -static void sn_pci_fixup_slot(struct pci_dev *dev) +void sn_pci_fixup_slot(struct pci_dev *dev) { int idx; int segment = 0; - uint64_t size; - struct sn_irq_info *sn_irq_info; - struct pci_dev *host_pci_dev; int status = 0; struct pcibus_bussoft *bs; + struct pci_bus *host_pci_bus; + struct pci_dev *host_pci_dev; + struct sn_irq_info *sn_irq_info; + unsigned long size; + unsigned int bus_no, devfn; + pci_dev_get(dev); /* for the sysdata pointer */ dev->sysdata = kmalloc(sizeof(struct pcidev_info), GFP_KERNEL); if (SN_PCIDEV_INFO(dev) <= 0) BUG(); /* Cannot afford to run out of memory */ @@ -253,7 +255,7 @@ static void sn_pci_fixup_slot(struct pci_dev *dev) (u64) __pa(SN_PCIDEV_INFO(dev)), (u64) __pa(sn_irq_info)); if (status) - BUG(); /* Cannot get platform pci device information information */ + BUG(); /* Cannot get platform pci device information */ /* Copy over PIO Mapped Addresses */ for (idx = 0; idx <= PCI_ROM_RESOURCE; idx++) { @@ -275,15 +277,21 @@ static void sn_pci_fixup_slot(struct pci_dev *dev) dev->resource[idx].parent = &iomem_resource; } - /* set up host bus linkages */ - bs = SN_PCIBUS_BUSSOFT(dev->bus); - host_pci_dev = - pci_find_slot(SN_PCIDEV_INFO(dev)->pdi_slot_host_handle >> 32, - SN_PCIDEV_INFO(dev)-> - pdi_slot_host_handle & 0xffffffff); + /* + * Using the PROMs values for the PCI host bus, get the Linux + * PCI host_pci_dev struct and set up host bus linkages + */ + + bus_no = SN_PCIDEV_INFO(dev)->pdi_slot_host_handle >> 32; + devfn = SN_PCIDEV_INFO(dev)->pdi_slot_host_handle & 0xffffffff; + host_pci_bus = pci_find_bus(pci_domain_nr(dev->bus), bus_no); + host_pci_dev = pci_get_slot(host_pci_bus, devfn); + + SN_PCIDEV_INFO(dev)->host_pci_dev = host_pci_dev; SN_PCIDEV_INFO(dev)->pdi_host_pcidev_info = - SN_PCIDEV_INFO(host_pci_dev); + SN_PCIDEV_INFO(host_pci_dev); SN_PCIDEV_INFO(dev)->pdi_linux_pcidev = dev; + bs = SN_PCIBUS_BUSSOFT(dev->bus); SN_PCIDEV_INFO(dev)->pdi_pcibus_info = bs; if (bs && bs->bs_asic_type < PCIIO_ASIC_MAX_TYPES) { @@ -297,6 +305,9 @@ static void sn_pci_fixup_slot(struct pci_dev *dev) SN_PCIDEV_INFO(dev)->pdi_sn_irq_info = sn_irq_info; dev->irq = SN_PCIDEV_INFO(dev)->pdi_sn_irq_info->irq_irq; sn_irq_fixup(dev, sn_irq_info); + } else { + SN_PCIDEV_INFO(dev)->pdi_sn_irq_info = NULL; + kfree(sn_irq_info); } } @@ -304,55 +315,57 @@ static void sn_pci_fixup_slot(struct pci_dev *dev) * sn_pci_controller_fixup() - This routine sets up a bus's resources * consistent with the Linux PCI abstraction layer. */ -static void sn_pci_controller_fixup(int segment, int busnum) +void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus) { int status = 0; int nasid, cnode; - struct pci_bus *bus; struct pci_controller *controller; struct pcibus_bussoft *prom_bussoft_ptr; struct hubdev_info *hubdev_info; void *provider_soft; struct sn_pcibus_provider *provider; - status = - sal_get_pcibus_info((u64) segment, (u64) busnum, - (u64) ia64_tpa(&prom_bussoft_ptr)); - if (status > 0) { - return; /* bus # does not exist */ - } - + status = sal_get_pcibus_info((u64) segment, (u64) busnum, + (u64) ia64_tpa(&prom_bussoft_ptr)); + if (status > 0) + return; /*bus # does not exist */ prom_bussoft_ptr = __va(prom_bussoft_ptr); - controller = sn_alloc_pci_sysdata(); - /* controller non-zero is BUG'd in sn_alloc_pci_sysdata */ - bus = pci_scan_bus(busnum, &pci_root_ops, controller); + controller = kcalloc(1,sizeof(struct pci_controller), GFP_KERNEL); + if (!controller) + BUG(); + if (bus == NULL) { - return; /* error, or bus already scanned */ + bus = pci_scan_bus(busnum, &pci_root_ops, controller); + if (bus == NULL) + return; /* error, or bus already scanned */ + bus->sysdata = NULL; } + if (bus->sysdata) + goto error_return; /* sysdata already alloc'd */ + /* * Per-provider fixup. Copies the contents from prom to local * area and links SN_PCIBUS_BUSSOFT(). */ - if (prom_bussoft_ptr->bs_asic_type >= PCIIO_ASIC_MAX_TYPES) { + if (prom_bussoft_ptr->bs_asic_type >= PCIIO_ASIC_MAX_TYPES) return; /* unsupported asic type */ - } + + if (prom_bussoft_ptr->bs_asic_type == PCIIO_ASIC_TYPE_PPB) + goto error_return; /* no further fixup necessary */ provider = sn_pci_provider[prom_bussoft_ptr->bs_asic_type]; - if (provider == NULL) { + if (provider == NULL) return; /* no provider registerd for this asic */ - } provider_soft = NULL; - if (provider->bus_fixup) { + if (provider->bus_fixup) provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr); - } - if (provider_soft == NULL) { + if (provider_soft == NULL) return; /* fixup failed or not applicable */ - } /* * Generic bus fixup goes here. Don't reference prom_bussoft_ptr @@ -361,12 +374,47 @@ static void sn_pci_controller_fixup(int segment, int busnum) bus->sysdata = controller; PCI_CONTROLLER(bus)->platform_data = provider_soft; - nasid = NASID_GET(SN_PCIBUS_BUSSOFT(bus)->bs_base); cnode = nasid_to_cnodeid(nasid); hubdev_info = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo); SN_PCIBUS_BUSSOFT(bus)->bs_xwidget_info = &(hubdev_info->hdi_xwidget_info[SN_PCIBUS_BUSSOFT(bus)->bs_xid]); + + return; + +error_return: + + kfree(controller); + return; +} + +void sn_bus_store_sysdata(struct pci_dev *dev) +{ + struct sysdata_el *element; + + element = kcalloc(1, sizeof(struct sysdata_el), GFP_KERNEL); + if (!element) { + dev_dbg(dev, "%s: out of memory!\n", __FUNCTION__); + return; + } + element->sysdata = dev->sysdata; + list_add(&element->entry, &sn_sysdata_list); +} + +void sn_bus_free_sysdata(void) +{ + struct sysdata_el *element; + struct list_head *list; + +sn_sysdata_free_start: + list_for_each(list, &sn_sysdata_list) { + element = list_entry(list, struct sysdata_el, entry); + list_del(&element->entry); + kfree(element->sysdata); + kfree(element); + goto sn_sysdata_free_start; + } + return; } /* @@ -403,20 +451,17 @@ static int __init sn_pci_init(void) */ ia64_max_iommu_merge_mask = ~PAGE_MASK; sn_fixup_ionodes(); - sn_irq = kmalloc(sizeof(struct sn_irq_info *) * NR_IRQS, GFP_KERNEL); - if (sn_irq <= 0) - BUG(); /* Canno afford to run out of memory. */ - memset(sn_irq, 0, sizeof(struct sn_irq_info *) * NR_IRQS); - + sn_irq_lh_init(); + INIT_LIST_HEAD(&sn_sysdata_list); sn_init_cpei_timer(); #ifdef CONFIG_PROC_FS register_sn_procfs(); #endif - for (i = 0; i < PCI_BUSES_TO_SCAN; i++) { - sn_pci_controller_fixup(0, i); - } + /* busses are not known yet ... */ + for (i = 0; i < PCI_BUSES_TO_SCAN; i++) + sn_pci_controller_fixup(0, i, NULL); /* * Generic Linux PCI Layer has created the pci_bus and pci_dev @@ -425,9 +470,8 @@ static int __init sn_pci_init(void) */ while ((pci_dev = - pci_find_device(PCI_ANY_ID, PCI_ANY_ID, pci_dev)) != NULL) { + pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pci_dev)) != NULL) sn_pci_fixup_slot(pci_dev); - } sn_ioif_inited = 1; /* sn I/O infrastructure now initialized */ @@ -469,3 +513,8 @@ cnodeid_get_geoid(cnodeid_t cnode) } subsys_initcall(sn_pci_init); +EXPORT_SYMBOL(sn_pci_fixup_slot); +EXPORT_SYMBOL(sn_pci_unfixup_slot); +EXPORT_SYMBOL(sn_pci_controller_fixup); +EXPORT_SYMBOL(sn_bus_store_sysdata); +EXPORT_SYMBOL(sn_bus_free_sysdata); diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index 0f4e8138658f..84d276a14ecb 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -9,13 +9,13 @@ */ #include <linux/irq.h> -#include <asm/sn/intr.h> +#include <linux/spinlock.h> #include <asm/sn/addrs.h> #include <asm/sn/arch.h> -#include "xtalk/xwidgetdev.h" +#include <asm/sn/intr.h> +#include <asm/sn/pcibr_provider.h> #include <asm/sn/pcibus_provider_defs.h> #include <asm/sn/pcidev.h> -#include "pci/pcibr_provider.h" #include <asm/sn/shub_mmr.h> #include <asm/sn/sn_sal.h> @@ -25,7 +25,8 @@ static void unregister_intr_pda(struct sn_irq_info *sn_irq_info); extern int sn_force_interrupt_flag; extern int sn_ioif_inited; -struct sn_irq_info **sn_irq; +static struct list_head **sn_irq_lh; +static spinlock_t sn_irq_info_lock = SPIN_LOCK_UNLOCKED; /* non-IRQ lock */ static inline uint64_t sn_intr_alloc(nasid_t local_nasid, int local_widget, u64 sn_irq_info, @@ -101,7 +102,7 @@ static void sn_end_irq(unsigned int irq) nasid = get_nasid(); event_occurred = HUB_L((uint64_t *) GLOBAL_MMR_ADDR (nasid, SH_EVENT_OCCURRED)); - /* If the UART bit is set here, we may have received an + /* If the UART bit is set here, we may have received an * interrupt from the UART that the driver missed. To * make sure, we IPI ourselves to force us to look again. */ @@ -115,82 +116,84 @@ static void sn_end_irq(unsigned int irq) force_interrupt(irq); } +static void sn_irq_info_free(struct rcu_head *head); + static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask) { - struct sn_irq_info *sn_irq_info = sn_irq[irq]; - struct sn_irq_info *tmp_sn_irq_info; + struct sn_irq_info *sn_irq_info, *sn_irq_info_safe; int cpuid, cpuphys; - nasid_t t_nasid; /* nasid to target */ - int t_slice; /* slice to target */ - - /* allocate a temp sn_irq_info struct to get new target info */ - tmp_sn_irq_info = kmalloc(sizeof(*tmp_sn_irq_info), GFP_KERNEL); - if (!tmp_sn_irq_info) - return; cpuid = first_cpu(mask); cpuphys = cpu_physical_id(cpuid); - t_nasid = cpuid_to_nasid(cpuid); - t_slice = cpuid_to_slice(cpuid); - while (sn_irq_info) { - int status; - int local_widget; - uint64_t bridge = (uint64_t) sn_irq_info->irq_bridge; - nasid_t local_nasid = NASID_GET(bridge); + list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe, + sn_irq_lh[irq], list) { + uint64_t bridge; + int local_widget, status; + nasid_t local_nasid; + struct sn_irq_info *new_irq_info; + + new_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_ATOMIC); + if (new_irq_info == NULL) + break; + memcpy(new_irq_info, sn_irq_info, sizeof(struct sn_irq_info)); + + bridge = (uint64_t) new_irq_info->irq_bridge; + if (!bridge) { + kfree(new_irq_info); + break; /* irq is not a device interrupt */ + } - if (!bridge) - break; /* irq is not a device interrupt */ + local_nasid = NASID_GET(bridge); if (local_nasid & 1) local_widget = TIO_SWIN_WIDGETNUM(bridge); else local_widget = SWIN_WIDGETNUM(bridge); - /* Free the old PROM sn_irq_info structure */ - sn_intr_free(local_nasid, local_widget, sn_irq_info); + /* Free the old PROM new_irq_info structure */ + sn_intr_free(local_nasid, local_widget, new_irq_info); + /* Update kernels new_irq_info with new target info */ + unregister_intr_pda(new_irq_info); - /* allocate a new PROM sn_irq_info struct */ + /* allocate a new PROM new_irq_info struct */ status = sn_intr_alloc(local_nasid, local_widget, - __pa(tmp_sn_irq_info), irq, t_nasid, - t_slice); - - if (status == 0) { - /* Update kernels sn_irq_info with new target info */ - unregister_intr_pda(sn_irq_info); - sn_irq_info->irq_cpuid = cpuid; - sn_irq_info->irq_nasid = t_nasid; - sn_irq_info->irq_slice = t_slice; - sn_irq_info->irq_xtalkaddr = - tmp_sn_irq_info->irq_xtalkaddr; - sn_irq_info->irq_cookie = tmp_sn_irq_info->irq_cookie; - register_intr_pda(sn_irq_info); - - if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type)) { - pcibr_change_devices_irq(sn_irq_info); - } + __pa(new_irq_info), irq, + cpuid_to_nasid(cpuid), + cpuid_to_slice(cpuid)); + + /* SAL call failed */ + if (status) { + kfree(new_irq_info); + break; + } + + new_irq_info->irq_cpuid = cpuid; + register_intr_pda(new_irq_info); + + if (IS_PCI_BRIDGE_ASIC(new_irq_info->irq_bridge_type)) + pcibr_change_devices_irq(new_irq_info); - sn_irq_info = sn_irq_info->irq_next; + spin_lock(&sn_irq_info_lock); + list_replace_rcu(&sn_irq_info->list, &new_irq_info->list); + spin_unlock(&sn_irq_info_lock); + call_rcu(&sn_irq_info->rcu, sn_irq_info_free); #ifdef CONFIG_SMP - set_irq_affinity_info((irq & 0xff), cpuphys, 0); + set_irq_affinity_info((irq & 0xff), cpuphys, 0); #endif - } else { - break; /* snp_affinity failed the intr_alloc */ - } } - kfree(tmp_sn_irq_info); } struct hw_interrupt_type irq_type_sn = { - "SN hub", - sn_startup_irq, - sn_shutdown_irq, - sn_enable_irq, - sn_disable_irq, - sn_ack_irq, - sn_end_irq, - sn_set_affinity_irq + .typename = "SN hub", + .startup = sn_startup_irq, + .shutdown = sn_shutdown_irq, + .enable = sn_enable_irq, + .disable = sn_disable_irq, + .ack = sn_ack_irq, + .end = sn_end_irq, + .set_affinity = sn_set_affinity_irq }; unsigned int sn_local_vector_to_irq(u8 vector) @@ -231,19 +234,18 @@ static void unregister_intr_pda(struct sn_irq_info *sn_irq_info) struct sn_irq_info *tmp_irq_info; int i, foundmatch; + rcu_read_lock(); if (pdacpu(cpu)->sn_last_irq == irq) { foundmatch = 0; - for (i = pdacpu(cpu)->sn_last_irq - 1; i; i--) { - tmp_irq_info = sn_irq[i]; - while (tmp_irq_info) { + for (i = pdacpu(cpu)->sn_last_irq - 1; + i && !foundmatch; i--) { + list_for_each_entry_rcu(tmp_irq_info, + sn_irq_lh[i], + list) { if (tmp_irq_info->irq_cpuid == cpu) { - foundmatch++; + foundmatch = 1; break; } - tmp_irq_info = tmp_irq_info->irq_next; - } - if (foundmatch) { - break; } } pdacpu(cpu)->sn_last_irq = i; @@ -251,60 +253,27 @@ static void unregister_intr_pda(struct sn_irq_info *sn_irq_info) if (pdacpu(cpu)->sn_first_irq == irq) { foundmatch = 0; - for (i = pdacpu(cpu)->sn_first_irq + 1; i < NR_IRQS; i++) { - tmp_irq_info = sn_irq[i]; - while (tmp_irq_info) { + for (i = pdacpu(cpu)->sn_first_irq + 1; + i < NR_IRQS && !foundmatch; i++) { + list_for_each_entry_rcu(tmp_irq_info, + sn_irq_lh[i], + list) { if (tmp_irq_info->irq_cpuid == cpu) { - foundmatch++; + foundmatch = 1; break; } - tmp_irq_info = tmp_irq_info->irq_next; - } - if (foundmatch) { - break; } } pdacpu(cpu)->sn_first_irq = ((i == NR_IRQS) ? 0 : i); } + rcu_read_unlock(); } -struct sn_irq_info *sn_irq_alloc(nasid_t local_nasid, int local_widget, int irq, - nasid_t nasid, int slice) +static void sn_irq_info_free(struct rcu_head *head) { struct sn_irq_info *sn_irq_info; - int status; - - sn_irq_info = kmalloc(sizeof(*sn_irq_info), GFP_KERNEL); - if (sn_irq_info == NULL) - return NULL; - - memset(sn_irq_info, 0x0, sizeof(*sn_irq_info)); - - status = - sn_intr_alloc(local_nasid, local_widget, __pa(sn_irq_info), irq, - nasid, slice); - - if (status) { - kfree(sn_irq_info); - return NULL; - } else { - return sn_irq_info; - } -} - -void sn_irq_free(struct sn_irq_info *sn_irq_info) -{ - uint64_t bridge = (uint64_t) sn_irq_info->irq_bridge; - nasid_t local_nasid = NASID_GET(bridge); - int local_widget; - - if (local_nasid & 1) /* tio check */ - local_widget = TIO_SWIN_WIDGETNUM(bridge); - else - local_widget = SWIN_WIDGETNUM(bridge); - - sn_intr_free(local_nasid, local_widget, sn_irq_info); + sn_irq_info = container_of(head, struct sn_irq_info, rcu); kfree(sn_irq_info); } @@ -314,30 +283,54 @@ void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info) int slice = sn_irq_info->irq_slice; int cpu = nasid_slice_to_cpuid(nasid, slice); + pci_dev_get(pci_dev); sn_irq_info->irq_cpuid = cpu; sn_irq_info->irq_pciioinfo = SN_PCIDEV_INFO(pci_dev); /* link it into the sn_irq[irq] list */ - sn_irq_info->irq_next = sn_irq[sn_irq_info->irq_irq]; - sn_irq[sn_irq_info->irq_irq] = sn_irq_info; + spin_lock(&sn_irq_info_lock); + list_add_rcu(&sn_irq_info->list, sn_irq_lh[sn_irq_info->irq_irq]); + spin_unlock(&sn_irq_info_lock); (void)register_intr_pda(sn_irq_info); } +void sn_irq_unfixup(struct pci_dev *pci_dev) +{ + struct sn_irq_info *sn_irq_info; + + /* Only cleanup IRQ stuff if this device has a host bus context */ + if (!SN_PCIDEV_BUSSOFT(pci_dev)) + return; + + sn_irq_info = SN_PCIDEV_INFO(pci_dev)->pdi_sn_irq_info; + if (!sn_irq_info || !sn_irq_info->irq_irq) { + kfree(sn_irq_info); + return; + } + + unregister_intr_pda(sn_irq_info); + spin_lock(&sn_irq_info_lock); + list_del_rcu(&sn_irq_info->list); + spin_unlock(&sn_irq_info_lock); + call_rcu(&sn_irq_info->rcu, sn_irq_info_free); + pci_dev_put(pci_dev); +} + static void force_interrupt(int irq) { struct sn_irq_info *sn_irq_info; if (!sn_ioif_inited) return; - sn_irq_info = sn_irq[irq]; - while (sn_irq_info) { + + rcu_read_lock(); + list_for_each_entry_rcu(sn_irq_info, sn_irq_lh[irq], list) { if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type) && - (sn_irq_info->irq_bridge != NULL)) { + (sn_irq_info->irq_bridge != NULL)) pcibr_force_interrupt(sn_irq_info); - } - sn_irq_info = sn_irq_info->irq_next; } + rcu_read_unlock(); } /* @@ -402,19 +395,41 @@ static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info) void sn_lb_int_war_check(void) { + struct sn_irq_info *sn_irq_info; int i; if (!sn_ioif_inited || pda->sn_first_irq == 0) return; + + rcu_read_lock(); for (i = pda->sn_first_irq; i <= pda->sn_last_irq; i++) { - struct sn_irq_info *sn_irq_info = sn_irq[i]; - while (sn_irq_info) { - /* Only call for PCI bridges that are fully initialized. */ + list_for_each_entry_rcu(sn_irq_info, sn_irq_lh[i], list) { + /* + * Only call for PCI bridges that are fully + * initialized. + */ if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type) && - (sn_irq_info->irq_bridge != NULL)) { + (sn_irq_info->irq_bridge != NULL)) sn_check_intr(i, sn_irq_info); - } - sn_irq_info = sn_irq_info->irq_next; } } + rcu_read_unlock(); +} + +void sn_irq_lh_init(void) +{ + int i; + + sn_irq_lh = kmalloc(sizeof(struct list_head *) * NR_IRQS, GFP_KERNEL); + if (!sn_irq_lh) + panic("SN PCI INIT: Failed to allocate memory for PCI init\n"); + + for (i = 0; i < NR_IRQS; i++) { + sn_irq_lh[i] = kmalloc(sizeof(struct list_head), GFP_KERNEL); + if (!sn_irq_lh[i]) + panic("SN PCI INIT: Failed IRQ memory allocation\n"); + + INIT_LIST_HEAD(sn_irq_lh[i]); + } + } diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index 22e10d282c7f..7c7fe441d623 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -270,7 +270,7 @@ void __init sn_setup(char **cmdline_p) { long status, ticks_per_sec, drift; int pxm; - int major = sn_sal_rev_major(), minor = sn_sal_rev_minor(); + u32 version = sn_sal_rev(); extern void sn_cpu_init(void); ia64_sn_plat_set_error_handling_features(); @@ -308,22 +308,21 @@ void __init sn_setup(char **cmdline_p) * support here so we don't have to listen to failed keyboard probe * messages. */ - if ((major < 2 || (major == 2 && minor <= 9)) && - acpi_kbd_controller_present) { + if (version <= 0x0209 && acpi_kbd_controller_present) { printk(KERN_INFO "Disabling legacy keyboard support as prom " "is too old and doesn't provide FADT\n"); acpi_kbd_controller_present = 0; } - printk("SGI SAL version %x.%02x\n", major, minor); + printk("SGI SAL version %x.%02x\n", version >> 8, version & 0x00FF); /* * Confirm the SAL we're running on is recent enough... */ - if ((major < SN_SAL_MIN_MAJOR) || (major == SN_SAL_MIN_MAJOR && - minor < SN_SAL_MIN_MINOR)) { + if (version < SN_SAL_MIN_VERSION) { printk(KERN_ERR "This kernel needs SGI SAL version >= " - "%x.%02x\n", SN_SAL_MIN_MAJOR, SN_SAL_MIN_MINOR); + "%x.%02x\n", SN_SAL_MIN_VERSION >> 8, + SN_SAL_MIN_VERSION & 0x00FF); panic("PROM version too old\n"); } diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c index 8716f4d5314b..c1cbcd1a1398 100644 --- a/arch/ia64/sn/kernel/tiocx.c +++ b/arch/ia64/sn/kernel/tiocx.c @@ -14,6 +14,7 @@ #include <linux/proc_fs.h> #include <linux/device.h> #include <linux/delay.h> +#include <asm/system.h> #include <asm/uaccess.h> #include <asm/sn/sn_sal.h> #include <asm/sn/addrs.h> @@ -481,6 +482,9 @@ static int __init tiocx_init(void) cnodeid_t cnodeid; int found_tiocx_device = 0; + if (!ia64_platform_is("sn2")) + return -ENODEV; + bus_register(&tiocx_bus_type); for (cnodeid = 0; cnodeid < MAX_COMPACT_NODES; cnodeid++) { diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c index 5da9bdbde7cb..a2f7a88aefbb 100644 --- a/arch/ia64/sn/pci/pci_dma.c +++ b/arch/ia64/sn/pci/pci_dma.c @@ -11,9 +11,10 @@ #include <linux/module.h> #include <asm/dma.h> -#include <asm/sn/sn_sal.h> +#include <asm/sn/pcibr_provider.h> #include <asm/sn/pcibus_provider_defs.h> #include <asm/sn/pcidev.h> +#include <asm/sn/sn_sal.h> #define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset) #define SG_ENT_PHYS_ADDRESS(SG) virt_to_phys(SG_ENT_VIRT_ADDRESS(SG)) diff --git a/arch/ia64/sn/pci/pcibr/pcibr_ate.c b/arch/ia64/sn/pci/pcibr/pcibr_ate.c index 0e47bce85f2d..d1647b863e61 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_ate.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_ate.c @@ -8,9 +8,9 @@ #include <linux/types.h> #include <asm/sn/sn_sal.h> +#include <asm/sn/pcibr_provider.h> #include <asm/sn/pcibus_provider_defs.h> #include <asm/sn/pcidev.h> -#include "pci/pcibr_provider.h" int pcibr_invalidate_ate = 0; /* by default don't invalidate ATE on free */ diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c index 64af2b2c1787..b058dc2a0b9d 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c @@ -8,18 +8,17 @@ #include <linux/types.h> #include <linux/pci.h> -#include <asm/sn/sn_sal.h> +#include <asm/sn/addrs.h> #include <asm/sn/geo.h> -#include "xtalk/xwidgetdev.h" -#include "xtalk/hubdev.h" +#include <asm/sn/pcibr_provider.h> #include <asm/sn/pcibus_provider_defs.h> #include <asm/sn/pcidev.h> -#include "pci/tiocp.h" -#include "pci/pic.h" -#include "pci/pcibr_provider.h" -#include "pci/tiocp.h" +#include <asm/sn/pic.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/tiocp.h> #include "tio.h" -#include <asm/sn/addrs.h> +#include "xtalk/xwidgetdev.h" +#include "xtalk/hubdev.h" extern int sn_ioif_inited; diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c index 3893999d23d8..9813da56d311 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c @@ -6,18 +6,51 @@ * Copyright (C) 2001-2004 Silicon Graphics, Inc. All rights reserved. */ -#include <linux/types.h> #include <linux/interrupt.h> +#include <linux/types.h> #include <linux/pci.h> -#include <asm/sn/sn_sal.h> -#include "xtalk/xwidgetdev.h" +#include <asm/sn/addrs.h> #include <asm/sn/geo.h> -#include "xtalk/hubdev.h" +#include <asm/sn/pcibr_provider.h> #include <asm/sn/pcibus_provider_defs.h> #include <asm/sn/pcidev.h> -#include "pci/pcibr_provider.h" -#include <asm/sn/addrs.h> +#include <asm/sn/sn_sal.h> +#include "xtalk/xwidgetdev.h" +#include "xtalk/hubdev.h" + +int +sal_pcibr_slot_enable(struct pcibus_info *soft, int device, void *resp) +{ + struct ia64_sal_retval ret_stuff; + uint64_t busnum; + + ret_stuff.status = 0; + ret_stuff.v0 = 0; + busnum = soft->pbi_buscommon.bs_persist_busnum; + SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_SLOT_ENABLE, (u64) busnum, + (u64) device, (u64) resp, 0, 0, 0, 0); + + return (int)ret_stuff.v0; +} + +int +sal_pcibr_slot_disable(struct pcibus_info *soft, int device, int action, + void *resp) +{ + struct ia64_sal_retval ret_stuff; + uint64_t busnum; + + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + busnum = soft->pbi_buscommon.bs_persist_busnum; + SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_SLOT_DISABLE, + (u64) busnum, (u64) device, (u64) action, + (u64) resp, 0, 0, 0); + + return (int)ret_stuff.v0; +} static int sal_pcibr_error_interrupt(struct pcibus_info *soft) { @@ -188,3 +221,6 @@ pcibr_init_provider(void) return 0; } + +EXPORT_SYMBOL_GPL(sal_pcibr_slot_enable); +EXPORT_SYMBOL_GPL(sal_pcibr_slot_disable); diff --git a/arch/ia64/sn/pci/pcibr/pcibr_reg.c b/arch/ia64/sn/pci/pcibr/pcibr_reg.c index 865c11c3b50a..21426d02fbe6 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_reg.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_reg.c @@ -6,13 +6,13 @@ * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved. */ -#include <linux/types.h> #include <linux/interrupt.h> +#include <linux/types.h> +#include <asm/sn/pcibr_provider.h> #include <asm/sn/pcibus_provider_defs.h> #include <asm/sn/pcidev.h> -#include "pci/tiocp.h" -#include "pci/pic.h" -#include "pci/pcibr_provider.h" +#include <asm/sn/pic.h> +#include <asm/sn/tiocp.h> union br_ptr { struct tiocp tio; diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c index 05aa8c2fe9bb..51cc4e63092c 100644 --- a/arch/ia64/sn/pci/tioca_provider.c +++ b/arch/ia64/sn/pci/tioca_provider.c @@ -589,8 +589,7 @@ tioca_bus_fixup(struct pcibus_bussoft *prom_bussoft) /* sanity check prom rev */ - if (sn_sal_rev_major() < 4 || - (sn_sal_rev_major() == 4 && sn_sal_rev_minor() < 6)) { + if (sn_sal_rev() < 0x0406) { printk (KERN_ERR "%s: SGI prom rev 4.06 or greater required " "for tioca support\n", __FUNCTION__); diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig index 1a4d4ca2a4dc..9c4a39ee89b5 100644 --- a/drivers/pci/hotplug/Kconfig +++ b/drivers/pci/hotplug/Kconfig @@ -187,9 +187,10 @@ config HOTPLUG_PCI_RPA_DLPAR config HOTPLUG_PCI_SGI tristate "SGI PCI Hotplug Support" - depends on HOTPLUG_PCI && IA64_SGI_SN2 + depends on HOTPLUG_PCI && (IA64_SGI_SN2 || IA64_GENERIC) help - Say Y here if you have an SGI IA64 Altix system. + Say Y here if you want to use the SGI Altix Hotplug + Driver for PCI devices. When in doubt, say N. diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile index 3e632ff8c717..31a307004b94 100644 --- a/drivers/pci/hotplug/Makefile +++ b/drivers/pci/hotplug/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_HOTPLUG_PCI_PCIE) += pciehp.o obj-$(CONFIG_HOTPLUG_PCI_SHPC) += shpchp.o obj-$(CONFIG_HOTPLUG_PCI_RPA) += rpaphp.o obj-$(CONFIG_HOTPLUG_PCI_RPA_DLPAR) += rpadlpar_io.o +obj-$(CONFIG_HOTPLUG_PCI_SGI) += sgi_hotplug.o pci_hotplug-objs := pci_hotplug_core.o diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c new file mode 100644 index 000000000000..323041fd41dc --- /dev/null +++ b/drivers/pci/hotplug/sgi_hotplug.c @@ -0,0 +1,611 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2005 Silicon Graphics, Inc. All rights reserved. + * + * This work was based on the 2.4/2.6 kernel development by Dick Reigner. + * Work to add BIOS PROM support was completed by Mike Habeck. + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/proc_fs.h> +#include <linux/types.h> + +#include <asm/sn/addrs.h> +#include <asm/sn/l1.h> +#include <asm/sn/module.h> +#include <asm/sn/pcibr_provider.h> +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/pcidev.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/types.h> + +#include "../pci.h" +#include "pci_hotplug.h" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("SGI (prarit@sgi.com, dickie@sgi.com, habeck@sgi.com)"); +MODULE_DESCRIPTION("SGI Altix Hot Plug PCI Controller Driver"); + +#define PCIIO_ASIC_TYPE_TIOCA 4 +#define PCI_SLOT_ALREADY_UP 2 /* slot already up */ +#define PCI_SLOT_ALREADY_DOWN 3 /* slot already down */ +#define PCI_L1_ERR 7 /* L1 console command error */ +#define PCI_EMPTY_33MHZ 15 /* empty 33 MHz bus */ +#define PCI_L1_QSIZE 128 /* our L1 message buffer size */ +#define SN_MAX_HP_SLOTS 32 /* max number of hotplug slots */ +#define SGI_HOTPLUG_PROM_REV 0x0420 /* Min. required PROM version */ + +/* internal list head */ +static struct list_head sn_hp_list; + +/* hotplug_slot struct's private pointer */ +struct slot { + int device_num; + struct pci_bus *pci_bus; + /* this struct for glue internal only */ + struct hotplug_slot *hotplug_slot; + struct list_head hp_list; +}; + +struct pcibr_slot_enable_resp { + int resp_sub_errno; + char resp_l1_msg[PCI_L1_QSIZE + 1]; +}; + +struct pcibr_slot_disable_resp { + int resp_sub_errno; + char resp_l1_msg[PCI_L1_QSIZE + 1]; +}; + +enum sn_pci_req_e { + PCI_REQ_SLOT_ELIGIBLE, + PCI_REQ_SLOT_DISABLE +}; + +static int enable_slot(struct hotplug_slot *slot); +static int disable_slot(struct hotplug_slot *slot); +static int get_power_status(struct hotplug_slot *slot, u8 *value); + +static struct hotplug_slot_ops sn_hotplug_slot_ops = { + .owner = THIS_MODULE, + .enable_slot = enable_slot, + .disable_slot = disable_slot, + .get_power_status = get_power_status, +}; + +static DECLARE_MUTEX(sn_hotplug_sem); + +static int sn_pci_slot_valid(struct pci_bus *pci_bus, int device) +{ + struct pcibus_info *pcibus_info; + int bricktype; + int bus_num; + + pcibus_info = SN_PCIBUS_BUSSOFT_INFO(pci_bus); + + /* Check to see if this is a valid slot on 'pci_bus' */ + if (!(pcibus_info->pbi_valid_devices & (1 << device))) + return -EPERM; + + bricktype = MODULE_GET_BTYPE(pcibus_info->pbi_moduleid); + bus_num = pcibus_info->pbi_buscommon.bs_persist_busnum & 0xf; + + /* Do not allow hotplug operations on base I/O cards */ + if ((bricktype == L1_BRICKTYPE_IX || bricktype == L1_BRICKTYPE_IA) && + (bus_num == 1 && device != 1)) + return -EPERM; + + return 1; +} + +static int sn_pci_bus_valid(struct pci_bus *pci_bus) +{ + struct pcibus_info *pcibus_info; + int asic_type; + int bricktype; + + pcibus_info = SN_PCIBUS_BUSSOFT_INFO(pci_bus); + + /* Don't register slots hanging off the TIOCA bus */ + asic_type = pcibus_info->pbi_buscommon.bs_asic_type; + if (asic_type == PCIIO_ASIC_TYPE_TIOCA) + return -EPERM; + + /* Only register slots in I/O Bricks that support hotplug */ + bricktype = MODULE_GET_BTYPE(pcibus_info->pbi_moduleid); + switch (bricktype) { + case L1_BRICKTYPE_IX: + case L1_BRICKTYPE_PX: + case L1_BRICKTYPE_IA: + case L1_BRICKTYPE_PA: + return 1; + break; + default: + return -EPERM; + break; + } + + return -EIO; +} + +static int sn_hp_slot_private_alloc(struct hotplug_slot *bss_hotplug_slot, + struct pci_bus *pci_bus, int device) +{ + struct pcibus_info *pcibus_info; + struct slot *slot; + + pcibus_info = SN_PCIBUS_BUSSOFT_INFO(pci_bus); + + bss_hotplug_slot->private = kcalloc(1, sizeof(struct slot), + GFP_KERNEL); + if (!bss_hotplug_slot->private) + return -ENOMEM; + slot = (struct slot *)bss_hotplug_slot->private; + + bss_hotplug_slot->name = kmalloc(33, GFP_KERNEL); + if (!bss_hotplug_slot->name) { + kfree(bss_hotplug_slot->private); + return -ENOMEM; + } + + slot->device_num = device; + slot->pci_bus = pci_bus; + + sprintf(bss_hotplug_slot->name, "module_%c%c%c%c%.2d_b_%d_s_%d", + '0'+RACK_GET_CLASS(MODULE_GET_RACK(pcibus_info->pbi_moduleid)), + '0'+RACK_GET_GROUP(MODULE_GET_RACK(pcibus_info->pbi_moduleid)), + '0'+RACK_GET_NUM(MODULE_GET_RACK(pcibus_info->pbi_moduleid)), + MODULE_GET_BTCHAR(pcibus_info->pbi_moduleid), + MODULE_GET_BPOS(pcibus_info->pbi_moduleid), + ((int)pcibus_info->pbi_buscommon.bs_persist_busnum) & 0xf, + device + 1); + + slot->hotplug_slot = bss_hotplug_slot; + list_add(&slot->hp_list, &sn_hp_list); + + return 0; +} + +static struct hotplug_slot * sn_hp_destroy(void) +{ + struct slot *slot; + struct list_head *list; + struct hotplug_slot *bss_hotplug_slot = NULL; + + list_for_each(list, &sn_hp_list) { + slot = list_entry(list, struct slot, hp_list); + bss_hotplug_slot = slot->hotplug_slot; + list_del(&((struct slot *)bss_hotplug_slot->private)-> + hp_list); + break; + } + return bss_hotplug_slot; +} + +static void sn_bus_alloc_data(struct pci_dev *dev) +{ + struct list_head *node; + struct pci_bus *subordinate_bus; + struct pci_dev *child; + + sn_pci_fixup_slot(dev); + + /* Recursively sets up the sn_irq_info structs */ + if (dev->subordinate) { + subordinate_bus = dev->subordinate; + list_for_each(node, &subordinate_bus->devices) { + child = list_entry(node, struct pci_dev, bus_list); + sn_bus_alloc_data(child); + } + } +} + +static void sn_bus_free_data(struct pci_dev *dev) +{ + struct list_head *node; + struct pci_bus *subordinate_bus; + struct pci_dev *child; + + /* Recursively clean up sn_irq_info structs */ + if (dev->subordinate) { + subordinate_bus = dev->subordinate; + list_for_each(node, &subordinate_bus->devices) { + child = list_entry(node, struct pci_dev, bus_list); + sn_bus_free_data(child); + } + } + sn_pci_unfixup_slot(dev); +} + +static u8 sn_power_status_get(struct hotplug_slot *bss_hotplug_slot) +{ + struct slot *slot = (struct slot *)bss_hotplug_slot->private; + struct pcibus_info *pcibus_info; + u8 retval; + + pcibus_info = SN_PCIBUS_BUSSOFT_INFO(slot->pci_bus); + retval = pcibus_info->pbi_enabled_devices & (1 << slot->device_num); + + return retval ? 1 : 0; +} + +static void sn_slot_mark_enable(struct hotplug_slot *bss_hotplug_slot, + int device_num) +{ + struct slot *slot = (struct slot *)bss_hotplug_slot->private; + struct pcibus_info *pcibus_info; + + pcibus_info = SN_PCIBUS_BUSSOFT_INFO(slot->pci_bus); + pcibus_info->pbi_enabled_devices |= (1 << device_num); +} + +static void sn_slot_mark_disable(struct hotplug_slot *bss_hotplug_slot, + int device_num) +{ + struct slot *slot = (struct slot *)bss_hotplug_slot->private; + struct pcibus_info *pcibus_info; + + pcibus_info = SN_PCIBUS_BUSSOFT_INFO(slot->pci_bus); + pcibus_info->pbi_enabled_devices &= ~(1 << device_num); +} + +static int sn_slot_enable(struct hotplug_slot *bss_hotplug_slot, + int device_num) +{ + struct slot *slot = (struct slot *)bss_hotplug_slot->private; + struct pcibus_info *pcibus_info; + struct pcibr_slot_enable_resp resp; + int rc; + + pcibus_info = SN_PCIBUS_BUSSOFT_INFO(slot->pci_bus); + + /* + * Power-on and initialize the slot in the SN + * PCI infrastructure. + */ + rc = sal_pcibr_slot_enable(pcibus_info, device_num, &resp); + + if (rc == PCI_SLOT_ALREADY_UP) { + dev_dbg(slot->pci_bus->self, "is already active\n"); + return -EPERM; + } + + if (rc == PCI_L1_ERR) { + dev_dbg(slot->pci_bus->self, + "L1 failure %d with message: %s", + resp.resp_sub_errno, resp.resp_l1_msg); + return -EPERM; + } + + if (rc) { + dev_dbg(slot->pci_bus->self, + "insert failed with error %d sub-error %d\n", + rc, resp.resp_sub_errno); + return -EIO; + } + + sn_slot_mark_enable(bss_hotplug_slot, device_num); + + return 0; +} + +static int sn_slot_disable(struct hotplug_slot *bss_hotplug_slot, + int device_num, int action) +{ + struct slot *slot = (struct slot *)bss_hotplug_slot->private; + struct pcibus_info *pcibus_info; + struct pcibr_slot_disable_resp resp; + int rc; + + pcibus_info = SN_PCIBUS_BUSSOFT_INFO(slot->pci_bus); + + rc = sal_pcibr_slot_disable(pcibus_info, device_num, action, &resp); + + if (action == PCI_REQ_SLOT_ELIGIBLE && rc == PCI_SLOT_ALREADY_DOWN) { + dev_dbg(slot->pci_bus->self, "Slot %s already inactive\n"); + return -ENODEV; + } + + if (action == PCI_REQ_SLOT_ELIGIBLE && rc == PCI_EMPTY_33MHZ) { + dev_dbg(slot->pci_bus->self, + "Cannot remove last 33MHz card\n"); + return -EPERM; + } + + if (action == PCI_REQ_SLOT_ELIGIBLE && rc == PCI_L1_ERR) { + dev_dbg(slot->pci_bus->self, + "L1 failure %d with message \n%s\n", + resp.resp_sub_errno, resp.resp_l1_msg); + return -EPERM; + } + + if (action == PCI_REQ_SLOT_ELIGIBLE && rc) { + dev_dbg(slot->pci_bus->self, + "remove failed with error %d sub-error %d\n", + rc, resp.resp_sub_errno); + return -EIO; + } + + if (action == PCI_REQ_SLOT_ELIGIBLE && !rc) + return 0; + + if (action == PCI_REQ_SLOT_DISABLE && !rc) { + sn_slot_mark_disable(bss_hotplug_slot, device_num); + dev_dbg(slot->pci_bus->self, "remove successful\n"); + return 0; + } + + if (action == PCI_REQ_SLOT_DISABLE && rc) { + dev_dbg(slot->pci_bus->self,"remove failed rc = %d\n", rc); + return rc; + } + + return rc; +} + +static int enable_slot(struct hotplug_slot *bss_hotplug_slot) +{ + struct slot *slot = (struct slot *)bss_hotplug_slot->private; + struct pci_bus *new_bus = NULL; + struct pci_dev *dev; + int func, num_funcs; + int new_ppb = 0; + int rc; + + /* Serialize the Linux PCI infrastructure */ + down(&sn_hotplug_sem); + + /* + * Power-on and initialize the slot in the SN + * PCI infrastructure. + */ + rc = sn_slot_enable(bss_hotplug_slot, slot->device_num); + if (rc) { + up(&sn_hotplug_sem); + return rc; + } + + num_funcs = pci_scan_slot(slot->pci_bus, PCI_DEVFN(slot->device_num+1, + PCI_FUNC(0))); + if (!num_funcs) { + dev_dbg(slot->pci_bus->self, "no device in slot\n"); + up(&sn_hotplug_sem); + return -ENODEV; + } + + sn_pci_controller_fixup(pci_domain_nr(slot->pci_bus), + slot->pci_bus->number, + slot->pci_bus); + /* + * Map SN resources for all functions on the card + * to the Linux PCI interface and tell the drivers + * about them. + */ + for (func = 0; func < num_funcs; func++) { + dev = pci_get_slot(slot->pci_bus, + PCI_DEVFN(slot->device_num + 1, + PCI_FUNC(func))); + + + if (dev) { + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { + unsigned char sec_bus; + pci_read_config_byte(dev, PCI_SECONDARY_BUS, + &sec_bus); + new_bus = pci_add_new_bus(dev->bus, dev, + sec_bus); + pci_scan_child_bus(new_bus); + sn_pci_controller_fixup(pci_domain_nr(new_bus), + new_bus->number, + new_bus); + new_ppb = 1; + } + sn_bus_alloc_data(dev); + pci_dev_put(dev); + } + } + + /* Call the driver for the new device */ + pci_bus_add_devices(slot->pci_bus); + /* Call the drivers for the new devices subordinate to PPB */ + if (new_ppb) + pci_bus_add_devices(new_bus); + + up(&sn_hotplug_sem); + + if (rc == 0) + dev_dbg(slot->pci_bus->self, + "insert operation successful\n"); + else + dev_dbg(slot->pci_bus->self, + "insert operation failed rc = %d\n", rc); + + return rc; +} + +static int disable_slot(struct hotplug_slot *bss_hotplug_slot) +{ + struct slot *slot = (struct slot *)bss_hotplug_slot->private; + struct pci_dev *dev; + int func; + int rc; + + /* Acquire update access to the bus */ + down(&sn_hotplug_sem); + + /* is it okay to bring this slot down? */ + rc = sn_slot_disable(bss_hotplug_slot, slot->device_num, + PCI_REQ_SLOT_ELIGIBLE); + if (rc) + goto leaving; + + /* Free the SN resources assigned to the Linux device.*/ + for (func = 0; func < 8; func++) { + dev = pci_get_slot(slot->pci_bus, + PCI_DEVFN(slot->device_num+1, + PCI_FUNC(func))); + if (dev) { + /* + * Some drivers may use dma accesses during the + * driver remove function. We release the sysdata + * areas after the driver remove functions have + * been called. + */ + sn_bus_store_sysdata(dev); + sn_bus_free_data(dev); + pci_remove_bus_device(dev); + pci_dev_put(dev); + } + } + + /* free the collected sysdata pointers */ + sn_bus_free_sysdata(); + + /* Deactivate slot */ + rc = sn_slot_disable(bss_hotplug_slot, slot->device_num, + PCI_REQ_SLOT_DISABLE); + leaving: + /* Release the bus lock */ + up(&sn_hotplug_sem); + + return rc; +} + +static int get_power_status(struct hotplug_slot *bss_hotplug_slot, u8 *value) +{ + down(&sn_hotplug_sem); + *value = sn_power_status_get(bss_hotplug_slot); + up(&sn_hotplug_sem); + return 0; +} + +static void sn_release_slot(struct hotplug_slot *bss_hotplug_slot) +{ + kfree(bss_hotplug_slot->info); + kfree(bss_hotplug_slot->name); + kfree(bss_hotplug_slot->private); + kfree(bss_hotplug_slot); +} + +static int sn_hotplug_slot_register(struct pci_bus *pci_bus) +{ + int device; + struct hotplug_slot *bss_hotplug_slot; + int rc = 0; + + /* + * Currently only four devices are supported, + * in the future there maybe more -- up to 32. + */ + + for (device = 0; device < SN_MAX_HP_SLOTS ; device++) { + if (sn_pci_slot_valid(pci_bus, device) != 1) + continue; + + bss_hotplug_slot = kcalloc(1,sizeof(struct hotplug_slot), + GFP_KERNEL); + if (!bss_hotplug_slot) { + rc = -ENOMEM; + goto alloc_err; + } + + bss_hotplug_slot->info = + kcalloc(1,sizeof(struct hotplug_slot_info), + GFP_KERNEL); + if (!bss_hotplug_slot->info) { + rc = -ENOMEM; + goto alloc_err; + } + + if (sn_hp_slot_private_alloc(bss_hotplug_slot, + pci_bus, device)) { + rc = -ENOMEM; + goto alloc_err; + } + + bss_hotplug_slot->ops = &sn_hotplug_slot_ops; + bss_hotplug_slot->release = &sn_release_slot; + + rc = pci_hp_register(bss_hotplug_slot); + if (rc) + goto register_err; + } + dev_dbg(pci_bus->self, "Registered bus with hotplug\n"); + return rc; + +register_err: + dev_dbg(pci_bus->self, "bus failed to register with err = %d\n", + rc); + +alloc_err: + if (rc == -ENOMEM) + dev_dbg(pci_bus->self, "Memory allocation error\n"); + + /* destroy THIS element */ + if (bss_hotplug_slot) + sn_release_slot(bss_hotplug_slot); + + /* destroy anything else on the list */ + while ((bss_hotplug_slot = sn_hp_destroy())) + pci_hp_deregister(bss_hotplug_slot); + + return rc; +} + +static int sn_pci_hotplug_init(void) +{ + struct pci_bus *pci_bus = NULL; + int rc; + int registered = 0; + + INIT_LIST_HEAD(&sn_hp_list); + + if (sn_sal_rev() < SGI_HOTPLUG_PROM_REV) { + printk(KERN_ERR "%s: PROM version must be greater than 4.05\n", + __FUNCTION__); + return -EPERM; + } + + while ((pci_bus = pci_find_next_bus(pci_bus))) { + if (!pci_bus->sysdata) + continue; + + rc = sn_pci_bus_valid(pci_bus); + if (rc != 1) { + dev_dbg(pci_bus->self, "not a valid hotplug bus\n"); + continue; + } + dev_dbg(pci_bus->self, "valid hotplug bus\n"); + + rc = sn_hotplug_slot_register(pci_bus); + if (!rc) + registered = 1; + else { + registered = 0; + break; + } + } + + return registered == 1 ? 0 : -ENODEV; +} + +static void sn_pci_hotplug_exit(void) +{ + struct hotplug_slot *bss_hotplug_slot; + + while ((bss_hotplug_slot = sn_hp_destroy())) { + pci_hp_deregister(bss_hotplug_slot); + } + + if (!list_empty(&sn_hp_list)) + printk(KERN_ERR "%s: internal list is not empty\n", __FILE__); +} + +module_init(sn_pci_hotplug_init); +module_exit(sn_pci_hotplug_exit); diff --git a/drivers/pci/search.c b/drivers/pci/search.c index a90a533eba0f..05fa91a31c62 100644 --- a/drivers/pci/search.c +++ b/drivers/pci/search.c @@ -379,6 +379,7 @@ exit: EXPORT_SYMBOL(pci_dev_present); EXPORT_SYMBOL(pci_find_bus); +EXPORT_SYMBOL(pci_find_next_bus); EXPORT_SYMBOL(pci_find_device); EXPORT_SYMBOL(pci_find_device_reverse); EXPORT_SYMBOL(pci_find_slot); diff --git a/include/asm-ia64/sn/arch.h b/include/asm-ia64/sn/arch.h index 635fdce854a8..ab827d298569 100644 --- a/include/asm-ia64/sn/arch.h +++ b/include/asm-ia64/sn/arch.h @@ -11,6 +11,7 @@ #ifndef _ASM_IA64_SN_ARCH_H #define _ASM_IA64_SN_ARCH_H +#include <linux/numa.h> #include <asm/types.h> #include <asm/percpu.h> #include <asm/sn/types.h> diff --git a/include/asm-ia64/sn/intr.h b/include/asm-ia64/sn/intr.h index e51471fb0867..e190dd4213d5 100644 --- a/include/asm-ia64/sn/intr.h +++ b/include/asm-ia64/sn/intr.h @@ -9,6 +9,8 @@ #ifndef _ASM_IA64_SN_INTR_H #define _ASM_IA64_SN_INTR_H +#include <linux/rcupdate.h> + #define SGI_UART_VECTOR (0xe9) #define SGI_PCIBR_ERROR (0x33) @@ -33,7 +35,7 @@ // The SN PROM irq struct struct sn_irq_info { - struct sn_irq_info *irq_next; /* sharing irq list */ + struct sn_irq_info *irq_next; /* deprecated DO NOT USE */ short irq_nasid; /* Nasid IRQ is assigned to */ int irq_slice; /* slice IRQ is assigned to */ int irq_cpuid; /* kernel logical cpuid */ @@ -47,6 +49,8 @@ struct sn_irq_info { int irq_cookie; /* unique cookie */ int irq_flags; /* flags */ int irq_share_cnt; /* num devices sharing IRQ */ + struct list_head list; /* list of sn_irq_info structs */ + struct rcu_head rcu; /* rcu callback list */ }; extern void sn_send_IPI_phys(int, long, int, int); diff --git a/arch/ia64/sn/include/pci/pcibr_provider.h b/include/asm-ia64/sn/pcibr_provider.h index 1cd291d8badd..f9b8d2164007 100644 --- a/arch/ia64/sn/include/pci/pcibr_provider.h +++ b/include/asm-ia64/sn/pcibr_provider.h @@ -8,6 +8,9 @@ #ifndef _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H #define _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H +#include <asm/sn/intr.h> +#include <asm/sn/pcibus_provider_defs.h> + /* Workarounds */ #define PV907516 (1 << 1) /* TIOCP: Don't write the write buffer flush reg */ @@ -20,7 +23,7 @@ #define IS_PIC_SOFT(ps) (ps->pbi_bridge_type == PCIBR_BRIDGETYPE_PIC) -/* +/* * The different PCI Bridge types supported on the SGI Altix platforms */ #define PCIBR_BRIDGETYPE_UNKNOWN -1 @@ -100,15 +103,16 @@ struct pcibus_info { struct ate_resource pbi_int_ate_resource; uint64_t pbi_int_ate_size; - + uint64_t pbi_dir_xbase; char pbi_hub_xid; uint64_t pbi_devreg[8]; - spinlock_t pbi_lock; uint32_t pbi_valid_devices; uint32_t pbi_enabled_devices; + + spinlock_t pbi_lock; }; /* @@ -148,4 +152,8 @@ extern void pcibr_change_devices_irq(struct sn_irq_info *sn_irq_info); extern int pcibr_ate_alloc(struct pcibus_info *, int); extern void pcibr_ate_free(struct pcibus_info *, int); extern void ate_write(struct pcibus_info *, int, int, uint64_t); +extern int sal_pcibr_slot_enable(struct pcibus_info *soft, int device, + void *resp); +extern int sal_pcibr_slot_disable(struct pcibus_info *soft, int device, + int action, void *resp); #endif diff --git a/include/asm-ia64/sn/pcidev.h b/include/asm-ia64/sn/pcidev.h index ed4031d80811..49711d00ad04 100644 --- a/include/asm-ia64/sn/pcidev.h +++ b/include/asm-ia64/sn/pcidev.h @@ -10,11 +10,11 @@ #include <linux/pci.h> -extern struct sn_irq_info **sn_irq; - #define SN_PCIDEV_INFO(pci_dev) \ ((struct pcidev_info *)(pci_dev)->sysdata) +#define SN_PCIBUS_BUSSOFT_INFO(pci_bus) \ + (struct pcibus_info *)((struct pcibus_bussoft *)(PCI_CONTROLLER((pci_bus))->platform_data)) /* * Given a pci_bus, return the sn pcibus_bussoft struct. Note that * this only works for root busses, not for busses represented by PPB's. @@ -23,6 +23,8 @@ extern struct sn_irq_info **sn_irq; #define SN_PCIBUS_BUSSOFT(pci_bus) \ ((struct pcibus_bussoft *)(PCI_CONTROLLER((pci_bus))->platform_data)) +#define SN_PCIBUS_BUSSOFT_INFO(pci_bus) \ + (struct pcibus_info *)((struct pcibus_bussoft *)(PCI_CONTROLLER((pci_bus))->platform_data)) /* * Given a struct pci_dev, return the sn pcibus_bussoft struct. Note * that this is not equivalent to SN_PCIBUS_BUSSOFT(pci_dev->bus) due @@ -50,9 +52,17 @@ struct pcidev_info { struct sn_irq_info *pdi_sn_irq_info; struct sn_pcibus_provider *pdi_provider; /* sn pci ops */ + struct pci_dev *host_pci_dev; /* host bus link */ }; extern void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info); - +extern void sn_irq_unfixup(struct pci_dev *pci_dev); +extern void sn_pci_controller_fixup(int segment, int busnum, + struct pci_bus *bus); +extern void sn_bus_store_sysdata(struct pci_dev *dev); +extern void sn_bus_free_sysdata(void); +extern void sn_pci_fixup_slot(struct pci_dev *dev); +extern void sn_pci_unfixup_slot(struct pci_dev *dev); +extern void sn_irq_lh_init(void); #endif /* _ASM_IA64_SN_PCI_PCIDEV_H */ diff --git a/arch/ia64/sn/include/pci/pic.h b/include/asm-ia64/sn/pic.h index fd18acecb1e6..0de82e6b0893 100644 --- a/arch/ia64/sn/include/pci/pic.h +++ b/include/asm-ia64/sn/pic.h @@ -15,7 +15,7 @@ * PIC handles PCI/X busses. PCI/X requires that the 'bridge' (i.e. PIC) * be designated as 'device 0'. That is a departure from earlier SGI * PCI bridges. Because of that we use config space 1 to access the - * config space of the first actual PCI device on the bus. + * config space of the first actual PCI device on the bus. * Here's what the PIC manual says: * * The current PCI-X bus specification now defines that the parent @@ -29,14 +29,14 @@ * correlated Configs pace and our device space 0 <-> 0, 1 <-> 1, etc. * PCI-X requires we start a 1, not 0 and currently the PX brick * does associate our: - * + * * device 0 with configuration space window 1, - * device 1 with configuration space window 2, + * device 1 with configuration space window 2, * device 2 with configuration space window 3, * device 3 with configuration space window 4. * - * The net effect is that all config space access are off-by-one with - * relation to other per-slot accesses on the PIC. + * The net effect is that all config space access are off-by-one with + * relation to other per-slot accesses on the PIC. * Here is a table that shows some of that: * * Internal Slot# @@ -65,7 +65,7 @@ *****************************************************************************/ /* NOTE: PIC WAR. PV#854697. PIC does not allow writes just to [31:0] - * of a 64-bit register. When writing PIC registers, always write the + * of a 64-bit register. When writing PIC registers, always write the * entire 64 bits. */ @@ -164,7 +164,7 @@ struct pic { uint64_t clear_all; /* 0x000{438,,,5F8} */ } p_buf_count[8]; - + /* 0x000600-0x0009FF -- PCI/X registers */ uint64_t p_pcix_bus_err_addr; /* 0x000600 */ uint64_t p_pcix_bus_err_attr; /* 0x000608 */ diff --git a/include/asm-ia64/sn/sn_cpuid.h b/include/asm-ia64/sn/sn_cpuid.h index 20b300187669..d2c1d34dcce4 100644 --- a/include/asm-ia64/sn/sn_cpuid.h +++ b/include/asm-ia64/sn/sn_cpuid.h @@ -81,11 +81,6 @@ * */ -#ifndef CONFIG_SMP -#define cpu_physical_id(cpuid) ((ia64_getreg(_IA64_REG_CR_LID) >> 16) & 0xffff) -#endif - - #define get_node_number(addr) NASID_GET(addr) /* diff --git a/include/asm-ia64/sn/sn_sal.h b/include/asm-ia64/sn/sn_sal.h index 1455375d2ce4..27976d223186 100644 --- a/include/asm-ia64/sn/sn_sal.h +++ b/include/asm-ia64/sn/sn_sal.h @@ -134,43 +134,28 @@ #define SN_SAL_FAKE_PROM 0x02009999 - /** - * sn_sal_rev_major - get the major SGI SAL revision number - * - * The SGI PROM stores its version in sal_[ab]_rev_(major|minor). - * This routine simply extracts the major value from the - * @ia64_sal_systab structure constructed by ia64_sal_init(). - */ -static inline int -sn_sal_rev_major(void) + * sn_sal_revision - get the SGI SAL revision number + * + * The SGI PROM stores its version in the sal_[ab]_rev_(major|minor). + * This routine simply extracts the major and minor values and + * presents them in a u32 format. + * + * For example, version 4.05 would be represented at 0x0405. + */ +static inline u32 +sn_sal_rev(void) { struct ia64_sal_systab *systab = efi.sal_systab; - return (int)systab->sal_b_rev_major; -} - -/** - * sn_sal_rev_minor - get the minor SGI SAL revision number - * - * The SGI PROM stores its version in sal_[ab]_rev_(major|minor). - * This routine simply extracts the minor value from the - * @ia64_sal_systab structure constructed by ia64_sal_init(). - */ -static inline int -sn_sal_rev_minor(void) -{ - struct ia64_sal_systab *systab = efi.sal_systab; - - return (int)systab->sal_b_rev_minor; + return (u32)(systab->sal_b_rev_major << 8 | systab->sal_b_rev_minor); } /* * Specify the minimum PROM revsion required for this kernel. * Note that they're stored in hex format... */ -#define SN_SAL_MIN_MAJOR 0x4 /* SN2 kernels need at least PROM 4.0 */ -#define SN_SAL_MIN_MINOR 0x0 +#define SN_SAL_MIN_VERSION 0x0404 /* * Returns the master console nasid, if the call fails, return an illegal diff --git a/arch/ia64/sn/include/pci/tiocp.h b/include/asm-ia64/sn/tiocp.h index f07c83b2bf6e..5f2489c9d2dd 100644 --- a/arch/ia64/sn/include/pci/tiocp.h +++ b/include/asm-ia64/sn/tiocp.h @@ -111,7 +111,7 @@ struct tiocp{ uint64_t clear_all; /* 0x000{438,,,5F8} */ } cp_buf_count[8]; - + /* 0x000600-0x0009FF -- PCI/X registers */ uint64_t cp_pcix_bus_err_addr; /* 0x000600 */ uint64_t cp_pcix_bus_err_attr; /* 0x000608 */ |