diff options
author | Christoph Lameter <clameter@sgi.com> | 2007-10-16 01:25:38 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-16 09:42:59 -0700 |
commit | 0e1e7c7a739562a321fda07c7cd2a97a7114f8f8 (patch) | |
tree | f2148e5b667152681625c19cf8b2a556500994ea /kernel | |
parent | 523b945855a1427000ffc707c610abe5947ae607 (diff) | |
download | linux-0e1e7c7a739562a321fda07c7cd2a97a7114f8f8.tar.gz linux-0e1e7c7a739562a321fda07c7cd2a97a7114f8f8.tar.bz2 linux-0e1e7c7a739562a321fda07c7cd2a97a7114f8f8.zip |
Memoryless nodes: Use N_HIGH_MEMORY for cpusets
cpusets try to ensure that any node added to a cpuset's mems_allowed is
on-line and contains memory. The assumption was that online nodes contained
memory. Thus, it is possible to add memoryless nodes to a cpuset and then add
tasks to this cpuset. This results in continuous series of oom-kill and
apparent system hang.
Change cpusets to use node_states[N_HIGH_MEMORY] [a.k.a. node_memory_map] in
place of node_online_map when vetting memories. Return error if admin
attempts to write a non-empty mems_allowed node mask containing only
memoryless-nodes.
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Bob Picco <bob.picco@hp.com>
Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@skynet.ie>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cpuset.c | 56 |
1 files changed, 38 insertions, 18 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 57e6448b171e..8b2daac4de83 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -581,26 +581,28 @@ static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask) /* * Return in *pmask the portion of a cpusets's mems_allowed that - * are online. If none are online, walk up the cpuset hierarchy - * until we find one that does have some online mems. If we get - * all the way to the top and still haven't found any online mems, - * return node_online_map. + * are online, with memory. If none are online with memory, walk + * up the cpuset hierarchy until we find one that does have some + * online mems. If we get all the way to the top and still haven't + * found any online mems, return node_states[N_HIGH_MEMORY]. * * One way or another, we guarantee to return some non-empty subset - * of node_online_map. + * of node_states[N_HIGH_MEMORY]. * * Call with callback_mutex held. */ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) { - while (cs && !nodes_intersects(cs->mems_allowed, node_online_map)) + while (cs && !nodes_intersects(cs->mems_allowed, + node_states[N_HIGH_MEMORY])) cs = cs->parent; if (cs) - nodes_and(*pmask, cs->mems_allowed, node_online_map); + nodes_and(*pmask, cs->mems_allowed, + node_states[N_HIGH_MEMORY]); else - *pmask = node_online_map; - BUG_ON(!nodes_intersects(*pmask, node_online_map)); + *pmask = node_states[N_HIGH_MEMORY]; + BUG_ON(!nodes_intersects(*pmask, node_states[N_HIGH_MEMORY])); } /** @@ -924,7 +926,10 @@ static int update_nodemask(struct cpuset *cs, char *buf) int fudge; int retval; - /* top_cpuset.mems_allowed tracks node_online_map; it's read-only */ + /* + * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY]; + * it's read-only + */ if (cs == &top_cpuset) return -EACCES; @@ -941,8 +946,21 @@ static int update_nodemask(struct cpuset *cs, char *buf) retval = nodelist_parse(buf, trialcs.mems_allowed); if (retval < 0) goto done; + if (!nodes_intersects(trialcs.mems_allowed, + node_states[N_HIGH_MEMORY])) { + /* + * error if only memoryless nodes specified. + */ + retval = -ENOSPC; + goto done; + } } - nodes_and(trialcs.mems_allowed, trialcs.mems_allowed, node_online_map); + /* + * Exclude memoryless nodes. We know that trialcs.mems_allowed + * contains at least one node with memory. + */ + nodes_and(trialcs.mems_allowed, trialcs.mems_allowed, + node_states[N_HIGH_MEMORY]); oldmem = cs->mems_allowed; if (nodes_equal(oldmem, trialcs.mems_allowed)) { retval = 0; /* Too easy - nothing to do */ @@ -2098,8 +2116,9 @@ static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur) /* * The cpus_allowed and mems_allowed nodemasks in the top_cpuset track - * cpu_online_map and node_online_map. Force the top cpuset to track - * whats online after any CPU or memory node hotplug or unplug event. + * cpu_online_map and node_states[N_HIGH_MEMORY]. Force the top cpuset to + * track what's online after any CPU or memory node hotplug or unplug + * event. * * To ensure that we don't remove a CPU or node from the top cpuset * that is currently in use by a child cpuset (which would violate @@ -2119,7 +2138,7 @@ static void common_cpu_mem_hotplug_unplug(void) guarantee_online_cpus_mems_in_subtree(&top_cpuset); top_cpuset.cpus_allowed = cpu_online_map; - top_cpuset.mems_allowed = node_online_map; + top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; mutex_unlock(&callback_mutex); mutex_unlock(&manage_mutex); @@ -2147,8 +2166,9 @@ static int cpuset_handle_cpuhp(struct notifier_block *nb, #ifdef CONFIG_MEMORY_HOTPLUG /* - * Keep top_cpuset.mems_allowed tracking node_online_map. - * Call this routine anytime after you change node_online_map. + * Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY]. + * Call this routine anytime after you change + * node_states[N_HIGH_MEMORY]. * See also the previous routine cpuset_handle_cpuhp(). */ @@ -2167,7 +2187,7 @@ void cpuset_track_online_nodes(void) void __init cpuset_init_smp(void) { top_cpuset.cpus_allowed = cpu_online_map; - top_cpuset.mems_allowed = node_online_map; + top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; hotcpu_notifier(cpuset_handle_cpuhp, 0); } @@ -2309,7 +2329,7 @@ void cpuset_init_current_mems_allowed(void) * * Description: Returns the nodemask_t mems_allowed of the cpuset * attached to the specified @tsk. Guaranteed to return some non-empty - * subset of node_online_map, even if this means going outside the + * subset of node_states[N_HIGH_MEMORY], even if this means going outside the * tasks cpuset. **/ |