summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorChristoph Lameter <clameter@sgi.com>2007-10-16 01:25:38 -0700
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-16 09:42:59 -0700
commit0e1e7c7a739562a321fda07c7cd2a97a7114f8f8 (patch)
treef2148e5b667152681625c19cf8b2a556500994ea /kernel
parent523b945855a1427000ffc707c610abe5947ae607 (diff)
downloadlinux-0e1e7c7a739562a321fda07c7cd2a97a7114f8f8.tar.gz
linux-0e1e7c7a739562a321fda07c7cd2a97a7114f8f8.tar.bz2
linux-0e1e7c7a739562a321fda07c7cd2a97a7114f8f8.zip
Memoryless nodes: Use N_HIGH_MEMORY for cpusets
cpusets try to ensure that any node added to a cpuset's mems_allowed is on-line and contains memory. The assumption was that online nodes contained memory. Thus, it is possible to add memoryless nodes to a cpuset and then add tasks to this cpuset. This results in continuous series of oom-kill and apparent system hang. Change cpusets to use node_states[N_HIGH_MEMORY] [a.k.a. node_memory_map] in place of node_online_map when vetting memories. Return error if admin attempts to write a non-empty mems_allowed node mask containing only memoryless-nodes. Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Signed-off-by: Bob Picco <bob.picco@hp.com> Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Mel Gorman <mel@skynet.ie> Signed-off-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cpuset.c56
1 files changed, 38 insertions, 18 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 57e6448b171e..8b2daac4de83 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -581,26 +581,28 @@ static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask)
/*
* Return in *pmask the portion of a cpusets's mems_allowed that
- * are online. If none are online, walk up the cpuset hierarchy
- * until we find one that does have some online mems. If we get
- * all the way to the top and still haven't found any online mems,
- * return node_online_map.
+ * are online, with memory. If none are online with memory, walk
+ * up the cpuset hierarchy until we find one that does have some
+ * online mems. If we get all the way to the top and still haven't
+ * found any online mems, return node_states[N_HIGH_MEMORY].
*
* One way or another, we guarantee to return some non-empty subset
- * of node_online_map.
+ * of node_states[N_HIGH_MEMORY].
*
* Call with callback_mutex held.
*/
static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
{
- while (cs && !nodes_intersects(cs->mems_allowed, node_online_map))
+ while (cs && !nodes_intersects(cs->mems_allowed,
+ node_states[N_HIGH_MEMORY]))
cs = cs->parent;
if (cs)
- nodes_and(*pmask, cs->mems_allowed, node_online_map);
+ nodes_and(*pmask, cs->mems_allowed,
+ node_states[N_HIGH_MEMORY]);
else
- *pmask = node_online_map;
- BUG_ON(!nodes_intersects(*pmask, node_online_map));
+ *pmask = node_states[N_HIGH_MEMORY];
+ BUG_ON(!nodes_intersects(*pmask, node_states[N_HIGH_MEMORY]));
}
/**
@@ -924,7 +926,10 @@ static int update_nodemask(struct cpuset *cs, char *buf)
int fudge;
int retval;
- /* top_cpuset.mems_allowed tracks node_online_map; it's read-only */
+ /*
+ * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY];
+ * it's read-only
+ */
if (cs == &top_cpuset)
return -EACCES;
@@ -941,8 +946,21 @@ static int update_nodemask(struct cpuset *cs, char *buf)
retval = nodelist_parse(buf, trialcs.mems_allowed);
if (retval < 0)
goto done;
+ if (!nodes_intersects(trialcs.mems_allowed,
+ node_states[N_HIGH_MEMORY])) {
+ /*
+ * error if only memoryless nodes specified.
+ */
+ retval = -ENOSPC;
+ goto done;
+ }
}
- nodes_and(trialcs.mems_allowed, trialcs.mems_allowed, node_online_map);
+ /*
+ * Exclude memoryless nodes. We know that trialcs.mems_allowed
+ * contains at least one node with memory.
+ */
+ nodes_and(trialcs.mems_allowed, trialcs.mems_allowed,
+ node_states[N_HIGH_MEMORY]);
oldmem = cs->mems_allowed;
if (nodes_equal(oldmem, trialcs.mems_allowed)) {
retval = 0; /* Too easy - nothing to do */
@@ -2098,8 +2116,9 @@ static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur)
/*
* The cpus_allowed and mems_allowed nodemasks in the top_cpuset track
- * cpu_online_map and node_online_map. Force the top cpuset to track
- * whats online after any CPU or memory node hotplug or unplug event.
+ * cpu_online_map and node_states[N_HIGH_MEMORY]. Force the top cpuset to
+ * track what's online after any CPU or memory node hotplug or unplug
+ * event.
*
* To ensure that we don't remove a CPU or node from the top cpuset
* that is currently in use by a child cpuset (which would violate
@@ -2119,7 +2138,7 @@ static void common_cpu_mem_hotplug_unplug(void)
guarantee_online_cpus_mems_in_subtree(&top_cpuset);
top_cpuset.cpus_allowed = cpu_online_map;
- top_cpuset.mems_allowed = node_online_map;
+ top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
mutex_unlock(&callback_mutex);
mutex_unlock(&manage_mutex);
@@ -2147,8 +2166,9 @@ static int cpuset_handle_cpuhp(struct notifier_block *nb,
#ifdef CONFIG_MEMORY_HOTPLUG
/*
- * Keep top_cpuset.mems_allowed tracking node_online_map.
- * Call this routine anytime after you change node_online_map.
+ * Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY].
+ * Call this routine anytime after you change
+ * node_states[N_HIGH_MEMORY].
* See also the previous routine cpuset_handle_cpuhp().
*/
@@ -2167,7 +2187,7 @@ void cpuset_track_online_nodes(void)
void __init cpuset_init_smp(void)
{
top_cpuset.cpus_allowed = cpu_online_map;
- top_cpuset.mems_allowed = node_online_map;
+ top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
hotcpu_notifier(cpuset_handle_cpuhp, 0);
}
@@ -2309,7 +2329,7 @@ void cpuset_init_current_mems_allowed(void)
*
* Description: Returns the nodemask_t mems_allowed of the cpuset
* attached to the specified @tsk. Guaranteed to return some non-empty
- * subset of node_online_map, even if this means going outside the
+ * subset of node_states[N_HIGH_MEMORY], even if this means going outside the
* tasks cpuset.
**/