cpuset: update top cpuset's mems after adding a node
After adding a node into the machine, top cpuset's mems isn't updated.
By reviewing the code, we found that the update function
cpuset_track_online_nodes()
was invoked after node_states[N_ONLINE] changes. It is wrong because
N_ONLINE just means node has pgdat, and if node has/added memory, we use
N_HIGH_MEMORY. So, We should invoke the update function after
node_states[N_HIGH_MEMORY] changes, just like its commit says.
This patch fixes it. And we use notifier of memory hotplug instead of
direct calling of cpuset_track_online_nodes().
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Acked-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Paul Menage <menage@google.com
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 2691926..8e540d3 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -74,8 +74,6 @@
return current->flags & PF_SPREAD_SLAB;
}
-extern void cpuset_track_online_nodes(void);
-
extern int current_cpuset_is_being_rebound(void);
extern void rebuild_sched_domains(void);
@@ -151,8 +149,6 @@
return 0;
}
-static inline void cpuset_track_online_nodes(void) {}
-
static inline int current_cpuset_is_being_rebound(void)
{
return 0;
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 81fc679..da7ff61 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -36,6 +36,7 @@
#include <linux/list.h>
#include <linux/mempolicy.h>
#include <linux/mm.h>
+#include <linux/memory.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/namei.h>
@@ -2015,12 +2016,23 @@
* Call this routine anytime after node_states[N_HIGH_MEMORY] changes.
* See also the previous routine cpuset_track_online_cpus().
*/
-void cpuset_track_online_nodes(void)
+static int cpuset_track_online_nodes(struct notifier_block *self,
+ unsigned long action, void *arg)
{
cgroup_lock();
- top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
- scan_for_empty_cpusets(&top_cpuset);
+ switch (action) {
+ case MEM_ONLINE:
+ top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
+ break;
+ case MEM_OFFLINE:
+ top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
+ scan_for_empty_cpusets(&top_cpuset);
+ break;
+ default:
+ break;
+ }
cgroup_unlock();
+ return NOTIFY_OK;
}
#endif
@@ -2036,6 +2048,7 @@
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
hotcpu_notifier(cpuset_track_online_cpus, 0);
+ hotplug_memory_notifier(cpuset_track_online_nodes, 10);
}
/**
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 6837a10..b5b2b15 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -22,7 +22,6 @@
#include <linux/highmem.h>
#include <linux/vmalloc.h>
#include <linux/ioport.h>
-#include <linux/cpuset.h>
#include <linux/delay.h>
#include <linux/migrate.h>
#include <linux/page-isolation.h>
@@ -498,8 +497,6 @@
/* we online node here. we can't roll back from here. */
node_set_online(nid);
- cpuset_track_online_nodes();
-
if (new_pgdat) {
ret = register_one_node(nid);
/*