[PATCH] Remove down_write() from taskstats code invoked on the exit() path
In send_cpu_listeners(), which is called on the exit path, a down_write()
was protecting operations like skb_clone() and genlmsg_unicast() that do
GFP_KERNEL allocations. If the oom-killer decides to kill tasks to satisfy
the allocations, the exit of those tasks could block on the same semaphore.
The down_write() was only needed to allow removal of invalid listeners from
the listener list. The patch converts the down_write to a down_read and
defers the removal to a separate critical region. This ensures that even
if the oom-killer is called, no other task's exit is blocked as it can
still acquire another down_read.
Thanks to Andrew Morton & Herbert Xu for pointing out the oom-related
pitfalls, and to Chandra Seetharaman for suggesting this fix instead of
using something more complex like RCU.
Signed-off-by: Chandra Seetharaman <sekharan@us.ibm.com>
Signed-off-by: Shailabh Nagar <nagar@watson.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index abb59e3..f45179c 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -51,6 +51,7 @@
struct listener {
struct list_head list;
pid_t pid;
+ char valid;
};
struct listener_list {
@@ -127,7 +128,7 @@
struct listener *s, *tmp;
struct sk_buff *skb_next, *skb_cur = skb;
void *reply = genlmsg_data(genlhdr);
- int rc, ret;
+ int rc, ret, delcount = 0;
rc = genlmsg_end(skb, reply);
if (rc < 0) {
@@ -137,7 +138,7 @@
rc = 0;
listeners = &per_cpu(listener_array, cpu);
- down_write(&listeners->sem);
+ down_read(&listeners->sem);
list_for_each_entry_safe(s, tmp, &listeners->list, list) {
skb_next = NULL;
if (!list_is_last(&s->list, &listeners->list)) {
@@ -150,14 +151,26 @@
}
ret = genlmsg_unicast(skb_cur, s->pid);
if (ret == -ECONNREFUSED) {
- list_del(&s->list);
- kfree(s);
+ s->valid = 0;
+ delcount++;
rc = ret;
}
skb_cur = skb_next;
}
- up_write(&listeners->sem);
+ up_read(&listeners->sem);
+ if (!delcount)
+ return rc;
+
+ /* Delete invalidated entries */
+ down_write(&listeners->sem);
+ list_for_each_entry_safe(s, tmp, &listeners->list, list) {
+ if (!s->valid) {
+ list_del(&s->list);
+ kfree(s);
+ }
+ }
+ up_write(&listeners->sem);
return rc;
}
@@ -290,6 +303,7 @@
goto cleanup;
s->pid = pid;
INIT_LIST_HEAD(&s->list);
+ s->valid = 1;
listeners = &per_cpu(listener_array, cpu);
down_write(&listeners->sem);