rds: tcp: atomically purge entries from rds_tcp_conn_list during netns delete
author Sowmini Varadhan <sowmini.varadhan@oracle.com>
Thu, 30 Nov 2017 19:11:29 +0000 (11:11 -0800)
committer David S. Miller <davem@davemloft.net>
Fri, 1 Dec 2017 20:25:15 +0000 (15:25 -0500)
The rds_tcp_kill_sock() function parses the rds_tcp_conn_list
to find the rds_connection entries marked for deletion as part
of the netns deletion under the protection of the rds_tcp_conn_lock.
Since the rds_tcp_conn_list tracks rds_tcp_connections (which
have a 1:1 mapping with rds_conn_path), multiple tc entries in
the rds_tcp_conn_list will map to a single rds_connection, and will
be deleted as part of the rds_conn_destroy() operation that is
done outside the rds_tcp_conn_lock.

The rds_tcp_conn_list traversal done under the protection of
rds_tcp_conn_lock should not leave any doomed tc entries in
the list after the rds_tcp_conn_lock is released, else another
concurrently executing netns delete (for a different netns) thread
may trip on these entries.

Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/rds/tcp.c
net/rds/tcp.h

index f580f72ae69ec651c2942830a02cb61aac2b77aa..39f502d47969d9dcb30fc1f87640325a0f4c260d 100644 (file)
@@ -306,7 +306,8 @@ static void rds_tcp_conn_free(void *arg)
        rdsdebug("freeing tc %p\n", tc);
 
        spin_lock_irqsave(&rds_tcp_conn_lock, flags);
-       list_del(&tc->t_tcp_node);
+       if (!tc->t_tcp_node_detached)
+               list_del(&tc->t_tcp_node);
        spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
 
        kmem_cache_free(rds_tcp_conn_slab, tc);
@@ -510,8 +511,12 @@ static void rds_tcp_kill_sock(struct net *net)
 
                if (net != c_net || !tc->t_sock)
                        continue;
-               if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn))
+               if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) {
                        list_move_tail(&tc->t_tcp_node, &tmp_list);
+               } else {
+                       list_del(&tc->t_tcp_node);
+                       tc->t_tcp_node_detached = true;
+               }
        }
        spin_unlock_irq(&rds_tcp_conn_lock);
        list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node)
index 1aafbf7c30114155daec203fe8e0980249853ae9..e7858ee8ed8b7c15f1017f2e99efbb6fc3bf7ffb 100644 (file)
@@ -12,6 +12,7 @@ struct rds_tcp_incoming {
 struct rds_tcp_connection {
 
        struct list_head        t_tcp_node;
+       bool                    t_tcp_node_detached;
        struct rds_conn_path    *t_cpath;
        /* t_conn_path_lock synchronizes the connection establishment between
         * rds_tcp_accept_one and rds_tcp_conn_path_connect