febef3e28a66fb0aa34b712901cc804f2780dc40
[openwrt/staging/xback.git] /
1 From: Wei Wang <weiwan@google.com>
2 Date: Mon, 1 Mar 2021 17:21:13 -0800
3 Subject: [PATCH] net: fix race between napi kthread mode and busy poll
4
5 Currently, napi_thread_wait() checks for NAPI_STATE_SCHED bit to
6 determine if the kthread owns this napi and could call napi->poll() on
7 it. However, if socket busy poll is enabled, it is possible that the
8 busy poll thread grabs this SCHED bit (after the previous napi->poll()
9 invokes napi_complete_done() and clears SCHED bit) and tries to poll
10 on the same napi. napi_disable() could grab the SCHED bit as well.
11 This patch tries to fix this race by adding a new bit
12 NAPI_STATE_SCHED_THREADED in napi->state. This bit gets set in
13 ____napi_schedule() if the threaded mode is enabled, and gets cleared
14 in napi_complete_done(), and we only poll the napi in kthread if this
15 bit is set. This helps distinguish the ownership of the napi between
16 kthread and other scenarios and fixes the race issue.
17
18 Fixes: 29863d41bb6e ("net: implement threaded-able napi poll loop support")
19 Reported-by: Martin Zaharinov <micron10@gmail.com>
20 Suggested-by: Jakub Kicinski <kuba@kernel.org>
21 Signed-off-by: Wei Wang <weiwan@google.com>
22 Cc: Alexander Duyck <alexanderduyck@fb.com>
23 Cc: Eric Dumazet <edumazet@google.com>
24 Cc: Paolo Abeni <pabeni@redhat.com>
25 Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
26 ---
27
28 --- a/include/linux/netdevice.h
29 +++ b/include/linux/netdevice.h
30 @@ -359,6 +359,7 @@ enum {
31 NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
32 NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
33 NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/
34 + NAPI_STATE_SCHED_THREADED, /* Napi is currently scheduled in threaded mode */
35 };
36
37 enum {
38 @@ -370,6 +371,7 @@ enum {
39 NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
40 NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
41 NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED),
42 + NAPIF_STATE_SCHED_THREADED = BIT(NAPI_STATE_SCHED_THREADED),
43 };
44
45 enum gro_result {
46 --- a/net/core/dev.c
47 +++ b/net/core/dev.c
48 @@ -4300,6 +4300,8 @@ static inline void ____napi_schedule(str
49 */
50 thread = READ_ONCE(napi->thread);
51 if (thread) {
52 + if (thread->state != TASK_INTERRUPTIBLE)
53 + set_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
54 wake_up_process(thread);
55 return;
56 }
57 @@ -6560,7 +6562,8 @@ bool napi_complete_done(struct napi_stru
58
59 WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
60
61 - new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED);
62 + new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED |
63 + NAPIF_STATE_SCHED_THREADED);
64
65 /* If STATE_MISSED was set, leave STATE_SCHED set,
66 * because we will call napi->poll() one more time.
67 @@ -6996,16 +6999,25 @@ static int napi_poll(struct napi_struct
68
69 static int napi_thread_wait(struct napi_struct *napi)
70 {
71 + bool woken = false;
72 +
73 set_current_state(TASK_INTERRUPTIBLE);
74
75 while (!kthread_should_stop() && !napi_disable_pending(napi)) {
76 - if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
77 + /* Testing SCHED_THREADED bit here to make sure the current
78 + * kthread owns this napi and could poll on this napi.
79 + * Testing SCHED bit is not enough because SCHED bit might be
80 + * set by some other busy poll thread or by napi_disable().
81 + */
82 + if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state) || woken) {
83 WARN_ON(!list_empty(&napi->poll_list));
84 __set_current_state(TASK_RUNNING);
85 return 0;
86 }
87
88 schedule();
89 + /* woken being true indicates this thread owns this napi. */
90 + woken = true;
91 set_current_state(TASK_INTERRUPTIBLE);
92 }
93 __set_current_state(TASK_RUNNING);