55bb276118bbd6897765ac40815280d7167017f7
[openwrt/staging/nbd.git] /
1 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2 From: "Jason A. Donenfeld" <Jason@zx2c4.com>
3 Date: Tue, 4 Feb 2020 22:17:26 +0100
4 Subject: [PATCH] wireguard: noise: reject peers with low order public keys
5
6 commit ec31c2676a10e064878927b243fada8c2fb0c03c upstream.
7
8 Our static-static calculation returns a failure if the public key is of
9 low order. We check for this when peers are added, and don't allow them
10 to be added if they're low order, except in the case where we haven't
11 yet been given a private key. In that case, we would defer the removal
12 of the peer until we're given a private key, since at that point we're
13 doing new static-static calculations which incur failures we can act on.
14 This meant, however, that we wound up removing peers rather late in the
15 configuration flow.
16
17 Syzkaller points out that peer_remove calls flush_workqueue, which in
18 turn might then wait for sending a handshake initiation to complete.
19 Since handshake initiation needs the static identity lock, holding the
20 static identity lock while calling peer_remove can result in a rare
21 deadlock. We have precisely this case in this situation of late-stage
22 peer removal based on an invalid public key. We can't drop the lock when
23 removing, because then incoming handshakes might interact with a bogus
24 static-static calculation.
25
26 While the band-aid patch for this would involve breaking up the peer
27 removal into two steps like wg_peer_remove_all does, in order to solve
28 the locking issue, there's actually a much more elegant way of fixing
29 this:
30
31 If the static-static calculation succeeds with one private key, it
32 *must* succeed with all others, because all 32-byte strings map to valid
33 private keys, thanks to clamping. That means we can get rid of this
34 silly dance and locking headaches of removing peers late in the
35 configuration flow, and instead just reject them early on, regardless of
36 whether the device has yet been assigned a private key. For the case
37 where the device doesn't yet have a private key, we safely use zeros
38 just for the purposes of checking for low order points by way of
39 checking the output of the calculation.
40
41 The following PoC will trigger the deadlock:
42
43 ip link add wg0 type wireguard
44 ip addr add 10.0.0.1/24 dev wg0
45 ip link set wg0 up
46 ping -f 10.0.0.2 &
47 while true; do
48 wg set wg0 private-key /dev/null peer AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= allowed-ips 10.0.0.0/24 endpoint 10.0.0.3:1234
49 wg set wg0 private-key <(echo AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=)
50 done
51
52 [ 0.949105] ======================================================
53 [ 0.949550] WARNING: possible circular locking dependency detected
54 [ 0.950143] 5.5.0-debug+ #18 Not tainted
55 [ 0.950431] ------------------------------------------------------
56 [ 0.950959] wg/89 is trying to acquire lock:
57 [ 0.951252] ffff8880333e2128 ((wq_completion)wg-kex-wg0){+.+.}, at: flush_workqueue+0xe3/0x12f0
58 [ 0.951865]
59 [ 0.951865] but task is already holding lock:
60 [ 0.952280] ffff888032819bc0 (&wg->static_identity.lock){++++}, at: wg_set_device+0x95d/0xcc0
61 [ 0.953011]
62 [ 0.953011] which lock already depends on the new lock.
63 [ 0.953011]
64 [ 0.953651]
65 [ 0.953651] the existing dependency chain (in reverse order) is:
66 [ 0.954292]
67 [ 0.954292] -> #2 (&wg->static_identity.lock){++++}:
68 [ 0.954804] lock_acquire+0x127/0x350
69 [ 0.955133] down_read+0x83/0x410
70 [ 0.955428] wg_noise_handshake_create_initiation+0x97/0x700
71 [ 0.955885] wg_packet_send_handshake_initiation+0x13a/0x280
72 [ 0.956401] wg_packet_handshake_send_worker+0x10/0x20
73 [ 0.956841] process_one_work+0x806/0x1500
74 [ 0.957167] worker_thread+0x8c/0xcb0
75 [ 0.957549] kthread+0x2ee/0x3b0
76 [ 0.957792] ret_from_fork+0x24/0x30
77 [ 0.958234]
78 [ 0.958234] -> #1 ((work_completion)(&peer->transmit_handshake_work)){+.+.}:
79 [ 0.958808] lock_acquire+0x127/0x350
80 [ 0.959075] process_one_work+0x7ab/0x1500
81 [ 0.959369] worker_thread+0x8c/0xcb0
82 [ 0.959639] kthread+0x2ee/0x3b0
83 [ 0.959896] ret_from_fork+0x24/0x30
84 [ 0.960346]
85 [ 0.960346] -> #0 ((wq_completion)wg-kex-wg0){+.+.}:
86 [ 0.960945] check_prev_add+0x167/0x1e20
87 [ 0.961351] __lock_acquire+0x2012/0x3170
88 [ 0.961725] lock_acquire+0x127/0x350
89 [ 0.961990] flush_workqueue+0x106/0x12f0
90 [ 0.962280] peer_remove_after_dead+0x160/0x220
91 [ 0.962600] wg_set_device+0xa24/0xcc0
92 [ 0.962994] genl_rcv_msg+0x52f/0xe90
93 [ 0.963298] netlink_rcv_skb+0x111/0x320
94 [ 0.963618] genl_rcv+0x1f/0x30
95 [ 0.963853] netlink_unicast+0x3f6/0x610
96 [ 0.964245] netlink_sendmsg+0x700/0xb80
97 [ 0.964586] __sys_sendto+0x1dd/0x2c0
98 [ 0.964854] __x64_sys_sendto+0xd8/0x1b0
99 [ 0.965141] do_syscall_64+0x90/0xd9a
100 [ 0.965408] entry_SYSCALL_64_after_hwframe+0x49/0xbe
101 [ 0.965769]
102 [ 0.965769] other info that might help us debug this:
103 [ 0.965769]
104 [ 0.966337] Chain exists of:
105 [ 0.966337] (wq_completion)wg-kex-wg0 --> (work_completion)(&peer->transmit_handshake_work) --> &wg->static_identity.lock
106 [ 0.966337]
107 [ 0.967417] Possible unsafe locking scenario:
108 [ 0.967417]
109 [ 0.967836] CPU0 CPU1
110 [ 0.968155] ---- ----
111 [ 0.968497] lock(&wg->static_identity.lock);
112 [ 0.968779] lock((work_completion)(&peer->transmit_handshake_work));
113 [ 0.969345] lock(&wg->static_identity.lock);
114 [ 0.969809] lock((wq_completion)wg-kex-wg0);
115 [ 0.970146]
116 [ 0.970146] *** DEADLOCK ***
117 [ 0.970146]
118 [ 0.970531] 5 locks held by wg/89:
119 [ 0.970908] #0: ffffffff827433c8 (cb_lock){++++}, at: genl_rcv+0x10/0x30
120 [ 0.971400] #1: ffffffff82743480 (genl_mutex){+.+.}, at: genl_rcv_msg+0x642/0xe90
121 [ 0.971924] #2: ffffffff827160c0 (rtnl_mutex){+.+.}, at: wg_set_device+0x9f/0xcc0
122 [ 0.972488] #3: ffff888032819de0 (&wg->device_update_lock){+.+.}, at: wg_set_device+0xb0/0xcc0
123 [ 0.973095] #4: ffff888032819bc0 (&wg->static_identity.lock){++++}, at: wg_set_device+0x95d/0xcc0
124 [ 0.973653]
125 [ 0.973653] stack backtrace:
126 [ 0.973932] CPU: 1 PID: 89 Comm: wg Not tainted 5.5.0-debug+ #18
127 [ 0.974476] Call Trace:
128 [ 0.974638] dump_stack+0x97/0xe0
129 [ 0.974869] check_noncircular+0x312/0x3e0
130 [ 0.975132] ? print_circular_bug+0x1f0/0x1f0
131 [ 0.975410] ? __kernel_text_address+0x9/0x30
132 [ 0.975727] ? unwind_get_return_address+0x51/0x90
133 [ 0.976024] check_prev_add+0x167/0x1e20
134 [ 0.976367] ? graph_lock+0x70/0x160
135 [ 0.976682] __lock_acquire+0x2012/0x3170
136 [ 0.976998] ? register_lock_class+0x1140/0x1140
137 [ 0.977323] lock_acquire+0x127/0x350
138 [ 0.977627] ? flush_workqueue+0xe3/0x12f0
139 [ 0.977890] flush_workqueue+0x106/0x12f0
140 [ 0.978147] ? flush_workqueue+0xe3/0x12f0
141 [ 0.978410] ? find_held_lock+0x2c/0x110
142 [ 0.978662] ? lock_downgrade+0x6e0/0x6e0
143 [ 0.978919] ? queue_rcu_work+0x60/0x60
144 [ 0.979166] ? netif_napi_del+0x151/0x3b0
145 [ 0.979501] ? peer_remove_after_dead+0x160/0x220
146 [ 0.979871] peer_remove_after_dead+0x160/0x220
147 [ 0.980232] wg_set_device+0xa24/0xcc0
148 [ 0.980516] ? deref_stack_reg+0x8e/0xc0
149 [ 0.980801] ? set_peer+0xe10/0xe10
150 [ 0.981040] ? __ww_mutex_check_waiters+0x150/0x150
151 [ 0.981430] ? __nla_validate_parse+0x163/0x270
152 [ 0.981719] ? genl_family_rcv_msg_attrs_parse+0x13f/0x310
153 [ 0.982078] genl_rcv_msg+0x52f/0xe90
154 [ 0.982348] ? genl_family_rcv_msg_attrs_parse+0x310/0x310
155 [ 0.982690] ? register_lock_class+0x1140/0x1140
156 [ 0.983049] netlink_rcv_skb+0x111/0x320
157 [ 0.983298] ? genl_family_rcv_msg_attrs_parse+0x310/0x310
158 [ 0.983645] ? netlink_ack+0x880/0x880
159 [ 0.983888] genl_rcv+0x1f/0x30
160 [ 0.984168] netlink_unicast+0x3f6/0x610
161 [ 0.984443] ? netlink_detachskb+0x60/0x60
162 [ 0.984729] ? find_held_lock+0x2c/0x110
163 [ 0.984976] netlink_sendmsg+0x700/0xb80
164 [ 0.985220] ? netlink_broadcast_filtered+0xa60/0xa60
165 [ 0.985533] __sys_sendto+0x1dd/0x2c0
166 [ 0.985763] ? __x64_sys_getpeername+0xb0/0xb0
167 [ 0.986039] ? sockfd_lookup_light+0x17/0x160
168 [ 0.986397] ? __sys_recvmsg+0x8c/0xf0
169 [ 0.986711] ? __sys_recvmsg_sock+0xd0/0xd0
170 [ 0.987018] __x64_sys_sendto+0xd8/0x1b0
171 [ 0.987283] ? lockdep_hardirqs_on+0x39b/0x5a0
172 [ 0.987666] do_syscall_64+0x90/0xd9a
173 [ 0.987903] entry_SYSCALL_64_after_hwframe+0x49/0xbe
174 [ 0.988223] RIP: 0033:0x7fe77c12003e
175 [ 0.988508] Code: c3 8b 07 85 c0 75 24 49 89 fb 48 89 f0 48 89 d7 48 89 ce 4c 89 c2 4d 89 ca 4c 8b 44 24 08 4c 8b 4c 24 10 4c 4
176 [ 0.989666] RSP: 002b:00007fffada2ed58 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
177 [ 0.990137] RAX: ffffffffffffffda RBX: 00007fe77c159d48 RCX: 00007fe77c12003e
178 [ 0.990583] RDX: 0000000000000040 RSI: 000055fd1d38e020 RDI: 0000000000000004
179 [ 0.991091] RBP: 000055fd1d38e020 R08: 000055fd1cb63358 R09: 000000000000000c
180 [ 0.991568] R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000002c
181 [ 0.992014] R13: 0000000000000004 R14: 000055fd1d38e020 R15: 0000000000000001
182
183 Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
184 Reported-by: syzbot <syzkaller@googlegroups.com>
185 Signed-off-by: David S. Miller <davem@davemloft.net>
186 Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
187 ---
188 drivers/net/wireguard/netlink.c | 6 ++----
189 drivers/net/wireguard/noise.c | 10 +++++++---
190 2 files changed, 9 insertions(+), 7 deletions(-)
191
192 --- a/drivers/net/wireguard/netlink.c
193 +++ b/drivers/net/wireguard/netlink.c
194 @@ -575,10 +575,8 @@ static int wg_set_device(struct sk_buff
195 private_key);
196 list_for_each_entry_safe(peer, temp, &wg->peer_list,
197 peer_list) {
198 - if (wg_noise_precompute_static_static(peer))
199 - wg_noise_expire_current_peer_keypairs(peer);
200 - else
201 - wg_peer_remove(peer);
202 + BUG_ON(!wg_noise_precompute_static_static(peer));
203 + wg_noise_expire_current_peer_keypairs(peer);
204 }
205 wg_cookie_checker_precompute_device_keys(&wg->cookie_checker);
206 up_write(&wg->static_identity.lock);
207 --- a/drivers/net/wireguard/noise.c
208 +++ b/drivers/net/wireguard/noise.c
209 @@ -46,17 +46,21 @@ void __init wg_noise_init(void)
210 /* Must hold peer->handshake.static_identity->lock */
211 bool wg_noise_precompute_static_static(struct wg_peer *peer)
212 {
213 - bool ret = true;
214 + bool ret;
215
216 down_write(&peer->handshake.lock);
217 - if (peer->handshake.static_identity->has_identity)
218 + if (peer->handshake.static_identity->has_identity) {
219 ret = curve25519(
220 peer->handshake.precomputed_static_static,
221 peer->handshake.static_identity->static_private,
222 peer->handshake.remote_static);
223 - else
224 + } else {
225 + u8 empty[NOISE_PUBLIC_KEY_LEN] = { 0 };
226 +
227 + ret = curve25519(empty, empty, peer->handshake.remote_static);
228 memset(peer->handshake.precomputed_static_static, 0,
229 NOISE_PUBLIC_KEY_LEN);
230 + }
231 up_write(&peer->handshake.lock);
232 return ret;
233 }