113678d21210d41b3d178afed9b93783f73ad760
[openwrt/openwrt.git] /
1 From 1da05ad0bbc51cd226a2297e66b3cc8499803306 Mon Sep 17 00:00:00 2001
2 From: "Jason A. Donenfeld" <Jason@zx2c4.com>
3 Date: Tue, 4 Feb 2020 22:17:26 +0100
4 Subject: [PATCH 083/124] wireguard: noise: reject peers with low order public
5 keys
6
7 commit ec31c2676a10e064878927b243fada8c2fb0c03c upstream.
8
9 Our static-static calculation returns a failure if the public key is of
10 low order. We check for this when peers are added, and don't allow them
11 to be added if they're low order, except in the case where we haven't
12 yet been given a private key. In that case, we would defer the removal
13 of the peer until we're given a private key, since at that point we're
14 doing new static-static calculations which incur failures we can act on.
15 This meant, however, that we wound up removing peers rather late in the
16 configuration flow.
17
18 Syzkaller points out that peer_remove calls flush_workqueue, which in
19 turn might then wait for sending a handshake initiation to complete.
20 Since handshake initiation needs the static identity lock, holding the
21 static identity lock while calling peer_remove can result in a rare
22 deadlock. We have precisely this case in this situation of late-stage
23 peer removal based on an invalid public key. We can't drop the lock when
24 removing, because then incoming handshakes might interact with a bogus
25 static-static calculation.
26
27 While the band-aid patch for this would involve breaking up the peer
28 removal into two steps like wg_peer_remove_all does, in order to solve
29 the locking issue, there's actually a much more elegant way of fixing
30 this:
31
32 If the static-static calculation succeeds with one private key, it
33 *must* succeed with all others, because all 32-byte strings map to valid
34 private keys, thanks to clamping. That means we can get rid of this
35 silly dance and locking headaches of removing peers late in the
36 configuration flow, and instead just reject them early on, regardless of
37 whether the device has yet been assigned a private key. For the case
38 where the device doesn't yet have a private key, we safely use zeros
39 just for the purposes of checking for low order points by way of
40 checking the output of the calculation.
41
42 The following PoC will trigger the deadlock:
43
44 ip link add wg0 type wireguard
45 ip addr add 10.0.0.1/24 dev wg0
46 ip link set wg0 up
47 ping -f 10.0.0.2 &
48 while true; do
49 wg set wg0 private-key /dev/null peer AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= allowed-ips 10.0.0.0/24 endpoint 10.0.0.3:1234
50 wg set wg0 private-key <(echo AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=)
51 done
52
53 [ 0.949105] ======================================================
54 [ 0.949550] WARNING: possible circular locking dependency detected
55 [ 0.950143] 5.5.0-debug+ #18 Not tainted
56 [ 0.950431] ------------------------------------------------------
57 [ 0.950959] wg/89 is trying to acquire lock:
58 [ 0.951252] ffff8880333e2128 ((wq_completion)wg-kex-wg0){+.+.}, at: flush_workqueue+0xe3/0x12f0
59 [ 0.951865]
60 [ 0.951865] but task is already holding lock:
61 [ 0.952280] ffff888032819bc0 (&wg->static_identity.lock){++++}, at: wg_set_device+0x95d/0xcc0
62 [ 0.953011]
63 [ 0.953011] which lock already depends on the new lock.
64 [ 0.953011]
65 [ 0.953651]
66 [ 0.953651] the existing dependency chain (in reverse order) is:
67 [ 0.954292]
68 [ 0.954292] -> #2 (&wg->static_identity.lock){++++}:
69 [ 0.954804] lock_acquire+0x127/0x350
70 [ 0.955133] down_read+0x83/0x410
71 [ 0.955428] wg_noise_handshake_create_initiation+0x97/0x700
72 [ 0.955885] wg_packet_send_handshake_initiation+0x13a/0x280
73 [ 0.956401] wg_packet_handshake_send_worker+0x10/0x20
74 [ 0.956841] process_one_work+0x806/0x1500
75 [ 0.957167] worker_thread+0x8c/0xcb0
76 [ 0.957549] kthread+0x2ee/0x3b0
77 [ 0.957792] ret_from_fork+0x24/0x30
78 [ 0.958234]
79 [ 0.958234] -> #1 ((work_completion)(&peer->transmit_handshake_work)){+.+.}:
80 [ 0.958808] lock_acquire+0x127/0x350
81 [ 0.959075] process_one_work+0x7ab/0x1500
82 [ 0.959369] worker_thread+0x8c/0xcb0
83 [ 0.959639] kthread+0x2ee/0x3b0
84 [ 0.959896] ret_from_fork+0x24/0x30
85 [ 0.960346]
86 [ 0.960346] -> #0 ((wq_completion)wg-kex-wg0){+.+.}:
87 [ 0.960945] check_prev_add+0x167/0x1e20
88 [ 0.961351] __lock_acquire+0x2012/0x3170
89 [ 0.961725] lock_acquire+0x127/0x350
90 [ 0.961990] flush_workqueue+0x106/0x12f0
91 [ 0.962280] peer_remove_after_dead+0x160/0x220
92 [ 0.962600] wg_set_device+0xa24/0xcc0
93 [ 0.962994] genl_rcv_msg+0x52f/0xe90
94 [ 0.963298] netlink_rcv_skb+0x111/0x320
95 [ 0.963618] genl_rcv+0x1f/0x30
96 [ 0.963853] netlink_unicast+0x3f6/0x610
97 [ 0.964245] netlink_sendmsg+0x700/0xb80
98 [ 0.964586] __sys_sendto+0x1dd/0x2c0
99 [ 0.964854] __x64_sys_sendto+0xd8/0x1b0
100 [ 0.965141] do_syscall_64+0x90/0xd9a
101 [ 0.965408] entry_SYSCALL_64_after_hwframe+0x49/0xbe
102 [ 0.965769]
103 [ 0.965769] other info that might help us debug this:
104 [ 0.965769]
105 [ 0.966337] Chain exists of:
106 [ 0.966337] (wq_completion)wg-kex-wg0 --> (work_completion)(&peer->transmit_handshake_work) --> &wg->static_identity.lock
107 [ 0.966337]
108 [ 0.967417] Possible unsafe locking scenario:
109 [ 0.967417]
110 [ 0.967836] CPU0 CPU1
111 [ 0.968155] ---- ----
112 [ 0.968497] lock(&wg->static_identity.lock);
113 [ 0.968779] lock((work_completion)(&peer->transmit_handshake_work));
114 [ 0.969345] lock(&wg->static_identity.lock);
115 [ 0.969809] lock((wq_completion)wg-kex-wg0);
116 [ 0.970146]
117 [ 0.970146] *** DEADLOCK ***
118 [ 0.970146]
119 [ 0.970531] 5 locks held by wg/89:
120 [ 0.970908] #0: ffffffff827433c8 (cb_lock){++++}, at: genl_rcv+0x10/0x30
121 [ 0.971400] #1: ffffffff82743480 (genl_mutex){+.+.}, at: genl_rcv_msg+0x642/0xe90
122 [ 0.971924] #2: ffffffff827160c0 (rtnl_mutex){+.+.}, at: wg_set_device+0x9f/0xcc0
123 [ 0.972488] #3: ffff888032819de0 (&wg->device_update_lock){+.+.}, at: wg_set_device+0xb0/0xcc0
124 [ 0.973095] #4: ffff888032819bc0 (&wg->static_identity.lock){++++}, at: wg_set_device+0x95d/0xcc0
125 [ 0.973653]
126 [ 0.973653] stack backtrace:
127 [ 0.973932] CPU: 1 PID: 89 Comm: wg Not tainted 5.5.0-debug+ #18
128 [ 0.974476] Call Trace:
129 [ 0.974638] dump_stack+0x97/0xe0
130 [ 0.974869] check_noncircular+0x312/0x3e0
131 [ 0.975132] ? print_circular_bug+0x1f0/0x1f0
132 [ 0.975410] ? __kernel_text_address+0x9/0x30
133 [ 0.975727] ? unwind_get_return_address+0x51/0x90
134 [ 0.976024] check_prev_add+0x167/0x1e20
135 [ 0.976367] ? graph_lock+0x70/0x160
136 [ 0.976682] __lock_acquire+0x2012/0x3170
137 [ 0.976998] ? register_lock_class+0x1140/0x1140
138 [ 0.977323] lock_acquire+0x127/0x350
139 [ 0.977627] ? flush_workqueue+0xe3/0x12f0
140 [ 0.977890] flush_workqueue+0x106/0x12f0
141 [ 0.978147] ? flush_workqueue+0xe3/0x12f0
142 [ 0.978410] ? find_held_lock+0x2c/0x110
143 [ 0.978662] ? lock_downgrade+0x6e0/0x6e0
144 [ 0.978919] ? queue_rcu_work+0x60/0x60
145 [ 0.979166] ? netif_napi_del+0x151/0x3b0
146 [ 0.979501] ? peer_remove_after_dead+0x160/0x220
147 [ 0.979871] peer_remove_after_dead+0x160/0x220
148 [ 0.980232] wg_set_device+0xa24/0xcc0
149 [ 0.980516] ? deref_stack_reg+0x8e/0xc0
150 [ 0.980801] ? set_peer+0xe10/0xe10
151 [ 0.981040] ? __ww_mutex_check_waiters+0x150/0x150
152 [ 0.981430] ? __nla_validate_parse+0x163/0x270
153 [ 0.981719] ? genl_family_rcv_msg_attrs_parse+0x13f/0x310
154 [ 0.982078] genl_rcv_msg+0x52f/0xe90
155 [ 0.982348] ? genl_family_rcv_msg_attrs_parse+0x310/0x310
156 [ 0.982690] ? register_lock_class+0x1140/0x1140
157 [ 0.983049] netlink_rcv_skb+0x111/0x320
158 [ 0.983298] ? genl_family_rcv_msg_attrs_parse+0x310/0x310
159 [ 0.983645] ? netlink_ack+0x880/0x880
160 [ 0.983888] genl_rcv+0x1f/0x30
161 [ 0.984168] netlink_unicast+0x3f6/0x610
162 [ 0.984443] ? netlink_detachskb+0x60/0x60
163 [ 0.984729] ? find_held_lock+0x2c/0x110
164 [ 0.984976] netlink_sendmsg+0x700/0xb80
165 [ 0.985220] ? netlink_broadcast_filtered+0xa60/0xa60
166 [ 0.985533] __sys_sendto+0x1dd/0x2c0
167 [ 0.985763] ? __x64_sys_getpeername+0xb0/0xb0
168 [ 0.986039] ? sockfd_lookup_light+0x17/0x160
169 [ 0.986397] ? __sys_recvmsg+0x8c/0xf0
170 [ 0.986711] ? __sys_recvmsg_sock+0xd0/0xd0
171 [ 0.987018] __x64_sys_sendto+0xd8/0x1b0
172 [ 0.987283] ? lockdep_hardirqs_on+0x39b/0x5a0
173 [ 0.987666] do_syscall_64+0x90/0xd9a
174 [ 0.987903] entry_SYSCALL_64_after_hwframe+0x49/0xbe
175 [ 0.988223] RIP: 0033:0x7fe77c12003e
176 [ 0.988508] Code: c3 8b 07 85 c0 75 24 49 89 fb 48 89 f0 48 89 d7 48 89 ce 4c 89 c2 4d 89 ca 4c 8b 44 24 08 4c 8b 4c 24 10 4c 4
177 [ 0.989666] RSP: 002b:00007fffada2ed58 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
178 [ 0.990137] RAX: ffffffffffffffda RBX: 00007fe77c159d48 RCX: 00007fe77c12003e
179 [ 0.990583] RDX: 0000000000000040 RSI: 000055fd1d38e020 RDI: 0000000000000004
180 [ 0.991091] RBP: 000055fd1d38e020 R08: 000055fd1cb63358 R09: 000000000000000c
181 [ 0.991568] R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000002c
182 [ 0.992014] R13: 0000000000000004 R14: 000055fd1d38e020 R15: 0000000000000001
183
184 Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
185 Reported-by: syzbot <syzkaller@googlegroups.com>
186 Signed-off-by: David S. Miller <davem@davemloft.net>
187 Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
188 ---
189 drivers/net/wireguard/netlink.c | 6 ++----
190 drivers/net/wireguard/noise.c | 10 +++++++---
191 2 files changed, 9 insertions(+), 7 deletions(-)
192
193 --- a/drivers/net/wireguard/netlink.c
194 +++ b/drivers/net/wireguard/netlink.c
195 @@ -575,10 +575,8 @@ static int wg_set_device(struct sk_buff
196 private_key);
197 list_for_each_entry_safe(peer, temp, &wg->peer_list,
198 peer_list) {
199 - if (wg_noise_precompute_static_static(peer))
200 - wg_noise_expire_current_peer_keypairs(peer);
201 - else
202 - wg_peer_remove(peer);
203 + BUG_ON(!wg_noise_precompute_static_static(peer));
204 + wg_noise_expire_current_peer_keypairs(peer);
205 }
206 wg_cookie_checker_precompute_device_keys(&wg->cookie_checker);
207 up_write(&wg->static_identity.lock);
208 --- a/drivers/net/wireguard/noise.c
209 +++ b/drivers/net/wireguard/noise.c
210 @@ -46,17 +46,21 @@ void __init wg_noise_init(void)
211 /* Must hold peer->handshake.static_identity->lock */
212 bool wg_noise_precompute_static_static(struct wg_peer *peer)
213 {
214 - bool ret = true;
215 + bool ret;
216
217 down_write(&peer->handshake.lock);
218 - if (peer->handshake.static_identity->has_identity)
219 + if (peer->handshake.static_identity->has_identity) {
220 ret = curve25519(
221 peer->handshake.precomputed_static_static,
222 peer->handshake.static_identity->static_private,
223 peer->handshake.remote_static);
224 - else
225 + } else {
226 + u8 empty[NOISE_PUBLIC_KEY_LEN] = { 0 };
227 +
228 + ret = curve25519(empty, empty, peer->handshake.remote_static);
229 memset(peer->handshake.precomputed_static_static, 0,
230 NOISE_PUBLIC_KEY_LEN);
231 + }
232 up_write(&peer->handshake.lock);
233 return ret;
234 }