xref: /openbsd-src/sys/net/if_wg.c (revision c1a45aed656e7d5627c30c92421893a76f370ccb)
1 /*	$OpenBSD: if_wg.c,v 1.24 2022/03/17 18:51:56 tb Exp $ */
2 
3 /*
4  * Copyright (C) 2015-2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
5  * Copyright (C) 2019-2020 Matt Dunwoodie <ncon@noconroy.net>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include "bpfilter.h"
21 #include "pf.h"
22 
23 #include <sys/types.h>
24 #include <sys/systm.h>
25 #include <sys/param.h>
26 #include <sys/pool.h>
27 
28 #include <sys/socket.h>
29 #include <sys/socketvar.h>
30 #include <sys/percpu.h>
31 #include <sys/ioctl.h>
32 #include <sys/mbuf.h>
33 
34 #include <net/if.h>
35 #include <net/if_var.h>
36 #include <net/if_types.h>
37 #include <net/if_wg.h>
38 
39 #include <net/wg_noise.h>
40 #include <net/wg_cookie.h>
41 
42 #include <net/pfvar.h>
43 #include <net/route.h>
44 #include <net/bpf.h>
45 
46 #include <netinet/ip.h>
47 #include <netinet/ip6.h>
48 #include <netinet/udp.h>
49 #include <netinet/in_pcb.h>
50 
51 #include <crypto/siphash.h>
52 
53 #define DEFAULT_MTU		1420
54 
55 #define MAX_STAGED_PKT		128
56 #define MAX_QUEUED_PKT		1024
57 #define MAX_QUEUED_PKT_MASK	(MAX_QUEUED_PKT - 1)
58 
59 #define MAX_QUEUED_HANDSHAKES	4096
60 
61 #define HASHTABLE_PEER_SIZE	(1 << 11)
62 #define HASHTABLE_INDEX_SIZE	(1 << 13)
63 #define MAX_PEERS_PER_IFACE	(1 << 20)
64 
65 #define REKEY_TIMEOUT		5
66 #define REKEY_TIMEOUT_JITTER	334 /* 1/3 sec, round for arc4random_uniform */
67 #define KEEPALIVE_TIMEOUT	10
68 #define MAX_TIMER_HANDSHAKES	(90 / REKEY_TIMEOUT)
69 #define NEW_HANDSHAKE_TIMEOUT	(REKEY_TIMEOUT + KEEPALIVE_TIMEOUT)
70 #define UNDERLOAD_TIMEOUT	1
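/*
 * For reference with the values above: NEW_HANDSHAKE_TIMEOUT works out to
 * 5 + 10 = 15 seconds, and MAX_TIMER_HANDSHAKES to 90 / 5 = 18, i.e. roughly
 * 90 seconds of retries in wg_timers_run_retry_handshake() before giving up.
 */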
71 
72 #define DPRINTF(sc, str, ...) do { if (ISSET((sc)->sc_if.if_flags, IFF_DEBUG))\
73     printf("%s: " str, (sc)->sc_if.if_xname, ##__VA_ARGS__); } while (0)
74 
75 #define CONTAINER_OF(ptr, type, member) ({			\
76 	const __typeof( ((type *)0)->member ) *__mptr = (ptr);	\
77 	(type *)( (char *)__mptr - offsetof(type,member) );})
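/*
 * CONTAINER_OF recovers the enclosing structure from a pointer to one of its
 * members, e.g. the timer callbacks below use
 * CONTAINER_OF(t, struct wg_peer, p_timers) to get from a struct wg_timers
 * back to the struct wg_peer that embeds it.
 */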
78 
79 /* First byte indicating packet type on the wire */
80 #define WG_PKT_INITIATION htole32(1)
81 #define WG_PKT_RESPONSE htole32(2)
82 #define WG_PKT_COOKIE htole32(3)
83 #define WG_PKT_DATA htole32(4)
84 
85 #define WG_PKT_WITH_PADDING(n)	(((n) + (16-1)) & (~(16-1)))
86 #define WG_KEY_SIZE		WG_KEY_LEN
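/*
 * WG_PKT_WITH_PADDING rounds a plaintext length up to the next multiple of
 * 16 as required by the wire format, e.g. WG_PKT_WITH_PADDING(37) == 48 and
 * WG_PKT_WITH_PADDING(48) == 48.
 */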
87 
88 struct wg_pkt_initiation {
89 	uint32_t		t;
90 	uint32_t		s_idx;
91 	uint8_t			ue[NOISE_PUBLIC_KEY_LEN];
92 	uint8_t			es[NOISE_PUBLIC_KEY_LEN + NOISE_AUTHTAG_LEN];
93 	uint8_t			ets[NOISE_TIMESTAMP_LEN + NOISE_AUTHTAG_LEN];
94 	struct cookie_macs	m;
95 };
96 
97 struct wg_pkt_response {
98 	uint32_t		t;
99 	uint32_t		s_idx;
100 	uint32_t		r_idx;
101 	uint8_t			ue[NOISE_PUBLIC_KEY_LEN];
102 	uint8_t			en[0 + NOISE_AUTHTAG_LEN];
103 	struct cookie_macs	m;
104 };
105 
106 struct wg_pkt_cookie {
107 	uint32_t		t;
108 	uint32_t		r_idx;
109 	uint8_t			nonce[COOKIE_NONCE_SIZE];
110 	uint8_t			ec[COOKIE_ENCRYPTED_SIZE];
111 };
112 
113 struct wg_pkt_data {
114 	uint32_t		t;
115 	uint32_t		r_idx;
116 	uint8_t			nonce[sizeof(uint64_t)];
117 	uint8_t			buf[];
118 };
119 
120 struct wg_endpoint {
121 	union {
122 		struct sockaddr		r_sa;
123 		struct sockaddr_in	r_sin;
124 #ifdef INET6
125 		struct sockaddr_in6	r_sin6;
126 #endif
127 	} e_remote;
128 	union {
129 		struct in_addr		l_in;
130 #ifdef INET6
131 		struct in6_pktinfo	l_pktinfo6;
132 #define l_in6 l_pktinfo6.ipi6_addr
133 #endif
134 	} e_local;
135 };
136 
137 struct wg_tag {
138 	struct wg_endpoint	 t_endpoint;
139 	struct wg_peer		*t_peer;
140 	struct mbuf		*t_mbuf;
141 	int			 t_done;
142 	int			 t_mtu;
143 };
144 
145 struct wg_index {
146 	LIST_ENTRY(wg_index)	 i_entry;
147 	SLIST_ENTRY(wg_index)	 i_unused_entry;
148 	uint32_t		 i_key;
149 	struct noise_remote	*i_value;
150 };
151 
152 struct wg_timers {
153 	/* t_lock is for blocking wg_timers_event_* when setting t_disabled. */
154 	struct rwlock		 t_lock;
155 
156 	int			 t_disabled;
157 	int			 t_need_another_keepalive;
158 	uint16_t		 t_persistent_keepalive_interval;
159 	struct timeout		 t_new_handshake;
160 	struct timeout		 t_send_keepalive;
161 	struct timeout		 t_retry_handshake;
162 	struct timeout		 t_zero_key_material;
163 	struct timeout		 t_persistent_keepalive;
164 
165 	struct mutex		 t_handshake_mtx;
166 	struct timespec		 t_handshake_last_sent;	/* nanouptime */
167 	struct timespec		 t_handshake_complete;	/* nanotime */
168 	int			 t_handshake_retries;
169 };
170 
171 struct wg_aip {
172 	struct art_node		 a_node;
173 	LIST_ENTRY(wg_aip)	 a_entry;
174 	struct wg_peer		*a_peer;
175 	struct wg_aip_io	 a_data;
176 };
177 
178 struct wg_queue {
179 	struct mutex		 q_mtx;
180 	struct mbuf_list	 q_list;
181 };
182 
183 struct wg_ring {
184 	struct mutex	 r_mtx;
185 	uint32_t	 r_head;
186 	uint32_t	 r_tail;
187 	struct mbuf	*r_buf[MAX_QUEUED_PKT];
188 };
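/*
 * r_head and r_tail are free-running producer/consumer counters; slots are
 * addressed modulo MAX_QUEUED_PKT (a power of two) via MAX_QUEUED_PKT_MASK,
 * so r_tail - r_head gives the current occupancy even after the counters
 * wrap.
 */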
189 
190 struct wg_peer {
191 	LIST_ENTRY(wg_peer)	 p_pubkey_entry;
192 	TAILQ_ENTRY(wg_peer)	 p_seq_entry;
193 	uint64_t		 p_id;
194 	struct wg_softc		*p_sc;
195 
196 	struct noise_remote	 p_remote;
197 	struct cookie_maker	 p_cookie;
198 	struct wg_timers	 p_timers;
199 
200 	struct mutex		 p_counters_mtx;
201 	uint64_t		 p_counters_tx;
202 	uint64_t		 p_counters_rx;
203 
204 	struct mutex		 p_endpoint_mtx;
205 	struct wg_endpoint	 p_endpoint;
206 
207 	struct task		 p_send_initiation;
208 	struct task		 p_send_keepalive;
209 	struct task		 p_clear_secrets;
210 	struct task		 p_deliver_out;
211 	struct task		 p_deliver_in;
212 
213 	struct mbuf_queue	 p_stage_queue;
214 	struct wg_queue		 p_encap_queue;
215 	struct wg_queue		 p_decap_queue;
216 
217 	SLIST_HEAD(,wg_index)	 p_unused_index;
218 	struct wg_index		 p_index[3];
219 
220 	LIST_HEAD(,wg_aip)	 p_aip;
221 
222 	SLIST_ENTRY(wg_peer)	 p_start_list;
223 	int			 p_start_onlist;
224 };
225 
226 struct wg_softc {
227 	struct ifnet		 sc_if;
228 	SIPHASH_KEY		 sc_secret;
229 
230 	struct rwlock		 sc_lock;
231 	struct noise_local	 sc_local;
232 	struct cookie_checker	 sc_cookie;
233 	in_port_t		 sc_udp_port;
234 	int			 sc_udp_rtable;
235 
236 	struct rwlock		 sc_so_lock;
237 	struct socket		*sc_so4;
238 #ifdef INET6
239 	struct socket		*sc_so6;
240 #endif
241 
242 	size_t			 sc_aip_num;
243 	struct art_root		*sc_aip4;
244 #ifdef INET6
245 	struct art_root		*sc_aip6;
246 #endif
247 
248 	struct rwlock		 sc_peer_lock;
249 	size_t			 sc_peer_num;
250 	LIST_HEAD(,wg_peer)	*sc_peer;
251 	TAILQ_HEAD(,wg_peer)	 sc_peer_seq;
252 	u_long			 sc_peer_mask;
253 
254 	struct mutex		 sc_index_mtx;
255 	LIST_HEAD(,wg_index)	*sc_index;
256 	u_long			 sc_index_mask;
257 
258 	struct task		 sc_handshake;
259 	struct mbuf_queue	 sc_handshake_queue;
260 
261 	struct task		 sc_encap;
262 	struct task		 sc_decap;
263 	struct wg_ring		 sc_encap_ring;
264 	struct wg_ring		 sc_decap_ring;
265 };
266 
267 struct wg_peer *
268 	wg_peer_create(struct wg_softc *, uint8_t[WG_KEY_SIZE]);
269 struct wg_peer *
270 	wg_peer_lookup(struct wg_softc *, const uint8_t[WG_KEY_SIZE]);
271 void	wg_peer_destroy(struct wg_peer *);
272 void	wg_peer_set_endpoint_from_tag(struct wg_peer *, struct wg_tag *);
273 void	wg_peer_set_sockaddr(struct wg_peer *, struct sockaddr *);
274 int	wg_peer_get_sockaddr(struct wg_peer *, struct sockaddr *);
275 void	wg_peer_clear_src(struct wg_peer *);
276 void	wg_peer_get_endpoint(struct wg_peer *, struct wg_endpoint *);
277 void	wg_peer_counters_add(struct wg_peer *, uint64_t, uint64_t);
278 
279 int	wg_aip_add(struct wg_softc *, struct wg_peer *, struct wg_aip_io *);
280 struct wg_peer *
281 	wg_aip_lookup(struct art_root *, void *);
282 int	wg_aip_remove(struct wg_softc *, struct wg_peer *,
283 	    struct wg_aip_io *);
284 
285 int	wg_socket_open(struct socket **, int, in_port_t *, int *, void *);
286 void	wg_socket_close(struct socket **);
287 int	wg_bind(struct wg_softc *, in_port_t *, int *);
288 void	wg_unbind(struct wg_softc *);
289 int	wg_send(struct wg_softc *, struct wg_endpoint *, struct mbuf *);
290 void	wg_send_buf(struct wg_softc *, struct wg_endpoint *, uint8_t *,
291 	    size_t);
292 
293 struct wg_tag *
294 	wg_tag_get(struct mbuf *);
295 
296 void	wg_timers_init(struct wg_timers *);
297 void	wg_timers_enable(struct wg_timers *);
298 void	wg_timers_disable(struct wg_timers *);
299 void	wg_timers_set_persistent_keepalive(struct wg_timers *, uint16_t);
300 int	wg_timers_get_persistent_keepalive(struct wg_timers *, uint16_t *);
301 void	wg_timers_get_last_handshake(struct wg_timers *, struct timespec *);
302 int	wg_timers_expired_handshake_last_sent(struct wg_timers *);
303 int	wg_timers_check_handshake_last_sent(struct wg_timers *);
304 
305 void	wg_timers_event_data_sent(struct wg_timers *);
306 void	wg_timers_event_data_received(struct wg_timers *);
307 void	wg_timers_event_any_authenticated_packet_sent(struct wg_timers *);
308 void	wg_timers_event_any_authenticated_packet_received(struct wg_timers *);
309 void	wg_timers_event_handshake_initiated(struct wg_timers *);
310 void	wg_timers_event_handshake_responded(struct wg_timers *);
311 void	wg_timers_event_handshake_complete(struct wg_timers *);
312 void	wg_timers_event_session_derived(struct wg_timers *);
313 void	wg_timers_event_any_authenticated_packet_traversal(struct wg_timers *);
314 void	wg_timers_event_want_initiation(struct wg_timers *);
315 void	wg_timers_event_reset_handshake_last_sent(struct wg_timers *);
316 
317 void	wg_timers_run_send_initiation(void *, int);
318 void	wg_timers_run_retry_handshake(void *);
319 void	wg_timers_run_send_keepalive(void *);
320 void	wg_timers_run_new_handshake(void *);
321 void	wg_timers_run_zero_key_material(void *);
322 void	wg_timers_run_persistent_keepalive(void *);
323 
324 void	wg_peer_send_buf(struct wg_peer *, uint8_t *, size_t);
325 void	wg_send_initiation(void *);
326 void	wg_send_response(struct wg_peer *);
327 void	wg_send_cookie(struct wg_softc *, struct cookie_macs *, uint32_t,
328 	    struct wg_endpoint *);
329 void	wg_send_keepalive(void *);
330 void	wg_peer_clear_secrets(void *);
331 void	wg_handshake(struct wg_softc *, struct mbuf *);
332 void	wg_handshake_worker(void *);
333 
334 void	wg_encap(struct wg_softc *, struct mbuf *);
335 void	wg_decap(struct wg_softc *, struct mbuf *);
336 void	wg_encap_worker(void *);
337 void	wg_decap_worker(void *);
338 void	wg_deliver_out(void *);
339 void	wg_deliver_in(void *);
340 
341 int	wg_queue_in(struct wg_softc *, struct wg_peer *, struct mbuf *);
342 void	wg_queue_out(struct wg_softc *, struct wg_peer *);
343 struct mbuf *
344 	wg_ring_dequeue(struct wg_ring *);
345 struct mbuf *
346 	wg_queue_dequeue(struct wg_queue *, struct wg_tag **);
347 size_t	wg_queue_len(struct wg_queue *);
348 
349 struct noise_remote *
350 	wg_remote_get(void *, uint8_t[NOISE_PUBLIC_KEY_LEN]);
351 uint32_t
352 	wg_index_set(void *, struct noise_remote *);
353 struct noise_remote *
354 	wg_index_get(void *, uint32_t);
355 void	wg_index_drop(void *, uint32_t);
356 
357 struct mbuf *
358 	wg_input(void *, struct mbuf *, struct ip *, struct ip6_hdr *, void *,
359 	    int);
360 int	wg_output(struct ifnet *, struct mbuf *, struct sockaddr *,
361 	    struct rtentry *);
362 int	wg_ioctl_set(struct wg_softc *, struct wg_data_io *);
363 int	wg_ioctl_get(struct wg_softc *, struct wg_data_io *);
364 int	wg_ioctl(struct ifnet *, u_long, caddr_t);
365 int	wg_up(struct wg_softc *);
366 void	wg_down(struct wg_softc *);
367 
368 int	wg_clone_create(struct if_clone *, int);
369 int	wg_clone_destroy(struct ifnet *);
370 void	wgattach(int);
371 
372 uint64_t	peer_counter = 0;
373 uint64_t	keypair_counter = 0;
374 struct pool	wg_aip_pool;
375 struct pool	wg_peer_pool;
376 struct pool	wg_ratelimit_pool;
377 struct timeval	underload_interval = { UNDERLOAD_TIMEOUT, 0 };
378 
379 size_t		 wg_counter = 0;
380 struct taskq	*wg_handshake_taskq;
381 struct taskq	*wg_crypt_taskq;
382 
383 struct if_clone	wg_cloner =
384     IF_CLONE_INITIALIZER("wg", wg_clone_create, wg_clone_destroy);
385 
386 struct wg_peer *
387 wg_peer_create(struct wg_softc *sc, uint8_t public[WG_KEY_SIZE])
388 {
389 	struct wg_peer	*peer;
390 	uint64_t	 idx;
391 
392 	rw_assert_wrlock(&sc->sc_lock);
393 
394 	if (sc->sc_peer_num >= MAX_PEERS_PER_IFACE)
395 		return NULL;
396 
397 	if ((peer = pool_get(&wg_peer_pool, PR_NOWAIT)) == NULL)
398 		return NULL;
399 
400 	peer->p_id = peer_counter++;
401 	peer->p_sc = sc;
402 
403 	noise_remote_init(&peer->p_remote, public, &sc->sc_local);
404 	cookie_maker_init(&peer->p_cookie, public);
405 	wg_timers_init(&peer->p_timers);
406 
407 	mtx_init(&peer->p_counters_mtx, IPL_NET);
408 	peer->p_counters_tx = 0;
409 	peer->p_counters_rx = 0;
410 
411 	mtx_init(&peer->p_endpoint_mtx, IPL_NET);
412 	bzero(&peer->p_endpoint, sizeof(peer->p_endpoint));
413 
414 	task_set(&peer->p_send_initiation, wg_send_initiation, peer);
415 	task_set(&peer->p_send_keepalive, wg_send_keepalive, peer);
416 	task_set(&peer->p_clear_secrets, wg_peer_clear_secrets, peer);
417 	task_set(&peer->p_deliver_out, wg_deliver_out, peer);
418 	task_set(&peer->p_deliver_in, wg_deliver_in, peer);
419 
420 	mq_init(&peer->p_stage_queue, MAX_STAGED_PKT, IPL_NET);
421 	mtx_init(&peer->p_encap_queue.q_mtx, IPL_NET);
422 	ml_init(&peer->p_encap_queue.q_list);
423 	mtx_init(&peer->p_decap_queue.q_mtx, IPL_NET);
424 	ml_init(&peer->p_decap_queue.q_list);
425 
426 	SLIST_INIT(&peer->p_unused_index);
427 	SLIST_INSERT_HEAD(&peer->p_unused_index, &peer->p_index[0],
428 	    i_unused_entry);
429 	SLIST_INSERT_HEAD(&peer->p_unused_index, &peer->p_index[1],
430 	    i_unused_entry);
431 	SLIST_INSERT_HEAD(&peer->p_unused_index, &peer->p_index[2],
432 	    i_unused_entry);
433 
434 	LIST_INIT(&peer->p_aip);
435 
436 	peer->p_start_onlist = 0;
437 
438 	idx = SipHash24(&sc->sc_secret, public, WG_KEY_SIZE);
439 	idx &= sc->sc_peer_mask;
440 
441 	rw_enter_write(&sc->sc_peer_lock);
442 	LIST_INSERT_HEAD(&sc->sc_peer[idx], peer, p_pubkey_entry);
443 	TAILQ_INSERT_TAIL(&sc->sc_peer_seq, peer, p_seq_entry);
444 	sc->sc_peer_num++;
445 	rw_exit_write(&sc->sc_peer_lock);
446 
447 	DPRINTF(sc, "Peer %llu created\n", peer->p_id);
448 	return peer;
449 }
450 
451 struct wg_peer *
452 wg_peer_lookup(struct wg_softc *sc, const uint8_t public[WG_KEY_SIZE])
453 {
454 	uint8_t		 peer_key[WG_KEY_SIZE];
455 	struct wg_peer	*peer;
456 	uint64_t	 idx;
457 
458 	idx = SipHash24(&sc->sc_secret, public, WG_KEY_SIZE);
459 	idx &= sc->sc_peer_mask;
460 
461 	rw_enter_read(&sc->sc_peer_lock);
462 	LIST_FOREACH(peer, &sc->sc_peer[idx], p_pubkey_entry) {
463 		noise_remote_keys(&peer->p_remote, peer_key, NULL);
464 		if (timingsafe_bcmp(peer_key, public, WG_KEY_SIZE) == 0)
465 			goto done;
466 	}
467 	peer = NULL;
468 done:
469 	rw_exit_read(&sc->sc_peer_lock);
470 	return peer;
471 }
472 
473 void
474 wg_peer_destroy(struct wg_peer *peer)
475 {
476 	struct wg_softc	*sc = peer->p_sc;
477 	struct wg_aip *aip, *taip;
478 
479 	rw_assert_wrlock(&sc->sc_lock);
480 
481 	/*
482 	 * Remove peer from the pubkey hashtable and disable all timeouts.
483 	 * After this, and after flushing wg_handshake_taskq, no more handshakes
484 	 * can be started.
485 	 */
486 	rw_enter_write(&sc->sc_peer_lock);
487 	LIST_REMOVE(peer, p_pubkey_entry);
488 	TAILQ_REMOVE(&sc->sc_peer_seq, peer, p_seq_entry);
489 	sc->sc_peer_num--;
490 	rw_exit_write(&sc->sc_peer_lock);
491 
492 	wg_timers_disable(&peer->p_timers);
493 
494 	taskq_barrier(wg_handshake_taskq);
495 
496 	/*
497 	 * Now we drop all allowed ips, to drop all outgoing packets to the
498 	 * peer. Then drop all the indexes to drop all incoming packets to the
499 	 * peer. Then we can flush if_snd, wg_crypt_taskq and then nettq to
500 	 * ensure no more references to the peer exist.
501 	 */
502 	LIST_FOREACH_SAFE(aip, &peer->p_aip, a_entry, taip)
503 		wg_aip_remove(sc, peer, &aip->a_data);
504 
505 	noise_remote_clear(&peer->p_remote);
506 
507 	NET_LOCK();
508 	while (!ifq_empty(&sc->sc_if.if_snd)) {
509 		NET_UNLOCK();
510 		tsleep_nsec(sc, PWAIT, "wg_ifq", 1000);
511 		NET_LOCK();
512 	}
513 	NET_UNLOCK();
514 
515 	taskq_barrier(wg_crypt_taskq);
516 	taskq_barrier(net_tq(sc->sc_if.if_index));
517 
518 	DPRINTF(sc, "Peer %llu destroyed\n", peer->p_id);
519 	explicit_bzero(peer, sizeof(*peer));
520 	pool_put(&wg_peer_pool, peer);
521 }
522 
523 void
524 wg_peer_set_endpoint_from_tag(struct wg_peer *peer, struct wg_tag *t)
525 {
526 	if (memcmp(&t->t_endpoint, &peer->p_endpoint,
527 	    sizeof(t->t_endpoint)) == 0)
528 		return;
529 
530 	mtx_enter(&peer->p_endpoint_mtx);
531 	peer->p_endpoint = t->t_endpoint;
532 	mtx_leave(&peer->p_endpoint_mtx);
533 }
534 
535 void
536 wg_peer_set_sockaddr(struct wg_peer *peer, struct sockaddr *remote)
537 {
538 	mtx_enter(&peer->p_endpoint_mtx);
539 	memcpy(&peer->p_endpoint.e_remote, remote,
540 	       sizeof(peer->p_endpoint.e_remote));
541 	bzero(&peer->p_endpoint.e_local, sizeof(peer->p_endpoint.e_local));
542 	mtx_leave(&peer->p_endpoint_mtx);
543 }
544 
545 int
546 wg_peer_get_sockaddr(struct wg_peer *peer, struct sockaddr *remote)
547 {
548 	int	ret = 0;
549 
550 	mtx_enter(&peer->p_endpoint_mtx);
551 	if (peer->p_endpoint.e_remote.r_sa.sa_family != AF_UNSPEC)
552 		memcpy(remote, &peer->p_endpoint.e_remote,
553 		       sizeof(peer->p_endpoint.e_remote));
554 	else
555 		ret = ENOENT;
556 	mtx_leave(&peer->p_endpoint_mtx);
557 	return ret;
558 }
559 
560 void
561 wg_peer_clear_src(struct wg_peer *peer)
562 {
563 	mtx_enter(&peer->p_endpoint_mtx);
564 	bzero(&peer->p_endpoint.e_local, sizeof(peer->p_endpoint.e_local));
565 	mtx_leave(&peer->p_endpoint_mtx);
566 }
567 
568 void
569 wg_peer_get_endpoint(struct wg_peer *peer, struct wg_endpoint *endpoint)
570 {
571 	mtx_enter(&peer->p_endpoint_mtx);
572 	memcpy(endpoint, &peer->p_endpoint, sizeof(*endpoint));
573 	mtx_leave(&peer->p_endpoint_mtx);
574 }
575 
576 void
577 wg_peer_counters_add(struct wg_peer *peer, uint64_t tx, uint64_t rx)
578 {
579 	mtx_enter(&peer->p_counters_mtx);
580 	peer->p_counters_tx += tx;
581 	peer->p_counters_rx += rx;
582 	mtx_leave(&peer->p_counters_mtx);
583 }
584 
585 int
586 wg_aip_add(struct wg_softc *sc, struct wg_peer *peer, struct wg_aip_io *d)
587 {
588 	struct art_root	*root;
589 	struct art_node	*node;
590 	struct wg_aip	*aip;
591 	int		 ret = 0;
592 
593 	switch (d->a_af) {
594 	case AF_INET:	root = sc->sc_aip4; break;
595 #ifdef INET6
596 	case AF_INET6:	root = sc->sc_aip6; break;
597 #endif
598 	default: return EAFNOSUPPORT;
599 	}
600 
601 	if ((aip = pool_get(&wg_aip_pool, PR_NOWAIT|PR_ZERO)) == NULL)
602 		return ENOBUFS;
603 
604 	rw_enter_write(&root->ar_lock);
605 	node = art_insert(root, &aip->a_node, &d->a_addr, d->a_cidr);
606 
607 	if (node == &aip->a_node) {
608 		aip->a_peer = peer;
609 		aip->a_data = *d;
610 		LIST_INSERT_HEAD(&peer->p_aip, aip, a_entry);
611 		sc->sc_aip_num++;
612 	} else {
613 		pool_put(&wg_aip_pool, aip);
614 		aip = (struct wg_aip *) node;
615 		if (aip->a_peer != peer) {
616 			LIST_REMOVE(aip, a_entry);
617 			LIST_INSERT_HEAD(&peer->p_aip, aip, a_entry);
618 			aip->a_peer = peer;
619 		}
620 	}
621 	rw_exit_write(&root->ar_lock);
622 	return ret;
623 }
624 
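/*
 * Allowed-IP lookups below run under an SRP reference only, while
 * wg_aip_add() and wg_aip_remove() serialise modifications of the ART with
 * the per-root ar_lock write lock.
 */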
625 struct wg_peer *
626 wg_aip_lookup(struct art_root *root, void *addr)
627 {
628 	struct srp_ref	 sr;
629 	struct art_node	*node;
630 
631 	node = art_match(root, addr, &sr);
632 	srp_leave(&sr);
633 
634 	return node == NULL ? NULL : ((struct wg_aip *) node)->a_peer;
635 }
636 
637 int
638 wg_aip_remove(struct wg_softc *sc, struct wg_peer *peer, struct wg_aip_io *d)
639 {
640 	struct srp_ref	 sr;
641 	struct art_root	*root;
642 	struct art_node	*node;
643 	struct wg_aip	*aip;
644 	int		 ret = 0;
645 
646 	switch (d->a_af) {
647 	case AF_INET:	root = sc->sc_aip4; break;
648 #ifdef INET6
649 	case AF_INET6:	root = sc->sc_aip6; break;
650 #endif
651 	default: return EAFNOSUPPORT;
652 	}
653 
654 	rw_enter_write(&root->ar_lock);
655 	if ((node = art_lookup(root, &d->a_addr, d->a_cidr, &sr)) == NULL) {
656 		ret = ENOENT;
657 	} else if (((struct wg_aip *) node)->a_peer != peer) {
658 		ret = EXDEV;
659 	} else {
660 		aip = (struct wg_aip *)node;
661 		if (art_delete(root, node, &d->a_addr, d->a_cidr) == NULL)
662 			panic("art_delete failed to delete node %p", node);
663 
664 		sc->sc_aip_num--;
665 		LIST_REMOVE(aip, a_entry);
666 		pool_put(&wg_aip_pool, aip);
667 	}
668 
669 	srp_leave(&sr);
670 	rw_exit_write(&root->ar_lock);
671 	return ret;
672 }
673 
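/*
 * wg_socket_open() creates a UDP socket bound to *port in routing table
 * *rtable and installs wg_input() as the PCB upcall, so incoming datagrams
 * are handed to the driver from the UDP input path.
 */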
674 int
675 wg_socket_open(struct socket **so, int af, in_port_t *port,
676     int *rtable, void *upcall_arg)
677 {
678 	struct mbuf		 mhostnam, mrtable;
679 #ifdef INET6
680 	struct sockaddr_in6	*sin6;
681 #endif
682 	struct sockaddr_in	*sin;
683 	int			 ret, s;
684 
685 	m_inithdr(&mhostnam);
686 	m_inithdr(&mrtable);
687 
688 	bzero(mtod(&mrtable, u_int *), sizeof(u_int));
689 	*mtod(&mrtable, u_int *) = *rtable;
690 	mrtable.m_len = sizeof(u_int);
691 
692 	if (af == AF_INET) {
693 		sin = mtod(&mhostnam, struct sockaddr_in *);
694 		bzero(sin, sizeof(*sin));
695 		sin->sin_len = sizeof(*sin);
696 		sin->sin_family = AF_INET;
697 		sin->sin_port = *port;
698 		sin->sin_addr.s_addr = INADDR_ANY;
699 		mhostnam.m_len = sin->sin_len;
700 #ifdef INET6
701 	} else if (af == AF_INET6) {
702 		sin6 = mtod(&mhostnam, struct sockaddr_in6 *);
703 		bzero(sin6, sizeof(*sin6));
704 		sin6->sin6_len = sizeof(*sin6);
705 		sin6->sin6_family = AF_INET6;
706 		sin6->sin6_port = *port;
707 		sin6->sin6_addr = (struct in6_addr) { .s6_addr = { 0 } };
708 		mhostnam.m_len = sin6->sin6_len;
709 #endif
710 	} else {
711 		return EAFNOSUPPORT;
712 	}
713 
714 	if ((ret = socreate(af, so, SOCK_DGRAM, 0)) != 0)
715 		return ret;
716 
717 	s = solock(*so);
718 	sotoinpcb(*so)->inp_upcall = wg_input;
719 	sotoinpcb(*so)->inp_upcall_arg = upcall_arg;
720 
721 	if ((ret = sosetopt(*so, SOL_SOCKET, SO_RTABLE, &mrtable)) == 0) {
722 		if ((ret = sobind(*so, &mhostnam, curproc)) == 0) {
723 			*port = sotoinpcb(*so)->inp_lport;
724 			*rtable = sotoinpcb(*so)->inp_rtableid;
725 		}
726 	}
727 	sounlock(*so, s);
728 
729 	if (ret != 0)
730 		wg_socket_close(so);
731 
732 	return ret;
733 }
734 
735 void
736 wg_socket_close(struct socket **so)
737 {
738 	if (*so != NULL && soclose(*so, 0) != 0)
739 		panic("Unable to close wg socket");
740 	*so = NULL;
741 }
742 
743 int
744 wg_bind(struct wg_softc *sc, in_port_t *portp, int *rtablep)
745 {
746 	int		 ret = 0, rtable = *rtablep;
747 	in_port_t	 port = *portp;
748 	struct socket	*so4;
749 #ifdef INET6
750 	struct socket	*so6;
751 	int		 retries = 0;
752 retry:
753 #endif
754 	if ((ret = wg_socket_open(&so4, AF_INET, &port, &rtable, sc)) != 0)
755 		return ret;
756 
757 #ifdef INET6
758 	if ((ret = wg_socket_open(&so6, AF_INET6, &port, &rtable, sc)) != 0) {
759 		if (ret == EADDRINUSE && *portp == 0 && retries++ < 100)
760 			goto retry;
761 		wg_socket_close(&so4);
762 		return ret;
763 	}
764 #endif
765 
766 	rw_enter_write(&sc->sc_so_lock);
767 	wg_socket_close(&sc->sc_so4);
768 	sc->sc_so4 = so4;
769 #ifdef INET6
770 	wg_socket_close(&sc->sc_so6);
771 	sc->sc_so6 = so6;
772 #endif
773 	rw_exit_write(&sc->sc_so_lock);
774 
775 	*portp = port;
776 	*rtablep = rtable;
777 	return 0;
778 }
779 
780 void
781 wg_unbind(struct wg_softc *sc)
782 {
783 	rw_enter_write(&sc->sc_so_lock);
784 	wg_socket_close(&sc->sc_so4);
785 #ifdef INET6
786 	wg_socket_close(&sc->sc_so6);
787 #endif
788 	rw_exit_write(&sc->sc_so_lock);
789 }
790 
791 int
792 wg_send(struct wg_softc *sc, struct wg_endpoint *e, struct mbuf *m)
793 {
794 	struct mbuf	 peernam, *control = NULL;
795 	int		 ret;
796 
797 	/* Get local control address before locking */
798 	if (e->e_remote.r_sa.sa_family == AF_INET) {
799 		if (e->e_local.l_in.s_addr != INADDR_ANY)
800 			control = sbcreatecontrol(&e->e_local.l_in,
801 			    sizeof(struct in_addr), IP_SENDSRCADDR,
802 			    IPPROTO_IP);
803 #ifdef INET6
804 	} else if (e->e_remote.r_sa.sa_family == AF_INET6) {
805 		if (!IN6_IS_ADDR_UNSPECIFIED(&e->e_local.l_in6))
806 			control = sbcreatecontrol(&e->e_local.l_pktinfo6,
807 			    sizeof(struct in6_pktinfo), IPV6_PKTINFO,
808 			    IPPROTO_IPV6);
809 #endif
810 	} else {
811 		m_freem(m);
812 		return EAFNOSUPPORT;
813 	}
814 
815 	/* Get remote address */
816 	peernam.m_type = MT_SONAME;
817 	peernam.m_next = NULL;
818 	peernam.m_nextpkt = NULL;
819 	peernam.m_data = (void *)&e->e_remote.r_sa;
820 	peernam.m_len = e->e_remote.r_sa.sa_len;
821 	peernam.m_flags = 0;
822 
823 	rw_enter_read(&sc->sc_so_lock);
824 	if (e->e_remote.r_sa.sa_family == AF_INET && sc->sc_so4 != NULL)
825 		ret = sosend(sc->sc_so4, &peernam, NULL, m, control, 0);
826 #ifdef INET6
827 	else if (e->e_remote.r_sa.sa_family == AF_INET6 && sc->sc_so6 != NULL)
828 		ret = sosend(sc->sc_so6, &peernam, NULL, m, control, 0);
829 #endif
830 	else {
831 		ret = ENOTCONN;
832 		m_freem(control);
833 		m_freem(m);
834 	}
835 	rw_exit_read(&sc->sc_so_lock);
836 
837 	return ret;
838 }
839 
840 void
841 wg_send_buf(struct wg_softc *sc, struct wg_endpoint *e, uint8_t *buf,
842     size_t len)
843 {
844 	struct mbuf	*m;
845 	int		 ret = 0;
846 
847 retry:
848 	m = m_gethdr(M_WAIT, MT_DATA);
849 	m->m_len = 0;
850 	m_copyback(m, 0, len, buf, M_WAIT);
851 
852 	/* As we're sending a handshake packet here, we want high priority */
853 	m->m_pkthdr.pf.prio = IFQ_MAXPRIO;
854 
855 	if (ret == 0) {
856 		ret = wg_send(sc, e, m);
857 		/* Retry if we couldn't bind to e->e_local */
858 		if (ret == EADDRNOTAVAIL) {
859 			bzero(&e->e_local, sizeof(e->e_local));
860 			goto retry;
861 		}
862 	} else {
863 		ret = wg_send(sc, e, m);
864 		if (ret != 0)
865 			DPRINTF(sc, "Unable to send packet\n");
866 	}
867 }
868 
869 struct wg_tag *
870 wg_tag_get(struct mbuf *m)
871 {
872 	struct m_tag	*mtag;
873 
874 	if ((mtag = m_tag_find(m, PACKET_TAG_WIREGUARD, NULL)) == NULL) {
875 		mtag = m_tag_get(PACKET_TAG_WIREGUARD, sizeof(struct wg_tag),
876 		    M_NOWAIT);
877 		if (mtag == NULL)
878 			return (NULL);
879 		bzero(mtag + 1, sizeof(struct wg_tag));
880 		m_tag_prepend(m, mtag);
881 	}
882 	return ((struct wg_tag *)(mtag + 1));
883 }
884 
885 /*
886  * The following section handles the timeout callbacks for a WireGuard session.
887  * These functions provide an "event based" model for controlling wg(8) session
888  * timers. All function calls occur after the specified event below.
889  *
890  * wg_timers_event_data_sent:
891  *	tx: data
892  * wg_timers_event_data_received:
893  *	rx: data
894  * wg_timers_event_any_authenticated_packet_sent:
895  *	tx: keepalive, data, handshake
896  * wg_timers_event_any_authenticated_packet_received:
897  *	rx: keepalive, data, handshake
898  * wg_timers_event_any_authenticated_packet_traversal:
899  *	tx, rx: keepalive, data, handshake
900  * wg_timers_event_handshake_initiated:
901  *	tx: initiation
902  * wg_timers_event_handshake_responded:
903  *	tx: response
904  * wg_timers_event_handshake_complete:
905  *	rx: response, confirmation data
906  * wg_timers_event_session_derived:
907  *	tx: response, rx: response
908  * wg_timers_event_want_initiation:
909  *	tx: data failed, old keys expiring
910  * wg_timers_event_reset_handshake_last_sent:
911  * 	anytime we may immediately want a new handshake
912  */
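/*
 * Event handlers that arm a timeout take t_lock as a reader and check
 * t_disabled first, so wg_timers_disable(), which sets t_disabled under the
 * write lock, can reliably prevent new timeouts from being scheduled before
 * it deletes the pending ones.
 */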
913 void
914 wg_timers_init(struct wg_timers *t)
915 {
916 	bzero(t, sizeof(*t));
917 	rw_init(&t->t_lock, "wg_timers");
918 	mtx_init(&t->t_handshake_mtx, IPL_NET);
919 
920 	timeout_set(&t->t_new_handshake, wg_timers_run_new_handshake, t);
921 	timeout_set(&t->t_send_keepalive, wg_timers_run_send_keepalive, t);
922 	timeout_set(&t->t_retry_handshake, wg_timers_run_retry_handshake, t);
923 	timeout_set(&t->t_persistent_keepalive,
924 	    wg_timers_run_persistent_keepalive, t);
925 	timeout_set(&t->t_zero_key_material,
926 	    wg_timers_run_zero_key_material, t);
927 }
928 
929 void
930 wg_timers_enable(struct wg_timers *t)
931 {
932 	rw_enter_write(&t->t_lock);
933 	t->t_disabled = 0;
934 	rw_exit_write(&t->t_lock);
935 	wg_timers_run_persistent_keepalive(t);
936 }
937 
938 void
939 wg_timers_disable(struct wg_timers *t)
940 {
941 	rw_enter_write(&t->t_lock);
942 	t->t_disabled = 1;
943 	t->t_need_another_keepalive = 0;
944 	rw_exit_write(&t->t_lock);
945 
946 	timeout_del_barrier(&t->t_new_handshake);
947 	timeout_del_barrier(&t->t_send_keepalive);
948 	timeout_del_barrier(&t->t_retry_handshake);
949 	timeout_del_barrier(&t->t_persistent_keepalive);
950 	timeout_del_barrier(&t->t_zero_key_material);
951 }
952 
953 void
954 wg_timers_set_persistent_keepalive(struct wg_timers *t, uint16_t interval)
955 {
956 	rw_enter_read(&t->t_lock);
957 	if (!t->t_disabled) {
958 		t->t_persistent_keepalive_interval = interval;
959 		wg_timers_run_persistent_keepalive(t);
960 	}
961 	rw_exit_read(&t->t_lock);
962 }
963 
964 int
965 wg_timers_get_persistent_keepalive(struct wg_timers *t, uint16_t *interval)
966 {
967 	*interval = t->t_persistent_keepalive_interval;
968 	return *interval > 0 ? 0 : ENOENT;
969 }
970 
971 void
972 wg_timers_get_last_handshake(struct wg_timers *t, struct timespec *time)
973 {
974 	mtx_enter(&t->t_handshake_mtx);
975 	*time = t->t_handshake_complete;
976 	mtx_leave(&t->t_handshake_mtx);
977 }
978 
979 int
980 wg_timers_expired_handshake_last_sent(struct wg_timers *t)
981 {
982 	struct timespec uptime;
983 	struct timespec expire = { .tv_sec = REKEY_TIMEOUT, .tv_nsec = 0 };
984 
985 	getnanouptime(&uptime);
986 	timespecadd(&t->t_handshake_last_sent, &expire, &expire);
987 	return timespeccmp(&uptime, &expire, >) ? ETIMEDOUT : 0;
988 }
989 
990 int
991 wg_timers_check_handshake_last_sent(struct wg_timers *t)
992 {
993 	int ret;
994 	mtx_enter(&t->t_handshake_mtx);
995 	if ((ret = wg_timers_expired_handshake_last_sent(t)) == ETIMEDOUT)
996 		getnanouptime(&t->t_handshake_last_sent);
997 	mtx_leave(&t->t_handshake_mtx);
998 	return ret;
999 }
1000 
1001 void
1002 wg_timers_event_data_sent(struct wg_timers *t)
1003 {
1004 	int	msecs = NEW_HANDSHAKE_TIMEOUT * 1000;
1005 	msecs += arc4random_uniform(REKEY_TIMEOUT_JITTER);
1006 
1007 	rw_enter_read(&t->t_lock);
1008 	if (!t->t_disabled && !timeout_pending(&t->t_new_handshake))
1009 		timeout_add_msec(&t->t_new_handshake, msecs);
1010 	rw_exit_read(&t->t_lock);
1011 }
1012 
1013 void
1014 wg_timers_event_data_received(struct wg_timers *t)
1015 {
1016 	rw_enter_read(&t->t_lock);
1017 	if (!t->t_disabled) {
1018 		if (!timeout_pending(&t->t_send_keepalive))
1019 			timeout_add_sec(&t->t_send_keepalive,
1020 			    KEEPALIVE_TIMEOUT);
1021 		else
1022 			t->t_need_another_keepalive = 1;
1023 	}
1024 	rw_exit_read(&t->t_lock);
1025 }
1026 
1027 void
1028 wg_timers_event_any_authenticated_packet_sent(struct wg_timers *t)
1029 {
1030 	timeout_del(&t->t_send_keepalive);
1031 }
1032 
1033 void
1034 wg_timers_event_any_authenticated_packet_received(struct wg_timers *t)
1035 {
1036 	timeout_del(&t->t_new_handshake);
1037 }
1038 
1039 void
1040 wg_timers_event_any_authenticated_packet_traversal(struct wg_timers *t)
1041 {
1042 	rw_enter_read(&t->t_lock);
1043 	if (!t->t_disabled && t->t_persistent_keepalive_interval > 0)
1044 		timeout_add_sec(&t->t_persistent_keepalive,
1045 		    t->t_persistent_keepalive_interval);
1046 	rw_exit_read(&t->t_lock);
1047 }
1048 
1049 void
1050 wg_timers_event_handshake_initiated(struct wg_timers *t)
1051 {
1052 	int	msecs = REKEY_TIMEOUT * 1000;
1053 	msecs += arc4random_uniform(REKEY_TIMEOUT_JITTER);
1054 
1055 	rw_enter_read(&t->t_lock);
1056 	if (!t->t_disabled)
1057 		timeout_add_msec(&t->t_retry_handshake, msecs);
1058 	rw_exit_read(&t->t_lock);
1059 }
1060 
1061 void
1062 wg_timers_event_handshake_responded(struct wg_timers *t)
1063 {
1064 	mtx_enter(&t->t_handshake_mtx);
1065 	getnanouptime(&t->t_handshake_last_sent);
1066 	mtx_leave(&t->t_handshake_mtx);
1067 }
1068 
1069 void
1070 wg_timers_event_handshake_complete(struct wg_timers *t)
1071 {
1072 	rw_enter_read(&t->t_lock);
1073 	if (!t->t_disabled) {
1074 		mtx_enter(&t->t_handshake_mtx);
1075 		timeout_del(&t->t_retry_handshake);
1076 		t->t_handshake_retries = 0;
1077 		getnanotime(&t->t_handshake_complete);
1078 		mtx_leave(&t->t_handshake_mtx);
1079 		wg_timers_run_send_keepalive(t);
1080 	}
1081 	rw_exit_read(&t->t_lock);
1082 }
1083 
1084 void
1085 wg_timers_event_session_derived(struct wg_timers *t)
1086 {
1087 	rw_enter_read(&t->t_lock);
1088 	if (!t->t_disabled)
1089 		timeout_add_sec(&t->t_zero_key_material, REJECT_AFTER_TIME * 3);
1090 	rw_exit_read(&t->t_lock);
1091 }
1092 
1093 void
1094 wg_timers_event_want_initiation(struct wg_timers *t)
1095 {
1096 	rw_enter_read(&t->t_lock);
1097 	if (!t->t_disabled)
1098 		wg_timers_run_send_initiation(t, 0);
1099 	rw_exit_read(&t->t_lock);
1100 }
1101 
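/*
 * Backdating t_handshake_last_sent by REKEY_TIMEOUT + 1 seconds makes
 * wg_timers_expired_handshake_last_sent() report ETIMEDOUT right away, so
 * the next wg_timers_run_send_initiation() call is not rate limited.
 */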
1102 void
1103 wg_timers_event_reset_handshake_last_sent(struct wg_timers *t)
1104 {
1105 	mtx_enter(&t->t_handshake_mtx);
1106 	t->t_handshake_last_sent.tv_sec -= (REKEY_TIMEOUT + 1);
1107 	mtx_leave(&t->t_handshake_mtx);
1108 }
1109 
1110 void
1111 wg_timers_run_send_initiation(void *_t, int is_retry)
1112 {
1113 	struct wg_timers *t = _t;
1114 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1115 	if (!is_retry)
1116 		t->t_handshake_retries = 0;
1117 	if (wg_timers_expired_handshake_last_sent(t) == ETIMEDOUT)
1118 		task_add(wg_handshake_taskq, &peer->p_send_initiation);
1119 }
1120 
1121 void
1122 wg_timers_run_retry_handshake(void *_t)
1123 {
1124 	struct wg_timers *t = _t;
1125 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1126 
1127 	mtx_enter(&t->t_handshake_mtx);
1128 	if (t->t_handshake_retries <= MAX_TIMER_HANDSHAKES) {
1129 		t->t_handshake_retries++;
1130 		mtx_leave(&t->t_handshake_mtx);
1131 
1132 		DPRINTF(peer->p_sc, "Handshake for peer %llu did not complete "
1133 		    "after %d seconds, retrying (try %d)\n", peer->p_id,
1134 		    REKEY_TIMEOUT, t->t_handshake_retries + 1);
1135 		wg_peer_clear_src(peer);
1136 		wg_timers_run_send_initiation(t, 1);
1137 	} else {
1138 		mtx_leave(&t->t_handshake_mtx);
1139 
1140 		DPRINTF(peer->p_sc, "Handshake for peer %llu did not complete "
1141 		    "after %d retries, giving up\n", peer->p_id,
1142 		    MAX_TIMER_HANDSHAKES + 2);
1143 
1144 		timeout_del(&t->t_send_keepalive);
1145 		mq_purge(&peer->p_stage_queue);
1146 		if (!timeout_pending(&t->t_zero_key_material))
1147 			timeout_add_sec(&t->t_zero_key_material,
1148 			    REJECT_AFTER_TIME * 3);
1149 	}
1150 }
1151 
1152 void
1153 wg_timers_run_send_keepalive(void *_t)
1154 {
1155 	struct wg_timers *t = _t;
1156 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1157 
1158 	task_add(wg_crypt_taskq, &peer->p_send_keepalive);
1159 	if (t->t_need_another_keepalive) {
1160 		t->t_need_another_keepalive = 0;
1161 		timeout_add_sec(&t->t_send_keepalive, KEEPALIVE_TIMEOUT);
1162 	}
1163 }
1164 
1165 void
1166 wg_timers_run_new_handshake(void *_t)
1167 {
1168 	struct wg_timers *t = _t;
1169 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1170 
1171 	DPRINTF(peer->p_sc, "Retrying handshake with peer %llu because we "
1172 	    "stopped hearing back after %d seconds\n",
1173 	    peer->p_id, NEW_HANDSHAKE_TIMEOUT);
1174 	wg_peer_clear_src(peer);
1175 
1176 	wg_timers_run_send_initiation(t, 0);
1177 }
1178 
1179 void
1180 wg_timers_run_zero_key_material(void *_t)
1181 {
1182 	struct wg_timers *t = _t;
1183 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1184 
1185 	DPRINTF(peer->p_sc, "Zeroing out keys for peer %llu\n", peer->p_id);
1186 	task_add(wg_handshake_taskq, &peer->p_clear_secrets);
1187 }
1188 
1189 void
1190 wg_timers_run_persistent_keepalive(void *_t)
1191 {
1192 	struct wg_timers *t = _t;
1193 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1194 	if (t->t_persistent_keepalive_interval != 0)
1195 		task_add(wg_crypt_taskq, &peer->p_send_keepalive);
1196 }
1197 
1198 /* The following functions handle handshakes */
1199 void
1200 wg_peer_send_buf(struct wg_peer *peer, uint8_t *buf, size_t len)
1201 {
1202 	struct wg_endpoint	 endpoint;
1203 
1204 	wg_peer_counters_add(peer, len, 0);
1205 	wg_timers_event_any_authenticated_packet_traversal(&peer->p_timers);
1206 	wg_timers_event_any_authenticated_packet_sent(&peer->p_timers);
1207 	wg_peer_get_endpoint(peer, &endpoint);
1208 	wg_send_buf(peer->p_sc, &endpoint, buf, len);
1209 }
1210 
1211 void
1212 wg_send_initiation(void *_peer)
1213 {
1214 	struct wg_peer			*peer = _peer;
1215 	struct wg_pkt_initiation	 pkt;
1216 
1217 	if (wg_timers_check_handshake_last_sent(&peer->p_timers) != ETIMEDOUT)
1218 		return;
1219 
1220 	DPRINTF(peer->p_sc, "Sending handshake initiation to peer %llu\n",
1221 	    peer->p_id);
1222 
1223 	if (noise_create_initiation(&peer->p_remote, &pkt.s_idx, pkt.ue, pkt.es,
1224 				    pkt.ets) != 0)
1225 		return;
1226 	pkt.t = WG_PKT_INITIATION;
1227 	cookie_maker_mac(&peer->p_cookie, &pkt.m, &pkt,
1228 	    sizeof(pkt)-sizeof(pkt.m));
1229 	wg_peer_send_buf(peer, (uint8_t *)&pkt, sizeof(pkt));
1230 	wg_timers_event_handshake_initiated(&peer->p_timers);
1231 }
1232 
1233 void
1234 wg_send_response(struct wg_peer *peer)
1235 {
1236 	struct wg_pkt_response	 pkt;
1237 
1238 	DPRINTF(peer->p_sc, "Sending handshake response to peer %llu\n",
1239 	    peer->p_id);
1240 
1241 	if (noise_create_response(&peer->p_remote, &pkt.s_idx, &pkt.r_idx,
1242 				  pkt.ue, pkt.en) != 0)
1243 		return;
1244 	if (noise_remote_begin_session(&peer->p_remote) != 0)
1245 		return;
1246 	wg_timers_event_session_derived(&peer->p_timers);
1247 	pkt.t = WG_PKT_RESPONSE;
1248 	cookie_maker_mac(&peer->p_cookie, &pkt.m, &pkt,
1249 	    sizeof(pkt)-sizeof(pkt.m));
1250 	wg_timers_event_handshake_responded(&peer->p_timers);
1251 	wg_peer_send_buf(peer, (uint8_t *)&pkt, sizeof(pkt));
1252 }
1253 
1254 void
1255 wg_send_cookie(struct wg_softc *sc, struct cookie_macs *cm, uint32_t idx,
1256     struct wg_endpoint *e)
1257 {
1258 	struct wg_pkt_cookie	pkt;
1259 
1260 	DPRINTF(sc, "Sending cookie response for denied handshake message\n");
1261 
1262 	pkt.t = WG_PKT_COOKIE;
1263 	pkt.r_idx = idx;
1264 
1265 	cookie_checker_create_payload(&sc->sc_cookie, cm, pkt.nonce,
1266 	    pkt.ec, &e->e_remote.r_sa);
1267 
1268 	wg_send_buf(sc, e, (uint8_t *)&pkt, sizeof(pkt));
1269 }
1270 
1271 void
1272 wg_send_keepalive(void *_peer)
1273 {
1274 	struct wg_peer	*peer = _peer;
1275 	struct wg_softc	*sc = peer->p_sc;
1276 	struct wg_tag	*t;
1277 	struct mbuf	*m;
1278 
1279 	if (!mq_empty(&peer->p_stage_queue))
1280 		goto send;
1281 
1282 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1283 		return;
1284 
1285 	if ((t = wg_tag_get(m)) == NULL) {
1286 		m_freem(m);
1287 		return;
1288 	}
1289 
1290 	m->m_len = 0;
1291 	m_calchdrlen(m);
1292 
1293 	t->t_peer = peer;
1294 	t->t_mbuf = NULL;
1295 	t->t_done = 0;
1296 	t->t_mtu = 0; /* MTU == 0 OK for keepalive */
1297 
1298 	mq_push(&peer->p_stage_queue, m);
1299 send:
1300 	if (noise_remote_ready(&peer->p_remote) == 0) {
1301 		wg_queue_out(sc, peer);
1302 		task_add(wg_crypt_taskq, &sc->sc_encap);
1303 	} else {
1304 		wg_timers_event_want_initiation(&peer->p_timers);
1305 	}
1306 }
1307 
1308 void
1309 wg_peer_clear_secrets(void *_peer)
1310 {
1311 	struct wg_peer *peer = _peer;
1312 	noise_remote_clear(&peer->p_remote);
1313 }
1314 
1315 void
1316 wg_handshake(struct wg_softc *sc, struct mbuf *m)
1317 {
1318 	struct wg_tag			*t;
1319 	struct wg_pkt_initiation	*init;
1320 	struct wg_pkt_response		*resp;
1321 	struct wg_pkt_cookie		*cook;
1322 	struct wg_peer			*peer;
1323 	struct noise_remote		*remote;
1324 	int				 res, underload = 0;
1325 	static struct timeval		 wg_last_underload; /* microuptime */
1326 
1327 	if (mq_len(&sc->sc_handshake_queue) >= MAX_QUEUED_HANDSHAKES/8) {
1328 		getmicrouptime(&wg_last_underload);
1329 		underload = 1;
1330 	} else if (wg_last_underload.tv_sec != 0) {
1331 		if (!ratecheck(&wg_last_underload, &underload_interval))
1332 			underload = 1;
1333 		else
1334 			bzero(&wg_last_underload, sizeof(wg_last_underload));
1335 	}
1336 
1337 	t = wg_tag_get(m);
1338 
1339 	switch (*mtod(m, uint32_t *)) {
1340 	case WG_PKT_INITIATION:
1341 		init = mtod(m, struct wg_pkt_initiation *);
1342 
1343 		res = cookie_checker_validate_macs(&sc->sc_cookie, &init->m,
1344 				init, sizeof(*init) - sizeof(init->m),
1345 				underload, &t->t_endpoint.e_remote.r_sa);
1346 
1347 		if (res == EINVAL) {
1348 			DPRINTF(sc, "Invalid initiation MAC\n");
1349 			goto error;
1350 		} else if (res == ECONNREFUSED) {
1351 			DPRINTF(sc, "Handshake ratelimited\n");
1352 			goto error;
1353 		} else if (res == EAGAIN) {
1354 			wg_send_cookie(sc, &init->m, init->s_idx,
1355 			    &t->t_endpoint);
1356 			goto error;
1357 		} else if (res != 0) {
1358 			panic("unexpected response: %d", res);
1359 		}
1360 
1361 		if (noise_consume_initiation(&sc->sc_local, &remote,
1362 		    init->s_idx, init->ue, init->es, init->ets) != 0) {
1363 			DPRINTF(sc, "Invalid handshake initiation\n");
1364 			goto error;
1365 		}
1366 
1367 		peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1368 
1369 		DPRINTF(sc, "Receiving handshake initiation from peer %llu\n",
1370 		    peer->p_id);
1371 
1372 		wg_peer_counters_add(peer, 0, sizeof(*init));
1373 		wg_peer_set_endpoint_from_tag(peer, t);
1374 		wg_send_response(peer);
1375 		break;
1376 	case WG_PKT_RESPONSE:
1377 		resp = mtod(m, struct wg_pkt_response *);
1378 
1379 		res = cookie_checker_validate_macs(&sc->sc_cookie, &resp->m,
1380 				resp, sizeof(*resp) - sizeof(resp->m),
1381 				underload, &t->t_endpoint.e_remote.r_sa);
1382 
1383 		if (res == EINVAL) {
1384 			DPRINTF(sc, "Invalid response MAC\n");
1385 			goto error;
1386 		} else if (res == ECONNREFUSED) {
1387 			DPRINTF(sc, "Handshake ratelimited\n");
1388 			goto error;
1389 		} else if (res == EAGAIN) {
1390 			wg_send_cookie(sc, &resp->m, resp->s_idx,
1391 			    &t->t_endpoint);
1392 			goto error;
1393 		} else if (res != 0) {
1394 			panic("unexpected response: %d", res);
1395 		}
1396 
1397 		if ((remote = wg_index_get(sc, resp->r_idx)) == NULL) {
1398 			DPRINTF(sc, "Unknown handshake response\n");
1399 			goto error;
1400 		}
1401 
1402 		peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1403 
1404 		if (noise_consume_response(remote, resp->s_idx, resp->r_idx,
1405 					   resp->ue, resp->en) != 0) {
1406 			DPRINTF(sc, "Invalid handshake response\n");
1407 			goto error;
1408 		}
1409 
1410 		DPRINTF(sc, "Receiving handshake response from peer %llu\n",
1411 				peer->p_id);
1412 
1413 		wg_peer_counters_add(peer, 0, sizeof(*resp));
1414 		wg_peer_set_endpoint_from_tag(peer, t);
1415 		if (noise_remote_begin_session(&peer->p_remote) == 0) {
1416 			wg_timers_event_session_derived(&peer->p_timers);
1417 			wg_timers_event_handshake_complete(&peer->p_timers);
1418 		}
1419 		break;
1420 	case WG_PKT_COOKIE:
1421 		cook = mtod(m, struct wg_pkt_cookie *);
1422 
1423 		if ((remote = wg_index_get(sc, cook->r_idx)) == NULL) {
1424 			DPRINTF(sc, "Unknown cookie index\n");
1425 			goto error;
1426 		}
1427 
1428 		peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1429 
1430 		if (cookie_maker_consume_payload(&peer->p_cookie,
1431 		    cook->nonce, cook->ec) != 0) {
1432 			DPRINTF(sc, "Could not decrypt cookie response\n");
1433 			goto error;
1434 		}
1435 
1436 		DPRINTF(sc, "Receiving cookie response\n");
1437 		goto error;
1438 	default:
1439 		panic("invalid packet in handshake queue");
1440 	}
1441 
1442 	wg_timers_event_any_authenticated_packet_received(&peer->p_timers);
1443 	wg_timers_event_any_authenticated_packet_traversal(&peer->p_timers);
1444 error:
1445 	m_freem(m);
1446 }
1447 
1448 void
1449 wg_handshake_worker(void *_sc)
1450 {
1451 	struct mbuf *m;
1452 	struct wg_softc *sc = _sc;
1453 	while ((m = mq_dequeue(&sc->sc_handshake_queue)) != NULL)
1454 		wg_handshake(sc, m);
1455 }
1456 
1457 /*
1458  * The following functions handle encapsulation (encryption) and
1459  * decapsulation (decryption). The wg_{en,de}cap functions will run in the
1460  * wg_crypt_taskq, while wg_deliver_{in,out} must be serialised and will run
1461  * in nettq.
1462  *
1463  * The packets are tracked in two queues, a serial queue and a parallel queue.
1464  *  - The parallel queue is used to distribute the encryption across multiple
1465  *    threads.
1466  *  - The serial queue ensures that packets are not reordered and are
1467  *    delivered in sequence.
1468  * The wg_tag attached to the packet contains two flags to help the two queues
1469  * interact.
1470  *  - t_done: The parallel queue has finished with the packet, now the serial
1471  *            queue can do its work.
1472  *  - t_mbuf: Used to store the *crypted packet. In the case of encryption,
1473  *            this is a newly allocated packet, and in the case of decryption,
1474  *            it is a pointer to the same packet, which has been decrypted and
1475  *            truncated. If t_mbuf is NULL, then *cryption failed and this
1476  *            packet should not be passed.
1477  * wg_{en,de}cap work on the parallel queue, while wg_deliver_{in,out} work
1478  * on the serial queue.
1479  */
1480 void
1481 wg_encap(struct wg_softc *sc, struct mbuf *m)
1482 {
1483 	int res = 0;
1484 	struct wg_pkt_data	*data;
1485 	struct wg_peer		*peer;
1486 	struct wg_tag		*t;
1487 	struct mbuf		*mc;
1488 	size_t			 padding_len, plaintext_len, out_len;
1489 	uint64_t		 nonce;
1490 
1491 	t = wg_tag_get(m);
1492 	peer = t->t_peer;
1493 
1494 	plaintext_len = min(WG_PKT_WITH_PADDING(m->m_pkthdr.len), t->t_mtu);
1495 	padding_len = plaintext_len - m->m_pkthdr.len;
1496 	out_len = sizeof(struct wg_pkt_data) + plaintext_len + NOISE_AUTHTAG_LEN;
1497 
1498 	/*
1499 	 * For the time being we allocate a new packet with sufficient size to
1500 	 * hold the encrypted data and headers. It would be difficult to
1501 	 * overcome as p_encap_queue (mbuf_list) holds a reference to the mbuf.
1502 	 * If we m_makespace or similar, we risk corrupting that list.
1503 	 * Additionally, we only pass a buf and buf length to
1504 	 * noise_remote_encrypt. Technically it would be possible to teach
1505 	 * noise_remote_encrypt about mbufs, but we would need to sort out the
1506 	 * p_encap_queue situation first.
1507 	 */
1508 	if ((mc = m_clget(NULL, M_NOWAIT, out_len)) == NULL)
1509 		goto error;
1510 
1511 	data = mtod(mc, struct wg_pkt_data *);
1512 	m_copydata(m, 0, m->m_pkthdr.len, data->buf);
1513 	bzero(data->buf + m->m_pkthdr.len, padding_len);
1514 	data->t = WG_PKT_DATA;
1515 
1516 	/*
1517 	 * Copy the flow hash from the inner packet to the outer packet, so
1518 	 * that fq_codel can properly separate streams, rather than falling
1519 	 * back to random buckets.
1520 	 */
1521 	mc->m_pkthdr.ph_flowid = m->m_pkthdr.ph_flowid;
1522 
1523 	res = noise_remote_encrypt(&peer->p_remote, &data->r_idx, &nonce,
1524 				   data->buf, plaintext_len);
1525 	nonce = htole64(nonce); /* Wire format is little endian. */
1526 	memcpy(data->nonce, &nonce, sizeof(data->nonce));
1527 
1528 	if (__predict_false(res == EINVAL)) {
1529 		m_freem(mc);
1530 		goto error;
1531 	} else if (__predict_false(res == ESTALE)) {
1532 		wg_timers_event_want_initiation(&peer->p_timers);
1533 	} else if (__predict_false(res != 0)) {
1534 		panic("unexpected result: %d", res);
1535 	}
1536 
1537 	/* A packet with length 0 is a keepalive packet */
1538 	if (__predict_false(m->m_pkthdr.len == 0))
1539 		DPRINTF(sc, "Sending keepalive packet to peer %llu\n",
1540 		    peer->p_id);
1541 
1542 	mc->m_pkthdr.ph_loopcnt = m->m_pkthdr.ph_loopcnt;
1543 	mc->m_flags &= ~(M_MCAST | M_BCAST);
1544 	mc->m_len = out_len;
1545 	m_calchdrlen(mc);
1546 
1547 	/*
1548 	 * We would count ifc_opackets, ifc_obytes of m here, except if_snd
1549 	 * already does that for us, so no need to worry about it.
1550 	counters_pkt(sc->sc_if.if_counters, ifc_opackets, ifc_obytes,
1551 	    m->m_pkthdr.len);
1552 	 */
1553 	wg_peer_counters_add(peer, mc->m_pkthdr.len, 0);
1554 
1555 	t->t_mbuf = mc;
1556 error:
1557 	t->t_done = 1;
1558 	task_add(net_tq(sc->sc_if.if_index), &peer->p_deliver_out);
1559 }
1560 
1561 void
1562 wg_decap(struct wg_softc *sc, struct mbuf *m)
1563 {
1564 	int			 res, len;
1565 	struct ip		*ip;
1566 	struct ip6_hdr		*ip6;
1567 	struct wg_pkt_data	*data;
1568 	struct wg_peer		*peer, *allowed_peer;
1569 	struct wg_tag		*t;
1570 	size_t			 payload_len;
1571 	uint64_t		 nonce;
1572 
1573 	t = wg_tag_get(m);
1574 	peer = t->t_peer;
1575 
1576 	/*
1577 	 * Likewise to wg_encap, we pass a buf and buf length to
1578 	 * noise_remote_decrypt. Again, possible to teach it about mbufs
1579 	 * but need to get over the p_decap_queue situation first. However,
1580 	 * we do not need to allocate a new mbuf as the decrypted packet is
1581 	 * strictly smaller than encrypted. We just set t_mbuf to m and
1582 	 * wg_deliver_in knows how to deal with that.
1583 	 */
1584 	data = mtod(m, struct wg_pkt_data *);
1585 	payload_len = m->m_pkthdr.len - sizeof(struct wg_pkt_data);
1586 	memcpy(&nonce, data->nonce, sizeof(nonce));
1587 	nonce = le64toh(nonce); /* Wire format is little endian. */
1588 	res = noise_remote_decrypt(&peer->p_remote, data->r_idx, nonce,
1589 				   data->buf, payload_len);
1590 
1591 	if (__predict_false(res == EINVAL)) {
1592 		goto error;
1593 	} else if (__predict_false(res == ECONNRESET)) {
1594 		wg_timers_event_handshake_complete(&peer->p_timers);
1595 	} else if (__predict_false(res == ESTALE)) {
1596 		wg_timers_event_want_initiation(&peer->p_timers);
1597 	} else if (__predict_false(res != 0)) {
1598 		panic("unexpected response: %d", res);
1599 	}
1600 
1601 	wg_peer_set_endpoint_from_tag(peer, t);
1602 
1603 	wg_peer_counters_add(peer, 0, m->m_pkthdr.len);
1604 
1605 	m_adj(m, sizeof(struct wg_pkt_data));
1606 	m_adj(m, -NOISE_AUTHTAG_LEN);
1607 
1608 	counters_pkt(sc->sc_if.if_counters, ifc_ipackets, ifc_ibytes,
1609 	    m->m_pkthdr.len);
1610 
1611 	/* A packet with length 0 is a keepalive packet */
1612 	if (__predict_false(m->m_pkthdr.len == 0)) {
1613 		DPRINTF(sc, "Receiving keepalive packet from peer "
1614 		    "%llu\n", peer->p_id);
1615 		goto done;
1616 	}
1617 
1618 	/*
1619 	 * We can let the network stack handle the intricate validation of the
1620 	 * IP header, we just worry about the sizeof and the version, so we can
1621 	 * read the source address in wg_aip_lookup.
1622 	 *
1623 	 * We also need to trim the packet, as it was likely padded before
1624 	 * encryption. While we could drop it here, it will be more helpful to
1625 	 * pass it to bpf_mtap and use the counters that people are expecting
1626 	 * in ipv4_input and ipv6_input. We can rely on ipv4_input and
1627 	 * ipv6_input to properly validate the headers.
1628 	 */
1629 	ip = mtod(m, struct ip *);
1630 	ip6 = mtod(m, struct ip6_hdr *);
1631 
1632 	if (m->m_pkthdr.len >= sizeof(struct ip) && ip->ip_v == IPVERSION) {
1633 		m->m_pkthdr.ph_family = AF_INET;
1634 
1635 		len = ntohs(ip->ip_len);
1636 		if (len >= sizeof(struct ip) && len < m->m_pkthdr.len)
1637 			m_adj(m, len - m->m_pkthdr.len);
1638 
1639 		allowed_peer = wg_aip_lookup(sc->sc_aip4, &ip->ip_src);
1640 #ifdef INET6
1641 	} else if (m->m_pkthdr.len >= sizeof(struct ip6_hdr) &&
1642 	    (ip6->ip6_vfc & IPV6_VERSION_MASK) == IPV6_VERSION) {
1643 		m->m_pkthdr.ph_family = AF_INET6;
1644 
1645 		len = ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr);
1646 		if (len < m->m_pkthdr.len)
1647 			m_adj(m, len - m->m_pkthdr.len);
1648 
1649 		allowed_peer = wg_aip_lookup(sc->sc_aip6, &ip6->ip6_src);
1650 #endif
1651 	} else {
1652 		DPRINTF(sc, "Packet is neither ipv4 nor ipv6 from "
1653 		    "peer %llu\n", peer->p_id);
1654 		goto error;
1655 	}
1656 
1657 	if (__predict_false(peer != allowed_peer)) {
1658 		DPRINTF(sc, "Packet has unallowed src IP from peer "
1659 		    "%llu\n", peer->p_id);
1660 		goto error;
1661 	}
1662 
1663 	/* tunneled packet was not offloaded */
1664 	m->m_pkthdr.csum_flags = 0;
1665 
1666 	m->m_pkthdr.ph_ifidx = sc->sc_if.if_index;
1667 	m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
1668 	m->m_flags &= ~(M_MCAST | M_BCAST);
1669 #if NPF > 0
1670 	pf_pkt_addr_changed(m);
1671 #endif /* NPF > 0 */
1672 
1673 done:
1674 	t->t_mbuf = m;
1675 error:
1676 	t->t_done = 1;
1677 	task_add(net_tq(sc->sc_if.if_index), &peer->p_deliver_in);
1678 }
1679 
1680 void
1681 wg_encap_worker(void *_sc)
1682 {
1683 	struct mbuf *m;
1684 	struct wg_softc *sc = _sc;
1685 	while ((m = wg_ring_dequeue(&sc->sc_encap_ring)) != NULL)
1686 		wg_encap(sc, m);
1687 }
1688 
1689 void
1690 wg_decap_worker(void *_sc)
1691 {
1692 	struct mbuf *m;
1693 	struct wg_softc *sc = _sc;
1694 	while ((m = wg_ring_dequeue(&sc->sc_decap_ring)) != NULL)
1695 		wg_decap(sc, m);
1696 }
1697 
1698 void
1699 wg_deliver_out(void *_peer)
1700 {
1701 	struct wg_peer		*peer = _peer;
1702 	struct wg_softc		*sc = peer->p_sc;
1703 	struct wg_endpoint	 endpoint;
1704 	struct wg_tag		*t;
1705 	struct mbuf		*m;
1706 	int			 ret;
1707 
1708 	wg_peer_get_endpoint(peer, &endpoint);
1709 
1710 	while ((m = wg_queue_dequeue(&peer->p_encap_queue, &t)) != NULL) {
1711 		/* t_mbuf will contain the encrypted packet */
1712 		if (t->t_mbuf == NULL){
1713 			counters_inc(sc->sc_if.if_counters, ifc_oerrors);
1714 			m_freem(m);
1715 			continue;
1716 		}
1717 
1718 		ret = wg_send(sc, &endpoint, t->t_mbuf);
1719 
1720 		if (ret == 0) {
1721 			wg_timers_event_any_authenticated_packet_traversal(
1722 			    &peer->p_timers);
1723 			wg_timers_event_any_authenticated_packet_sent(
1724 			    &peer->p_timers);
1725 
1726 			if (m->m_pkthdr.len != 0)
1727 				wg_timers_event_data_sent(&peer->p_timers);
1728 		} else if (ret == EADDRNOTAVAIL) {
1729 			wg_peer_clear_src(peer);
1730 			wg_peer_get_endpoint(peer, &endpoint);
1731 		}
1732 
1733 		m_freem(m);
1734 	}
1735 }
1736 
1737 void
1738 wg_deliver_in(void *_peer)
1739 {
1740 	struct wg_peer	*peer = _peer;
1741 	struct wg_softc	*sc = peer->p_sc;
1742 	struct wg_tag	*t;
1743 	struct mbuf	*m;
1744 
1745 	while ((m = wg_queue_dequeue(&peer->p_decap_queue, &t)) != NULL) {
1746 		/* t_mbuf will contain the decrypted packet */
1747 		if (t->t_mbuf == NULL) {
1748 			counters_inc(sc->sc_if.if_counters, ifc_ierrors);
1749 			m_freem(m);
1750 			continue;
1751 		}
1752 
1753 		/* From here on m == t->t_mbuf */
1754 		KASSERT(m == t->t_mbuf);
1755 
1756 		wg_timers_event_any_authenticated_packet_received(
1757 		    &peer->p_timers);
1758 		wg_timers_event_any_authenticated_packet_traversal(
1759 		    &peer->p_timers);
1760 
1761 		if (m->m_pkthdr.len == 0) {
1762 			m_freem(m);
1763 			continue;
1764 		}
1765 
1766 #if NBPFILTER > 0
1767 		if (sc->sc_if.if_bpf != NULL)
1768 			bpf_mtap_af(sc->sc_if.if_bpf,
1769 			    m->m_pkthdr.ph_family, m, BPF_DIRECTION_IN);
1770 #endif
1771 
1772 		NET_LOCK();
1773 		if (m->m_pkthdr.ph_family == AF_INET)
1774 			ipv4_input(&sc->sc_if, m);
1775 #ifdef INET6
1776 		else if (m->m_pkthdr.ph_family == AF_INET6)
1777 			ipv6_input(&sc->sc_if, m);
1778 #endif
1779 		else
1780 			panic("invalid ph_family");
1781 		NET_UNLOCK();
1782 
1783 		wg_timers_event_data_received(&peer->p_timers);
1784 	}
1785 }
1786 
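/*
 * wg_queue_in() places an inbound packet on both the per-peer serial queue
 * and the shared parallel ring. If the ring is full, the mbuf stays on the
 * serial queue with t_done set (t_mbuf is left NULL by the caller), so
 * wg_deliver_in() will count it as an error and free it.
 */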
1787 int
1788 wg_queue_in(struct wg_softc *sc, struct wg_peer *peer, struct mbuf *m)
1789 {
1790 	struct wg_ring		*parallel = &sc->sc_decap_ring;
1791 	struct wg_queue		*serial = &peer->p_decap_queue;
1792 	struct wg_tag		*t;
1793 
1794 	mtx_enter(&serial->q_mtx);
1795 	if (serial->q_list.ml_len < MAX_QUEUED_PKT) {
1796 		ml_enqueue(&serial->q_list, m);
1797 		mtx_leave(&serial->q_mtx);
1798 	} else {
1799 		mtx_leave(&serial->q_mtx);
1800 		m_freem(m);
1801 		return ENOBUFS;
1802 	}
1803 
1804 	mtx_enter(&parallel->r_mtx);
1805 	if (parallel->r_tail - parallel->r_head < MAX_QUEUED_PKT) {
1806 		parallel->r_buf[parallel->r_tail & MAX_QUEUED_PKT_MASK] = m;
1807 		parallel->r_tail++;
1808 		mtx_leave(&parallel->r_mtx);
1809 	} else {
1810 		mtx_leave(&parallel->r_mtx);
1811 		t = wg_tag_get(m);
1812 		t->t_done = 1;
1813 		return ENOBUFS;
1814 	}
1815 
1816 	return 0;
1817 }
1818 
1819 void
1820 wg_queue_out(struct wg_softc *sc, struct wg_peer *peer)
1821 {
1822 	struct wg_ring		*parallel = &sc->sc_encap_ring;
1823 	struct wg_queue		*serial = &peer->p_encap_queue;
1824 	struct mbuf_list 	 ml, ml_free;
1825 	struct mbuf		*m;
1826 	struct wg_tag		*t;
1827 	int			 dropped;
1828 
1829 	/*
1830 	 * We delist all staged packets and then add them to the queues. This
1831 	 * can race with wg_qstart when called from wg_send_keepalive; however,
1832 	 * wg_qstart itself will not race with us, as it is serialised.
1833 	 */
1834 	mq_delist(&peer->p_stage_queue, &ml);
1835 	ml_init(&ml_free);
1836 
1837 	while ((m = ml_dequeue(&ml)) != NULL) {
1838 		mtx_enter(&serial->q_mtx);
1839 		if (serial->q_list.ml_len < MAX_QUEUED_PKT) {
1840 			ml_enqueue(&serial->q_list, m);
1841 			mtx_leave(&serial->q_mtx);
1842 		} else {
1843 			mtx_leave(&serial->q_mtx);
1844 			ml_enqueue(&ml_free, m);
1845 			continue;
1846 		}
1847 
1848 		mtx_enter(&parallel->r_mtx);
1849 		if (parallel->r_tail - parallel->r_head < MAX_QUEUED_PKT) {
1850 			parallel->r_buf[parallel->r_tail & MAX_QUEUED_PKT_MASK] = m;
1851 			parallel->r_tail++;
1852 			mtx_leave(&parallel->r_mtx);
1853 		} else {
1854 			mtx_leave(&parallel->r_mtx);
1855 			t = wg_tag_get(m);
1856 			t->t_done = 1;
1857 		}
1858 	}
1859 
1860 	if ((dropped = ml_purge(&ml_free)) > 0)
1861 		counters_add(sc->sc_if.if_counters, ifc_oqdrops, dropped);
1862 }
1863 
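/*
 * wg_ring_dequeue pops the next mbuf from a parallel ring, or returns NULL
 * if the ring is empty. Multiple crypto workers may call this concurrently;
 * the ring mutex serialises them.
 */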
1864 struct mbuf *
1865 wg_ring_dequeue(struct wg_ring *r)
1866 {
1867 	struct mbuf *m = NULL;
1868 	mtx_enter(&r->r_mtx);
1869 	if (r->r_head != r->r_tail) {
1870 		m = r->r_buf[r->r_head & MAX_QUEUED_PKT_MASK];
1871 		r->r_head++;
1872 	}
1873 	mtx_leave(&r->r_mtx);
1874 	return m;
1875 }
1876 
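/*
 * wg_queue_dequeue pops the head of a serial queue, but only once the crypto
 * worker has marked its tag done. This keeps per-peer delivery in the order
 * the packets were queued, regardless of which worker finished first.
 */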
1877 struct mbuf *
1878 wg_queue_dequeue(struct wg_queue *q, struct wg_tag **t)
1879 {
1880 	struct mbuf *m;
1881 	mtx_enter(&q->q_mtx);
1882 	if ((m = q->q_list.ml_head) != NULL && (*t = wg_tag_get(m))->t_done)
1883 		ml_dequeue(&q->q_list);
1884 	else
1885 		m = NULL;
1886 	mtx_leave(&q->q_mtx);
1887 	return m;
1888 }
1889 
1890 size_t
1891 wg_queue_len(struct wg_queue *q)
1892 {
1893 	size_t len;
1894 	mtx_enter(&q->q_mtx);
1895 	len = q->q_list.ml_len;
1896 	mtx_leave(&q->q_mtx);
1897 	return len;
1898 }
1899 
1900 struct noise_remote *
1901 wg_remote_get(void *_sc, uint8_t public[NOISE_PUBLIC_KEY_LEN])
1902 {
1903 	struct wg_peer	*peer;
1904 	struct wg_softc	*sc = _sc;
1905 	if ((peer = wg_peer_lookup(sc, public)) == NULL)
1906 		return NULL;
1907 	return &peer->p_remote;
1908 }
1909 
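/*
 * Noise upcall: take an entry from the peer's unused list, assign it a random
 * index, retrying until the index is unique within its hash bucket, and
 * insert it into the index table so wg_index_get can find the remote later.
 */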
1910 uint32_t
1911 wg_index_set(void *_sc, struct noise_remote *remote)
1912 {
1913 	struct wg_peer	*peer;
1914 	struct wg_softc	*sc = _sc;
1915 	struct wg_index *index, *iter;
1916 	uint32_t	 key;
1917 
1918 	/*
1919 	 * We can modify this without a lock, as wg_index_set and wg_index_drop
1920 	 * are guaranteed to be serialised (per remote).
1921 	 */
1922 	peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1923 	index = SLIST_FIRST(&peer->p_unused_index);
1924 	KASSERT(index != NULL);
1925 	SLIST_REMOVE_HEAD(&peer->p_unused_index, i_unused_entry);
1926 
1927 	index->i_value = remote;
1928 
1929 	mtx_enter(&sc->sc_index_mtx);
1930 assign_id:
1931 	key = index->i_key = arc4random();
1932 	key &= sc->sc_index_mask;
1933 	LIST_FOREACH(iter, &sc->sc_index[key], i_entry)
1934 		if (iter->i_key == index->i_key)
1935 			goto assign_id;
1936 
1937 	LIST_INSERT_HEAD(&sc->sc_index[key], index, i_entry);
1938 
1939 	mtx_leave(&sc->sc_index_mtx);
1940 
1941 	/* Likewise, no need to lock for index here. */
1942 	return index->i_key;
1943 }
1944 
1945 struct noise_remote *
1946 wg_index_get(void *_sc, uint32_t key0)
1947 {
1948 	struct wg_softc		*sc = _sc;
1949 	struct wg_index		*iter;
1950 	struct noise_remote	*remote = NULL;
1951 	uint32_t		 key = key0 & sc->sc_index_mask;
1952 
1953 	mtx_enter(&sc->sc_index_mtx);
1954 	LIST_FOREACH(iter, &sc->sc_index[key], i_entry)
1955 		if (iter->i_key == key0) {
1956 			remote = iter->i_value;
1957 			break;
1958 		}
1959 	mtx_leave(&sc->sc_index_mtx);
1960 	return remote;
1961 }
1962 
1963 void
1964 wg_index_drop(void *_sc, uint32_t key0)
1965 {
1966 	struct wg_softc	*sc = _sc;
1967 	struct wg_index	*iter;
1968 	struct wg_peer	*peer = NULL;
1969 	uint32_t	 key = key0 & sc->sc_index_mask;
1970 
1971 	mtx_enter(&sc->sc_index_mtx);
1972 	LIST_FOREACH(iter, &sc->sc_index[key], i_entry)
1973 		if (iter->i_key == key0) {
1974 			LIST_REMOVE(iter, i_entry);
1975 			break;
1976 		}
1977 	mtx_leave(&sc->sc_index_mtx);
1978 
1979 	/* We expect a peer */
1980 	peer = CONTAINER_OF(iter->i_value, struct wg_peer, p_remote);
1981 	KASSERT(peer != NULL);
1982 	SLIST_INSERT_HEAD(&peer->p_unused_index, iter, i_unused_entry);
1983 }
1984 
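/*
 * wg_input is the UDP tunnel input hook. It records the sender's endpoint in
 * the packet tag, then either queues the packet for the handshake taskq
 * (initiation, response, cookie) or, for data packets, looks up the peer by
 * receiver index and queues it for decryption. The mbuf is always consumed.
 */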
1985 struct mbuf *
1986 wg_input(void *_sc, struct mbuf *m, struct ip *ip, struct ip6_hdr *ip6,
1987     void *_uh, int hlen)
1988 {
1989 	struct wg_pkt_data	*data;
1990 	struct noise_remote	*remote;
1991 	struct wg_tag		*t;
1992 	struct wg_softc		*sc = _sc;
1993 	struct udphdr		*uh = _uh;
1994 
1995 	NET_ASSERT_LOCKED();
1996 
1997 	if ((t = wg_tag_get(m)) == NULL) {
1998 		m_freem(m);
1999 		return NULL;
2000 	}
2001 
2002 	if (ip != NULL) {
2003 		t->t_endpoint.e_remote.r_sa.sa_len = sizeof(struct sockaddr_in);
2004 		t->t_endpoint.e_remote.r_sa.sa_family = AF_INET;
2005 		t->t_endpoint.e_remote.r_sin.sin_port = uh->uh_sport;
2006 		t->t_endpoint.e_remote.r_sin.sin_addr = ip->ip_src;
2007 		t->t_endpoint.e_local.l_in = ip->ip_dst;
2008 #ifdef INET6
2009 	} else if (ip6 != NULL) {
2010 		t->t_endpoint.e_remote.r_sa.sa_len = sizeof(struct sockaddr_in6);
2011 		t->t_endpoint.e_remote.r_sa.sa_family = AF_INET6;
2012 		t->t_endpoint.e_remote.r_sin6.sin6_port = uh->uh_sport;
2013 		t->t_endpoint.e_remote.r_sin6.sin6_addr = ip6->ip6_src;
2014 		t->t_endpoint.e_local.l_in6 = ip6->ip6_dst;
2015 #endif
2016 	} else {
2017 		m_freem(m);
2018 		return NULL;
2019 	}
2020 
2021 	/* m has an IP/IPv6 header of hlen bytes that we no longer need. */
2022 	m_adj(m, hlen);
2023 
2024 	/*
2025 	 * Ensure mbuf is contiguous over full length of packet. This is done
2026 	 * so we can directly read the handshake values in wg_handshake, and so
2027 	 * we can decrypt a transport packet by passing a single buffer to
2028 	 * noise_remote_decrypt in wg_decap.
2029 	 */
2030 	if ((m = m_pullup(m, m->m_pkthdr.len)) == NULL)
2031 		return NULL;
2032 
2033 	if ((m->m_pkthdr.len == sizeof(struct wg_pkt_initiation) &&
2034 		*mtod(m, uint32_t *) == WG_PKT_INITIATION) ||
2035 	    (m->m_pkthdr.len == sizeof(struct wg_pkt_response) &&
2036 		*mtod(m, uint32_t *) == WG_PKT_RESPONSE) ||
2037 	    (m->m_pkthdr.len == sizeof(struct wg_pkt_cookie) &&
2038 		*mtod(m, uint32_t *) == WG_PKT_COOKIE)) {
2039 
2040 		if (mq_enqueue(&sc->sc_handshake_queue, m) != 0)
2041 			DPRINTF(sc, "Dropping handshake packet\n");
2042 		task_add(wg_handshake_taskq, &sc->sc_handshake);
2043 
2044 	} else if (m->m_pkthdr.len >= sizeof(struct wg_pkt_data) +
2045 	    NOISE_AUTHTAG_LEN && *mtod(m, uint32_t *) == WG_PKT_DATA) {
2046 
2047 		data = mtod(m, struct wg_pkt_data *);
2048 
2049 		if ((remote = wg_index_get(sc, data->r_idx)) != NULL) {
2050 			t->t_peer = CONTAINER_OF(remote, struct wg_peer,
2051 			    p_remote);
2052 			t->t_mbuf = NULL;
2053 			t->t_done = 0;
2054 
2055 			if (wg_queue_in(sc, t->t_peer, m) != 0)
2056 				counters_inc(sc->sc_if.if_counters,
2057 				    ifc_iqdrops);
2058 			task_add(wg_crypt_taskq, &sc->sc_decap);
2059 		} else {
2060 			counters_inc(sc->sc_if.if_counters, ifc_ierrors);
2061 			m_freem(m);
2062 		}
2063 	} else {
2064 		counters_inc(sc->sc_if.if_counters, ifc_ierrors);
2065 		m_freem(m);
2066 	}
2067 
2068 	return NULL;
2069 }
2070 
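/*
 * wg_qstart is the interface start routine. It moves dequeued packets onto
 * each destination peer's stage queue, then, per peer, either queues the
 * staged packets for encryption (if a session is ready) or requests a new
 * handshake, and finally kicks the crypt taskq.
 */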
2071 void
2072 wg_qstart(struct ifqueue *ifq)
2073 {
2074 	struct ifnet		*ifp = ifq->ifq_if;
2075 	struct wg_softc		*sc = ifp->if_softc;
2076 	struct wg_peer		*peer;
2077 	struct wg_tag		*t;
2078 	struct mbuf		*m;
2079 	SLIST_HEAD(,wg_peer)	 start_list;
2080 
2081 	SLIST_INIT(&start_list);
2082 
2083 	/*
2084 	 * We should be safe to modify p_start_list and p_start_onlist in this
2085 	 * function, as only one invocation of ifp->if_qstart should be running
2086 	 * at a time.
2087 	 */
2088 	while ((m = ifq_dequeue(ifq)) != NULL) {
2089 		t = wg_tag_get(m);
2090 		peer = t->t_peer;
2091 		if (mq_push(&peer->p_stage_queue, m) != 0)
2092 			counters_inc(ifp->if_counters, ifc_oqdrops);
2093 		if (!peer->p_start_onlist) {
2094 			SLIST_INSERT_HEAD(&start_list, peer, p_start_list);
2095 			peer->p_start_onlist = 1;
2096 		}
2097 	}
2098 	SLIST_FOREACH(peer, &start_list, p_start_list) {
2099 		if (noise_remote_ready(&peer->p_remote) == 0)
2100 			wg_queue_out(sc, peer);
2101 		else
2102 			wg_timers_event_want_initiation(&peer->p_timers);
2103 		peer->p_start_onlist = 0;
2104 	}
2105 	task_add(wg_crypt_taskq, &sc->sc_encap);
2106 }
2107 
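/*
 * wg_output looks up the destination peer via the allowed-IP tables, fills
 * in the packet tag and enqueues the packet on the interface queue for
 * wg_qstart. Packets without a matching peer or a usable endpoint are
 * dropped and counted as output errors.
 */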
2108 int
2109 wg_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
2110     struct rtentry *rt)
2111 {
2112 	struct wg_softc	*sc = ifp->if_softc;
2113 	struct wg_peer	*peer;
2114 	struct wg_tag	*t;
2115 	int		 af, ret = EINVAL;
2116 
2117 	NET_ASSERT_LOCKED();
2118 
2119 	if ((t = wg_tag_get(m)) == NULL) {
2120 		ret = ENOBUFS;
2121 		goto error;
2122 	}
2123 
2124 	m->m_pkthdr.ph_family = sa->sa_family;
2125 	if (sa->sa_family == AF_INET) {
2126 		peer = wg_aip_lookup(sc->sc_aip4,
2127 		    &mtod(m, struct ip *)->ip_dst);
2128 #ifdef INET6
2129 	} else if (sa->sa_family == AF_INET6) {
2130 		peer = wg_aip_lookup(sc->sc_aip6,
2131 		    &mtod(m, struct ip6_hdr *)->ip6_dst);
2132 #endif
2133 	} else {
2134 		ret = EAFNOSUPPORT;
2135 		goto error;
2136 	}
2137 
2138 #if NBPFILTER > 0
2139 	if (sc->sc_if.if_bpf)
2140 		bpf_mtap_af(sc->sc_if.if_bpf, sa->sa_family, m,
2141 		    BPF_DIRECTION_OUT);
2142 #endif
2143 
2144 	if (peer == NULL) {
2145 		ret = ENETUNREACH;
2146 		goto error;
2147 	}
2148 
2149 	af = peer->p_endpoint.e_remote.r_sa.sa_family;
2150 	if (af != AF_INET && af != AF_INET6) {
2151 		DPRINTF(sc, "No valid endpoint has been configured or "
2152 				"discovered for peer %llu\n", peer->p_id);
2153 		ret = EDESTADDRREQ;
2154 		goto error;
2155 	}
2156 
2157 	if (m->m_pkthdr.ph_loopcnt++ > M_MAXLOOP) {
2158 		DPRINTF(sc, "Packet looped\n");
2159 		ret = ELOOP;
2160 		goto error;
2161 	}
2162 
2163 	/*
2164 	 * As we hold a reference to the peer in the mbuf, we can't handle a
2165 	 * delayed packet without doing some refcounting. If a peer is removed
2166 	 * while a delayed packet holds a reference, bad things will happen.
2167 	 * For the time being, delayed packets are unsupported. This may be
2168 	 * fixed with another aip_lookup in wg_qstart, or with refcounting.
2169 	 */
2170 	if (m->m_pkthdr.pf.delay > 0) {
2171 		DPRINTF(sc, "PF delay unsupported\n");
2172 		ret = EOPNOTSUPP;
2173 		goto error;
2174 	}
2175 
2176 	t->t_peer = peer;
2177 	t->t_mbuf = NULL;
2178 	t->t_done = 0;
2179 	t->t_mtu = ifp->if_mtu;
2180 
2181 	/*
2182 	 * We still have an issue with ifq: it counts a packet even if it is
2183 	 * later dropped in wg_qstart or never encrypted. Such packets are
2184 	 * also counted as ofails or oqdrops, so they get counted twice.
2185 	 */
2186 	return if_enqueue(ifp, m);
2187 error:
2188 	counters_inc(ifp->if_counters, ifc_oerrors);
2189 	m_freem(m);
2190 	return ret;
2191 }
2192 
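/*
 * wg_ioctl_set handles SIOCSWG: it copies the interface, peer and allowed-IP
 * descriptions in from userland and applies them under the exclusive
 * sc_lock. Sensitive key material is zeroed before returning.
 */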
2193 int
2194 wg_ioctl_set(struct wg_softc *sc, struct wg_data_io *data)
2195 {
2196 	struct wg_interface_io	*iface_p, iface_o;
2197 	struct wg_peer_io	*peer_p, peer_o;
2198 	struct wg_aip_io	*aip_p, aip_o;
2199 
2200 	struct wg_peer		*peer, *tpeer;
2201 	struct wg_aip		*aip, *taip;
2202 
2203 	in_port_t		 port;
2204 	int			 rtable;
2205 
2206 	uint8_t			 public[WG_KEY_SIZE], private[WG_KEY_SIZE];
2207 	size_t			 i, j;
2208 	int			 ret, has_identity;
2209 
2210 	if ((ret = suser(curproc)) != 0)
2211 		return ret;
2212 
2213 	rw_enter_write(&sc->sc_lock);
2214 
2215 	iface_p = data->wgd_interface;
2216 	if ((ret = copyin(iface_p, &iface_o, sizeof(iface_o))) != 0)
2217 		goto error;
2218 
2219 	if (iface_o.i_flags & WG_INTERFACE_REPLACE_PEERS)
2220 		TAILQ_FOREACH_SAFE(peer, &sc->sc_peer_seq, p_seq_entry, tpeer)
2221 			wg_peer_destroy(peer);
2222 
2223 	if (iface_o.i_flags & WG_INTERFACE_HAS_PRIVATE &&
2224 	    (noise_local_keys(&sc->sc_local, NULL, private) ||
2225 	     timingsafe_bcmp(private, iface_o.i_private, WG_KEY_SIZE))) {
2226 		if (curve25519_generate_public(public, iface_o.i_private)) {
2227 			if ((peer = wg_peer_lookup(sc, public)) != NULL)
2228 				wg_peer_destroy(peer);
2229 		}
2230 		noise_local_lock_identity(&sc->sc_local);
2231 		has_identity = noise_local_set_private(&sc->sc_local,
2232 						       iface_o.i_private);
2233 		TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2234 			noise_remote_precompute(&peer->p_remote);
2235 			wg_timers_event_reset_handshake_last_sent(&peer->p_timers);
2236 			noise_remote_expire_current(&peer->p_remote);
2237 		}
2238 		cookie_checker_update(&sc->sc_cookie,
2239 				      has_identity == 0 ? public : NULL);
2240 		noise_local_unlock_identity(&sc->sc_local);
2241 	}
2242 
2243 	if (iface_o.i_flags & WG_INTERFACE_HAS_PORT)
2244 		port = htons(iface_o.i_port);
2245 	else
2246 		port = sc->sc_udp_port;
2247 
2248 	if (iface_o.i_flags & WG_INTERFACE_HAS_RTABLE)
2249 		rtable = iface_o.i_rtable;
2250 	else
2251 		rtable = sc->sc_udp_rtable;
2252 
2253 	if (port != sc->sc_udp_port || rtable != sc->sc_udp_rtable) {
2254 		TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry)
2255 			wg_peer_clear_src(peer);
2256 
2257 		if (sc->sc_if.if_flags & IFF_RUNNING)
2258 			if ((ret = wg_bind(sc, &port, &rtable)) != 0)
2259 				goto error;
2260 
2261 		sc->sc_udp_port = port;
2262 		sc->sc_udp_rtable = rtable;
2263 	}
2264 
2265 	peer_p = &iface_p->i_peers[0];
2266 	for (i = 0; i < iface_o.i_peers_count; i++) {
2267 		if ((ret = copyin(peer_p, &peer_o, sizeof(peer_o))) != 0)
2268 			goto error;
2269 
2270 		/* Peer must have public key */
2271 		if (!(peer_o.p_flags & WG_PEER_HAS_PUBLIC))
2272 			goto next_peer;
2273 
2274 		/* 0 = latest protocol, 1 = this protocol */
2275 		if (peer_o.p_protocol_version != 0) {
2276 			if (peer_o.p_protocol_version > 1) {
2277 				ret = EPFNOSUPPORT;
2278 				goto error;
2279 			}
2280 		}
2281 
2282 		/* Get local public and check that peer key doesn't match */
2283 		if (noise_local_keys(&sc->sc_local, public, NULL) == 0 &&
2284 		    bcmp(public, peer_o.p_public, WG_KEY_SIZE) == 0)
2285 			goto next_peer;
2286 
2287 		/* Lookup peer, or create if it doesn't exist */
2288 		if ((peer = wg_peer_lookup(sc, peer_o.p_public)) == NULL) {
2289 			/* If we want to delete the peer, there is no need to
2290 			 * create a new one. Likewise, don't create one if we
2291 			 * only want to update an existing peer. */
2292 			if (peer_o.p_flags & (WG_PEER_REMOVE|WG_PEER_UPDATE))
2293 				goto next_peer;
2294 
2295 			if ((peer = wg_peer_create(sc,
2296 			    peer_o.p_public)) == NULL) {
2297 				ret = ENOMEM;
2298 				goto error;
2299 			}
2300 		}
2301 
2302 		/* Remove peer and continue if specified */
2303 		if (peer_o.p_flags & WG_PEER_REMOVE) {
2304 			wg_peer_destroy(peer);
2305 			goto next_peer;
2306 		}
2307 
2308 		if (peer_o.p_flags & WG_PEER_HAS_ENDPOINT)
2309 			wg_peer_set_sockaddr(peer, &peer_o.p_sa);
2310 
2311 		if (peer_o.p_flags & WG_PEER_HAS_PSK)
2312 			noise_remote_set_psk(&peer->p_remote, peer_o.p_psk);
2313 
2314 		if (peer_o.p_flags & WG_PEER_HAS_PKA)
2315 			wg_timers_set_persistent_keepalive(&peer->p_timers,
2316 			    peer_o.p_pka);
2317 
2318 		if (peer_o.p_flags & WG_PEER_REPLACE_AIPS) {
2319 			LIST_FOREACH_SAFE(aip, &peer->p_aip, a_entry, taip) {
2320 				wg_aip_remove(sc, peer, &aip->a_data);
2321 			}
2322 		}
2323 
2324 		aip_p = &peer_p->p_aips[0];
2325 		for (j = 0; j < peer_o.p_aips_count; j++) {
2326 			if ((ret = copyin(aip_p, &aip_o, sizeof(aip_o))) != 0)
2327 				goto error;
2328 			ret = wg_aip_add(sc, peer, &aip_o);
2329 			if (ret != 0)
2330 				goto error;
2331 			aip_p++;
2332 		}
2333 
2334 		peer_p = (struct wg_peer_io *)aip_p;
2335 		continue;
2336 next_peer:
2337 		aip_p = &peer_p->p_aips[0];
2338 		aip_p += peer_o.p_aips_count;
2339 		peer_p = (struct wg_peer_io *)aip_p;
2340 	}
2341 
2342 error:
2343 	rw_exit_write(&sc->sc_lock);
2344 	explicit_bzero(&iface_o, sizeof(iface_o));
2345 	explicit_bzero(&peer_o, sizeof(peer_o));
2346 	explicit_bzero(&aip_o, sizeof(aip_o));
2347 	explicit_bzero(public, sizeof(public));
2348 	explicit_bzero(private, sizeof(private));
2349 	return ret;
2350 }
2351 
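/*
 * wg_ioctl_get handles SIOCGWG: it copies the current configuration out to
 * userland, omitting keys and peers for non-root callers, and reports the
 * buffer size required when the caller's buffer is too small.
 */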
2352 int
2353 wg_ioctl_get(struct wg_softc *sc, struct wg_data_io *data)
2354 {
2355 	struct wg_interface_io	*iface_p, iface_o;
2356 	struct wg_peer_io	*peer_p, peer_o;
2357 	struct wg_aip_io	*aip_p;
2358 
2359 	struct wg_peer		*peer;
2360 	struct wg_aip		*aip;
2361 
2362 	size_t			 size, peer_count, aip_count;
2363 	int			 ret = 0, is_suser = suser(curproc) == 0;
2364 
2365 	size = sizeof(struct wg_interface_io);
2366 	if (data->wgd_size < size && !is_suser)
2367 		goto ret_size;
2368 
2369 	iface_p = data->wgd_interface;
2370 	bzero(&iface_o, sizeof(iface_o));
2371 
2372 	rw_enter_read(&sc->sc_lock);
2373 
2374 	if (sc->sc_udp_port != 0) {
2375 		iface_o.i_port = ntohs(sc->sc_udp_port);
2376 		iface_o.i_flags |= WG_INTERFACE_HAS_PORT;
2377 	}
2378 
2379 	if (sc->sc_udp_rtable != 0) {
2380 		iface_o.i_rtable = sc->sc_udp_rtable;
2381 		iface_o.i_flags |= WG_INTERFACE_HAS_RTABLE;
2382 	}
2383 
2384 	if (!is_suser)
2385 		goto copy_out_iface;
2386 
2387 	if (noise_local_keys(&sc->sc_local, iface_o.i_public,
2388 	    iface_o.i_private) == 0) {
2389 		iface_o.i_flags |= WG_INTERFACE_HAS_PUBLIC;
2390 		iface_o.i_flags |= WG_INTERFACE_HAS_PRIVATE;
2391 	}
2392 
2393 	size += sizeof(struct wg_peer_io) * sc->sc_peer_num;
2394 	size += sizeof(struct wg_aip_io) * sc->sc_aip_num;
2395 	if (data->wgd_size < size)
2396 		goto unlock_and_ret_size;
2397 
2398 	peer_count = 0;
2399 	peer_p = &iface_p->i_peers[0];
2400 	TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2401 		bzero(&peer_o, sizeof(peer_o));
2402 		peer_o.p_flags = WG_PEER_HAS_PUBLIC;
2403 		peer_o.p_protocol_version = 1;
2404 
2405 		if (noise_remote_keys(&peer->p_remote, peer_o.p_public,
2406 		    peer_o.p_psk) == 0)
2407 			peer_o.p_flags |= WG_PEER_HAS_PSK;
2408 
2409 		if (wg_timers_get_persistent_keepalive(&peer->p_timers,
2410 		    &peer_o.p_pka) == 0)
2411 			peer_o.p_flags |= WG_PEER_HAS_PKA;
2412 
2413 		if (wg_peer_get_sockaddr(peer, &peer_o.p_sa) == 0)
2414 			peer_o.p_flags |= WG_PEER_HAS_ENDPOINT;
2415 
2416 		mtx_enter(&peer->p_counters_mtx);
2417 		peer_o.p_txbytes = peer->p_counters_tx;
2418 		peer_o.p_rxbytes = peer->p_counters_rx;
2419 		mtx_leave(&peer->p_counters_mtx);
2420 
2421 		wg_timers_get_last_handshake(&peer->p_timers,
2422 		    &peer_o.p_last_handshake);
2423 
2424 		aip_count = 0;
2425 		aip_p = &peer_p->p_aips[0];
2426 		LIST_FOREACH(aip, &peer->p_aip, a_entry) {
2427 			if ((ret = copyout(&aip->a_data, aip_p, sizeof(*aip_p))) != 0)
2428 				goto unlock_and_ret_size;
2429 			aip_p++;
2430 			aip_count++;
2431 		}
2432 		peer_o.p_aips_count = aip_count;
2433 
2434 		if ((ret = copyout(&peer_o, peer_p, sizeof(peer_o))) != 0)
2435 			goto unlock_and_ret_size;
2436 
2437 		peer_p = (struct wg_peer_io *)aip_p;
2438 		peer_count++;
2439 	}
2440 	iface_o.i_peers_count = peer_count;
2441 
2442 copy_out_iface:
2443 	ret = copyout(&iface_o, iface_p, sizeof(iface_o));
2444 unlock_and_ret_size:
2445 	rw_exit_read(&sc->sc_lock);
2446 	explicit_bzero(&iface_o, sizeof(iface_o));
2447 	explicit_bzero(&peer_o, sizeof(peer_o));
2448 ret_size:
2449 	data->wgd_size = size;
2450 	return ret;
2451 }
2452 
2453 int
2454 wg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2455 {
2456 	struct ifreq	*ifr = (struct ifreq *) data;
2457 	struct wg_softc	*sc = ifp->if_softc;
2458 	int		 ret = 0;
2459 
2460 	switch (cmd) {
2461 	case SIOCSWG:
2462 		NET_UNLOCK();
2463 		ret = wg_ioctl_set(sc, (struct wg_data_io *) data);
2464 		NET_LOCK();
2465 		break;
2466 	case SIOCGWG:
2467 		NET_UNLOCK();
2468 		ret = wg_ioctl_get(sc, (struct wg_data_io *) data);
2469 		NET_LOCK();
2470 		break;
2471 	/* Interface IOCTLs */
2472 	case SIOCSIFADDR:
2473 		SET(ifp->if_flags, IFF_UP);
2474 		/* FALLTHROUGH */
2475 	case SIOCSIFFLAGS:
2476 		if (ISSET(ifp->if_flags, IFF_UP))
2477 			ret = wg_up(sc);
2478 		else
2479 			wg_down(sc);
2480 		break;
2481 	case SIOCSIFMTU:
2482 		/* Arbitrary limits */
2483 		if (ifr->ifr_mtu <= 0 || ifr->ifr_mtu > 9000)
2484 			ret = EINVAL;
2485 		else
2486 			ifp->if_mtu = ifr->ifr_mtu;
2487 		break;
2488 	case SIOCADDMULTI:
2489 	case SIOCDELMULTI:
2490 		break;
2491 	default:
2492 		ret = ENOTTY;
2493 	}
2494 
2495 	return ret;
2496 }
2497 
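/*
 * wg_up marks the interface running, binds the UDP socket(s) and enables the
 * per-peer timers, flushing any packets staged while the interface was down.
 */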
2498 int
2499 wg_up(struct wg_softc *sc)
2500 {
2501 	struct wg_peer	*peer;
2502 	int		 ret = 0;
2503 
2504 	NET_ASSERT_LOCKED();
2505 	/*
2506 	 * We use IFF_RUNNING for exclusive access here. We also want an
2507 	 * exclusive sc_lock, as wg_bind may write to sc_udp_port, and we want
2508 	 * to drop NET_LOCK in order to call socreate, sobind, etc. Once solock
2509 	 * is no longer the same lock as NET_LOCK, we may be able to avoid this.
2510 	 */
2511 	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
2512 		SET(sc->sc_if.if_flags, IFF_RUNNING);
2513 		NET_UNLOCK();
2514 
2515 		rw_enter_write(&sc->sc_lock);
2516 		/*
2517 		 * If we successfully bind the socket, then enable the timers
2518 		 * for each peer. This will send all staged packets and a
2519 		 * keepalive if necessary.
2520 		 */
2521 		ret = wg_bind(sc, &sc->sc_udp_port, &sc->sc_udp_rtable);
2522 		if (ret == 0) {
2523 			TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2524 				wg_timers_enable(&peer->p_timers);
2525 				wg_queue_out(sc, peer);
2526 			}
2527 		}
2528 		rw_exit_write(&sc->sc_lock);
2529 
2530 		NET_LOCK();
2531 		if (ret != 0)
2532 			CLR(sc->sc_if.if_flags, IFF_RUNNING);
2533 	}
2534 	return ret;
2535 }
2536 
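/*
 * wg_down clears IFF_RUNNING, purges staged packets, disables the per-peer
 * timers, clears all noise keys and unbinds the UDP socket(s).
 */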
2537 void
2538 wg_down(struct wg_softc *sc)
2539 {
2540 	struct wg_peer	*peer;
2541 
2542 	NET_ASSERT_LOCKED();
2543 	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING))
2544 		return;
2545 	CLR(sc->sc_if.if_flags, IFF_RUNNING);
2546 	NET_UNLOCK();
2547 
2548 	/*
2549 	 * We only need a read lock here, as everything we touch is protected
2550 	 * by its own, finer-grained lock.
2551 	 */
2552 	rw_enter_read(&sc->sc_lock);
2553 	TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2554 		mq_purge(&peer->p_stage_queue);
2555 		wg_timers_disable(&peer->p_timers);
2556 	}
2557 
2558 	taskq_barrier(wg_handshake_taskq);
2559 	TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2560 		noise_remote_clear(&peer->p_remote);
2561 		wg_timers_event_reset_handshake_last_sent(&peer->p_timers);
2562 	}
2563 
2564 	wg_unbind(sc);
2565 	rw_exit_read(&sc->sc_lock);
2566 	NET_LOCK();
2567 }
2568 
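/*
 * wg_clone_create allocates and initialises a new wg(4) softc and attaches
 * its ifnet. The shared handshake and crypt taskqs are created when the
 * first interface is cloned.
 */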
2569 int
2570 wg_clone_create(struct if_clone *ifc, int unit)
2571 {
2572 	struct ifnet		*ifp;
2573 	struct wg_softc		*sc;
2574 	struct noise_upcall	 local_upcall;
2575 
2576 	KERNEL_ASSERT_LOCKED();
2577 
2578 	if (wg_counter == 0) {
2579 		wg_handshake_taskq = taskq_create("wg_handshake",
2580 		    2, IPL_NET, TASKQ_MPSAFE);
2581 		wg_crypt_taskq = taskq_create("wg_crypt",
2582 		    ncpus, IPL_NET, TASKQ_MPSAFE);
2583 
2584 		if (wg_handshake_taskq == NULL || wg_crypt_taskq == NULL) {
2585 			if (wg_handshake_taskq != NULL)
2586 				taskq_destroy(wg_handshake_taskq);
2587 			if (wg_crypt_taskq != NULL)
2588 				taskq_destroy(wg_crypt_taskq);
2589 			wg_handshake_taskq = NULL;
2590 			wg_crypt_taskq = NULL;
2591 			return ENOTRECOVERABLE;
2592 		}
2593 	}
2594 	wg_counter++;
2595 
2596 	if ((sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL)
2597 		goto ret_00;
2598 
2599 	local_upcall.u_arg = sc;
2600 	local_upcall.u_remote_get = wg_remote_get;
2601 	local_upcall.u_index_set = wg_index_set;
2602 	local_upcall.u_index_drop = wg_index_drop;
2603 
2604 	TAILQ_INIT(&sc->sc_peer_seq);
2605 
2606 	/* sc_if is initialised after everything else */
2607 	arc4random_buf(&sc->sc_secret, sizeof(sc->sc_secret));
2608 
2609 	rw_init(&sc->sc_lock, "wg");
2610 	noise_local_init(&sc->sc_local, &local_upcall);
2611 	if (cookie_checker_init(&sc->sc_cookie, &wg_ratelimit_pool) != 0)
2612 		goto ret_01;
2613 	sc->sc_udp_port = 0;
2614 	sc->sc_udp_rtable = 0;
2615 
2616 	rw_init(&sc->sc_so_lock, "wg_so");
2617 	sc->sc_so4 = NULL;
2618 #ifdef INET6
2619 	sc->sc_so6 = NULL;
2620 #endif
2621 
2622 	sc->sc_aip_num = 0;
2623 	if ((sc->sc_aip4 = art_alloc(0, 32, 0)) == NULL)
2624 		goto ret_02;
2625 #ifdef INET6
2626 	if ((sc->sc_aip6 = art_alloc(0, 128, 0)) == NULL)
2627 		goto ret_03;
2628 #endif
2629 
2630 	rw_init(&sc->sc_peer_lock, "wg_peer");
2631 	sc->sc_peer_num = 0;
2632 	if ((sc->sc_peer = hashinit(HASHTABLE_PEER_SIZE, M_DEVBUF,
2633 	    M_NOWAIT, &sc->sc_peer_mask)) == NULL)
2634 		goto ret_04;
2635 
2636 	mtx_init(&sc->sc_index_mtx, IPL_NET);
2637 	if ((sc->sc_index = hashinit(HASHTABLE_INDEX_SIZE, M_DEVBUF,
2638 	    M_NOWAIT, &sc->sc_index_mask)) == NULL)
2639 		goto ret_05;
2640 
2641 	task_set(&sc->sc_handshake, wg_handshake_worker, sc);
2642 	mq_init(&sc->sc_handshake_queue, MAX_QUEUED_HANDSHAKES, IPL_NET);
2643 
2644 	task_set(&sc->sc_encap, wg_encap_worker, sc);
2645 	task_set(&sc->sc_decap, wg_decap_worker, sc);
2646 
2647 	bzero(&sc->sc_encap_ring, sizeof(sc->sc_encap_ring));
2648 	mtx_init(&sc->sc_encap_ring.r_mtx, IPL_NET);
2649 	bzero(&sc->sc_decap_ring, sizeof(sc->sc_decap_ring));
2650 	mtx_init(&sc->sc_decap_ring.r_mtx, IPL_NET);
2651 
2652 	/* We've set up the softc; now we can set up the ifnet. */
2653 	ifp = &sc->sc_if;
2654 	ifp->if_softc = sc;
2655 
2656 	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "wg%d", unit);
2657 
2658 	ifp->if_mtu = DEFAULT_MTU;
2659 	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_NOARP;
2660 	ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
2661 	ifp->if_txmit = 64; /* Keep our workers active for longer. */
2662 
2663 	ifp->if_ioctl = wg_ioctl;
2664 	ifp->if_qstart = wg_qstart;
2665 	ifp->if_output = wg_output;
2666 
2667 	ifp->if_type = IFT_WIREGUARD;
2668 	ifp->if_rtrequest = p2p_rtrequest;
2669 
2670 	if_attach(ifp);
2671 	if_alloc_sadl(ifp);
2672 	if_counters_alloc(ifp);
2673 
2674 #if NBPFILTER > 0
2675 	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
2676 #endif
2677 
2678 	DPRINTF(sc, "Interface created\n");
2679 
2680 	return 0;
2681 ret_05:
2682 	hashfree(sc->sc_peer, HASHTABLE_PEER_SIZE, M_DEVBUF);
2683 ret_04:
2684 #ifdef INET6
2685 	free(sc->sc_aip6, M_RTABLE, sizeof(*sc->sc_aip6));
2686 ret_03:
2687 #endif
2688 	free(sc->sc_aip4, M_RTABLE, sizeof(*sc->sc_aip4));
2689 ret_02:
2690 	cookie_checker_deinit(&sc->sc_cookie);
2691 ret_01:
2692 	free(sc, M_DEVBUF, sizeof(*sc));
2693 ret_00:
2694 	return ENOBUFS;
2695 }
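
/*
 * wg_clone_destroy tears down all peers, detaches the ifnet and releases the
 * softc. The shared taskqs are destroyed when the last interface goes away.
 */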
2696 int
2697 wg_clone_destroy(struct ifnet *ifp)
2698 {
2699 	struct wg_softc	*sc = ifp->if_softc;
2700 	struct wg_peer	*peer, *tpeer;
2701 
2702 	KERNEL_ASSERT_LOCKED();
2703 
2704 	rw_enter_write(&sc->sc_lock);
2705 	TAILQ_FOREACH_SAFE(peer, &sc->sc_peer_seq, p_seq_entry, tpeer)
2706 		wg_peer_destroy(peer);
2707 	rw_exit_write(&sc->sc_lock);
2708 
2709 	wg_unbind(sc);
2710 	if_detach(ifp);
2711 
2712 	wg_counter--;
2713 	if (wg_counter == 0) {
2714 		KASSERT(wg_handshake_taskq != NULL && wg_crypt_taskq != NULL);
2715 		taskq_destroy(wg_handshake_taskq);
2716 		taskq_destroy(wg_crypt_taskq);
2717 		wg_handshake_taskq = NULL;
2718 		wg_crypt_taskq = NULL;
2719 	}
2720 
2721 	DPRINTF(sc, "Destroyed interface\n");
2722 
2723 	hashfree(sc->sc_index, HASHTABLE_INDEX_SIZE, M_DEVBUF);
2724 	hashfree(sc->sc_peer, HASHTABLE_PEER_SIZE, M_DEVBUF);
2725 #ifdef INET6
2726 	free(sc->sc_aip6, M_RTABLE, sizeof(*sc->sc_aip6));
2727 #endif
2728 	free(sc->sc_aip4, M_RTABLE, sizeof(*sc->sc_aip4));
2729 	cookie_checker_deinit(&sc->sc_cookie);
2730 	free(sc, M_DEVBUF, sizeof(*sc));
2731 	return 0;
2732 }
2733 
2734 void
2735 wgattach(int nwg)
2736 {
2737 #ifdef WGTEST
2738 	cookie_test();
2739 	noise_test();
2740 #endif
2741 	if_clone_attach(&wg_cloner);
2742 
2743 	pool_init(&wg_aip_pool, sizeof(struct wg_aip), 0,
2744 			IPL_NET, 0, "wgaip", NULL);
2745 	pool_init(&wg_peer_pool, sizeof(struct wg_peer), 0,
2746 			IPL_NET, 0, "wgpeer", NULL);
2747 	pool_init(&wg_ratelimit_pool, sizeof(struct ratelimit_entry), 0,
2748 			IPL_NET, 0, "wgratelimit", NULL);
2749 }
2750