1 /*	$OpenBSD: if_wg.c,v 1.14 2020/09/01 19:06:59 tb Exp $ */
2 
3 /*
4  * Copyright (C) 2015-2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
5  * Copyright (C) 2019-2020 Matt Dunwoodie <ncon@noconroy.net>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include "bpfilter.h"
21 #include "pf.h"
22 
23 #include <sys/types.h>
24 #include <sys/systm.h>
25 #include <sys/param.h>
26 #include <sys/pool.h>
27 
28 #include <sys/socket.h>
29 #include <sys/socketvar.h>
30 #include <sys/percpu.h>
31 #include <sys/ioctl.h>
32 #include <sys/mbuf.h>
33 #include <sys/protosw.h>
34 
35 #include <net/if.h>
36 #include <net/if_var.h>
37 #include <net/if_types.h>
38 #include <net/if_wg.h>
39 
40 #include <net/wg_noise.h>
41 #include <net/wg_cookie.h>
42 
43 #include <net/pfvar.h>
44 #include <net/route.h>
45 #include <net/bpf.h>
46 
47 #include <netinet/ip.h>
48 #include <netinet/ip6.h>
49 #include <netinet/udp.h>
50 #include <netinet/in_pcb.h>
51 
52 #include <crypto/siphash.h>
53 
54 #define DEFAULT_MTU		1420
55 
56 #define MAX_STAGED_PKT		128
57 #define MAX_QUEUED_PKT		1024
58 #define MAX_QUEUED_PKT_MASK	(MAX_QUEUED_PKT - 1)
59 
60 #define MAX_QUEUED_HANDSHAKES	4096
61 
62 #define HASHTABLE_PEER_SIZE	(1 << 11)
63 #define HASHTABLE_INDEX_SIZE	(1 << 13)
64 #define MAX_PEERS_PER_IFACE	(1 << 20)
65 
66 #define REKEY_TIMEOUT		5
67 #define REKEY_TIMEOUT_JITTER	334 /* 1/3 sec, round for arc4random_uniform */
68 #define KEEPALIVE_TIMEOUT	10
69 #define MAX_TIMER_HANDSHAKES	(90 / REKEY_TIMEOUT)
70 #define NEW_HANDSHAKE_TIMEOUT	(REKEY_TIMEOUT + KEEPALIVE_TIMEOUT)
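/* NEW_HANDSHAKE_TIMEOUT: 5 + 10 = 15s of sent data with no reply before a new handshake */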
71 #define UNDERLOAD_TIMEOUT	1
72 
73 #define DPRINTF(sc, str, ...) do { if (ISSET((sc)->sc_if.if_flags, IFF_DEBUG))\
74     printf("%s: " str, (sc)->sc_if.if_xname, ##__VA_ARGS__); } while (0)
75 
76 #define CONTAINER_OF(ptr, type, member) ({			\
77 	const __typeof( ((type *)0)->member ) *__mptr = (ptr);	\
78 	(type *)( (char *)__mptr - offsetof(type,member) );})
79 
80 /* First byte indicating packet type on the wire */
81 #define WG_PKT_INITIATION htole32(1)
82 #define WG_PKT_RESPONSE htole32(2)
83 #define WG_PKT_COOKIE htole32(3)
84 #define WG_PKT_DATA htole32(4)
85 
86 #define WG_PKT_WITH_PADDING(n)	(((n) + (16-1)) & (~(16-1)))
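/*
 * WG_PKT_WITH_PADDING rounds up to the next multiple of 16, e.g.
 * WG_PKT_WITH_PADDING(1) == 16 and WG_PKT_WITH_PADDING(16) == 16, while a
 * zero-length keepalive stays at 0; wg_encap() clamps the padded length to
 * the tag's t_mtu before encrypting.
 */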
87 #define WG_KEY_SIZE		WG_KEY_LEN
88 
89 struct wg_pkt_initiation {
90 	uint32_t		t;
91 	uint32_t		s_idx;
92 	uint8_t			ue[NOISE_PUBLIC_KEY_LEN];
93 	uint8_t			es[NOISE_PUBLIC_KEY_LEN + NOISE_AUTHTAG_LEN];
94 	uint8_t			ets[NOISE_TIMESTAMP_LEN + NOISE_AUTHTAG_LEN];
95 	struct cookie_macs	m;
96 };
97 
98 struct wg_pkt_response {
99 	uint32_t		t;
100 	uint32_t		s_idx;
101 	uint32_t		r_idx;
102 	uint8_t			ue[NOISE_PUBLIC_KEY_LEN];
103 	uint8_t			en[0 + NOISE_AUTHTAG_LEN];
104 	struct cookie_macs	m;
105 };
106 
107 struct wg_pkt_cookie {
108 	uint32_t		t;
109 	uint32_t		r_idx;
110 	uint8_t			nonce[COOKIE_NONCE_SIZE];
111 	uint8_t			ec[COOKIE_ENCRYPTED_SIZE];
112 };
113 
114 struct wg_pkt_data {
115 	uint32_t		t;
116 	uint32_t		r_idx;
117 	uint8_t			nonce[sizeof(uint64_t)];
118 	uint8_t			buf[];
119 };
120 
121 struct wg_endpoint {
122 	union {
123 		struct sockaddr		r_sa;
124 		struct sockaddr_in	r_sin;
125 #ifdef INET6
126 		struct sockaddr_in6	r_sin6;
127 #endif
128 	} e_remote;
129 	union {
130 		struct in_addr		l_in;
131 #ifdef INET6
132 		struct in6_pktinfo	l_pktinfo6;
133 #define l_in6 l_pktinfo6.ipi6_addr
134 #endif
135 	} e_local;
136 };
137 
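/*
 * A wg_tag travels with each mbuf through the interface; see the comment
 * above wg_encap() for how t_done and t_mbuf coordinate the serial and
 * parallel queues.
 */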
138 struct wg_tag {
139 	struct wg_endpoint	 t_endpoint;
140 	struct wg_peer		*t_peer;
141 	struct mbuf		*t_mbuf;
142 	int			 t_done;
143 	int			 t_mtu;
144 };
145 
146 struct wg_index {
147 	LIST_ENTRY(wg_index)	 i_entry;
148 	SLIST_ENTRY(wg_index)	 i_unused_entry;
149 	uint32_t		 i_key;
150 	struct noise_remote	*i_value;
151 };
152 
153 struct wg_timers {
154 	/* t_lock is for blocking wg_timers_event_* when setting t_disabled. */
155 	struct rwlock		 t_lock;
156 
157 	int			 t_disabled;
158 	int			 t_need_another_keepalive;
159 	uint16_t		 t_persistent_keepalive_interval;
160 	struct timeout		 t_new_handshake;
161 	struct timeout		 t_send_keepalive;
162 	struct timeout		 t_retry_handshake;
163 	struct timeout		 t_zero_key_material;
164 	struct timeout		 t_persistent_keepalive;
165 
166 	struct mutex		 t_handshake_mtx;
167 	struct timespec		 t_handshake_last_sent;	/* nanouptime */
168 	struct timespec		 t_handshake_complete;	/* nanotime */
169 	int			 t_handshake_retries;
170 };
171 
172 struct wg_aip {
173 	struct art_node		 a_node;
174 	LIST_ENTRY(wg_aip)	 a_entry;
175 	struct wg_peer		*a_peer;
176 	struct wg_aip_io	 a_data;
177 };
178 
179 struct wg_queue {
180 	struct mutex		 q_mtx;
181 	struct mbuf_list	 q_list;
182 };
183 
184 struct wg_ring {
185 	struct mutex	 r_mtx;
186 	uint32_t	 r_head;
187 	uint32_t	 r_tail;
188 	struct mbuf	*r_buf[MAX_QUEUED_PKT];
189 };
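/*
 * r_head and r_tail are free-running counters; slots are addressed as
 * r_buf[counter & MAX_QUEUED_PKT_MASK], so MAX_QUEUED_PKT must remain a
 * power of two (see wg_queue_in() and wg_ring_dequeue()).
 */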
190 
191 struct wg_peer {
192 	LIST_ENTRY(wg_peer)	 p_pubkey_entry;
193 	TAILQ_ENTRY(wg_peer)	 p_seq_entry;
194 	uint64_t		 p_id;
195 	struct wg_softc		*p_sc;
196 
197 	struct noise_remote	 p_remote;
198 	struct cookie_maker	 p_cookie;
199 	struct wg_timers	 p_timers;
200 
201 	struct mutex		 p_counters_mtx;
202 	uint64_t		 p_counters_tx;
203 	uint64_t		 p_counters_rx;
204 
205 	struct mutex		 p_endpoint_mtx;
206 	struct wg_endpoint	 p_endpoint;
207 
208 	struct task		 p_send_initiation;
209 	struct task		 p_send_keepalive;
210 	struct task		 p_clear_secrets;
211 	struct task		 p_deliver_out;
212 	struct task		 p_deliver_in;
213 
214 	struct mbuf_queue	 p_stage_queue;
215 	struct wg_queue		 p_encap_queue;
216 	struct wg_queue		 p_decap_queue;
217 
218 	SLIST_HEAD(,wg_index)	 p_unused_index;
219 	struct wg_index		 p_index[3];
220 
221 	LIST_HEAD(,wg_aip)	 p_aip;
222 
223 	SLIST_ENTRY(wg_peer)	 p_start_list;
224 	int			 p_start_onlist;
225 };
226 
227 struct wg_softc {
228 	struct ifnet		 sc_if;
229 	SIPHASH_KEY		 sc_secret;
230 
231 	struct rwlock		 sc_lock;
232 	struct noise_local	 sc_local;
233 	struct cookie_checker	 sc_cookie;
234 	in_port_t		 sc_udp_port;
235 	int			 sc_udp_rtable;
236 
237 	struct rwlock		 sc_so_lock;
238 	struct socket		*sc_so4;
239 #ifdef INET6
240 	struct socket		*sc_so6;
241 #endif
242 
243 	size_t			 sc_aip_num;
244 	struct art_root		*sc_aip4;
245 #ifdef INET6
246 	struct art_root		*sc_aip6;
247 #endif
248 
249 	struct rwlock		 sc_peer_lock;
250 	size_t			 sc_peer_num;
251 	LIST_HEAD(,wg_peer)	*sc_peer;
252 	TAILQ_HEAD(,wg_peer)	 sc_peer_seq;
253 	u_long			 sc_peer_mask;
254 
255 	struct mutex		 sc_index_mtx;
256 	LIST_HEAD(,wg_index)	*sc_index;
257 	u_long			 sc_index_mask;
258 
259 	struct task		 sc_handshake;
260 	struct mbuf_queue	 sc_handshake_queue;
261 
262 	struct task		 sc_encap;
263 	struct task		 sc_decap;
264 	struct wg_ring		 sc_encap_ring;
265 	struct wg_ring		 sc_decap_ring;
266 };
267 
268 struct wg_peer *
269 	wg_peer_create(struct wg_softc *, uint8_t[WG_KEY_SIZE]);
270 struct wg_peer *
271 	wg_peer_lookup(struct wg_softc *, const uint8_t[WG_KEY_SIZE]);
272 void	wg_peer_destroy(struct wg_peer *);
273 void	wg_peer_set_endpoint_from_tag(struct wg_peer *, struct wg_tag *);
274 void	wg_peer_set_sockaddr(struct wg_peer *, struct sockaddr *);
275 int	wg_peer_get_sockaddr(struct wg_peer *, struct sockaddr *);
276 void	wg_peer_clear_src(struct wg_peer *);
277 void	wg_peer_get_endpoint(struct wg_peer *, struct wg_endpoint *);
278 void	wg_peer_counters_add(struct wg_peer *, uint64_t, uint64_t);
279 
280 int	wg_aip_add(struct wg_softc *, struct wg_peer *, struct wg_aip_io *);
281 struct wg_peer *
282 	wg_aip_lookup(struct art_root *, void *);
283 int	wg_aip_remove(struct wg_softc *, struct wg_peer *,
284 	    struct wg_aip_io *);
285 
286 int	wg_socket_open(struct socket **, int, in_port_t *, int *, void *);
287 void	wg_socket_close(struct socket **);
288 int	wg_bind(struct wg_softc *, in_port_t *, int *);
289 void	wg_unbind(struct wg_softc *);
290 int	wg_send(struct wg_softc *, struct wg_endpoint *, struct mbuf *);
291 void	wg_send_buf(struct wg_softc *, struct wg_endpoint *, uint8_t *,
292 	    size_t);
293 
294 struct wg_tag *
295 	wg_tag_get(struct mbuf *);
296 
297 void	wg_timers_init(struct wg_timers *);
298 void	wg_timers_enable(struct wg_timers *);
299 void	wg_timers_disable(struct wg_timers *);
300 void	wg_timers_set_persistent_keepalive(struct wg_timers *, uint16_t);
301 int	wg_timers_get_persistent_keepalive(struct wg_timers *, uint16_t *);
302 void	wg_timers_get_last_handshake(struct wg_timers *, struct timespec *);
303 int	wg_timers_expired_handshake_last_sent(struct wg_timers *);
304 int	wg_timers_check_handshake_last_sent(struct wg_timers *);
305 
306 void	wg_timers_event_data_sent(struct wg_timers *);
307 void	wg_timers_event_data_received(struct wg_timers *);
308 void	wg_timers_event_any_authenticated_packet_sent(struct wg_timers *);
309 void	wg_timers_event_any_authenticated_packet_received(struct wg_timers *);
310 void	wg_timers_event_handshake_initiated(struct wg_timers *);
311 void	wg_timers_event_handshake_responded(struct wg_timers *);
312 void	wg_timers_event_handshake_complete(struct wg_timers *);
313 void	wg_timers_event_session_derived(struct wg_timers *);
314 void	wg_timers_event_any_authenticated_packet_traversal(struct wg_timers *);
315 void	wg_timers_event_want_initiation(struct wg_timers *);
316 void	wg_timers_event_reset_handshake_last_sent(struct wg_timers *);
317 
318 void	wg_timers_run_send_initiation(void *, int);
319 void	wg_timers_run_retry_handshake(void *);
320 void	wg_timers_run_send_keepalive(void *);
321 void	wg_timers_run_new_handshake(void *);
322 void	wg_timers_run_zero_key_material(void *);
323 void	wg_timers_run_persistent_keepalive(void *);
324 
325 void	wg_peer_send_buf(struct wg_peer *, uint8_t *, size_t);
326 void	wg_send_initiation(void *);
327 void	wg_send_response(struct wg_peer *);
328 void	wg_send_cookie(struct wg_softc *, struct cookie_macs *, uint32_t,
329 	    struct wg_endpoint *e);
330 void	wg_send_keepalive(void *);
331 void	wg_peer_clear_secrets(void *);
332 void	wg_handshake(struct wg_softc *, struct mbuf *);
333 void	wg_handshake_worker(void *);
334 
335 void	wg_encap(struct wg_softc *, struct mbuf *);
336 void	wg_decap(struct wg_softc *, struct mbuf *);
337 void	wg_encap_worker(void *);
338 void	wg_decap_worker(void *);
339 void	wg_deliver_out(void *);
340 void	wg_deliver_in(void *);
341 
342 int	wg_queue_in(struct wg_softc *, struct wg_peer *, struct mbuf *);
343 void	wg_queue_out(struct wg_softc *, struct wg_peer *);
344 struct mbuf *
345 	wg_ring_dequeue(struct wg_ring *);
346 struct mbuf *
347 	wg_queue_dequeue(struct wg_queue *, struct wg_tag **);
348 size_t	wg_queue_len(struct wg_queue *);
349 
350 struct noise_remote *
351 	wg_remote_get(void *, uint8_t[NOISE_PUBLIC_KEY_LEN]);
352 uint32_t
353 	wg_index_set(void *, struct noise_remote *);
354 struct noise_remote *
355 	wg_index_get(void *, uint32_t);
356 void	wg_index_drop(void *, uint32_t);
357 
358 struct mbuf *
359 	wg_input(void *, struct mbuf *, struct ip *, struct ip6_hdr *, void *,
360 	    int);
361 int	wg_output(struct ifnet *, struct mbuf *, struct sockaddr *,
362 	    struct rtentry *);
363 int	wg_ioctl_set(struct wg_softc *, struct wg_data_io *);
364 int	wg_ioctl_get(struct wg_softc *, struct wg_data_io *);
365 int	wg_ioctl(struct ifnet *, u_long, caddr_t);
366 int	wg_up(struct wg_softc *);
367 void	wg_down(struct wg_softc *);
368 
369 int	wg_clone_create(struct if_clone *, int);
370 int	wg_clone_destroy(struct ifnet *);
371 void	wgattach(int);
372 
373 uint64_t	peer_counter = 0;
374 uint64_t	keypair_counter = 0;
375 struct pool	wg_aip_pool;
376 struct pool	wg_peer_pool;
377 struct pool	wg_ratelimit_pool;
378 struct timeval	underload_interval = { UNDERLOAD_TIMEOUT, 0 };
379 
380 size_t		 wg_counter = 0;
381 struct taskq	*wg_handshake_taskq;
382 struct taskq	*wg_crypt_taskq;
383 
384 struct if_clone	wg_cloner =
385     IF_CLONE_INITIALIZER("wg", wg_clone_create, wg_clone_destroy);
386 
387 struct wg_peer *
388 wg_peer_create(struct wg_softc *sc, uint8_t public[WG_KEY_SIZE])
389 {
390 	struct wg_peer	*peer;
391 	uint64_t	 idx;
392 
393 	rw_assert_wrlock(&sc->sc_lock);
394 
395 	if (sc->sc_peer_num >= MAX_PEERS_PER_IFACE)
396 		return NULL;
397 
398 	if ((peer = pool_get(&wg_peer_pool, PR_NOWAIT)) == NULL)
399 		return NULL;
400 
401 	peer->p_id = peer_counter++;
402 	peer->p_sc = sc;
403 
404 	noise_remote_init(&peer->p_remote, public, &sc->sc_local);
405 	cookie_maker_init(&peer->p_cookie, public);
406 	wg_timers_init(&peer->p_timers);
407 
408 	mtx_init(&peer->p_counters_mtx, IPL_NET);
409 	peer->p_counters_tx = 0;
410 	peer->p_counters_rx = 0;
411 
412 	mtx_init(&peer->p_endpoint_mtx, IPL_NET);
413 	bzero(&peer->p_endpoint, sizeof(peer->p_endpoint));
414 
415 	task_set(&peer->p_send_initiation, wg_send_initiation, peer);
416 	task_set(&peer->p_send_keepalive, wg_send_keepalive, peer);
417 	task_set(&peer->p_clear_secrets, wg_peer_clear_secrets, peer);
418 	task_set(&peer->p_deliver_out, wg_deliver_out, peer);
419 	task_set(&peer->p_deliver_in, wg_deliver_in, peer);
420 
421 	mq_init(&peer->p_stage_queue, MAX_STAGED_PKT, IPL_NET);
422 	mtx_init(&peer->p_encap_queue.q_mtx, IPL_NET);
423 	ml_init(&peer->p_encap_queue.q_list);
424 	mtx_init(&peer->p_decap_queue.q_mtx, IPL_NET);
425 	ml_init(&peer->p_decap_queue.q_list);
426 
427 	SLIST_INIT(&peer->p_unused_index);
428 	SLIST_INSERT_HEAD(&peer->p_unused_index, &peer->p_index[0],
429 	    i_unused_entry);
430 	SLIST_INSERT_HEAD(&peer->p_unused_index, &peer->p_index[1],
431 	    i_unused_entry);
432 	SLIST_INSERT_HEAD(&peer->p_unused_index, &peer->p_index[2],
433 	    i_unused_entry);
434 
435 	LIST_INIT(&peer->p_aip);
436 
437 	peer->p_start_onlist = 0;
438 
439 	idx = SipHash24(&sc->sc_secret, public, WG_KEY_SIZE);
440 	idx &= sc->sc_peer_mask;
441 
442 	rw_enter_write(&sc->sc_peer_lock);
443 	LIST_INSERT_HEAD(&sc->sc_peer[idx], peer, p_pubkey_entry);
444 	TAILQ_INSERT_TAIL(&sc->sc_peer_seq, peer, p_seq_entry);
445 	sc->sc_peer_num++;
446 	rw_exit_write(&sc->sc_peer_lock);
447 
448 	DPRINTF(sc, "Peer %llu created\n", peer->p_id);
449 	return peer;
450 }
451 
452 struct wg_peer *
453 wg_peer_lookup(struct wg_softc *sc, const uint8_t public[WG_KEY_SIZE])
454 {
455 	uint8_t		 peer_key[WG_KEY_SIZE];
456 	struct wg_peer	*peer;
457 	uint64_t	 idx;
458 
459 	idx = SipHash24(&sc->sc_secret, public, WG_KEY_SIZE);
460 	idx &= sc->sc_peer_mask;
461 
462 	rw_enter_read(&sc->sc_peer_lock);
463 	LIST_FOREACH(peer, &sc->sc_peer[idx], p_pubkey_entry) {
464 		noise_remote_keys(&peer->p_remote, peer_key, NULL);
465 		if (timingsafe_bcmp(peer_key, public, WG_KEY_SIZE) == 0)
466 			goto done;
467 	}
468 	peer = NULL;
469 done:
470 	rw_exit_read(&sc->sc_peer_lock);
471 	return peer;
472 }
473 
474 void
475 wg_peer_destroy(struct wg_peer *peer)
476 {
477 	struct wg_softc	*sc = peer->p_sc;
478 	struct wg_aip *aip, *taip;
479 
480 	rw_assert_wrlock(&sc->sc_lock);
481 
482 	/*
483 	 * Remove peer from the pubkey hashtable and disable all timeouts.
484 	 * After this, and after flushing wg_handshake_taskq, no more handshakes
485 	 * can be started.
486 	 */
487 	rw_enter_write(&sc->sc_peer_lock);
488 	LIST_REMOVE(peer, p_pubkey_entry);
489 	TAILQ_REMOVE(&sc->sc_peer_seq, peer, p_seq_entry);
490 	sc->sc_peer_num--;
491 	rw_exit_write(&sc->sc_peer_lock);
492 
493 	wg_timers_disable(&peer->p_timers);
494 
495 	taskq_barrier(wg_handshake_taskq);
496 
497 	/*
498 	 * Now we drop all allowed ips, to drop all outgoing packets to the
499 	 * peer. Then drop all the indexes to drop all incoming packets to the
500 	 * peer. Then we can flush if_snd, wg_crypt_taskq and then nettq to
501 	 * ensure no more references to the peer exist.
502 	 */
503 	LIST_FOREACH_SAFE(aip, &peer->p_aip, a_entry, taip)
504 		wg_aip_remove(sc, peer, &aip->a_data);
505 
506 	noise_remote_clear(&peer->p_remote);
507 
508 	NET_LOCK();
509 	while (!ifq_empty(&sc->sc_if.if_snd)) {
510 		NET_UNLOCK();
511 		tsleep_nsec(sc, PWAIT, "wg_ifq", 1000);
512 		NET_LOCK();
513 	}
514 	NET_UNLOCK();
515 
516 	taskq_barrier(wg_crypt_taskq);
517 	taskq_barrier(net_tq(sc->sc_if.if_index));
518 
519 	DPRINTF(sc, "Peer %llu destroyed\n", peer->p_id);
520 	explicit_bzero(peer, sizeof(*peer));
521 	pool_put(&wg_peer_pool, peer);
522 }
523 
524 void
525 wg_peer_set_endpoint_from_tag(struct wg_peer *peer, struct wg_tag *t)
526 {
527 	if (memcmp(&t->t_endpoint, &peer->p_endpoint,
528 	    sizeof(t->t_endpoint)) == 0)
529 		return;
530 
531 	mtx_enter(&peer->p_endpoint_mtx);
532 	peer->p_endpoint = t->t_endpoint;
533 	mtx_leave(&peer->p_endpoint_mtx);
534 }
535 
536 void
537 wg_peer_set_sockaddr(struct wg_peer *peer, struct sockaddr *remote)
538 {
539 	mtx_enter(&peer->p_endpoint_mtx);
540 	memcpy(&peer->p_endpoint.e_remote, remote,
541 	       sizeof(peer->p_endpoint.e_remote));
542 	bzero(&peer->p_endpoint.e_local, sizeof(peer->p_endpoint.e_local));
543 	mtx_leave(&peer->p_endpoint_mtx);
544 }
545 
546 int
547 wg_peer_get_sockaddr(struct wg_peer *peer, struct sockaddr *remote)
548 {
549 	int	ret = 0;
550 
551 	mtx_enter(&peer->p_endpoint_mtx);
552 	if (peer->p_endpoint.e_remote.r_sa.sa_family != AF_UNSPEC)
553 		memcpy(remote, &peer->p_endpoint.e_remote,
554 		       sizeof(peer->p_endpoint.e_remote));
555 	else
556 		ret = ENOENT;
557 	mtx_leave(&peer->p_endpoint_mtx);
558 	return ret;
559 }
560 
561 void
562 wg_peer_clear_src(struct wg_peer *peer)
563 {
564 	mtx_enter(&peer->p_endpoint_mtx);
565 	bzero(&peer->p_endpoint.e_local, sizeof(peer->p_endpoint.e_local));
566 	mtx_leave(&peer->p_endpoint_mtx);
567 }
568 
569 void
570 wg_peer_get_endpoint(struct wg_peer *peer, struct wg_endpoint *endpoint)
571 {
572 	mtx_enter(&peer->p_endpoint_mtx);
573 	memcpy(endpoint, &peer->p_endpoint, sizeof(*endpoint));
574 	mtx_leave(&peer->p_endpoint_mtx);
575 }
576 
577 void
578 wg_peer_counters_add(struct wg_peer *peer, uint64_t tx, uint64_t rx)
579 {
580 	mtx_enter(&peer->p_counters_mtx);
581 	peer->p_counters_tx += tx;
582 	peer->p_counters_rx += rx;
583 	mtx_leave(&peer->p_counters_mtx);
584 }
585 
586 int
587 wg_aip_add(struct wg_softc *sc, struct wg_peer *peer, struct wg_aip_io *d)
588 {
589 	struct art_root	*root;
590 	struct art_node	*node;
591 	struct wg_aip	*aip;
592 	int		 ret = 0;
593 
594 	switch (d->a_af) {
595 	case AF_INET:	root = sc->sc_aip4; break;
596 #ifdef INET6
597 	case AF_INET6:	root = sc->sc_aip6; break;
598 #endif
599 	default: return EAFNOSUPPORT;
600 	}
601 
602 	if ((aip = pool_get(&wg_aip_pool, PR_NOWAIT)) == NULL)
603 		return ENOBUFS;
604 	bzero(aip, sizeof(*aip));
605 
606 	rw_enter_write(&root->ar_lock);
607 	node = art_insert(root, &aip->a_node, &d->a_addr, d->a_cidr);
608 
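	/*
	 * art_insert() returns our own node if the prefix was new; otherwise
	 * it returns the pre-existing node for that prefix, which is simply
	 * re-pointed at this peer below.
	 */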
609 	if (node == &aip->a_node) {
610 		aip->a_peer = peer;
611 		aip->a_data = *d;
612 		LIST_INSERT_HEAD(&peer->p_aip, aip, a_entry);
613 		sc->sc_aip_num++;
614 	} else {
615 		pool_put(&wg_aip_pool, aip);
616 		aip = (struct wg_aip *) node;
617 		if (aip->a_peer != peer) {
618 			LIST_REMOVE(aip, a_entry);
619 			LIST_INSERT_HEAD(&peer->p_aip, aip, a_entry);
620 			aip->a_peer = peer;
621 		}
622 	}
623 	rw_exit_write(&root->ar_lock);
624 	return ret;
625 }
626 
627 struct wg_peer *
628 wg_aip_lookup(struct art_root *root, void *addr)
629 {
630 	struct srp_ref	 sr;
631 	struct art_node	*node;
632 
633 	node = art_match(root, addr, &sr);
634 	srp_leave(&sr);
635 
636 	return node == NULL ? NULL : ((struct wg_aip *) node)->a_peer;
637 }
638 
639 int
640 wg_aip_remove(struct wg_softc *sc, struct wg_peer *peer, struct wg_aip_io *d)
641 {
642 	struct srp_ref	 sr;
643 	struct art_root	*root;
644 	struct art_node	*node;
645 	struct wg_aip	*aip;
646 	int		 ret = 0;
647 
648 	switch (d->a_af) {
649 	case AF_INET:	root = sc->sc_aip4; break;
650 #ifdef INET6
651 	case AF_INET6:	root = sc->sc_aip6; break;
652 #endif
653 	default: return EAFNOSUPPORT;
654 	}
655 
656 	rw_enter_write(&root->ar_lock);
657 	if ((node = art_lookup(root, &d->a_addr, d->a_cidr, &sr)) == NULL) {
658 		ret = ENOENT;
659 	} else if (((struct wg_aip *) node)->a_peer != peer) {
660 		ret = EXDEV;
661 	} else {
662 		aip = (struct wg_aip *)node;
663 		if (art_delete(root, node, &d->a_addr, d->a_cidr) == NULL)
664 			panic("art_delete failed to delete node %p", node);
665 
666 		sc->sc_aip_num--;
667 		LIST_REMOVE(aip, a_entry);
668 		pool_put(&wg_aip_pool, aip);
669 	}
670 
671 	srp_leave(&sr);
672 	rw_exit_write(&root->ar_lock);
673 	return ret;
674 }
675 
676 int
677 wg_socket_open(struct socket **so, int af, in_port_t *port,
678     int *rtable, void *upcall_arg)
679 {
680 	struct mbuf		 mhostnam, mrtable;
681 #ifdef INET6
682 	struct sockaddr_in6	*sin6;
683 #endif
684 	struct sockaddr_in	*sin;
685 	int			 ret, s;
686 
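	/*
	 * sosetopt() and sobind() take their arguments as mbufs, so build two
	 * throwaway mbufs on the stack: one carrying the routing table id and
	 * one carrying the local sockaddr to bind.
	 */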
687 	m_inithdr(&mhostnam);
688 	m_inithdr(&mrtable);
689 
690 	bzero(mtod(&mrtable, u_int *), sizeof(u_int));
691 	*mtod(&mrtable, u_int *) = *rtable;
692 	mrtable.m_len = sizeof(u_int);
693 
694 	if (af == AF_INET) {
695 		sin = mtod(&mhostnam, struct sockaddr_in *);
696 		bzero(sin, sizeof(*sin));
697 		sin->sin_len = sizeof(*sin);
698 		sin->sin_family = AF_INET;
699 		sin->sin_port = *port;
700 		sin->sin_addr.s_addr = INADDR_ANY;
701 		mhostnam.m_len = sin->sin_len;
702 #ifdef INET6
703 	} else if (af == AF_INET6) {
704 		sin6 = mtod(&mhostnam, struct sockaddr_in6 *);
705 		bzero(sin6, sizeof(*sin6));
706 		sin6->sin6_len = sizeof(*sin6);
707 		sin6->sin6_family = AF_INET6;
708 		sin6->sin6_port = *port;
709 		sin6->sin6_addr = (struct in6_addr) { .s6_addr = { 0 } };
710 		mhostnam.m_len = sin6->sin6_len;
711 #endif
712 	} else {
713 		return EAFNOSUPPORT;
714 	}
715 
716 	if ((ret = socreate(af, so, SOCK_DGRAM, 0)) != 0)
717 		return ret;
718 
719 	s = solock(*so);
720 	sotoinpcb(*so)->inp_upcall = wg_input;
721 	sotoinpcb(*so)->inp_upcall_arg = upcall_arg;
722 
723 	if ((ret = sosetopt(*so, SOL_SOCKET, SO_RTABLE, &mrtable)) == 0) {
724 		if ((ret = sobind(*so, &mhostnam, curproc)) == 0) {
725 			*port = sotoinpcb(*so)->inp_lport;
726 			*rtable = sotoinpcb(*so)->inp_rtableid;
727 		}
728 	}
729 	sounlock(*so, s);
730 
731 	if (ret != 0)
732 		wg_socket_close(so);
733 
734 	return ret;
735 }
736 
737 void
738 wg_socket_close(struct socket **so)
739 {
740 	if (*so != NULL && soclose(*so, 0) != 0)
741 		panic("Unable to close wg socket");
742 	*so = NULL;
743 }
744 
745 int
746 wg_bind(struct wg_softc *sc, in_port_t *portp, int *rtablep)
747 {
748 	int		 ret = 0, rtable = *rtablep;
749 	in_port_t	 port = *portp;
750 	struct socket	*so4;
751 #ifdef INET6
752 	struct socket	*so6;
753 	int		 retries = 0;
754 retry:
755 #endif
756 	if ((ret = wg_socket_open(&so4, AF_INET, &port, &rtable, sc)) != 0)
757 		return ret;
758 
759 #ifdef INET6
760 	if ((ret = wg_socket_open(&so6, AF_INET6, &port, &rtable, sc)) != 0) {
761 		wg_socket_close(&so4);
762 		if (ret == EADDRINUSE && *portp == 0 && retries++ < 100)
763 			goto retry;
764 		return ret;
765 	}
766 #endif
767 
768 	rw_enter_write(&sc->sc_so_lock);
769 	wg_socket_close(&sc->sc_so4);
770 	sc->sc_so4 = so4;
771 #ifdef INET6
772 	wg_socket_close(&sc->sc_so6);
773 	sc->sc_so6 = so6;
774 #endif
775 	rw_exit_write(&sc->sc_so_lock);
776 
777 	*portp = port;
778 	*rtablep = rtable;
779 	return 0;
780 }
781 
782 void
783 wg_unbind(struct wg_softc *sc)
784 {
785 	rw_enter_write(&sc->sc_so_lock);
786 	wg_socket_close(&sc->sc_so4);
787 #ifdef INET6
788 	wg_socket_close(&sc->sc_so6);
789 #endif
790 	rw_exit_write(&sc->sc_so_lock);
791 }
792 
793 int
794 wg_send(struct wg_softc *sc, struct wg_endpoint *e, struct mbuf *m)
795 {
796 	struct mbuf	 peernam, *control = NULL;
797 	int		 ret;
798 
799 	/* Get local control address before locking */
800 	if (e->e_remote.r_sa.sa_family == AF_INET) {
801 		if (e->e_local.l_in.s_addr != INADDR_ANY)
802 			control = sbcreatecontrol(&e->e_local.l_in,
803 			    sizeof(struct in_addr), IP_SENDSRCADDR,
804 			    IPPROTO_IP);
805 #ifdef INET6
806 	} else if (e->e_remote.r_sa.sa_family == AF_INET6) {
807 		if (!IN6_IS_ADDR_UNSPECIFIED(&e->e_local.l_in6))
808 			control = sbcreatecontrol(&e->e_local.l_pktinfo6,
809 			    sizeof(struct in6_pktinfo), IPV6_PKTINFO,
810 			    IPPROTO_IPV6);
811 #endif
812 	} else {
813 		return EAFNOSUPPORT;
814 	}
815 
816 	/* Get remote address */
817 	peernam.m_type = MT_SONAME;
818 	peernam.m_next = NULL;
819 	peernam.m_nextpkt = NULL;
820 	peernam.m_data = (void *)&e->e_remote.r_sa;
821 	peernam.m_len = e->e_remote.r_sa.sa_len;
822 	peernam.m_flags = 0;
823 
824 	rw_enter_read(&sc->sc_so_lock);
825 	if (e->e_remote.r_sa.sa_family == AF_INET && sc->sc_so4 != NULL)
826 		ret = sosend(sc->sc_so4, &peernam, NULL, m, control, 0);
827 #ifdef INET6
828 	else if (e->e_remote.r_sa.sa_family == AF_INET6 && sc->sc_so6 != NULL)
829 		ret = sosend(sc->sc_so6, &peernam, NULL, m, control, 0);
830 #endif
831 	else {
832 		ret = ENOTCONN;
833 		m_freem(control);
834 		m_freem(m);
835 	}
836 	rw_exit_read(&sc->sc_so_lock);
837 
838 	return ret;
839 }
840 
841 void
842 wg_send_buf(struct wg_softc *sc, struct wg_endpoint *e, uint8_t *buf,
843     size_t len)
844 {
845 	struct mbuf	*m;
846 	int		 ret = 0;
847 
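	/*
	 * ret doubles as a "first attempt" flag: it is only non-zero once the
	 * retry path below has been taken, in which case a further failure is
	 * only logged rather than retried again.
	 */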
848 retry:
849 	m = m_gethdr(M_WAIT, MT_DATA);
850 	m->m_len = 0;
851 	m_copyback(m, 0, len, buf, M_WAIT);
852 
853 	/* As we're sending a handshake packet here, we want high priority */
854 	m->m_pkthdr.pf.prio = IFQ_MAXPRIO;
855 
856 	if (ret == 0) {
857 		ret = wg_send(sc, e, m);
858 		/* Retry if we couldn't bind to e->e_local */
859 		if (ret == EADDRNOTAVAIL) {
860 			bzero(&e->e_local, sizeof(e->e_local));
861 			goto retry;
862 		}
863 	} else {
864 		ret = wg_send(sc, e, m);
865 		if (ret != 0)
866 			DPRINTF(sc, "Unable to send packet\n");
867 	}
868 }
869 
870 struct wg_tag *
871 wg_tag_get(struct mbuf *m)
872 {
873 	struct m_tag	*mtag;
874 
875 	if ((mtag = m_tag_find(m, PACKET_TAG_WIREGUARD, NULL)) == NULL) {
876 		mtag = m_tag_get(PACKET_TAG_WIREGUARD, sizeof(struct wg_tag),
877 		    M_NOWAIT);
878 		if (mtag == NULL)
879 			return (NULL);
880 		bzero(mtag + 1, sizeof(struct wg_tag));
881 		m_tag_prepend(m, mtag);
882 	}
883 	return ((struct wg_tag *)(mtag + 1));
884 }
885 
886 /*
887  * The following section handles the timeout callbacks for a WireGuard session.
888  * These functions provide an "event based" model for controlling wg(8) session
889  * timers. All function calls occur after the specified event below.
890  *
891  * wg_timers_event_data_sent:
892  *	tx: data
893  * wg_timers_event_data_received:
894  *	rx: data
895  * wg_timers_event_any_authenticated_packet_sent:
896  *	tx: keepalive, data, handshake
897  * wg_timers_event_any_authenticated_packet_received:
898  *	rx: keepalive, data, handshake
899  * wg_timers_event_any_authenticated_packet_traversal:
900  *	tx, rx: keepalive, data, handshake
901  * wg_timers_event_handshake_initiated:
902  *	tx: initiation
903  * wg_timers_event_handshake_responded:
904  *	tx: response
905  * wg_timers_event_handshake_complete:
906  *	rx: response, confirmation data
907  * wg_timers_event_session_derived:
908  *	tx: response, rx: response
909  * wg_timers_event_want_initiation:
910  *	tx: data failed, old keys expiring
911  * wg_timers_event_reset_handshake_last_sent:
912  * 	anytime we may immediately want a new handshake
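 *
 * As a sketch of a typical sequence: when an idle tunnel transmits,
 * wg_timers_event_data_sent() arms t_new_handshake; sending the initiation
 * fires wg_timers_event_handshake_initiated(), which arms t_retry_handshake;
 * consuming the peer's response fires wg_timers_event_handshake_complete(),
 * which cancels the retry and records t_handshake_complete.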
913  */
914 void
915 wg_timers_init(struct wg_timers *t)
916 {
917 	bzero(t, sizeof(*t));
918 	rw_init(&t->t_lock, "wg_timers");
919 	mtx_init(&t->t_handshake_mtx, IPL_NET);
920 
921 	timeout_set(&t->t_new_handshake, wg_timers_run_new_handshake, t);
922 	timeout_set(&t->t_send_keepalive, wg_timers_run_send_keepalive, t);
923 	timeout_set(&t->t_retry_handshake, wg_timers_run_retry_handshake, t);
924 	timeout_set(&t->t_persistent_keepalive,
925 	    wg_timers_run_persistent_keepalive, t);
926 	timeout_set(&t->t_zero_key_material,
927 	    wg_timers_run_zero_key_material, t);
928 }
929 
930 void
931 wg_timers_enable(struct wg_timers *t)
932 {
933 	rw_enter_write(&t->t_lock);
934 	t->t_disabled = 0;
935 	rw_exit_write(&t->t_lock);
936 	wg_timers_run_persistent_keepalive(t);
937 }
938 
939 void
940 wg_timers_disable(struct wg_timers *t)
941 {
942 	rw_enter_write(&t->t_lock);
943 	t->t_disabled = 1;
944 	t->t_need_another_keepalive = 0;
945 	rw_exit_write(&t->t_lock);
946 
947 	timeout_del_barrier(&t->t_new_handshake);
948 	timeout_del_barrier(&t->t_send_keepalive);
949 	timeout_del_barrier(&t->t_retry_handshake);
950 	timeout_del_barrier(&t->t_persistent_keepalive);
951 	timeout_del_barrier(&t->t_zero_key_material);
952 }
953 
954 void
955 wg_timers_set_persistent_keepalive(struct wg_timers *t, uint16_t interval)
956 {
957 	rw_enter_read(&t->t_lock);
958 	if (!t->t_disabled) {
959 		t->t_persistent_keepalive_interval = interval;
960 		wg_timers_run_persistent_keepalive(t);
961 	}
962 	rw_exit_read(&t->t_lock);
963 }
964 
965 int
966 wg_timers_get_persistent_keepalive(struct wg_timers *t, uint16_t *interval)
967 {
968 	*interval = t->t_persistent_keepalive_interval;
969 	return *interval > 0 ? 0 : ENOENT;
970 }
971 
972 void
973 wg_timers_get_last_handshake(struct wg_timers *t, struct timespec *time)
974 {
975 	mtx_enter(&t->t_handshake_mtx);
976 	*time = t->t_handshake_complete;
977 	mtx_leave(&t->t_handshake_mtx);
978 }
979 
980 int
981 wg_timers_expired_handshake_last_sent(struct wg_timers *t)
982 {
983 	struct timespec uptime;
984 	struct timespec expire = { .tv_sec = REKEY_TIMEOUT, .tv_nsec = 0 };
985 
986 	getnanouptime(&uptime);
987 	timespecadd(&t->t_handshake_last_sent, &expire, &expire);
988 	return timespeccmp(&uptime, &expire, >) ? ETIMEDOUT : 0;
989 }
990 
991 int
992 wg_timers_check_handshake_last_sent(struct wg_timers *t)
993 {
994 	int ret;
995 	mtx_enter(&t->t_handshake_mtx);
996 	if ((ret = wg_timers_expired_handshake_last_sent(t)) == ETIMEDOUT)
997 		getnanouptime(&t->t_handshake_last_sent);
998 	mtx_leave(&t->t_handshake_mtx);
999 	return ret;
1000 }
1001 
1002 void
1003 wg_timers_event_data_sent(struct wg_timers *t)
1004 {
1005 	int	msecs = NEW_HANDSHAKE_TIMEOUT * 1000;
1006 	msecs += arc4random_uniform(REKEY_TIMEOUT_JITTER);
1007 
1008 	rw_enter_read(&t->t_lock);
1009 	if (!t->t_disabled && !timeout_pending(&t->t_new_handshake))
1010 		timeout_add_msec(&t->t_new_handshake, msecs);
1011 	rw_exit_read(&t->t_lock);
1012 }
1013 
1014 void
1015 wg_timers_event_data_received(struct wg_timers *t)
1016 {
1017 	rw_enter_read(&t->t_lock);
1018 	if (!t->t_disabled) {
1019 		if (!timeout_pending(&t->t_send_keepalive))
1020 			timeout_add_sec(&t->t_send_keepalive,
1021 			    KEEPALIVE_TIMEOUT);
1022 		else
1023 			t->t_need_another_keepalive = 1;
1024 	}
1025 	rw_exit_read(&t->t_lock);
1026 }
1027 
1028 void
1029 wg_timers_event_any_authenticated_packet_sent(struct wg_timers *t)
1030 {
1031 	timeout_del(&t->t_send_keepalive);
1032 }
1033 
1034 void
1035 wg_timers_event_any_authenticated_packet_received(struct wg_timers *t)
1036 {
1037 	timeout_del(&t->t_new_handshake);
1038 }
1039 
1040 void
1041 wg_timers_event_any_authenticated_packet_traversal(struct wg_timers *t)
1042 {
1043 	rw_enter_read(&t->t_lock);
1044 	if (!t->t_disabled && t->t_persistent_keepalive_interval > 0)
1045 		timeout_add_sec(&t->t_persistent_keepalive,
1046 		    t->t_persistent_keepalive_interval);
1047 	rw_exit_read(&t->t_lock);
1048 }
1049 
1050 void
1051 wg_timers_event_handshake_initiated(struct wg_timers *t)
1052 {
1053 	int	msecs = REKEY_TIMEOUT * 1000;
1054 	msecs += arc4random_uniform(REKEY_TIMEOUT_JITTER);
1055 
1056 	rw_enter_read(&t->t_lock);
1057 	if (!t->t_disabled)
1058 		timeout_add_msec(&t->t_retry_handshake, msecs);
1059 	rw_exit_read(&t->t_lock);
1060 }
1061 
1062 void
1063 wg_timers_event_handshake_responded(struct wg_timers *t)
1064 {
1065 	mtx_enter(&t->t_handshake_mtx);
1066 	getnanouptime(&t->t_handshake_last_sent);
1067 	mtx_leave(&t->t_handshake_mtx);
1068 }
1069 
1070 void
1071 wg_timers_event_handshake_complete(struct wg_timers *t)
1072 {
1073 	rw_enter_read(&t->t_lock);
1074 	if (!t->t_disabled) {
1075 		mtx_enter(&t->t_handshake_mtx);
1076 		timeout_del(&t->t_retry_handshake);
1077 		t->t_handshake_retries = 0;
1078 		getnanotime(&t->t_handshake_complete);
1079 		mtx_leave(&t->t_handshake_mtx);
1080 		wg_timers_run_send_keepalive(t);
1081 	}
1082 	rw_exit_read(&t->t_lock);
1083 }
1084 
1085 void
1086 wg_timers_event_session_derived(struct wg_timers *t)
1087 {
1088 	rw_enter_read(&t->t_lock);
1089 	if (!t->t_disabled)
1090 		timeout_add_sec(&t->t_zero_key_material, REJECT_AFTER_TIME * 3);
1091 	rw_exit_read(&t->t_lock);
1092 }
1093 
1094 void
1095 wg_timers_event_want_initiation(struct wg_timers *t)
1096 {
1097 	rw_enter_read(&t->t_lock);
1098 	if (!t->t_disabled)
1099 		wg_timers_run_send_initiation(t, 0);
1100 	rw_exit_read(&t->t_lock);
1101 }
1102 
1103 void
1104 wg_timers_event_reset_handshake_last_sent(struct wg_timers *t)
1105 {
1106 	mtx_enter(&t->t_handshake_mtx);
1107 	t->t_handshake_last_sent.tv_sec -= (REKEY_TIMEOUT + 1);
1108 	mtx_leave(&t->t_handshake_mtx);
1109 }
1110 
1111 void
1112 wg_timers_run_send_initiation(void *_t, int is_retry)
1113 {
1114 	struct wg_timers *t = _t;
1115 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1116 	if (!is_retry)
1117 		t->t_handshake_retries = 0;
1118 	if (wg_timers_expired_handshake_last_sent(t) == ETIMEDOUT)
1119 		task_add(wg_handshake_taskq, &peer->p_send_initiation);
1120 }
1121 
1122 void
1123 wg_timers_run_retry_handshake(void *_t)
1124 {
1125 	struct wg_timers *t = _t;
1126 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1127 
1128 	mtx_enter(&t->t_handshake_mtx);
1129 	if (t->t_handshake_retries <= MAX_TIMER_HANDSHAKES) {
1130 		t->t_handshake_retries++;
1131 		mtx_leave(&t->t_handshake_mtx);
1132 
1133 		DPRINTF(peer->p_sc, "Handshake for peer %llu did not complete "
1134 		    "after %d seconds, retrying (try %d)\n", peer->p_id,
1135 		    REKEY_TIMEOUT, t->t_handshake_retries + 1);
1136 		wg_peer_clear_src(peer);
1137 		wg_timers_run_send_initiation(t, 1);
1138 	} else {
1139 		mtx_leave(&t->t_handshake_mtx);
1140 
1141 		DPRINTF(peer->p_sc, "Handshake for peer %llu did not complete "
1142 		    "after %d retries, giving up\n", peer->p_id,
1143 		    MAX_TIMER_HANDSHAKES + 2);
1144 
1145 		timeout_del(&t->t_send_keepalive);
1146 		mq_purge(&peer->p_stage_queue);
1147 		if (!timeout_pending(&t->t_zero_key_material))
1148 			timeout_add_sec(&t->t_zero_key_material,
1149 			    REJECT_AFTER_TIME * 3);
1150 	}
1151 }
1152 
1153 void
1154 wg_timers_run_send_keepalive(void *_t)
1155 {
1156 	struct wg_timers *t = _t;
1157 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1158 
1159 	task_add(wg_crypt_taskq, &peer->p_send_keepalive);
1160 	if (t->t_need_another_keepalive) {
1161 		t->t_need_another_keepalive = 0;
1162 		timeout_add_sec(&t->t_send_keepalive, KEEPALIVE_TIMEOUT);
1163 	}
1164 }
1165 
1166 void
1167 wg_timers_run_new_handshake(void *_t)
1168 {
1169 	struct wg_timers *t = _t;
1170 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1171 
1172 	DPRINTF(peer->p_sc, "Retrying handshake with peer %llu because we "
1173 	    "stopped hearing back after %d seconds\n",
1174 	    peer->p_id, NEW_HANDSHAKE_TIMEOUT);
1175 	wg_peer_clear_src(peer);
1176 
1177 	wg_timers_run_send_initiation(t, 0);
1178 }
1179 
1180 void
1181 wg_timers_run_zero_key_material(void *_t)
1182 {
1183 	struct wg_timers *t = _t;
1184 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1185 
1186 	DPRINTF(peer->p_sc, "Zeroing out keys for peer %llu\n", peer->p_id);
1187 	task_add(wg_handshake_taskq, &peer->p_clear_secrets);
1188 }
1189 
1190 void
1191 wg_timers_run_persistent_keepalive(void *_t)
1192 {
1193 	struct wg_timers *t = _t;
1194 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1195 	if (t->t_persistent_keepalive_interval != 0)
1196 		task_add(wg_crypt_taskq, &peer->p_send_keepalive);
1197 }
1198 
1199 /* The following functions handle handshakes */
1200 void
1201 wg_peer_send_buf(struct wg_peer *peer, uint8_t *buf, size_t len)
1202 {
1203 	struct wg_endpoint	 endpoint;
1204 
1205 	wg_peer_counters_add(peer, len, 0);
1206 	wg_timers_event_any_authenticated_packet_traversal(&peer->p_timers);
1207 	wg_timers_event_any_authenticated_packet_sent(&peer->p_timers);
1208 	wg_peer_get_endpoint(peer, &endpoint);
1209 	wg_send_buf(peer->p_sc, &endpoint, buf, len);
1210 }
1211 
1212 void
1213 wg_send_initiation(void *_peer)
1214 {
1215 	struct wg_peer			*peer = _peer;
1216 	struct wg_pkt_initiation	 pkt;
1217 
1218 	if (wg_timers_check_handshake_last_sent(&peer->p_timers) != ETIMEDOUT)
1219 		return;
1220 
1221 	DPRINTF(peer->p_sc, "Sending handshake initiation to peer %llu\n",
1222 	    peer->p_id);
1223 
1224 	if (noise_create_initiation(&peer->p_remote, &pkt.s_idx, pkt.ue, pkt.es,
1225 				    pkt.ets) != 0)
1226 		return;
1227 	pkt.t = WG_PKT_INITIATION;
1228 	cookie_maker_mac(&peer->p_cookie, &pkt.m, &pkt,
1229 	    sizeof(pkt)-sizeof(pkt.m));
1230 	wg_peer_send_buf(peer, (uint8_t *)&pkt, sizeof(pkt));
1231 	wg_timers_event_handshake_initiated(&peer->p_timers);
1232 }
1233 
1234 void
1235 wg_send_response(struct wg_peer *peer)
1236 {
1237 	struct wg_pkt_response	 pkt;
1238 
1239 	DPRINTF(peer->p_sc, "Sending handshake response to peer %llu\n",
1240 	    peer->p_id);
1241 
1242 	if (noise_create_response(&peer->p_remote, &pkt.s_idx, &pkt.r_idx,
1243 				  pkt.ue, pkt.en) != 0)
1244 		return;
1245 	if (noise_remote_begin_session(&peer->p_remote) != 0)
1246 		return;
1247 	wg_timers_event_session_derived(&peer->p_timers);
1248 	pkt.t = WG_PKT_RESPONSE;
1249 	cookie_maker_mac(&peer->p_cookie, &pkt.m, &pkt,
1250 	    sizeof(pkt)-sizeof(pkt.m));
1251 	wg_timers_event_handshake_responded(&peer->p_timers);
1252 	wg_peer_send_buf(peer, (uint8_t *)&pkt, sizeof(pkt));
1253 }
1254 
1255 void
1256 wg_send_cookie(struct wg_softc *sc, struct cookie_macs *cm, uint32_t idx,
1257     struct wg_endpoint *e)
1258 {
1259 	struct wg_pkt_cookie	pkt;
1260 
1261 	DPRINTF(sc, "Sending cookie response for denied handshake message\n");
1262 
1263 	pkt.t = WG_PKT_COOKIE;
1264 	pkt.r_idx = idx;
1265 
1266 	cookie_checker_create_payload(&sc->sc_cookie, cm, pkt.nonce,
1267 	    pkt.ec, &e->e_remote.r_sa);
1268 
1269 	wg_send_buf(sc, e, (uint8_t *)&pkt, sizeof(pkt));
1270 }
1271 
1272 void
1273 wg_send_keepalive(void *_peer)
1274 {
1275 	struct wg_peer	*peer = _peer;
1276 	struct wg_softc	*sc = peer->p_sc;
1277 	struct wg_tag	*t;
1278 	struct mbuf	*m;
1279 
1280 	if (!mq_empty(&peer->p_stage_queue))
1281 		goto send;
1282 
1283 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1284 		return;
1285 
1286 	if ((t = wg_tag_get(m)) == NULL) {
1287 		m_freem(m);
1288 		return;
1289 	}
1290 
1291 	m->m_len = 0;
1292 	m_calchdrlen(m);
1293 
1294 	t->t_peer = peer;
1295 	t->t_mbuf = NULL;
1296 	t->t_done = 0;
1297 	t->t_mtu = 0; /* MTU == 0 OK for keepalive */
1298 
1299 	mq_push(&peer->p_stage_queue, m);
1300 send:
1301 	if (noise_remote_ready(&peer->p_remote) == 0) {
1302 		wg_queue_out(sc, peer);
1303 		task_add(wg_crypt_taskq, &sc->sc_encap);
1304 	} else {
1305 		wg_timers_event_want_initiation(&peer->p_timers);
1306 	}
1307 }
1308 
1309 void
1310 wg_peer_clear_secrets(void *_peer)
1311 {
1312 	struct wg_peer *peer = _peer;
1313 	noise_remote_clear(&peer->p_remote);
1314 }
1315 
1316 void
1317 wg_handshake(struct wg_softc *sc, struct mbuf *m)
1318 {
1319 	struct wg_tag			*t;
1320 	struct wg_pkt_initiation	*init;
1321 	struct wg_pkt_response		*resp;
1322 	struct wg_pkt_cookie		*cook;
1323 	struct wg_peer			*peer;
1324 	struct noise_remote		*remote;
1325 	int				 res, underload = 0;
1326 	static struct timeval		 wg_last_underload; /* microuptime */
1327 
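	/*
	 * Consider ourselves under load if the handshake queue is at least 1/8
	 * full, and keep treating handshakes that way until UNDERLOAD_TIMEOUT
	 * seconds have passed since that was last true. Under load,
	 * cookie_checker_validate_macs() may return EAGAIN below, in which
	 * case we answer with a cookie instead of processing the handshake.
	 */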
1328 	if (mq_len(&sc->sc_handshake_queue) >= MAX_QUEUED_HANDSHAKES/8) {
1329 		getmicrouptime(&wg_last_underload);
1330 		underload = 1;
1331 	} else if (wg_last_underload.tv_sec != 0) {
1332 		if (!ratecheck(&wg_last_underload, &underload_interval))
1333 			underload = 1;
1334 		else
1335 			bzero(&wg_last_underload, sizeof(wg_last_underload));
1336 	}
1337 
1338 	t = wg_tag_get(m);
1339 
1340 	switch (*mtod(m, uint32_t *)) {
1341 	case WG_PKT_INITIATION:
1342 		init = mtod(m, struct wg_pkt_initiation *);
1343 
1344 		res = cookie_checker_validate_macs(&sc->sc_cookie, &init->m,
1345 				init, sizeof(*init) - sizeof(init->m),
1346 				underload, &t->t_endpoint.e_remote.r_sa);
1347 
1348 		if (res == EINVAL) {
1349 			DPRINTF(sc, "Invalid initiation MAC\n");
1350 			goto error;
1351 		} else if (res == ECONNREFUSED) {
1352 			DPRINTF(sc, "Handshake ratelimited\n");
1353 			goto error;
1354 		} else if (res == EAGAIN) {
1355 			wg_send_cookie(sc, &init->m, init->s_idx,
1356 			    &t->t_endpoint);
1357 			goto error;
1358 		} else if (res != 0) {
1359 			panic("unexpected response: %d\n", res);
1360 		}
1361 
1362 		if (noise_consume_initiation(&sc->sc_local, &remote,
1363 		    init->s_idx, init->ue, init->es, init->ets) != 0) {
1364 			DPRINTF(sc, "Invalid handshake initiation\n");
1365 			goto error;
1366 		}
1367 
1368 		peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1369 
1370 		DPRINTF(sc, "Receiving handshake initiation from peer %llu\n",
1371 		    peer->p_id);
1372 
1373 		wg_peer_counters_add(peer, 0, sizeof(*init));
1374 		wg_peer_set_endpoint_from_tag(peer, t);
1375 		wg_send_response(peer);
1376 		break;
1377 	case WG_PKT_RESPONSE:
1378 		resp = mtod(m, struct wg_pkt_response *);
1379 
1380 		res = cookie_checker_validate_macs(&sc->sc_cookie, &resp->m,
1381 				resp, sizeof(*resp) - sizeof(resp->m),
1382 				underload, &t->t_endpoint.e_remote.r_sa);
1383 
1384 		if (res == EINVAL) {
1385 			DPRINTF(sc, "Invalid response MAC\n");
1386 			goto error;
1387 		} else if (res == ECONNREFUSED) {
1388 			DPRINTF(sc, "Handshake ratelimited\n");
1389 			goto error;
1390 		} else if (res == EAGAIN) {
1391 			wg_send_cookie(sc, &resp->m, resp->s_idx,
1392 			    &t->t_endpoint);
1393 			goto error;
1394 		} else if (res != 0) {
1395 			panic("unexpected response: %d\n", res);
1396 		}
1397 
1398 		if ((remote = wg_index_get(sc, resp->r_idx)) == NULL) {
1399 			DPRINTF(sc, "Unknown handshake response\n");
1400 			goto error;
1401 		}
1402 
1403 		peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1404 
1405 		if (noise_consume_response(remote, resp->s_idx, resp->r_idx,
1406 					   resp->ue, resp->en) != 0) {
1407 			DPRINTF(sc, "Invalid handshake response\n");
1408 			goto error;
1409 		}
1410 
1411 		DPRINTF(sc, "Receiving handshake response from peer %llu\n",
1412 				peer->p_id);
1413 
1414 		wg_peer_counters_add(peer, 0, sizeof(*resp));
1415 		wg_peer_set_endpoint_from_tag(peer, t);
1416 		if (noise_remote_begin_session(&peer->p_remote) == 0) {
1417 			wg_timers_event_session_derived(&peer->p_timers);
1418 			wg_timers_event_handshake_complete(&peer->p_timers);
1419 		}
1420 		break;
1421 	case WG_PKT_COOKIE:
1422 		cook = mtod(m, struct wg_pkt_cookie *);
1423 
1424 		if ((remote = wg_index_get(sc, cook->r_idx)) == NULL) {
1425 			DPRINTF(sc, "Unknown cookie index\n");
1426 			goto error;
1427 		}
1428 
1429 		peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1430 
1431 		if (cookie_maker_consume_payload(&peer->p_cookie,
1432 		    cook->nonce, cook->ec) != 0) {
1433 			DPRINTF(sc, "Could not decrypt cookie response\n");
1434 			goto error;
1435 		}
1436 
1437 		DPRINTF(sc, "Receiving cookie response\n");
1438 		goto error;
1439 	default:
1440 		panic("invalid packet in handshake queue");
1441 	}
1442 
1443 	wg_timers_event_any_authenticated_packet_received(&peer->p_timers);
1444 	wg_timers_event_any_authenticated_packet_traversal(&peer->p_timers);
1445 error:
1446 	m_freem(m);
1447 }
1448 
1449 void
1450 wg_handshake_worker(void *_sc)
1451 {
1452 	struct mbuf *m;
1453 	struct wg_softc *sc = _sc;
1454 	while ((m = mq_dequeue(&sc->sc_handshake_queue)) != NULL)
1455 		wg_handshake(sc, m);
1456 }
1457 
1458 /*
1459  * The following functions handle encapsulation (encryption) and
1460  * decapsulation (decryption). The wg_{en,de}cap functions will run in the
1461  * sc_crypt_taskq, while wg_deliver_{in,out} must be serialised and will run
1462  * wg_crypt_taskq, while wg_deliver_{in,out} must be serialised and will run
1463  *
1464  * The packets are tracked in two queues, a serial queue and a parallel queue.
1465  *  - The parallel queue is used to distribute the encryption across multiple
1466  *    threads.
1467  *  - The serial queue ensures that packets are not reordered and are
1468  *    delivered in sequence.
1469  * The wg_tag attached to the packet contains two flags to help the two queues
1470  * interact.
1471  *  - t_done: The parallel queue has finished with the packet; now the serial
1472  *            queue can do its work.
1473  *  - t_mbuf: Used to store the *crypted packet. In the case of encryption,
1474  *            this is a newly allocated packet, and in the case of decryption,
1475  *            it is a pointer to the same packet, which has been decrypted and
1476  *            truncated. If t_mbuf is NULL, then *cryption failed and this
1477  *            packet should not be passed.
1478  * wg_{en,de}cap work on the parallel queue, while wg_deliver_{in,out} work
1479  * on the serial queue.
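 *
 * Concretely: wg_queue_in()/wg_queue_out() place an mbuf on both the serial
 * queue and the parallel ring; wg_encap()/wg_decap() run against the ring,
 * set t_mbuf and t_done, and schedule p_deliver_{in,out}; wg_deliver_{in,out}
 * then drain the serial queue, where wg_queue_dequeue() only releases the
 * head once its tag is marked t_done.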
1480  */
1481 void
1482 wg_encap(struct wg_softc *sc, struct mbuf *m)
1483 {
1484 	int res = 0;
1485 	struct wg_pkt_data	*data;
1486 	struct wg_peer		*peer;
1487 	struct wg_tag		*t;
1488 	struct mbuf		*mc;
1489 	size_t			 padding_len, plaintext_len, out_len;
1490 	uint64_t		 nonce;
1491 
1492 	t = wg_tag_get(m);
1493 	peer = t->t_peer;
1494 
1495 	plaintext_len = min(WG_PKT_WITH_PADDING(m->m_pkthdr.len), t->t_mtu);
1496 	padding_len = plaintext_len - m->m_pkthdr.len;
1497 	out_len = sizeof(struct wg_pkt_data) + plaintext_len + NOISE_AUTHTAG_LEN;
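	/* out_len: data header, padded plaintext and the trailing auth tag */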
1498 
1499 	/*
1500 	 * For the time being we allocate a new packet with sufficient size to
1501 	 * hold the encrypted data and headers. This is hard to avoid, as
1502 	 * p_encap_queue (an mbuf_list) holds a reference to the mbuf.
1503 	 * If we m_makespace or similar, we risk corrupting that list.
1504 	 * Additionally, we only pass a buf and buf length to
1505 	 * noise_remote_encrypt. Technically it would be possible to teach
1506 	 * noise_remote_encrypt about mbufs, but we would need to sort out the
1507 	 * p_encap_queue situation first.
1508 	 */
1509 	if ((mc = m_clget(NULL, M_NOWAIT, out_len)) == NULL)
1510 		goto error;
1511 
1512 	data = mtod(mc, struct wg_pkt_data *);
1513 	m_copydata(m, 0, m->m_pkthdr.len, data->buf);
1514 	bzero(data->buf + m->m_pkthdr.len, padding_len);
1515 	data->t = WG_PKT_DATA;
1516 
1517 	/*
1518 	 * Copy the flow hash from the inner packet to the outer packet, so
1519 	 * that fq_codel can properly separate streams, rather than falling
1520 	 * back to random buckets.
1521 	 */
1522 	mc->m_pkthdr.ph_flowid = m->m_pkthdr.ph_flowid;
1523 
1524 	res = noise_remote_encrypt(&peer->p_remote, &data->r_idx, &nonce,
1525 				   data->buf, plaintext_len);
1526 	nonce = htole64(nonce); /* Wire format is little endian. */
1527 	memcpy(data->nonce, &nonce, sizeof(data->nonce));
1528 
1529 	if (__predict_false(res == EINVAL)) {
1530 		m_freem(mc);
1531 		goto error;
1532 	} else if (__predict_false(res == ESTALE)) {
1533 		wg_timers_event_want_initiation(&peer->p_timers);
1534 	} else if (__predict_false(res != 0)) {
1535 		panic("unexpected result: %d\n", res);
1536 	}
1537 
1538 	/* A packet with length 0 is a keepalive packet */
1539 	if (__predict_false(m->m_pkthdr.len == 0))
1540 		DPRINTF(sc, "Sending keepalive packet to peer %llu\n",
1541 		    peer->p_id);
1542 
1543 	mc->m_pkthdr.ph_loopcnt = m->m_pkthdr.ph_loopcnt;
1544 	mc->m_flags &= ~(M_MCAST | M_BCAST);
1545 	mc->m_len = out_len;
1546 	m_calchdrlen(mc);
1547 
1548 	/*
1549 	 * We would count ifc_opackets, ifc_obytes of m here, except if_snd
1550 	 * already does that for us, so no need to worry about it.
1551 	counters_pkt(sc->sc_if.if_counters, ifc_opackets, ifc_obytes,
1552 	    m->m_pkthdr.len);
1553 	 */
1554 	wg_peer_counters_add(peer, mc->m_pkthdr.len, 0);
1555 
1556 	t->t_mbuf = mc;
1557 error:
1558 	t->t_done = 1;
1559 	task_add(net_tq(sc->sc_if.if_index), &peer->p_deliver_out);
1560 }
1561 
1562 void
1563 wg_decap(struct wg_softc *sc, struct mbuf *m)
1564 {
1565 	int			 res, len;
1566 	struct ip		*ip;
1567 	struct ip6_hdr		*ip6;
1568 	struct wg_pkt_data	*data;
1569 	struct wg_peer		*peer, *allowed_peer;
1570 	struct wg_tag		*t;
1571 	size_t			 payload_len;
1572 	uint64_t		 nonce;
1573 
1574 	t = wg_tag_get(m);
1575 	peer = t->t_peer;
1576 
1577 	/*
1578 	 * Likewise to wg_encap, we pass a buf and buf length to
1579 	 * noise_remote_decrypt. Again, possible to teach it about mbufs
1580 	 * but need to get over the p_decap_queue situation first. However,
1581 	 * we do not need to allocate a new mbuf as the decrypted packet is
1582 	 * strictly smaller than encrypted. We just set t_mbuf to m and
1583 	 * wg_deliver_in knows how to deal with that.
1584 	 */
1585 	data = mtod(m, struct wg_pkt_data *);
1586 	payload_len = m->m_pkthdr.len - sizeof(struct wg_pkt_data);
1587 	memcpy(&nonce, data->nonce, sizeof(nonce));
1588 	nonce = le64toh(nonce); /* Wire format is little endian. */
1589 	res = noise_remote_decrypt(&peer->p_remote, data->r_idx, nonce,
1590 				   data->buf, payload_len);
1591 
1592 	if (__predict_false(res == EINVAL)) {
1593 		goto error;
1594 	} else if (__predict_false(res == ECONNRESET)) {
1595 		wg_timers_event_handshake_complete(&peer->p_timers);
1596 	} else if (__predict_false(res == ESTALE)) {
1597 		wg_timers_event_want_initiation(&peer->p_timers);
1598 	} else if (__predict_false(res != 0)) {
1599 		panic("unexpected response: %d\n", res);
1600 	}
1601 
1602 	wg_peer_set_endpoint_from_tag(peer, t);
1603 
1604 	wg_peer_counters_add(peer, 0, m->m_pkthdr.len);
1605 
1606 	m_adj(m, sizeof(struct wg_pkt_data));
1607 	m_adj(m, -NOISE_AUTHTAG_LEN);
1608 
1609 	counters_pkt(sc->sc_if.if_counters, ifc_ipackets, ifc_ibytes,
1610 	    m->m_pkthdr.len);
1611 
1612 	/* A packet with length 0 is a keepalive packet */
1613 	if (__predict_false(m->m_pkthdr.len == 0)) {
1614 		DPRINTF(sc, "Receiving keepalive packet from peer "
1615 		    "%llu\n", peer->p_id);
1616 		goto done;
1617 	}
1618 
1619 	/*
1620 	 * We can let the network stack handle the intricate validation of the
1621 	 * IP header; here we only check the size and the version, so we can
1622 	 * read the source address in wg_aip_lookup.
1623 	 *
1624 	 * We also need to trim the packet, as it was likely padded before
1625 	 * encryption. While we could drop it here, it will be more helpful to
1626 	 * pass it to bpf_mtap and use the counters that people are expecting
1627 	 * in ipv4_input and ipv6_input. We can rely on ipv4_input and
1628 	 * ipv6_input to properly validate the headers.
1629 	 */
1630 	ip = mtod(m, struct ip *);
1631 	ip6 = mtod(m, struct ip6_hdr *);
1632 
1633 	if (m->m_pkthdr.len >= sizeof(struct ip) && ip->ip_v == IPVERSION) {
1634 		m->m_pkthdr.ph_family = AF_INET;
1635 
1636 		len = ntohs(ip->ip_len);
1637 		if (len >= sizeof(struct ip) && len < m->m_pkthdr.len)
1638 			m_adj(m, len - m->m_pkthdr.len);
1639 
1640 		allowed_peer = wg_aip_lookup(sc->sc_aip4, &ip->ip_src);
1641 #ifdef INET6
1642 	} else if (m->m_pkthdr.len >= sizeof(struct ip6_hdr) &&
1643 	    (ip6->ip6_vfc & IPV6_VERSION_MASK) == IPV6_VERSION) {
1644 		m->m_pkthdr.ph_family = AF_INET6;
1645 
1646 		len = ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr);
1647 		if (len < m->m_pkthdr.len)
1648 			m_adj(m, len - m->m_pkthdr.len);
1649 
1650 		allowed_peer = wg_aip_lookup(sc->sc_aip6, &ip6->ip6_src);
1651 #endif
1652 	} else {
1653 		DPRINTF(sc, "Packet is neither ipv4 nor ipv6 from "
1654 		    "peer %llu\n", peer->p_id);
1655 		goto error;
1656 	}
1657 
1658 	if (__predict_false(peer != allowed_peer)) {
1659 		DPRINTF(sc, "Packet has unallowed src IP from peer "
1660 		    "%llu\n", peer->p_id);
1661 		goto error;
1662 	}
1663 
1664 	/* tunneled packet was not offloaded */
1665 	m->m_pkthdr.csum_flags = 0;
1666 
1667 	m->m_pkthdr.ph_ifidx = sc->sc_if.if_index;
1668 	m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
1669 	m->m_flags &= ~(M_MCAST | M_BCAST);
1670 #if NPF > 0
1671 	pf_pkt_addr_changed(m);
1672 #endif /* NPF > 0 */
1673 
1674 done:
1675 	t->t_mbuf = m;
1676 error:
1677 	t->t_done = 1;
1678 	task_add(net_tq(sc->sc_if.if_index), &peer->p_deliver_in);
1679 }
1680 
1681 void
1682 wg_encap_worker(void *_sc)
1683 {
1684 	struct mbuf *m;
1685 	struct wg_softc *sc = _sc;
1686 	while ((m = wg_ring_dequeue(&sc->sc_encap_ring)) != NULL)
1687 		wg_encap(sc, m);
1688 }
1689 
1690 void
1691 wg_decap_worker(void *_sc)
1692 {
1693 	struct mbuf *m;
1694 	struct wg_softc *sc = _sc;
1695 	while ((m = wg_ring_dequeue(&sc->sc_decap_ring)) != NULL)
1696 		wg_decap(sc, m);
1697 }
1698 
1699 void
1700 wg_deliver_out(void *_peer)
1701 {
1702 	struct wg_peer		*peer = _peer;
1703 	struct wg_softc		*sc = peer->p_sc;
1704 	struct wg_endpoint	 endpoint;
1705 	struct wg_tag		*t;
1706 	struct mbuf		*m;
1707 	int			 ret;
1708 
1709 	wg_peer_get_endpoint(peer, &endpoint);
1710 
1711 	while ((m = wg_queue_dequeue(&peer->p_encap_queue, &t)) != NULL) {
1712 		/* t_mbuf will contain the encrypted packet */
1713 		if (t->t_mbuf == NULL){
1714 			counters_inc(sc->sc_if.if_counters, ifc_oerrors);
1715 			m_freem(m);
1716 			continue;
1717 		}
1718 
1719 		ret = wg_send(sc, &endpoint, t->t_mbuf);
1720 
1721 		if (ret == 0) {
1722 			wg_timers_event_any_authenticated_packet_traversal(
1723 			    &peer->p_timers);
1724 			wg_timers_event_any_authenticated_packet_sent(
1725 			    &peer->p_timers);
1726 
1727 			if (m->m_pkthdr.len != 0)
1728 				wg_timers_event_data_sent(&peer->p_timers);
1729 		} else if (ret == EADDRNOTAVAIL) {
1730 			wg_peer_clear_src(peer);
1731 			wg_peer_get_endpoint(peer, &endpoint);
1732 		}
1733 
1734 		m_freem(m);
1735 	}
1736 }
1737 
1738 void
1739 wg_deliver_in(void *_peer)
1740 {
1741 	struct wg_peer	*peer = _peer;
1742 	struct wg_softc	*sc = peer->p_sc;
1743 	struct wg_tag	*t;
1744 	struct mbuf	*m;
1745 
1746 	while ((m = wg_queue_dequeue(&peer->p_decap_queue, &t)) != NULL) {
1747 		/* t_mbuf will contain the decrypted packet */
1748 		if (t->t_mbuf == NULL) {
1749 			counters_inc(sc->sc_if.if_counters, ifc_ierrors);
1750 			m_freem(m);
1751 			continue;
1752 		}
1753 
1754 		/* From here on m == t->t_mbuf */
1755 		KASSERT(m == t->t_mbuf);
1756 
1757 		wg_timers_event_any_authenticated_packet_received(
1758 		    &peer->p_timers);
1759 		wg_timers_event_any_authenticated_packet_traversal(
1760 		    &peer->p_timers);
1761 
1762 		if (m->m_pkthdr.len == 0) {
1763 			m_freem(m);
1764 			continue;
1765 		}
1766 
1767 #if NBPFILTER > 0
1768 		if (sc->sc_if.if_bpf != NULL)
1769 			bpf_mtap_af(sc->sc_if.if_bpf,
1770 			    m->m_pkthdr.ph_family, m, BPF_DIRECTION_IN);
1771 #endif
1772 
1773 		NET_LOCK();
1774 		if (m->m_pkthdr.ph_family == AF_INET)
1775 			ipv4_input(&sc->sc_if, m);
1776 #ifdef INET6
1777 		else if (m->m_pkthdr.ph_family == AF_INET6)
1778 			ipv6_input(&sc->sc_if, m);
1779 #endif
1780 		else
1781 			panic("invalid ph_family");
1782 		NET_UNLOCK();
1783 
1784 		wg_timers_event_data_received(&peer->p_timers);
1785 	}
1786 }
1787 
1788 int
1789 wg_queue_in(struct wg_softc *sc, struct wg_peer *peer, struct mbuf *m)
1790 {
1791 	struct wg_ring		*parallel = &sc->sc_decap_ring;
1792 	struct wg_queue		*serial = &peer->p_decap_queue;
1793 	struct wg_tag		*t;
1794 
1795 	mtx_enter(&serial->q_mtx);
1796 	if (serial->q_list.ml_len < MAX_QUEUED_PKT) {
1797 		ml_enqueue(&serial->q_list, m);
1798 		mtx_leave(&serial->q_mtx);
1799 	} else {
1800 		mtx_leave(&serial->q_mtx);
1801 		m_freem(m);
1802 		return ENOBUFS;
1803 	}
1804 
1805 	mtx_enter(&parallel->r_mtx);
1806 	if (parallel->r_tail - parallel->r_head < MAX_QUEUED_PKT) {
1807 		parallel->r_buf[parallel->r_tail & MAX_QUEUED_PKT_MASK] = m;
1808 		parallel->r_tail++;
1809 		mtx_leave(&parallel->r_mtx);
1810 	} else {
1811 		mtx_leave(&parallel->r_mtx);
1812 		t = wg_tag_get(m);
1813 		t->t_done = 1;
1814 		return ENOBUFS;
1815 	}
1816 
1817 	return 0;
1818 }
1819 
1820 void
1821 wg_queue_out(struct wg_softc *sc, struct wg_peer *peer)
1822 {
1823 	struct wg_ring		*parallel = &sc->sc_encap_ring;
1824 	struct wg_queue		*serial = &peer->p_encap_queue;
1825 	struct mbuf_list 	 ml, ml_free;
1826 	struct mbuf		*m;
1827 	struct wg_tag		*t;
1828 	int			 dropped;
1829 
1830 	/*
1831 	 * We delist all staged packets and then add them to the queues. This
1832 	 * can race with wg_qstart when called from wg_send_keepalive; however,
1833 	 * wg_qstart itself will not race, as it is serialised.
1834 	 */
1835 	mq_delist(&peer->p_stage_queue, &ml);
1836 	ml_init(&ml_free);
1837 
1838 	while ((m = ml_dequeue(&ml)) != NULL) {
1839 		mtx_enter(&serial->q_mtx);
1840 		if (serial->q_list.ml_len < MAX_QUEUED_PKT) {
1841 			ml_enqueue(&serial->q_list, m);
1842 			mtx_leave(&serial->q_mtx);
1843 		} else {
1844 			mtx_leave(&serial->q_mtx);
1845 			ml_enqueue(&ml_free, m);
1846 			continue;
1847 		}
1848 
1849 		mtx_enter(&parallel->r_mtx);
1850 		if (parallel->r_tail - parallel->r_head < MAX_QUEUED_PKT) {
1851 			parallel->r_buf[parallel->r_tail & MAX_QUEUED_PKT_MASK] = m;
1852 			parallel->r_tail++;
1853 			mtx_leave(&parallel->r_mtx);
1854 		} else {
1855 			mtx_leave(&parallel->r_mtx);
1856 			t = wg_tag_get(m);
1857 			t->t_done = 1;
1858 		}
1859 	}
1860 
1861 	if ((dropped = ml_purge(&ml_free)) > 0)
1862 		counters_add(sc->sc_if.if_counters, ifc_oqdrops, dropped);
1863 }
1864 
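/*
 * The parallel rings are fixed-size circular buffers indexed by
 * free-running head/tail counters; occupancy is r_tail - r_head and
 * slots are addressed modulo MAX_QUEUED_PKT via MAX_QUEUED_PKT_MASK.
 */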
1865 struct mbuf *
1866 wg_ring_dequeue(struct wg_ring *r)
1867 {
1868 	struct mbuf *m = NULL;
1869 	mtx_enter(&r->r_mtx);
1870 	if (r->r_head != r->r_tail) {
1871 		m = r->r_buf[r->r_head & MAX_QUEUED_PKT_MASK];
1872 		r->r_head++;
1873 	}
1874 	mtx_leave(&r->r_mtx);
1875 	return m;
1876 }
1877 
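/*
 * A packet leaves the serial queue only once its tag has been marked
 * done by the worker that encrypted or decrypted it, so completions are
 * always delivered in the order the packets were queued.
 */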
1878 struct mbuf *
1879 wg_queue_dequeue(struct wg_queue *q, struct wg_tag **t)
1880 {
1881 	struct mbuf *m;
1882 	mtx_enter(&q->q_mtx);
1883 	if ((m = q->q_list.ml_head) != NULL && (*t = wg_tag_get(m))->t_done)
1884 		ml_dequeue(&q->q_list);
1885 	else
1886 		m = NULL;
1887 	mtx_leave(&q->q_mtx);
1888 	return m;
1889 }
1890 
1891 size_t
1892 wg_queue_len(struct wg_queue *q)
1893 {
1894 	size_t len;
1895 	mtx_enter(&q->q_mtx);
1896 	len = q->q_list.ml_len;
1897 	mtx_leave(&q->q_mtx);
1898 	return len;
1899 }
1900 
1901 struct noise_remote *
1902 wg_remote_get(void *_sc, uint8_t public[NOISE_PUBLIC_KEY_LEN])
1903 {
1904 	struct wg_peer	*peer;
1905 	struct wg_softc	*sc = _sc;
1906 	if ((peer = wg_peer_lookup(sc, public)) == NULL)
1907 		return NULL;
1908 	return &peer->p_remote;
1909 }
1910 
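/*
 * Assign a random receiver index to this remote and publish it in
 * sc_index, so incoming packets carrying that index can be mapped back
 * to the noise_remote.  If the randomly chosen index is already in use,
 * a new one is drawn.
 */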
1911 uint32_t
1912 wg_index_set(void *_sc, struct noise_remote *remote)
1913 {
1914 	struct wg_peer	*peer;
1915 	struct wg_softc	*sc = _sc;
1916 	struct wg_index *index, *iter;
1917 	uint32_t	 key;
1918 
1919 	/*
1920 	 * We can modify this without a lock, as wg_index_set and wg_index_drop
1921 	 * are guaranteed to be serialised (per remote).
1922 	 */
1923 	peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1924 	index = SLIST_FIRST(&peer->p_unused_index);
1925 	KASSERT(index != NULL);
1926 	SLIST_REMOVE_HEAD(&peer->p_unused_index, i_unused_entry);
1927 
1928 	index->i_value = remote;
1929 
1930 	mtx_enter(&sc->sc_index_mtx);
1931 assign_id:
1932 	key = index->i_key = arc4random();
1933 	key &= sc->sc_index_mask;
1934 	LIST_FOREACH(iter, &sc->sc_index[key], i_entry)
1935 		if (iter->i_key == index->i_key)
1936 			goto assign_id;
1937 
1938 	LIST_INSERT_HEAD(&sc->sc_index[key], index, i_entry);
1939 
1940 	mtx_leave(&sc->sc_index_mtx);
1941 
1942 	/* Likewise, no need to lock for index here. */
1943 	return index->i_key;
1944 }
1945 
1946 struct noise_remote *
1947 wg_index_get(void *_sc, uint32_t key0)
1948 {
1949 	struct wg_softc		*sc = _sc;
1950 	struct wg_index		*iter;
1951 	struct noise_remote	*remote = NULL;
1952 	uint32_t		 key = key0 & sc->sc_index_mask;
1953 
1954 	mtx_enter(&sc->sc_index_mtx);
1955 	LIST_FOREACH(iter, &sc->sc_index[key], i_entry)
1956 		if (iter->i_key == key0) {
1957 			remote = iter->i_value;
1958 			break;
1959 		}
1960 	mtx_leave(&sc->sc_index_mtx);
1961 	return remote;
1962 }
1963 
1964 void
1965 wg_index_drop(void *_sc, uint32_t key0)
1966 {
1967 	struct wg_softc	*sc = _sc;
1968 	struct wg_index	*iter;
1969 	struct wg_peer	*peer = NULL;
1970 	uint32_t	 key = key0 & sc->sc_index_mask;
1971 
1972 	mtx_enter(&sc->sc_index_mtx);
1973 	LIST_FOREACH(iter, &sc->sc_index[key], i_entry)
1974 		if (iter->i_key == key0) {
1975 			LIST_REMOVE(iter, i_entry);
1976 			break;
1977 		}
1978 	mtx_leave(&sc->sc_index_mtx);
1979 
1980 	/* We expect a peer */
1981 	peer = CONTAINER_OF(iter->i_value, struct wg_peer, p_remote);
1982 	KASSERT(peer != NULL);
1983 	SLIST_INSERT_HEAD(&peer->p_unused_index, iter, i_unused_entry);
1984 }
1985 
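/*
 * UDP input path.  The remote endpoint is recorded in the packet tag,
 * then the packet is classified by its length and leading 32-bit type
 * word: handshake messages (initiation, response, cookie) are queued
 * for the handshake taskq, while data messages are matched to a peer
 * via their receiver index and queued for parallel decryption.
 */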
1986 struct mbuf *
1987 wg_input(void *_sc, struct mbuf *m, struct ip *ip, struct ip6_hdr *ip6,
1988     void *_uh, int hlen)
1989 {
1990 	struct wg_pkt_data	*data;
1991 	struct noise_remote	*remote;
1992 	struct wg_tag		*t;
1993 	struct wg_softc		*sc = _sc;
1994 	struct udphdr		*uh = _uh;
1995 
1996 	NET_ASSERT_LOCKED();
1997 
1998 	if ((t = wg_tag_get(m)) == NULL) {
1999 		m_freem(m);
2000 		return NULL;
2001 	}
2002 
2003 	if (ip != NULL) {
2004 		t->t_endpoint.e_remote.r_sa.sa_len = sizeof(struct sockaddr_in);
2005 		t->t_endpoint.e_remote.r_sa.sa_family = AF_INET;
2006 		t->t_endpoint.e_remote.r_sin.sin_port = uh->uh_sport;
2007 		t->t_endpoint.e_remote.r_sin.sin_addr = ip->ip_src;
2008 		t->t_endpoint.e_local.l_in = ip->ip_dst;
2009 #ifdef INET6
2010 	} else if (ip6 != NULL) {
2011 		t->t_endpoint.e_remote.r_sa.sa_len = sizeof(struct sockaddr_in6);
2012 		t->t_endpoint.e_remote.r_sa.sa_family = AF_INET6;
2013 		t->t_endpoint.e_remote.r_sin6.sin6_port = uh->uh_sport;
2014 		t->t_endpoint.e_remote.r_sin6.sin6_addr = ip6->ip6_src;
2015 		t->t_endpoint.e_local.l_in6 = ip6->ip6_dst;
2016 #endif
2017 	} else {
2018 		m_freem(m);
2019 		return NULL;
2020 	}
2021 
2022 	/* m has an IP/IPv6 header of hlen length; we don't need it anymore. */
2023 	m_adj(m, hlen);
2024 
2025 	/*
2026 	 * Ensure mbuf is contiguous over full length of packet. This is done
2027 	 * so we can directly read the handshake values in wg_handshake, and so
2028 	 * we can decrypt a transport packet by passing a single buffer to
2029 	 * noise_remote_decrypt in wg_decap.
2030 	 */
2031 	if ((m = m_pullup(m, m->m_pkthdr.len)) == NULL)
2032 		return NULL;
2033 
2034 	if ((m->m_pkthdr.len == sizeof(struct wg_pkt_initiation) &&
2035 		*mtod(m, uint32_t *) == WG_PKT_INITIATION) ||
2036 	    (m->m_pkthdr.len == sizeof(struct wg_pkt_response) &&
2037 		*mtod(m, uint32_t *) == WG_PKT_RESPONSE) ||
2038 	    (m->m_pkthdr.len == sizeof(struct wg_pkt_cookie) &&
2039 		*mtod(m, uint32_t *) == WG_PKT_COOKIE)) {
2040 
2041 		if (mq_enqueue(&sc->sc_handshake_queue, m) != 0)
2042 			DPRINTF(sc, "Dropping handshake packet\n");
2043 		task_add(wg_handshake_taskq, &sc->sc_handshake);
2044 
2045 	} else if (m->m_pkthdr.len >= sizeof(struct wg_pkt_data) +
2046 	    NOISE_AUTHTAG_LEN && *mtod(m, uint32_t *) == WG_PKT_DATA) {
2047 
2048 		data = mtod(m, struct wg_pkt_data *);
2049 
2050 		if ((remote = wg_index_get(sc, data->r_idx)) != NULL) {
2051 			t->t_peer = CONTAINER_OF(remote, struct wg_peer,
2052 			    p_remote);
2053 			t->t_mbuf = NULL;
2054 			t->t_done = 0;
2055 
2056 			if (wg_queue_in(sc, t->t_peer, m) != 0)
2057 				counters_inc(sc->sc_if.if_counters,
2058 				    ifc_iqdrops);
2059 			task_add(wg_crypt_taskq, &sc->sc_decap);
2060 		} else {
2061 			counters_inc(sc->sc_if.if_counters, ifc_ierrors);
2062 			m_freem(m);
2063 		}
2064 	} else {
2065 		counters_inc(sc->sc_if.if_counters, ifc_ierrors);
2066 		m_freem(m);
2067 	}
2068 
2069 	return NULL;
2070 }
2071 
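/*
 * if_qstart handler: move packets from the interface queue onto each
 * peer's stage queue, then, for every peer touched, either push the
 * staged packets towards encryption (if a session is ready) or request
 * a new handshake.
 */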
2072 void
2073 wg_qstart(struct ifqueue *ifq)
2074 {
2075 	struct ifnet		*ifp = ifq->ifq_if;
2076 	struct wg_softc		*sc = ifp->if_softc;
2077 	struct wg_peer		*peer;
2078 	struct wg_tag		*t;
2079 	struct mbuf		*m;
2080 	SLIST_HEAD(,wg_peer)	 start_list;
2081 
2082 	SLIST_INIT(&start_list);
2083 
2084 	/*
2085 	 * We should be OK to modify p_start_list and p_start_onlist in this
2086 	 * function, as only one ifp->if_qstart should be invoked at a
2087 	 * time.
2088 	 */
2089 	while ((m = ifq_dequeue(ifq)) != NULL) {
2090 		t = wg_tag_get(m);
2091 		peer = t->t_peer;
2092 		if (mq_push(&peer->p_stage_queue, m) != 0)
2093 			counters_inc(ifp->if_counters, ifc_oqdrops);
2094 		if (!peer->p_start_onlist) {
2095 			SLIST_INSERT_HEAD(&start_list, peer, p_start_list);
2096 			peer->p_start_onlist = 1;
2097 		}
2098 	}
2099 	SLIST_FOREACH(peer, &start_list, p_start_list) {
2100 		if (noise_remote_ready(&peer->p_remote) == 0)
2101 			wg_queue_out(sc, peer);
2102 		else
2103 			wg_timers_event_want_initiation(&peer->p_timers);
2104 		peer->p_start_onlist = 0;
2105 	}
2106 	task_add(wg_crypt_taskq, &sc->sc_encap);
2107 }
2108 
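/*
 * if_output handler: look up the destination in the allowed-IPs table
 * to find the owning peer, stash it in the packet tag together with the
 * interface MTU, and enqueue the packet for wg_qstart.
 */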
2109 int
2110 wg_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
2111     struct rtentry *rt)
2112 {
2113 	struct wg_softc	*sc = ifp->if_softc;
2114 	struct wg_peer	*peer;
2115 	struct wg_tag	*t;
2116 	int		 af, ret = EINVAL;
2117 
2118 	NET_ASSERT_LOCKED();
2119 
2120 	if ((t = wg_tag_get(m)) == NULL) {
2121 		ret = ENOBUFS;
2122 		goto error;
2123 	}
2124 
2125 	m->m_pkthdr.ph_family = sa->sa_family;
2126 	if (sa->sa_family == AF_INET) {
2127 		peer = wg_aip_lookup(sc->sc_aip4,
2128 		    &mtod(m, struct ip *)->ip_dst);
2129 #ifdef INET6
2130 	} else if (sa->sa_family == AF_INET6) {
2131 		peer = wg_aip_lookup(sc->sc_aip6,
2132 		    &mtod(m, struct ip6_hdr *)->ip6_dst);
2133 #endif
2134 	} else {
2135 		ret = EAFNOSUPPORT;
2136 		goto error;
2137 	}
2138 
2139 #if NBPFILTER > 0
2140 	if (sc->sc_if.if_bpf)
2141 		bpf_mtap_af(sc->sc_if.if_bpf, sa->sa_family, m,
2142 		    BPF_DIRECTION_OUT);
2143 #endif
2144 
2145 	if (peer == NULL) {
2146 		ret = ENETUNREACH;
2147 		goto error;
2148 	}
2149 
2150 	af = peer->p_endpoint.e_remote.r_sa.sa_family;
2151 	if (af != AF_INET && af != AF_INET6) {
2152 		DPRINTF(sc, "No valid endpoint has been configured or "
2153 				"discovered for peer %llu\n", peer->p_id);
2154 		ret = EDESTADDRREQ;
2155 		goto error;
2156 	}
2157 
2158 	if (m->m_pkthdr.ph_loopcnt++ > M_MAXLOOP) {
2159 		DPRINTF(sc, "Packet looped\n");
2160 		ret = ELOOP;
2161 		goto error;
2162 	}
2163 
2164 	/*
2165 	 * As we hold a reference to peer in the mbuf, we can't handle a
2166 	 * delayed packet without doing some refcnting. If a peer is removed
2167 	 * while a delayed packet holds a reference, bad things will happen. For the
2168 	 * time being, delayed packets are unsupported. This may be fixed with
2169 	 * another aip_lookup in wg_qstart, or refcnting as mentioned before.
2170 	 */
2171 	if (m->m_pkthdr.pf.delay > 0) {
2172 		DPRINTF(sc, "PF Delay Unsupported\n");
2173 		ret = EOPNOTSUPP;
2174 		goto error;
2175 	}
2176 
2177 	t->t_peer = peer;
2178 	t->t_mbuf = NULL;
2179 	t->t_done = 0;
2180 	t->t_mtu = ifp->if_mtu;
2181 
2182 	/*
2183 	 * There is still an issue with ifq accounting: a packet that gets
2184 	 * dropped in wg_qstart, or is never encrypted, gets counted as
2185 	 * ofails or oqdrops as well, so the packet is counted twice.
2186 	 */
2187 	return if_enqueue(ifp, m);
2188 error:
2189 	counters_inc(ifp->if_counters, ifc_oerrors);
2190 	m_freem(m);
2191 	return ret;
2192 }
2193 
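/*
 * SIOCSWG: copy the wg_data_io description in from userland and apply
 * it under the exclusive sc_lock; this may replace the private key,
 * rebind the UDP socket, and create, update or destroy peers and their
 * allowed IPs.  Key material copied to the stack is zeroed on exit.
 */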
2194 int
2195 wg_ioctl_set(struct wg_softc *sc, struct wg_data_io *data)
2196 {
2197 	struct wg_interface_io	*iface_p, iface_o;
2198 	struct wg_peer_io	*peer_p, peer_o;
2199 	struct wg_aip_io	*aip_p, aip_o;
2200 
2201 	struct wg_peer		*peer, *tpeer;
2202 	struct wg_aip		*aip, *taip;
2203 
2204 	in_port_t		 port;
2205 	int			 rtable;
2206 
2207 	uint8_t			 public[WG_KEY_SIZE], private[WG_KEY_SIZE];
2208 	size_t			 i, j;
2209 	int			 ret, has_identity;
2210 
2211 	if ((ret = suser(curproc)) != 0)
2212 		return ret;
2213 
2214 	rw_enter_write(&sc->sc_lock);
2215 
2216 	iface_p = data->wgd_interface;
2217 	if ((ret = copyin(iface_p, &iface_o, sizeof(iface_o))) != 0)
2218 		goto error;
2219 
2220 	if (iface_o.i_flags & WG_INTERFACE_REPLACE_PEERS)
2221 		TAILQ_FOREACH_SAFE(peer, &sc->sc_peer_seq, p_seq_entry, tpeer)
2222 			wg_peer_destroy(peer);
2223 
2224 	if (iface_o.i_flags & WG_INTERFACE_HAS_PRIVATE &&
2225 	    (noise_local_keys(&sc->sc_local, NULL, private) ||
2226 	     timingsafe_bcmp(private, iface_o.i_private, WG_KEY_SIZE))) {
2227 		if (curve25519_generate_public(public, iface_o.i_private)) {
2228 			if ((peer = wg_peer_lookup(sc, public)) != NULL)
2229 				wg_peer_destroy(peer);
2230 		}
2231 		noise_local_lock_identity(&sc->sc_local);
2232 		has_identity = noise_local_set_private(&sc->sc_local,
2233 						       iface_o.i_private);
2234 		TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2235 			noise_remote_precompute(&peer->p_remote);
2236 			wg_timers_event_reset_handshake_last_sent(&peer->p_timers);
2237 			noise_remote_expire_current(&peer->p_remote);
2238 		}
2239 		cookie_checker_update(&sc->sc_cookie,
2240 				      has_identity == 0 ? public : NULL);
2241 		noise_local_unlock_identity(&sc->sc_local);
2242 	}
2243 
2244 	if (iface_o.i_flags & WG_INTERFACE_HAS_PORT)
2245 		port = htons(iface_o.i_port);
2246 	else
2247 		port = sc->sc_udp_port;
2248 
2249 	if (iface_o.i_flags & WG_INTERFACE_HAS_RTABLE)
2250 		rtable = iface_o.i_rtable;
2251 	else
2252 		rtable = sc->sc_udp_rtable;
2253 
2254 	if (port != sc->sc_udp_port || rtable != sc->sc_udp_rtable) {
2255 		TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry)
2256 			wg_peer_clear_src(peer);
2257 
2258 		if (sc->sc_if.if_flags & IFF_RUNNING)
2259 			if ((ret = wg_bind(sc, &port, &rtable)) != 0)
2260 				goto error;
2261 
2262 		sc->sc_udp_port = port;
2263 		sc->sc_udp_rtable = rtable;
2264 	}
2265 
2266 	peer_p = &iface_p->i_peers[0];
2267 	for (i = 0; i < iface_o.i_peers_count; i++) {
2268 		if ((ret = copyin(peer_p, &peer_o, sizeof(peer_o))) != 0)
2269 			goto error;
2270 
2271 		/* Peer must have public key */
2272 		if (!(peer_o.p_flags & WG_PEER_HAS_PUBLIC))
2273 			continue;
2274 
2275 		/* 0 = latest protocol, 1 = this protocol */
2276 		if (peer_o.p_protocol_version != 0) {
2277 			if (peer_o.p_protocol_version > 1) {
2278 				ret = EPFNOSUPPORT;
2279 				goto error;
2280 			}
2281 		}
2282 
2283 		/* Get local public and check that peer key doesn't match */
2284 		if (noise_local_keys(&sc->sc_local, public, NULL) == 0 &&
2285 		    bcmp(public, peer_o.p_public, WG_KEY_SIZE) == 0)
2286 			continue;
2287 
2288 		/* Lookup peer, or create if it doesn't exist */
2289 		if ((peer = wg_peer_lookup(sc, peer_o.p_public)) == NULL) {
2290 			/* If we want to delete, there is no need to create a new one.
2291 			 * Also, don't create a new one if we only want to
2292 			 * update. */
2293 			if (peer_o.p_flags & (WG_PEER_REMOVE|WG_PEER_UPDATE))
2294 				continue;
2295 
2296 			if ((peer = wg_peer_create(sc,
2297 			    peer_o.p_public)) == NULL) {
2298 				ret = ENOMEM;
2299 				goto error;
2300 			}
2301 		}
2302 
2303 		/* Remove peer and continue if specified */
2304 		if (peer_o.p_flags & WG_PEER_REMOVE) {
2305 			wg_peer_destroy(peer);
2306 			continue;
2307 		}
2308 
2309 		if (peer_o.p_flags & WG_PEER_HAS_ENDPOINT)
2310 			wg_peer_set_sockaddr(peer, &peer_o.p_sa);
2311 
2312 		if (peer_o.p_flags & WG_PEER_HAS_PSK)
2313 			noise_remote_set_psk(&peer->p_remote, peer_o.p_psk);
2314 
2315 		if (peer_o.p_flags & WG_PEER_HAS_PKA)
2316 			wg_timers_set_persistent_keepalive(&peer->p_timers,
2317 			    peer_o.p_pka);
2318 
2319 		if (peer_o.p_flags & WG_PEER_REPLACE_AIPS) {
2320 			LIST_FOREACH_SAFE(aip, &peer->p_aip, a_entry, taip) {
2321 				wg_aip_remove(sc, peer, &aip->a_data);
2322 			}
2323 		}
2324 
2325 		aip_p = &peer_p->p_aips[0];
2326 		for (j = 0; j < peer_o.p_aips_count; j++) {
2327 			if ((ret = copyin(aip_p, &aip_o, sizeof(aip_o))) != 0)
2328 				goto error;
2329 			ret = wg_aip_add(sc, peer, &aip_o);
2330 			if (ret != 0)
2331 				goto error;
2332 			aip_p++;
2333 		}
2334 
2335 		peer_p = (struct wg_peer_io *)aip_p;
2336 	}
2337 
2338 error:
2339 	rw_exit_write(&sc->sc_lock);
2340 	explicit_bzero(&iface_o, sizeof(iface_o));
2341 	explicit_bzero(&peer_o, sizeof(peer_o));
2342 	explicit_bzero(&aip_o, sizeof(aip_o));
2343 	explicit_bzero(public, sizeof(public));
2344 	explicit_bzero(private, sizeof(private));
2345 	return ret;
2346 }
2347 
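/*
 * SIOCGWG: export the interface configuration to userland.  Keys, peers
 * and allowed IPs are only revealed to the superuser.  wgd_size is
 * always updated to the space required, so a caller with a short buffer
 * can retry with a larger one.
 */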
2348 int
2349 wg_ioctl_get(struct wg_softc *sc, struct wg_data_io *data)
2350 {
2351 	struct wg_interface_io	*iface_p, iface_o;
2352 	struct wg_peer_io	*peer_p, peer_o;
2353 	struct wg_aip_io	*aip_p;
2354 
2355 	struct wg_peer		*peer;
2356 	struct wg_aip		*aip;
2357 
2358 	size_t			 size, peer_count, aip_count;
2359 	int			 ret = 0, is_suser = suser(curproc) == 0;
2360 
2361 	size = sizeof(struct wg_interface_io);
2362 	if (data->wgd_size < size && !is_suser)
2363 		goto ret_size;
2364 
2365 	iface_p = data->wgd_interface;
2366 	bzero(&iface_o, sizeof(iface_o));
2367 
2368 	rw_enter_read(&sc->sc_lock);
2369 
2370 	if (sc->sc_udp_port != 0) {
2371 		iface_o.i_port = ntohs(sc->sc_udp_port);
2372 		iface_o.i_flags |= WG_INTERFACE_HAS_PORT;
2373 	}
2374 
2375 	if (sc->sc_udp_rtable != 0) {
2376 		iface_o.i_rtable = sc->sc_udp_rtable;
2377 		iface_o.i_flags |= WG_INTERFACE_HAS_RTABLE;
2378 	}
2379 
2380 	if (!is_suser)
2381 		goto copy_out_iface;
2382 
2383 	if (noise_local_keys(&sc->sc_local, iface_o.i_public,
2384 	    iface_o.i_private) == 0) {
2385 		iface_o.i_flags |= WG_INTERFACE_HAS_PUBLIC;
2386 		iface_o.i_flags |= WG_INTERFACE_HAS_PRIVATE;
2387 	}
2388 
2389 	size += sizeof(struct wg_peer_io) * sc->sc_peer_num;
2390 	size += sizeof(struct wg_aip_io) * sc->sc_aip_num;
2391 	if (data->wgd_size < size)
2392 		goto unlock_and_ret_size;
2393 
2394 	peer_count = 0;
2395 	peer_p = &iface_p->i_peers[0];
2396 	TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2397 		bzero(&peer_o, sizeof(peer_o));
2398 		peer_o.p_flags = WG_PEER_HAS_PUBLIC;
2399 		peer_o.p_protocol_version = 1;
2400 
2401 		if (noise_remote_keys(&peer->p_remote, peer_o.p_public,
2402 		    peer_o.p_psk) == 0)
2403 			peer_o.p_flags |= WG_PEER_HAS_PSK;
2404 
2405 		if (wg_timers_get_persistent_keepalive(&peer->p_timers,
2406 		    &peer_o.p_pka) == 0)
2407 			peer_o.p_flags |= WG_PEER_HAS_PKA;
2408 
2409 		if (wg_peer_get_sockaddr(peer, &peer_o.p_sa) == 0)
2410 			peer_o.p_flags |= WG_PEER_HAS_ENDPOINT;
2411 
2412 		mtx_enter(&peer->p_counters_mtx);
2413 		peer_o.p_txbytes = peer->p_counters_tx;
2414 		peer_o.p_rxbytes = peer->p_counters_rx;
2415 		mtx_leave(&peer->p_counters_mtx);
2416 
2417 		wg_timers_get_last_handshake(&peer->p_timers,
2418 		    &peer_o.p_last_handshake);
2419 
2420 		aip_count = 0;
2421 		aip_p = &peer_p->p_aips[0];
2422 		LIST_FOREACH(aip, &peer->p_aip, a_entry) {
2423 			if ((ret = copyout(&aip->a_data, aip_p, sizeof(*aip_p))) != 0)
2424 				goto unlock_and_ret_size;
2425 			aip_p++;
2426 			aip_count++;
2427 		}
2428 		peer_o.p_aips_count = aip_count;
2429 
2430 		if ((ret = copyout(&peer_o, peer_p, sizeof(peer_o))) != 0)
2431 			goto unlock_and_ret_size;
2432 
2433 		peer_p = (struct wg_peer_io *)aip_p;
2434 		peer_count++;
2435 	}
2436 	iface_o.i_peers_count = peer_count;
2437 
2438 copy_out_iface:
2439 	ret = copyout(&iface_o, iface_p, sizeof(iface_o));
2440 unlock_and_ret_size:
2441 	rw_exit_read(&sc->sc_lock);
2442 	explicit_bzero(&iface_o, sizeof(iface_o));
2443 	explicit_bzero(&peer_o, sizeof(peer_o));
2444 ret_size:
2445 	data->wgd_size = size;
2446 	return ret;
2447 }
2448 
2449 int
2450 wg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2451 {
2452 	struct ifreq	*ifr = (struct ifreq *) data;
2453 	struct wg_softc	*sc = ifp->if_softc;
2454 	int		 ret = 0;
2455 
2456 	switch (cmd) {
2457 	case SIOCSWG:
2458 		NET_UNLOCK();
2459 		ret = wg_ioctl_set(sc, (struct wg_data_io *) data);
2460 		NET_LOCK();
2461 		break;
2462 	case SIOCGWG:
2463 		NET_UNLOCK();
2464 		ret = wg_ioctl_get(sc, (struct wg_data_io *) data);
2465 		NET_LOCK();
2466 		break;
2467 	/* Interface IOCTLs */
2468 	case SIOCSIFADDR:
2469 		SET(ifp->if_flags, IFF_UP);
2470 		/* FALLTHROUGH */
2471 	case SIOCSIFFLAGS:
2472 		if (ISSET(ifp->if_flags, IFF_UP))
2473 			ret = wg_up(sc);
2474 		else
2475 			wg_down(sc);
2476 		break;
2477 	case SIOCSIFMTU:
2478 		/* Arbitrary limits */
2479 		if (ifr->ifr_mtu <= 0 || ifr->ifr_mtu > 9000)
2480 			ret = EINVAL;
2481 		else
2482 			ifp->if_mtu = ifr->ifr_mtu;
2483 		break;
2484 	case SIOCADDMULTI:
2485 	case SIOCDELMULTI:
2486 		break;
2487 	default:
2488 		ret = ENOTTY;
2489 	}
2490 
2491 	return ret;
2492 }
2493 
2494 int
2495 wg_up(struct wg_softc *sc)
2496 {
2497 	struct wg_peer	*peer;
2498 	int		 ret = 0;
2499 
2500 	NET_ASSERT_LOCKED();
2501 	/*
2502 	 * We use IFF_RUNNING for exclusive access here. We may also want
2503 	 * an exclusive sc_lock as wg_bind may write to sc_udp_port. We also
2504 	 * want to drop NET_LOCK as we want to call socreate, sobind, etc. Once
2505 	 * solock is no longer === NET_LOCK, we may be able to avoid this.
2506 	 */
2507 	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
2508 		SET(sc->sc_if.if_flags, IFF_RUNNING);
2509 		NET_UNLOCK();
2510 
2511 		rw_enter_write(&sc->sc_lock);
2512 		/*
2513 		 * If we successfully bind the socket, then enable the timers
2514 		 * for each peer. This will send all staged packets and a
2515 		 * keepalive if necessary.
2516 		 */
2517 		ret = wg_bind(sc, &sc->sc_udp_port, &sc->sc_udp_rtable);
2518 		if (ret == 0) {
2519 			TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2520 				wg_timers_enable(&peer->p_timers);
2521 				wg_queue_out(sc, peer);
2522 			}
2523 		}
2524 		rw_exit_write(&sc->sc_lock);
2525 
2526 		NET_LOCK();
2527 		if (ret != 0)
2528 			CLR(sc->sc_if.if_flags, IFF_RUNNING);
2529 	}
2530 	return ret;
2531 }
2532 
2533 void
2534 wg_down(struct wg_softc *sc)
2535 {
2536 	struct wg_peer	*peer;
2537 
2538 	NET_ASSERT_LOCKED();
2539 	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING))
2540 		return;
2541 	CLR(sc->sc_if.if_flags, IFF_RUNNING);
2542 	NET_UNLOCK();
2543 
2544 	/*
2545 	 * We only need a read lock here, as we aren't writing to anything
2546 	 * that isn't granularly locked.
2547 	 */
2548 	rw_enter_read(&sc->sc_lock);
2549 	TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2550 		mq_purge(&peer->p_stage_queue);
2551 		wg_timers_disable(&peer->p_timers);
2552 	}
2553 
2554 	taskq_barrier(wg_handshake_taskq);
2555 	TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2556 		noise_remote_clear(&peer->p_remote);
2557 		wg_timers_event_reset_handshake_last_sent(&peer->p_timers);
2558 	}
2559 
2560 	wg_unbind(sc);
2561 	rw_exit_read(&sc->sc_lock);
2562 	NET_LOCK();
2563 }
2564 
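/*
 * Clone creation.  The first wg(4) instance also creates the taskqs
 * shared by all instances: a two-thread handshake taskq and an
 * ncpus-thread crypt taskq; the last instance to go away destroys
 * them again in wg_clone_destroy.
 */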
2565 int
2566 wg_clone_create(struct if_clone *ifc, int unit)
2567 {
2568 	struct ifnet		*ifp;
2569 	struct wg_softc		*sc;
2570 	struct noise_upcall	 local_upcall;
2571 
2572 	KERNEL_ASSERT_LOCKED();
2573 
2574 	if (wg_counter == 0) {
2575 		wg_handshake_taskq = taskq_create("wg_handshake",
2576 		    2, IPL_NET, TASKQ_MPSAFE);
2577 		wg_crypt_taskq = taskq_create("wg_crypt",
2578 		    ncpus, IPL_NET, TASKQ_MPSAFE);
2579 
2580 		if (wg_handshake_taskq == NULL || wg_crypt_taskq == NULL) {
2581 			if (wg_handshake_taskq != NULL)
2582 				taskq_destroy(wg_handshake_taskq);
2583 			if (wg_crypt_taskq != NULL)
2584 				taskq_destroy(wg_crypt_taskq);
2585 			wg_handshake_taskq = NULL;
2586 			wg_crypt_taskq = NULL;
2587 			return ENOTRECOVERABLE;
2588 		}
2589 	}
2590 	wg_counter++;
2591 
2592 	if ((sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL)
2593 		goto ret_00;
2594 
2595 	local_upcall.u_arg = sc;
2596 	local_upcall.u_remote_get = wg_remote_get;
2597 	local_upcall.u_index_set = wg_index_set;
2598 	local_upcall.u_index_drop = wg_index_drop;
2599 
2600 	TAILQ_INIT(&sc->sc_peer_seq);
2601 
2602 	/* sc_if is initialised after everything else */
2603 	arc4random_buf(&sc->sc_secret, sizeof(sc->sc_secret));
2604 
2605 	rw_init(&sc->sc_lock, "wg");
2606 	noise_local_init(&sc->sc_local, &local_upcall);
2607 	if (cookie_checker_init(&sc->sc_cookie, &wg_ratelimit_pool) != 0)
2608 		goto ret_01;
2609 	sc->sc_udp_port = 0;
2610 	sc->sc_udp_rtable = 0;
2611 
2612 	rw_init(&sc->sc_so_lock, "wg_so");
2613 	sc->sc_so4 = NULL;
2614 #ifdef INET6
2615 	sc->sc_so6 = NULL;
2616 #endif
2617 
2618 	sc->sc_aip_num = 0;
2619 	if ((sc->sc_aip4 = art_alloc(0, 32, 0)) == NULL)
2620 		goto ret_02;
2621 #ifdef INET6
2622 	if ((sc->sc_aip6 = art_alloc(0, 128, 0)) == NULL)
2623 		goto ret_03;
2624 #endif
2625 
2626 	rw_init(&sc->sc_peer_lock, "wg_peer");
2627 	sc->sc_peer_num = 0;
2628 	if ((sc->sc_peer = hashinit(HASHTABLE_PEER_SIZE, M_DEVBUF,
2629 	    M_NOWAIT, &sc->sc_peer_mask)) == NULL)
2630 		goto ret_04;
2631 
2632 	mtx_init(&sc->sc_index_mtx, IPL_NET);
2633 	if ((sc->sc_index = hashinit(HASHTABLE_INDEX_SIZE, M_DEVBUF,
2634 	    M_NOWAIT, &sc->sc_index_mask)) == NULL)
2635 		goto ret_05;
2636 
2637 	task_set(&sc->sc_handshake, wg_handshake_worker, sc);
2638 	mq_init(&sc->sc_handshake_queue, MAX_QUEUED_HANDSHAKES, IPL_NET);
2639 
2640 	task_set(&sc->sc_encap, wg_encap_worker, sc);
2641 	task_set(&sc->sc_decap, wg_decap_worker, sc);
2642 
2643 	bzero(&sc->sc_encap_ring, sizeof(sc->sc_encap_ring));
2644 	mtx_init(&sc->sc_encap_ring.r_mtx, IPL_NET);
2645 	bzero(&sc->sc_decap_ring, sizeof(sc->sc_decap_ring));
2646 	mtx_init(&sc->sc_decap_ring.r_mtx, IPL_NET);
2647 
2648 	/* We've set up the softc, now we can set up the ifnet */
2649 	ifp = &sc->sc_if;
2650 	ifp->if_softc = sc;
2651 
2652 	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "wg%d", unit);
2653 
2654 	ifp->if_mtu = DEFAULT_MTU;
2655 	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_NOARP;
2656 	ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
2657 	ifp->if_txmit = 64; /* Keep our workers active for longer. */
2658 
2659 	ifp->if_ioctl = wg_ioctl;
2660 	ifp->if_qstart = wg_qstart;
2661 	ifp->if_output = wg_output;
2662 
2663 	ifp->if_type = IFT_WIREGUARD;
2664 	ifp->if_rtrequest = p2p_rtrequest;
2665 
2666 	if_attach(ifp);
2667 	if_alloc_sadl(ifp);
2668 	if_counters_alloc(ifp);
2669 
2670 #if NBPFILTER > 0
2671 	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
2672 #endif
2673 
2674 	DPRINTF(sc, "Interface created\n");
2675 
2676 	return 0;
2677 ret_05:
2678 	hashfree(sc->sc_peer, HASHTABLE_PEER_SIZE, M_DEVBUF);
2679 ret_04:
2680 #ifdef INET6
2681 	free(sc->sc_aip6, M_RTABLE, sizeof(*sc->sc_aip6));
2682 ret_03:
2683 #endif
2684 	free(sc->sc_aip4, M_RTABLE, sizeof(*sc->sc_aip4));
2685 ret_02:
2686 	cookie_checker_deinit(&sc->sc_cookie);
2687 ret_01:
2688 	free(sc, M_DEVBUF, sizeof(*sc));
2689 ret_00:
2690 	return ENOBUFS;
2691 }
2692 int
2693 wg_clone_destroy(struct ifnet *ifp)
2694 {
2695 	struct wg_softc	*sc = ifp->if_softc;
2696 	struct wg_peer	*peer, *tpeer;
2697 
2698 	KERNEL_ASSERT_LOCKED();
2699 
2700 	rw_enter_write(&sc->sc_lock);
2701 	TAILQ_FOREACH_SAFE(peer, &sc->sc_peer_seq, p_seq_entry, tpeer)
2702 		wg_peer_destroy(peer);
2703 	rw_exit_write(&sc->sc_lock);
2704 
2705 	wg_unbind(sc);
2706 	if_detach(ifp);
2707 
2708 	wg_counter--;
2709 	if (wg_counter == 0) {
2710 		KASSERT(wg_handshake_taskq != NULL && wg_crypt_taskq != NULL);
2711 		taskq_destroy(wg_handshake_taskq);
2712 		taskq_destroy(wg_crypt_taskq);
2713 		wg_handshake_taskq = NULL;
2714 		wg_crypt_taskq = NULL;
2715 	}
2716 
2717 	DPRINTF(sc, "Destroyed interface\n");
2718 
2719 	hashfree(sc->sc_index, HASHTABLE_INDEX_SIZE, M_DEVBUF);
2720 	hashfree(sc->sc_peer, HASHTABLE_PEER_SIZE, M_DEVBUF);
2721 #ifdef INET6
2722 	free(sc->sc_aip6, M_RTABLE, sizeof(*sc->sc_aip6));
2723 #endif
2724 	free(sc->sc_aip4, M_RTABLE, sizeof(*sc->sc_aip4));
2725 	cookie_checker_deinit(&sc->sc_cookie);
2726 	free(sc, M_DEVBUF, sizeof(*sc));
2727 	return 0;
2728 }
2729 
2730 void
2731 wgattach(int nwg)
2732 {
2733 #ifdef WGTEST
2734 	cookie_test();
2735 	noise_test();
2736 #endif
2737 	if_clone_attach(&wg_cloner);
2738 
2739 	pool_init(&wg_aip_pool, sizeof(struct wg_aip), 0,
2740 			IPL_NET, 0, "wgaip", NULL);
2741 	pool_init(&wg_peer_pool, sizeof(struct wg_peer), 0,
2742 			IPL_NET, 0, "wgpeer", NULL);
2743 	pool_init(&wg_ratelimit_pool, sizeof(struct ratelimit_entry), 0,
2744 			IPL_NET, 0, "wgratelimit", NULL);
2745 }
2746