1 /*	$OpenBSD: if_wg.c,v 1.18 2021/08/05 13:37:04 sthen Exp $ */
2 
3 /*
4  * Copyright (C) 2015-2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
5  * Copyright (C) 2019-2020 Matt Dunwoodie <ncon@noconroy.net>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include "bpfilter.h"
21 #include "pf.h"
22 
23 #include <sys/types.h>
24 #include <sys/systm.h>
25 #include <sys/param.h>
26 #include <sys/pool.h>
27 
28 #include <sys/socket.h>
29 #include <sys/socketvar.h>
30 #include <sys/percpu.h>
31 #include <sys/ioctl.h>
32 #include <sys/mbuf.h>
33 #include <sys/protosw.h>
34 
35 #include <net/if.h>
36 #include <net/if_var.h>
37 #include <net/if_types.h>
38 #include <net/if_wg.h>
39 
40 #include <net/wg_noise.h>
41 #include <net/wg_cookie.h>
42 
43 #include <net/pfvar.h>
44 #include <net/route.h>
45 #include <net/bpf.h>
46 
47 #include <netinet/ip.h>
48 #include <netinet/ip6.h>
49 #include <netinet/udp.h>
50 #include <netinet/in_pcb.h>
51 
52 #include <crypto/siphash.h>
53 
54 #define DEFAULT_MTU		1420
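/*
 * 1420 presumably corresponds to a 1500 byte Ethernet MTU less the worst
 * case tunnel overhead: IPv6 (40) + UDP (8) + struct wg_pkt_data header
 * (16) + NOISE_AUTHTAG_LEN (16) bytes.
 */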
55 
56 #define MAX_STAGED_PKT		128
57 #define MAX_QUEUED_PKT		1024
58 #define MAX_QUEUED_PKT_MASK	(MAX_QUEUED_PKT - 1)
59 
60 #define MAX_QUEUED_HANDSHAKES	4096
61 
62 #define HASHTABLE_PEER_SIZE	(1 << 11)
63 #define HASHTABLE_INDEX_SIZE	(1 << 13)
64 #define MAX_PEERS_PER_IFACE	(1 << 20)
65 
66 #define REKEY_TIMEOUT		5
67 #define REKEY_TIMEOUT_JITTER	334 /* 1/3 sec, round for arc4random_uniform */
68 #define KEEPALIVE_TIMEOUT	10
69 #define MAX_TIMER_HANDSHAKES	(90 / REKEY_TIMEOUT)
70 #define NEW_HANDSHAKE_TIMEOUT	(REKEY_TIMEOUT + KEEPALIVE_TIMEOUT)
71 #define UNDERLOAD_TIMEOUT	1
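/*
 * Taken together (see wg_timers_run_retry_handshake() below): a failed
 * handshake is retried every REKEY_TIMEOUT seconds, up to
 * MAX_TIMER_HANDSHAKES (90 / 5 == 18) times, i.e. roughly 90 seconds,
 * before the stage queue is purged and the key material is scheduled to
 * be zeroed.
 */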
72 
73 #define DPRINTF(sc, str, ...) do { if (ISSET((sc)->sc_if.if_flags, IFF_DEBUG))\
74     printf("%s: " str, (sc)->sc_if.if_xname, ##__VA_ARGS__); } while (0)
75 
76 #define CONTAINER_OF(ptr, type, member) ({			\
77 	const __typeof( ((type *)0)->member ) *__mptr = (ptr);	\
78 	(type *)( (char *)__mptr - offsetof(type,member) );})
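/*
 * CONTAINER_OF recovers a pointer to an enclosing structure from a
 * pointer to one of its members; the timer callbacks below use e.g.
 *	peer = CONTAINER_OF(t, struct wg_peer, p_timers);
 * to get from a struct wg_timers back to its struct wg_peer.
 */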
79 
80 /* First byte indicating packet type on the wire */
81 #define WG_PKT_INITIATION htole32(1)
82 #define WG_PKT_RESPONSE htole32(2)
83 #define WG_PKT_COOKIE htole32(3)
84 #define WG_PKT_DATA htole32(4)
85 
86 #define WG_PKT_WITH_PADDING(n)	(((n) + (16-1)) & (~(16-1)))
87 #define WG_KEY_SIZE		WG_KEY_LEN
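/*
 * WG_PKT_WITH_PADDING rounds a plaintext length up to the next multiple
 * of 16 so exact payload sizes are not leaked on the wire, e.g.
 * WG_PKT_WITH_PADDING(37) == 48, while WG_PKT_WITH_PADDING(0) == 0 so
 * keepalives stay empty (see wg_encap()).
 */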
88 
89 struct wg_pkt_initiation {
90 	uint32_t		t;
91 	uint32_t		s_idx;
92 	uint8_t			ue[NOISE_PUBLIC_KEY_LEN];
93 	uint8_t			es[NOISE_PUBLIC_KEY_LEN + NOISE_AUTHTAG_LEN];
94 	uint8_t			ets[NOISE_TIMESTAMP_LEN + NOISE_AUTHTAG_LEN];
95 	struct cookie_macs	m;
96 };
97 
98 struct wg_pkt_response {
99 	uint32_t		t;
100 	uint32_t		s_idx;
101 	uint32_t		r_idx;
102 	uint8_t			ue[NOISE_PUBLIC_KEY_LEN];
103 	uint8_t			en[0 + NOISE_AUTHTAG_LEN];
104 	struct cookie_macs	m;
105 };
106 
107 struct wg_pkt_cookie {
108 	uint32_t		t;
109 	uint32_t		r_idx;
110 	uint8_t			nonce[COOKIE_NONCE_SIZE];
111 	uint8_t			ec[COOKIE_ENCRYPTED_SIZE];
112 };
113 
114 struct wg_pkt_data {
115 	uint32_t		t;
116 	uint32_t		r_idx;
117 	uint8_t			nonce[sizeof(uint64_t)];
118 	uint8_t			buf[];
119 };
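/*
 * On the wire a data packet is this 16 byte header (type, receiver
 * index, 64-bit little endian counter in `nonce') followed by the padded
 * ciphertext in `buf' plus a trailing NOISE_AUTHTAG_LEN byte tag; see
 * wg_encap() and wg_decap().
 */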
120 
121 struct wg_endpoint {
122 	union {
123 		struct sockaddr		r_sa;
124 		struct sockaddr_in	r_sin;
125 #ifdef INET6
126 		struct sockaddr_in6	r_sin6;
127 #endif
128 	} e_remote;
129 	union {
130 		struct in_addr		l_in;
131 #ifdef INET6
132 		struct in6_pktinfo	l_pktinfo6;
133 #define l_in6 l_pktinfo6.ipi6_addr
134 #endif
135 	} e_local;
136 };
137 
138 struct wg_tag {
139 	struct wg_endpoint	 t_endpoint;
140 	struct wg_peer		*t_peer;
141 	struct mbuf		*t_mbuf;
142 	int			 t_done;
143 	int			 t_mtu;
144 };
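/*
 * A wg_tag is attached to every packet passing through the interface (as
 * an m_tag, see wg_tag_get()); t_done and t_mbuf coordinate the serial
 * and parallel queues described in the comment above wg_encap().
 */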
145 
146 struct wg_index {
147 	LIST_ENTRY(wg_index)	 i_entry;
148 	SLIST_ENTRY(wg_index)	 i_unused_entry;
149 	uint32_t		 i_key;
150 	struct noise_remote	*i_value;
151 };
152 
153 struct wg_timers {
154 	/* t_lock is for blocking wg_timers_event_* when setting t_disabled. */
155 	struct rwlock		 t_lock;
156 
157 	int			 t_disabled;
158 	int			 t_need_another_keepalive;
159 	uint16_t		 t_persistent_keepalive_interval;
160 	struct timeout		 t_new_handshake;
161 	struct timeout		 t_send_keepalive;
162 	struct timeout		 t_retry_handshake;
163 	struct timeout		 t_zero_key_material;
164 	struct timeout		 t_persistent_keepalive;
165 
166 	struct mutex		 t_handshake_mtx;
167 	struct timespec		 t_handshake_last_sent;	/* nanouptime */
168 	struct timespec		 t_handshake_complete;	/* nanotime */
169 	int			 t_handshake_retries;
170 };
171 
172 struct wg_aip {
173 	struct art_node		 a_node;
174 	LIST_ENTRY(wg_aip)	 a_entry;
175 	struct wg_peer		*a_peer;
176 	struct wg_aip_io	 a_data;
177 };
178 
179 struct wg_queue {
180 	struct mutex		 q_mtx;
181 	struct mbuf_list	 q_list;
182 };
183 
184 struct wg_ring {
185 	struct mutex	 r_mtx;
186 	uint32_t	 r_head;
187 	uint32_t	 r_tail;
188 	struct mbuf	*r_buf[MAX_QUEUED_PKT];
189 };
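/*
 * wg_queue is a mutex protected mbuf_list used for the per-peer serial
 * queues; wg_ring is a fixed size ring shared by all peers for the
 * parallel work. r_head and r_tail are free running counters masked with
 * MAX_QUEUED_PKT_MASK on access, so r_tail - r_head is the number of
 * packets queued (see wg_queue_in() and wg_ring_dequeue()).
 */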
190 
191 struct wg_peer {
192 	LIST_ENTRY(wg_peer)	 p_pubkey_entry;
193 	TAILQ_ENTRY(wg_peer)	 p_seq_entry;
194 	uint64_t		 p_id;
195 	struct wg_softc		*p_sc;
196 
197 	struct noise_remote	 p_remote;
198 	struct cookie_maker	 p_cookie;
199 	struct wg_timers	 p_timers;
200 
201 	struct mutex		 p_counters_mtx;
202 	uint64_t		 p_counters_tx;
203 	uint64_t		 p_counters_rx;
204 
205 	struct mutex		 p_endpoint_mtx;
206 	struct wg_endpoint	 p_endpoint;
207 
208 	struct task		 p_send_initiation;
209 	struct task		 p_send_keepalive;
210 	struct task		 p_clear_secrets;
211 	struct task		 p_deliver_out;
212 	struct task		 p_deliver_in;
213 
214 	struct mbuf_queue	 p_stage_queue;
215 	struct wg_queue		 p_encap_queue;
216 	struct wg_queue		 p_decap_queue;
217 
218 	SLIST_HEAD(,wg_index)	 p_unused_index;
219 	struct wg_index		 p_index[3];
220 
221 	LIST_HEAD(,wg_aip)	 p_aip;
222 
223 	SLIST_ENTRY(wg_peer)	 p_start_list;
224 	int			 p_start_onlist;
225 };
226 
227 struct wg_softc {
228 	struct ifnet		 sc_if;
229 	SIPHASH_KEY		 sc_secret;
230 
231 	struct rwlock		 sc_lock;
232 	struct noise_local	 sc_local;
233 	struct cookie_checker	 sc_cookie;
234 	in_port_t		 sc_udp_port;
235 	int			 sc_udp_rtable;
236 
237 	struct rwlock		 sc_so_lock;
238 	struct socket		*sc_so4;
239 #ifdef INET6
240 	struct socket		*sc_so6;
241 #endif
242 
243 	size_t			 sc_aip_num;
244 	struct art_root		*sc_aip4;
245 #ifdef INET6
246 	struct art_root		*sc_aip6;
247 #endif
248 
249 	struct rwlock		 sc_peer_lock;
250 	size_t			 sc_peer_num;
251 	LIST_HEAD(,wg_peer)	*sc_peer;
252 	TAILQ_HEAD(,wg_peer)	 sc_peer_seq;
253 	u_long			 sc_peer_mask;
254 
255 	struct mutex		 sc_index_mtx;
256 	LIST_HEAD(,wg_index)	*sc_index;
257 	u_long			 sc_index_mask;
258 
259 	struct task		 sc_handshake;
260 	struct mbuf_queue	 sc_handshake_queue;
261 
262 	struct task		 sc_encap;
263 	struct task		 sc_decap;
264 	struct wg_ring		 sc_encap_ring;
265 	struct wg_ring		 sc_decap_ring;
266 };
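/*
 * A rough sketch of the locking as used below: sc_lock serialises peer
 * creation and destruction (wg_peer_create/destroy assert it write
 * locked), sc_peer_lock guards the pubkey hashtable and peer list,
 * sc_so_lock guards the UDP sockets and sc_index_mtx guards the index
 * hashtable used to match incoming packets to a noise_remote.
 */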
267 
268 struct wg_peer *
269 	wg_peer_create(struct wg_softc *, uint8_t[WG_KEY_SIZE]);
270 struct wg_peer *
271 	wg_peer_lookup(struct wg_softc *, const uint8_t[WG_KEY_SIZE]);
272 void	wg_peer_destroy(struct wg_peer *);
273 void	wg_peer_set_endpoint_from_tag(struct wg_peer *, struct wg_tag *);
274 void	wg_peer_set_sockaddr(struct wg_peer *, struct sockaddr *);
275 int	wg_peer_get_sockaddr(struct wg_peer *, struct sockaddr *);
276 void	wg_peer_clear_src(struct wg_peer *);
277 void	wg_peer_get_endpoint(struct wg_peer *, struct wg_endpoint *);
278 void	wg_peer_counters_add(struct wg_peer *, uint64_t, uint64_t);
279 
280 int	wg_aip_add(struct wg_softc *, struct wg_peer *, struct wg_aip_io *);
281 struct wg_peer *
282 	wg_aip_lookup(struct art_root *, void *);
283 int	wg_aip_remove(struct wg_softc *, struct wg_peer *,
284 	    struct wg_aip_io *);
285 
286 int	wg_socket_open(struct socket **, int, in_port_t *, int *, void *);
287 void	wg_socket_close(struct socket **);
288 int	wg_bind(struct wg_softc *, in_port_t *, int *);
289 void	wg_unbind(struct wg_softc *);
290 int	wg_send(struct wg_softc *, struct wg_endpoint *, struct mbuf *);
291 void	wg_send_buf(struct wg_softc *, struct wg_endpoint *, uint8_t *,
292 	    size_t);
293 
294 struct wg_tag *
295 	wg_tag_get(struct mbuf *);
296 
297 void	wg_timers_init(struct wg_timers *);
298 void	wg_timers_enable(struct wg_timers *);
299 void	wg_timers_disable(struct wg_timers *);
300 void	wg_timers_set_persistent_keepalive(struct wg_timers *, uint16_t);
301 int	wg_timers_get_persistent_keepalive(struct wg_timers *, uint16_t *);
302 void	wg_timers_get_last_handshake(struct wg_timers *, struct timespec *);
303 int	wg_timers_expired_handshake_last_sent(struct wg_timers *);
304 int	wg_timers_check_handshake_last_sent(struct wg_timers *);
305 
306 void	wg_timers_event_data_sent(struct wg_timers *);
307 void	wg_timers_event_data_received(struct wg_timers *);
308 void	wg_timers_event_any_authenticated_packet_sent(struct wg_timers *);
309 void	wg_timers_event_any_authenticated_packet_received(struct wg_timers *);
310 void	wg_timers_event_handshake_initiated(struct wg_timers *);
311 void	wg_timers_event_handshake_responded(struct wg_timers *);
312 void	wg_timers_event_handshake_complete(struct wg_timers *);
313 void	wg_timers_event_session_derived(struct wg_timers *);
314 void	wg_timers_event_any_authenticated_packet_traversal(struct wg_timers *);
315 void	wg_timers_event_want_initiation(struct wg_timers *);
316 void	wg_timers_event_reset_handshake_last_sent(struct wg_timers *);
317 
318 void	wg_timers_run_send_initiation(void *, int);
319 void	wg_timers_run_retry_handshake(void *);
320 void	wg_timers_run_send_keepalive(void *);
321 void	wg_timers_run_new_handshake(void *);
322 void	wg_timers_run_zero_key_material(void *);
323 void	wg_timers_run_persistent_keepalive(void *);
324 
325 void	wg_peer_send_buf(struct wg_peer *, uint8_t *, size_t);
326 void	wg_send_initiation(void *);
327 void	wg_send_response(struct wg_peer *);
328 void	wg_send_cookie(struct wg_softc *, struct cookie_macs *, uint32_t,
329 	    struct wg_endpoint *e);
330 void	wg_send_keepalive(void *);
331 void	wg_peer_clear_secrets(void *);
332 void	wg_handshake(struct wg_softc *, struct mbuf *);
333 void	wg_handshake_worker(void *);
334 
335 void	wg_encap(struct wg_softc *, struct mbuf *);
336 void	wg_decap(struct wg_softc *, struct mbuf *);
337 void	wg_encap_worker(void *);
338 void	wg_decap_worker(void *);
339 void	wg_deliver_out(void *);
340 void	wg_deliver_in(void *);
341 
342 int	wg_queue_in(struct wg_softc *, struct wg_peer *, struct mbuf *);
343 void	wg_queue_out(struct wg_softc *, struct wg_peer *);
344 struct mbuf *
345 	wg_ring_dequeue(struct wg_ring *);
346 struct mbuf *
347 	wg_queue_dequeue(struct wg_queue *, struct wg_tag **);
348 size_t	wg_queue_len(struct wg_queue *);
349 
350 struct noise_remote *
351 	wg_remote_get(void *, uint8_t[NOISE_PUBLIC_KEY_LEN]);
352 uint32_t
353 	wg_index_set(void *, struct noise_remote *);
354 struct noise_remote *
355 	wg_index_get(void *, uint32_t);
356 void	wg_index_drop(void *, uint32_t);
357 
358 struct mbuf *
359 	wg_input(void *, struct mbuf *, struct ip *, struct ip6_hdr *, void *,
360 	    int);
361 int	wg_output(struct ifnet *, struct mbuf *, struct sockaddr *,
362 	    struct rtentry *);
363 int	wg_ioctl_set(struct wg_softc *, struct wg_data_io *);
364 int	wg_ioctl_get(struct wg_softc *, struct wg_data_io *);
365 int	wg_ioctl(struct ifnet *, u_long, caddr_t);
366 int	wg_up(struct wg_softc *);
367 void	wg_down(struct wg_softc *);
368 
369 int	wg_clone_create(struct if_clone *, int);
370 int	wg_clone_destroy(struct ifnet *);
371 void	wgattach(int);
372 
373 uint64_t	peer_counter = 0;
374 uint64_t	keypair_counter = 0;
375 struct pool	wg_aip_pool;
376 struct pool	wg_peer_pool;
377 struct pool	wg_ratelimit_pool;
378 struct timeval	underload_interval = { UNDERLOAD_TIMEOUT, 0 };
379 
380 size_t		 wg_counter = 0;
381 struct taskq	*wg_handshake_taskq;
382 struct taskq	*wg_crypt_taskq;
383 
384 struct if_clone	wg_cloner =
385     IF_CLONE_INITIALIZER("wg", wg_clone_create, wg_clone_destroy);
386 
387 struct wg_peer *
388 wg_peer_create(struct wg_softc *sc, uint8_t public[WG_KEY_SIZE])
389 {
390 	struct wg_peer	*peer;
391 	uint64_t	 idx;
392 
393 	rw_assert_wrlock(&sc->sc_lock);
394 
395 	if (sc->sc_peer_num >= MAX_PEERS_PER_IFACE)
396 		return NULL;
397 
398 	if ((peer = pool_get(&wg_peer_pool, PR_NOWAIT)) == NULL)
399 		return NULL;
400 
401 	peer->p_id = peer_counter++;
402 	peer->p_sc = sc;
403 
404 	noise_remote_init(&peer->p_remote, public, &sc->sc_local);
405 	cookie_maker_init(&peer->p_cookie, public);
406 	wg_timers_init(&peer->p_timers);
407 
408 	mtx_init(&peer->p_counters_mtx, IPL_NET);
409 	peer->p_counters_tx = 0;
410 	peer->p_counters_rx = 0;
411 
412 	mtx_init(&peer->p_endpoint_mtx, IPL_NET);
413 	bzero(&peer->p_endpoint, sizeof(peer->p_endpoint));
414 
415 	task_set(&peer->p_send_initiation, wg_send_initiation, peer);
416 	task_set(&peer->p_send_keepalive, wg_send_keepalive, peer);
417 	task_set(&peer->p_clear_secrets, wg_peer_clear_secrets, peer);
418 	task_set(&peer->p_deliver_out, wg_deliver_out, peer);
419 	task_set(&peer->p_deliver_in, wg_deliver_in, peer);
420 
421 	mq_init(&peer->p_stage_queue, MAX_STAGED_PKT, IPL_NET);
422 	mtx_init(&peer->p_encap_queue.q_mtx, IPL_NET);
423 	ml_init(&peer->p_encap_queue.q_list);
424 	mtx_init(&peer->p_decap_queue.q_mtx, IPL_NET);
425 	ml_init(&peer->p_decap_queue.q_list);
426 
427 	SLIST_INIT(&peer->p_unused_index);
428 	SLIST_INSERT_HEAD(&peer->p_unused_index, &peer->p_index[0],
429 	    i_unused_entry);
430 	SLIST_INSERT_HEAD(&peer->p_unused_index, &peer->p_index[1],
431 	    i_unused_entry);
432 	SLIST_INSERT_HEAD(&peer->p_unused_index, &peer->p_index[2],
433 	    i_unused_entry);
434 
435 	LIST_INIT(&peer->p_aip);
436 
437 	peer->p_start_onlist = 0;
438 
439 	idx = SipHash24(&sc->sc_secret, public, WG_KEY_SIZE);
440 	idx &= sc->sc_peer_mask;
441 
442 	rw_enter_write(&sc->sc_peer_lock);
443 	LIST_INSERT_HEAD(&sc->sc_peer[idx], peer, p_pubkey_entry);
444 	TAILQ_INSERT_TAIL(&sc->sc_peer_seq, peer, p_seq_entry);
445 	sc->sc_peer_num++;
446 	rw_exit_write(&sc->sc_peer_lock);
447 
448 	DPRINTF(sc, "Peer %llu created\n", peer->p_id);
449 	return peer;
450 }
451 
452 struct wg_peer *
453 wg_peer_lookup(struct wg_softc *sc, const uint8_t public[WG_KEY_SIZE])
454 {
455 	uint8_t		 peer_key[WG_KEY_SIZE];
456 	struct wg_peer	*peer;
457 	uint64_t	 idx;
458 
459 	idx = SipHash24(&sc->sc_secret, public, WG_KEY_SIZE);
460 	idx &= sc->sc_peer_mask;
461 
462 	rw_enter_read(&sc->sc_peer_lock);
463 	LIST_FOREACH(peer, &sc->sc_peer[idx], p_pubkey_entry) {
464 		noise_remote_keys(&peer->p_remote, peer_key, NULL);
465 		if (timingsafe_bcmp(peer_key, public, WG_KEY_SIZE) == 0)
466 			goto done;
467 	}
468 	peer = NULL;
469 done:
470 	rw_exit_read(&sc->sc_peer_lock);
471 	return peer;
472 }
473 
474 void
475 wg_peer_destroy(struct wg_peer *peer)
476 {
477 	struct wg_softc	*sc = peer->p_sc;
478 	struct wg_aip *aip, *taip;
479 
480 	rw_assert_wrlock(&sc->sc_lock);
481 
482 	/*
483 	 * Remove peer from the pubkey hashtable and disable all timeouts.
484 	 * After this, and after flushing wg_handshake_taskq, no more handshakes
485 	 * can be started.
486 	 */
487 	rw_enter_write(&sc->sc_peer_lock);
488 	LIST_REMOVE(peer, p_pubkey_entry);
489 	TAILQ_REMOVE(&sc->sc_peer_seq, peer, p_seq_entry);
490 	sc->sc_peer_num--;
491 	rw_exit_write(&sc->sc_peer_lock);
492 
493 	wg_timers_disable(&peer->p_timers);
494 
495 	taskq_barrier(wg_handshake_taskq);
496 
497 	/*
498 	 * Now we drop all allowed ips, to drop all outgoing packets to the
499 	 * peer. Then drop all the indexes to drop all incoming packets to the
500 	 * peer. Then we can flush if_snd, wg_crypt_taskq and then nettq to
501 	 * ensure no more references to the peer exist.
502 	 */
503 	LIST_FOREACH_SAFE(aip, &peer->p_aip, a_entry, taip)
504 		wg_aip_remove(sc, peer, &aip->a_data);
505 
506 	noise_remote_clear(&peer->p_remote);
507 
508 	NET_LOCK();
509 	while (!ifq_empty(&sc->sc_if.if_snd)) {
510 		NET_UNLOCK();
511 		tsleep_nsec(sc, PWAIT, "wg_ifq", 1000);
512 		NET_LOCK();
513 	}
514 	NET_UNLOCK();
515 
516 	taskq_barrier(wg_crypt_taskq);
517 	taskq_barrier(net_tq(sc->sc_if.if_index));
518 
519 	DPRINTF(sc, "Peer %llu destroyed\n", peer->p_id);
520 	explicit_bzero(peer, sizeof(*peer));
521 	pool_put(&wg_peer_pool, peer);
522 }
523 
524 void
525 wg_peer_set_endpoint_from_tag(struct wg_peer *peer, struct wg_tag *t)
526 {
527 	if (memcmp(&t->t_endpoint, &peer->p_endpoint,
528 	    sizeof(t->t_endpoint)) == 0)
529 		return;
530 
531 	mtx_enter(&peer->p_endpoint_mtx);
532 	peer->p_endpoint = t->t_endpoint;
533 	mtx_leave(&peer->p_endpoint_mtx);
534 }
535 
536 void
537 wg_peer_set_sockaddr(struct wg_peer *peer, struct sockaddr *remote)
538 {
539 	mtx_enter(&peer->p_endpoint_mtx);
540 	memcpy(&peer->p_endpoint.e_remote, remote,
541 	       sizeof(peer->p_endpoint.e_remote));
542 	bzero(&peer->p_endpoint.e_local, sizeof(peer->p_endpoint.e_local));
543 	mtx_leave(&peer->p_endpoint_mtx);
544 }
545 
546 int
547 wg_peer_get_sockaddr(struct wg_peer *peer, struct sockaddr *remote)
548 {
549 	int	ret = 0;
550 
551 	mtx_enter(&peer->p_endpoint_mtx);
552 	if (peer->p_endpoint.e_remote.r_sa.sa_family != AF_UNSPEC)
553 		memcpy(remote, &peer->p_endpoint.e_remote,
554 		       sizeof(peer->p_endpoint.e_remote));
555 	else
556 		ret = ENOENT;
557 	mtx_leave(&peer->p_endpoint_mtx);
558 	return ret;
559 }
560 
561 void
562 wg_peer_clear_src(struct wg_peer *peer)
563 {
564 	mtx_enter(&peer->p_endpoint_mtx);
565 	bzero(&peer->p_endpoint.e_local, sizeof(peer->p_endpoint.e_local));
566 	mtx_leave(&peer->p_endpoint_mtx);
567 }
568 
569 void
570 wg_peer_get_endpoint(struct wg_peer *peer, struct wg_endpoint *endpoint)
571 {
572 	mtx_enter(&peer->p_endpoint_mtx);
573 	memcpy(endpoint, &peer->p_endpoint, sizeof(*endpoint));
574 	mtx_leave(&peer->p_endpoint_mtx);
575 }
576 
577 void
578 wg_peer_counters_add(struct wg_peer *peer, uint64_t tx, uint64_t rx)
579 {
580 	mtx_enter(&peer->p_counters_mtx);
581 	peer->p_counters_tx += tx;
582 	peer->p_counters_rx += rx;
583 	mtx_leave(&peer->p_counters_mtx);
584 }
585 
586 int
587 wg_aip_add(struct wg_softc *sc, struct wg_peer *peer, struct wg_aip_io *d)
588 {
589 	struct art_root	*root;
590 	struct art_node	*node;
591 	struct wg_aip	*aip;
592 	int		 ret = 0;
593 
594 	switch (d->a_af) {
595 	case AF_INET:	root = sc->sc_aip4; break;
596 #ifdef INET6
597 	case AF_INET6:	root = sc->sc_aip6; break;
598 #endif
599 	default: return EAFNOSUPPORT;
600 	}
601 
602 	if ((aip = pool_get(&wg_aip_pool, PR_NOWAIT)) == NULL)
603 		return ENOBUFS;
604 	bzero(aip, sizeof(*aip));
605 
606 	rw_enter_write(&root->ar_lock);
607 	node = art_insert(root, &aip->a_node, &d->a_addr, d->a_cidr);
608 
609 	if (node == &aip->a_node) {
610 		aip->a_peer = peer;
611 		aip->a_data = *d;
612 		LIST_INSERT_HEAD(&peer->p_aip, aip, a_entry);
613 		sc->sc_aip_num++;
614 	} else {
615 		pool_put(&wg_aip_pool, aip);
616 		aip = (struct wg_aip *) node;
617 		if (aip->a_peer != peer) {
618 			LIST_REMOVE(aip, a_entry);
619 			LIST_INSERT_HEAD(&peer->p_aip, aip, a_entry);
620 			aip->a_peer = peer;
621 		}
622 	}
623 	rw_exit_write(&root->ar_lock);
624 	return ret;
625 }
626 
627 struct wg_peer *
628 wg_aip_lookup(struct art_root *root, void *addr)
629 {
630 	struct srp_ref	 sr;
631 	struct art_node	*node;
632 
633 	node = art_match(root, addr, &sr);
634 	srp_leave(&sr);
635 
636 	return node == NULL ? NULL : ((struct wg_aip *) node)->a_peer;
637 }
638 
639 int
640 wg_aip_remove(struct wg_softc *sc, struct wg_peer *peer, struct wg_aip_io *d)
641 {
642 	struct srp_ref	 sr;
643 	struct art_root	*root;
644 	struct art_node	*node;
645 	struct wg_aip	*aip;
646 	int		 ret = 0;
647 
648 	switch (d->a_af) {
649 	case AF_INET:	root = sc->sc_aip4; break;
650 #ifdef INET6
651 	case AF_INET6:	root = sc->sc_aip6; break;
652 #endif
653 	default: return EAFNOSUPPORT;
654 	}
655 
656 	rw_enter_write(&root->ar_lock);
657 	if ((node = art_lookup(root, &d->a_addr, d->a_cidr, &sr)) == NULL) {
658 		ret = ENOENT;
659 	} else if (((struct wg_aip *) node)->a_peer != peer) {
660 		ret = EXDEV;
661 	} else {
662 		aip = (struct wg_aip *)node;
663 		if (art_delete(root, node, &d->a_addr, d->a_cidr) == NULL)
664 			panic("art_delete failed to delete node %p", node);
665 
666 		sc->sc_aip_num--;
667 		LIST_REMOVE(aip, a_entry);
668 		pool_put(&wg_aip_pool, aip);
669 	}
670 
671 	srp_leave(&sr);
672 	rw_exit_write(&root->ar_lock);
673 	return ret;
674 }
675 
676 int
677 wg_socket_open(struct socket **so, int af, in_port_t *port,
678     int *rtable, void *upcall_arg)
679 {
680 	struct mbuf		 mhostnam, mrtable;
681 #ifdef INET6
682 	struct sockaddr_in6	*sin6;
683 #endif
684 	struct sockaddr_in	*sin;
685 	int			 ret, s;
686 
687 	m_inithdr(&mhostnam);
688 	m_inithdr(&mrtable);
689 
690 	bzero(mtod(&mrtable, u_int *), sizeof(u_int));
691 	*mtod(&mrtable, u_int *) = *rtable;
692 	mrtable.m_len = sizeof(u_int);
693 
694 	if (af == AF_INET) {
695 		sin = mtod(&mhostnam, struct sockaddr_in *);
696 		bzero(sin, sizeof(*sin));
697 		sin->sin_len = sizeof(*sin);
698 		sin->sin_family = AF_INET;
699 		sin->sin_port = *port;
700 		sin->sin_addr.s_addr = INADDR_ANY;
701 		mhostnam.m_len = sin->sin_len;
702 #ifdef INET6
703 	} else if (af == AF_INET6) {
704 		sin6 = mtod(&mhostnam, struct sockaddr_in6 *);
705 		bzero(sin6, sizeof(*sin6));
706 		sin6->sin6_len = sizeof(*sin6);
707 		sin6->sin6_family = AF_INET6;
708 		sin6->sin6_port = *port;
709 		sin6->sin6_addr = (struct in6_addr) { .s6_addr = { 0 } };
710 		mhostnam.m_len = sin6->sin6_len;
711 #endif
712 	} else {
713 		return EAFNOSUPPORT;
714 	}
715 
716 	if ((ret = socreate(af, so, SOCK_DGRAM, 0)) != 0)
717 		return ret;
718 
719 	s = solock(*so);
720 	sotoinpcb(*so)->inp_upcall = wg_input;
721 	sotoinpcb(*so)->inp_upcall_arg = upcall_arg;
722 
723 	if ((ret = sosetopt(*so, SOL_SOCKET, SO_RTABLE, &mrtable)) == 0) {
724 		if ((ret = sobind(*so, &mhostnam, curproc)) == 0) {
725 			*port = sotoinpcb(*so)->inp_lport;
726 			*rtable = sotoinpcb(*so)->inp_rtableid;
727 		}
728 	}
729 	sounlock(*so, s);
730 
731 	if (ret != 0)
732 		wg_socket_close(so);
733 
734 	return ret;
735 }
736 
737 void
738 wg_socket_close(struct socket **so)
739 {
740 	if (*so != NULL && soclose(*so, 0) != 0)
741 		panic("Unable to close wg socket");
742 	*so = NULL;
743 }
744 
745 int
746 wg_bind(struct wg_softc *sc, in_port_t *portp, int *rtablep)
747 {
748 	int		 ret = 0, rtable = *rtablep;
749 	in_port_t	 port = *portp;
750 	struct socket	*so4;
751 #ifdef INET6
752 	struct socket	*so6;
753 	int		 retries = 0;
754 retry:
755 #endif
756 	if ((ret = wg_socket_open(&so4, AF_INET, &port, &rtable, sc)) != 0)
757 		return ret;
758 
759 #ifdef INET6
760 	if ((ret = wg_socket_open(&so6, AF_INET6, &port, &rtable, sc)) != 0) {
761 		if (ret == EADDRINUSE && *portp == 0 && retries++ < 100)
762 			goto retry;
763 		wg_socket_close(&so4);
764 		return ret;
765 	}
766 #endif
767 
768 	rw_enter_write(&sc->sc_so_lock);
769 	wg_socket_close(&sc->sc_so4);
770 	sc->sc_so4 = so4;
771 #ifdef INET6
772 	wg_socket_close(&sc->sc_so6);
773 	sc->sc_so6 = so6;
774 #endif
775 	rw_exit_write(&sc->sc_so_lock);
776 
777 	*portp = port;
778 	*rtablep = rtable;
779 	return 0;
780 }
781 
782 void
783 wg_unbind(struct wg_softc *sc)
784 {
785 	rw_enter_write(&sc->sc_so_lock);
786 	wg_socket_close(&sc->sc_so4);
787 #ifdef INET6
788 	wg_socket_close(&sc->sc_so6);
789 #endif
790 	rw_exit_write(&sc->sc_so_lock);
791 }
792 
793 int
794 wg_send(struct wg_softc *sc, struct wg_endpoint *e, struct mbuf *m)
795 {
796 	struct mbuf	 peernam, *control = NULL;
797 	int		 ret;
798 
799 	/* Get local control address before locking */
800 	if (e->e_remote.r_sa.sa_family == AF_INET) {
801 		if (e->e_local.l_in.s_addr != INADDR_ANY)
802 			control = sbcreatecontrol(&e->e_local.l_in,
803 			    sizeof(struct in_addr), IP_SENDSRCADDR,
804 			    IPPROTO_IP);
805 #ifdef INET6
806 	} else if (e->e_remote.r_sa.sa_family == AF_INET6) {
807 		if (!IN6_IS_ADDR_UNSPECIFIED(&e->e_local.l_in6))
808 			control = sbcreatecontrol(&e->e_local.l_pktinfo6,
809 			    sizeof(struct in6_pktinfo), IPV6_PKTINFO,
810 			    IPPROTO_IPV6);
811 #endif
812 	} else {
813 		m_freem(m);
814 		return EAFNOSUPPORT;
815 	}
816 
817 	/* Get remote address */
818 	peernam.m_type = MT_SONAME;
819 	peernam.m_next = NULL;
820 	peernam.m_nextpkt = NULL;
821 	peernam.m_data = (void *)&e->e_remote.r_sa;
822 	peernam.m_len = e->e_remote.r_sa.sa_len;
823 	peernam.m_flags = 0;
824 
825 	rw_enter_read(&sc->sc_so_lock);
826 	if (e->e_remote.r_sa.sa_family == AF_INET && sc->sc_so4 != NULL)
827 		ret = sosend(sc->sc_so4, &peernam, NULL, m, control, 0);
828 #ifdef INET6
829 	else if (e->e_remote.r_sa.sa_family == AF_INET6 && sc->sc_so6 != NULL)
830 		ret = sosend(sc->sc_so6, &peernam, NULL, m, control, 0);
831 #endif
832 	else {
833 		ret = ENOTCONN;
834 		m_freem(control);
835 		m_freem(m);
836 	}
837 	rw_exit_read(&sc->sc_so_lock);
838 
839 	return ret;
840 }
841 
842 void
843 wg_send_buf(struct wg_softc *sc, struct wg_endpoint *e, uint8_t *buf,
844     size_t len)
845 {
846 	struct mbuf	*m;
847 	int		 ret = 0;
848 
849 retry:
850 	m = m_gethdr(M_WAIT, MT_DATA);
851 	m->m_len = 0;
852 	m_copyback(m, 0, len, buf, M_WAIT);
853 
854 	/* As we're sending a handshake packet here, we want high priority */
855 	m->m_pkthdr.pf.prio = IFQ_MAXPRIO;
856 
857 	if (ret == 0) {
858 		ret = wg_send(sc, e, m);
859 		/* Retry if we couldn't bind to e->e_local */
860 		if (ret == EADDRNOTAVAIL) {
861 			bzero(&e->e_local, sizeof(e->e_local));
862 			goto retry;
863 		}
864 	} else {
865 		ret = wg_send(sc, e, m);
866 		if (ret != 0)
867 			DPRINTF(sc, "Unable to send packet\n");
868 	}
869 }
870 
871 struct wg_tag *
872 wg_tag_get(struct mbuf *m)
873 {
874 	struct m_tag	*mtag;
875 
876 	if ((mtag = m_tag_find(m, PACKET_TAG_WIREGUARD, NULL)) == NULL) {
877 		mtag = m_tag_get(PACKET_TAG_WIREGUARD, sizeof(struct wg_tag),
878 		    M_NOWAIT);
879 		if (mtag == NULL)
880 			return (NULL);
881 		bzero(mtag + 1, sizeof(struct wg_tag));
882 		m_tag_prepend(m, mtag);
883 	}
884 	return ((struct wg_tag *)(mtag + 1));
885 }
886 
887 /*
888  * The following section handles the timeout callbacks for a WireGuard session.
889  * These functions provide an "event based" model for controlling wg(8) session
890  * timers. All function calls occur after the specified event below.
891  *
892  * wg_timers_event_data_sent:
893  *	tx: data
894  * wg_timers_event_data_received:
895  *	rx: data
896  * wg_timers_event_any_authenticated_packet_sent:
897  *	tx: keepalive, data, handshake
898  * wg_timers_event_any_authenticated_packet_received:
899  *	rx: keepalive, data, handshake
900  * wg_timers_event_any_authenticated_packet_traversal:
901  *	tx, rx: keepalive, data, handshake
902  * wg_timers_event_handshake_initiated:
903  *	tx: initiation
904  * wg_timers_event_handshake_responded:
905  *	tx: response
906  * wg_timers_event_handshake_complete:
907  *	rx: response, confirmation data
908  * wg_timers_event_session_derived:
909  *	tx: response, rx: response
910  * wg_timers_event_want_initiation:
911  *	tx: data failed, old keys expiring
912  * wg_timers_event_reset_handshake_last_sent:
913  * 	anytime we may immediately want a new handshake
914  */
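/*
 * As a concrete example of the flow: when data is sent and no reply
 * arrives, wg_timers_event_data_sent() has armed t_new_handshake; after
 * NEW_HANDSHAKE_TIMEOUT (plus jitter) wg_timers_run_new_handshake()
 * clears the cached source address and queues a fresh initiation, which
 * in turn arms t_retry_handshake via
 * wg_timers_event_handshake_initiated().
 */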
915 void
916 wg_timers_init(struct wg_timers *t)
917 {
918 	bzero(t, sizeof(*t));
919 	rw_init(&t->t_lock, "wg_timers");
920 	mtx_init(&t->t_handshake_mtx, IPL_NET);
921 
922 	timeout_set(&t->t_new_handshake, wg_timers_run_new_handshake, t);
923 	timeout_set(&t->t_send_keepalive, wg_timers_run_send_keepalive, t);
924 	timeout_set(&t->t_retry_handshake, wg_timers_run_retry_handshake, t);
925 	timeout_set(&t->t_persistent_keepalive,
926 	    wg_timers_run_persistent_keepalive, t);
927 	timeout_set(&t->t_zero_key_material,
928 	    wg_timers_run_zero_key_material, t);
929 }
930 
931 void
932 wg_timers_enable(struct wg_timers *t)
933 {
934 	rw_enter_write(&t->t_lock);
935 	t->t_disabled = 0;
936 	rw_exit_write(&t->t_lock);
937 	wg_timers_run_persistent_keepalive(t);
938 }
939 
940 void
941 wg_timers_disable(struct wg_timers *t)
942 {
943 	rw_enter_write(&t->t_lock);
944 	t->t_disabled = 1;
945 	t->t_need_another_keepalive = 0;
946 	rw_exit_write(&t->t_lock);
947 
948 	timeout_del_barrier(&t->t_new_handshake);
949 	timeout_del_barrier(&t->t_send_keepalive);
950 	timeout_del_barrier(&t->t_retry_handshake);
951 	timeout_del_barrier(&t->t_persistent_keepalive);
952 	timeout_del_barrier(&t->t_zero_key_material);
953 }
954 
955 void
956 wg_timers_set_persistent_keepalive(struct wg_timers *t, uint16_t interval)
957 {
958 	rw_enter_read(&t->t_lock);
959 	if (!t->t_disabled) {
960 		t->t_persistent_keepalive_interval = interval;
961 		wg_timers_run_persistent_keepalive(t);
962 	}
963 	rw_exit_read(&t->t_lock);
964 }
965 
966 int
967 wg_timers_get_persistent_keepalive(struct wg_timers *t, uint16_t *interval)
968 {
969 	*interval = t->t_persistent_keepalive_interval;
970 	return *interval > 0 ? 0 : ENOENT;
971 }
972 
973 void
974 wg_timers_get_last_handshake(struct wg_timers *t, struct timespec *time)
975 {
976 	mtx_enter(&t->t_handshake_mtx);
977 	*time = t->t_handshake_complete;
978 	mtx_leave(&t->t_handshake_mtx);
979 }
980 
981 int
982 wg_timers_expired_handshake_last_sent(struct wg_timers *t)
983 {
984 	struct timespec uptime;
985 	struct timespec expire = { .tv_sec = REKEY_TIMEOUT, .tv_nsec = 0 };
986 
987 	getnanouptime(&uptime);
988 	timespecadd(&t->t_handshake_last_sent, &expire, &expire);
989 	return timespeccmp(&uptime, &expire, >) ? ETIMEDOUT : 0;
990 }
991 
992 int
993 wg_timers_check_handshake_last_sent(struct wg_timers *t)
994 {
995 	int ret;
996 	mtx_enter(&t->t_handshake_mtx);
997 	if ((ret = wg_timers_expired_handshake_last_sent(t)) == ETIMEDOUT)
998 		getnanouptime(&t->t_handshake_last_sent);
999 	mtx_leave(&t->t_handshake_mtx);
1000 	return ret;
1001 }
1002 
1003 void
1004 wg_timers_event_data_sent(struct wg_timers *t)
1005 {
1006 	int	msecs = NEW_HANDSHAKE_TIMEOUT * 1000;
1007 	msecs += arc4random_uniform(REKEY_TIMEOUT_JITTER);
1008 
1009 	rw_enter_read(&t->t_lock);
1010 	if (!t->t_disabled && !timeout_pending(&t->t_new_handshake))
1011 		timeout_add_msec(&t->t_new_handshake, msecs);
1012 	rw_exit_read(&t->t_lock);
1013 }
1014 
1015 void
1016 wg_timers_event_data_received(struct wg_timers *t)
1017 {
1018 	rw_enter_read(&t->t_lock);
1019 	if (!t->t_disabled) {
1020 		if (!timeout_pending(&t->t_send_keepalive))
1021 			timeout_add_sec(&t->t_send_keepalive,
1022 			    KEEPALIVE_TIMEOUT);
1023 		else
1024 			t->t_need_another_keepalive = 1;
1025 	}
1026 	rw_exit_read(&t->t_lock);
1027 }
1028 
1029 void
1030 wg_timers_event_any_authenticated_packet_sent(struct wg_timers *t)
1031 {
1032 	timeout_del(&t->t_send_keepalive);
1033 }
1034 
1035 void
1036 wg_timers_event_any_authenticated_packet_received(struct wg_timers *t)
1037 {
1038 	timeout_del(&t->t_new_handshake);
1039 }
1040 
1041 void
1042 wg_timers_event_any_authenticated_packet_traversal(struct wg_timers *t)
1043 {
1044 	rw_enter_read(&t->t_lock);
1045 	if (!t->t_disabled && t->t_persistent_keepalive_interval > 0)
1046 		timeout_add_sec(&t->t_persistent_keepalive,
1047 		    t->t_persistent_keepalive_interval);
1048 	rw_exit_read(&t->t_lock);
1049 }
1050 
1051 void
1052 wg_timers_event_handshake_initiated(struct wg_timers *t)
1053 {
1054 	int	msecs = REKEY_TIMEOUT * 1000;
1055 	msecs += arc4random_uniform(REKEY_TIMEOUT_JITTER);
1056 
1057 	rw_enter_read(&t->t_lock);
1058 	if (!t->t_disabled)
1059 		timeout_add_msec(&t->t_retry_handshake, msecs);
1060 	rw_exit_read(&t->t_lock);
1061 }
1062 
1063 void
1064 wg_timers_event_handshake_responded(struct wg_timers *t)
1065 {
1066 	mtx_enter(&t->t_handshake_mtx);
1067 	getnanouptime(&t->t_handshake_last_sent);
1068 	mtx_leave(&t->t_handshake_mtx);
1069 }
1070 
1071 void
1072 wg_timers_event_handshake_complete(struct wg_timers *t)
1073 {
1074 	rw_enter_read(&t->t_lock);
1075 	if (!t->t_disabled) {
1076 		mtx_enter(&t->t_handshake_mtx);
1077 		timeout_del(&t->t_retry_handshake);
1078 		t->t_handshake_retries = 0;
1079 		getnanotime(&t->t_handshake_complete);
1080 		mtx_leave(&t->t_handshake_mtx);
1081 		wg_timers_run_send_keepalive(t);
1082 	}
1083 	rw_exit_read(&t->t_lock);
1084 }
1085 
1086 void
1087 wg_timers_event_session_derived(struct wg_timers *t)
1088 {
1089 	rw_enter_read(&t->t_lock);
1090 	if (!t->t_disabled)
1091 		timeout_add_sec(&t->t_zero_key_material, REJECT_AFTER_TIME * 3);
1092 	rw_exit_read(&t->t_lock);
1093 }
1094 
1095 void
1096 wg_timers_event_want_initiation(struct wg_timers *t)
1097 {
1098 	rw_enter_read(&t->t_lock);
1099 	if (!t->t_disabled)
1100 		wg_timers_run_send_initiation(t, 0);
1101 	rw_exit_read(&t->t_lock);
1102 }
1103 
1104 void
1105 wg_timers_event_reset_handshake_last_sent(struct wg_timers *t)
1106 {
1107 	mtx_enter(&t->t_handshake_mtx);
1108 	t->t_handshake_last_sent.tv_sec -= (REKEY_TIMEOUT + 1);
1109 	mtx_leave(&t->t_handshake_mtx);
1110 }
1111 
1112 void
1113 wg_timers_run_send_initiation(void *_t, int is_retry)
1114 {
1115 	struct wg_timers *t = _t;
1116 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1117 	if (!is_retry)
1118 		t->t_handshake_retries = 0;
1119 	if (wg_timers_expired_handshake_last_sent(t) == ETIMEDOUT)
1120 		task_add(wg_handshake_taskq, &peer->p_send_initiation);
1121 }
1122 
1123 void
1124 wg_timers_run_retry_handshake(void *_t)
1125 {
1126 	struct wg_timers *t = _t;
1127 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1128 
1129 	mtx_enter(&t->t_handshake_mtx);
1130 	if (t->t_handshake_retries <= MAX_TIMER_HANDSHAKES) {
1131 		t->t_handshake_retries++;
1132 		mtx_leave(&t->t_handshake_mtx);
1133 
1134 		DPRINTF(peer->p_sc, "Handshake for peer %llu did not complete "
1135 		    "after %d seconds, retrying (try %d)\n", peer->p_id,
1136 		    REKEY_TIMEOUT, t->t_handshake_retries + 1);
1137 		wg_peer_clear_src(peer);
1138 		wg_timers_run_send_initiation(t, 1);
1139 	} else {
1140 		mtx_leave(&t->t_handshake_mtx);
1141 
1142 		DPRINTF(peer->p_sc, "Handshake for peer %llu did not complete "
1143 		    "after %d retries, giving up\n", peer->p_id,
1144 		    MAX_TIMER_HANDSHAKES + 2);
1145 
1146 		timeout_del(&t->t_send_keepalive);
1147 		mq_purge(&peer->p_stage_queue);
1148 		if (!timeout_pending(&t->t_zero_key_material))
1149 			timeout_add_sec(&t->t_zero_key_material,
1150 			    REJECT_AFTER_TIME * 3);
1151 	}
1152 }
1153 
1154 void
1155 wg_timers_run_send_keepalive(void *_t)
1156 {
1157 	struct wg_timers *t = _t;
1158 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1159 
1160 	task_add(wg_crypt_taskq, &peer->p_send_keepalive);
1161 	if (t->t_need_another_keepalive) {
1162 		t->t_need_another_keepalive = 0;
1163 		timeout_add_sec(&t->t_send_keepalive, KEEPALIVE_TIMEOUT);
1164 	}
1165 }
1166 
1167 void
1168 wg_timers_run_new_handshake(void *_t)
1169 {
1170 	struct wg_timers *t = _t;
1171 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1172 
1173 	DPRINTF(peer->p_sc, "Retrying handshake with peer %llu because we "
1174 	    "stopped hearing back after %d seconds\n",
1175 	    peer->p_id, NEW_HANDSHAKE_TIMEOUT);
1176 	wg_peer_clear_src(peer);
1177 
1178 	wg_timers_run_send_initiation(t, 0);
1179 }
1180 
1181 void
1182 wg_timers_run_zero_key_material(void *_t)
1183 {
1184 	struct wg_timers *t = _t;
1185 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1186 
1187 	DPRINTF(peer->p_sc, "Zeroing out keys for peer %llu\n", peer->p_id);
1188 	task_add(wg_handshake_taskq, &peer->p_clear_secrets);
1189 }
1190 
1191 void
1192 wg_timers_run_persistent_keepalive(void *_t)
1193 {
1194 	struct wg_timers *t = _t;
1195 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1196 	if (t->t_persistent_keepalive_interval != 0)
1197 		task_add(wg_crypt_taskq, &peer->p_send_keepalive);
1198 }
1199 
1200 /* The following functions handle handshakes */
1201 void
1202 wg_peer_send_buf(struct wg_peer *peer, uint8_t *buf, size_t len)
1203 {
1204 	struct wg_endpoint	 endpoint;
1205 
1206 	wg_peer_counters_add(peer, len, 0);
1207 	wg_timers_event_any_authenticated_packet_traversal(&peer->p_timers);
1208 	wg_timers_event_any_authenticated_packet_sent(&peer->p_timers);
1209 	wg_peer_get_endpoint(peer, &endpoint);
1210 	wg_send_buf(peer->p_sc, &endpoint, buf, len);
1211 }
1212 
1213 void
1214 wg_send_initiation(void *_peer)
1215 {
1216 	struct wg_peer			*peer = _peer;
1217 	struct wg_pkt_initiation	 pkt;
1218 
1219 	if (wg_timers_check_handshake_last_sent(&peer->p_timers) != ETIMEDOUT)
1220 		return;
1221 
1222 	DPRINTF(peer->p_sc, "Sending handshake initiation to peer %llu\n",
1223 	    peer->p_id);
1224 
1225 	if (noise_create_initiation(&peer->p_remote, &pkt.s_idx, pkt.ue, pkt.es,
1226 				    pkt.ets) != 0)
1227 		return;
1228 	pkt.t = WG_PKT_INITIATION;
1229 	cookie_maker_mac(&peer->p_cookie, &pkt.m, &pkt,
1230 	    sizeof(pkt)-sizeof(pkt.m));
1231 	wg_peer_send_buf(peer, (uint8_t *)&pkt, sizeof(pkt));
1232 	wg_timers_event_handshake_initiated(&peer->p_timers);
1233 }
1234 
1235 void
1236 wg_send_response(struct wg_peer *peer)
1237 {
1238 	struct wg_pkt_response	 pkt;
1239 
1240 	DPRINTF(peer->p_sc, "Sending handshake response to peer %llu\n",
1241 	    peer->p_id);
1242 
1243 	if (noise_create_response(&peer->p_remote, &pkt.s_idx, &pkt.r_idx,
1244 				  pkt.ue, pkt.en) != 0)
1245 		return;
1246 	if (noise_remote_begin_session(&peer->p_remote) != 0)
1247 		return;
1248 	wg_timers_event_session_derived(&peer->p_timers);
1249 	pkt.t = WG_PKT_RESPONSE;
1250 	cookie_maker_mac(&peer->p_cookie, &pkt.m, &pkt,
1251 	    sizeof(pkt)-sizeof(pkt.m));
1252 	wg_timers_event_handshake_responded(&peer->p_timers);
1253 	wg_peer_send_buf(peer, (uint8_t *)&pkt, sizeof(pkt));
1254 }
1255 
1256 void
1257 wg_send_cookie(struct wg_softc *sc, struct cookie_macs *cm, uint32_t idx,
1258     struct wg_endpoint *e)
1259 {
1260 	struct wg_pkt_cookie	pkt;
1261 
1262 	DPRINTF(sc, "Sending cookie response for denied handshake message\n");
1263 
1264 	pkt.t = WG_PKT_COOKIE;
1265 	pkt.r_idx = idx;
1266 
1267 	cookie_checker_create_payload(&sc->sc_cookie, cm, pkt.nonce,
1268 	    pkt.ec, &e->e_remote.r_sa);
1269 
1270 	wg_send_buf(sc, e, (uint8_t *)&pkt, sizeof(pkt));
1271 }
1272 
1273 void
1274 wg_send_keepalive(void *_peer)
1275 {
1276 	struct wg_peer	*peer = _peer;
1277 	struct wg_softc	*sc = peer->p_sc;
1278 	struct wg_tag	*t;
1279 	struct mbuf	*m;
1280 
1281 	if (!mq_empty(&peer->p_stage_queue))
1282 		goto send;
1283 
1284 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1285 		return;
1286 
1287 	if ((t = wg_tag_get(m)) == NULL) {
1288 		m_freem(m);
1289 		return;
1290 	}
1291 
1292 	m->m_len = 0;
1293 	m_calchdrlen(m);
1294 
1295 	t->t_peer = peer;
1296 	t->t_mbuf = NULL;
1297 	t->t_done = 0;
1298 	t->t_mtu = 0; /* MTU == 0 OK for keepalive */
1299 
1300 	mq_push(&peer->p_stage_queue, m);
1301 send:
1302 	if (noise_remote_ready(&peer->p_remote) == 0) {
1303 		wg_queue_out(sc, peer);
1304 		task_add(wg_crypt_taskq, &sc->sc_encap);
1305 	} else {
1306 		wg_timers_event_want_initiation(&peer->p_timers);
1307 	}
1308 }
1309 
1310 void
1311 wg_peer_clear_secrets(void *_peer)
1312 {
1313 	struct wg_peer *peer = _peer;
1314 	noise_remote_clear(&peer->p_remote);
1315 }
1316 
1317 void
1318 wg_handshake(struct wg_softc *sc, struct mbuf *m)
1319 {
1320 	struct wg_tag			*t;
1321 	struct wg_pkt_initiation	*init;
1322 	struct wg_pkt_response		*resp;
1323 	struct wg_pkt_cookie		*cook;
1324 	struct wg_peer			*peer;
1325 	struct noise_remote		*remote;
1326 	int				 res, underload = 0;
1327 	static struct timeval		 wg_last_underload; /* microuptime */
1328 
1329 	if (mq_len(&sc->sc_handshake_queue) >= MAX_QUEUED_HANDSHAKES/8) {
1330 		getmicrouptime(&wg_last_underload);
1331 		underload = 1;
1332 	} else if (wg_last_underload.tv_sec != 0) {
1333 		if (!ratecheck(&wg_last_underload, &underload_interval))
1334 			underload = 1;
1335 		else
1336 			bzero(&wg_last_underload, sizeof(wg_last_underload));
1337 	}
1338 
1339 	t = wg_tag_get(m);
1340 
1341 	switch (*mtod(m, uint32_t *)) {
1342 	case WG_PKT_INITIATION:
1343 		init = mtod(m, struct wg_pkt_initiation *);
1344 
1345 		res = cookie_checker_validate_macs(&sc->sc_cookie, &init->m,
1346 				init, sizeof(*init) - sizeof(init->m),
1347 				underload, &t->t_endpoint.e_remote.r_sa);
1348 
1349 		if (res == EINVAL) {
1350 			DPRINTF(sc, "Invalid initiation MAC\n");
1351 			goto error;
1352 		} else if (res == ECONNREFUSED) {
1353 			DPRINTF(sc, "Handshake ratelimited\n");
1354 			goto error;
1355 		} else if (res == EAGAIN) {
1356 			wg_send_cookie(sc, &init->m, init->s_idx,
1357 			    &t->t_endpoint);
1358 			goto error;
1359 		} else if (res != 0) {
1360 			panic("unexpected response: %d", res);
1361 		}
1362 
1363 		if (noise_consume_initiation(&sc->sc_local, &remote,
1364 		    init->s_idx, init->ue, init->es, init->ets) != 0) {
1365 			DPRINTF(sc, "Invalid handshake initiation\n");
1366 			goto error;
1367 		}
1368 
1369 		peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1370 
1371 		DPRINTF(sc, "Receiving handshake initiation from peer %llu\n",
1372 		    peer->p_id);
1373 
1374 		wg_peer_counters_add(peer, 0, sizeof(*init));
1375 		wg_peer_set_endpoint_from_tag(peer, t);
1376 		wg_send_response(peer);
1377 		break;
1378 	case WG_PKT_RESPONSE:
1379 		resp = mtod(m, struct wg_pkt_response *);
1380 
1381 		res = cookie_checker_validate_macs(&sc->sc_cookie, &resp->m,
1382 				resp, sizeof(*resp) - sizeof(resp->m),
1383 				underload, &t->t_endpoint.e_remote.r_sa);
1384 
1385 		if (res == EINVAL) {
1386 			DPRINTF(sc, "Invalid response MAC\n");
1387 			goto error;
1388 		} else if (res == ECONNREFUSED) {
1389 			DPRINTF(sc, "Handshake ratelimited\n");
1390 			goto error;
1391 		} else if (res == EAGAIN) {
1392 			wg_send_cookie(sc, &resp->m, resp->s_idx,
1393 			    &t->t_endpoint);
1394 			goto error;
1395 		} else if (res != 0) {
1396 			panic("unexpected response: %d", res);
1397 		}
1398 
1399 		if ((remote = wg_index_get(sc, resp->r_idx)) == NULL) {
1400 			DPRINTF(sc, "Unknown handshake response\n");
1401 			goto error;
1402 		}
1403 
1404 		peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1405 
1406 		if (noise_consume_response(remote, resp->s_idx, resp->r_idx,
1407 					   resp->ue, resp->en) != 0) {
1408 			DPRINTF(sc, "Invalid handshake response\n");
1409 			goto error;
1410 		}
1411 
1412 		DPRINTF(sc, "Receiving handshake response from peer %llu\n",
1413 				peer->p_id);
1414 
1415 		wg_peer_counters_add(peer, 0, sizeof(*resp));
1416 		wg_peer_set_endpoint_from_tag(peer, t);
1417 		if (noise_remote_begin_session(&peer->p_remote) == 0) {
1418 			wg_timers_event_session_derived(&peer->p_timers);
1419 			wg_timers_event_handshake_complete(&peer->p_timers);
1420 		}
1421 		break;
1422 	case WG_PKT_COOKIE:
1423 		cook = mtod(m, struct wg_pkt_cookie *);
1424 
1425 		if ((remote = wg_index_get(sc, cook->r_idx)) == NULL) {
1426 			DPRINTF(sc, "Unknown cookie index\n");
1427 			goto error;
1428 		}
1429 
1430 		peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1431 
1432 		if (cookie_maker_consume_payload(&peer->p_cookie,
1433 		    cook->nonce, cook->ec) != 0) {
1434 			DPRINTF(sc, "Could not decrypt cookie response\n");
1435 			goto error;
1436 		}
1437 
1438 		DPRINTF(sc, "Receiving cookie response\n");
1439 		goto error;
1440 	default:
1441 		panic("invalid packet in handshake queue");
1442 	}
1443 
1444 	wg_timers_event_any_authenticated_packet_received(&peer->p_timers);
1445 	wg_timers_event_any_authenticated_packet_traversal(&peer->p_timers);
1446 error:
1447 	m_freem(m);
1448 }
1449 
1450 void
1451 wg_handshake_worker(void *_sc)
1452 {
1453 	struct mbuf *m;
1454 	struct wg_softc *sc = _sc;
1455 	while ((m = mq_dequeue(&sc->sc_handshake_queue)) != NULL)
1456 		wg_handshake(sc, m);
1457 }
1458 
1459 /*
1460  * The following functions handle encapsulation (encryption) and
1461  * decapsulation (decryption). The wg_{en,de}cap functions will run in the
1462  * wg_crypt_taskq, while wg_deliver_{in,out} must be serialised and will run
1463  * in nettq.
1464  *
1465  * The packets are tracked in two queues, a serial queue and a parallel queue.
1466  *  - The parallel queue is used to distribute the encryption across multiple
1467  *    threads.
1468  *  - The serial queue ensures that packets are not reordered and are
1469  *    delivered in sequence.
1470  * The wg_tag attached to the packet contains two flags to help the two queues
1471  * interact.
1472  *  - t_done: The parallel queue has finished with the packet, now the serial
1473  *            queue can do its work.
1474  *  - t_mbuf: Used to store the *crypted packet. In the case of encryption,
1475  *            this is a newly allocated packet, and in the case of decryption,
1476  *            it is a pointer to the same packet, that has been decrypted and
1477  *            truncated. If t_mbuf is NULL, then *cryption failed and this
1478  *            packet should not be passed.
1479  * wg_{en,de}cap work on the parallel queue, while wg_deliver_{in,out} work
1480  * on the serial queue.
1481  */
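/*
 * For example, on output a packet is staged on p_stage_queue, moved by
 * wg_queue_out() onto both the per-peer serial p_encap_queue and the
 * shared parallel sc_encap_ring, encrypted by wg_encap() on
 * wg_crypt_taskq (which sets t_done and t_mbuf), and finally transmitted
 * in order by wg_deliver_out() in nettq.
 */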
1482 void
1483 wg_encap(struct wg_softc *sc, struct mbuf *m)
1484 {
1485 	int res = 0;
1486 	struct wg_pkt_data	*data;
1487 	struct wg_peer		*peer;
1488 	struct wg_tag		*t;
1489 	struct mbuf		*mc;
1490 	size_t			 padding_len, plaintext_len, out_len;
1491 	uint64_t		 nonce;
1492 
1493 	t = wg_tag_get(m);
1494 	peer = t->t_peer;
1495 
1496 	plaintext_len = min(WG_PKT_WITH_PADDING(m->m_pkthdr.len), t->t_mtu);
1497 	padding_len = plaintext_len - m->m_pkthdr.len;
1498 	out_len = sizeof(struct wg_pkt_data) + plaintext_len + NOISE_AUTHTAG_LEN;
1499 
1500 	/*
1501 	 * For the time being we allocate a new packet with sufficient size to
1502 	 * hold the encrypted data and headers. This is difficult to avoid, as
1503 	 * p_encap_queue (an mbuf_list) holds a reference to the mbuf; if we
1504 	 * m_makespace or similar, we risk corrupting that list.
1505 	 * Additionally, we only pass a buf and buf length to
1506 	 * noise_remote_encrypt. Technically it would be possible to teach
1507 	 * noise_remote_encrypt about mbufs, but we would need to sort out the
1508 	 * p_encap_queue situation first.
1509 	 */
1510 	if ((mc = m_clget(NULL, M_NOWAIT, out_len)) == NULL)
1511 		goto error;
1512 
1513 	data = mtod(mc, struct wg_pkt_data *);
1514 	m_copydata(m, 0, m->m_pkthdr.len, data->buf);
1515 	bzero(data->buf + m->m_pkthdr.len, padding_len);
1516 	data->t = WG_PKT_DATA;
1517 
1518 	/*
1519 	 * Copy the flow hash from the inner packet to the outer packet, so
1520 	 * that fq_codel can properly separate streams, rather than falling
1521 	 * back to random buckets.
1522 	 */
1523 	mc->m_pkthdr.ph_flowid = m->m_pkthdr.ph_flowid;
1524 
1525 	res = noise_remote_encrypt(&peer->p_remote, &data->r_idx, &nonce,
1526 				   data->buf, plaintext_len);
1527 	nonce = htole64(nonce); /* Wire format is little endian. */
1528 	memcpy(data->nonce, &nonce, sizeof(data->nonce));
1529 
1530 	if (__predict_false(res == EINVAL)) {
1531 		m_freem(mc);
1532 		goto error;
1533 	} else if (__predict_false(res == ESTALE)) {
1534 		wg_timers_event_want_initiation(&peer->p_timers);
1535 	} else if (__predict_false(res != 0)) {
1536 		panic("unexpected result: %d", res);
1537 	}
1538 
1539 	/* A packet with length 0 is a keepalive packet */
1540 	if (__predict_false(m->m_pkthdr.len == 0))
1541 		DPRINTF(sc, "Sending keepalive packet to peer %llu\n",
1542 		    peer->p_id);
1543 
1544 	mc->m_pkthdr.ph_loopcnt = m->m_pkthdr.ph_loopcnt;
1545 	mc->m_flags &= ~(M_MCAST | M_BCAST);
1546 	mc->m_len = out_len;
1547 	m_calchdrlen(mc);
1548 
1549 	/*
1550 	 * We would count ifc_opackets, ifc_obytes of m here, except if_snd
1551 	 * already does that for us, so no need to worry about it.
1552 	counters_pkt(sc->sc_if.if_counters, ifc_opackets, ifc_obytes,
1553 	    m->m_pkthdr.len);
1554 	 */
1555 	wg_peer_counters_add(peer, mc->m_pkthdr.len, 0);
1556 
1557 	t->t_mbuf = mc;
1558 error:
1559 	t->t_done = 1;
1560 	task_add(net_tq(sc->sc_if.if_index), &peer->p_deliver_out);
1561 }
1562 
1563 void
1564 wg_decap(struct wg_softc *sc, struct mbuf *m)
1565 {
1566 	int			 res, len;
1567 	struct ip		*ip;
1568 	struct ip6_hdr		*ip6;
1569 	struct wg_pkt_data	*data;
1570 	struct wg_peer		*peer, *allowed_peer;
1571 	struct wg_tag		*t;
1572 	size_t			 payload_len;
1573 	uint64_t		 nonce;
1574 
1575 	t = wg_tag_get(m);
1576 	peer = t->t_peer;
1577 
1578 	/*
1579 	 * Likewise to wg_encap, we pass a buf and buf length to
1580 	 * noise_remote_decrypt. Again, possible to teach it about mbufs
1581 	 * but need to get over the p_decap_queue situation first. However,
1582 	 * we do not need to allocate a new mbuf as the decrypted packet is
1583 	 * strictly smaller than encrypted. We just set t_mbuf to m and
1584 	 * wg_deliver_in knows how to deal with that.
1585 	 */
1586 	data = mtod(m, struct wg_pkt_data *);
1587 	payload_len = m->m_pkthdr.len - sizeof(struct wg_pkt_data);
1588 	memcpy(&nonce, data->nonce, sizeof(nonce));
1589 	nonce = le64toh(nonce); /* Wire format is little endian. */
1590 	res = noise_remote_decrypt(&peer->p_remote, data->r_idx, nonce,
1591 				   data->buf, payload_len);
1592 
1593 	if (__predict_false(res == EINVAL)) {
1594 		goto error;
1595 	} else if (__predict_false(res == ECONNRESET)) {
1596 		wg_timers_event_handshake_complete(&peer->p_timers);
1597 	} else if (__predict_false(res == ESTALE)) {
1598 		wg_timers_event_want_initiation(&peer->p_timers);
1599 	} else if (__predict_false(res != 0)) {
1600 		panic("unexpected response: %d", res);
1601 	}
1602 
1603 	wg_peer_set_endpoint_from_tag(peer, t);
1604 
1605 	wg_peer_counters_add(peer, 0, m->m_pkthdr.len);
1606 
1607 	m_adj(m, sizeof(struct wg_pkt_data));
1608 	m_adj(m, -NOISE_AUTHTAG_LEN);
1609 
1610 	counters_pkt(sc->sc_if.if_counters, ifc_ipackets, ifc_ibytes,
1611 	    m->m_pkthdr.len);
1612 
1613 	/* A packet with length 0 is a keepalive packet */
1614 	if (__predict_false(m->m_pkthdr.len == 0)) {
1615 		DPRINTF(sc, "Receiving keepalive packet from peer "
1616 		    "%llu\n", peer->p_id);
1617 		goto done;
1618 	}
1619 
1620 	/*
1621 	 * IP header; we just check the size and the version, so we can
1622 	 * IP header, we just worry about the sizeof and the version, so we can
1623 	 * read the source address in wg_aip_lookup.
1624 	 *
1625 	 * We also need to trim the packet, as it was likely padded before
1626 	 * encryption. While we could drop it here, it will be more helpful to
1627 	 * pass it to bpf_mtap and use the counters that people are expecting
1628 	 * in ipv4_input and ipv6_input. We can rely on ipv4_input and
1629 	 * ipv6_input to properly validate the headers.
1630 	 */
1631 	ip = mtod(m, struct ip *);
1632 	ip6 = mtod(m, struct ip6_hdr *);
1633 
1634 	if (m->m_pkthdr.len >= sizeof(struct ip) && ip->ip_v == IPVERSION) {
1635 		m->m_pkthdr.ph_family = AF_INET;
1636 
1637 		len = ntohs(ip->ip_len);
1638 		if (len >= sizeof(struct ip) && len < m->m_pkthdr.len)
1639 			m_adj(m, len - m->m_pkthdr.len);
1640 
1641 		allowed_peer = wg_aip_lookup(sc->sc_aip4, &ip->ip_src);
1642 #ifdef INET6
1643 	} else if (m->m_pkthdr.len >= sizeof(struct ip6_hdr) &&
1644 	    (ip6->ip6_vfc & IPV6_VERSION_MASK) == IPV6_VERSION) {
1645 		m->m_pkthdr.ph_family = AF_INET6;
1646 
1647 		len = ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr);
1648 		if (len < m->m_pkthdr.len)
1649 			m_adj(m, len - m->m_pkthdr.len);
1650 
1651 		allowed_peer = wg_aip_lookup(sc->sc_aip6, &ip6->ip6_src);
1652 #endif
1653 	} else {
1654 		DPRINTF(sc, "Packet is neither ipv4 nor ipv6 from "
1655 		    "peer %llu\n", peer->p_id);
1656 		goto error;
1657 	}
1658 
1659 	if (__predict_false(peer != allowed_peer)) {
1660 		DPRINTF(sc, "Packet has unallowed src IP from peer "
1661 		    "%llu\n", peer->p_id);
1662 		goto error;
1663 	}
1664 
1665 	/* tunneled packet was not offloaded */
1666 	m->m_pkthdr.csum_flags = 0;
1667 
1668 	m->m_pkthdr.ph_ifidx = sc->sc_if.if_index;
1669 	m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
1670 	m->m_flags &= ~(M_MCAST | M_BCAST);
1671 #if NPF > 0
1672 	pf_pkt_addr_changed(m);
1673 #endif /* NPF > 0 */
1674 
1675 done:
1676 	t->t_mbuf = m;
1677 error:
1678 	t->t_done = 1;
1679 	task_add(net_tq(sc->sc_if.if_index), &peer->p_deliver_in);
1680 }
1681 
1682 void
1683 wg_encap_worker(void *_sc)
1684 {
1685 	struct mbuf *m;
1686 	struct wg_softc *sc = _sc;
1687 	while ((m = wg_ring_dequeue(&sc->sc_encap_ring)) != NULL)
1688 		wg_encap(sc, m);
1689 }
1690 
1691 void
1692 wg_decap_worker(void *_sc)
1693 {
1694 	struct mbuf *m;
1695 	struct wg_softc *sc = _sc;
1696 	while ((m = wg_ring_dequeue(&sc->sc_decap_ring)) != NULL)
1697 		wg_decap(sc, m);
1698 }
1699 
1700 void
1701 wg_deliver_out(void *_peer)
1702 {
1703 	struct wg_peer		*peer = _peer;
1704 	struct wg_softc		*sc = peer->p_sc;
1705 	struct wg_endpoint	 endpoint;
1706 	struct wg_tag		*t;
1707 	struct mbuf		*m;
1708 	int			 ret;
1709 
1710 	wg_peer_get_endpoint(peer, &endpoint);
1711 
1712 	while ((m = wg_queue_dequeue(&peer->p_encap_queue, &t)) != NULL) {
1713 		/* t_mbuf will contain the encrypted packet */
1714 		if (t->t_mbuf == NULL){
1715 			counters_inc(sc->sc_if.if_counters, ifc_oerrors);
1716 			m_freem(m);
1717 			continue;
1718 		}
1719 
1720 		ret = wg_send(sc, &endpoint, t->t_mbuf);
1721 
1722 		if (ret == 0) {
1723 			wg_timers_event_any_authenticated_packet_traversal(
1724 			    &peer->p_timers);
1725 			wg_timers_event_any_authenticated_packet_sent(
1726 			    &peer->p_timers);
1727 
1728 			if (m->m_pkthdr.len != 0)
1729 				wg_timers_event_data_sent(&peer->p_timers);
1730 		} else if (ret == EADDRNOTAVAIL) {
1731 			wg_peer_clear_src(peer);
1732 			wg_peer_get_endpoint(peer, &endpoint);
1733 		}
1734 
1735 		m_freem(m);
1736 	}
1737 }
1738 
1739 void
1740 wg_deliver_in(void *_peer)
1741 {
1742 	struct wg_peer	*peer = _peer;
1743 	struct wg_softc	*sc = peer->p_sc;
1744 	struct wg_tag	*t;
1745 	struct mbuf	*m;
1746 
1747 	while ((m = wg_queue_dequeue(&peer->p_decap_queue, &t)) != NULL) {
1748 		/* t_mbuf will contain the decrypted packet */
1749 		if (t->t_mbuf == NULL) {
1750 			counters_inc(sc->sc_if.if_counters, ifc_ierrors);
1751 			m_freem(m);
1752 			continue;
1753 		}
1754 
1755 		/* From here on m == t->t_mbuf */
1756 		KASSERT(m == t->t_mbuf);
1757 
1758 		wg_timers_event_any_authenticated_packet_received(
1759 		    &peer->p_timers);
1760 		wg_timers_event_any_authenticated_packet_traversal(
1761 		    &peer->p_timers);
1762 
1763 		if (m->m_pkthdr.len == 0) {
1764 			m_freem(m);
1765 			continue;
1766 		}
1767 
1768 #if NBPFILTER > 0
1769 		if (sc->sc_if.if_bpf != NULL)
1770 			bpf_mtap_af(sc->sc_if.if_bpf,
1771 			    m->m_pkthdr.ph_family, m, BPF_DIRECTION_IN);
1772 #endif
1773 
1774 		NET_LOCK();
1775 		if (m->m_pkthdr.ph_family == AF_INET)
1776 			ipv4_input(&sc->sc_if, m);
1777 #ifdef INET6
1778 		else if (m->m_pkthdr.ph_family == AF_INET6)
1779 			ipv6_input(&sc->sc_if, m);
1780 #endif
1781 		else
1782 			panic("invalid ph_family");
1783 		NET_UNLOCK();
1784 
1785 		wg_timers_event_data_received(&peer->p_timers);
1786 	}
1787 }
1788 
1789 int
1790 wg_queue_in(struct wg_softc *sc, struct wg_peer *peer, struct mbuf *m)
1791 {
1792 	struct wg_ring		*parallel = &sc->sc_decap_ring;
1793 	struct wg_queue		*serial = &peer->p_decap_queue;
1794 	struct wg_tag		*t;
1795 
1796 	mtx_enter(&serial->q_mtx);
1797 	if (serial->q_list.ml_len < MAX_QUEUED_PKT) {
1798 		ml_enqueue(&serial->q_list, m);
1799 		mtx_leave(&serial->q_mtx);
1800 	} else {
1801 		mtx_leave(&serial->q_mtx);
1802 		m_freem(m);
1803 		return ENOBUFS;
1804 	}
1805 
1806 	mtx_enter(&parallel->r_mtx);
1807 	if (parallel->r_tail - parallel->r_head < MAX_QUEUED_PKT) {
1808 		parallel->r_buf[parallel->r_tail & MAX_QUEUED_PKT_MASK] = m;
1809 		parallel->r_tail++;
1810 		mtx_leave(&parallel->r_mtx);
1811 	} else {
1812 		mtx_leave(&parallel->r_mtx);
1813 		t = wg_tag_get(m);
1814 		t->t_done = 1;
1815 		return ENOBUFS;
1816 	}
1817 
1818 	return 0;
1819 }
1820 
1821 void
1822 wg_queue_out(struct wg_softc *sc, struct wg_peer *peer)
1823 {
1824 	struct wg_ring		*parallel = &sc->sc_encap_ring;
1825 	struct wg_queue		*serial = &peer->p_encap_queue;
1826 	struct mbuf_list 	 ml, ml_free;
1827 	struct mbuf		*m;
1828 	struct wg_tag		*t;
1829 	int			 dropped;
1830 
1831 	/*
1832 	 * We delist all staged packets and then add them to the queues. This
1833 	 * can race with wg_qstart when called from wg_send_keepalive; however,
1834 	 * wg_qstart itself will not race, as it is serialised.
1835 	 */
1836 	mq_delist(&peer->p_stage_queue, &ml);
1837 	ml_init(&ml_free);
1838 
1839 	while ((m = ml_dequeue(&ml)) != NULL) {
1840 		mtx_enter(&serial->q_mtx);
1841 		if (serial->q_list.ml_len < MAX_QUEUED_PKT) {
1842 			ml_enqueue(&serial->q_list, m);
1843 			mtx_leave(&serial->q_mtx);
1844 		} else {
1845 			mtx_leave(&serial->q_mtx);
1846 			ml_enqueue(&ml_free, m);
1847 			continue;
1848 		}
1849 
1850 		mtx_enter(&parallel->r_mtx);
1851 		if (parallel->r_tail - parallel->r_head < MAX_QUEUED_PKT) {
1852 			parallel->r_buf[parallel->r_tail & MAX_QUEUED_PKT_MASK] = m;
1853 			parallel->r_tail++;
1854 			mtx_leave(&parallel->r_mtx);
1855 		} else {
1856 			mtx_leave(&parallel->r_mtx);
1857 			t = wg_tag_get(m);
1858 			t->t_done = 1;
1859 		}
1860 	}
1861 
1862 	if ((dropped = ml_purge(&ml_free)) > 0)
1863 		counters_add(sc->sc_if.if_counters, ifc_oqdrops, dropped);
1864 }
1865 
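/*
 * The rings are mutex-protected circular buffers indexed by free-running
 * head/tail counters; dequeue pops the oldest entry, or NULL if empty.
 */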
1866 struct mbuf *
1867 wg_ring_dequeue(struct wg_ring *r)
1868 {
1869 	struct mbuf *m = NULL;
1870 	mtx_enter(&r->r_mtx);
1871 	if (r->r_head != r->r_tail) {
1872 		m = r->r_buf[r->r_head & MAX_QUEUED_PKT_MASK];
1873 		r->r_head++;
1874 	}
1875 	mtx_leave(&r->r_mtx);
1876 	return m;
1877 }
1878 
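/*
 * A packet may only leave a serialised queue once its crypto work has
 * completed, so only return the head of the list if its tag has been marked
 * done; otherwise return NULL without dequeueing anything.
 */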
1879 struct mbuf *
1880 wg_queue_dequeue(struct wg_queue *q, struct wg_tag **t)
1881 {
1882 	struct mbuf *m;
1883 	mtx_enter(&q->q_mtx);
1884 	if ((m = q->q_list.ml_head) != NULL && (*t = wg_tag_get(m))->t_done)
1885 		ml_dequeue(&q->q_list);
1886 	else
1887 		m = NULL;
1888 	mtx_leave(&q->q_mtx);
1889 	return m;
1890 }
1891 
1892 size_t
1893 wg_queue_len(struct wg_queue *q)
1894 {
1895 	size_t len;
1896 	mtx_enter(&q->q_mtx);
1897 	len = q->q_list.ml_len;
1898 	mtx_leave(&q->q_mtx);
1899 	return len;
1900 }
1901 
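/*
 * Noise upcall: map a static public key to the corresponding noise_remote,
 * if a peer with that key exists.
 */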
1902 struct noise_remote *
1903 wg_remote_get(void *_sc, uint8_t public[NOISE_PUBLIC_KEY_LEN])
1904 {
1905 	struct wg_peer	*peer;
1906 	struct wg_softc	*sc = _sc;
1907 	if ((peer = wg_peer_lookup(sc, public)) == NULL)
1908 		return NULL;
1909 	return &peer->p_remote;
1910 }
1911 
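/*
 * The index hashtable maps the 32-bit session indices carried on the wire
 * to noise_remote pointers. wg_index_set takes an unused index entry from
 * the owning peer, assigns it a random key (retrying until the key is
 * unique) and inserts it into the table, returning the key.
 */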
1912 uint32_t
1913 wg_index_set(void *_sc, struct noise_remote *remote)
1914 {
1915 	struct wg_peer	*peer;
1916 	struct wg_softc	*sc = _sc;
1917 	struct wg_index *index, *iter;
1918 	uint32_t	 key;
1919 
1920 	/*
1921 	 * We can modify this without a lock, as wg_index_set and
1922 	 * wg_index_drop are guaranteed to be serialised (per remote).
1923 	 */
1924 	peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1925 	index = SLIST_FIRST(&peer->p_unused_index);
1926 	KASSERT(index != NULL);
1927 	SLIST_REMOVE_HEAD(&peer->p_unused_index, i_unused_entry);
1928 
1929 	index->i_value = remote;
1930 
1931 	mtx_enter(&sc->sc_index_mtx);
1932 assign_id:
1933 	key = index->i_key = arc4random();
1934 	key &= sc->sc_index_mask;
1935 	LIST_FOREACH(iter, &sc->sc_index[key], i_entry)
1936 		if (iter->i_key == index->i_key)
1937 			goto assign_id;
1938 
1939 	LIST_INSERT_HEAD(&sc->sc_index[key], index, i_entry);
1940 
1941 	mtx_leave(&sc->sc_index_mtx);
1942 
1943 	/* Likewise, no need to lock for index here. */
1944 	return index->i_key;
1945 }
1946 
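/*
 * Resolve an index received on the wire back to the noise_remote it was
 * assigned to, or NULL if the index is unknown.
 */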
1947 struct noise_remote *
1948 wg_index_get(void *_sc, uint32_t key0)
1949 {
1950 	struct wg_softc		*sc = _sc;
1951 	struct wg_index		*iter;
1952 	struct noise_remote	*remote = NULL;
1953 	uint32_t		 key = key0 & sc->sc_index_mask;
1954 
1955 	mtx_enter(&sc->sc_index_mtx);
1956 	LIST_FOREACH(iter, &sc->sc_index[key], i_entry)
1957 		if (iter->i_key == key0) {
1958 			remote = iter->i_value;
1959 			break;
1960 		}
1961 	mtx_leave(&sc->sc_index_mtx);
1962 	return remote;
1963 }
1964 
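/*
 * Remove a previously assigned index from the hashtable and return the
 * entry to the owning peer's unused list, undoing wg_index_set above.
 */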
1965 void
1966 wg_index_drop(void *_sc, uint32_t key0)
1967 {
1968 	struct wg_softc	*sc = _sc;
1969 	struct wg_index	*iter;
1970 	struct wg_peer	*peer = NULL;
1971 	uint32_t	 key = key0 & sc->sc_index_mask;
1972 
1973 	mtx_enter(&sc->sc_index_mtx);
1974 	LIST_FOREACH(iter, &sc->sc_index[key], i_entry)
1975 		if (iter->i_key == key0) {
1976 			LIST_REMOVE(iter, i_entry);
1977 			break;
1978 		}
1979 	mtx_leave(&sc->sc_index_mtx);
1980 
1981 	/* We expect a peer */
1982 	peer = CONTAINER_OF(iter->i_value, struct wg_peer, p_remote);
1983 	KASSERT(peer != NULL);
1984 	SLIST_INSERT_HEAD(&peer->p_unused_index, iter, i_unused_entry);
1985 }
1986 
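/*
 * wg_input handles each UDP datagram arriving on the tunnel socket. The
 * sender's address and our local address are recorded in the mbuf tag as the
 * candidate endpoint, the network headers are trimmed, and the payload is
 * classified by its leading type word: handshake messages (initiation,
 * response, cookie) are queued for the handshake task, data packets are
 * matched to a peer via their receiver index and queued for decryption, and
 * anything else is dropped as an input error.
 */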
1987 struct mbuf *
1988 wg_input(void *_sc, struct mbuf *m, struct ip *ip, struct ip6_hdr *ip6,
1989     void *_uh, int hlen)
1990 {
1991 	struct wg_pkt_data	*data;
1992 	struct noise_remote	*remote;
1993 	struct wg_tag		*t;
1994 	struct wg_softc		*sc = _sc;
1995 	struct udphdr		*uh = _uh;
1996 
1997 	NET_ASSERT_LOCKED();
1998 
1999 	if ((t = wg_tag_get(m)) == NULL) {
2000 		m_freem(m);
2001 		return NULL;
2002 	}
2003 
2004 	if (ip != NULL) {
2005 		t->t_endpoint.e_remote.r_sa.sa_len = sizeof(struct sockaddr_in);
2006 		t->t_endpoint.e_remote.r_sa.sa_family = AF_INET;
2007 		t->t_endpoint.e_remote.r_sin.sin_port = uh->uh_sport;
2008 		t->t_endpoint.e_remote.r_sin.sin_addr = ip->ip_src;
2009 		t->t_endpoint.e_local.l_in = ip->ip_dst;
2010 #ifdef INET6
2011 	} else if (ip6 != NULL) {
2012 		t->t_endpoint.e_remote.r_sa.sa_len = sizeof(struct sockaddr_in6);
2013 		t->t_endpoint.e_remote.r_sa.sa_family = AF_INET6;
2014 		t->t_endpoint.e_remote.r_sin6.sin6_port = uh->uh_sport;
2015 		t->t_endpoint.e_remote.r_sin6.sin6_addr = ip6->ip6_src;
2016 		t->t_endpoint.e_local.l_in6 = ip6->ip6_dst;
2017 #endif
2018 	} else {
2019 		m_freem(m);
2020 		return NULL;
2021 	}
2022 
2023 	/* m has an IP/IPv6 header of hlen length; we don't need it anymore. */
2024 	m_adj(m, hlen);
2025 
2026 	/*
2027 	 * Ensure mbuf is contiguous over full length of packet. This is done
2028 	 * so we can directly read the handshake values in wg_handshake, and so
2029 	 * we can decrypt a transport packet by passing a single buffer to
2030 	 * noise_remote_decrypt in wg_decap.
2031 	 */
2032 	if ((m = m_pullup(m, m->m_pkthdr.len)) == NULL)
2033 		return NULL;
2034 
2035 	if ((m->m_pkthdr.len == sizeof(struct wg_pkt_initiation) &&
2036 		*mtod(m, uint32_t *) == WG_PKT_INITIATION) ||
2037 	    (m->m_pkthdr.len == sizeof(struct wg_pkt_response) &&
2038 		*mtod(m, uint32_t *) == WG_PKT_RESPONSE) ||
2039 	    (m->m_pkthdr.len == sizeof(struct wg_pkt_cookie) &&
2040 		*mtod(m, uint32_t *) == WG_PKT_COOKIE)) {
2041 
2042 		if (mq_enqueue(&sc->sc_handshake_queue, m) != 0)
2043 			DPRINTF(sc, "Dropping handshake packet\n");
2044 		task_add(wg_handshake_taskq, &sc->sc_handshake);
2045 
2046 	} else if (m->m_pkthdr.len >= sizeof(struct wg_pkt_data) +
2047 	    NOISE_AUTHTAG_LEN && *mtod(m, uint32_t *) == WG_PKT_DATA) {
2048 
2049 		data = mtod(m, struct wg_pkt_data *);
2050 
2051 		if ((remote = wg_index_get(sc, data->r_idx)) != NULL) {
2052 			t->t_peer = CONTAINER_OF(remote, struct wg_peer,
2053 			    p_remote);
2054 			t->t_mbuf = NULL;
2055 			t->t_done = 0;
2056 
2057 			if (wg_queue_in(sc, t->t_peer, m) != 0)
2058 				counters_inc(sc->sc_if.if_counters,
2059 				    ifc_iqdrops);
2060 			task_add(wg_crypt_taskq, &sc->sc_decap);
2061 		} else {
2062 			counters_inc(sc->sc_if.if_counters, ifc_ierrors);
2063 			m_freem(m);
2064 		}
2065 	} else {
2066 		counters_inc(sc->sc_if.if_counters, ifc_ierrors);
2067 		m_freem(m);
2068 	}
2069 
2070 	return NULL;
2071 }
2072 
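/*
 * Interface start routine. Every dequeued packet was already tagged with its
 * destination peer by wg_output; each packet is pushed onto that peer's
 * stage queue, and every peer touched is then visited once: peers with a
 * ready noise_remote have their staged packets promoted to the encap queues,
 * the others request a handshake initiation via the timers. Finally the
 * encryption task is kicked.
 */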
2073 void
2074 wg_qstart(struct ifqueue *ifq)
2075 {
2076 	struct ifnet		*ifp = ifq->ifq_if;
2077 	struct wg_softc		*sc = ifp->if_softc;
2078 	struct wg_peer		*peer;
2079 	struct wg_tag		*t;
2080 	struct mbuf		*m;
2081 	SLIST_HEAD(,wg_peer)	 start_list;
2082 
2083 	SLIST_INIT(&start_list);
2084 
2085 	/*
2086 	 * We should be OK to modify p_start_list and p_start_onlist in this
2087 	 * function, as there should only be one ifp->if_qstart invoked at a
2088 	 * time.
2089 	 */
2090 	while ((m = ifq_dequeue(ifq)) != NULL) {
2091 		t = wg_tag_get(m);
2092 		peer = t->t_peer;
2093 		if (mq_push(&peer->p_stage_queue, m) != 0)
2094 			counters_inc(ifp->if_counters, ifc_oqdrops);
2095 		if (!peer->p_start_onlist) {
2096 			SLIST_INSERT_HEAD(&start_list, peer, p_start_list);
2097 			peer->p_start_onlist = 1;
2098 		}
2099 	}
2100 	SLIST_FOREACH(peer, &start_list, p_start_list) {
2101 		if (noise_remote_ready(&peer->p_remote) == 0)
2102 			wg_queue_out(sc, peer);
2103 		else
2104 			wg_timers_event_want_initiation(&peer->p_timers);
2105 		peer->p_start_onlist = 0;
2106 	}
2107 	task_add(wg_crypt_taskq, &sc->sc_encap);
2108 }
2109 
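/*
 * Generic output path. Look up the destination peer in the allowed-IPs
 * table for the packet's address family, reject packets for peers without a
 * usable endpoint, guard against routing loops and pf-delayed packets, then
 * tag the mbuf with the peer and current MTU and enqueue it on the ifq for
 * wg_qstart.
 */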
2110 int
2111 wg_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
2112     struct rtentry *rt)
2113 {
2114 	struct wg_softc	*sc = ifp->if_softc;
2115 	struct wg_peer	*peer;
2116 	struct wg_tag	*t;
2117 	int		 af, ret = EINVAL;
2118 
2119 	NET_ASSERT_LOCKED();
2120 
2121 	if ((t = wg_tag_get(m)) == NULL) {
2122 		ret = ENOBUFS;
2123 		goto error;
2124 	}
2125 
2126 	m->m_pkthdr.ph_family = sa->sa_family;
2127 	if (sa->sa_family == AF_INET) {
2128 		peer = wg_aip_lookup(sc->sc_aip4,
2129 		    &mtod(m, struct ip *)->ip_dst);
2130 #ifdef INET6
2131 	} else if (sa->sa_family == AF_INET6) {
2132 		peer = wg_aip_lookup(sc->sc_aip6,
2133 		    &mtod(m, struct ip6_hdr *)->ip6_dst);
2134 #endif
2135 	} else {
2136 		ret = EAFNOSUPPORT;
2137 		goto error;
2138 	}
2139 
2140 #if NBPFILTER > 0
2141 	if (sc->sc_if.if_bpf)
2142 		bpf_mtap_af(sc->sc_if.if_bpf, sa->sa_family, m,
2143 		    BPF_DIRECTION_OUT);
2144 #endif
2145 
2146 	if (peer == NULL) {
2147 		ret = ENETUNREACH;
2148 		goto error;
2149 	}
2150 
2151 	af = peer->p_endpoint.e_remote.r_sa.sa_family;
2152 	if (af != AF_INET && af != AF_INET6) {
2153 		DPRINTF(sc, "No valid endpoint has been configured or "
2154 				"discovered for peer %llu\n", peer->p_id);
2155 		ret = EDESTADDRREQ;
2156 		goto error;
2157 	}
2158 
2159 	if (m->m_pkthdr.ph_loopcnt++ > M_MAXLOOP) {
2160 		DPRINTF(sc, "Packet looped\n");
2161 		ret = ELOOP;
2162 		goto error;
2163 	}
2164 
2165 	/*
2166 	 * As we hold a reference to peer in the mbuf, we can't handle a
2167 	 * delayed packet without doing some refcnting. If a peer is removed
2168 	 * while a delayed packet holds a reference, bad things will happen. For
2169 	 * the time being, delayed packets are unsupported. This may be fixed
2170 	 * with another aip_lookup in wg_qstart, or refcnting as noted above.
2171 	 */
2172 	if (m->m_pkthdr.pf.delay > 0) {
2173 		DPRINTF(sc, "PF Delay Unsupported\n");
2174 		ret = EOPNOTSUPP;
2175 		goto error;
2176 	}
2177 
2178 	t->t_peer = peer;
2179 	t->t_mbuf = NULL;
2180 	t->t_done = 0;
2181 	t->t_mtu = ifp->if_mtu;
2182 
2183 	/*
2184 	 * We still have an issue with ifq: it counts a packet even if it is
2185 	 * later dropped in wg_qstart or never encrypted. Such packets are also
2186 	 * counted as ofails or oqdrops, so they end up counted twice.
2187 	 */
2188 	return if_enqueue(ifp, m);
2189 error:
2190 	counters_inc(ifp->if_counters, ifc_oerrors);
2191 	m_freem(m);
2192 	return ret;
2193 }
2194 
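/*
 * SIOCSWG: copy a wg_data_io description in from userland and apply it
 * under the exclusive sc_lock. This may flush all peers, replace the
 * private key, rebind the UDP socket to a new port or rtable, and create,
 * update or remove individual peers and their allowed IPs. All key material
 * copied in is zeroed before returning.
 */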
2195 int
2196 wg_ioctl_set(struct wg_softc *sc, struct wg_data_io *data)
2197 {
2198 	struct wg_interface_io	*iface_p, iface_o;
2199 	struct wg_peer_io	*peer_p, peer_o;
2200 	struct wg_aip_io	*aip_p, aip_o;
2201 
2202 	struct wg_peer		*peer, *tpeer;
2203 	struct wg_aip		*aip, *taip;
2204 
2205 	in_port_t		 port;
2206 	int			 rtable;
2207 
2208 	uint8_t			 public[WG_KEY_SIZE], private[WG_KEY_SIZE];
2209 	size_t			 i, j;
2210 	int			 ret, has_identity;
2211 
2212 	if ((ret = suser(curproc)) != 0)
2213 		return ret;
2214 
2215 	rw_enter_write(&sc->sc_lock);
2216 
2217 	iface_p = data->wgd_interface;
2218 	if ((ret = copyin(iface_p, &iface_o, sizeof(iface_o))) != 0)
2219 		goto error;
2220 
2221 	if (iface_o.i_flags & WG_INTERFACE_REPLACE_PEERS)
2222 		TAILQ_FOREACH_SAFE(peer, &sc->sc_peer_seq, p_seq_entry, tpeer)
2223 			wg_peer_destroy(peer);
2224 
2225 	if (iface_o.i_flags & WG_INTERFACE_HAS_PRIVATE &&
2226 	    (noise_local_keys(&sc->sc_local, NULL, private) ||
2227 	     timingsafe_bcmp(private, iface_o.i_private, WG_KEY_SIZE))) {
2228 		if (curve25519_generate_public(public, iface_o.i_private)) {
2229 			if ((peer = wg_peer_lookup(sc, public)) != NULL)
2230 				wg_peer_destroy(peer);
2231 		}
2232 		noise_local_lock_identity(&sc->sc_local);
2233 		has_identity = noise_local_set_private(&sc->sc_local,
2234 						       iface_o.i_private);
2235 		TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2236 			noise_remote_precompute(&peer->p_remote);
2237 			wg_timers_event_reset_handshake_last_sent(&peer->p_timers);
2238 			noise_remote_expire_current(&peer->p_remote);
2239 		}
2240 		cookie_checker_update(&sc->sc_cookie,
2241 				      has_identity == 0 ? public : NULL);
2242 		noise_local_unlock_identity(&sc->sc_local);
2243 	}
2244 
2245 	if (iface_o.i_flags & WG_INTERFACE_HAS_PORT)
2246 		port = htons(iface_o.i_port);
2247 	else
2248 		port = sc->sc_udp_port;
2249 
2250 	if (iface_o.i_flags & WG_INTERFACE_HAS_RTABLE)
2251 		rtable = iface_o.i_rtable;
2252 	else
2253 		rtable = sc->sc_udp_rtable;
2254 
2255 	if (port != sc->sc_udp_port || rtable != sc->sc_udp_rtable) {
2256 		TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry)
2257 			wg_peer_clear_src(peer);
2258 
2259 		if (sc->sc_if.if_flags & IFF_RUNNING)
2260 			if ((ret = wg_bind(sc, &port, &rtable)) != 0)
2261 				goto error;
2262 
2263 		sc->sc_udp_port = port;
2264 		sc->sc_udp_rtable = rtable;
2265 	}
2266 
2267 	peer_p = &iface_p->i_peers[0];
2268 	for (i = 0; i < iface_o.i_peers_count; i++) {
2269 		if ((ret = copyin(peer_p, &peer_o, sizeof(peer_o))) != 0)
2270 			goto error;
2271 
2272 		/* Peer must have public key */
2273 		if (!(peer_o.p_flags & WG_PEER_HAS_PUBLIC))
2274 			goto next_peer;
2275 
2276 		/* 0 = latest protocol, 1 = this protocol */
2277 		if (peer_o.p_protocol_version != 0) {
2278 			if (peer_o.p_protocol_version > 1) {
2279 				ret = EPFNOSUPPORT;
2280 				goto error;
2281 			}
2282 		}
2283 
2284 		/* Get local public and check that peer key doesn't match */
2285 		if (noise_local_keys(&sc->sc_local, public, NULL) == 0 &&
2286 		    bcmp(public, peer_o.p_public, WG_KEY_SIZE) == 0)
2287 			goto next_peer;
2288 
2289 		/* Lookup peer, or create if it doesn't exist */
2290 		if ((peer = wg_peer_lookup(sc, peer_o.p_public)) == NULL) {
2291 			/* If we want to delete the peer, there is no need to
2292 			 * create a new one. Likewise, don't create one if we
2293 			 * only want to update an existing peer. */
2294 			if (peer_o.p_flags & (WG_PEER_REMOVE|WG_PEER_UPDATE))
2295 				goto next_peer;
2296 
2297 			if ((peer = wg_peer_create(sc,
2298 			    peer_o.p_public)) == NULL) {
2299 				ret = ENOMEM;
2300 				goto error;
2301 			}
2302 		}
2303 
2304 		/* Remove peer and continue if specified */
2305 		if (peer_o.p_flags & WG_PEER_REMOVE) {
2306 			wg_peer_destroy(peer);
2307 			goto next_peer;
2308 		}
2309 
2310 		if (peer_o.p_flags & WG_PEER_HAS_ENDPOINT)
2311 			wg_peer_set_sockaddr(peer, &peer_o.p_sa);
2312 
2313 		if (peer_o.p_flags & WG_PEER_HAS_PSK)
2314 			noise_remote_set_psk(&peer->p_remote, peer_o.p_psk);
2315 
2316 		if (peer_o.p_flags & WG_PEER_HAS_PKA)
2317 			wg_timers_set_persistent_keepalive(&peer->p_timers,
2318 			    peer_o.p_pka);
2319 
2320 		if (peer_o.p_flags & WG_PEER_REPLACE_AIPS) {
2321 			LIST_FOREACH_SAFE(aip, &peer->p_aip, a_entry, taip) {
2322 				wg_aip_remove(sc, peer, &aip->a_data);
2323 			}
2324 		}
2325 
2326 		aip_p = &peer_p->p_aips[0];
2327 		for (j = 0; j < peer_o.p_aips_count; j++) {
2328 			if ((ret = copyin(aip_p, &aip_o, sizeof(aip_o))) != 0)
2329 				goto error;
2330 			ret = wg_aip_add(sc, peer, &aip_o);
2331 			if (ret != 0)
2332 				goto error;
2333 			aip_p++;
2334 		}
2335 
2336 		peer_p = (struct wg_peer_io *)aip_p;
2337 		continue;
2338 next_peer:
2339 		aip_p = &peer_p->p_aips[0];
2340 		aip_p += peer_o.p_aips_count;
2341 		peer_p = (struct wg_peer_io *)aip_p;
2342 	}
2343 
2344 error:
2345 	rw_exit_write(&sc->sc_lock);
2346 	explicit_bzero(&iface_o, sizeof(iface_o));
2347 	explicit_bzero(&peer_o, sizeof(peer_o));
2348 	explicit_bzero(&aip_o, sizeof(aip_o));
2349 	explicit_bzero(public, sizeof(public));
2350 	explicit_bzero(private, sizeof(private));
2351 	return ret;
2352 }
2353 
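/*
 * SIOCGWG: serialise the interface state back to userland. Unprivileged
 * callers only receive the port and rtable; root also gets the keys and the
 * full peer list with each peer's allowed IPs, counters and handshake time.
 * wgd_size is always updated, so a caller whose buffer was too small can
 * retry with a larger one.
 */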
2354 int
2355 wg_ioctl_get(struct wg_softc *sc, struct wg_data_io *data)
2356 {
2357 	struct wg_interface_io	*iface_p, iface_o;
2358 	struct wg_peer_io	*peer_p, peer_o;
2359 	struct wg_aip_io	*aip_p;
2360 
2361 	struct wg_peer		*peer;
2362 	struct wg_aip		*aip;
2363 
2364 	size_t			 size, peer_count, aip_count;
2365 	int			 ret = 0, is_suser = suser(curproc) == 0;
2366 
2367 	size = sizeof(struct wg_interface_io);
2368 	if (data->wgd_size < size && !is_suser)
2369 		goto ret_size;
2370 
2371 	iface_p = data->wgd_interface;
2372 	bzero(&iface_o, sizeof(iface_o));
2373 
2374 	rw_enter_read(&sc->sc_lock);
2375 
2376 	if (sc->sc_udp_port != 0) {
2377 		iface_o.i_port = ntohs(sc->sc_udp_port);
2378 		iface_o.i_flags |= WG_INTERFACE_HAS_PORT;
2379 	}
2380 
2381 	if (sc->sc_udp_rtable != 0) {
2382 		iface_o.i_rtable = sc->sc_udp_rtable;
2383 		iface_o.i_flags |= WG_INTERFACE_HAS_RTABLE;
2384 	}
2385 
2386 	if (!is_suser)
2387 		goto copy_out_iface;
2388 
2389 	if (noise_local_keys(&sc->sc_local, iface_o.i_public,
2390 	    iface_o.i_private) == 0) {
2391 		iface_o.i_flags |= WG_INTERFACE_HAS_PUBLIC;
2392 		iface_o.i_flags |= WG_INTERFACE_HAS_PRIVATE;
2393 	}
2394 
2395 	size += sizeof(struct wg_peer_io) * sc->sc_peer_num;
2396 	size += sizeof(struct wg_aip_io) * sc->sc_aip_num;
2397 	if (data->wgd_size < size)
2398 		goto unlock_and_ret_size;
2399 
2400 	peer_count = 0;
2401 	peer_p = &iface_p->i_peers[0];
2402 	TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2403 		bzero(&peer_o, sizeof(peer_o));
2404 		peer_o.p_flags = WG_PEER_HAS_PUBLIC;
2405 		peer_o.p_protocol_version = 1;
2406 
2407 		if (noise_remote_keys(&peer->p_remote, peer_o.p_public,
2408 		    peer_o.p_psk) == 0)
2409 			peer_o.p_flags |= WG_PEER_HAS_PSK;
2410 
2411 		if (wg_timers_get_persistent_keepalive(&peer->p_timers,
2412 		    &peer_o.p_pka) == 0)
2413 			peer_o.p_flags |= WG_PEER_HAS_PKA;
2414 
2415 		if (wg_peer_get_sockaddr(peer, &peer_o.p_sa) == 0)
2416 			peer_o.p_flags |= WG_PEER_HAS_ENDPOINT;
2417 
2418 		mtx_enter(&peer->p_counters_mtx);
2419 		peer_o.p_txbytes = peer->p_counters_tx;
2420 		peer_o.p_rxbytes = peer->p_counters_rx;
2421 		mtx_leave(&peer->p_counters_mtx);
2422 
2423 		wg_timers_get_last_handshake(&peer->p_timers,
2424 		    &peer_o.p_last_handshake);
2425 
2426 		aip_count = 0;
2427 		aip_p = &peer_p->p_aips[0];
2428 		LIST_FOREACH(aip, &peer->p_aip, a_entry) {
2429 			if ((ret = copyout(&aip->a_data, aip_p, sizeof(*aip_p))) != 0)
2430 				goto unlock_and_ret_size;
2431 			aip_p++;
2432 			aip_count++;
2433 		}
2434 		peer_o.p_aips_count = aip_count;
2435 
2436 		if ((ret = copyout(&peer_o, peer_p, sizeof(peer_o))) != 0)
2437 			goto unlock_and_ret_size;
2438 
2439 		peer_p = (struct wg_peer_io *)aip_p;
2440 		peer_count++;
2441 	}
2442 	iface_o.i_peers_count = peer_count;
2443 
2444 copy_out_iface:
2445 	ret = copyout(&iface_o, iface_p, sizeof(iface_o));
2446 unlock_and_ret_size:
2447 	rw_exit_read(&sc->sc_lock);
2448 	explicit_bzero(&iface_o, sizeof(iface_o));
2449 	explicit_bzero(&peer_o, sizeof(peer_o));
2450 ret_size:
2451 	data->wgd_size = size;
2452 	return ret;
2453 }
2454 
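/*
 * Interface ioctl entry point. The WireGuard-specific set/get commands drop
 * the net lock and do their own locking; the remaining cases are the usual
 * interface address, flags and MTU handling.
 */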
2455 int
2456 wg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2457 {
2458 	struct ifreq	*ifr = (struct ifreq *) data;
2459 	struct wg_softc	*sc = ifp->if_softc;
2460 	int		 ret = 0;
2461 
2462 	switch (cmd) {
2463 	case SIOCSWG:
2464 		NET_UNLOCK();
2465 		ret = wg_ioctl_set(sc, (struct wg_data_io *) data);
2466 		NET_LOCK();
2467 		break;
2468 	case SIOCGWG:
2469 		NET_UNLOCK();
2470 		ret = wg_ioctl_get(sc, (struct wg_data_io *) data);
2471 		NET_LOCK();
2472 		break;
2473 	/* Interface IOCTLs */
2474 	case SIOCSIFADDR:
2475 		SET(ifp->if_flags, IFF_UP);
2476 		/* FALLTHROUGH */
2477 	case SIOCSIFFLAGS:
2478 		if (ISSET(ifp->if_flags, IFF_UP))
2479 			ret = wg_up(sc);
2480 		else
2481 			wg_down(sc);
2482 		break;
2483 	case SIOCSIFMTU:
2484 		/* Arbitrary limits */
2485 		if (ifr->ifr_mtu <= 0 || ifr->ifr_mtu > 9000)
2486 			ret = EINVAL;
2487 		else
2488 			ifp->if_mtu = ifr->ifr_mtu;
2489 		break;
2490 	case SIOCADDMULTI:
2491 	case SIOCDELMULTI:
2492 		break;
2493 	default:
2494 		ret = ENOTTY;
2495 	}
2496 
2497 	return ret;
2498 }
2499 
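/*
 * Bring the interface up: mark it running, bind the UDP socket(s) and
 * enable each peer's timers, pushing out any packets staged while the
 * interface was down.
 */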
2500 int
2501 wg_up(struct wg_softc *sc)
2502 {
2503 	struct wg_peer	*peer;
2504 	int		 ret = 0;
2505 
2506 	NET_ASSERT_LOCKED();
2507 	/*
2508 	 * We use IFF_RUNNING for exclusive access here. We may also want an
2509 	 * exclusive sc_lock, as wg_bind may write to sc_udp_port. We also want
2510 	 * to drop NET_LOCK to call socreate, sobind, etc. Once solock is no
2511 	 * longer equivalent to NET_LOCK, we may be able to avoid this.
2512 	 */
2513 	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
2514 		SET(sc->sc_if.if_flags, IFF_RUNNING);
2515 		NET_UNLOCK();
2516 
2517 		rw_enter_write(&sc->sc_lock);
2518 		/*
2519 		 * If we successfully bind the socket, then enable the timers
2520 		 * for the peer. This will send all staged packets and a
2521 		 * keepalive if necessary.
2522 		 */
2523 		ret = wg_bind(sc, &sc->sc_udp_port, &sc->sc_udp_rtable);
2524 		if (ret == 0) {
2525 			TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2526 				wg_timers_enable(&peer->p_timers);
2527 				wg_queue_out(sc, peer);
2528 			}
2529 		}
2530 		rw_exit_write(&sc->sc_lock);
2531 
2532 		NET_LOCK();
2533 		if (ret != 0)
2534 			CLR(sc->sc_if.if_flags, IFF_RUNNING);
2535 	}
2536 	return ret;
2537 }
2538 
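/*
 * Bring the interface down: purge staged packets, disable the timers, wait
 * for in-flight handshake tasks to finish, clear each peer's noise state and
 * unbind the UDP socket(s).
 */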
2539 void
2540 wg_down(struct wg_softc *sc)
2541 {
2542 	struct wg_peer	*peer;
2543 
2544 	NET_ASSERT_LOCKED();
2545 	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING))
2546 		return;
2547 	CLR(sc->sc_if.if_flags, IFF_RUNNING);
2548 	NET_UNLOCK();
2549 
2550 	/*
2551 	 * We only need a read lock here, as we aren't writing to anything
2552 	 * that isn't granularly locked.
2553 	 */
2554 	rw_enter_read(&sc->sc_lock);
2555 	TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2556 		mq_purge(&peer->p_stage_queue);
2557 		wg_timers_disable(&peer->p_timers);
2558 	}
2559 
2560 	taskq_barrier(wg_handshake_taskq);
2561 	TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2562 		noise_remote_clear(&peer->p_remote);
2563 		wg_timers_event_reset_handshake_last_sent(&peer->p_timers);
2564 	}
2565 
2566 	wg_unbind(sc);
2567 	rw_exit_read(&sc->sc_lock);
2568 	NET_LOCK();
2569 }
2570 
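/*
 * Create a new wg(4) instance. The first instance also creates the shared
 * handshake and crypto task queues. The softc (noise state, cookie checker,
 * allowed-IP ART tables, peer and index hashtables, packet rings) is fully
 * initialised before the ifnet is attached.
 */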
2571 int
2572 wg_clone_create(struct if_clone *ifc, int unit)
2573 {
2574 	struct ifnet		*ifp;
2575 	struct wg_softc		*sc;
2576 	struct noise_upcall	 local_upcall;
2577 
2578 	KERNEL_ASSERT_LOCKED();
2579 
2580 	if (wg_counter == 0) {
2581 		wg_handshake_taskq = taskq_create("wg_handshake",
2582 		    2, IPL_NET, TASKQ_MPSAFE);
2583 		wg_crypt_taskq = taskq_create("wg_crypt",
2584 		    ncpus, IPL_NET, TASKQ_MPSAFE);
2585 
2586 		if (wg_handshake_taskq == NULL || wg_crypt_taskq == NULL) {
2587 			if (wg_handshake_taskq != NULL)
2588 				taskq_destroy(wg_handshake_taskq);
2589 			if (wg_crypt_taskq != NULL)
2590 				taskq_destroy(wg_crypt_taskq);
2591 			wg_handshake_taskq = NULL;
2592 			wg_crypt_taskq = NULL;
2593 			return ENOTRECOVERABLE;
2594 		}
2595 	}
2596 	wg_counter++;
2597 
2598 	if ((sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL)
2599 		goto ret_00;
2600 
2601 	local_upcall.u_arg = sc;
2602 	local_upcall.u_remote_get = wg_remote_get;
2603 	local_upcall.u_index_set = wg_index_set;
2604 	local_upcall.u_index_drop = wg_index_drop;
2605 
2606 	TAILQ_INIT(&sc->sc_peer_seq);
2607 
2608 	/* sc_if is initialised after everything else */
2609 	arc4random_buf(&sc->sc_secret, sizeof(sc->sc_secret));
2610 
2611 	rw_init(&sc->sc_lock, "wg");
2612 	noise_local_init(&sc->sc_local, &local_upcall);
2613 	if (cookie_checker_init(&sc->sc_cookie, &wg_ratelimit_pool) != 0)
2614 		goto ret_01;
2615 	sc->sc_udp_port = 0;
2616 	sc->sc_udp_rtable = 0;
2617 
2618 	rw_init(&sc->sc_so_lock, "wg_so");
2619 	sc->sc_so4 = NULL;
2620 #ifdef INET6
2621 	sc->sc_so6 = NULL;
2622 #endif
2623 
2624 	sc->sc_aip_num = 0;
2625 	if ((sc->sc_aip4 = art_alloc(0, 32, 0)) == NULL)
2626 		goto ret_02;
2627 #ifdef INET6
2628 	if ((sc->sc_aip6 = art_alloc(0, 128, 0)) == NULL)
2629 		goto ret_03;
2630 #endif
2631 
2632 	rw_init(&sc->sc_peer_lock, "wg_peer");
2633 	sc->sc_peer_num = 0;
2634 	if ((sc->sc_peer = hashinit(HASHTABLE_PEER_SIZE, M_DEVBUF,
2635 	    M_NOWAIT, &sc->sc_peer_mask)) == NULL)
2636 		goto ret_04;
2637 
2638 	mtx_init(&sc->sc_index_mtx, IPL_NET);
2639 	if ((sc->sc_index = hashinit(HASHTABLE_INDEX_SIZE, M_DEVBUF,
2640 	    M_NOWAIT, &sc->sc_index_mask)) == NULL)
2641 		goto ret_05;
2642 
2643 	task_set(&sc->sc_handshake, wg_handshake_worker, sc);
2644 	mq_init(&sc->sc_handshake_queue, MAX_QUEUED_HANDSHAKES, IPL_NET);
2645 
2646 	task_set(&sc->sc_encap, wg_encap_worker, sc);
2647 	task_set(&sc->sc_decap, wg_decap_worker, sc);
2648 
2649 	bzero(&sc->sc_encap_ring, sizeof(sc->sc_encap_ring));
2650 	mtx_init(&sc->sc_encap_ring.r_mtx, IPL_NET);
2651 	bzero(&sc->sc_decap_ring, sizeof(sc->sc_decap_ring));
2652 	mtx_init(&sc->sc_decap_ring.r_mtx, IPL_NET);
2653 
2654 	/* We've set up the softc; now we can set up the ifnet. */
2655 	ifp = &sc->sc_if;
2656 	ifp->if_softc = sc;
2657 
2658 	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "wg%d", unit);
2659 
2660 	ifp->if_mtu = DEFAULT_MTU;
2661 	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_NOARP;
2662 	ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
2663 	ifp->if_txmit = 64; /* Keep our workers active for longer. */
2664 
2665 	ifp->if_ioctl = wg_ioctl;
2666 	ifp->if_qstart = wg_qstart;
2667 	ifp->if_output = wg_output;
2668 
2669 	ifp->if_type = IFT_WIREGUARD;
2670 	ifp->if_rtrequest = p2p_rtrequest;
2671 
2672 	if_attach(ifp);
2673 	if_alloc_sadl(ifp);
2674 	if_counters_alloc(ifp);
2675 
2676 #if NBPFILTER > 0
2677 	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
2678 #endif
2679 
2680 	DPRINTF(sc, "Interface created\n");
2681 
2682 	return 0;
2683 ret_05:
2684 	hashfree(sc->sc_peer, HASHTABLE_PEER_SIZE, M_DEVBUF);
2685 ret_04:
2686 #ifdef INET6
2687 	free(sc->sc_aip6, M_RTABLE, sizeof(*sc->sc_aip6));
2688 ret_03:
2689 #endif
2690 	free(sc->sc_aip4, M_RTABLE, sizeof(*sc->sc_aip4));
2691 ret_02:
2692 	cookie_checker_deinit(&sc->sc_cookie);
2693 ret_01:
2694 	free(sc, M_DEVBUF, sizeof(*sc));
2695 ret_00:
2696 	return ENOBUFS;
2697 }
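
/*
 * Destroy a cloned interface: tear down all peers, detach the ifnet, free
 * the hashtables, ART tables and cookie state, and destroy the shared task
 * queues when the last wg(4) interface goes away.
 */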
2698 int
2699 wg_clone_destroy(struct ifnet *ifp)
2700 {
2701 	struct wg_softc	*sc = ifp->if_softc;
2702 	struct wg_peer	*peer, *tpeer;
2703 
2704 	KERNEL_ASSERT_LOCKED();
2705 
2706 	rw_enter_write(&sc->sc_lock);
2707 	TAILQ_FOREACH_SAFE(peer, &sc->sc_peer_seq, p_seq_entry, tpeer)
2708 		wg_peer_destroy(peer);
2709 	rw_exit_write(&sc->sc_lock);
2710 
2711 	wg_unbind(sc);
2712 	if_detach(ifp);
2713 
2714 	wg_counter--;
2715 	if (wg_counter == 0) {
2716 		KASSERT(wg_handshake_taskq != NULL && wg_crypt_taskq != NULL);
2717 		taskq_destroy(wg_handshake_taskq);
2718 		taskq_destroy(wg_crypt_taskq);
2719 		wg_handshake_taskq = NULL;
2720 		wg_crypt_taskq = NULL;
2721 	}
2722 
2723 	DPRINTF(sc, "Destroyed interface\n");
2724 
2725 	hashfree(sc->sc_index, HASHTABLE_INDEX_SIZE, M_DEVBUF);
2726 	hashfree(sc->sc_peer, HASHTABLE_PEER_SIZE, M_DEVBUF);
2727 #ifdef INET6
2728 	free(sc->sc_aip6, M_RTABLE, sizeof(*sc->sc_aip6));
2729 #endif
2730 	free(sc->sc_aip4, M_RTABLE, sizeof(*sc->sc_aip4));
2731 	cookie_checker_deinit(&sc->sc_cookie);
2732 	free(sc, M_DEVBUF, sizeof(*sc));
2733 	return 0;
2734 }
2735 
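/*
 * Pseudo-device attach routine: run the optional self-tests, register the
 * interface cloner and initialise the pools used for peers, allowed IPs and
 * cookie rate-limit entries.
 */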
2736 void
2737 wgattach(int nwg)
2738 {
2739 #ifdef WGTEST
2740 	cookie_test();
2741 	noise_test();
2742 #endif
2743 	if_clone_attach(&wg_cloner);
2744 
2745 	pool_init(&wg_aip_pool, sizeof(struct wg_aip), 0,
2746 			IPL_NET, 0, "wgaip", NULL);
2747 	pool_init(&wg_peer_pool, sizeof(struct wg_peer), 0,
2748 			IPL_NET, 0, "wgpeer", NULL);
2749 	pool_init(&wg_ratelimit_pool, sizeof(struct ratelimit_entry), 0,
2750 			IPL_NET, 0, "wgratelimit", NULL);
2751 }
2752