xref: /openbsd-src/sys/net/if_wg.c (revision 53555c846a0a6f917dbd0a191f826da995ab1c42)
1 /*	$OpenBSD: if_wg.c,v 1.39 2024/10/31 12:33:11 claudio Exp $ */
2 
3 /*
4  * Copyright (C) 2015-2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
5  * Copyright (C) 2019-2020 Matt Dunwoodie <ncon@noconroy.net>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include "bpfilter.h"
21 #include "pf.h"
22 
23 #include <sys/types.h>
24 #include <sys/systm.h>
25 #include <sys/param.h>
26 #include <sys/pool.h>
27 
28 #include <sys/socket.h>
29 #include <sys/socketvar.h>
30 #include <sys/percpu.h>
31 #include <sys/ioctl.h>
32 #include <sys/mbuf.h>
33 
34 #include <net/if.h>
35 #include <net/if_var.h>
36 #include <net/if_types.h>
37 #include <net/if_wg.h>
38 
39 #include <net/wg_noise.h>
40 #include <net/wg_cookie.h>
41 
42 #include <net/pfvar.h>
43 #include <net/route.h>
44 #include <net/bpf.h>
45 #include <net/art.h>
46 
47 #include <netinet/ip.h>
48 #include <netinet/ip6.h>
49 #include <netinet/udp.h>
50 #include <netinet/in_pcb.h>
51 
52 #include <crypto/siphash.h>
53 
54 #define DEFAULT_MTU		1420
55 
56 #define MAX_STAGED_PKT		128
57 #define MAX_QUEUED_PKT		1024
58 #define MAX_QUEUED_PKT_MASK	(MAX_QUEUED_PKT - 1)
59 
60 #define MAX_QUEUED_HANDSHAKES	4096
61 
62 #define HASHTABLE_PEER_SIZE	(1 << 11)
63 #define HASHTABLE_INDEX_SIZE	(1 << 13)
64 #define MAX_PEERS_PER_IFACE	(1 << 20)
65 
66 #define REKEY_TIMEOUT		5
67 #define REKEY_TIMEOUT_JITTER	334 /* 1/3 sec, round for arc4random_uniform */
68 #define KEEPALIVE_TIMEOUT	10
69 #define MAX_TIMER_HANDSHAKES	(90 / REKEY_TIMEOUT)
70 #define NEW_HANDSHAKE_TIMEOUT	(REKEY_TIMEOUT + KEEPALIVE_TIMEOUT)
71 #define UNDERLOAD_TIMEOUT	1
72 
73 #define DPRINTF(sc, str, ...) do { if (ISSET((sc)->sc_if.if_flags, IFF_DEBUG))\
74     printf("%s: " str, (sc)->sc_if.if_xname, ##__VA_ARGS__); } while (0)
75 
76 #define CONTAINER_OF(ptr, type, member) ({			\
77 	const __typeof( ((type *)0)->member ) *__mptr = (ptr);	\
78 	(type *)( (char *)__mptr - offsetof(type,member) );})
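/*
 * Illustration: CONTAINER_OF recovers the enclosing structure from a pointer
 * to one of its embedded members.  This file uses it to get back to the
 * owning wg_peer, e.g.
 *
 *	peer = CONTAINER_OF(t, struct wg_peer, p_timers);
 *
 * which is valid because p_timers is embedded in struct wg_peer rather than
 * pointed to.
 */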
79 
80 /* First byte indicating packet type on the wire */
81 #define WG_PKT_INITIATION htole32(1)
82 #define WG_PKT_RESPONSE htole32(2)
83 #define WG_PKT_COOKIE htole32(3)
84 #define WG_PKT_DATA htole32(4)
85 
86 #define WG_PKT_WITH_PADDING(n)	(((n) + (16-1)) & (~(16-1)))
87 #define WG_KEY_SIZE		WG_KEY_LEN
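/*
 * Illustration: WG_PKT_WITH_PADDING rounds a plaintext length up to the next
 * multiple of 16 before encryption, e.g.
 *
 *	WG_PKT_WITH_PADDING(0)    == 0     keepalives stay empty
 *	WG_PKT_WITH_PADDING(1)    == 16
 *	WG_PKT_WITH_PADDING(1420) == 1424
 *
 * wg_encap() zeroes the added padding bytes before encrypting.
 */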
88 
89 struct wg_pkt_initiation {
90 	uint32_t		t;
91 	uint32_t		s_idx;
92 	uint8_t			ue[NOISE_PUBLIC_KEY_LEN];
93 	uint8_t			es[NOISE_PUBLIC_KEY_LEN + NOISE_AUTHTAG_LEN];
94 	uint8_t			ets[NOISE_TIMESTAMP_LEN + NOISE_AUTHTAG_LEN];
95 	struct cookie_macs	m;
96 };
97 
98 struct wg_pkt_response {
99 	uint32_t		t;
100 	uint32_t		s_idx;
101 	uint32_t		r_idx;
102 	uint8_t			ue[NOISE_PUBLIC_KEY_LEN];
103 	uint8_t			en[0 + NOISE_AUTHTAG_LEN];
104 	struct cookie_macs	m;
105 };
106 
107 struct wg_pkt_cookie {
108 	uint32_t		t;
109 	uint32_t		r_idx;
110 	uint8_t			nonce[COOKIE_NONCE_SIZE];
111 	uint8_t			ec[COOKIE_ENCRYPTED_SIZE];
112 };
113 
114 struct wg_pkt_data {
115 	uint32_t		t;
116 	uint32_t		r_idx;
117 	uint8_t			nonce[sizeof(uint64_t)];
118 	uint8_t			buf[];
119 };
120 
121 struct wg_endpoint {
122 	union {
123 		struct sockaddr		r_sa;
124 		struct sockaddr_in	r_sin;
125 #ifdef INET6
126 		struct sockaddr_in6	r_sin6;
127 #endif
128 	} e_remote;
129 	union {
130 		struct in_addr		l_in;
131 #ifdef INET6
132 		struct in6_pktinfo	l_pktinfo6;
133 #define l_in6 l_pktinfo6.ipi6_addr
134 #endif
135 	} e_local;
136 };
137 
138 struct wg_tag {
139 	struct wg_endpoint	 t_endpoint;
140 	struct wg_peer		*t_peer;
141 	struct mbuf		*t_mbuf;
142 	int			 t_done;
143 	int			 t_mtu;
144 };
145 
146 struct wg_index {
147 	LIST_ENTRY(wg_index)	 i_entry;
148 	SLIST_ENTRY(wg_index)	 i_unused_entry;
149 	uint32_t		 i_key;
150 	struct noise_remote	*i_value;
151 };
152 
153 struct wg_timers {
154 	/* t_mtx is for blocking wg_timers_event_* when setting t_disabled. */
155 	struct mutex		 t_mtx;
156 
157 	int			 t_disabled;
158 	int			 t_need_another_keepalive;
159 	uint16_t		 t_persistent_keepalive_interval;
160 	struct timeout		 t_new_handshake;
161 	struct timeout		 t_send_keepalive;
162 	struct timeout		 t_retry_handshake;
163 	struct timeout		 t_zero_key_material;
164 	struct timeout		 t_persistent_keepalive;
165 
166 	struct mutex		 t_handshake_mtx;
167 	struct timespec		 t_handshake_last_sent;	/* nanouptime */
168 	struct timespec		 t_handshake_complete;	/* nanotime */
169 	int			 t_handshake_retries;
170 };
171 
172 struct wg_aip {
173 	struct art_node		 a_node;
174 	LIST_ENTRY(wg_aip)	 a_entry;
175 	struct wg_peer		*a_peer;
176 	struct wg_aip_io	 a_data;
177 };
178 
179 struct wg_queue {
180 	struct mutex		 q_mtx;
181 	struct mbuf_list	 q_list;
182 };
183 
184 struct wg_ring {
185 	struct mutex	 r_mtx;
186 	uint32_t	 r_head;
187 	uint32_t	 r_tail;
188 	struct mbuf	*r_buf[MAX_QUEUED_PKT];
189 };
190 
191 struct wg_peer {
192 	LIST_ENTRY(wg_peer)	 p_pubkey_entry;
193 	TAILQ_ENTRY(wg_peer)	 p_seq_entry;
194 	uint64_t		 p_id;
195 	struct wg_softc		*p_sc;
196 
197 	struct noise_remote	 p_remote;
198 	struct cookie_maker	 p_cookie;
199 	struct wg_timers	 p_timers;
200 
201 	struct mutex		 p_counters_mtx;
202 	uint64_t		 p_counters_tx;
203 	uint64_t		 p_counters_rx;
204 
205 	struct mutex		 p_endpoint_mtx;
206 	struct wg_endpoint	 p_endpoint;
207 
208 	struct task		 p_send_initiation;
209 	struct task		 p_send_keepalive;
210 	struct task		 p_clear_secrets;
211 	struct task		 p_deliver_out;
212 	struct task		 p_deliver_in;
213 
214 	struct mbuf_queue	 p_stage_queue;
215 	struct wg_queue		 p_encap_queue;
216 	struct wg_queue		 p_decap_queue;
217 
218 	SLIST_HEAD(,wg_index)	 p_unused_index;
219 	struct wg_index		 p_index[3];
220 
221 	LIST_HEAD(,wg_aip)	 p_aip;
222 
223 	SLIST_ENTRY(wg_peer)	 p_start_list;
224 	int			 p_start_onlist;
225 
226 	char			 p_description[IFDESCRSIZE];
227 };
228 
229 struct wg_softc {
230 	struct ifnet		 sc_if;
231 	SIPHASH_KEY		 sc_secret;
232 
233 	struct rwlock		 sc_lock;
234 	struct noise_local	 sc_local;
235 	struct cookie_checker	 sc_cookie;
236 	in_port_t		 sc_udp_port;
237 	int			 sc_udp_rtable;
238 
239 	struct rwlock		 sc_so_lock;
240 	struct socket		*sc_so4;
241 #ifdef INET6
242 	struct socket		*sc_so6;
243 #endif
244 
245 	size_t			 sc_aip_num;
246 	struct art_root		*sc_aip4;
247 #ifdef INET6
248 	struct art_root		*sc_aip6;
249 #endif
250 
251 	struct rwlock		 sc_peer_lock;
252 	size_t			 sc_peer_num;
253 	LIST_HEAD(,wg_peer)	*sc_peer;
254 	TAILQ_HEAD(,wg_peer)	 sc_peer_seq;
255 	u_long			 sc_peer_mask;
256 
257 	struct mutex		 sc_index_mtx;
258 	LIST_HEAD(,wg_index)	*sc_index;
259 	u_long			 sc_index_mask;
260 
261 	struct task		 sc_handshake;
262 	struct mbuf_queue	 sc_handshake_queue;
263 
264 	struct task		 sc_encap;
265 	struct task		 sc_decap;
266 	struct wg_ring		 sc_encap_ring;
267 	struct wg_ring		 sc_decap_ring;
268 };
269 
270 struct wg_peer *
271 	wg_peer_create(struct wg_softc *, uint8_t[WG_KEY_SIZE]);
272 struct wg_peer *
273 	wg_peer_lookup(struct wg_softc *, const uint8_t[WG_KEY_SIZE]);
274 void	wg_peer_destroy(struct wg_peer *);
275 void	wg_peer_set_endpoint_from_tag(struct wg_peer *, struct wg_tag *);
276 void	wg_peer_set_sockaddr(struct wg_peer *, struct sockaddr *);
277 int	wg_peer_get_sockaddr(struct wg_peer *, struct sockaddr *);
278 void	wg_peer_clear_src(struct wg_peer *);
279 void	wg_peer_get_endpoint(struct wg_peer *, struct wg_endpoint *);
280 void	wg_peer_counters_add(struct wg_peer *, uint64_t, uint64_t);
281 
282 int	wg_aip_add(struct wg_softc *, struct wg_peer *, struct wg_aip_io *);
283 struct wg_peer *
284 	wg_aip_lookup(struct art_root *, void *);
285 int	wg_aip_remove(struct wg_softc *, struct wg_peer *,
286 	    struct wg_aip_io *);
287 
288 int	wg_socket_open(struct socket **, int, in_port_t *, int *, void *);
289 void	wg_socket_close(struct socket **);
290 int	wg_bind(struct wg_softc *, in_port_t *, int *);
291 void	wg_unbind(struct wg_softc *);
292 int	wg_send(struct wg_softc *, struct wg_endpoint *, struct mbuf *);
293 void	wg_send_buf(struct wg_softc *, struct wg_endpoint *, uint8_t *,
294 	    size_t);
295 
296 struct wg_tag *
297 	wg_tag_get(struct mbuf *);
298 
299 void	wg_timers_init(struct wg_timers *);
300 void	wg_timers_enable(struct wg_timers *);
301 void	wg_timers_disable(struct wg_timers *);
302 void	wg_timers_set_persistent_keepalive(struct wg_timers *, uint16_t);
303 int	wg_timers_get_persistent_keepalive(struct wg_timers *, uint16_t *);
304 void	wg_timers_get_last_handshake(struct wg_timers *, struct timespec *);
305 int	wg_timers_expired_handshake_last_sent(struct wg_timers *);
306 int	wg_timers_check_handshake_last_sent(struct wg_timers *);
307 
308 void	wg_timers_event_data_sent(struct wg_timers *);
309 void	wg_timers_event_data_received(struct wg_timers *);
310 void	wg_timers_event_any_authenticated_packet_sent(struct wg_timers *);
311 void	wg_timers_event_any_authenticated_packet_received(struct wg_timers *);
312 void	wg_timers_event_handshake_initiated(struct wg_timers *);
313 void	wg_timers_event_handshake_responded(struct wg_timers *);
314 void	wg_timers_event_handshake_complete(struct wg_timers *);
315 void	wg_timers_event_session_derived(struct wg_timers *);
316 void	wg_timers_event_any_authenticated_packet_traversal(struct wg_timers *);
317 void	wg_timers_event_want_initiation(struct wg_timers *);
318 void	wg_timers_event_reset_handshake_last_sent(struct wg_timers *);
319 
320 void	wg_timers_run_send_initiation(void *, int);
321 void	wg_timers_run_retry_handshake(void *);
322 void	wg_timers_run_send_keepalive(void *);
323 void	wg_timers_run_new_handshake(void *);
324 void	wg_timers_run_zero_key_material(void *);
325 void	wg_timers_run_persistent_keepalive(void *);
326 
327 void	wg_peer_send_buf(struct wg_peer *, uint8_t *, size_t);
328 void	wg_send_initiation(void *);
329 void	wg_send_response(struct wg_peer *);
330 void	wg_send_cookie(struct wg_softc *, struct cookie_macs *, uint32_t,
331 	    struct wg_endpoint *);
332 void	wg_send_keepalive(void *);
333 void	wg_peer_clear_secrets(void *);
334 void	wg_handshake(struct wg_softc *, struct mbuf *);
335 void	wg_handshake_worker(void *);
336 
337 void	wg_encap(struct wg_softc *, struct mbuf *);
338 void	wg_decap(struct wg_softc *, struct mbuf *);
339 void	wg_encap_worker(void *);
340 void	wg_decap_worker(void *);
341 void	wg_deliver_out(void *);
342 void	wg_deliver_in(void *);
343 
344 int	wg_queue_in(struct wg_softc *, struct wg_peer *, struct mbuf *);
345 void	wg_queue_out(struct wg_softc *, struct wg_peer *);
346 struct mbuf *
347 	wg_ring_dequeue(struct wg_ring *);
348 struct mbuf *
349 	wg_queue_dequeue(struct wg_queue *, struct wg_tag **);
350 size_t	wg_queue_len(struct wg_queue *);
351 
352 struct noise_remote *
353 	wg_remote_get(void *, uint8_t[NOISE_PUBLIC_KEY_LEN]);
354 uint32_t
355 	wg_index_set(void *, struct noise_remote *);
356 struct noise_remote *
357 	wg_index_get(void *, uint32_t);
358 void	wg_index_drop(void *, uint32_t);
359 
360 struct mbuf *
361 	wg_input(void *, struct mbuf *, struct ip *, struct ip6_hdr *, void *,
362 	    int);
363 int	wg_output(struct ifnet *, struct mbuf *, struct sockaddr *,
364 	    struct rtentry *);
365 int	wg_ioctl_set(struct wg_softc *, struct wg_data_io *);
366 int	wg_ioctl_get(struct wg_softc *, struct wg_data_io *);
367 int	wg_ioctl(struct ifnet *, u_long, caddr_t);
368 int	wg_up(struct wg_softc *);
369 void	wg_down(struct wg_softc *);
370 
371 int	wg_clone_create(struct if_clone *, int);
372 int	wg_clone_destroy(struct ifnet *);
373 void	wgattach(int);
374 
375 uint64_t	peer_counter = 0;
376 struct pool	wg_aip_pool;
377 struct pool	wg_peer_pool;
378 struct pool	wg_ratelimit_pool;
379 struct timeval	underload_interval = { UNDERLOAD_TIMEOUT, 0 };
380 
381 size_t		 wg_counter = 0;
382 struct taskq	*wg_handshake_taskq;
383 struct taskq	*wg_crypt_taskq;
384 
385 struct if_clone	wg_cloner =
386     IF_CLONE_INITIALIZER("wg", wg_clone_create, wg_clone_destroy);
387 
388 struct wg_peer *
389 wg_peer_create(struct wg_softc *sc, uint8_t public[WG_KEY_SIZE])
390 {
391 	struct wg_peer	*peer;
392 	uint64_t	 idx;
393 
394 	rw_assert_wrlock(&sc->sc_lock);
395 
396 	if (sc->sc_peer_num >= MAX_PEERS_PER_IFACE)
397 		return NULL;
398 
399 	if ((peer = pool_get(&wg_peer_pool, PR_NOWAIT)) == NULL)
400 		return NULL;
401 
402 	peer->p_id = peer_counter++;
403 	peer->p_sc = sc;
404 
405 	noise_remote_init(&peer->p_remote, public, &sc->sc_local);
406 	cookie_maker_init(&peer->p_cookie, public);
407 	wg_timers_init(&peer->p_timers);
408 
409 	mtx_init(&peer->p_counters_mtx, IPL_NET);
410 	peer->p_counters_tx = 0;
411 	peer->p_counters_rx = 0;
412 
413 	strlcpy(peer->p_description, "", IFDESCRSIZE);
414 
415 	mtx_init(&peer->p_endpoint_mtx, IPL_NET);
416 	bzero(&peer->p_endpoint, sizeof(peer->p_endpoint));
417 
418 	task_set(&peer->p_send_initiation, wg_send_initiation, peer);
419 	task_set(&peer->p_send_keepalive, wg_send_keepalive, peer);
420 	task_set(&peer->p_clear_secrets, wg_peer_clear_secrets, peer);
421 	task_set(&peer->p_deliver_out, wg_deliver_out, peer);
422 	task_set(&peer->p_deliver_in, wg_deliver_in, peer);
423 
424 	mq_init(&peer->p_stage_queue, MAX_STAGED_PKT, IPL_NET);
425 	mtx_init(&peer->p_encap_queue.q_mtx, IPL_NET);
426 	ml_init(&peer->p_encap_queue.q_list);
427 	mtx_init(&peer->p_decap_queue.q_mtx, IPL_NET);
428 	ml_init(&peer->p_decap_queue.q_list);
429 
430 	SLIST_INIT(&peer->p_unused_index);
431 	SLIST_INSERT_HEAD(&peer->p_unused_index, &peer->p_index[0],
432 	    i_unused_entry);
433 	SLIST_INSERT_HEAD(&peer->p_unused_index, &peer->p_index[1],
434 	    i_unused_entry);
435 	SLIST_INSERT_HEAD(&peer->p_unused_index, &peer->p_index[2],
436 	    i_unused_entry);
437 
438 	LIST_INIT(&peer->p_aip);
439 
440 	peer->p_start_onlist = 0;
441 
442 	idx = SipHash24(&sc->sc_secret, public, WG_KEY_SIZE);
443 	idx &= sc->sc_peer_mask;
444 
445 	rw_enter_write(&sc->sc_peer_lock);
446 	LIST_INSERT_HEAD(&sc->sc_peer[idx], peer, p_pubkey_entry);
447 	TAILQ_INSERT_TAIL(&sc->sc_peer_seq, peer, p_seq_entry);
448 	sc->sc_peer_num++;
449 	rw_exit_write(&sc->sc_peer_lock);
450 
451 	DPRINTF(sc, "Peer %llu created\n", peer->p_id);
452 	return peer;
453 }
454 
455 struct wg_peer *
456 wg_peer_lookup(struct wg_softc *sc, const uint8_t public[WG_KEY_SIZE])
457 {
458 	uint8_t		 peer_key[WG_KEY_SIZE];
459 	struct wg_peer	*peer;
460 	uint64_t	 idx;
461 
462 	idx = SipHash24(&sc->sc_secret, public, WG_KEY_SIZE);
463 	idx &= sc->sc_peer_mask;
464 
465 	rw_enter_read(&sc->sc_peer_lock);
466 	LIST_FOREACH(peer, &sc->sc_peer[idx], p_pubkey_entry) {
467 		noise_remote_keys(&peer->p_remote, peer_key, NULL);
468 		if (timingsafe_bcmp(peer_key, public, WG_KEY_SIZE) == 0)
469 			goto done;
470 	}
471 	peer = NULL;
472 done:
473 	rw_exit_read(&sc->sc_peer_lock);
474 	return peer;
475 }
476 
477 void
478 wg_peer_destroy(struct wg_peer *peer)
479 {
480 	struct wg_softc	*sc = peer->p_sc;
481 	struct wg_aip *aip, *taip;
482 
483 	rw_assert_wrlock(&sc->sc_lock);
484 
485 	/*
486 	 * Remove peer from the pubkey hashtable and disable all timeouts.
487 	 * After this, and after flushing wg_handshake_taskq, no more handshakes
488 	 * can be started.
489 	 */
490 	rw_enter_write(&sc->sc_peer_lock);
491 	LIST_REMOVE(peer, p_pubkey_entry);
492 	TAILQ_REMOVE(&sc->sc_peer_seq, peer, p_seq_entry);
493 	sc->sc_peer_num--;
494 	rw_exit_write(&sc->sc_peer_lock);
495 
496 	wg_timers_disable(&peer->p_timers);
497 
498 	taskq_barrier(wg_handshake_taskq);
499 
500 	/*
501 	 * Now we drop all allowed IPs, to drop all outgoing packets to the
502 	 * peer. Then drop all the indexes to drop all incoming packets to the
503 	 * peer. Then we can flush if_snd, wg_crypt_taskq and then nettq to
504 	 * ensure no more references to the peer exist.
505 	 */
506 	LIST_FOREACH_SAFE(aip, &peer->p_aip, a_entry, taip)
507 		wg_aip_remove(sc, peer, &aip->a_data);
508 
509 	noise_remote_clear(&peer->p_remote);
510 
511 	NET_LOCK();
512 	while (!ifq_empty(&sc->sc_if.if_snd)) {
513 		/*
514 		 * XXX: `if_snd' of stopped interface could still
515 		 * contain packets
516 		 */
517 		if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
518 			ifq_purge(&sc->sc_if.if_snd);
519 			continue;
520 		}
521 		NET_UNLOCK();
522 		tsleep_nsec(&nowake, PWAIT, "wg_ifq", 1000);
523 		NET_LOCK();
524 	}
525 	NET_UNLOCK();
526 
527 	taskq_barrier(wg_crypt_taskq);
528 	taskq_barrier(net_tq(sc->sc_if.if_index));
529 
530 	if (!mq_empty(&peer->p_stage_queue))
531 		mq_purge(&peer->p_stage_queue);
532 
533 	DPRINTF(sc, "Peer %llu destroyed\n", peer->p_id);
534 	explicit_bzero(peer, sizeof(*peer));
535 	pool_put(&wg_peer_pool, peer);
536 }
537 
538 void
539 wg_peer_set_endpoint_from_tag(struct wg_peer *peer, struct wg_tag *t)
540 {
541 	if (memcmp(&t->t_endpoint, &peer->p_endpoint,
542 	    sizeof(t->t_endpoint)) == 0)
543 		return;
544 
545 	mtx_enter(&peer->p_endpoint_mtx);
546 	peer->p_endpoint = t->t_endpoint;
547 	mtx_leave(&peer->p_endpoint_mtx);
548 }
549 
550 void
551 wg_peer_set_sockaddr(struct wg_peer *peer, struct sockaddr *remote)
552 {
553 	mtx_enter(&peer->p_endpoint_mtx);
554 	memcpy(&peer->p_endpoint.e_remote, remote,
555 	       sizeof(peer->p_endpoint.e_remote));
556 	bzero(&peer->p_endpoint.e_local, sizeof(peer->p_endpoint.e_local));
557 	mtx_leave(&peer->p_endpoint_mtx);
558 }
559 
560 int
561 wg_peer_get_sockaddr(struct wg_peer *peer, struct sockaddr *remote)
562 {
563 	int	ret = 0;
564 
565 	mtx_enter(&peer->p_endpoint_mtx);
566 	if (peer->p_endpoint.e_remote.r_sa.sa_family != AF_UNSPEC)
567 		memcpy(remote, &peer->p_endpoint.e_remote,
568 		       sizeof(peer->p_endpoint.e_remote));
569 	else
570 		ret = ENOENT;
571 	mtx_leave(&peer->p_endpoint_mtx);
572 	return ret;
573 }
574 
575 void
576 wg_peer_clear_src(struct wg_peer *peer)
577 {
578 	mtx_enter(&peer->p_endpoint_mtx);
579 	bzero(&peer->p_endpoint.e_local, sizeof(peer->p_endpoint.e_local));
580 	mtx_leave(&peer->p_endpoint_mtx);
581 }
582 
583 void
584 wg_peer_get_endpoint(struct wg_peer *peer, struct wg_endpoint *endpoint)
585 {
586 	mtx_enter(&peer->p_endpoint_mtx);
587 	memcpy(endpoint, &peer->p_endpoint, sizeof(*endpoint));
588 	mtx_leave(&peer->p_endpoint_mtx);
589 }
590 
591 void
592 wg_peer_counters_add(struct wg_peer *peer, uint64_t tx, uint64_t rx)
593 {
594 	mtx_enter(&peer->p_counters_mtx);
595 	peer->p_counters_tx += tx;
596 	peer->p_counters_rx += rx;
597 	mtx_leave(&peer->p_counters_mtx);
598 }
599 
600 int
601 wg_aip_add(struct wg_softc *sc, struct wg_peer *peer, struct wg_aip_io *d)
602 {
603 	struct art_root	*root;
604 	struct art_node	*node;
605 	struct wg_aip	*aip;
606 	int		 ret = 0;
607 
608 	switch (d->a_af) {
609 	case AF_INET:	root = sc->sc_aip4; break;
610 #ifdef INET6
611 	case AF_INET6:	root = sc->sc_aip6; break;
612 #endif
613 	default: return EAFNOSUPPORT;
614 	}
615 
616 	if ((aip = pool_get(&wg_aip_pool, PR_NOWAIT|PR_ZERO)) == NULL)
617 		return ENOBUFS;
618 
619 	rw_enter_write(&root->ar_lock);
620 	node = art_insert(root, &aip->a_node, &d->a_addr, d->a_cidr);
621 
622 	if (node == &aip->a_node) {
623 		aip->a_peer = peer;
624 		aip->a_data = *d;
625 		LIST_INSERT_HEAD(&peer->p_aip, aip, a_entry);
626 		sc->sc_aip_num++;
627 	} else {
628 		pool_put(&wg_aip_pool, aip);
629 		aip = (struct wg_aip *) node;
630 		if (aip->a_peer != peer) {
631 			LIST_REMOVE(aip, a_entry);
632 			LIST_INSERT_HEAD(&peer->p_aip, aip, a_entry);
633 			aip->a_peer = peer;
634 		}
635 	}
636 	rw_exit_write(&root->ar_lock);
637 	return ret;
638 }
639 
640 struct wg_peer *
641 wg_aip_lookup(struct art_root *root, void *addr)
642 {
643 	struct srp_ref	 sr;
644 	struct art_node	*node;
645 
646 	node = art_match(root, addr, &sr);
647 	srp_leave(&sr);
648 
649 	return node == NULL ? NULL : ((struct wg_aip *) node)->a_peer;
650 }
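/*
 * wg_aip_lookup() is a longest-prefix match in the per-AF ART table, so a
 * peer with a broad allowed IP such as 0.0.0.0/0 only matches when no other
 * peer has a more specific allowed IP covering the address.
 */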
651 
652 int
653 wg_aip_remove(struct wg_softc *sc, struct wg_peer *peer, struct wg_aip_io *d)
654 {
655 	struct srp_ref	 sr;
656 	struct art_root	*root;
657 	struct art_node	*node;
658 	struct wg_aip	*aip;
659 	int		 ret = 0;
660 
661 	switch (d->a_af) {
662 	case AF_INET:	root = sc->sc_aip4; break;
663 #ifdef INET6
664 	case AF_INET6:	root = sc->sc_aip6; break;
665 #endif
666 	default: return EAFNOSUPPORT;
667 	}
668 
669 	rw_enter_write(&root->ar_lock);
670 	if ((node = art_lookup(root, &d->a_addr, d->a_cidr, &sr)) == NULL) {
671 		ret = ENOENT;
672 	} else if (((struct wg_aip *) node)->a_peer != peer) {
673 		ret = EXDEV;
674 	} else {
675 		aip = (struct wg_aip *)node;
676 		if (art_delete(root, node, &d->a_addr, d->a_cidr) == NULL)
677 			panic("art_delete failed to delete node %p", node);
678 
679 		sc->sc_aip_num--;
680 		LIST_REMOVE(aip, a_entry);
681 		pool_put(&wg_aip_pool, aip);
682 	}
683 
684 	srp_leave(&sr);
685 	rw_exit_write(&root->ar_lock);
686 	return ret;
687 }
688 
689 int
690 wg_socket_open(struct socket **so, int af, in_port_t *port,
691     int *rtable, void *upcall_arg)
692 {
693 	struct mbuf		 mhostnam, mrtable;
694 #ifdef INET6
695 	struct sockaddr_in6	*sin6;
696 #endif
697 	struct sockaddr_in	*sin;
698 	int			 ret;
699 
700 	m_inithdr(&mhostnam);
701 	m_inithdr(&mrtable);
702 
703 	bzero(mtod(&mrtable, u_int *), sizeof(u_int));
704 	*mtod(&mrtable, u_int *) = *rtable;
705 	mrtable.m_len = sizeof(u_int);
706 
707 	if (af == AF_INET) {
708 		sin = mtod(&mhostnam, struct sockaddr_in *);
709 		bzero(sin, sizeof(*sin));
710 		sin->sin_len = sizeof(*sin);
711 		sin->sin_family = AF_INET;
712 		sin->sin_port = *port;
713 		sin->sin_addr.s_addr = INADDR_ANY;
714 		mhostnam.m_len = sin->sin_len;
715 #ifdef INET6
716 	} else if (af == AF_INET6) {
717 		sin6 = mtod(&mhostnam, struct sockaddr_in6 *);
718 		bzero(sin6, sizeof(*sin6));
719 		sin6->sin6_len = sizeof(*sin6);
720 		sin6->sin6_family = AF_INET6;
721 		sin6->sin6_port = *port;
722 		sin6->sin6_addr = (struct in6_addr) { .s6_addr = { 0 } };
723 		mhostnam.m_len = sin6->sin6_len;
724 #endif
725 	} else {
726 		return EAFNOSUPPORT;
727 	}
728 
729 	if ((ret = socreate(af, so, SOCK_DGRAM, 0)) != 0)
730 		return ret;
731 
732 	solock(*so);
733 	sotoinpcb(*so)->inp_upcall = wg_input;
734 	sotoinpcb(*so)->inp_upcall_arg = upcall_arg;
735 	sounlock(*so);
736 
737 	if ((ret = sosetopt(*so, SOL_SOCKET, SO_RTABLE, &mrtable)) == 0) {
738 		solock(*so);
739 		if ((ret = sobind(*so, &mhostnam, curproc)) == 0) {
740 			*port = sotoinpcb(*so)->inp_lport;
741 			*rtable = sotoinpcb(*so)->inp_rtableid;
742 		}
743 		sounlock(*so);
744 	}
745 
746 	if (ret != 0)
747 		wg_socket_close(so);
748 
749 	return ret;
750 }
751 
752 void
753 wg_socket_close(struct socket **so)
754 {
755 	if (*so != NULL && soclose(*so, 0) != 0)
756 		panic("Unable to close wg socket");
757 	*so = NULL;
758 }
759 
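/*
 * Note on wg_bind() below: when *portp is 0, the AF_INET socket picks an
 * ephemeral port and the AF_INET6 socket must then bind that very same port.
 * If that port happens to be taken for v6, wg_socket_open() fails with
 * EADDRINUSE and the whole sequence is retried with a fresh ephemeral port,
 * up to 100 times.
 */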
760 int
761 wg_bind(struct wg_softc *sc, in_port_t *portp, int *rtablep)
762 {
763 	int		 ret = 0, rtable = *rtablep;
764 	in_port_t	 port = *portp;
765 	struct socket	*so4;
766 #ifdef INET6
767 	struct socket	*so6;
768 	int		 retries = 0;
769 retry:
770 #endif
771 	if ((ret = wg_socket_open(&so4, AF_INET, &port, &rtable, sc)) != 0)
772 		return ret;
773 
774 #ifdef INET6
775 	if ((ret = wg_socket_open(&so6, AF_INET6, &port, &rtable, sc)) != 0) {
776 		if (ret == EADDRINUSE && *portp == 0 && retries++ < 100)
777 			goto retry;
778 		wg_socket_close(&so4);
779 		return ret;
780 	}
781 #endif
782 
783 	rw_enter_write(&sc->sc_so_lock);
784 	wg_socket_close(&sc->sc_so4);
785 	sc->sc_so4 = so4;
786 #ifdef INET6
787 	wg_socket_close(&sc->sc_so6);
788 	sc->sc_so6 = so6;
789 #endif
790 	rw_exit_write(&sc->sc_so_lock);
791 
792 	*portp = port;
793 	*rtablep = rtable;
794 	return 0;
795 }
796 
797 void
798 wg_unbind(struct wg_softc *sc)
799 {
800 	rw_enter_write(&sc->sc_so_lock);
801 	wg_socket_close(&sc->sc_so4);
802 #ifdef INET6
803 	wg_socket_close(&sc->sc_so6);
804 #endif
805 	rw_exit_write(&sc->sc_so_lock);
806 }
807 
808 int
809 wg_send(struct wg_softc *sc, struct wg_endpoint *e, struct mbuf *m)
810 {
811 	struct mbuf	 peernam, *control = NULL;
812 	int		 ret;
813 
814 	/* Get local control address before locking */
815 	if (e->e_remote.r_sa.sa_family == AF_INET) {
816 		if (e->e_local.l_in.s_addr != INADDR_ANY)
817 			control = sbcreatecontrol(&e->e_local.l_in,
818 			    sizeof(struct in_addr), IP_SENDSRCADDR,
819 			    IPPROTO_IP);
820 #ifdef INET6
821 	} else if (e->e_remote.r_sa.sa_family == AF_INET6) {
822 		if (!IN6_IS_ADDR_UNSPECIFIED(&e->e_local.l_in6))
823 			control = sbcreatecontrol(&e->e_local.l_pktinfo6,
824 			    sizeof(struct in6_pktinfo), IPV6_PKTINFO,
825 			    IPPROTO_IPV6);
826 #endif
827 	} else {
828 		m_freem(m);
829 		return EAFNOSUPPORT;
830 	}
831 
832 	/* Get remote address */
833 	peernam.m_type = MT_SONAME;
834 	peernam.m_next = NULL;
835 	peernam.m_nextpkt = NULL;
836 	peernam.m_data = (void *)&e->e_remote.r_sa;
837 	peernam.m_len = e->e_remote.r_sa.sa_len;
838 	peernam.m_flags = 0;
839 
840 	rw_enter_read(&sc->sc_so_lock);
841 	if (e->e_remote.r_sa.sa_family == AF_INET && sc->sc_so4 != NULL)
842 		ret = sosend(sc->sc_so4, &peernam, NULL, m, control, 0);
843 #ifdef INET6
844 	else if (e->e_remote.r_sa.sa_family == AF_INET6 && sc->sc_so6 != NULL)
845 		ret = sosend(sc->sc_so6, &peernam, NULL, m, control, 0);
846 #endif
847 	else {
848 		ret = ENOTCONN;
849 		m_freem(control);
850 		m_freem(m);
851 	}
852 	rw_exit_read(&sc->sc_so_lock);
853 
854 	return ret;
855 }
856 
857 void
858 wg_send_buf(struct wg_softc *sc, struct wg_endpoint *e, uint8_t *buf,
859     size_t len)
860 {
861 	struct mbuf	*m;
862 	int		 ret = 0;
863 	size_t		 mlen = len + max_hdr;
864 
865 retry:
866 	m = m_gethdr(M_WAIT, MT_DATA);
867 	if (mlen > MHLEN)
868 		MCLGETL(m, M_WAIT, mlen);
869 	m_align(m, len);
870 	m->m_pkthdr.len = m->m_len = len;
871 	memcpy(mtod(m, void *), buf, len);
872 
873 	/* As we're sending a handshake packet here, we want high priority */
874 	m->m_pkthdr.pf.prio = IFQ_MAXPRIO;
875 
876 	if (ret == 0) {
877 		ret = wg_send(sc, e, m);
878 		/* Retry if we couldn't bind to e->e_local */
879 		if (ret == EADDRNOTAVAIL) {
880 			bzero(&e->e_local, sizeof(e->e_local));
881 			goto retry;
882 		}
883 	} else {
884 		ret = wg_send(sc, e, m);
885 		if (ret != 0)
886 			DPRINTF(sc, "Unable to send packet\n");
887 	}
888 }
889 
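/*
 * Per-packet state lives in an m_tag; the struct wg_tag payload is stored
 * directly after the m_tag header, hence the (mtag + 1) below.  The tag is
 * created and zeroed on first lookup, so t_peer, t_mbuf and t_done all start
 * out as NULL/0.
 */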
890 struct wg_tag *
891 wg_tag_get(struct mbuf *m)
892 {
893 	struct m_tag	*mtag;
894 
895 	if ((mtag = m_tag_find(m, PACKET_TAG_WIREGUARD, NULL)) == NULL) {
896 		mtag = m_tag_get(PACKET_TAG_WIREGUARD, sizeof(struct wg_tag),
897 		    M_NOWAIT);
898 		if (mtag == NULL)
899 			return (NULL);
900 		bzero(mtag + 1, sizeof(struct wg_tag));
901 		m_tag_prepend(m, mtag);
902 	}
903 	return ((struct wg_tag *)(mtag + 1));
904 }
905 
906 /*
907  * The following section handles the timeout callbacks for a WireGuard session.
908  * These functions provide an "event based" model for controlling wg(8) session
909  * timers. All function calls occur after the specified event below.
910  *
911  * wg_timers_event_data_sent:
912  *	tx: data
913  * wg_timers_event_data_received:
914  *	rx: data
915  * wg_timers_event_any_authenticated_packet_sent:
916  *	tx: keepalive, data, handshake
917  * wg_timers_event_any_authenticated_packet_received:
918  *	rx: keepalive, data, handshake
919  * wg_timers_event_any_authenticated_packet_traversal:
920  *	tx, rx: keepalive, data, handshake
921  * wg_timers_event_handshake_initiated:
922  *	tx: initiation
923  * wg_timers_event_handshake_responded:
924  *	tx: response
925  * wg_timers_event_handshake_complete:
926  *	rx: response, confirmation data
927  * wg_timers_event_session_derived:
928  *	tx: response, rx: response
929  * wg_timers_event_want_initiation:
930  *	tx: data failed, old keys expiring
931  * wg_timers_event_reset_handshake_last_sent:
932  * 	anytime we may immediately want a new handshake
933  */
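/*
 * Illustrative walk-through, pieced together from the handlers below: when a
 * data packet is sent, wg_timers_event_data_sent() arms t_new_handshake for
 * NEW_HANDSHAKE_TIMEOUT seconds plus jitter unless it is already pending; if
 * no data comes back, that timeout fires and wg_timers_run_new_handshake()
 * retries the handshake.  When a data packet is received,
 * wg_timers_event_data_received() arms t_send_keepalive for KEEPALIVE_TIMEOUT
 * seconds, or notes that another keepalive is wanted if the timeout is
 * already pending.
 */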
934 void
935 wg_timers_init(struct wg_timers *t)
936 {
937 	bzero(t, sizeof(*t));
938 	mtx_init_flags(&t->t_mtx, IPL_NET, "wg_timers", 0);
939 	mtx_init(&t->t_handshake_mtx, IPL_NET);
940 
941 	timeout_set(&t->t_new_handshake, wg_timers_run_new_handshake, t);
942 	timeout_set(&t->t_send_keepalive, wg_timers_run_send_keepalive, t);
943 	timeout_set(&t->t_retry_handshake, wg_timers_run_retry_handshake, t);
944 	timeout_set(&t->t_persistent_keepalive,
945 	    wg_timers_run_persistent_keepalive, t);
946 	timeout_set(&t->t_zero_key_material,
947 	    wg_timers_run_zero_key_material, t);
948 }
949 
950 void
951 wg_timers_enable(struct wg_timers *t)
952 {
953 	mtx_enter(&t->t_mtx);
954 	t->t_disabled = 0;
955 	mtx_leave(&t->t_mtx);
956 	wg_timers_run_persistent_keepalive(t);
957 }
958 
959 void
960 wg_timers_disable(struct wg_timers *t)
961 {
962 	mtx_enter(&t->t_mtx);
963 	t->t_disabled = 1;
964 	t->t_need_another_keepalive = 0;
965 	mtx_leave(&t->t_mtx);
966 
967 	timeout_del_barrier(&t->t_new_handshake);
968 	timeout_del_barrier(&t->t_send_keepalive);
969 	timeout_del_barrier(&t->t_retry_handshake);
970 	timeout_del_barrier(&t->t_persistent_keepalive);
971 	timeout_del_barrier(&t->t_zero_key_material);
972 }
973 
974 void
975 wg_timers_set_persistent_keepalive(struct wg_timers *t, uint16_t interval)
976 {
977 	mtx_enter(&t->t_mtx);
978 	if (!t->t_disabled) {
979 		t->t_persistent_keepalive_interval = interval;
980 		wg_timers_run_persistent_keepalive(t);
981 	}
982 	mtx_leave(&t->t_mtx);
983 }
984 
985 int
986 wg_timers_get_persistent_keepalive(struct wg_timers *t, uint16_t *interval)
987 {
988 	*interval = t->t_persistent_keepalive_interval;
989 	return *interval > 0 ? 0 : ENOENT;
990 }
991 
992 void
993 wg_timers_get_last_handshake(struct wg_timers *t, struct timespec *time)
994 {
995 	mtx_enter(&t->t_handshake_mtx);
996 	*time = t->t_handshake_complete;
997 	mtx_leave(&t->t_handshake_mtx);
998 }
999 
1000 int
1001 wg_timers_expired_handshake_last_sent(struct wg_timers *t)
1002 {
1003 	struct timespec uptime;
1004 	struct timespec expire = { .tv_sec = REKEY_TIMEOUT, .tv_nsec = 0 };
1005 
1006 	getnanouptime(&uptime);
1007 	timespecadd(&t->t_handshake_last_sent, &expire, &expire);
1008 	return timespeccmp(&uptime, &expire, >) ? ETIMEDOUT : 0;
1009 }
1010 
1011 int
1012 wg_timers_check_handshake_last_sent(struct wg_timers *t)
1013 {
1014 	int ret;
1015 	mtx_enter(&t->t_handshake_mtx);
1016 	if ((ret = wg_timers_expired_handshake_last_sent(t)) == ETIMEDOUT)
1017 		getnanouptime(&t->t_handshake_last_sent);
1018 	mtx_leave(&t->t_handshake_mtx);
1019 	return ret;
1020 }
1021 
1022 void
1023 wg_timers_event_data_sent(struct wg_timers *t)
1024 {
1025 	int	msecs = NEW_HANDSHAKE_TIMEOUT * 1000;
1026 	msecs += arc4random_uniform(REKEY_TIMEOUT_JITTER);
1027 
1028 	mtx_enter(&t->t_mtx);
1029 	if (!t->t_disabled && !timeout_pending(&t->t_new_handshake))
1030 		timeout_add_msec(&t->t_new_handshake, msecs);
1031 	mtx_leave(&t->t_mtx);
1032 }
1033 
1034 void
1035 wg_timers_event_data_received(struct wg_timers *t)
1036 {
1037 	mtx_enter(&t->t_mtx);
1038 	if (!t->t_disabled) {
1039 		if (!timeout_pending(&t->t_send_keepalive))
1040 			timeout_add_sec(&t->t_send_keepalive,
1041 			    KEEPALIVE_TIMEOUT);
1042 		else
1043 			t->t_need_another_keepalive = 1;
1044 	}
1045 	mtx_leave(&t->t_mtx);
1046 }
1047 
1048 void
1049 wg_timers_event_any_authenticated_packet_sent(struct wg_timers *t)
1050 {
1051 	timeout_del(&t->t_send_keepalive);
1052 }
1053 
1054 void
1055 wg_timers_event_any_authenticated_packet_received(struct wg_timers *t)
1056 {
1057 	timeout_del(&t->t_new_handshake);
1058 }
1059 
1060 void
1061 wg_timers_event_any_authenticated_packet_traversal(struct wg_timers *t)
1062 {
1063 	mtx_enter(&t->t_mtx);
1064 	if (!t->t_disabled && t->t_persistent_keepalive_interval > 0)
1065 		timeout_add_sec(&t->t_persistent_keepalive,
1066 		    t->t_persistent_keepalive_interval);
1067 	mtx_leave(&t->t_mtx);
1068 }
1069 
1070 void
1071 wg_timers_event_handshake_initiated(struct wg_timers *t)
1072 {
1073 	int	msecs = REKEY_TIMEOUT * 1000;
1074 	msecs += arc4random_uniform(REKEY_TIMEOUT_JITTER);
1075 
1076 	mtx_enter(&t->t_mtx);
1077 	if (!t->t_disabled)
1078 		timeout_add_msec(&t->t_retry_handshake, msecs);
1079 	mtx_leave(&t->t_mtx);
1080 }
1081 
1082 void
1083 wg_timers_event_handshake_responded(struct wg_timers *t)
1084 {
1085 	mtx_enter(&t->t_handshake_mtx);
1086 	getnanouptime(&t->t_handshake_last_sent);
1087 	mtx_leave(&t->t_handshake_mtx);
1088 }
1089 
1090 void
1091 wg_timers_event_handshake_complete(struct wg_timers *t)
1092 {
1093 	mtx_enter(&t->t_mtx);
1094 	if (!t->t_disabled) {
1095 		mtx_enter(&t->t_handshake_mtx);
1096 		timeout_del(&t->t_retry_handshake);
1097 		t->t_handshake_retries = 0;
1098 		getnanotime(&t->t_handshake_complete);
1099 		mtx_leave(&t->t_handshake_mtx);
1100 		wg_timers_run_send_keepalive(t);
1101 	}
1102 	mtx_leave(&t->t_mtx);
1103 }
1104 
1105 void
1106 wg_timers_event_session_derived(struct wg_timers *t)
1107 {
1108 	mtx_enter(&t->t_mtx);
1109 	if (!t->t_disabled)
1110 		timeout_add_sec(&t->t_zero_key_material, REJECT_AFTER_TIME * 3);
1111 	mtx_leave(&t->t_mtx);
1112 }
1113 
1114 void
1115 wg_timers_event_want_initiation(struct wg_timers *t)
1116 {
1117 	mtx_enter(&t->t_mtx);
1118 	if (!t->t_disabled)
1119 		wg_timers_run_send_initiation(t, 0);
1120 	mtx_leave(&t->t_mtx);
1121 }
1122 
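/*
 * Backdating t_handshake_last_sent by more than REKEY_TIMEOUT below makes
 * wg_timers_expired_handshake_last_sent() return ETIMEDOUT right away, so the
 * next call to wg_timers_run_send_initiation() sends an initiation
 * immediately instead of waiting out the rekey interval.
 */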
1123 void
1124 wg_timers_event_reset_handshake_last_sent(struct wg_timers *t)
1125 {
1126 	mtx_enter(&t->t_handshake_mtx);
1127 	t->t_handshake_last_sent.tv_sec -= (REKEY_TIMEOUT + 1);
1128 	mtx_leave(&t->t_handshake_mtx);
1129 }
1130 
1131 void
1132 wg_timers_run_send_initiation(void *_t, int is_retry)
1133 {
1134 	struct wg_timers *t = _t;
1135 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1136 	if (!is_retry)
1137 		t->t_handshake_retries = 0;
1138 	if (wg_timers_expired_handshake_last_sent(t) == ETIMEDOUT)
1139 		task_add(wg_handshake_taskq, &peer->p_send_initiation);
1140 }
1141 
1142 void
1143 wg_timers_run_retry_handshake(void *_t)
1144 {
1145 	struct wg_timers *t = _t;
1146 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1147 
1148 	mtx_enter(&t->t_handshake_mtx);
1149 	if (t->t_handshake_retries <= MAX_TIMER_HANDSHAKES) {
1150 		t->t_handshake_retries++;
1151 		mtx_leave(&t->t_handshake_mtx);
1152 
1153 		DPRINTF(peer->p_sc, "Handshake for peer %llu did not complete "
1154 		    "after %d seconds, retrying (try %d)\n", peer->p_id,
1155 		    REKEY_TIMEOUT, t->t_handshake_retries + 1);
1156 		wg_peer_clear_src(peer);
1157 		wg_timers_run_send_initiation(t, 1);
1158 	} else {
1159 		mtx_leave(&t->t_handshake_mtx);
1160 
1161 		DPRINTF(peer->p_sc, "Handshake for peer %llu did not complete "
1162 		    "after %d retries, giving up\n", peer->p_id,
1163 		    MAX_TIMER_HANDSHAKES + 2);
1164 
1165 		timeout_del(&t->t_send_keepalive);
1166 		mq_purge(&peer->p_stage_queue);
1167 		if (!timeout_pending(&t->t_zero_key_material))
1168 			timeout_add_sec(&t->t_zero_key_material,
1169 			    REJECT_AFTER_TIME * 3);
1170 	}
1171 }
1172 
1173 void
1174 wg_timers_run_send_keepalive(void *_t)
1175 {
1176 	struct wg_timers *t = _t;
1177 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1178 
1179 	task_add(wg_crypt_taskq, &peer->p_send_keepalive);
1180 	if (t->t_need_another_keepalive) {
1181 		t->t_need_another_keepalive = 0;
1182 		timeout_add_sec(&t->t_send_keepalive, KEEPALIVE_TIMEOUT);
1183 	}
1184 }
1185 
1186 void
1187 wg_timers_run_new_handshake(void *_t)
1188 {
1189 	struct wg_timers *t = _t;
1190 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1191 
1192 	DPRINTF(peer->p_sc, "Retrying handshake with peer %llu because we "
1193 	    "stopped hearing back after %d seconds\n",
1194 	    peer->p_id, NEW_HANDSHAKE_TIMEOUT);
1195 	wg_peer_clear_src(peer);
1196 
1197 	wg_timers_run_send_initiation(t, 0);
1198 }
1199 
1200 void
1201 wg_timers_run_zero_key_material(void *_t)
1202 {
1203 	struct wg_timers *t = _t;
1204 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1205 
1206 	DPRINTF(peer->p_sc, "Zeroing out keys for peer %llu\n", peer->p_id);
1207 	task_add(wg_handshake_taskq, &peer->p_clear_secrets);
1208 }
1209 
1210 void
1211 wg_timers_run_persistent_keepalive(void *_t)
1212 {
1213 	struct wg_timers *t = _t;
1214 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1215 	if (t->t_persistent_keepalive_interval != 0)
1216 		task_add(wg_crypt_taskq, &peer->p_send_keepalive);
1217 }
1218 
1219 /* The following functions handle handshakes */
1220 void
1221 wg_peer_send_buf(struct wg_peer *peer, uint8_t *buf, size_t len)
1222 {
1223 	struct wg_endpoint	 endpoint;
1224 
1225 	wg_peer_counters_add(peer, len, 0);
1226 	wg_timers_event_any_authenticated_packet_traversal(&peer->p_timers);
1227 	wg_timers_event_any_authenticated_packet_sent(&peer->p_timers);
1228 	wg_peer_get_endpoint(peer, &endpoint);
1229 	wg_send_buf(peer->p_sc, &endpoint, buf, len);
1230 }
1231 
1232 void
1233 wg_send_initiation(void *_peer)
1234 {
1235 	struct wg_peer			*peer = _peer;
1236 	struct wg_pkt_initiation	 pkt;
1237 
1238 	if (wg_timers_check_handshake_last_sent(&peer->p_timers) != ETIMEDOUT)
1239 		return;
1240 
1241 	DPRINTF(peer->p_sc, "Sending handshake initiation to peer %llu\n",
1242 	    peer->p_id);
1243 
1244 	if (noise_create_initiation(&peer->p_remote, &pkt.s_idx, pkt.ue, pkt.es,
1245 				    pkt.ets) != 0)
1246 		return;
1247 	pkt.t = WG_PKT_INITIATION;
1248 	cookie_maker_mac(&peer->p_cookie, &pkt.m, &pkt,
1249 	    sizeof(pkt)-sizeof(pkt.m));
1250 	wg_peer_send_buf(peer, (uint8_t *)&pkt, sizeof(pkt));
1251 	wg_timers_event_handshake_initiated(&peer->p_timers);
1252 }
1253 
1254 void
1255 wg_send_response(struct wg_peer *peer)
1256 {
1257 	struct wg_pkt_response	 pkt;
1258 
1259 	DPRINTF(peer->p_sc, "Sending handshake response to peer %llu\n",
1260 	    peer->p_id);
1261 
1262 	if (noise_create_response(&peer->p_remote, &pkt.s_idx, &pkt.r_idx,
1263 				  pkt.ue, pkt.en) != 0)
1264 		return;
1265 	if (noise_remote_begin_session(&peer->p_remote) != 0)
1266 		return;
1267 	wg_timers_event_session_derived(&peer->p_timers);
1268 	pkt.t = WG_PKT_RESPONSE;
1269 	cookie_maker_mac(&peer->p_cookie, &pkt.m, &pkt,
1270 	    sizeof(pkt)-sizeof(pkt.m));
1271 	wg_timers_event_handshake_responded(&peer->p_timers);
1272 	wg_peer_send_buf(peer, (uint8_t *)&pkt, sizeof(pkt));
1273 }
1274 
1275 void
1276 wg_send_cookie(struct wg_softc *sc, struct cookie_macs *cm, uint32_t idx,
1277     struct wg_endpoint *e)
1278 {
1279 	struct wg_pkt_cookie	pkt;
1280 
1281 	DPRINTF(sc, "Sending cookie response for denied handshake message\n");
1282 
1283 	pkt.t = WG_PKT_COOKIE;
1284 	pkt.r_idx = idx;
1285 
1286 	cookie_checker_create_payload(&sc->sc_cookie, cm, pkt.nonce,
1287 	    pkt.ec, &e->e_remote.r_sa);
1288 
1289 	wg_send_buf(sc, e, (uint8_t *)&pkt, sizeof(pkt));
1290 }
1291 
1292 void
1293 wg_send_keepalive(void *_peer)
1294 {
1295 	struct wg_peer	*peer = _peer;
1296 	struct wg_softc	*sc = peer->p_sc;
1297 	struct wg_tag	*t;
1298 	struct mbuf	*m;
1299 
1300 	if (!mq_empty(&peer->p_stage_queue))
1301 		goto send;
1302 
1303 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1304 		return;
1305 
1306 	if ((t = wg_tag_get(m)) == NULL) {
1307 		m_freem(m);
1308 		return;
1309 	}
1310 
1311 	t->t_peer = peer;
1312 	t->t_mbuf = NULL;
1313 	t->t_done = 0;
1314 	t->t_mtu = 0; /* MTU == 0 OK for keepalive */
1315 
1316 	mq_push(&peer->p_stage_queue, m);
1317 send:
1318 	if (noise_remote_ready(&peer->p_remote) == 0) {
1319 		wg_queue_out(sc, peer);
1320 		task_add(wg_crypt_taskq, &sc->sc_encap);
1321 	} else {
1322 		wg_timers_event_want_initiation(&peer->p_timers);
1323 	}
1324 }
1325 
1326 void
1327 wg_peer_clear_secrets(void *_peer)
1328 {
1329 	struct wg_peer *peer = _peer;
1330 	noise_remote_clear(&peer->p_remote);
1331 }
1332 
1333 void
1334 wg_handshake(struct wg_softc *sc, struct mbuf *m)
1335 {
1336 	struct wg_tag			*t;
1337 	struct wg_pkt_initiation	*init;
1338 	struct wg_pkt_response		*resp;
1339 	struct wg_pkt_cookie		*cook;
1340 	struct wg_peer			*peer;
1341 	struct noise_remote		*remote;
1342 	int				 res, underload = 0;
1343 	static struct timeval		 wg_last_underload; /* microuptime */
1344 
1345 	if (mq_len(&sc->sc_handshake_queue) >= MAX_QUEUED_HANDSHAKES/8) {
1346 		getmicrouptime(&wg_last_underload);
1347 		underload = 1;
1348 	} else if (wg_last_underload.tv_sec != 0) {
1349 		if (!ratecheck(&wg_last_underload, &underload_interval))
1350 			underload = 1;
1351 		else
1352 			bzero(&wg_last_underload, sizeof(wg_last_underload));
1353 	}
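	/*
	 * While the handshake queue is deep (or was within the last
	 * UNDERLOAD_TIMEOUT seconds), `underload' asks
	 * cookie_checker_validate_macs() to apply its stricter checks.  In
	 * that case EAGAIN means the sender lacks a current cookie and we
	 * answer with wg_send_cookie(); ECONNREFUSED means the handshake was
	 * ratelimited and is dropped.
	 */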
1354 
1355 	t = wg_tag_get(m);
1356 
1357 	switch (*mtod(m, uint32_t *)) {
1358 	case WG_PKT_INITIATION:
1359 		init = mtod(m, struct wg_pkt_initiation *);
1360 
1361 		res = cookie_checker_validate_macs(&sc->sc_cookie, &init->m,
1362 				init, sizeof(*init) - sizeof(init->m),
1363 				underload, &t->t_endpoint.e_remote.r_sa);
1364 
1365 		if (res == EINVAL) {
1366 			DPRINTF(sc, "Invalid initiation MAC\n");
1367 			goto error;
1368 		} else if (res == ECONNREFUSED) {
1369 			DPRINTF(sc, "Handshake ratelimited\n");
1370 			goto error;
1371 		} else if (res == EAGAIN) {
1372 			wg_send_cookie(sc, &init->m, init->s_idx,
1373 			    &t->t_endpoint);
1374 			goto error;
1375 		} else if (res != 0) {
1376 			panic("unexpected response: %d", res);
1377 		}
1378 
1379 		if (noise_consume_initiation(&sc->sc_local, &remote,
1380 		    init->s_idx, init->ue, init->es, init->ets) != 0) {
1381 			DPRINTF(sc, "Invalid handshake initiation\n");
1382 			goto error;
1383 		}
1384 
1385 		peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1386 
1387 		DPRINTF(sc, "Receiving handshake initiation from peer %llu\n",
1388 		    peer->p_id);
1389 
1390 		wg_peer_counters_add(peer, 0, sizeof(*init));
1391 		wg_peer_set_endpoint_from_tag(peer, t);
1392 		wg_send_response(peer);
1393 		break;
1394 	case WG_PKT_RESPONSE:
1395 		resp = mtod(m, struct wg_pkt_response *);
1396 
1397 		res = cookie_checker_validate_macs(&sc->sc_cookie, &resp->m,
1398 				resp, sizeof(*resp) - sizeof(resp->m),
1399 				underload, &t->t_endpoint.e_remote.r_sa);
1400 
1401 		if (res == EINVAL) {
1402 			DPRINTF(sc, "Invalid response MAC\n");
1403 			goto error;
1404 		} else if (res == ECONNREFUSED) {
1405 			DPRINTF(sc, "Handshake ratelimited\n");
1406 			goto error;
1407 		} else if (res == EAGAIN) {
1408 			wg_send_cookie(sc, &resp->m, resp->s_idx,
1409 			    &t->t_endpoint);
1410 			goto error;
1411 		} else if (res != 0) {
1412 			panic("unexpected response: %d", res);
1413 		}
1414 
1415 		if ((remote = wg_index_get(sc, resp->r_idx)) == NULL) {
1416 			DPRINTF(sc, "Unknown handshake response\n");
1417 			goto error;
1418 		}
1419 
1420 		peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1421 
1422 		if (noise_consume_response(remote, resp->s_idx, resp->r_idx,
1423 					   resp->ue, resp->en) != 0) {
1424 			DPRINTF(sc, "Invalid handshake response\n");
1425 			goto error;
1426 		}
1427 
1428 		DPRINTF(sc, "Receiving handshake response from peer %llu\n",
1429 				peer->p_id);
1430 
1431 		wg_peer_counters_add(peer, 0, sizeof(*resp));
1432 		wg_peer_set_endpoint_from_tag(peer, t);
1433 		if (noise_remote_begin_session(&peer->p_remote) == 0) {
1434 			wg_timers_event_session_derived(&peer->p_timers);
1435 			wg_timers_event_handshake_complete(&peer->p_timers);
1436 		}
1437 		break;
1438 	case WG_PKT_COOKIE:
1439 		cook = mtod(m, struct wg_pkt_cookie *);
1440 
1441 		if ((remote = wg_index_get(sc, cook->r_idx)) == NULL) {
1442 			DPRINTF(sc, "Unknown cookie index\n");
1443 			goto error;
1444 		}
1445 
1446 		peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1447 
1448 		if (cookie_maker_consume_payload(&peer->p_cookie,
1449 		    cook->nonce, cook->ec) != 0) {
1450 			DPRINTF(sc, "Could not decrypt cookie response\n");
1451 			goto error;
1452 		}
1453 
1454 		DPRINTF(sc, "Receiving cookie response\n");
1455 		goto error;
1456 	default:
1457 		panic("invalid packet in handshake queue");
1458 	}
1459 
1460 	wg_timers_event_any_authenticated_packet_received(&peer->p_timers);
1461 	wg_timers_event_any_authenticated_packet_traversal(&peer->p_timers);
1462 error:
1463 	m_freem(m);
1464 }
1465 
1466 void
1467 wg_handshake_worker(void *_sc)
1468 {
1469 	struct mbuf *m;
1470 	struct wg_softc *sc = _sc;
1471 	while ((m = mq_dequeue(&sc->sc_handshake_queue)) != NULL)
1472 		wg_handshake(sc, m);
1473 }
1474 
1475 /*
1476  * The following functions handle encapsulation (encryption) and
1477  * decapsulation (decryption). The wg_{en,de}cap functions will run in the
1478  * sc_crypt_taskq, while wg_deliver_{in,out} must be serialised and will run
1479  * in nettq.
1480  *
1481  * The packets are tracked in two queues, a serial queue and a parallel queue.
1482  *  - The parallel queue is used to distribute the encryption across multiple
1483  *    threads.
1484  *  - The serial queue ensures that packets are not reordered and are
1485  *    delivered in sequence.
1486  * The wg_tag attached to the packet contains two flags to help the two queues
1487  * interact.
1488  *  - t_done: The parallel queue has finished with the packet, now the serial
1489  *            queue can do its work.
1490  *  - t_mbuf: Used to store the *crypted packet. In the case of encryption,
1491  *            this is a newly allocated packet, and in the case of decryption,
1492  *            it is a pointer to the same packet, which has been decrypted and
1493  *            truncated. If t_mbuf is NULL, then *cryption failed and this
1494  *            packet should not be passed.
1495  * wg_{en,de}cap work on the parallel queue, while wg_deliver_{in,out} work
1496  * on the serial queue.
1497  */
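/*
 * Illustrative lifecycle of one outbound packet through both queues, pieced
 * together from wg_queue_out(), wg_queue_dequeue() and the functions below:
 *
 *	wg_queue_out()     puts m on peer->p_encap_queue (serial) and on
 *	                   sc->sc_encap_ring (parallel)
 *	wg_encap()         encrypts into a fresh mbuf, stores it in t->t_mbuf,
 *	                   sets t->t_done and schedules p_deliver_out
 *	wg_deliver_out()   pops p_encap_queue only while the head tag has
 *	                   t_done set, so packets leave in their original order
 *
 * Decryption follows the same shape with p_decap_queue, sc_decap_ring,
 * wg_decap() and wg_deliver_in().
 */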
1498 void
1499 wg_encap(struct wg_softc *sc, struct mbuf *m)
1500 {
1501 	int res = 0;
1502 	struct wg_pkt_data	*data;
1503 	struct wg_peer		*peer;
1504 	struct wg_tag		*t;
1505 	struct mbuf		*mc;
1506 	size_t			 padding_len, plaintext_len, out_len;
1507 	uint64_t		 nonce;
1508 
1509 	t = wg_tag_get(m);
1510 	peer = t->t_peer;
1511 
1512 	plaintext_len = WG_PKT_WITH_PADDING(m->m_pkthdr.len);
1513 	padding_len = plaintext_len - m->m_pkthdr.len;
1514 	out_len = sizeof(struct wg_pkt_data) + plaintext_len +
1515 	    NOISE_AUTHTAG_LEN;
1516 
1517 	/*
1518 	 * For the time being we allocate a new packet with sufficient size to
1519 	 * hold the encrypted data and headers. It would be difficult to
1520 	 * overcome as p_encap_queue (mbuf_list) holds a reference to the mbuf.
1521 	 * If we m_makespace or similar, we risk corrupting that list.
1522 	 * Additionally, we only pass a buf and buf length to
1523 	 * noise_remote_encrypt. Technically it would be possible to teach
1524 	 * noise_remote_encrypt about mbufs, but we would need to sort out the
1525 	 * p_encap_queue situation first.
1526 	 */
1527 	if ((mc = m_clget(NULL, M_NOWAIT, out_len + max_hdr)) == NULL)
1528 		goto error;
1529 	m_align(mc, out_len);
1530 
1531 	data = mtod(mc, struct wg_pkt_data *);
1532 	m_copydata(m, 0, m->m_pkthdr.len, data->buf);
1533 	bzero(data->buf + m->m_pkthdr.len, padding_len);
1534 	data->t = WG_PKT_DATA;
1535 
1536 	/*
1537 	 * Copy the flow hash from the inner packet to the outer packet, so
1538 	 * that fq_codel can properly separate streams, rather than falling
1539 	 * back to random buckets.
1540 	 */
1541 	mc->m_pkthdr.ph_flowid = m->m_pkthdr.ph_flowid;
1542 
1543 	mc->m_pkthdr.pf.prio = m->m_pkthdr.pf.prio;
1544 
1545 	res = noise_remote_encrypt(&peer->p_remote, &data->r_idx, &nonce,
1546 				   data->buf, plaintext_len);
1547 	nonce = htole64(nonce); /* Wire format is little endian. */
1548 	memcpy(data->nonce, &nonce, sizeof(data->nonce));
1549 
1550 	if (__predict_false(res == EINVAL)) {
1551 		m_freem(mc);
1552 		goto error;
1553 	} else if (__predict_false(res == ESTALE)) {
1554 		wg_timers_event_want_initiation(&peer->p_timers);
1555 	} else if (__predict_false(res != 0)) {
1556 		panic("unexpected result: %d", res);
1557 	}
1558 
1559 	/* A packet with length 0 is a keepalive packet */
1560 	if (__predict_false(m->m_pkthdr.len == 0))
1561 		DPRINTF(sc, "Sending keepalive packet to peer %llu\n",
1562 		    peer->p_id);
1563 
1564 	mc->m_pkthdr.ph_loopcnt = m->m_pkthdr.ph_loopcnt;
1565 	mc->m_flags &= ~(M_MCAST | M_BCAST);
1566 	mc->m_pkthdr.len = mc->m_len = out_len;
1567 
1568 	/*
1569 	 * We would count ifc_opackets, ifc_obytes of m here, except if_snd
1570 	 * already does that for us, so no need to worry about it.
1571 	counters_pkt(sc->sc_if.if_counters, ifc_opackets, ifc_obytes,
1572 	    m->m_pkthdr.len);
1573 	 */
1574 	wg_peer_counters_add(peer, mc->m_pkthdr.len, 0);
1575 
1576 	t->t_mbuf = mc;
1577 error:
1578 	t->t_done = 1;
1579 	task_add(net_tq(sc->sc_if.if_index), &peer->p_deliver_out);
1580 }
1581 
1582 void
1583 wg_decap(struct wg_softc *sc, struct mbuf *m)
1584 {
1585 	int			 res, len;
1586 	struct ip		*ip;
1587 	struct ip6_hdr		*ip6;
1588 	struct wg_pkt_data	*data;
1589 	struct wg_peer		*peer, *allowed_peer;
1590 	struct wg_tag		*t;
1591 	size_t			 payload_len;
1592 	uint64_t		 nonce;
1593 
1594 	t = wg_tag_get(m);
1595 	peer = t->t_peer;
1596 
1597 	/*
1598 	 * Likewise to wg_encap, we pass a buf and buf length to
1599 	 * noise_remote_decrypt. Again, possible to teach it about mbufs
1600 	 * but need to get over the p_decap_queue situation first. However,
1601 	 * we do not need to allocate a new mbuf as the decrypted packet is
1602 	 * strictly smaller than encrypted. We just set t_mbuf to m and
1603 	 * wg_deliver_in knows how to deal with that.
1604 	 */
1605 	data = mtod(m, struct wg_pkt_data *);
1606 	payload_len = m->m_pkthdr.len - sizeof(struct wg_pkt_data);
1607 	memcpy(&nonce, data->nonce, sizeof(nonce));
1608 	nonce = le64toh(nonce); /* Wire format is little endian. */
1609 	res = noise_remote_decrypt(&peer->p_remote, data->r_idx, nonce,
1610 				   data->buf, payload_len);
1611 
1612 	if (__predict_false(res == EINVAL)) {
1613 		goto error;
1614 	} else if (__predict_false(res == ECONNRESET)) {
1615 		wg_timers_event_handshake_complete(&peer->p_timers);
1616 	} else if (__predict_false(res == ESTALE)) {
1617 		wg_timers_event_want_initiation(&peer->p_timers);
1618 	} else if (__predict_false(res != 0)) {
1619 		panic("unexpected response: %d", res);
1620 	}
1621 
1622 	wg_peer_set_endpoint_from_tag(peer, t);
1623 
1624 	wg_peer_counters_add(peer, 0, m->m_pkthdr.len);
1625 
1626 	m_adj(m, sizeof(struct wg_pkt_data));
1627 	m_adj(m, -NOISE_AUTHTAG_LEN);
1628 
1629 	counters_pkt(sc->sc_if.if_counters, ifc_ipackets, ifc_ibytes,
1630 	    m->m_pkthdr.len);
1631 
1632 	/* A packet with length 0 is a keepalive packet */
1633 	if (__predict_false(m->m_pkthdr.len == 0)) {
1634 		DPRINTF(sc, "Receiving keepalive packet from peer "
1635 		    "%llu\n", peer->p_id);
1636 		goto done;
1637 	}
1638 
1639 	/*
1640 	 * We can let the network stack handle the intricate validation of the
1641 	 * IP header, we just worry about the sizeof and the version, so we can
1642 	 * read the source address in wg_aip_lookup.
1643 	 *
1644 	 * We also need to trim the packet, as it was likely padded before
1645 	 * encryption. While we could drop it here, it will be more helpful to
1646 	 * pass it to bpf_mtap and use the counters that people are expecting
1647 	 * in ipv4_input and ipv6_input. We can rely on ipv4_input and
1648 	 * ipv6_input to properly validate the headers.
1649 	 */
1650 	ip = mtod(m, struct ip *);
1651 	ip6 = mtod(m, struct ip6_hdr *);
1652 
1653 	if (m->m_pkthdr.len >= sizeof(struct ip) && ip->ip_v == IPVERSION) {
1654 		m->m_pkthdr.ph_family = AF_INET;
1655 
1656 		len = ntohs(ip->ip_len);
1657 		if (len >= sizeof(struct ip) && len < m->m_pkthdr.len)
1658 			m_adj(m, len - m->m_pkthdr.len);
1659 
1660 		allowed_peer = wg_aip_lookup(sc->sc_aip4, &ip->ip_src);
1661 #ifdef INET6
1662 	} else if (m->m_pkthdr.len >= sizeof(struct ip6_hdr) &&
1663 	    (ip6->ip6_vfc & IPV6_VERSION_MASK) == IPV6_VERSION) {
1664 		m->m_pkthdr.ph_family = AF_INET6;
1665 
1666 		len = ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr);
1667 		if (len < m->m_pkthdr.len)
1668 			m_adj(m, len - m->m_pkthdr.len);
1669 
1670 		allowed_peer = wg_aip_lookup(sc->sc_aip6, &ip6->ip6_src);
1671 #endif
1672 	} else {
1673 		DPRINTF(sc, "Packet is neither ipv4 nor ipv6 from "
1674 		    "peer %llu\n", peer->p_id);
1675 		goto error;
1676 	}
1677 
1678 	if (__predict_false(peer != allowed_peer)) {
1679 		DPRINTF(sc, "Packet has unallowed src IP from peer "
1680 		    "%llu\n", peer->p_id);
1681 		goto error;
1682 	}
1683 
1684 	/* tunneled packet was not offloaded */
1685 	m->m_pkthdr.csum_flags = 0;
1686 
1687 	m->m_pkthdr.ph_ifidx = sc->sc_if.if_index;
1688 	m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
1689 	m->m_flags &= ~(M_MCAST | M_BCAST);
1690 #if NPF > 0
1691 	pf_pkt_addr_changed(m);
1692 #endif /* NPF > 0 */
1693 
1694 done:
1695 	t->t_mbuf = m;
1696 error:
1697 	t->t_done = 1;
1698 	task_add(net_tq(sc->sc_if.if_index), &peer->p_deliver_in);
1699 }
1700 
1701 void
1702 wg_encap_worker(void *_sc)
1703 {
1704 	struct mbuf *m;
1705 	struct wg_softc *sc = _sc;
1706 	while ((m = wg_ring_dequeue(&sc->sc_encap_ring)) != NULL)
1707 		wg_encap(sc, m);
1708 }
1709 
1710 void
1711 wg_decap_worker(void *_sc)
1712 {
1713 	struct mbuf *m;
1714 	struct wg_softc *sc = _sc;
1715 	while ((m = wg_ring_dequeue(&sc->sc_decap_ring)) != NULL)
1716 		wg_decap(sc, m);
1717 }
1718 
1719 void
1720 wg_deliver_out(void *_peer)
1721 {
1722 	struct wg_peer		*peer = _peer;
1723 	struct wg_softc		*sc = peer->p_sc;
1724 	struct wg_endpoint	 endpoint;
1725 	struct wg_tag		*t;
1726 	struct mbuf		*m;
1727 	int			 ret;
1728 
1729 	wg_peer_get_endpoint(peer, &endpoint);
1730 
1731 	while ((m = wg_queue_dequeue(&peer->p_encap_queue, &t)) != NULL) {
1732 		/* t_mbuf will contain the encrypted packet */
1733 		if (t->t_mbuf == NULL){
1734 			counters_inc(sc->sc_if.if_counters, ifc_oerrors);
1735 			m_freem(m);
1736 			continue;
1737 		}
1738 
1739 		ret = wg_send(sc, &endpoint, t->t_mbuf);
1740 
1741 		if (ret == 0) {
1742 			wg_timers_event_any_authenticated_packet_traversal(
1743 			    &peer->p_timers);
1744 			wg_timers_event_any_authenticated_packet_sent(
1745 			    &peer->p_timers);
1746 
1747 			if (m->m_pkthdr.len != 0)
1748 				wg_timers_event_data_sent(&peer->p_timers);
1749 		} else if (ret == EADDRNOTAVAIL) {
1750 			wg_peer_clear_src(peer);
1751 			wg_peer_get_endpoint(peer, &endpoint);
1752 		}
1753 
1754 		m_freem(m);
1755 	}
1756 }
1757 
1758 void
1759 wg_deliver_in(void *_peer)
1760 {
1761 	struct wg_peer	*peer = _peer;
1762 	struct wg_softc	*sc = peer->p_sc;
1763 	struct wg_tag	*t;
1764 	struct mbuf	*m;
1765 
1766 	while ((m = wg_queue_dequeue(&peer->p_decap_queue, &t)) != NULL) {
1767 		/* t_mbuf will contain the decrypted packet */
1768 		if (t->t_mbuf == NULL) {
1769 			counters_inc(sc->sc_if.if_counters, ifc_ierrors);
1770 			m_freem(m);
1771 			continue;
1772 		}
1773 
1774 		/* From here on m == t->t_mbuf */
1775 		KASSERT(m == t->t_mbuf);
1776 
1777 		wg_timers_event_any_authenticated_packet_received(
1778 		    &peer->p_timers);
1779 		wg_timers_event_any_authenticated_packet_traversal(
1780 		    &peer->p_timers);
1781 
1782 		if (m->m_pkthdr.len == 0) {
1783 			m_freem(m);
1784 			continue;
1785 		}
1786 
1787 #if NBPFILTER > 0
1788 		if (sc->sc_if.if_bpf != NULL)
1789 			bpf_mtap_af(sc->sc_if.if_bpf,
1790 			    m->m_pkthdr.ph_family, m, BPF_DIRECTION_IN);
1791 #endif
1792 
1793 		NET_LOCK();
1794 		if (m->m_pkthdr.ph_family == AF_INET)
1795 			ipv4_input(&sc->sc_if, m);
1796 #ifdef INET6
1797 		else if (m->m_pkthdr.ph_family == AF_INET6)
1798 			ipv6_input(&sc->sc_if, m);
1799 #endif
1800 		else
1801 			panic("invalid ph_family");
1802 		NET_UNLOCK();
1803 
1804 		wg_timers_event_data_received(&peer->p_timers);
1805 	}
1806 }
1807 
1808 int
1809 wg_queue_in(struct wg_softc *sc, struct wg_peer *peer, struct mbuf *m)
1810 {
1811 	struct wg_ring		*parallel = &sc->sc_decap_ring;
1812 	struct wg_queue		*serial = &peer->p_decap_queue;
1813 	struct wg_tag		*t;
1814 
1815 	mtx_enter(&serial->q_mtx);
1816 	if (serial->q_list.ml_len < MAX_QUEUED_PKT) {
1817 		ml_enqueue(&serial->q_list, m);
1818 		mtx_leave(&serial->q_mtx);
1819 	} else {
1820 		mtx_leave(&serial->q_mtx);
1821 		m_freem(m);
1822 		return ENOBUFS;
1823 	}
1824 
1825 	mtx_enter(&parallel->r_mtx);
1826 	if (parallel->r_tail - parallel->r_head < MAX_QUEUED_PKT) {
1827 		parallel->r_buf[parallel->r_tail & MAX_QUEUED_PKT_MASK] = m;
1828 		parallel->r_tail++;
1829 		mtx_leave(&parallel->r_mtx);
1830 	} else {
1831 		mtx_leave(&parallel->r_mtx);
1832 		t = wg_tag_get(m);
1833 		t->t_done = 1;
1834 		return ENOBUFS;
1835 	}
1836 
1837 	return 0;
1838 }
1839 
1840 void
1841 wg_queue_out(struct wg_softc *sc, struct wg_peer *peer)
1842 {
1843 	struct wg_ring		*parallel = &sc->sc_encap_ring;
1844 	struct wg_queue		*serial = &peer->p_encap_queue;
1845 	struct mbuf_list	 ml, ml_free;
1846 	struct mbuf		*m;
1847 	struct wg_tag		*t;
1848 	int			 dropped;
1849 
1850 	/*
1851 	 * We delist all staged packets and then add them to the queues. When
1852 	 * called from wg_send_keepalive this can race with wg_qstart; however,
1853 	 * wg_qstart itself will not race with us, as it is serialised.
1854 	 */
1855 	mq_delist(&peer->p_stage_queue, &ml);
1856 	ml_init(&ml_free);
1857 
1858 	while ((m = ml_dequeue(&ml)) != NULL) {
1859 		mtx_enter(&serial->q_mtx);
1860 		if (serial->q_list.ml_len < MAX_QUEUED_PKT) {
1861 			ml_enqueue(&serial->q_list, m);
1862 			mtx_leave(&serial->q_mtx);
1863 		} else {
1864 			mtx_leave(&serial->q_mtx);
1865 			ml_enqueue(&ml_free, m);
1866 			continue;
1867 		}
1868 
1869 		mtx_enter(&parallel->r_mtx);
1870 		if (parallel->r_tail - parallel->r_head < MAX_QUEUED_PKT) {
1871 			parallel->r_buf[parallel->r_tail & MAX_QUEUED_PKT_MASK] = m;
1872 			parallel->r_tail++;
1873 			mtx_leave(&parallel->r_mtx);
1874 		} else {
1875 			mtx_leave(&parallel->r_mtx);
1876 			t = wg_tag_get(m);
1877 			t->t_done = 1;
1878 		}
1879 	}
1880 
1881 	if ((dropped = ml_purge(&ml_free)) > 0)
1882 		counters_add(sc->sc_if.if_counters, ifc_oqdrops, dropped);
1883 }
1884 
1885 struct mbuf *
1886 wg_ring_dequeue(struct wg_ring *r)
1887 {
1888 	struct mbuf *m = NULL;
1889 	mtx_enter(&r->r_mtx);
1890 	if (r->r_head != r->r_tail) {
1891 		m = r->r_buf[r->r_head & MAX_QUEUED_PKT_MASK];
1892 		r->r_head++;
1893 	}
1894 	mtx_leave(&r->r_mtx);
1895 	return m;
1896 }
1897 
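/*
 * wg_queue_dequeue only returns the mbuf at the head of a serial queue
 * once its tag has been marked done by a crypt worker; otherwise it
 * returns NULL.  This keeps delivery in submission order even though the
 * crypto itself runs in parallel.
 */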
1898 struct mbuf *
1899 wg_queue_dequeue(struct wg_queue *q, struct wg_tag **t)
1900 {
1901 	struct mbuf *m;
1902 	mtx_enter(&q->q_mtx);
1903 	if ((m = q->q_list.ml_head) != NULL && (*t = wg_tag_get(m))->t_done)
1904 		ml_dequeue(&q->q_list);
1905 	else
1906 		m = NULL;
1907 	mtx_leave(&q->q_mtx);
1908 	return m;
1909 }
1910 
1911 size_t
1912 wg_queue_len(struct wg_queue *q)
1913 {
1914 	size_t len;
1915 	mtx_enter(&q->q_mtx);
1916 	len = q->q_list.ml_len;
1917 	mtx_leave(&q->q_mtx);
1918 	return len;
1919 }
1920 
1921 struct noise_remote *
1922 wg_remote_get(void *_sc, uint8_t public[NOISE_PUBLIC_KEY_LEN])
1923 {
1924 	struct wg_peer	*peer;
1925 	struct wg_softc	*sc = _sc;
1926 	if ((peer = wg_peer_lookup(sc, public)) == NULL)
1927 		return NULL;
1928 	return &peer->p_remote;
1929 }
1930 
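/*
 * The index hashtable maps the random 32-bit session indices we hand out
 * to their noise_remote.  wg_index_set takes an unused index entry from
 * the peer and inserts it; wg_index_get looks an entry up by index, and
 * wg_index_drop removes it and returns it to the peer's unused list.
 */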
1931 uint32_t
1932 wg_index_set(void *_sc, struct noise_remote *remote)
1933 {
1934 	struct wg_peer	*peer;
1935 	struct wg_softc	*sc = _sc;
1936 	struct wg_index *index, *iter;
1937 	uint32_t	 key;
1938 
1939 	/*
1940 	 * We can modify this without a lock, as wg_index_set and wg_index_drop
1941 	 * are guaranteed to be serialised (per remote).
1942 	 */
1943 	peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1944 	index = SLIST_FIRST(&peer->p_unused_index);
1945 	KASSERT(index != NULL);
1946 	SLIST_REMOVE_HEAD(&peer->p_unused_index, i_unused_entry);
1947 
1948 	index->i_value = remote;
1949 
1950 	mtx_enter(&sc->sc_index_mtx);
1951 assign_id:
1952 	key = index->i_key = arc4random();
1953 	key &= sc->sc_index_mask;
1954 	LIST_FOREACH(iter, &sc->sc_index[key], i_entry)
1955 		if (iter->i_key == index->i_key)
1956 			goto assign_id;
1957 
1958 	LIST_INSERT_HEAD(&sc->sc_index[key], index, i_entry);
1959 
1960 	mtx_leave(&sc->sc_index_mtx);
1961 
1962 	/* Likewise, no need to lock for index here. */
1963 	return index->i_key;
1964 }
1965 
1966 struct noise_remote *
1967 wg_index_get(void *_sc, uint32_t key0)
1968 {
1969 	struct wg_softc		*sc = _sc;
1970 	struct wg_index		*iter;
1971 	struct noise_remote	*remote = NULL;
1972 	uint32_t		 key = key0 & sc->sc_index_mask;
1973 
1974 	mtx_enter(&sc->sc_index_mtx);
1975 	LIST_FOREACH(iter, &sc->sc_index[key], i_entry)
1976 		if (iter->i_key == key0) {
1977 			remote = iter->i_value;
1978 			break;
1979 		}
1980 	mtx_leave(&sc->sc_index_mtx);
1981 	return remote;
1982 }
1983 
1984 void
1985 wg_index_drop(void *_sc, uint32_t key0)
1986 {
1987 	struct wg_softc	*sc = _sc;
1988 	struct wg_index	*iter;
1989 	struct wg_peer	*peer = NULL;
1990 	uint32_t	 key = key0 & sc->sc_index_mask;
1991 
1992 	mtx_enter(&sc->sc_index_mtx);
1993 	LIST_FOREACH(iter, &sc->sc_index[key], i_entry)
1994 		if (iter->i_key == key0) {
1995 			LIST_REMOVE(iter, i_entry);
1996 			break;
1997 		}
1998 	mtx_leave(&sc->sc_index_mtx);
1999 
2000 	/* We expect a peer */
2001 	peer = CONTAINER_OF(iter->i_value, struct wg_peer, p_remote);
2002 	KASSERT(peer != NULL);
2003 	SLIST_INSERT_HEAD(&peer->p_unused_index, iter, i_unused_entry);
2004 }
2005 
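/*
 * wg_input handles an incoming UDP payload.  The source address and port
 * are recorded as a candidate endpoint, then the payload is classified by
 * its leading 32-bit type word: handshake packets (initiation, response,
 * cookie) are queued to the handshake taskq, while data packets are
 * matched to a peer via the receiver index and queued for decryption.
 */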
2006 struct mbuf *
2007 wg_input(void *_sc, struct mbuf *m, struct ip *ip, struct ip6_hdr *ip6,
2008     void *_uh, int hlen)
2009 {
2010 	struct wg_pkt_data	*data;
2011 	struct noise_remote	*remote;
2012 	struct wg_tag		*t;
2013 	struct wg_softc		*sc = _sc;
2014 	struct udphdr		*uh = _uh;
2015 
2016 	NET_ASSERT_LOCKED();
2017 
2018 	if ((t = wg_tag_get(m)) == NULL) {
2019 		m_freem(m);
2020 		return NULL;
2021 	}
2022 
2023 	if (ip != NULL) {
2024 		t->t_endpoint.e_remote.r_sa.sa_len = sizeof(struct sockaddr_in);
2025 		t->t_endpoint.e_remote.r_sa.sa_family = AF_INET;
2026 		t->t_endpoint.e_remote.r_sin.sin_port = uh->uh_sport;
2027 		t->t_endpoint.e_remote.r_sin.sin_addr = ip->ip_src;
2028 		t->t_endpoint.e_local.l_in = ip->ip_dst;
2029 #ifdef INET6
2030 	} else if (ip6 != NULL) {
2031 		t->t_endpoint.e_remote.r_sa.sa_len = sizeof(struct sockaddr_in6);
2032 		t->t_endpoint.e_remote.r_sa.sa_family = AF_INET6;
2033 		t->t_endpoint.e_remote.r_sin6.sin6_port = uh->uh_sport;
2034 		t->t_endpoint.e_remote.r_sin6.sin6_addr = ip6->ip6_src;
2035 		t->t_endpoint.e_local.l_in6 = ip6->ip6_dst;
2036 #endif
2037 	} else {
2038 		m_freem(m);
2039 		return NULL;
2040 	}
2041 
2042 	/* m has an IP/IPv6 header of hlen length; we don't need it anymore. */
2043 	m_adj(m, hlen);
2044 
2045 	/*
2046 	 * Ensure mbuf is contiguous over full length of packet. This is done
2047 	 * so we can directly read the handshake values in wg_handshake, and so
2048 	 * we can decrypt a transport packet by passing a single buffer to
2049 	 * noise_remote_decrypt in wg_decap.
2050 	 */
2051 	if ((m = m_pullup(m, m->m_pkthdr.len)) == NULL)
2052 		return NULL;
2053 
2054 	if ((m->m_pkthdr.len == sizeof(struct wg_pkt_initiation) &&
2055 		*mtod(m, uint32_t *) == WG_PKT_INITIATION) ||
2056 	    (m->m_pkthdr.len == sizeof(struct wg_pkt_response) &&
2057 		*mtod(m, uint32_t *) == WG_PKT_RESPONSE) ||
2058 	    (m->m_pkthdr.len == sizeof(struct wg_pkt_cookie) &&
2059 		*mtod(m, uint32_t *) == WG_PKT_COOKIE)) {
2060 
2061 		if (mq_enqueue(&sc->sc_handshake_queue, m) != 0)
2062 			DPRINTF(sc, "Dropping handshake packet\n");
2063 		task_add(wg_handshake_taskq, &sc->sc_handshake);
2064 
2065 	} else if (m->m_pkthdr.len >= sizeof(struct wg_pkt_data) +
2066 	    NOISE_AUTHTAG_LEN && *mtod(m, uint32_t *) == WG_PKT_DATA) {
2067 
2068 		data = mtod(m, struct wg_pkt_data *);
2069 
2070 		if ((remote = wg_index_get(sc, data->r_idx)) != NULL) {
2071 			t->t_peer = CONTAINER_OF(remote, struct wg_peer,
2072 			    p_remote);
2073 			t->t_mbuf = NULL;
2074 			t->t_done = 0;
2075 
2076 			if (wg_queue_in(sc, t->t_peer, m) != 0)
2077 				counters_inc(sc->sc_if.if_counters,
2078 				    ifc_iqdrops);
2079 			task_add(wg_crypt_taskq, &sc->sc_decap);
2080 		} else {
2081 			counters_inc(sc->sc_if.if_counters, ifc_ierrors);
2082 			m_freem(m);
2083 		}
2084 	} else {
2085 		counters_inc(sc->sc_if.if_counters, ifc_ierrors);
2086 		m_freem(m);
2087 	}
2088 
2089 	return NULL;
2090 }
2091 
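/*
 * wg_qstart moves packets from the interface queue onto their peer's stage
 * queue.  Peers with a ready noise session have their staged packets moved
 * to the encryption queues; peers without one get a handshake initiation
 * requested instead.
 */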
2092 void
2093 wg_qstart(struct ifqueue *ifq)
2094 {
2095 	struct ifnet		*ifp = ifq->ifq_if;
2096 	struct wg_softc		*sc = ifp->if_softc;
2097 	struct wg_peer		*peer;
2098 	struct wg_tag		*t;
2099 	struct mbuf		*m;
2100 	SLIST_HEAD(,wg_peer)	 start_list;
2101 
2102 	SLIST_INIT(&start_list);
2103 
2104 	/*
2105 	 * We should be OK to modify p_start_list and p_start_onlist in this
2106 	 * function, as there should only be one invocation of ifp->if_qstart
2107 	 * at a time.
2108 	 */
2109 	while ((m = ifq_dequeue(ifq)) != NULL) {
2110 		t = wg_tag_get(m);
2111 		peer = t->t_peer;
2112 		if (mq_push(&peer->p_stage_queue, m) != 0)
2113 			counters_inc(ifp->if_counters, ifc_oqdrops);
2114 		if (!peer->p_start_onlist) {
2115 			SLIST_INSERT_HEAD(&start_list, peer, p_start_list);
2116 			peer->p_start_onlist = 1;
2117 		}
2118 	}
2119 	SLIST_FOREACH(peer, &start_list, p_start_list) {
2120 		if (noise_remote_ready(&peer->p_remote) == 0)
2121 			wg_queue_out(sc, peer);
2122 		else
2123 			wg_timers_event_want_initiation(&peer->p_timers);
2124 		peer->p_start_onlist = 0;
2125 	}
2126 	task_add(wg_crypt_taskq, &sc->sc_encap);
2127 }
2128 
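/*
 * wg_output looks up the owning peer for the destination address in the
 * allowed-IPs tables, records it in the packet tag together with the
 * current MTU, and enqueues the packet for wg_qstart.
 */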
2129 int
2130 wg_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
2131     struct rtentry *rt)
2132 {
2133 	struct wg_softc	*sc = ifp->if_softc;
2134 	struct wg_peer	*peer;
2135 	struct wg_tag	*t;
2136 	int		 af, ret = EINVAL;
2137 
2138 	NET_ASSERT_LOCKED();
2139 
2140 	if ((t = wg_tag_get(m)) == NULL) {
2141 		ret = ENOBUFS;
2142 		goto error;
2143 	}
2144 
2145 	m->m_pkthdr.ph_family = sa->sa_family;
2146 	if (sa->sa_family == AF_INET) {
2147 		peer = wg_aip_lookup(sc->sc_aip4,
2148 		    &mtod(m, struct ip *)->ip_dst);
2149 #ifdef INET6
2150 	} else if (sa->sa_family == AF_INET6) {
2151 		peer = wg_aip_lookup(sc->sc_aip6,
2152 		    &mtod(m, struct ip6_hdr *)->ip6_dst);
2153 #endif
2154 	} else {
2155 		ret = EAFNOSUPPORT;
2156 		goto error;
2157 	}
2158 
2159 #if NBPFILTER > 0
2160 	if (sc->sc_if.if_bpf)
2161 		bpf_mtap_af(sc->sc_if.if_bpf, sa->sa_family, m,
2162 		    BPF_DIRECTION_OUT);
2163 #endif
2164 
2165 	if (peer == NULL) {
2166 		ret = ENETUNREACH;
2167 		goto error;
2168 	}
2169 
2170 	af = peer->p_endpoint.e_remote.r_sa.sa_family;
2171 	if (af != AF_INET && af != AF_INET6) {
2172 		DPRINTF(sc, "No valid endpoint has been configured or "
2173 				"discovered for peer %llu\n", peer->p_id);
2174 		ret = EDESTADDRREQ;
2175 		goto error;
2176 	}
2177 
2178 	if (m->m_pkthdr.ph_loopcnt++ > M_MAXLOOP) {
2179 		DPRINTF(sc, "Packet looped\n");
2180 		ret = ELOOP;
2181 		goto error;
2182 	}
2183 
2184 	/*
2185 	 * As we hold a reference to the peer in the mbuf, we can't handle
2186 	 * a delayed packet without doing some refcnting. If a peer is removed
2187 	 * while a delayed packet holds a reference, bad things will happen.
2188 	 * For the time being, delayed packets are unsupported. This may be
2189 	 * fixed with another aip_lookup in wg_qstart, or refcnting as above.
2190 	 */
2191 	if (m->m_pkthdr.pf.delay > 0) {
2192 		DPRINTF(sc, "PF delay unsupported\n");
2193 		ret = EOPNOTSUPP;
2194 		goto error;
2195 	}
2196 
2197 	t->t_peer = peer;
2198 	t->t_mbuf = NULL;
2199 	t->t_done = 0;
2200 	t->t_mtu = ifp->if_mtu;
2201 
2202 	/*
2203 	 * We still have an issue with ifq: it counts a packet even if it is
2204 	 * later dropped in wg_qstart or never encrypted. Those are then also
2205 	 * counted as ofails or oqdrops, so such packets get counted twice.
2206 	 */
2207 	return if_enqueue(ifp, m);
2208 error:
2209 	counters_inc(ifp->if_counters, ifc_oerrors);
2210 	m_freem(m);
2211 	return ret;
2212 }
2213 
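/*
 * wg_ioctl_set applies a SIOCSWG request: it may update the private key,
 * UDP port and routing table, and then walks the variable-length list of
 * peers and allowed IPs that follows the interface description, creating,
 * updating or removing peers as requested.
 */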
2214 int
2215 wg_ioctl_set(struct wg_softc *sc, struct wg_data_io *data)
2216 {
2217 	struct wg_interface_io	*iface_p, iface_o;
2218 	struct wg_peer_io	*peer_p, peer_o;
2219 	struct wg_aip_io	*aip_p, aip_o;
2220 
2221 	struct wg_peer		*peer, *tpeer;
2222 	struct wg_aip		*aip, *taip;
2223 
2224 	in_port_t		 port;
2225 	int			 rtable;
2226 
2227 	uint8_t			 public[WG_KEY_SIZE], private[WG_KEY_SIZE];
2228 	size_t			 i, j;
2229 	int			 ret, has_identity;
2230 
2231 	if ((ret = suser(curproc)) != 0)
2232 		return ret;
2233 
2234 	rw_enter_write(&sc->sc_lock);
2235 
2236 	iface_p = data->wgd_interface;
2237 	if ((ret = copyin(iface_p, &iface_o, sizeof(iface_o))) != 0)
2238 		goto error;
2239 
2240 	if (iface_o.i_flags & WG_INTERFACE_REPLACE_PEERS)
2241 		TAILQ_FOREACH_SAFE(peer, &sc->sc_peer_seq, p_seq_entry, tpeer)
2242 			wg_peer_destroy(peer);
2243 
2244 	if (iface_o.i_flags & WG_INTERFACE_HAS_PRIVATE &&
2245 	    (noise_local_keys(&sc->sc_local, NULL, private) ||
2246 	     timingsafe_bcmp(private, iface_o.i_private, WG_KEY_SIZE))) {
2247 		if (curve25519_generate_public(public, iface_o.i_private)) {
2248 			if ((peer = wg_peer_lookup(sc, public)) != NULL)
2249 				wg_peer_destroy(peer);
2250 		}
2251 		noise_local_lock_identity(&sc->sc_local);
2252 		has_identity = noise_local_set_private(&sc->sc_local,
2253 						       iface_o.i_private);
2254 		TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2255 			noise_remote_precompute(&peer->p_remote);
2256 			wg_timers_event_reset_handshake_last_sent(&peer->p_timers);
2257 			noise_remote_expire_current(&peer->p_remote);
2258 		}
2259 		cookie_checker_update(&sc->sc_cookie,
2260 				      has_identity == 0 ? public : NULL);
2261 		noise_local_unlock_identity(&sc->sc_local);
2262 	}
2263 
2264 	if (iface_o.i_flags & WG_INTERFACE_HAS_PORT)
2265 		port = htons(iface_o.i_port);
2266 	else
2267 		port = sc->sc_udp_port;
2268 
2269 	if (iface_o.i_flags & WG_INTERFACE_HAS_RTABLE)
2270 		rtable = iface_o.i_rtable;
2271 	else
2272 		rtable = sc->sc_udp_rtable;
2273 
2274 	if (port != sc->sc_udp_port || rtable != sc->sc_udp_rtable) {
2275 		TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry)
2276 			wg_peer_clear_src(peer);
2277 
2278 		if (sc->sc_if.if_flags & IFF_RUNNING)
2279 			if ((ret = wg_bind(sc, &port, &rtable)) != 0)
2280 				goto error;
2281 
2282 		sc->sc_udp_port = port;
2283 		sc->sc_udp_rtable = rtable;
2284 	}
2285 
2286 	peer_p = &iface_p->i_peers[0];
2287 	for (i = 0; i < iface_o.i_peers_count; i++) {
2288 		if ((ret = copyin(peer_p, &peer_o, sizeof(peer_o))) != 0)
2289 			goto error;
2290 
2291 		/* Peer must have public key */
2292 		if (!(peer_o.p_flags & WG_PEER_HAS_PUBLIC))
2293 			goto next_peer;
2294 
2295 		/* 0 = latest protocol, 1 = this protocol */
2296 		if (peer_o.p_protocol_version != 0) {
2297 			if (peer_o.p_protocol_version > 1) {
2298 				ret = EPFNOSUPPORT;
2299 				goto error;
2300 			}
2301 		}
2302 
2303 		/* Get local public and check that peer key doesn't match */
2304 		if (noise_local_keys(&sc->sc_local, public, NULL) == 0 &&
2305 		    bcmp(public, peer_o.p_public, WG_KEY_SIZE) == 0)
2306 			goto next_peer;
2307 
2308 		/* Look up the peer, or create it if it doesn't exist */
2309 		if ((peer = wg_peer_lookup(sc, peer_o.p_public)) == NULL) {
2310 			/* If we want to delete, there is no need to create a
2311 			 * new one. Also, don't create a new one if we only
2312 			 * want to update. */
2313 			if (peer_o.p_flags & (WG_PEER_REMOVE|WG_PEER_UPDATE))
2314 				goto next_peer;
2315 
2316 			if ((peer = wg_peer_create(sc,
2317 			    peer_o.p_public)) == NULL) {
2318 				ret = ENOMEM;
2319 				goto error;
2320 			}
2321 		}
2322 
2323 		/* Remove peer and continue if specified */
2324 		if (peer_o.p_flags & WG_PEER_REMOVE) {
2325 			wg_peer_destroy(peer);
2326 			goto next_peer;
2327 		}
2328 
2329 		if (peer_o.p_flags & WG_PEER_HAS_ENDPOINT)
2330 			wg_peer_set_sockaddr(peer, &peer_o.p_sa);
2331 
2332 		if (peer_o.p_flags & WG_PEER_HAS_PSK)
2333 			noise_remote_set_psk(&peer->p_remote, peer_o.p_psk);
2334 
2335 		if (peer_o.p_flags & WG_PEER_HAS_PKA)
2336 			wg_timers_set_persistent_keepalive(&peer->p_timers,
2337 			    peer_o.p_pka);
2338 
2339 		if (peer_o.p_flags & WG_PEER_REPLACE_AIPS) {
2340 			LIST_FOREACH_SAFE(aip, &peer->p_aip, a_entry, taip) {
2341 				wg_aip_remove(sc, peer, &aip->a_data);
2342 			}
2343 		}
2344 
2345 		if (peer_o.p_flags & WG_PEER_SET_DESCRIPTION)
2346 			strlcpy(peer->p_description, peer_o.p_description,
2347 			    IFDESCRSIZE);
2348 
2349 		aip_p = &peer_p->p_aips[0];
2350 		for (j = 0; j < peer_o.p_aips_count; j++) {
2351 			if ((ret = copyin(aip_p, &aip_o, sizeof(aip_o))) != 0)
2352 				goto error;
2353 			ret = wg_aip_add(sc, peer, &aip_o);
2354 			if (ret != 0)
2355 				goto error;
2356 			aip_p++;
2357 		}
2358 
2359 		peer_p = (struct wg_peer_io *)aip_p;
2360 		continue;
2361 next_peer:
2362 		aip_p = &peer_p->p_aips[0];
2363 		aip_p += peer_o.p_aips_count;
2364 		peer_p = (struct wg_peer_io *)aip_p;
2365 	}
2366 
2367 error:
2368 	rw_exit_write(&sc->sc_lock);
2369 	explicit_bzero(&iface_o, sizeof(iface_o));
2370 	explicit_bzero(&peer_o, sizeof(peer_o));
2371 	explicit_bzero(&aip_o, sizeof(aip_o));
2372 	explicit_bzero(public, sizeof(public));
2373 	explicit_bzero(private, sizeof(private));
2374 	return ret;
2375 }
2376 
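/*
 * wg_ioctl_get fills in a SIOCGWG request.  Key material and the peer list
 * are only exported to the superuser.  If the supplied buffer is too
 * small, the required size is still returned in wgd_size.
 */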
2377 int
2378 wg_ioctl_get(struct wg_softc *sc, struct wg_data_io *data)
2379 {
2380 	struct wg_interface_io	*iface_p, iface_o;
2381 	struct wg_peer_io	*peer_p, peer_o;
2382 	struct wg_aip_io	*aip_p;
2383 
2384 	struct wg_peer		*peer;
2385 	struct wg_aip		*aip;
2386 
2387 	size_t			 size, peer_count, aip_count;
2388 	int			 ret = 0, is_suser = suser(curproc) == 0;
2389 
2390 	size = sizeof(struct wg_interface_io);
2391 	if (data->wgd_size < size && !is_suser)
2392 		goto ret_size;
2393 
2394 	iface_p = data->wgd_interface;
2395 	bzero(&iface_o, sizeof(iface_o));
2396 
2397 	rw_enter_read(&sc->sc_lock);
2398 
2399 	if (sc->sc_udp_port != 0) {
2400 		iface_o.i_port = ntohs(sc->sc_udp_port);
2401 		iface_o.i_flags |= WG_INTERFACE_HAS_PORT;
2402 	}
2403 
2404 	if (sc->sc_udp_rtable != 0) {
2405 		iface_o.i_rtable = sc->sc_udp_rtable;
2406 		iface_o.i_flags |= WG_INTERFACE_HAS_RTABLE;
2407 	}
2408 
2409 	if (!is_suser)
2410 		goto copy_out_iface;
2411 
2412 	if (noise_local_keys(&sc->sc_local, iface_o.i_public,
2413 	    iface_o.i_private) == 0) {
2414 		iface_o.i_flags |= WG_INTERFACE_HAS_PUBLIC;
2415 		iface_o.i_flags |= WG_INTERFACE_HAS_PRIVATE;
2416 	}
2417 
2418 	size += sizeof(struct wg_peer_io) * sc->sc_peer_num;
2419 	size += sizeof(struct wg_aip_io) * sc->sc_aip_num;
2420 	if (data->wgd_size < size)
2421 		goto unlock_and_ret_size;
2422 
2423 	peer_count = 0;
2424 	peer_p = &iface_p->i_peers[0];
2425 	TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2426 		bzero(&peer_o, sizeof(peer_o));
2427 		peer_o.p_flags = WG_PEER_HAS_PUBLIC;
2428 		peer_o.p_protocol_version = 1;
2429 
2430 		if (noise_remote_keys(&peer->p_remote, peer_o.p_public,
2431 		    peer_o.p_psk) == 0)
2432 			peer_o.p_flags |= WG_PEER_HAS_PSK;
2433 
2434 		if (wg_timers_get_persistent_keepalive(&peer->p_timers,
2435 		    &peer_o.p_pka) == 0)
2436 			peer_o.p_flags |= WG_PEER_HAS_PKA;
2437 
2438 		if (wg_peer_get_sockaddr(peer, &peer_o.p_sa) == 0)
2439 			peer_o.p_flags |= WG_PEER_HAS_ENDPOINT;
2440 
2441 		mtx_enter(&peer->p_counters_mtx);
2442 		peer_o.p_txbytes = peer->p_counters_tx;
2443 		peer_o.p_rxbytes = peer->p_counters_rx;
2444 		mtx_leave(&peer->p_counters_mtx);
2445 
2446 		wg_timers_get_last_handshake(&peer->p_timers,
2447 		    &peer_o.p_last_handshake);
2448 
2449 		aip_count = 0;
2450 		aip_p = &peer_p->p_aips[0];
2451 		LIST_FOREACH(aip, &peer->p_aip, a_entry) {
2452 			if ((ret = copyout(&aip->a_data, aip_p, sizeof(*aip_p))) != 0)
2453 				goto unlock_and_ret_size;
2454 			aip_p++;
2455 			aip_count++;
2456 		}
2457 		peer_o.p_aips_count = aip_count;
2458 
2459 		strlcpy(peer_o.p_description, peer->p_description, IFDESCRSIZE);
2460 
2461 		if ((ret = copyout(&peer_o, peer_p, sizeof(peer_o))) != 0)
2462 			goto unlock_and_ret_size;
2463 
2464 		peer_p = (struct wg_peer_io *)aip_p;
2465 		peer_count++;
2466 	}
2467 	iface_o.i_peers_count = peer_count;
2468 
2469 copy_out_iface:
2470 	ret = copyout(&iface_o, iface_p, sizeof(iface_o));
2471 unlock_and_ret_size:
2472 	rw_exit_read(&sc->sc_lock);
2473 	explicit_bzero(&iface_o, sizeof(iface_o));
2474 	explicit_bzero(&peer_o, sizeof(peer_o));
2475 ret_size:
2476 	data->wgd_size = size;
2477 	return ret;
2478 }
2479 
2480 int
2481 wg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2482 {
2483 	struct ifreq	*ifr = (struct ifreq *) data;
2484 	struct wg_softc	*sc = ifp->if_softc;
2485 	int		 ret = 0;
2486 
2487 	switch (cmd) {
2488 	case SIOCSWG:
2489 		NET_UNLOCK();
2490 		ret = wg_ioctl_set(sc, (struct wg_data_io *) data);
2491 		NET_LOCK();
2492 		break;
2493 	case SIOCGWG:
2494 		NET_UNLOCK();
2495 		ret = wg_ioctl_get(sc, (struct wg_data_io *) data);
2496 		NET_LOCK();
2497 		break;
2498 	/* Interface IOCTLs */
2499 	case SIOCSIFADDR:
2500 		SET(ifp->if_flags, IFF_UP);
2501 		/* FALLTHROUGH */
2502 	case SIOCSIFFLAGS:
2503 		if (ISSET(ifp->if_flags, IFF_UP))
2504 			ret = wg_up(sc);
2505 		else
2506 			wg_down(sc);
2507 		break;
2508 	case SIOCSIFMTU:
2509 		/* Arbitrary limits */
2510 		if (ifr->ifr_mtu <= 0 || ifr->ifr_mtu > 9000)
2511 			ret = EINVAL;
2512 		else
2513 			ifp->if_mtu = ifr->ifr_mtu;
2514 		break;
2515 	case SIOCADDMULTI:
2516 	case SIOCDELMULTI:
2517 		break;
2518 	default:
2519 		ret = ENOTTY;
2520 	}
2521 
2522 	return ret;
2523 }
2524 
2525 int
2526 wg_up(struct wg_softc *sc)
2527 {
2528 	struct wg_peer	*peer;
2529 	int		 ret = 0;
2530 
2531 	NET_ASSERT_LOCKED();
2532 	/*
2533 	 * We use IFF_RUNNING for exclusive access here. We may also want an
2534 	 * exclusive sc_lock, as wg_bind may write to sc_udp_port. We also want
2535 	 * to drop the NET_LOCK, as we want to call socreate, sobind, etc. Once
2536 	 * solock is no longer === NET_LOCK, we may be able to avoid this.
2537 	 */
2538 	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
2539 		SET(sc->sc_if.if_flags, IFF_RUNNING);
2540 		NET_UNLOCK();
2541 
2542 		rw_enter_write(&sc->sc_lock);
2543 		/*
2544 		 * If we successfully bind the socket, then enable the timers
2545 		 * for each peer. This will send all staged packets and a
2546 		 * keepalive if necessary.
2547 		 */
2548 		ret = wg_bind(sc, &sc->sc_udp_port, &sc->sc_udp_rtable);
2549 		if (ret == 0) {
2550 			TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2551 				wg_timers_enable(&peer->p_timers);
2552 				wg_queue_out(sc, peer);
2553 			}
2554 		}
2555 		rw_exit_write(&sc->sc_lock);
2556 
2557 		NET_LOCK();
2558 		if (ret != 0)
2559 			CLR(sc->sc_if.if_flags, IFF_RUNNING);
2560 	}
2561 	return ret;
2562 }
2563 
2564 void
2565 wg_down(struct wg_softc *sc)
2566 {
2567 	struct wg_peer	*peer;
2568 
2569 	NET_ASSERT_LOCKED();
2570 	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING))
2571 		return;
2572 	CLR(sc->sc_if.if_flags, IFF_RUNNING);
2573 	NET_UNLOCK();
2574 
2575 	/*
2576 	 * We only need a read lock here, as we aren't writing to anything
2577 	 * that isn't granularly locked.
2578 	 */
2579 	rw_enter_read(&sc->sc_lock);
2580 	TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2581 		mq_purge(&peer->p_stage_queue);
2582 		wg_timers_disable(&peer->p_timers);
2583 	}
2584 
2585 	taskq_barrier(wg_handshake_taskq);
2586 	TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2587 		noise_remote_clear(&peer->p_remote);
2588 		wg_timers_event_reset_handshake_last_sent(&peer->p_timers);
2589 	}
2590 
2591 	wg_unbind(sc);
2592 	rw_exit_read(&sc->sc_lock);
2593 	NET_LOCK();
2594 }
2595 
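/*
 * wg_clone_create sets up a new wg(4) instance.  The handshake and crypt
 * taskqs are shared by all instances: they are created when the first
 * interface is cloned and destroyed again with the last one.
 */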
2596 int
2597 wg_clone_create(struct if_clone *ifc, int unit)
2598 {
2599 	struct ifnet		*ifp;
2600 	struct wg_softc		*sc;
2601 	struct noise_upcall	 local_upcall;
2602 
2603 	KERNEL_ASSERT_LOCKED();
2604 
2605 	if (wg_counter == 0) {
2606 		wg_handshake_taskq = taskq_create("wg_handshake",
2607 		    2, IPL_NET, TASKQ_MPSAFE);
2608 		wg_crypt_taskq = taskq_create("wg_crypt",
2609 		    ncpus, IPL_NET, TASKQ_MPSAFE);
2610 
2611 		if (wg_handshake_taskq == NULL || wg_crypt_taskq == NULL) {
2612 			if (wg_handshake_taskq != NULL)
2613 				taskq_destroy(wg_handshake_taskq);
2614 			if (wg_crypt_taskq != NULL)
2615 				taskq_destroy(wg_crypt_taskq);
2616 			wg_handshake_taskq = NULL;
2617 			wg_crypt_taskq = NULL;
2618 			return ENOTRECOVERABLE;
2619 		}
2620 	}
2621 	wg_counter++;
2622 
2623 	if ((sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL)
2624 		goto ret_00;
2625 
2626 	local_upcall.u_arg = sc;
2627 	local_upcall.u_remote_get = wg_remote_get;
2628 	local_upcall.u_index_set = wg_index_set;
2629 	local_upcall.u_index_drop = wg_index_drop;
2630 
2631 	TAILQ_INIT(&sc->sc_peer_seq);
2632 
2633 	/* sc_if is initialised after everything else */
2634 	arc4random_buf(&sc->sc_secret, sizeof(sc->sc_secret));
2635 
2636 	rw_init(&sc->sc_lock, "wg");
2637 	noise_local_init(&sc->sc_local, &local_upcall);
2638 	if (cookie_checker_init(&sc->sc_cookie, &wg_ratelimit_pool) != 0)
2639 		goto ret_01;
2640 	sc->sc_udp_port = 0;
2641 	sc->sc_udp_rtable = 0;
2642 
2643 	rw_init(&sc->sc_so_lock, "wg_so");
2644 	sc->sc_so4 = NULL;
2645 #ifdef INET6
2646 	sc->sc_so6 = NULL;
2647 #endif
2648 
2649 	sc->sc_aip_num = 0;
2650 	if ((sc->sc_aip4 = art_alloc(0, 32, 0)) == NULL)
2651 		goto ret_02;
2652 #ifdef INET6
2653 	if ((sc->sc_aip6 = art_alloc(0, 128, 0)) == NULL)
2654 		goto ret_03;
2655 #endif
2656 
2657 	rw_init(&sc->sc_peer_lock, "wg_peer");
2658 	sc->sc_peer_num = 0;
2659 	if ((sc->sc_peer = hashinit(HASHTABLE_PEER_SIZE, M_DEVBUF,
2660 	    M_NOWAIT, &sc->sc_peer_mask)) == NULL)
2661 		goto ret_04;
2662 
2663 	mtx_init(&sc->sc_index_mtx, IPL_NET);
2664 	if ((sc->sc_index = hashinit(HASHTABLE_INDEX_SIZE, M_DEVBUF,
2665 	    M_NOWAIT, &sc->sc_index_mask)) == NULL)
2666 		goto ret_05;
2667 
2668 	task_set(&sc->sc_handshake, wg_handshake_worker, sc);
2669 	mq_init(&sc->sc_handshake_queue, MAX_QUEUED_HANDSHAKES, IPL_NET);
2670 
2671 	task_set(&sc->sc_encap, wg_encap_worker, sc);
2672 	task_set(&sc->sc_decap, wg_decap_worker, sc);
2673 
2674 	bzero(&sc->sc_encap_ring, sizeof(sc->sc_encap_ring));
2675 	mtx_init(&sc->sc_encap_ring.r_mtx, IPL_NET);
2676 	bzero(&sc->sc_decap_ring, sizeof(sc->sc_decap_ring));
2677 	mtx_init(&sc->sc_decap_ring.r_mtx, IPL_NET);
2678 
2679 	/* We've set up the softc; now we can set up the ifnet. */
2680 	ifp = &sc->sc_if;
2681 	ifp->if_softc = sc;
2682 
2683 	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "wg%d", unit);
2684 
2685 	ifp->if_mtu = DEFAULT_MTU;
2686 	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_NOARP;
2687 	ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
2688 	ifp->if_txmit = 64; /* Keep our workers active for longer. */
2689 
2690 	ifp->if_ioctl = wg_ioctl;
2691 	ifp->if_qstart = wg_qstart;
2692 	ifp->if_output = wg_output;
2693 
2694 	ifp->if_type = IFT_WIREGUARD;
2695 	ifp->if_rtrequest = p2p_rtrequest;
2696 
2697 	if_counters_alloc(ifp);
2698 	if_attach(ifp);
2699 	if_alloc_sadl(ifp);
2700 
2701 #if NBPFILTER > 0
2702 	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
2703 #endif
2704 
2705 	DPRINTF(sc, "Interface created\n");
2706 
2707 	return 0;
2708 ret_05:
2709 	hashfree(sc->sc_peer, HASHTABLE_PEER_SIZE, M_DEVBUF);
2710 ret_04:
2711 #ifdef INET6
2712 	free(sc->sc_aip6, M_RTABLE, sizeof(*sc->sc_aip6));
2713 ret_03:
2714 #endif
2715 	free(sc->sc_aip4, M_RTABLE, sizeof(*sc->sc_aip4));
2716 ret_02:
2717 	cookie_checker_deinit(&sc->sc_cookie);
2718 ret_01:
2719 	free(sc, M_DEVBUF, sizeof(*sc));
2720 ret_00:
2721 	return ENOBUFS;
2722 }
2723 int
2724 wg_clone_destroy(struct ifnet *ifp)
2725 {
2726 	struct wg_softc	*sc = ifp->if_softc;
2727 	struct wg_peer	*peer, *tpeer;
2728 
2729 	KERNEL_ASSERT_LOCKED();
2730 
2731 	rw_enter_write(&sc->sc_lock);
2732 	TAILQ_FOREACH_SAFE(peer, &sc->sc_peer_seq, p_seq_entry, tpeer)
2733 		wg_peer_destroy(peer);
2734 	rw_exit_write(&sc->sc_lock);
2735 
2736 	wg_unbind(sc);
2737 	if_detach(ifp);
2738 
2739 	wg_counter--;
2740 	if (wg_counter == 0) {
2741 		KASSERT(wg_handshake_taskq != NULL && wg_crypt_taskq != NULL);
2742 		taskq_destroy(wg_handshake_taskq);
2743 		taskq_destroy(wg_crypt_taskq);
2744 		wg_handshake_taskq = NULL;
2745 		wg_crypt_taskq = NULL;
2746 	}
2747 
2748 	DPRINTF(sc, "Destroyed interface\n");
2749 
2750 	hashfree(sc->sc_index, HASHTABLE_INDEX_SIZE, M_DEVBUF);
2751 	hashfree(sc->sc_peer, HASHTABLE_PEER_SIZE, M_DEVBUF);
2752 #ifdef INET6
2753 	free(sc->sc_aip6, M_RTABLE, sizeof(*sc->sc_aip6));
2754 #endif
2755 	free(sc->sc_aip4, M_RTABLE, sizeof(*sc->sc_aip4));
2756 	cookie_checker_deinit(&sc->sc_cookie);
2757 	free(sc, M_DEVBUF, sizeof(*sc));
2758 	return 0;
2759 }
2760 
2761 void
2762 wgattach(int nwg)
2763 {
2764 #ifdef WGTEST
2765 	cookie_test();
2766 	noise_test();
2767 #endif
2768 	if_clone_attach(&wg_cloner);
2769 
2770 	pool_init(&wg_aip_pool, sizeof(struct wg_aip), 0,
2771 			IPL_NET, 0, "wgaip", NULL);
2772 	pool_init(&wg_peer_pool, sizeof(struct wg_peer), 0,
2773 			IPL_NET, 0, "wgpeer", NULL);
2774 	pool_init(&wg_ratelimit_pool, sizeof(struct ratelimit_entry), 0,
2775 			IPL_NET, 0, "wgratelimit", NULL);
2776 }
2777