1 /*	$OpenBSD: if_wg.c,v 1.26 2022/07/21 11:26:50 kn Exp $ */
2 
3 /*
4  * Copyright (C) 2015-2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
5  * Copyright (C) 2019-2020 Matt Dunwoodie <ncon@noconroy.net>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include "bpfilter.h"
21 #include "pf.h"
22 
23 #include <sys/types.h>
24 #include <sys/systm.h>
25 #include <sys/param.h>
26 #include <sys/pool.h>
27 
28 #include <sys/socket.h>
29 #include <sys/socketvar.h>
30 #include <sys/percpu.h>
31 #include <sys/ioctl.h>
32 #include <sys/mbuf.h>
33 
34 #include <net/if.h>
35 #include <net/if_var.h>
36 #include <net/if_types.h>
37 #include <net/if_wg.h>
38 
39 #include <net/wg_noise.h>
40 #include <net/wg_cookie.h>
41 
42 #include <net/pfvar.h>
43 #include <net/route.h>
44 #include <net/bpf.h>
45 
46 #include <netinet/ip.h>
47 #include <netinet/ip6.h>
48 #include <netinet/udp.h>
49 #include <netinet/in_pcb.h>
50 
51 #include <crypto/siphash.h>
52 
53 #define DEFAULT_MTU		1420
54 
55 #define MAX_STAGED_PKT		128
56 #define MAX_QUEUED_PKT		1024
57 #define MAX_QUEUED_PKT_MASK	(MAX_QUEUED_PKT - 1)
58 
59 #define MAX_QUEUED_HANDSHAKES	4096
60 
61 #define HASHTABLE_PEER_SIZE	(1 << 11)
62 #define HASHTABLE_INDEX_SIZE	(1 << 13)
63 #define MAX_PEERS_PER_IFACE	(1 << 20)
64 
65 #define REKEY_TIMEOUT		5
66 #define REKEY_TIMEOUT_JITTER	334 /* 1/3 sec, round for arc4random_uniform */
67 #define KEEPALIVE_TIMEOUT	10
68 #define MAX_TIMER_HANDSHAKES	(90 / REKEY_TIMEOUT)
69 #define NEW_HANDSHAKE_TIMEOUT	(REKEY_TIMEOUT + KEEPALIVE_TIMEOUT)
70 #define UNDERLOAD_TIMEOUT	1
71 
72 #define DPRINTF(sc, str, ...) do { if (ISSET((sc)->sc_if.if_flags, IFF_DEBUG))\
73     printf("%s: " str, (sc)->sc_if.if_xname, ##__VA_ARGS__); } while (0)
74 
75 #define CONTAINER_OF(ptr, type, member) ({			\
76 	const __typeof( ((type *)0)->member ) *__mptr = (ptr);	\
77 	(type *)( (char *)__mptr - offsetof(type,member) );})
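/*
 * Example (illustrative): the timer callbacks below receive a pointer to an
 * embedded struct wg_timers and recover the owning peer with
 *	peer = CONTAINER_OF(t, struct wg_peer, p_timers);
 */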
78 
79 /* First byte indicating packet type on the wire */
80 #define WG_PKT_INITIATION htole32(1)
81 #define WG_PKT_RESPONSE htole32(2)
82 #define WG_PKT_COOKIE htole32(3)
83 #define WG_PKT_DATA htole32(4)
84 
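/*
 * WG_PKT_WITH_PADDING rounds a payload length up to the next multiple of 16,
 * e.g. 0 -> 0 (so keepalives stay empty), 1 -> 16, 1420 -> 1424.
 */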
85 #define WG_PKT_WITH_PADDING(n)	(((n) + (16-1)) & (~(16-1)))
86 #define WG_KEY_SIZE		WG_KEY_LEN
87 
88 struct wg_pkt_initiation {
89 	uint32_t		t;
90 	uint32_t		s_idx;
91 	uint8_t			ue[NOISE_PUBLIC_KEY_LEN];
92 	uint8_t			es[NOISE_PUBLIC_KEY_LEN + NOISE_AUTHTAG_LEN];
93 	uint8_t			ets[NOISE_TIMESTAMP_LEN + NOISE_AUTHTAG_LEN];
94 	struct cookie_macs	m;
95 };
96 
97 struct wg_pkt_response {
98 	uint32_t		t;
99 	uint32_t		s_idx;
100 	uint32_t		r_idx;
101 	uint8_t			ue[NOISE_PUBLIC_KEY_LEN];
102 	uint8_t			en[0 + NOISE_AUTHTAG_LEN];
103 	struct cookie_macs	m;
104 };
105 
106 struct wg_pkt_cookie {
107 	uint32_t		t;
108 	uint32_t		r_idx;
109 	uint8_t			nonce[COOKIE_NONCE_SIZE];
110 	uint8_t			ec[COOKIE_ENCRYPTED_SIZE];
111 };
112 
113 struct wg_pkt_data {
114 	uint32_t		t;
115 	uint32_t		r_idx;
116 	uint8_t			nonce[sizeof(uint64_t)];
117 	uint8_t			buf[];
118 };
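/*
 * Data packets carry the 64-bit send counter in "nonce" (little-endian on the
 * wire, see wg_encap/wg_decap) and the encrypted payload followed by the
 * authentication tag in "buf".
 */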
119 
120 struct wg_endpoint {
121 	union {
122 		struct sockaddr		r_sa;
123 		struct sockaddr_in	r_sin;
124 #ifdef INET6
125 		struct sockaddr_in6	r_sin6;
126 #endif
127 	} e_remote;
128 	union {
129 		struct in_addr		l_in;
130 #ifdef INET6
131 		struct in6_pktinfo	l_pktinfo6;
132 #define l_in6 l_pktinfo6.ipi6_addr
133 #endif
134 	} e_local;
135 };
136 
137 struct wg_tag {
138 	struct wg_endpoint	 t_endpoint;
139 	struct wg_peer		*t_peer;
140 	struct mbuf		*t_mbuf;
141 	int			 t_done;
142 	int			 t_mtu;
143 };
144 
145 struct wg_index {
146 	LIST_ENTRY(wg_index)	 i_entry;
147 	SLIST_ENTRY(wg_index)	 i_unused_entry;
148 	uint32_t		 i_key;
149 	struct noise_remote	*i_value;
150 };
151 
152 struct wg_timers {
153 	/* t_lock is for blocking wg_timers_event_* when setting t_disabled. */
154 	struct rwlock		 t_lock;
155 
156 	int			 t_disabled;
157 	int			 t_need_another_keepalive;
158 	uint16_t		 t_persistent_keepalive_interval;
159 	struct timeout		 t_new_handshake;
160 	struct timeout		 t_send_keepalive;
161 	struct timeout		 t_retry_handshake;
162 	struct timeout		 t_zero_key_material;
163 	struct timeout		 t_persistent_keepalive;
164 
165 	struct mutex		 t_handshake_mtx;
166 	struct timespec		 t_handshake_last_sent;	/* nanouptime */
167 	struct timespec		 t_handshake_complete;	/* nanotime */
168 	int			 t_handshake_retries;
169 };
170 
171 struct wg_aip {
172 	struct art_node		 a_node;
173 	LIST_ENTRY(wg_aip)	 a_entry;
174 	struct wg_peer		*a_peer;
175 	struct wg_aip_io	 a_data;
176 };
177 
178 struct wg_queue {
179 	struct mutex		 q_mtx;
180 	struct mbuf_list	 q_list;
181 };
182 
183 struct wg_ring {
184 	struct mutex	 r_mtx;
185 	uint32_t	 r_head;
186 	uint32_t	 r_tail;
187 	struct mbuf	*r_buf[MAX_QUEUED_PKT];
188 };
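/*
 * wg_ring is a mutex-protected ring of MAX_QUEUED_PKT slots; r_head and
 * r_tail increment freely and are masked with MAX_QUEUED_PKT_MASK on access,
 * which relies on MAX_QUEUED_PKT being a power of two.
 */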
189 
190 struct wg_peer {
191 	LIST_ENTRY(wg_peer)	 p_pubkey_entry;
192 	TAILQ_ENTRY(wg_peer)	 p_seq_entry;
193 	uint64_t		 p_id;
194 	struct wg_softc		*p_sc;
195 
196 	struct noise_remote	 p_remote;
197 	struct cookie_maker	 p_cookie;
198 	struct wg_timers	 p_timers;
199 
200 	struct mutex		 p_counters_mtx;
201 	uint64_t		 p_counters_tx;
202 	uint64_t		 p_counters_rx;
203 
204 	struct mutex		 p_endpoint_mtx;
205 	struct wg_endpoint	 p_endpoint;
206 
207 	struct task		 p_send_initiation;
208 	struct task		 p_send_keepalive;
209 	struct task		 p_clear_secrets;
210 	struct task		 p_deliver_out;
211 	struct task		 p_deliver_in;
212 
213 	struct mbuf_queue	 p_stage_queue;
214 	struct wg_queue		 p_encap_queue;
215 	struct wg_queue		 p_decap_queue;
216 
217 	SLIST_HEAD(,wg_index)	 p_unused_index;
218 	struct wg_index		 p_index[3];
219 
220 	LIST_HEAD(,wg_aip)	 p_aip;
221 
222 	SLIST_ENTRY(wg_peer)	 p_start_list;
223 	int			 p_start_onlist;
224 };
225 
226 struct wg_softc {
227 	struct ifnet		 sc_if;
228 	SIPHASH_KEY		 sc_secret;
229 
230 	struct rwlock		 sc_lock;
231 	struct noise_local	 sc_local;
232 	struct cookie_checker	 sc_cookie;
233 	in_port_t		 sc_udp_port;
234 	int			 sc_udp_rtable;
235 
236 	struct rwlock		 sc_so_lock;
237 	struct socket		*sc_so4;
238 #ifdef INET6
239 	struct socket		*sc_so6;
240 #endif
241 
242 	size_t			 sc_aip_num;
243 	struct art_root		*sc_aip4;
244 #ifdef INET6
245 	struct art_root		*sc_aip6;
246 #endif
247 
248 	struct rwlock		 sc_peer_lock;
249 	size_t			 sc_peer_num;
250 	LIST_HEAD(,wg_peer)	*sc_peer;
251 	TAILQ_HEAD(,wg_peer)	 sc_peer_seq;
252 	u_long			 sc_peer_mask;
253 
254 	struct mutex		 sc_index_mtx;
255 	LIST_HEAD(,wg_index)	*sc_index;
256 	u_long			 sc_index_mask;
257 
258 	struct task		 sc_handshake;
259 	struct mbuf_queue	 sc_handshake_queue;
260 
261 	struct task		 sc_encap;
262 	struct task		 sc_decap;
263 	struct wg_ring		 sc_encap_ring;
264 	struct wg_ring		 sc_decap_ring;
265 };
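/*
 * Peers are looked up by public key through the SipHash-keyed hash table
 * sc_peer (see wg_peer_lookup), while active handshake/session state is
 * looked up by its 32-bit index through sc_index (see wg_index_get).
 */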
266 
267 struct wg_peer *
268 	wg_peer_create(struct wg_softc *, uint8_t[WG_KEY_SIZE]);
269 struct wg_peer *
270 	wg_peer_lookup(struct wg_softc *, const uint8_t[WG_KEY_SIZE]);
271 void	wg_peer_destroy(struct wg_peer *);
272 void	wg_peer_set_endpoint_from_tag(struct wg_peer *, struct wg_tag *);
273 void	wg_peer_set_sockaddr(struct wg_peer *, struct sockaddr *);
274 int	wg_peer_get_sockaddr(struct wg_peer *, struct sockaddr *);
275 void	wg_peer_clear_src(struct wg_peer *);
276 void	wg_peer_get_endpoint(struct wg_peer *, struct wg_endpoint *);
277 void	wg_peer_counters_add(struct wg_peer *, uint64_t, uint64_t);
278 
279 int	wg_aip_add(struct wg_softc *, struct wg_peer *, struct wg_aip_io *);
280 struct wg_peer *
281 	wg_aip_lookup(struct art_root *, void *);
282 int	wg_aip_remove(struct wg_softc *, struct wg_peer *,
283 	    struct wg_aip_io *);
284 
285 int	wg_socket_open(struct socket **, int, in_port_t *, int *, void *);
286 void	wg_socket_close(struct socket **);
287 int	wg_bind(struct wg_softc *, in_port_t *, int *);
288 void	wg_unbind(struct wg_softc *);
289 int	wg_send(struct wg_softc *, struct wg_endpoint *, struct mbuf *);
290 void	wg_send_buf(struct wg_softc *, struct wg_endpoint *, uint8_t *,
291 	    size_t);
292 
293 struct wg_tag *
294 	wg_tag_get(struct mbuf *);
295 
296 void	wg_timers_init(struct wg_timers *);
297 void	wg_timers_enable(struct wg_timers *);
298 void	wg_timers_disable(struct wg_timers *);
299 void	wg_timers_set_persistent_keepalive(struct wg_timers *, uint16_t);
300 int	wg_timers_get_persistent_keepalive(struct wg_timers *, uint16_t *);
301 void	wg_timers_get_last_handshake(struct wg_timers *, struct timespec *);
302 int	wg_timers_expired_handshake_last_sent(struct wg_timers *);
303 int	wg_timers_check_handshake_last_sent(struct wg_timers *);
304 
305 void	wg_timers_event_data_sent(struct wg_timers *);
306 void	wg_timers_event_data_received(struct wg_timers *);
307 void	wg_timers_event_any_authenticated_packet_sent(struct wg_timers *);
308 void	wg_timers_event_any_authenticated_packet_received(struct wg_timers *);
309 void	wg_timers_event_handshake_initiated(struct wg_timers *);
310 void	wg_timers_event_handshake_responded(struct wg_timers *);
311 void	wg_timers_event_handshake_complete(struct wg_timers *);
312 void	wg_timers_event_session_derived(struct wg_timers *);
313 void	wg_timers_event_any_authenticated_packet_traversal(struct wg_timers *);
314 void	wg_timers_event_want_initiation(struct wg_timers *);
315 void	wg_timers_event_reset_handshake_last_sent(struct wg_timers *);
316 
317 void	wg_timers_run_send_initiation(void *, int);
318 void	wg_timers_run_retry_handshake(void *);
319 void	wg_timers_run_send_keepalive(void *);
320 void	wg_timers_run_new_handshake(void *);
321 void	wg_timers_run_zero_key_material(void *);
322 void	wg_timers_run_persistent_keepalive(void *);
323 
324 void	wg_peer_send_buf(struct wg_peer *, uint8_t *, size_t);
325 void	wg_send_initiation(void *);
326 void	wg_send_response(struct wg_peer *);
327 void	wg_send_cookie(struct wg_softc *, struct cookie_macs *, uint32_t,
328 	    struct wg_endpoint *);
329 void	wg_send_keepalive(void *);
330 void	wg_peer_clear_secrets(void *);
331 void	wg_handshake(struct wg_softc *, struct mbuf *);
332 void	wg_handshake_worker(void *);
333 
334 void	wg_encap(struct wg_softc *, struct mbuf *);
335 void	wg_decap(struct wg_softc *, struct mbuf *);
336 void	wg_encap_worker(void *);
337 void	wg_decap_worker(void *);
338 void	wg_deliver_out(void *);
339 void	wg_deliver_in(void *);
340 
341 int	wg_queue_in(struct wg_softc *, struct wg_peer *, struct mbuf *);
342 void	wg_queue_out(struct wg_softc *, struct wg_peer *);
343 struct mbuf *
344 	wg_ring_dequeue(struct wg_ring *);
345 struct mbuf *
346 	wg_queue_dequeue(struct wg_queue *, struct wg_tag **);
347 size_t	wg_queue_len(struct wg_queue *);
348 
349 struct noise_remote *
350 	wg_remote_get(void *, uint8_t[NOISE_PUBLIC_KEY_LEN]);
351 uint32_t
352 	wg_index_set(void *, struct noise_remote *);
353 struct noise_remote *
354 	wg_index_get(void *, uint32_t);
355 void	wg_index_drop(void *, uint32_t);
356 
357 struct mbuf *
358 	wg_input(void *, struct mbuf *, struct ip *, struct ip6_hdr *, void *,
359 	    int);
360 int	wg_output(struct ifnet *, struct mbuf *, struct sockaddr *,
361 	    struct rtentry *);
362 int	wg_ioctl_set(struct wg_softc *, struct wg_data_io *);
363 int	wg_ioctl_get(struct wg_softc *, struct wg_data_io *);
364 int	wg_ioctl(struct ifnet *, u_long, caddr_t);
365 int	wg_up(struct wg_softc *);
366 void	wg_down(struct wg_softc *);
367 
368 int	wg_clone_create(struct if_clone *, int);
369 int	wg_clone_destroy(struct ifnet *);
370 void	wgattach(int);
371 
372 uint64_t	peer_counter = 0;
373 struct pool	wg_aip_pool;
374 struct pool	wg_peer_pool;
375 struct pool	wg_ratelimit_pool;
376 struct timeval	underload_interval = { UNDERLOAD_TIMEOUT, 0 };
377 
378 size_t		 wg_counter = 0;
379 struct taskq	*wg_handshake_taskq;
380 struct taskq	*wg_crypt_taskq;
381 
382 struct if_clone	wg_cloner =
383     IF_CLONE_INITIALIZER("wg", wg_clone_create, wg_clone_destroy);
384 
385 struct wg_peer *
386 wg_peer_create(struct wg_softc *sc, uint8_t public[WG_KEY_SIZE])
387 {
388 	struct wg_peer	*peer;
389 	uint64_t	 idx;
390 
391 	rw_assert_wrlock(&sc->sc_lock);
392 
393 	if (sc->sc_peer_num >= MAX_PEERS_PER_IFACE)
394 		return NULL;
395 
396 	if ((peer = pool_get(&wg_peer_pool, PR_NOWAIT)) == NULL)
397 		return NULL;
398 
399 	peer->p_id = peer_counter++;
400 	peer->p_sc = sc;
401 
402 	noise_remote_init(&peer->p_remote, public, &sc->sc_local);
403 	cookie_maker_init(&peer->p_cookie, public);
404 	wg_timers_init(&peer->p_timers);
405 
406 	mtx_init(&peer->p_counters_mtx, IPL_NET);
407 	peer->p_counters_tx = 0;
408 	peer->p_counters_rx = 0;
409 
410 	mtx_init(&peer->p_endpoint_mtx, IPL_NET);
411 	bzero(&peer->p_endpoint, sizeof(peer->p_endpoint));
412 
413 	task_set(&peer->p_send_initiation, wg_send_initiation, peer);
414 	task_set(&peer->p_send_keepalive, wg_send_keepalive, peer);
415 	task_set(&peer->p_clear_secrets, wg_peer_clear_secrets, peer);
416 	task_set(&peer->p_deliver_out, wg_deliver_out, peer);
417 	task_set(&peer->p_deliver_in, wg_deliver_in, peer);
418 
419 	mq_init(&peer->p_stage_queue, MAX_STAGED_PKT, IPL_NET);
420 	mtx_init(&peer->p_encap_queue.q_mtx, IPL_NET);
421 	ml_init(&peer->p_encap_queue.q_list);
422 	mtx_init(&peer->p_decap_queue.q_mtx, IPL_NET);
423 	ml_init(&peer->p_decap_queue.q_list);
424 
425 	SLIST_INIT(&peer->p_unused_index);
426 	SLIST_INSERT_HEAD(&peer->p_unused_index, &peer->p_index[0],
427 	    i_unused_entry);
428 	SLIST_INSERT_HEAD(&peer->p_unused_index, &peer->p_index[1],
429 	    i_unused_entry);
430 	SLIST_INSERT_HEAD(&peer->p_unused_index, &peer->p_index[2],
431 	    i_unused_entry);
432 
433 	LIST_INIT(&peer->p_aip);
434 
435 	peer->p_start_onlist = 0;
436 
437 	idx = SipHash24(&sc->sc_secret, public, WG_KEY_SIZE);
438 	idx &= sc->sc_peer_mask;
439 
440 	rw_enter_write(&sc->sc_peer_lock);
441 	LIST_INSERT_HEAD(&sc->sc_peer[idx], peer, p_pubkey_entry);
442 	TAILQ_INSERT_TAIL(&sc->sc_peer_seq, peer, p_seq_entry);
443 	sc->sc_peer_num++;
444 	rw_exit_write(&sc->sc_peer_lock);
445 
446 	DPRINTF(sc, "Peer %llu created\n", peer->p_id);
447 	return peer;
448 }
449 
450 struct wg_peer *
451 wg_peer_lookup(struct wg_softc *sc, const uint8_t public[WG_KEY_SIZE])
452 {
453 	uint8_t		 peer_key[WG_KEY_SIZE];
454 	struct wg_peer	*peer;
455 	uint64_t	 idx;
456 
457 	idx = SipHash24(&sc->sc_secret, public, WG_KEY_SIZE);
458 	idx &= sc->sc_peer_mask;
459 
460 	rw_enter_read(&sc->sc_peer_lock);
461 	LIST_FOREACH(peer, &sc->sc_peer[idx], p_pubkey_entry) {
462 		noise_remote_keys(&peer->p_remote, peer_key, NULL);
463 		if (timingsafe_bcmp(peer_key, public, WG_KEY_SIZE) == 0)
464 			goto done;
465 	}
466 	peer = NULL;
467 done:
468 	rw_exit_read(&sc->sc_peer_lock);
469 	return peer;
470 }
471 
472 void
473 wg_peer_destroy(struct wg_peer *peer)
474 {
475 	struct wg_softc	*sc = peer->p_sc;
476 	struct wg_aip *aip, *taip;
477 
478 	rw_assert_wrlock(&sc->sc_lock);
479 
480 	/*
481 	 * Remove peer from the pubkey hashtable and disable all timeouts.
482 	 * After this, and after flushing wg_handshake_taskq, no more handshakes
483 	 * can be started.
484 	 */
485 	rw_enter_write(&sc->sc_peer_lock);
486 	LIST_REMOVE(peer, p_pubkey_entry);
487 	TAILQ_REMOVE(&sc->sc_peer_seq, peer, p_seq_entry);
488 	sc->sc_peer_num--;
489 	rw_exit_write(&sc->sc_peer_lock);
490 
491 	wg_timers_disable(&peer->p_timers);
492 
493 	taskq_barrier(wg_handshake_taskq);
494 
495 	/*
496 	 * Now we drop all allowed IPs, so no more outgoing packets are routed
497 	 * to the peer, then drop all indexes, so no more incoming packets are
498 	 * dispatched to the peer. Finally we flush if_snd, wg_crypt_taskq and
499 	 * then nettq to ensure no more references to the peer exist.
500 	 */
501 	LIST_FOREACH_SAFE(aip, &peer->p_aip, a_entry, taip)
502 		wg_aip_remove(sc, peer, &aip->a_data);
503 
504 	noise_remote_clear(&peer->p_remote);
505 
506 	NET_LOCK();
507 	while (!ifq_empty(&sc->sc_if.if_snd)) {
508 		NET_UNLOCK();
509 		tsleep_nsec(sc, PWAIT, "wg_ifq", 1000);
510 		NET_LOCK();
511 	}
512 	NET_UNLOCK();
513 
514 	taskq_barrier(wg_crypt_taskq);
515 	taskq_barrier(net_tq(sc->sc_if.if_index));
516 
517 	DPRINTF(sc, "Peer %llu destroyed\n", peer->p_id);
518 	explicit_bzero(peer, sizeof(*peer));
519 	pool_put(&wg_peer_pool, peer);
520 }
521 
522 void
523 wg_peer_set_endpoint_from_tag(struct wg_peer *peer, struct wg_tag *t)
524 {
525 	if (memcmp(&t->t_endpoint, &peer->p_endpoint,
526 	    sizeof(t->t_endpoint)) == 0)
527 		return;
528 
529 	mtx_enter(&peer->p_endpoint_mtx);
530 	peer->p_endpoint = t->t_endpoint;
531 	mtx_leave(&peer->p_endpoint_mtx);
532 }
533 
534 void
535 wg_peer_set_sockaddr(struct wg_peer *peer, struct sockaddr *remote)
536 {
537 	mtx_enter(&peer->p_endpoint_mtx);
538 	memcpy(&peer->p_endpoint.e_remote, remote,
539 	       sizeof(peer->p_endpoint.e_remote));
540 	bzero(&peer->p_endpoint.e_local, sizeof(peer->p_endpoint.e_local));
541 	mtx_leave(&peer->p_endpoint_mtx);
542 }
543 
544 int
545 wg_peer_get_sockaddr(struct wg_peer *peer, struct sockaddr *remote)
546 {
547 	int	ret = 0;
548 
549 	mtx_enter(&peer->p_endpoint_mtx);
550 	if (peer->p_endpoint.e_remote.r_sa.sa_family != AF_UNSPEC)
551 		memcpy(remote, &peer->p_endpoint.e_remote,
552 		       sizeof(peer->p_endpoint.e_remote));
553 	else
554 		ret = ENOENT;
555 	mtx_leave(&peer->p_endpoint_mtx);
556 	return ret;
557 }
558 
559 void
560 wg_peer_clear_src(struct wg_peer *peer)
561 {
562 	mtx_enter(&peer->p_endpoint_mtx);
563 	bzero(&peer->p_endpoint.e_local, sizeof(peer->p_endpoint.e_local));
564 	mtx_leave(&peer->p_endpoint_mtx);
565 }
566 
567 void
568 wg_peer_get_endpoint(struct wg_peer *peer, struct wg_endpoint *endpoint)
569 {
570 	mtx_enter(&peer->p_endpoint_mtx);
571 	memcpy(endpoint, &peer->p_endpoint, sizeof(*endpoint));
572 	mtx_leave(&peer->p_endpoint_mtx);
573 }
574 
575 void
576 wg_peer_counters_add(struct wg_peer *peer, uint64_t tx, uint64_t rx)
577 {
578 	mtx_enter(&peer->p_counters_mtx);
579 	peer->p_counters_tx += tx;
580 	peer->p_counters_rx += rx;
581 	mtx_leave(&peer->p_counters_mtx);
582 }
583 
584 int
585 wg_aip_add(struct wg_softc *sc, struct wg_peer *peer, struct wg_aip_io *d)
586 {
587 	struct art_root	*root;
588 	struct art_node	*node;
589 	struct wg_aip	*aip;
590 	int		 ret = 0;
591 
592 	switch (d->a_af) {
593 	case AF_INET:	root = sc->sc_aip4; break;
594 #ifdef INET6
595 	case AF_INET6:	root = sc->sc_aip6; break;
596 #endif
597 	default: return EAFNOSUPPORT;
598 	}
599 
600 	if ((aip = pool_get(&wg_aip_pool, PR_NOWAIT|PR_ZERO)) == NULL)
601 		return ENOBUFS;
602 
603 	rw_enter_write(&root->ar_lock);
604 	node = art_insert(root, &aip->a_node, &d->a_addr, d->a_cidr);
605 
606 	if (node == &aip->a_node) {
607 		aip->a_peer = peer;
608 		aip->a_data = *d;
609 		LIST_INSERT_HEAD(&peer->p_aip, aip, a_entry);
610 		sc->sc_aip_num++;
611 	} else {
612 		pool_put(&wg_aip_pool, aip);
613 		aip = (struct wg_aip *) node;
614 		if (aip->a_peer != peer) {
615 			LIST_REMOVE(aip, a_entry);
616 			LIST_INSERT_HEAD(&peer->p_aip, aip, a_entry);
617 			aip->a_peer = peer;
618 		}
619 	}
620 	rw_exit_write(&root->ar_lock);
621 	return ret;
622 }
623 
624 struct wg_peer *
625 wg_aip_lookup(struct art_root *root, void *addr)
626 {
627 	struct srp_ref	 sr;
628 	struct art_node	*node;
629 
630 	node = art_match(root, addr, &sr);
631 	srp_leave(&sr);
632 
633 	return node == NULL ? NULL : ((struct wg_aip *) node)->a_peer;
634 }
635 
636 int
637 wg_aip_remove(struct wg_softc *sc, struct wg_peer *peer, struct wg_aip_io *d)
638 {
639 	struct srp_ref	 sr;
640 	struct art_root	*root;
641 	struct art_node	*node;
642 	struct wg_aip	*aip;
643 	int		 ret = 0;
644 
645 	switch (d->a_af) {
646 	case AF_INET:	root = sc->sc_aip4; break;
647 #ifdef INET6
648 	case AF_INET6:	root = sc->sc_aip6; break;
649 #endif
650 	default: return EAFNOSUPPORT;
651 	}
652 
653 	rw_enter_write(&root->ar_lock);
654 	if ((node = art_lookup(root, &d->a_addr, d->a_cidr, &sr)) == NULL) {
655 		ret = ENOENT;
656 	} else if (((struct wg_aip *) node)->a_peer != peer) {
657 		ret = EXDEV;
658 	} else {
659 		aip = (struct wg_aip *)node;
660 		if (art_delete(root, node, &d->a_addr, d->a_cidr) == NULL)
661 			panic("art_delete failed to delete node %p", node);
662 
663 		sc->sc_aip_num--;
664 		LIST_REMOVE(aip, a_entry);
665 		pool_put(&wg_aip_pool, aip);
666 	}
667 
668 	srp_leave(&sr);
669 	rw_exit_write(&root->ar_lock);
670 	return ret;
671 }
672 
673 int
674 wg_socket_open(struct socket **so, int af, in_port_t *port,
675     int *rtable, void *upcall_arg)
676 {
677 	struct mbuf		 mhostnam, mrtable;
678 #ifdef INET6
679 	struct sockaddr_in6	*sin6;
680 #endif
681 	struct sockaddr_in	*sin;
682 	int			 ret;
683 
684 	m_inithdr(&mhostnam);
685 	m_inithdr(&mrtable);
686 
687 	bzero(mtod(&mrtable, u_int *), sizeof(u_int));
688 	*mtod(&mrtable, u_int *) = *rtable;
689 	mrtable.m_len = sizeof(u_int);
690 
691 	if (af == AF_INET) {
692 		sin = mtod(&mhostnam, struct sockaddr_in *);
693 		bzero(sin, sizeof(*sin));
694 		sin->sin_len = sizeof(*sin);
695 		sin->sin_family = AF_INET;
696 		sin->sin_port = *port;
697 		sin->sin_addr.s_addr = INADDR_ANY;
698 		mhostnam.m_len = sin->sin_len;
699 #ifdef INET6
700 	} else if (af == AF_INET6) {
701 		sin6 = mtod(&mhostnam, struct sockaddr_in6 *);
702 		bzero(sin6, sizeof(*sin6));
703 		sin6->sin6_len = sizeof(*sin6);
704 		sin6->sin6_family = AF_INET6;
705 		sin6->sin6_port = *port;
706 		sin6->sin6_addr = (struct in6_addr) { .s6_addr = { 0 } };
707 		mhostnam.m_len = sin6->sin6_len;
708 #endif
709 	} else {
710 		return EAFNOSUPPORT;
711 	}
712 
713 	if ((ret = socreate(af, so, SOCK_DGRAM, 0)) != 0)
714 		return ret;
715 
716 	solock(*so);
717 	sotoinpcb(*so)->inp_upcall = wg_input;
718 	sotoinpcb(*so)->inp_upcall_arg = upcall_arg;
719 
720 	if ((ret = sosetopt(*so, SOL_SOCKET, SO_RTABLE, &mrtable)) == 0) {
721 		if ((ret = sobind(*so, &mhostnam, curproc)) == 0) {
722 			*port = sotoinpcb(*so)->inp_lport;
723 			*rtable = sotoinpcb(*so)->inp_rtableid;
724 		}
725 	}
726 	sounlock(*so);
727 
728 	if (ret != 0)
729 		wg_socket_close(so);
730 
731 	return ret;
732 }
733 
734 void
735 wg_socket_close(struct socket **so)
736 {
737 	if (*so != NULL && soclose(*so, 0) != 0)
738 		panic("Unable to close wg socket");
739 	*so = NULL;
740 }
741 
742 int
743 wg_bind(struct wg_softc *sc, in_port_t *portp, int *rtablep)
744 {
745 	int		 ret = 0, rtable = *rtablep;
746 	in_port_t	 port = *portp;
747 	struct socket	*so4;
748 #ifdef INET6
749 	struct socket	*so6;
750 	int		 retries = 0;
751 retry:
752 #endif
753 	if ((ret = wg_socket_open(&so4, AF_INET, &port, &rtable, sc)) != 0)
754 		return ret;
755 
756 #ifdef INET6
757 	if ((ret = wg_socket_open(&so6, AF_INET6, &port, &rtable, sc)) != 0) {
758 		if (ret == EADDRINUSE && *portp == 0 && retries++ < 100)
759 			goto retry;
760 		wg_socket_close(&so4);
761 		return ret;
762 	}
763 #endif
764 
765 	rw_enter_write(&sc->sc_so_lock);
766 	wg_socket_close(&sc->sc_so4);
767 	sc->sc_so4 = so4;
768 #ifdef INET6
769 	wg_socket_close(&sc->sc_so6);
770 	sc->sc_so6 = so6;
771 #endif
772 	rw_exit_write(&sc->sc_so_lock);
773 
774 	*portp = port;
775 	*rtablep = rtable;
776 	return 0;
777 }
778 
779 void
780 wg_unbind(struct wg_softc *sc)
781 {
782 	rw_enter_write(&sc->sc_so_lock);
783 	wg_socket_close(&sc->sc_so4);
784 #ifdef INET6
785 	wg_socket_close(&sc->sc_so6);
786 #endif
787 	rw_exit_write(&sc->sc_so_lock);
788 }
789 
790 int
791 wg_send(struct wg_softc *sc, struct wg_endpoint *e, struct mbuf *m)
792 {
793 	struct mbuf	 peernam, *control = NULL;
794 	int		 ret;
795 
796 	/* Get local control address before locking */
797 	if (e->e_remote.r_sa.sa_family == AF_INET) {
798 		if (e->e_local.l_in.s_addr != INADDR_ANY)
799 			control = sbcreatecontrol(&e->e_local.l_in,
800 			    sizeof(struct in_addr), IP_SENDSRCADDR,
801 			    IPPROTO_IP);
802 #ifdef INET6
803 	} else if (e->e_remote.r_sa.sa_family == AF_INET6) {
804 		if (!IN6_IS_ADDR_UNSPECIFIED(&e->e_local.l_in6))
805 			control = sbcreatecontrol(&e->e_local.l_pktinfo6,
806 			    sizeof(struct in6_pktinfo), IPV6_PKTINFO,
807 			    IPPROTO_IPV6);
808 #endif
809 	} else {
810 		m_freem(m);
811 		return EAFNOSUPPORT;
812 	}
813 
814 	/* Get remote address */
815 	peernam.m_type = MT_SONAME;
816 	peernam.m_next = NULL;
817 	peernam.m_nextpkt = NULL;
818 	peernam.m_data = (void *)&e->e_remote.r_sa;
819 	peernam.m_len = e->e_remote.r_sa.sa_len;
820 	peernam.m_flags = 0;
821 
822 	rw_enter_read(&sc->sc_so_lock);
823 	if (e->e_remote.r_sa.sa_family == AF_INET && sc->sc_so4 != NULL)
824 		ret = sosend(sc->sc_so4, &peernam, NULL, m, control, 0);
825 #ifdef INET6
826 	else if (e->e_remote.r_sa.sa_family == AF_INET6 && sc->sc_so6 != NULL)
827 		ret = sosend(sc->sc_so6, &peernam, NULL, m, control, 0);
828 #endif
829 	else {
830 		ret = ENOTCONN;
831 		m_freem(control);
832 		m_freem(m);
833 	}
834 	rw_exit_read(&sc->sc_so_lock);
835 
836 	return ret;
837 }
838 
839 void
840 wg_send_buf(struct wg_softc *sc, struct wg_endpoint *e, uint8_t *buf,
841     size_t len)
842 {
843 	struct mbuf	*m;
844 	int		 ret = 0;
845 
846 retry:
847 	m = m_gethdr(M_WAIT, MT_DATA);
848 	m->m_len = 0;
849 	m_copyback(m, 0, len, buf, M_WAIT);
850 
851 	/* As we're sending a handshake packet here, we want high priority */
852 	m->m_pkthdr.pf.prio = IFQ_MAXPRIO;
853 
854 	if (ret == 0) {
855 		ret = wg_send(sc, e, m);
856 		/* Retry if we couldn't bind to e->e_local */
857 		if (ret == EADDRNOTAVAIL) {
858 			bzero(&e->e_local, sizeof(e->e_local));
859 			goto retry;
860 		}
861 	} else {
862 		ret = wg_send(sc, e, m);
863 		if (ret != 0)
864 			DPRINTF(sc, "Unable to send packet\n");
865 	}
866 }
867 
868 struct wg_tag *
869 wg_tag_get(struct mbuf *m)
870 {
871 	struct m_tag	*mtag;
872 
873 	if ((mtag = m_tag_find(m, PACKET_TAG_WIREGUARD, NULL)) == NULL) {
874 		mtag = m_tag_get(PACKET_TAG_WIREGUARD, sizeof(struct wg_tag),
875 		    M_NOWAIT);
876 		if (mtag == NULL)
877 			return (NULL);
878 		bzero(mtag + 1, sizeof(struct wg_tag));
879 		m_tag_prepend(m, mtag);
880 	}
881 	return ((struct wg_tag *)(mtag + 1));
882 }
883 
884 /*
885  * The following section handles the timeout callbacks for a WireGuard session.
886  * These functions provide an "event based" model for controlling wg(8) session
887  * timers. All function calls occur after the specified event below.
888  *
889  * wg_timers_event_data_sent:
890  *	tx: data
891  * wg_timers_event_data_received:
892  *	rx: data
893  * wg_timers_event_any_authenticated_packet_sent:
894  *	tx: keepalive, data, handshake
895  * wg_timers_event_any_authenticated_packet_received:
896  *	rx: keepalive, data, handshake
897  * wg_timers_event_any_authenticated_packet_traversal:
898  *	tx, rx: keepalive, data, handshake
899  * wg_timers_event_handshake_initiated:
900  *	tx: initiation
901  * wg_timers_event_handshake_responded:
902  *	tx: response
903  * wg_timers_event_handshake_complete:
904  *	rx: response, confirmation data
905  * wg_timers_event_session_derived:
906  *	tx: response, rx: response
907  * wg_timers_event_want_initiation:
908  *	tx: data failed, old keys expiring
909  * wg_timers_event_reset_handshake_last_sent:
910  * 	anytime we may immediately want a new handshake
911  */
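/*
 * Illustrative sequence for an outgoing data packet when no session exists:
 * wg_timers_event_want_initiation() calls wg_timers_run_send_initiation(),
 * which schedules the initiation; wg_timers_event_handshake_initiated() then
 * arms t_retry_handshake (REKEY_TIMEOUT plus jitter). Once the response is
 * consumed, wg_timers_event_handshake_complete() cancels the retry, resets
 * the retry counter and sends a keepalive to confirm the new keys.
 */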
912 void
913 wg_timers_init(struct wg_timers *t)
914 {
915 	bzero(t, sizeof(*t));
916 	rw_init(&t->t_lock, "wg_timers");
917 	mtx_init(&t->t_handshake_mtx, IPL_NET);
918 
919 	timeout_set(&t->t_new_handshake, wg_timers_run_new_handshake, t);
920 	timeout_set(&t->t_send_keepalive, wg_timers_run_send_keepalive, t);
921 	timeout_set(&t->t_retry_handshake, wg_timers_run_retry_handshake, t);
922 	timeout_set(&t->t_persistent_keepalive,
923 	    wg_timers_run_persistent_keepalive, t);
924 	timeout_set(&t->t_zero_key_material,
925 	    wg_timers_run_zero_key_material, t);
926 }
927 
928 void
929 wg_timers_enable(struct wg_timers *t)
930 {
931 	rw_enter_write(&t->t_lock);
932 	t->t_disabled = 0;
933 	rw_exit_write(&t->t_lock);
934 	wg_timers_run_persistent_keepalive(t);
935 }
936 
937 void
938 wg_timers_disable(struct wg_timers *t)
939 {
940 	rw_enter_write(&t->t_lock);
941 	t->t_disabled = 1;
942 	t->t_need_another_keepalive = 0;
943 	rw_exit_write(&t->t_lock);
944 
945 	timeout_del_barrier(&t->t_new_handshake);
946 	timeout_del_barrier(&t->t_send_keepalive);
947 	timeout_del_barrier(&t->t_retry_handshake);
948 	timeout_del_barrier(&t->t_persistent_keepalive);
949 	timeout_del_barrier(&t->t_zero_key_material);
950 }
951 
952 void
953 wg_timers_set_persistent_keepalive(struct wg_timers *t, uint16_t interval)
954 {
955 	rw_enter_read(&t->t_lock);
956 	if (!t->t_disabled) {
957 		t->t_persistent_keepalive_interval = interval;
958 		wg_timers_run_persistent_keepalive(t);
959 	}
960 	rw_exit_read(&t->t_lock);
961 }
962 
963 int
964 wg_timers_get_persistent_keepalive(struct wg_timers *t, uint16_t *interval)
965 {
966 	*interval = t->t_persistent_keepalive_interval;
967 	return *interval > 0 ? 0 : ENOENT;
968 }
969 
970 void
971 wg_timers_get_last_handshake(struct wg_timers *t, struct timespec *time)
972 {
973 	mtx_enter(&t->t_handshake_mtx);
974 	*time = t->t_handshake_complete;
975 	mtx_leave(&t->t_handshake_mtx);
976 }
977 
978 int
979 wg_timers_expired_handshake_last_sent(struct wg_timers *t)
980 {
981 	struct timespec uptime;
982 	struct timespec expire = { .tv_sec = REKEY_TIMEOUT, .tv_nsec = 0 };
983 
984 	getnanouptime(&uptime);
985 	timespecadd(&t->t_handshake_last_sent, &expire, &expire);
986 	return timespeccmp(&uptime, &expire, >) ? ETIMEDOUT : 0;
987 }
988 
989 int
990 wg_timers_check_handshake_last_sent(struct wg_timers *t)
991 {
992 	int ret;
993 	mtx_enter(&t->t_handshake_mtx);
994 	if ((ret = wg_timers_expired_handshake_last_sent(t)) == ETIMEDOUT)
995 		getnanouptime(&t->t_handshake_last_sent);
996 	mtx_leave(&t->t_handshake_mtx);
997 	return ret;
998 }
999 
1000 void
1001 wg_timers_event_data_sent(struct wg_timers *t)
1002 {
1003 	int	msecs = NEW_HANDSHAKE_TIMEOUT * 1000;
1004 	msecs += arc4random_uniform(REKEY_TIMEOUT_JITTER);
1005 
1006 	rw_enter_read(&t->t_lock);
1007 	if (!t->t_disabled && !timeout_pending(&t->t_new_handshake))
1008 		timeout_add_msec(&t->t_new_handshake, msecs);
1009 	rw_exit_read(&t->t_lock);
1010 }
1011 
1012 void
1013 wg_timers_event_data_received(struct wg_timers *t)
1014 {
1015 	rw_enter_read(&t->t_lock);
1016 	if (!t->t_disabled) {
1017 		if (!timeout_pending(&t->t_send_keepalive))
1018 			timeout_add_sec(&t->t_send_keepalive,
1019 			    KEEPALIVE_TIMEOUT);
1020 		else
1021 			t->t_need_another_keepalive = 1;
1022 	}
1023 	rw_exit_read(&t->t_lock);
1024 }
1025 
1026 void
1027 wg_timers_event_any_authenticated_packet_sent(struct wg_timers *t)
1028 {
1029 	timeout_del(&t->t_send_keepalive);
1030 }
1031 
1032 void
1033 wg_timers_event_any_authenticated_packet_received(struct wg_timers *t)
1034 {
1035 	timeout_del(&t->t_new_handshake);
1036 }
1037 
1038 void
1039 wg_timers_event_any_authenticated_packet_traversal(struct wg_timers *t)
1040 {
1041 	rw_enter_read(&t->t_lock);
1042 	if (!t->t_disabled && t->t_persistent_keepalive_interval > 0)
1043 		timeout_add_sec(&t->t_persistent_keepalive,
1044 		    t->t_persistent_keepalive_interval);
1045 	rw_exit_read(&t->t_lock);
1046 }
1047 
1048 void
1049 wg_timers_event_handshake_initiated(struct wg_timers *t)
1050 {
1051 	int	msecs = REKEY_TIMEOUT * 1000;
1052 	msecs += arc4random_uniform(REKEY_TIMEOUT_JITTER);
1053 
1054 	rw_enter_read(&t->t_lock);
1055 	if (!t->t_disabled)
1056 		timeout_add_msec(&t->t_retry_handshake, msecs);
1057 	rw_exit_read(&t->t_lock);
1058 }
1059 
1060 void
1061 wg_timers_event_handshake_responded(struct wg_timers *t)
1062 {
1063 	mtx_enter(&t->t_handshake_mtx);
1064 	getnanouptime(&t->t_handshake_last_sent);
1065 	mtx_leave(&t->t_handshake_mtx);
1066 }
1067 
1068 void
1069 wg_timers_event_handshake_complete(struct wg_timers *t)
1070 {
1071 	rw_enter_read(&t->t_lock);
1072 	if (!t->t_disabled) {
1073 		mtx_enter(&t->t_handshake_mtx);
1074 		timeout_del(&t->t_retry_handshake);
1075 		t->t_handshake_retries = 0;
1076 		getnanotime(&t->t_handshake_complete);
1077 		mtx_leave(&t->t_handshake_mtx);
1078 		wg_timers_run_send_keepalive(t);
1079 	}
1080 	rw_exit_read(&t->t_lock);
1081 }
1082 
1083 void
1084 wg_timers_event_session_derived(struct wg_timers *t)
1085 {
1086 	rw_enter_read(&t->t_lock);
1087 	if (!t->t_disabled)
1088 		timeout_add_sec(&t->t_zero_key_material, REJECT_AFTER_TIME * 3);
1089 	rw_exit_read(&t->t_lock);
1090 }
1091 
1092 void
1093 wg_timers_event_want_initiation(struct wg_timers *t)
1094 {
1095 	rw_enter_read(&t->t_lock);
1096 	if (!t->t_disabled)
1097 		wg_timers_run_send_initiation(t, 0);
1098 	rw_exit_read(&t->t_lock);
1099 }
1100 
1101 void
1102 wg_timers_event_reset_handshake_last_sent(struct wg_timers *t)
1103 {
1104 	mtx_enter(&t->t_handshake_mtx);
1105 	t->t_handshake_last_sent.tv_sec -= (REKEY_TIMEOUT + 1);
1106 	mtx_leave(&t->t_handshake_mtx);
1107 }
1108 
1109 void
1110 wg_timers_run_send_initiation(void *_t, int is_retry)
1111 {
1112 	struct wg_timers *t = _t;
1113 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1114 	if (!is_retry)
1115 		t->t_handshake_retries = 0;
1116 	if (wg_timers_expired_handshake_last_sent(t) == ETIMEDOUT)
1117 		task_add(wg_handshake_taskq, &peer->p_send_initiation);
1118 }
1119 
1120 void
1121 wg_timers_run_retry_handshake(void *_t)
1122 {
1123 	struct wg_timers *t = _t;
1124 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1125 
1126 	mtx_enter(&t->t_handshake_mtx);
1127 	if (t->t_handshake_retries <= MAX_TIMER_HANDSHAKES) {
1128 		t->t_handshake_retries++;
1129 		mtx_leave(&t->t_handshake_mtx);
1130 
1131 		DPRINTF(peer->p_sc, "Handshake for peer %llu did not complete "
1132 		    "after %d seconds, retrying (try %d)\n", peer->p_id,
1133 		    REKEY_TIMEOUT, t->t_handshake_retries + 1);
1134 		wg_peer_clear_src(peer);
1135 		wg_timers_run_send_initiation(t, 1);
1136 	} else {
1137 		mtx_leave(&t->t_handshake_mtx);
1138 
1139 		DPRINTF(peer->p_sc, "Handshake for peer %llu did not complete "
1140 		    "after %d retries, giving up\n", peer->p_id,
1141 		    MAX_TIMER_HANDSHAKES + 2);
1142 
1143 		timeout_del(&t->t_send_keepalive);
1144 		mq_purge(&peer->p_stage_queue);
1145 		if (!timeout_pending(&t->t_zero_key_material))
1146 			timeout_add_sec(&t->t_zero_key_material,
1147 			    REJECT_AFTER_TIME * 3);
1148 	}
1149 }
1150 
1151 void
1152 wg_timers_run_send_keepalive(void *_t)
1153 {
1154 	struct wg_timers *t = _t;
1155 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1156 
1157 	task_add(wg_crypt_taskq, &peer->p_send_keepalive);
1158 	if (t->t_need_another_keepalive) {
1159 		t->t_need_another_keepalive = 0;
1160 		timeout_add_sec(&t->t_send_keepalive, KEEPALIVE_TIMEOUT);
1161 	}
1162 }
1163 
1164 void
1165 wg_timers_run_new_handshake(void *_t)
1166 {
1167 	struct wg_timers *t = _t;
1168 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1169 
1170 	DPRINTF(peer->p_sc, "Retrying handshake with peer %llu because we "
1171 	    "stopped hearing back after %d seconds\n",
1172 	    peer->p_id, NEW_HANDSHAKE_TIMEOUT);
1173 	wg_peer_clear_src(peer);
1174 
1175 	wg_timers_run_send_initiation(t, 0);
1176 }
1177 
1178 void
1179 wg_timers_run_zero_key_material(void *_t)
1180 {
1181 	struct wg_timers *t = _t;
1182 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1183 
1184 	DPRINTF(peer->p_sc, "Zeroing out keys for peer %llu\n", peer->p_id);
1185 	task_add(wg_handshake_taskq, &peer->p_clear_secrets);
1186 }
1187 
1188 void
1189 wg_timers_run_persistent_keepalive(void *_t)
1190 {
1191 	struct wg_timers *t = _t;
1192 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1193 	if (t->t_persistent_keepalive_interval != 0)
1194 		task_add(wg_crypt_taskq, &peer->p_send_keepalive);
1195 }
1196 
1197 /* The following functions handle handshakes */
1198 void
1199 wg_peer_send_buf(struct wg_peer *peer, uint8_t *buf, size_t len)
1200 {
1201 	struct wg_endpoint	 endpoint;
1202 
1203 	wg_peer_counters_add(peer, len, 0);
1204 	wg_timers_event_any_authenticated_packet_traversal(&peer->p_timers);
1205 	wg_timers_event_any_authenticated_packet_sent(&peer->p_timers);
1206 	wg_peer_get_endpoint(peer, &endpoint);
1207 	wg_send_buf(peer->p_sc, &endpoint, buf, len);
1208 }
1209 
1210 void
1211 wg_send_initiation(void *_peer)
1212 {
1213 	struct wg_peer			*peer = _peer;
1214 	struct wg_pkt_initiation	 pkt;
1215 
1216 	if (wg_timers_check_handshake_last_sent(&peer->p_timers) != ETIMEDOUT)
1217 		return;
1218 
1219 	DPRINTF(peer->p_sc, "Sending handshake initiation to peer %llu\n",
1220 	    peer->p_id);
1221 
1222 	if (noise_create_initiation(&peer->p_remote, &pkt.s_idx, pkt.ue, pkt.es,
1223 				    pkt.ets) != 0)
1224 		return;
1225 	pkt.t = WG_PKT_INITIATION;
1226 	cookie_maker_mac(&peer->p_cookie, &pkt.m, &pkt,
1227 	    sizeof(pkt)-sizeof(pkt.m));
1228 	wg_peer_send_buf(peer, (uint8_t *)&pkt, sizeof(pkt));
1229 	wg_timers_event_handshake_initiated(&peer->p_timers);
1230 }
1231 
1232 void
1233 wg_send_response(struct wg_peer *peer)
1234 {
1235 	struct wg_pkt_response	 pkt;
1236 
1237 	DPRINTF(peer->p_sc, "Sending handshake response to peer %llu\n",
1238 	    peer->p_id);
1239 
1240 	if (noise_create_response(&peer->p_remote, &pkt.s_idx, &pkt.r_idx,
1241 				  pkt.ue, pkt.en) != 0)
1242 		return;
1243 	if (noise_remote_begin_session(&peer->p_remote) != 0)
1244 		return;
1245 	wg_timers_event_session_derived(&peer->p_timers);
1246 	pkt.t = WG_PKT_RESPONSE;
1247 	cookie_maker_mac(&peer->p_cookie, &pkt.m, &pkt,
1248 	    sizeof(pkt)-sizeof(pkt.m));
1249 	wg_timers_event_handshake_responded(&peer->p_timers);
1250 	wg_peer_send_buf(peer, (uint8_t *)&pkt, sizeof(pkt));
1251 }
1252 
1253 void
1254 wg_send_cookie(struct wg_softc *sc, struct cookie_macs *cm, uint32_t idx,
1255     struct wg_endpoint *e)
1256 {
1257 	struct wg_pkt_cookie	pkt;
1258 
1259 	DPRINTF(sc, "Sending cookie response for denied handshake message\n");
1260 
1261 	pkt.t = WG_PKT_COOKIE;
1262 	pkt.r_idx = idx;
1263 
1264 	cookie_checker_create_payload(&sc->sc_cookie, cm, pkt.nonce,
1265 	    pkt.ec, &e->e_remote.r_sa);
1266 
1267 	wg_send_buf(sc, e, (uint8_t *)&pkt, sizeof(pkt));
1268 }
1269 
1270 void
1271 wg_send_keepalive(void *_peer)
1272 {
1273 	struct wg_peer	*peer = _peer;
1274 	struct wg_softc	*sc = peer->p_sc;
1275 	struct wg_tag	*t;
1276 	struct mbuf	*m;
1277 
1278 	if (!mq_empty(&peer->p_stage_queue))
1279 		goto send;
1280 
1281 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1282 		return;
1283 
1284 	if ((t = wg_tag_get(m)) == NULL) {
1285 		m_freem(m);
1286 		return;
1287 	}
1288 
1289 	m->m_len = 0;
1290 	m_calchdrlen(m);
1291 
1292 	t->t_peer = peer;
1293 	t->t_mbuf = NULL;
1294 	t->t_done = 0;
1295 	t->t_mtu = 0; /* MTU == 0 OK for keepalive */
1296 
1297 	mq_push(&peer->p_stage_queue, m);
1298 send:
1299 	if (noise_remote_ready(&peer->p_remote) == 0) {
1300 		wg_queue_out(sc, peer);
1301 		task_add(wg_crypt_taskq, &sc->sc_encap);
1302 	} else {
1303 		wg_timers_event_want_initiation(&peer->p_timers);
1304 	}
1305 }
1306 
1307 void
1308 wg_peer_clear_secrets(void *_peer)
1309 {
1310 	struct wg_peer *peer = _peer;
1311 	noise_remote_clear(&peer->p_remote);
1312 }
1313 
1314 void
1315 wg_handshake(struct wg_softc *sc, struct mbuf *m)
1316 {
1317 	struct wg_tag			*t;
1318 	struct wg_pkt_initiation	*init;
1319 	struct wg_pkt_response		*resp;
1320 	struct wg_pkt_cookie		*cook;
1321 	struct wg_peer			*peer;
1322 	struct noise_remote		*remote;
1323 	int				 res, underload = 0;
1324 	static struct timeval		 wg_last_underload; /* microuptime */
1325 
1326 	if (mq_len(&sc->sc_handshake_queue) >= MAX_QUEUED_HANDSHAKES/8) {
1327 		getmicrouptime(&wg_last_underload);
1328 		underload = 1;
1329 	} else if (wg_last_underload.tv_sec != 0) {
1330 		if (!ratecheck(&wg_last_underload, &underload_interval))
1331 			underload = 1;
1332 		else
1333 			bzero(&wg_last_underload, sizeof(wg_last_underload));
1334 	}
1335 
1336 	t = wg_tag_get(m);
1337 
1338 	switch (*mtod(m, uint32_t *)) {
1339 	case WG_PKT_INITIATION:
1340 		init = mtod(m, struct wg_pkt_initiation *);
1341 
1342 		res = cookie_checker_validate_macs(&sc->sc_cookie, &init->m,
1343 				init, sizeof(*init) - sizeof(init->m),
1344 				underload, &t->t_endpoint.e_remote.r_sa);
1345 
1346 		if (res == EINVAL) {
1347 			DPRINTF(sc, "Invalid initiation MAC\n");
1348 			goto error;
1349 		} else if (res == ECONNREFUSED) {
1350 			DPRINTF(sc, "Handshake ratelimited\n");
1351 			goto error;
1352 		} else if (res == EAGAIN) {
1353 			wg_send_cookie(sc, &init->m, init->s_idx,
1354 			    &t->t_endpoint);
1355 			goto error;
1356 		} else if (res != 0) {
1357 			panic("unexpected response: %d", res);
1358 		}
1359 
1360 		if (noise_consume_initiation(&sc->sc_local, &remote,
1361 		    init->s_idx, init->ue, init->es, init->ets) != 0) {
1362 			DPRINTF(sc, "Invalid handshake initiation\n");
1363 			goto error;
1364 		}
1365 
1366 		peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1367 
1368 		DPRINTF(sc, "Receiving handshake initiation from peer %llu\n",
1369 		    peer->p_id);
1370 
1371 		wg_peer_counters_add(peer, 0, sizeof(*init));
1372 		wg_peer_set_endpoint_from_tag(peer, t);
1373 		wg_send_response(peer);
1374 		break;
1375 	case WG_PKT_RESPONSE:
1376 		resp = mtod(m, struct wg_pkt_response *);
1377 
1378 		res = cookie_checker_validate_macs(&sc->sc_cookie, &resp->m,
1379 				resp, sizeof(*resp) - sizeof(resp->m),
1380 				underload, &t->t_endpoint.e_remote.r_sa);
1381 
1382 		if (res == EINVAL) {
1383 			DPRINTF(sc, "Invalid response MAC\n");
1384 			goto error;
1385 		} else if (res == ECONNREFUSED) {
1386 			DPRINTF(sc, "Handshake ratelimited\n");
1387 			goto error;
1388 		} else if (res == EAGAIN) {
1389 			wg_send_cookie(sc, &resp->m, resp->s_idx,
1390 			    &t->t_endpoint);
1391 			goto error;
1392 		} else if (res != 0) {
1393 			panic("unexpected response: %d", res);
1394 		}
1395 
1396 		if ((remote = wg_index_get(sc, resp->r_idx)) == NULL) {
1397 			DPRINTF(sc, "Unknown handshake response\n");
1398 			goto error;
1399 		}
1400 
1401 		peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1402 
1403 		if (noise_consume_response(remote, resp->s_idx, resp->r_idx,
1404 					   resp->ue, resp->en) != 0) {
1405 			DPRINTF(sc, "Invalid handshake response\n");
1406 			goto error;
1407 		}
1408 
1409 		DPRINTF(sc, "Receiving handshake response from peer %llu\n",
1410 				peer->p_id);
1411 
1412 		wg_peer_counters_add(peer, 0, sizeof(*resp));
1413 		wg_peer_set_endpoint_from_tag(peer, t);
1414 		if (noise_remote_begin_session(&peer->p_remote) == 0) {
1415 			wg_timers_event_session_derived(&peer->p_timers);
1416 			wg_timers_event_handshake_complete(&peer->p_timers);
1417 		}
1418 		break;
1419 	case WG_PKT_COOKIE:
1420 		cook = mtod(m, struct wg_pkt_cookie *);
1421 
1422 		if ((remote = wg_index_get(sc, cook->r_idx)) == NULL) {
1423 			DPRINTF(sc, "Unknown cookie index\n");
1424 			goto error;
1425 		}
1426 
1427 		peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1428 
1429 		if (cookie_maker_consume_payload(&peer->p_cookie,
1430 		    cook->nonce, cook->ec) != 0) {
1431 			DPRINTF(sc, "Could not decrypt cookie response\n");
1432 			goto error;
1433 		}
1434 
1435 		DPRINTF(sc, "Receiving cookie response\n");
1436 		goto error;
1437 	default:
1438 		panic("invalid packet in handshake queue");
1439 	}
1440 
1441 	wg_timers_event_any_authenticated_packet_received(&peer->p_timers);
1442 	wg_timers_event_any_authenticated_packet_traversal(&peer->p_timers);
1443 error:
1444 	m_freem(m);
1445 }
1446 
1447 void
1448 wg_handshake_worker(void *_sc)
1449 {
1450 	struct mbuf *m;
1451 	struct wg_softc *sc = _sc;
1452 	while ((m = mq_dequeue(&sc->sc_handshake_queue)) != NULL)
1453 		wg_handshake(sc, m);
1454 }
1455 
1456 /*
1457  * The following functions handle encapsulation (encryption) and
1458  * decapsulation (decryption). The wg_{en,de}cap functions will run in the
1459  * wg_crypt_taskq, while wg_deliver_{in,out} must be serialised and will run
1460  * in nettq.
1461  *
1462  * The packets are tracked in two queues, a serial queue and a parallel queue.
1463  *  - The parallel queue is used to distribute the encryption across multiple
1464  *    threads.
1465  *  - The serial queue ensures that packets are not reordered and are
1466  *    delivered in sequence.
1467  * The wg_tag attached to the packet contains two flags to help the two queues
1468  * interact.
1469  *  - t_done: The parallel queue has finished with the packet, now the serial
1470  *            queue can do its work.
1471  *  - t_mbuf: Used to store the *crypted packet. In the case of encryption,
1472  *            this is a newly allocated packet, and in the case of decryption,
1473  *            it is a pointer to the same packet, which has been decrypted and
1474  *            truncated. If t_mbuf is NULL, then *cryption failed and this
1475  *            packet should not be passed.
1476  * wg_{en,de}cap work on the parallel queue, while wg_deliver_{in,out} work
1477  * on the serial queue.
1478  */
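/*
 * Illustrative example for an outgoing packet: wg_queue_out() places it on
 * both the peer's serial p_encap_queue and the shared parallel sc_encap_ring;
 * wg_encap_worker() pulls it from the ring and wg_encap() stores the
 * encrypted copy in t_mbuf and sets t_done; wg_deliver_out(), running in the
 * net taskq, dequeues from p_encap_queue strictly in order (only once t_done
 * is set) and hands t_mbuf to wg_send().
 */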
1479 void
1480 wg_encap(struct wg_softc *sc, struct mbuf *m)
1481 {
1482 	int res = 0;
1483 	struct wg_pkt_data	*data;
1484 	struct wg_peer		*peer;
1485 	struct wg_tag		*t;
1486 	struct mbuf		*mc;
1487 	size_t			 padding_len, plaintext_len, out_len;
1488 	uint64_t		 nonce;
1489 
1490 	t = wg_tag_get(m);
1491 	peer = t->t_peer;
1492 
1493 	plaintext_len = min(WG_PKT_WITH_PADDING(m->m_pkthdr.len), t->t_mtu);
1494 	padding_len = plaintext_len - m->m_pkthdr.len;
1495 	out_len = sizeof(struct wg_pkt_data) + plaintext_len + NOISE_AUTHTAG_LEN;
1496 
1497 	/*
1498 	 * For the time being we allocate a new packet with sufficient size to
1499 	 * hold the encrypted data and headers. Encrypting in place would be
1500 	 * difficult as p_encap_queue (mbuf_list) holds a reference to the mbuf.
1501 	 * If we m_makespace or similar, we risk corrupting that list.
1502 	 * Additionally, we only pass a buf and buf length to
1503 	 * noise_remote_encrypt. Technically it would be possible to teach
1504 	 * noise_remote_encrypt about mbufs, but we would need to sort out the
1505 	 * p_encap_queue situation first.
1506 	 */
1507 	if ((mc = m_clget(NULL, M_NOWAIT, out_len)) == NULL)
1508 		goto error;
1509 
1510 	data = mtod(mc, struct wg_pkt_data *);
1511 	m_copydata(m, 0, m->m_pkthdr.len, data->buf);
1512 	bzero(data->buf + m->m_pkthdr.len, padding_len);
1513 	data->t = WG_PKT_DATA;
1514 
1515 	/*
1516 	 * Copy the flow hash from the inner packet to the outer packet, so
1517 	 * that fq_codel can properly separate streams, rather than falling
1518 	 * back to random buckets.
1519 	 */
1520 	mc->m_pkthdr.ph_flowid = m->m_pkthdr.ph_flowid;
1521 
1522 	res = noise_remote_encrypt(&peer->p_remote, &data->r_idx, &nonce,
1523 				   data->buf, plaintext_len);
1524 	nonce = htole64(nonce); /* Wire format is little endian. */
1525 	memcpy(data->nonce, &nonce, sizeof(data->nonce));
1526 
1527 	if (__predict_false(res == EINVAL)) {
1528 		m_freem(mc);
1529 		goto error;
1530 	} else if (__predict_false(res == ESTALE)) {
1531 		wg_timers_event_want_initiation(&peer->p_timers);
1532 	} else if (__predict_false(res != 0)) {
1533 		panic("unexpected result: %d", res);
1534 	}
1535 
1536 	/* A packet with length 0 is a keepalive packet */
1537 	if (__predict_false(m->m_pkthdr.len == 0))
1538 		DPRINTF(sc, "Sending keepalive packet to peer %llu\n",
1539 		    peer->p_id);
1540 
1541 	mc->m_pkthdr.ph_loopcnt = m->m_pkthdr.ph_loopcnt;
1542 	mc->m_flags &= ~(M_MCAST | M_BCAST);
1543 	mc->m_len = out_len;
1544 	m_calchdrlen(mc);
1545 
1546 	/*
1547 	 * We would count ifc_opackets, ifc_obytes of m here, except if_snd
1548 	 * already does that for us, so no need to worry about it.
1549 	counters_pkt(sc->sc_if.if_counters, ifc_opackets, ifc_obytes,
1550 	    m->m_pkthdr.len);
1551 	 */
1552 	wg_peer_counters_add(peer, mc->m_pkthdr.len, 0);
1553 
1554 	t->t_mbuf = mc;
1555 error:
1556 	t->t_done = 1;
1557 	task_add(net_tq(sc->sc_if.if_index), &peer->p_deliver_out);
1558 }
1559 
1560 void
1561 wg_decap(struct wg_softc *sc, struct mbuf *m)
1562 {
1563 	int			 res, len;
1564 	struct ip		*ip;
1565 	struct ip6_hdr		*ip6;
1566 	struct wg_pkt_data	*data;
1567 	struct wg_peer		*peer, *allowed_peer;
1568 	struct wg_tag		*t;
1569 	size_t			 payload_len;
1570 	uint64_t		 nonce;
1571 
1572 	t = wg_tag_get(m);
1573 	peer = t->t_peer;
1574 
1575 	/*
1576 	 * Likewise to wg_encap, we pass a buf and buf length to
1577 	 * noise_remote_decrypt. Again, possible to teach it about mbufs
1578 	 * but need to get over the p_decap_queue situation first. However,
1579 	 * we do not need to allocate a new mbuf as the decrypted packet is
1580 	 * strictly smaller than encrypted. We just set t_mbuf to m and
1581 	 * wg_deliver_in knows how to deal with that.
1582 	 */
1583 	data = mtod(m, struct wg_pkt_data *);
1584 	payload_len = m->m_pkthdr.len - sizeof(struct wg_pkt_data);
1585 	memcpy(&nonce, data->nonce, sizeof(nonce));
1586 	nonce = le64toh(nonce); /* Wire format is little endian. */
1587 	res = noise_remote_decrypt(&peer->p_remote, data->r_idx, nonce,
1588 				   data->buf, payload_len);
1589 
1590 	if (__predict_false(res == EINVAL)) {
1591 		goto error;
1592 	} else if (__predict_false(res == ECONNRESET)) {
1593 		wg_timers_event_handshake_complete(&peer->p_timers);
1594 	} else if (__predict_false(res == ESTALE)) {
1595 		wg_timers_event_want_initiation(&peer->p_timers);
1596 	} else if (__predict_false(res != 0)) {
1597 		panic("unexpected response: %d", res);
1598 	}
1599 
1600 	wg_peer_set_endpoint_from_tag(peer, t);
1601 
1602 	wg_peer_counters_add(peer, 0, m->m_pkthdr.len);
1603 
1604 	m_adj(m, sizeof(struct wg_pkt_data));
1605 	m_adj(m, -NOISE_AUTHTAG_LEN);
1606 
1607 	counters_pkt(sc->sc_if.if_counters, ifc_ipackets, ifc_ibytes,
1608 	    m->m_pkthdr.len);
1609 
1610 	/* A packet with length 0 is a keepalive packet */
1611 	if (__predict_false(m->m_pkthdr.len == 0)) {
1612 		DPRINTF(sc, "Receiving keepalive packet from peer "
1613 		    "%llu\n", peer->p_id);
1614 		goto done;
1615 	}
1616 
1617 	/*
1618 	 * We can let the network stack handle the intricate validation of the
1619 	 * IP header; we just check the size and the version, so we can
1620 	 * read the source address in wg_aip_lookup.
1621 	 *
1622 	 * We also need to trim the packet, as it was likely padded before
1623 	 * encryption. While we could drop it here, it will be more helpful to
1624 	 * pass it to bpf_mtap and use the counters that people are expecting
1625 	 * in ipv4_input and ipv6_input. We can rely on ipv4_input and
1626 	 * ipv6_input to properly validate the headers.
1627 	 */
1628 	ip = mtod(m, struct ip *);
1629 	ip6 = mtod(m, struct ip6_hdr *);
1630 
1631 	if (m->m_pkthdr.len >= sizeof(struct ip) && ip->ip_v == IPVERSION) {
1632 		m->m_pkthdr.ph_family = AF_INET;
1633 
1634 		len = ntohs(ip->ip_len);
1635 		if (len >= sizeof(struct ip) && len < m->m_pkthdr.len)
1636 			m_adj(m, len - m->m_pkthdr.len);
1637 
1638 		allowed_peer = wg_aip_lookup(sc->sc_aip4, &ip->ip_src);
1639 #ifdef INET6
1640 	} else if (m->m_pkthdr.len >= sizeof(struct ip6_hdr) &&
1641 	    (ip6->ip6_vfc & IPV6_VERSION_MASK) == IPV6_VERSION) {
1642 		m->m_pkthdr.ph_family = AF_INET6;
1643 
1644 		len = ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr);
1645 		if (len < m->m_pkthdr.len)
1646 			m_adj(m, len - m->m_pkthdr.len);
1647 
1648 		allowed_peer = wg_aip_lookup(sc->sc_aip6, &ip6->ip6_src);
1649 #endif
1650 	} else {
1651 		DPRINTF(sc, "Packet is neither ipv4 nor ipv6 from "
1652 		    "peer %llu\n", peer->p_id);
1653 		goto error;
1654 	}
1655 
1656 	if (__predict_false(peer != allowed_peer)) {
1657 		DPRINTF(sc, "Packet has unallowed src IP from peer "
1658 		    "%llu\n", peer->p_id);
1659 		goto error;
1660 	}
1661 
1662 	/* tunneled packet was not offloaded */
1663 	m->m_pkthdr.csum_flags = 0;
1664 
1665 	m->m_pkthdr.ph_ifidx = sc->sc_if.if_index;
1666 	m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
1667 	m->m_flags &= ~(M_MCAST | M_BCAST);
1668 #if NPF > 0
1669 	pf_pkt_addr_changed(m);
1670 #endif /* NPF > 0 */
1671 
1672 done:
1673 	t->t_mbuf = m;
1674 error:
1675 	t->t_done = 1;
1676 	task_add(net_tq(sc->sc_if.if_index), &peer->p_deliver_in);
1677 }
1678 
1679 void
1680 wg_encap_worker(void *_sc)
1681 {
1682 	struct mbuf *m;
1683 	struct wg_softc *sc = _sc;
1684 	while ((m = wg_ring_dequeue(&sc->sc_encap_ring)) != NULL)
1685 		wg_encap(sc, m);
1686 }
1687 
1688 void
1689 wg_decap_worker(void *_sc)
1690 {
1691 	struct mbuf *m;
1692 	struct wg_softc *sc = _sc;
1693 	while ((m = wg_ring_dequeue(&sc->sc_decap_ring)) != NULL)
1694 		wg_decap(sc, m);
1695 }
1696 
1697 void
1698 wg_deliver_out(void *_peer)
1699 {
1700 	struct wg_peer		*peer = _peer;
1701 	struct wg_softc		*sc = peer->p_sc;
1702 	struct wg_endpoint	 endpoint;
1703 	struct wg_tag		*t;
1704 	struct mbuf		*m;
1705 	int			 ret;
1706 
1707 	wg_peer_get_endpoint(peer, &endpoint);
1708 
1709 	while ((m = wg_queue_dequeue(&peer->p_encap_queue, &t)) != NULL) {
1710 		/* t_mbuf will contain the encrypted packet */
1711 		if (t->t_mbuf == NULL){
1712 			counters_inc(sc->sc_if.if_counters, ifc_oerrors);
1713 			m_freem(m);
1714 			continue;
1715 		}
1716 
1717 		ret = wg_send(sc, &endpoint, t->t_mbuf);
1718 
1719 		if (ret == 0) {
1720 			wg_timers_event_any_authenticated_packet_traversal(
1721 			    &peer->p_timers);
1722 			wg_timers_event_any_authenticated_packet_sent(
1723 			    &peer->p_timers);
1724 
1725 			if (m->m_pkthdr.len != 0)
1726 				wg_timers_event_data_sent(&peer->p_timers);
1727 		} else if (ret == EADDRNOTAVAIL) {
1728 			wg_peer_clear_src(peer);
1729 			wg_peer_get_endpoint(peer, &endpoint);
1730 		}
1731 
1732 		m_freem(m);
1733 	}
1734 }
1735 
1736 void
1737 wg_deliver_in(void *_peer)
1738 {
1739 	struct wg_peer	*peer = _peer;
1740 	struct wg_softc	*sc = peer->p_sc;
1741 	struct wg_tag	*t;
1742 	struct mbuf	*m;
1743 
1744 	while ((m = wg_queue_dequeue(&peer->p_decap_queue, &t)) != NULL) {
1745 		/* t_mbuf will contain the decrypted packet */
1746 		if (t->t_mbuf == NULL) {
1747 			counters_inc(sc->sc_if.if_counters, ifc_ierrors);
1748 			m_freem(m);
1749 			continue;
1750 		}
1751 
1752 		/* From here on m == t->t_mbuf */
1753 		KASSERT(m == t->t_mbuf);
1754 
1755 		wg_timers_event_any_authenticated_packet_received(
1756 		    &peer->p_timers);
1757 		wg_timers_event_any_authenticated_packet_traversal(
1758 		    &peer->p_timers);
1759 
1760 		if (m->m_pkthdr.len == 0) {
1761 			m_freem(m);
1762 			continue;
1763 		}
1764 
1765 #if NBPFILTER > 0
1766 		if (sc->sc_if.if_bpf != NULL)
1767 			bpf_mtap_af(sc->sc_if.if_bpf,
1768 			    m->m_pkthdr.ph_family, m, BPF_DIRECTION_IN);
1769 #endif
1770 
1771 		NET_LOCK();
1772 		if (m->m_pkthdr.ph_family == AF_INET)
1773 			ipv4_input(&sc->sc_if, m);
1774 #ifdef INET6
1775 		else if (m->m_pkthdr.ph_family == AF_INET6)
1776 			ipv6_input(&sc->sc_if, m);
1777 #endif
1778 		else
1779 			panic("invalid ph_family");
1780 		NET_UNLOCK();
1781 
1782 		wg_timers_event_data_received(&peer->p_timers);
1783 	}
1784 }
1785 
1786 int
1787 wg_queue_in(struct wg_softc *sc, struct wg_peer *peer, struct mbuf *m)
1788 {
1789 	struct wg_ring		*parallel = &sc->sc_decap_ring;
1790 	struct wg_queue		*serial = &peer->p_decap_queue;
1791 	struct wg_tag		*t;
1792 
1793 	mtx_enter(&serial->q_mtx);
1794 	if (serial->q_list.ml_len < MAX_QUEUED_PKT) {
1795 		ml_enqueue(&serial->q_list, m);
1796 		mtx_leave(&serial->q_mtx);
1797 	} else {
1798 		mtx_leave(&serial->q_mtx);
1799 		m_freem(m);
1800 		return ENOBUFS;
1801 	}
1802 
1803 	mtx_enter(&parallel->r_mtx);
1804 	if (parallel->r_tail - parallel->r_head < MAX_QUEUED_PKT) {
1805 		parallel->r_buf[parallel->r_tail & MAX_QUEUED_PKT_MASK] = m;
1806 		parallel->r_tail++;
1807 		mtx_leave(&parallel->r_mtx);
1808 	} else {
1809 		mtx_leave(&parallel->r_mtx);
1810 		t = wg_tag_get(m);
1811 		t->t_done = 1;
1812 		return ENOBUFS;
1813 	}
1814 
1815 	return 0;
1816 }
1817 
1818 void
1819 wg_queue_out(struct wg_softc *sc, struct wg_peer *peer)
1820 {
1821 	struct wg_ring		*parallel = &sc->sc_encap_ring;
1822 	struct wg_queue		*serial = &peer->p_encap_queue;
1823 	struct mbuf_list 	 ml, ml_free;
1824 	struct mbuf		*m;
1825 	struct wg_tag		*t;
1826 	int			 dropped;
1827 
1828 	/*
1829 	 * We delist all staged packets and then add them to the queues. This
1830 	 * can race with wg_qstart when called from wg_send_keepalive; however,
1831 	 * wg_qstart itself will not race, as it is serialised.
1832 	 */
1833 	mq_delist(&peer->p_stage_queue, &ml);
1834 	ml_init(&ml_free);
1835 
1836 	while ((m = ml_dequeue(&ml)) != NULL) {
1837 		mtx_enter(&serial->q_mtx);
1838 		if (serial->q_list.ml_len < MAX_QUEUED_PKT) {
1839 			ml_enqueue(&serial->q_list, m);
1840 			mtx_leave(&serial->q_mtx);
1841 		} else {
1842 			mtx_leave(&serial->q_mtx);
1843 			ml_enqueue(&ml_free, m);
1844 			continue;
1845 		}
1846 
1847 		mtx_enter(&parallel->r_mtx);
1848 		if (parallel->r_tail - parallel->r_head < MAX_QUEUED_PKT) {
1849 			parallel->r_buf[parallel->r_tail & MAX_QUEUED_PKT_MASK] = m;
1850 			parallel->r_tail++;
1851 			mtx_leave(&parallel->r_mtx);
1852 		} else {
1853 			mtx_leave(&parallel->r_mtx);
1854 			t = wg_tag_get(m);
1855 			t->t_done = 1;
1856 		}
1857 	}
1858 
1859 	if ((dropped = ml_purge(&ml_free)) > 0)
1860 		counters_add(sc->sc_if.if_counters, ifc_oqdrops, dropped);
1861 }
1862 
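/*
 * Take the next mbuf off a parallel ring, or return NULL if the ring is
 * empty. r_head and r_tail only ever increase; the slot is selected by
 * masking with MAX_QUEUED_PKT_MASK.
 */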
1863 struct mbuf *
1864 wg_ring_dequeue(struct wg_ring *r)
1865 {
1866 	struct mbuf *m = NULL;
1867 	mtx_enter(&r->r_mtx);
1868 	if (r->r_head != r->r_tail) {
1869 		m = r->r_buf[r->r_head & MAX_QUEUED_PKT_MASK];
1870 		r->r_head++;
1871 	}
1872 	mtx_leave(&r->r_mtx);
1873 	return m;
1874 }
1875 
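/*
 * Dequeue from a serial queue. The head packet is only removed once its
 * crypto work has finished (t_done is set), so packets leave the queue in
 * the order they were queued even though they may be processed by
 * different workers.
 */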
1876 struct mbuf *
1877 wg_queue_dequeue(struct wg_queue *q, struct wg_tag **t)
1878 {
1879 	struct mbuf *m;
1880 	mtx_enter(&q->q_mtx);
1881 	if ((m = q->q_list.ml_head) != NULL && (*t = wg_tag_get(m))->t_done)
1882 		ml_dequeue(&q->q_list);
1883 	else
1884 		m = NULL;
1885 	mtx_leave(&q->q_mtx);
1886 	return m;
1887 }
1888 
1889 size_t
1890 wg_queue_len(struct wg_queue *q)
1891 {
1892 	size_t len;
1893 	mtx_enter(&q->q_mtx);
1894 	len = q->q_list.ml_len;
1895 	mtx_leave(&q->q_mtx);
1896 	return len;
1897 }
1898 
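/*
 * wg_remote_get, wg_index_set and wg_index_drop are the upcalls handed to
 * the noise layer via struct noise_upcall in wg_clone_create; wg_index_get
 * is the reverse lookup used by wg_input to match a data packet's receiver
 * index to a remote.
 */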
1899 struct noise_remote *
1900 wg_remote_get(void *_sc, uint8_t public[NOISE_PUBLIC_KEY_LEN])
1901 {
1902 	struct wg_peer	*peer;
1903 	struct wg_softc	*sc = _sc;
1904 	if ((peer = wg_peer_lookup(sc, public)) == NULL)
1905 		return NULL;
1906 	return &peer->p_remote;
1907 }
1908 
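/*
 * Allocate a random 32-bit index for a remote. Uniqueness is checked only
 * within the bucket the new key hashes to: any existing entry with the
 * same full key would necessarily live in that bucket, so scanning one
 * bucket is enough.
 */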
1909 uint32_t
1910 wg_index_set(void *_sc, struct noise_remote *remote)
1911 {
1912 	struct wg_peer	*peer;
1913 	struct wg_softc	*sc = _sc;
1914 	struct wg_index *index, *iter;
1915 	uint32_t	 key;
1916 
1917 	/*
1918 	 * We can modify this without a lock as wg_index_set, wg_index_drop are
1919 	 * guaranteed to be serialised (per remote).
1920 	 */
1921 	peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1922 	index = SLIST_FIRST(&peer->p_unused_index);
1923 	KASSERT(index != NULL);
1924 	SLIST_REMOVE_HEAD(&peer->p_unused_index, i_unused_entry);
1925 
1926 	index->i_value = remote;
1927 
1928 	mtx_enter(&sc->sc_index_mtx);
1929 assign_id:
1930 	key = index->i_key = arc4random();
1931 	key &= sc->sc_index_mask;
1932 	LIST_FOREACH(iter, &sc->sc_index[key], i_entry)
1933 		if (iter->i_key == index->i_key)
1934 			goto assign_id;
1935 
1936 	LIST_INSERT_HEAD(&sc->sc_index[key], index, i_entry);
1937 
1938 	mtx_leave(&sc->sc_index_mtx);
1939 
1940 	/* Likewise, no need to lock for index here. */
1941 	return index->i_key;
1942 }
1943 
1944 struct noise_remote *
1945 wg_index_get(void *_sc, uint32_t key0)
1946 {
1947 	struct wg_softc		*sc = _sc;
1948 	struct wg_index		*iter;
1949 	struct noise_remote	*remote = NULL;
1950 	uint32_t		 key = key0 & sc->sc_index_mask;
1951 
1952 	mtx_enter(&sc->sc_index_mtx);
1953 	LIST_FOREACH(iter, &sc->sc_index[key], i_entry)
1954 		if (iter->i_key == key0) {
1955 			remote = iter->i_value;
1956 			break;
1957 		}
1958 	mtx_leave(&sc->sc_index_mtx);
1959 	return remote;
1960 }
1961 
1962 void
1963 wg_index_drop(void *_sc, uint32_t key0)
1964 {
1965 	struct wg_softc	*sc = _sc;
1966 	struct wg_index	*iter;
1967 	struct wg_peer	*peer = NULL;
1968 	uint32_t	 key = key0 & sc->sc_index_mask;
1969 
1970 	mtx_enter(&sc->sc_index_mtx);
1971 	LIST_FOREACH(iter, &sc->sc_index[key], i_entry)
1972 		if (iter->i_key == key0) {
1973 			LIST_REMOVE(iter, i_entry);
1974 			break;
1975 		}
1976 	mtx_leave(&sc->sc_index_mtx);
1977 
1978 	/* We expect a peer */
1979 	peer = CONTAINER_OF(iter->i_value, struct wg_peer, p_remote);
1980 	KASSERT(peer != NULL);
1981 	SLIST_INSERT_HEAD(&peer->p_unused_index, iter, i_unused_entry);
1982 }
1983 
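/*
 * wg_input is the UDP receive path. It records the packet's source and
 * destination in the wg_tag, trims the preceding network header, then
 * either queues a handshake packet for the handshake taskq or, for data
 * packets, looks up the peer by receiver index and queues the packet for
 * decryption.
 */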
1984 struct mbuf *
1985 wg_input(void *_sc, struct mbuf *m, struct ip *ip, struct ip6_hdr *ip6,
1986     void *_uh, int hlen)
1987 {
1988 	struct wg_pkt_data	*data;
1989 	struct noise_remote	*remote;
1990 	struct wg_tag		*t;
1991 	struct wg_softc		*sc = _sc;
1992 	struct udphdr		*uh = _uh;
1993 
1994 	NET_ASSERT_LOCKED();
1995 
1996 	if ((t = wg_tag_get(m)) == NULL) {
1997 		m_freem(m);
1998 		return NULL;
1999 	}
2000 
2001 	if (ip != NULL) {
2002 		t->t_endpoint.e_remote.r_sa.sa_len = sizeof(struct sockaddr_in);
2003 		t->t_endpoint.e_remote.r_sa.sa_family = AF_INET;
2004 		t->t_endpoint.e_remote.r_sin.sin_port = uh->uh_sport;
2005 		t->t_endpoint.e_remote.r_sin.sin_addr = ip->ip_src;
2006 		t->t_endpoint.e_local.l_in = ip->ip_dst;
2007 #ifdef INET6
2008 	} else if (ip6 != NULL) {
2009 		t->t_endpoint.e_remote.r_sa.sa_len = sizeof(struct sockaddr_in6);
2010 		t->t_endpoint.e_remote.r_sa.sa_family = AF_INET6;
2011 		t->t_endpoint.e_remote.r_sin6.sin6_port = uh->uh_sport;
2012 		t->t_endpoint.e_remote.r_sin6.sin6_addr = ip6->ip6_src;
2013 		t->t_endpoint.e_local.l_in6 = ip6->ip6_dst;
2014 #endif
2015 	} else {
2016 		m_freem(m);
2017 		return NULL;
2018 	}
2019 
2020 	/* m has an IP/IPv6 header of hlen length; we don't need it anymore. */
2021 	m_adj(m, hlen);
2022 
2023 	/*
2024 	 * Ensure mbuf is contiguous over full length of packet. This is done
2025 	 * so we can directly read the handshake values in wg_handshake, and so
2026 	 * we can decrypt a transport packet by passing a single buffer to
2027 	 * noise_remote_decrypt in wg_decap.
2028 	 */
2029 	if ((m = m_pullup(m, m->m_pkthdr.len)) == NULL)
2030 		return NULL;
2031 
2032 	if ((m->m_pkthdr.len == sizeof(struct wg_pkt_initiation) &&
2033 		*mtod(m, uint32_t *) == WG_PKT_INITIATION) ||
2034 	    (m->m_pkthdr.len == sizeof(struct wg_pkt_response) &&
2035 		*mtod(m, uint32_t *) == WG_PKT_RESPONSE) ||
2036 	    (m->m_pkthdr.len == sizeof(struct wg_pkt_cookie) &&
2037 		*mtod(m, uint32_t *) == WG_PKT_COOKIE)) {
2038 
2039 		if (mq_enqueue(&sc->sc_handshake_queue, m) != 0)
2040 			DPRINTF(sc, "Dropping handshake packet\n");
2041 		task_add(wg_handshake_taskq, &sc->sc_handshake);
2042 
2043 	} else if (m->m_pkthdr.len >= sizeof(struct wg_pkt_data) +
2044 	    NOISE_AUTHTAG_LEN && *mtod(m, uint32_t *) == WG_PKT_DATA) {
2045 
2046 		data = mtod(m, struct wg_pkt_data *);
2047 
2048 		if ((remote = wg_index_get(sc, data->r_idx)) != NULL) {
2049 			t->t_peer = CONTAINER_OF(remote, struct wg_peer,
2050 			    p_remote);
2051 			t->t_mbuf = NULL;
2052 			t->t_done = 0;
2053 
2054 			if (wg_queue_in(sc, t->t_peer, m) != 0)
2055 				counters_inc(sc->sc_if.if_counters,
2056 				    ifc_iqdrops);
2057 			task_add(wg_crypt_taskq, &sc->sc_decap);
2058 		} else {
2059 			counters_inc(sc->sc_if.if_counters, ifc_ierrors);
2060 			m_freem(m);
2061 		}
2062 	} else {
2063 		counters_inc(sc->sc_if.if_counters, ifc_ierrors);
2064 		m_freem(m);
2065 	}
2066 
2067 	return NULL;
2068 }
2069 
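/*
 * if_qstart handler. Packets dequeued from the ifqueue are pushed onto
 * their peer's stage queue; each peer touched is then either flushed to
 * the encryption queues (if a session is ready) or asked to initiate a
 * handshake.
 */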
2070 void
2071 wg_qstart(struct ifqueue *ifq)
2072 {
2073 	struct ifnet		*ifp = ifq->ifq_if;
2074 	struct wg_softc		*sc = ifp->if_softc;
2075 	struct wg_peer		*peer;
2076 	struct wg_tag		*t;
2077 	struct mbuf		*m;
2078 	SLIST_HEAD(,wg_peer)	 start_list;
2079 
2080 	SLIST_INIT(&start_list);
2081 
2082 	/*
2083 	 * We should be OK to modify p_start_list, p_start_onlist in this
2084 	 * function as there should only be one ifp->if_qstart invoked at a
2085 	 * time.
2086 	 */
2087 	while ((m = ifq_dequeue(ifq)) != NULL) {
2088 		t = wg_tag_get(m);
2089 		peer = t->t_peer;
2090 		if (mq_push(&peer->p_stage_queue, m) != 0)
2091 			counters_inc(ifp->if_counters, ifc_oqdrops);
2092 		if (!peer->p_start_onlist) {
2093 			SLIST_INSERT_HEAD(&start_list, peer, p_start_list);
2094 			peer->p_start_onlist = 1;
2095 		}
2096 	}
2097 	SLIST_FOREACH(peer, &start_list, p_start_list) {
2098 		if (noise_remote_ready(&peer->p_remote) == 0)
2099 			wg_queue_out(sc, peer);
2100 		else
2101 			wg_timers_event_want_initiation(&peer->p_timers);
2102 		peer->p_start_onlist = 0;
2103 	}
2104 	task_add(wg_crypt_taskq, &sc->sc_encap);
2105 }
2106 
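/*
 * if_output handler. The destination peer is selected by looking up the
 * packet's destination address in the allowed-IP tables; the peer and MTU
 * are stashed in the wg_tag and the packet is handed to the interface
 * queue for wg_qstart.
 */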
2107 int
2108 wg_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
2109     struct rtentry *rt)
2110 {
2111 	struct wg_softc	*sc = ifp->if_softc;
2112 	struct wg_peer	*peer;
2113 	struct wg_tag	*t;
2114 	int		 af, ret = EINVAL;
2115 
2116 	NET_ASSERT_LOCKED();
2117 
2118 	if ((t = wg_tag_get(m)) == NULL) {
2119 		ret = ENOBUFS;
2120 		goto error;
2121 	}
2122 
2123 	m->m_pkthdr.ph_family = sa->sa_family;
2124 	if (sa->sa_family == AF_INET) {
2125 		peer = wg_aip_lookup(sc->sc_aip4,
2126 		    &mtod(m, struct ip *)->ip_dst);
2127 #ifdef INET6
2128 	} else if (sa->sa_family == AF_INET6) {
2129 		peer = wg_aip_lookup(sc->sc_aip6,
2130 		    &mtod(m, struct ip6_hdr *)->ip6_dst);
2131 #endif
2132 	} else {
2133 		ret = EAFNOSUPPORT;
2134 		goto error;
2135 	}
2136 
2137 #if NBPFILTER > 0
2138 	if (sc->sc_if.if_bpf)
2139 		bpf_mtap_af(sc->sc_if.if_bpf, sa->sa_family, m,
2140 		    BPF_DIRECTION_OUT);
2141 #endif
2142 
2143 	if (peer == NULL) {
2144 		ret = ENETUNREACH;
2145 		goto error;
2146 	}
2147 
2148 	af = peer->p_endpoint.e_remote.r_sa.sa_family;
2149 	if (af != AF_INET && af != AF_INET6) {
2150 		DPRINTF(sc, "No valid endpoint has been configured or "
2151 				"discovered for peer %llu\n", peer->p_id);
2152 		ret = EDESTADDRREQ;
2153 		goto error;
2154 	}
2155 
2156 	if (m->m_pkthdr.ph_loopcnt++ > M_MAXLOOP) {
2157 		DPRINTF(sc, "Packet looped\n");
2158 		ret = ELOOP;
2159 		goto error;
2160 	}
2161 
2162 	/*
2163 	 * As we hold a reference to peer in the mbuf, we can't handle a
2164 	 * delayed packet without doing some refcnting. If a peer is removed
2165 	 * while a delayed packet holds a reference, bad things will happen. For
2166 	 * the time being, delayed packets are unsupported. This may be fixed
2167 	 * with another aip_lookup in wg_qstart, or refcnting as mentioned above.
2168 	 */
2169 	if (m->m_pkthdr.pf.delay > 0) {
2170 		DPRINTF(sc, "PF delay unsupported\n");
2171 		ret = EOPNOTSUPP;
2172 		goto error;
2173 	}
2174 
2175 	t->t_peer = peer;
2176 	t->t_mbuf = NULL;
2177 	t->t_done = 0;
2178 	t->t_mtu = ifp->if_mtu;
2179 
2180 	/*
2181 	 * We still have an issue with ifq: it counts a packet even if it is
2182 	 * later dropped in wg_qstart or never encrypted. Such packets are also
2183 	 * counted as ofails or oqdrops, so they end up counted twice.
2184 	 */
2185 	return if_enqueue(ifp, m);
2186 error:
2187 	counters_inc(ifp->if_counters, ifc_oerrors);
2188 	m_freem(m);
2189 	return ret;
2190 }
2191 
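/*
 * SIOCSWG. The userland buffer is laid out as a wg_interface_io followed
 * immediately by each peer's wg_peer_io and that peer's array of
 * wg_aip_io, which is why peer_p is advanced past the AIPs after every
 * peer.
 */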
2192 int
2193 wg_ioctl_set(struct wg_softc *sc, struct wg_data_io *data)
2194 {
2195 	struct wg_interface_io	*iface_p, iface_o;
2196 	struct wg_peer_io	*peer_p, peer_o;
2197 	struct wg_aip_io	*aip_p, aip_o;
2198 
2199 	struct wg_peer		*peer, *tpeer;
2200 	struct wg_aip		*aip, *taip;
2201 
2202 	in_port_t		 port;
2203 	int			 rtable;
2204 
2205 	uint8_t			 public[WG_KEY_SIZE], private[WG_KEY_SIZE];
2206 	size_t			 i, j;
2207 	int			 ret, has_identity;
2208 
2209 	if ((ret = suser(curproc)) != 0)
2210 		return ret;
2211 
2212 	rw_enter_write(&sc->sc_lock);
2213 
2214 	iface_p = data->wgd_interface;
2215 	if ((ret = copyin(iface_p, &iface_o, sizeof(iface_o))) != 0)
2216 		goto error;
2217 
2218 	if (iface_o.i_flags & WG_INTERFACE_REPLACE_PEERS)
2219 		TAILQ_FOREACH_SAFE(peer, &sc->sc_peer_seq, p_seq_entry, tpeer)
2220 			wg_peer_destroy(peer);
2221 
2222 	if (iface_o.i_flags & WG_INTERFACE_HAS_PRIVATE &&
2223 	    (noise_local_keys(&sc->sc_local, NULL, private) ||
2224 	     timingsafe_bcmp(private, iface_o.i_private, WG_KEY_SIZE))) {
2225 		if (curve25519_generate_public(public, iface_o.i_private)) {
2226 			if ((peer = wg_peer_lookup(sc, public)) != NULL)
2227 				wg_peer_destroy(peer);
2228 		}
2229 		noise_local_lock_identity(&sc->sc_local);
2230 		has_identity = noise_local_set_private(&sc->sc_local,
2231 						       iface_o.i_private);
2232 		TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2233 			noise_remote_precompute(&peer->p_remote);
2234 			wg_timers_event_reset_handshake_last_sent(&peer->p_timers);
2235 			noise_remote_expire_current(&peer->p_remote);
2236 		}
2237 		cookie_checker_update(&sc->sc_cookie,
2238 				      has_identity == 0 ? public : NULL);
2239 		noise_local_unlock_identity(&sc->sc_local);
2240 	}
2241 
2242 	if (iface_o.i_flags & WG_INTERFACE_HAS_PORT)
2243 		port = htons(iface_o.i_port);
2244 	else
2245 		port = sc->sc_udp_port;
2246 
2247 	if (iface_o.i_flags & WG_INTERFACE_HAS_RTABLE)
2248 		rtable = iface_o.i_rtable;
2249 	else
2250 		rtable = sc->sc_udp_rtable;
2251 
2252 	if (port != sc->sc_udp_port || rtable != sc->sc_udp_rtable) {
2253 		TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry)
2254 			wg_peer_clear_src(peer);
2255 
2256 		if (sc->sc_if.if_flags & IFF_RUNNING)
2257 			if ((ret = wg_bind(sc, &port, &rtable)) != 0)
2258 				goto error;
2259 
2260 		sc->sc_udp_port = port;
2261 		sc->sc_udp_rtable = rtable;
2262 	}
2263 
2264 	peer_p = &iface_p->i_peers[0];
2265 	for (i = 0; i < iface_o.i_peers_count; i++) {
2266 		if ((ret = copyin(peer_p, &peer_o, sizeof(peer_o))) != 0)
2267 			goto error;
2268 
2269 		/* Peer must have public key */
2270 		if (!(peer_o.p_flags & WG_PEER_HAS_PUBLIC))
2271 			goto next_peer;
2272 
2273 		/* 0 = latest protocol, 1 = this protocol */
2274 		if (peer_o.p_protocol_version != 0) {
2275 			if (peer_o.p_protocol_version > 1) {
2276 				ret = EPFNOSUPPORT;
2277 				goto error;
2278 			}
2279 		}
2280 
2281 		/* Get local public key; the peer's key must not match it */
2282 		if (noise_local_keys(&sc->sc_local, public, NULL) == 0 &&
2283 		    bcmp(public, peer_o.p_public, WG_KEY_SIZE) == 0)
2284 			goto next_peer;
2285 
2286 		/* Look up the peer, or create it if it doesn't exist */
2287 		if ((peer = wg_peer_lookup(sc, peer_o.p_public)) == NULL) {
2288 			/* If we want to delete, there is no need to create a
2289 			 * new one. Likewise, don't create one if we only want
2290 			 * to update. */
2291 			if (peer_o.p_flags & (WG_PEER_REMOVE|WG_PEER_UPDATE))
2292 				goto next_peer;
2293 
2294 			if ((peer = wg_peer_create(sc,
2295 			    peer_o.p_public)) == NULL) {
2296 				ret = ENOMEM;
2297 				goto error;
2298 			}
2299 		}
2300 
2301 		/* Remove peer and continue if specified */
2302 		if (peer_o.p_flags & WG_PEER_REMOVE) {
2303 			wg_peer_destroy(peer);
2304 			goto next_peer;
2305 		}
2306 
2307 		if (peer_o.p_flags & WG_PEER_HAS_ENDPOINT)
2308 			wg_peer_set_sockaddr(peer, &peer_o.p_sa);
2309 
2310 		if (peer_o.p_flags & WG_PEER_HAS_PSK)
2311 			noise_remote_set_psk(&peer->p_remote, peer_o.p_psk);
2312 
2313 		if (peer_o.p_flags & WG_PEER_HAS_PKA)
2314 			wg_timers_set_persistent_keepalive(&peer->p_timers,
2315 			    peer_o.p_pka);
2316 
2317 		if (peer_o.p_flags & WG_PEER_REPLACE_AIPS) {
2318 			LIST_FOREACH_SAFE(aip, &peer->p_aip, a_entry, taip) {
2319 				wg_aip_remove(sc, peer, &aip->a_data);
2320 			}
2321 		}
2322 
2323 		aip_p = &peer_p->p_aips[0];
2324 		for (j = 0; j < peer_o.p_aips_count; j++) {
2325 			if ((ret = copyin(aip_p, &aip_o, sizeof(aip_o))) != 0)
2326 				goto error;
2327 			ret = wg_aip_add(sc, peer, &aip_o);
2328 			if (ret != 0)
2329 				goto error;
2330 			aip_p++;
2331 		}
2332 
2333 		peer_p = (struct wg_peer_io *)aip_p;
2334 		continue;
2335 next_peer:
2336 		aip_p = &peer_p->p_aips[0];
2337 		aip_p += peer_o.p_aips_count;
2338 		peer_p = (struct wg_peer_io *)aip_p;
2339 	}
2340 
2341 error:
2342 	rw_exit_write(&sc->sc_lock);
2343 	explicit_bzero(&iface_o, sizeof(iface_o));
2344 	explicit_bzero(&peer_o, sizeof(peer_o));
2345 	explicit_bzero(&aip_o, sizeof(aip_o));
2346 	explicit_bzero(public, sizeof(public));
2347 	explicit_bzero(private, sizeof(private));
2348 	return ret;
2349 }
2350 
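/*
 * SIOCGWG. If the caller's buffer (wgd_size) is too small to hold the
 * interface, peers and allowed IPs, nothing is copied out; the required
 * size is returned in wgd_size so userland can retry with a larger
 * buffer. Key material and peer details are only provided to the
 * superuser.
 */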
2351 int
2352 wg_ioctl_get(struct wg_softc *sc, struct wg_data_io *data)
2353 {
2354 	struct wg_interface_io	*iface_p, iface_o;
2355 	struct wg_peer_io	*peer_p, peer_o;
2356 	struct wg_aip_io	*aip_p;
2357 
2358 	struct wg_peer		*peer;
2359 	struct wg_aip		*aip;
2360 
2361 	size_t			 size, peer_count, aip_count;
2362 	int			 ret = 0, is_suser = suser(curproc) == 0;
2363 
2364 	size = sizeof(struct wg_interface_io);
2365 	if (data->wgd_size < size && !is_suser)
2366 		goto ret_size;
2367 
2368 	iface_p = data->wgd_interface;
2369 	bzero(&iface_o, sizeof(iface_o));
2370 
2371 	rw_enter_read(&sc->sc_lock);
2372 
2373 	if (sc->sc_udp_port != 0) {
2374 		iface_o.i_port = ntohs(sc->sc_udp_port);
2375 		iface_o.i_flags |= WG_INTERFACE_HAS_PORT;
2376 	}
2377 
2378 	if (sc->sc_udp_rtable != 0) {
2379 		iface_o.i_rtable = sc->sc_udp_rtable;
2380 		iface_o.i_flags |= WG_INTERFACE_HAS_RTABLE;
2381 	}
2382 
2383 	if (!is_suser)
2384 		goto copy_out_iface;
2385 
2386 	if (noise_local_keys(&sc->sc_local, iface_o.i_public,
2387 	    iface_o.i_private) == 0) {
2388 		iface_o.i_flags |= WG_INTERFACE_HAS_PUBLIC;
2389 		iface_o.i_flags |= WG_INTERFACE_HAS_PRIVATE;
2390 	}
2391 
2392 	size += sizeof(struct wg_peer_io) * sc->sc_peer_num;
2393 	size += sizeof(struct wg_aip_io) * sc->sc_aip_num;
2394 	if (data->wgd_size < size)
2395 		goto unlock_and_ret_size;
2396 
2397 	peer_count = 0;
2398 	peer_p = &iface_p->i_peers[0];
2399 	TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2400 		bzero(&peer_o, sizeof(peer_o));
2401 		peer_o.p_flags = WG_PEER_HAS_PUBLIC;
2402 		peer_o.p_protocol_version = 1;
2403 
2404 		if (noise_remote_keys(&peer->p_remote, peer_o.p_public,
2405 		    peer_o.p_psk) == 0)
2406 			peer_o.p_flags |= WG_PEER_HAS_PSK;
2407 
2408 		if (wg_timers_get_persistent_keepalive(&peer->p_timers,
2409 		    &peer_o.p_pka) == 0)
2410 			peer_o.p_flags |= WG_PEER_HAS_PKA;
2411 
2412 		if (wg_peer_get_sockaddr(peer, &peer_o.p_sa) == 0)
2413 			peer_o.p_flags |= WG_PEER_HAS_ENDPOINT;
2414 
2415 		mtx_enter(&peer->p_counters_mtx);
2416 		peer_o.p_txbytes = peer->p_counters_tx;
2417 		peer_o.p_rxbytes = peer->p_counters_rx;
2418 		mtx_leave(&peer->p_counters_mtx);
2419 
2420 		wg_timers_get_last_handshake(&peer->p_timers,
2421 		    &peer_o.p_last_handshake);
2422 
2423 		aip_count = 0;
2424 		aip_p = &peer_p->p_aips[0];
2425 		LIST_FOREACH(aip, &peer->p_aip, a_entry) {
2426 			if ((ret = copyout(&aip->a_data, aip_p, sizeof(*aip_p))) != 0)
2427 				goto unlock_and_ret_size;
2428 			aip_p++;
2429 			aip_count++;
2430 		}
2431 		peer_o.p_aips_count = aip_count;
2432 
2433 		if ((ret = copyout(&peer_o, peer_p, sizeof(peer_o))) != 0)
2434 			goto unlock_and_ret_size;
2435 
2436 		peer_p = (struct wg_peer_io *)aip_p;
2437 		peer_count++;
2438 	}
2439 	iface_o.i_peers_count = peer_count;
2440 
2441 copy_out_iface:
2442 	ret = copyout(&iface_o, iface_p, sizeof(iface_o));
2443 unlock_and_ret_size:
2444 	rw_exit_read(&sc->sc_lock);
2445 	explicit_bzero(&iface_o, sizeof(iface_o));
2446 	explicit_bzero(&peer_o, sizeof(peer_o));
2447 ret_size:
2448 	data->wgd_size = size;
2449 	return ret;
2450 }
2451 
2452 int
2453 wg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2454 {
2455 	struct ifreq	*ifr = (struct ifreq *) data;
2456 	struct wg_softc	*sc = ifp->if_softc;
2457 	int		 ret = 0;
2458 
2459 	switch (cmd) {
2460 	case SIOCSWG:
2461 		NET_UNLOCK();
2462 		ret = wg_ioctl_set(sc, (struct wg_data_io *) data);
2463 		NET_LOCK();
2464 		break;
2465 	case SIOCGWG:
2466 		NET_UNLOCK();
2467 		ret = wg_ioctl_get(sc, (struct wg_data_io *) data);
2468 		NET_LOCK();
2469 		break;
2470 	/* Interface IOCTLs */
2471 	case SIOCSIFADDR:
2472 		SET(ifp->if_flags, IFF_UP);
2473 		/* FALLTHROUGH */
2474 	case SIOCSIFFLAGS:
2475 		if (ISSET(ifp->if_flags, IFF_UP))
2476 			ret = wg_up(sc);
2477 		else
2478 			wg_down(sc);
2479 		break;
2480 	case SIOCSIFMTU:
2481 		/* Arbitrary limits */
2482 		if (ifr->ifr_mtu <= 0 || ifr->ifr_mtu > 9000)
2483 			ret = EINVAL;
2484 		else
2485 			ifp->if_mtu = ifr->ifr_mtu;
2486 		break;
2487 	case SIOCADDMULTI:
2488 	case SIOCDELMULTI:
2489 		break;
2490 	default:
2491 		ret = ENOTTY;
2492 	}
2493 
2494 	return ret;
2495 }
2496 
2497 int
2498 wg_up(struct wg_softc *sc)
2499 {
2500 	struct wg_peer	*peer;
2501 	int		 ret = 0;
2502 
2503 	NET_ASSERT_LOCKED();
2504 	/*
2505 	 * We use IFF_RUNNING for exclusive access here. We also take an
2506 	 * exclusive sc_lock, as wg_bind may write to sc_udp_port, and we drop
2507 	 * the NET_LOCK so we can call socreate, sobind, etc. Once solock is no
2508 	 * longer === NET_LOCK, we may be able to avoid this.
2509 	 */
2510 	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
2511 		SET(sc->sc_if.if_flags, IFF_RUNNING);
2512 		NET_UNLOCK();
2513 
2514 		rw_enter_write(&sc->sc_lock);
2515 		/*
2516 		 * If we successfully bind the socket, then enable the timers
2517 		 * for the peer. This will send all staged packets and a
2518 		 * keepalive if necessary.
2519 		 */
2520 		ret = wg_bind(sc, &sc->sc_udp_port, &sc->sc_udp_rtable);
2521 		if (ret == 0) {
2522 			TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2523 				wg_timers_enable(&peer->p_timers);
2524 				wg_queue_out(sc, peer);
2525 			}
2526 		}
2527 		rw_exit_write(&sc->sc_lock);
2528 
2529 		NET_LOCK();
2530 		if (ret != 0)
2531 			CLR(sc->sc_if.if_flags, IFF_RUNNING);
2532 	}
2533 	return ret;
2534 }
2535 
2536 void
2537 wg_down(struct wg_softc *sc)
2538 {
2539 	struct wg_peer	*peer;
2540 
2541 	NET_ASSERT_LOCKED();
2542 	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING))
2543 		return;
2544 	CLR(sc->sc_if.if_flags, IFF_RUNNING);
2545 	NET_UNLOCK();
2546 
2547 	/*
2548 	 * We only need a read lock here, as we aren't writing to anything
2549 	 * that isn't granularly locked.
2550 	 */
2551 	rw_enter_read(&sc->sc_lock);
2552 	TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2553 		mq_purge(&peer->p_stage_queue);
2554 		wg_timers_disable(&peer->p_timers);
2555 	}
2556 
2557 	taskq_barrier(wg_handshake_taskq);
2558 	TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2559 		noise_remote_clear(&peer->p_remote);
2560 		wg_timers_event_reset_handshake_last_sent(&peer->p_timers);
2561 	}
2562 
2563 	wg_unbind(sc);
2564 	rw_exit_read(&sc->sc_lock);
2565 	NET_LOCK();
2566 }
2567 
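/*
 * The handshake and crypto taskqs are shared by all wg(4) interfaces: they
 * are created when the first interface is cloned (wg_counter goes from 0
 * to 1) and torn down again in wg_clone_destroy when the last one goes
 * away.
 */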
2568 int
2569 wg_clone_create(struct if_clone *ifc, int unit)
2570 {
2571 	struct ifnet		*ifp;
2572 	struct wg_softc		*sc;
2573 	struct noise_upcall	 local_upcall;
2574 
2575 	KERNEL_ASSERT_LOCKED();
2576 
2577 	if (wg_counter == 0) {
2578 		wg_handshake_taskq = taskq_create("wg_handshake",
2579 		    2, IPL_NET, TASKQ_MPSAFE);
2580 		wg_crypt_taskq = taskq_create("wg_crypt",
2581 		    ncpus, IPL_NET, TASKQ_MPSAFE);
2582 
2583 		if (wg_handshake_taskq == NULL || wg_crypt_taskq == NULL) {
2584 			if (wg_handshake_taskq != NULL)
2585 				taskq_destroy(wg_handshake_taskq);
2586 			if (wg_crypt_taskq != NULL)
2587 				taskq_destroy(wg_crypt_taskq);
2588 			wg_handshake_taskq = NULL;
2589 			wg_crypt_taskq = NULL;
2590 			return ENOTRECOVERABLE;
2591 		}
2592 	}
2593 	wg_counter++;
2594 
2595 	if ((sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL)
2596 		goto ret_00;
2597 
2598 	local_upcall.u_arg = sc;
2599 	local_upcall.u_remote_get = wg_remote_get;
2600 	local_upcall.u_index_set = wg_index_set;
2601 	local_upcall.u_index_drop = wg_index_drop;
2602 
2603 	TAILQ_INIT(&sc->sc_peer_seq);
2604 
2605 	/* sc_if is initialised after everything else */
2606 	arc4random_buf(&sc->sc_secret, sizeof(sc->sc_secret));
2607 
2608 	rw_init(&sc->sc_lock, "wg");
2609 	noise_local_init(&sc->sc_local, &local_upcall);
2610 	if (cookie_checker_init(&sc->sc_cookie, &wg_ratelimit_pool) != 0)
2611 		goto ret_01;
2612 	sc->sc_udp_port = 0;
2613 	sc->sc_udp_rtable = 0;
2614 
2615 	rw_init(&sc->sc_so_lock, "wg_so");
2616 	sc->sc_so4 = NULL;
2617 #ifdef INET6
2618 	sc->sc_so6 = NULL;
2619 #endif
2620 
2621 	sc->sc_aip_num = 0;
2622 	if ((sc->sc_aip4 = art_alloc(0, 32, 0)) == NULL)
2623 		goto ret_02;
2624 #ifdef INET6
2625 	if ((sc->sc_aip6 = art_alloc(0, 128, 0)) == NULL)
2626 		goto ret_03;
2627 #endif
2628 
2629 	rw_init(&sc->sc_peer_lock, "wg_peer");
2630 	sc->sc_peer_num = 0;
2631 	if ((sc->sc_peer = hashinit(HASHTABLE_PEER_SIZE, M_DEVBUF,
2632 	    M_NOWAIT, &sc->sc_peer_mask)) == NULL)
2633 		goto ret_04;
2634 
2635 	mtx_init(&sc->sc_index_mtx, IPL_NET);
2636 	if ((sc->sc_index = hashinit(HASHTABLE_INDEX_SIZE, M_DEVBUF,
2637 	    M_NOWAIT, &sc->sc_index_mask)) == NULL)
2638 		goto ret_05;
2639 
2640 	task_set(&sc->sc_handshake, wg_handshake_worker, sc);
2641 	mq_init(&sc->sc_handshake_queue, MAX_QUEUED_HANDSHAKES, IPL_NET);
2642 
2643 	task_set(&sc->sc_encap, wg_encap_worker, sc);
2644 	task_set(&sc->sc_decap, wg_decap_worker, sc);
2645 
2646 	bzero(&sc->sc_encap_ring, sizeof(sc->sc_encap_ring));
2647 	mtx_init(&sc->sc_encap_ring.r_mtx, IPL_NET);
2648 	bzero(&sc->sc_decap_ring, sizeof(sc->sc_decap_ring));
2649 	mtx_init(&sc->sc_decap_ring.r_mtx, IPL_NET);
2650 
2651 	/* We've set up the softc; now we can set up the ifnet. */
2652 	ifp = &sc->sc_if;
2653 	ifp->if_softc = sc;
2654 
2655 	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "wg%d", unit);
2656 
2657 	ifp->if_mtu = DEFAULT_MTU;
2658 	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_NOARP;
2659 	ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
2660 	ifp->if_txmit = 64; /* Keep our workers active for longer. */
2661 
2662 	ifp->if_ioctl = wg_ioctl;
2663 	ifp->if_qstart = wg_qstart;
2664 	ifp->if_output = wg_output;
2665 
2666 	ifp->if_type = IFT_WIREGUARD;
2667 	ifp->if_rtrequest = p2p_rtrequest;
2668 
2669 	if_attach(ifp);
2670 	if_alloc_sadl(ifp);
2671 	if_counters_alloc(ifp);
2672 
2673 #if NBPFILTER > 0
2674 	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
2675 #endif
2676 
2677 	DPRINTF(sc, "Interface created\n");
2678 
2679 	return 0;
2680 ret_05:
2681 	hashfree(sc->sc_peer, HASHTABLE_PEER_SIZE, M_DEVBUF);
2682 ret_04:
2683 #ifdef INET6
2684 	free(sc->sc_aip6, M_RTABLE, sizeof(*sc->sc_aip6));
2685 ret_03:
2686 #endif
2687 	free(sc->sc_aip4, M_RTABLE, sizeof(*sc->sc_aip4));
2688 ret_02:
2689 	cookie_checker_deinit(&sc->sc_cookie);
2690 ret_01:
2691 	free(sc, M_DEVBUF, sizeof(*sc));
2692 ret_00:
2693 	return ENOBUFS;
2694 }
2695 int
2696 wg_clone_destroy(struct ifnet *ifp)
2697 {
2698 	struct wg_softc	*sc = ifp->if_softc;
2699 	struct wg_peer	*peer, *tpeer;
2700 
2701 	KERNEL_ASSERT_LOCKED();
2702 
2703 	rw_enter_write(&sc->sc_lock);
2704 	TAILQ_FOREACH_SAFE(peer, &sc->sc_peer_seq, p_seq_entry, tpeer)
2705 		wg_peer_destroy(peer);
2706 	rw_exit_write(&sc->sc_lock);
2707 
2708 	wg_unbind(sc);
2709 	if_detach(ifp);
2710 
2711 	wg_counter--;
2712 	if (wg_counter == 0) {
2713 		KASSERT(wg_handshake_taskq != NULL && wg_crypt_taskq != NULL);
2714 		taskq_destroy(wg_handshake_taskq);
2715 		taskq_destroy(wg_crypt_taskq);
2716 		wg_handshake_taskq = NULL;
2717 		wg_crypt_taskq = NULL;
2718 	}
2719 
2720 	DPRINTF(sc, "Destroyed interface\n");
2721 
2722 	hashfree(sc->sc_index, HASHTABLE_INDEX_SIZE, M_DEVBUF);
2723 	hashfree(sc->sc_peer, HASHTABLE_PEER_SIZE, M_DEVBUF);
2724 #ifdef INET6
2725 	free(sc->sc_aip6, M_RTABLE, sizeof(*sc->sc_aip6));
2726 #endif
2727 	free(sc->sc_aip4, M_RTABLE, sizeof(*sc->sc_aip4));
2728 	cookie_checker_deinit(&sc->sc_cookie);
2729 	free(sc, M_DEVBUF, sizeof(*sc));
2730 	return 0;
2731 }
2732 
2733 void
2734 wgattach(int nwg)
2735 {
2736 #ifdef WGTEST
2737 	cookie_test();
2738 	noise_test();
2739 #endif
2740 	if_clone_attach(&wg_cloner);
2741 
2742 	pool_init(&wg_aip_pool, sizeof(struct wg_aip), 0,
2743 			IPL_NET, 0, "wgaip", NULL);
2744 	pool_init(&wg_peer_pool, sizeof(struct wg_peer), 0,
2745 			IPL_NET, 0, "wgpeer", NULL);
2746 	pool_init(&wg_ratelimit_pool, sizeof(struct ratelimit_entry), 0,
2747 			IPL_NET, 0, "wgratelimit", NULL);
2748 }
2749