1 /*	$OpenBSD: if_wg.c,v 1.32 2023/10/23 10:22:05 mvs Exp $ */
2 
3 /*
4  * Copyright (C) 2015-2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
5  * Copyright (C) 2019-2020 Matt Dunwoodie <ncon@noconroy.net>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include "bpfilter.h"
21 #include "pf.h"
22 
23 #include <sys/types.h>
24 #include <sys/systm.h>
25 #include <sys/param.h>
26 #include <sys/pool.h>
27 
28 #include <sys/socket.h>
29 #include <sys/socketvar.h>
30 #include <sys/percpu.h>
31 #include <sys/ioctl.h>
32 #include <sys/mbuf.h>
33 
34 #include <net/if.h>
35 #include <net/if_var.h>
36 #include <net/if_types.h>
37 #include <net/if_wg.h>
38 
39 #include <net/wg_noise.h>
40 #include <net/wg_cookie.h>
41 
42 #include <net/pfvar.h>
43 #include <net/route.h>
44 #include <net/bpf.h>
45 
46 #include <netinet/ip.h>
47 #include <netinet/ip6.h>
48 #include <netinet/udp.h>
49 #include <netinet/in_pcb.h>
50 
51 #include <crypto/siphash.h>
52 
53 #define DEFAULT_MTU		1420
54 
55 #define MAX_STAGED_PKT		128
56 #define MAX_QUEUED_PKT		1024
57 #define MAX_QUEUED_PKT_MASK	(MAX_QUEUED_PKT - 1)
58 
59 #define MAX_QUEUED_HANDSHAKES	4096
60 
61 #define HASHTABLE_PEER_SIZE	(1 << 11)
62 #define HASHTABLE_INDEX_SIZE	(1 << 13)
63 #define MAX_PEERS_PER_IFACE	(1 << 20)
64 
65 #define REKEY_TIMEOUT		5
66 #define REKEY_TIMEOUT_JITTER	334 /* 1/3 sec, round for arc4random_uniform */
67 #define KEEPALIVE_TIMEOUT	10
68 #define MAX_TIMER_HANDSHAKES	(90 / REKEY_TIMEOUT)
69 #define NEW_HANDSHAKE_TIMEOUT	(REKEY_TIMEOUT + KEEPALIVE_TIMEOUT)
70 #define UNDERLOAD_TIMEOUT	1
71 
72 #define DPRINTF(sc, str, ...) do { if (ISSET((sc)->sc_if.if_flags, IFF_DEBUG))\
73     printf("%s: " str, (sc)->sc_if.if_xname, ##__VA_ARGS__); } while (0)
74 
75 #define CONTAINER_OF(ptr, type, member) ({			\
76 	const __typeof( ((type *)0)->member ) *__mptr = (ptr);	\
77 	(type *)( (char *)__mptr - offsetof(type,member) );})
78 
79 /* First byte indicating packet type on the wire */
80 #define WG_PKT_INITIATION htole32(1)
81 #define WG_PKT_RESPONSE htole32(2)
82 #define WG_PKT_COOKIE htole32(3)
83 #define WG_PKT_DATA htole32(4)
84 
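/*
 * Round a length up to the next multiple of 16; wg_encap() zero-pads data
 * payloads to this boundary (capped at the MTU) before encryption.
 */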
85 #define WG_PKT_WITH_PADDING(n)	(((n) + (16-1)) & (~(16-1)))
86 #define WG_KEY_SIZE		WG_KEY_LEN
87 
88 struct wg_pkt_initiation {
89 	uint32_t		t;
90 	uint32_t		s_idx;
91 	uint8_t			ue[NOISE_PUBLIC_KEY_LEN];
92 	uint8_t			es[NOISE_PUBLIC_KEY_LEN + NOISE_AUTHTAG_LEN];
93 	uint8_t			ets[NOISE_TIMESTAMP_LEN + NOISE_AUTHTAG_LEN];
94 	struct cookie_macs	m;
95 };
96 
97 struct wg_pkt_response {
98 	uint32_t		t;
99 	uint32_t		s_idx;
100 	uint32_t		r_idx;
101 	uint8_t			ue[NOISE_PUBLIC_KEY_LEN];
102 	uint8_t			en[0 + NOISE_AUTHTAG_LEN];
103 	struct cookie_macs	m;
104 };
105 
106 struct wg_pkt_cookie {
107 	uint32_t		t;
108 	uint32_t		r_idx;
109 	uint8_t			nonce[COOKIE_NONCE_SIZE];
110 	uint8_t			ec[COOKIE_ENCRYPTED_SIZE];
111 };
112 
113 struct wg_pkt_data {
114 	uint32_t		t;
115 	uint32_t		r_idx;
116 	uint8_t			nonce[sizeof(uint64_t)];
117 	uint8_t			buf[];
118 };
119 
120 struct wg_endpoint {
121 	union {
122 		struct sockaddr		r_sa;
123 		struct sockaddr_in	r_sin;
124 #ifdef INET6
125 		struct sockaddr_in6	r_sin6;
126 #endif
127 	} e_remote;
128 	union {
129 		struct in_addr		l_in;
130 #ifdef INET6
131 		struct in6_pktinfo	l_pktinfo6;
132 #define l_in6 l_pktinfo6.ipi6_addr
133 #endif
134 	} e_local;
135 };
136 
137 struct wg_tag {
138 	struct wg_endpoint	 t_endpoint;
139 	struct wg_peer		*t_peer;
140 	struct mbuf		*t_mbuf;
141 	int			 t_done;
142 	int			 t_mtu;
143 };
144 
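/*
 * A wg_index maps a 32-bit local session index, as carried in wire packets,
 * to the owning peer's noise_remote. Entries are hashed into sc_index; each
 * peer pre-allocates its own entries in p_index[] and hands them out through
 * p_unused_index.
 */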
145 struct wg_index {
146 	LIST_ENTRY(wg_index)	 i_entry;
147 	SLIST_ENTRY(wg_index)	 i_unused_entry;
148 	uint32_t		 i_key;
149 	struct noise_remote	*i_value;
150 };
151 
152 struct wg_timers {
153 	/* t_lock is for blocking wg_timers_event_* when setting t_disabled. */
154 	struct rwlock		 t_lock;
155 
156 	int			 t_disabled;
157 	int			 t_need_another_keepalive;
158 	uint16_t		 t_persistent_keepalive_interval;
159 	struct timeout		 t_new_handshake;
160 	struct timeout		 t_send_keepalive;
161 	struct timeout		 t_retry_handshake;
162 	struct timeout		 t_zero_key_material;
163 	struct timeout		 t_persistent_keepalive;
164 
165 	struct mutex		 t_handshake_mtx;
166 	struct timespec		 t_handshake_last_sent;	/* nanouptime */
167 	struct timespec		 t_handshake_complete;	/* nanotime */
168 	int			 t_handshake_retries;
169 };
170 
171 struct wg_aip {
172 	struct art_node		 a_node;
173 	LIST_ENTRY(wg_aip)	 a_entry;
174 	struct wg_peer		*a_peer;
175 	struct wg_aip_io	 a_data;
176 };
177 
178 struct wg_queue {
179 	struct mutex		 q_mtx;
180 	struct mbuf_list	 q_list;
181 };
182 
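/*
 * Mutex-protected ring buffer feeding the parallel (crypto) workers.
 * r_head and r_tail are free-running counters; a slot is addressed with
 * (counter & MAX_QUEUED_PKT_MASK), so the ring is full once
 * r_tail - r_head == MAX_QUEUED_PKT.
 */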
183 struct wg_ring {
184 	struct mutex	 r_mtx;
185 	uint32_t	 r_head;
186 	uint32_t	 r_tail;
187 	struct mbuf	*r_buf[MAX_QUEUED_PKT];
188 };
189 
190 struct wg_peer {
191 	LIST_ENTRY(wg_peer)	 p_pubkey_entry;
192 	TAILQ_ENTRY(wg_peer)	 p_seq_entry;
193 	uint64_t		 p_id;
194 	struct wg_softc		*p_sc;
195 
196 	struct noise_remote	 p_remote;
197 	struct cookie_maker	 p_cookie;
198 	struct wg_timers	 p_timers;
199 
200 	struct mutex		 p_counters_mtx;
201 	uint64_t		 p_counters_tx;
202 	uint64_t		 p_counters_rx;
203 
204 	struct mutex		 p_endpoint_mtx;
205 	struct wg_endpoint	 p_endpoint;
206 
207 	struct task		 p_send_initiation;
208 	struct task		 p_send_keepalive;
209 	struct task		 p_clear_secrets;
210 	struct task		 p_deliver_out;
211 	struct task		 p_deliver_in;
212 
213 	struct mbuf_queue	 p_stage_queue;
214 	struct wg_queue		 p_encap_queue;
215 	struct wg_queue		 p_decap_queue;
216 
217 	SLIST_HEAD(,wg_index)	 p_unused_index;
218 	struct wg_index		 p_index[3];
219 
220 	LIST_HEAD(,wg_aip)	 p_aip;
221 
222 	SLIST_ENTRY(wg_peer)	 p_start_list;
223 	int			 p_start_onlist;
224 
225 	char			 p_description[IFDESCRSIZE];
226 };
227 
228 struct wg_softc {
229 	struct ifnet		 sc_if;
230 	SIPHASH_KEY		 sc_secret;
231 
232 	struct rwlock		 sc_lock;
233 	struct noise_local	 sc_local;
234 	struct cookie_checker	 sc_cookie;
235 	in_port_t		 sc_udp_port;
236 	int			 sc_udp_rtable;
237 
238 	struct rwlock		 sc_so_lock;
239 	struct socket		*sc_so4;
240 #ifdef INET6
241 	struct socket		*sc_so6;
242 #endif
243 
244 	size_t			 sc_aip_num;
245 	struct art_root		*sc_aip4;
246 #ifdef INET6
247 	struct art_root		*sc_aip6;
248 #endif
249 
250 	struct rwlock		 sc_peer_lock;
251 	size_t			 sc_peer_num;
252 	LIST_HEAD(,wg_peer)	*sc_peer;
253 	TAILQ_HEAD(,wg_peer)	 sc_peer_seq;
254 	u_long			 sc_peer_mask;
255 
256 	struct mutex		 sc_index_mtx;
257 	LIST_HEAD(,wg_index)	*sc_index;
258 	u_long			 sc_index_mask;
259 
260 	struct task		 sc_handshake;
261 	struct mbuf_queue	 sc_handshake_queue;
262 
263 	struct task		 sc_encap;
264 	struct task		 sc_decap;
265 	struct wg_ring		 sc_encap_ring;
266 	struct wg_ring		 sc_decap_ring;
267 };
268 
269 struct wg_peer *
270 	wg_peer_create(struct wg_softc *, uint8_t[WG_KEY_SIZE]);
271 struct wg_peer *
272 	wg_peer_lookup(struct wg_softc *, const uint8_t[WG_KEY_SIZE]);
273 void	wg_peer_destroy(struct wg_peer *);
274 void	wg_peer_set_endpoint_from_tag(struct wg_peer *, struct wg_tag *);
275 void	wg_peer_set_sockaddr(struct wg_peer *, struct sockaddr *);
276 int	wg_peer_get_sockaddr(struct wg_peer *, struct sockaddr *);
277 void	wg_peer_clear_src(struct wg_peer *);
278 void	wg_peer_get_endpoint(struct wg_peer *, struct wg_endpoint *);
279 void	wg_peer_counters_add(struct wg_peer *, uint64_t, uint64_t);
280 
281 int	wg_aip_add(struct wg_softc *, struct wg_peer *, struct wg_aip_io *);
282 struct wg_peer *
283 	wg_aip_lookup(struct art_root *, void *);
284 int	wg_aip_remove(struct wg_softc *, struct wg_peer *,
285 	    struct wg_aip_io *);
286 
287 int	wg_socket_open(struct socket **, int, in_port_t *, int *, void *);
288 void	wg_socket_close(struct socket **);
289 int	wg_bind(struct wg_softc *, in_port_t *, int *);
290 void	wg_unbind(struct wg_softc *);
291 int	wg_send(struct wg_softc *, struct wg_endpoint *, struct mbuf *);
292 void	wg_send_buf(struct wg_softc *, struct wg_endpoint *, uint8_t *,
293 	    size_t);
294 
295 struct wg_tag *
296 	wg_tag_get(struct mbuf *);
297 
298 void	wg_timers_init(struct wg_timers *);
299 void	wg_timers_enable(struct wg_timers *);
300 void	wg_timers_disable(struct wg_timers *);
301 void	wg_timers_set_persistent_keepalive(struct wg_timers *, uint16_t);
302 int	wg_timers_get_persistent_keepalive(struct wg_timers *, uint16_t *);
303 void	wg_timers_get_last_handshake(struct wg_timers *, struct timespec *);
304 int	wg_timers_expired_handshake_last_sent(struct wg_timers *);
305 int	wg_timers_check_handshake_last_sent(struct wg_timers *);
306 
307 void	wg_timers_event_data_sent(struct wg_timers *);
308 void	wg_timers_event_data_received(struct wg_timers *);
309 void	wg_timers_event_any_authenticated_packet_sent(struct wg_timers *);
310 void	wg_timers_event_any_authenticated_packet_received(struct wg_timers *);
311 void	wg_timers_event_handshake_initiated(struct wg_timers *);
312 void	wg_timers_event_handshake_responded(struct wg_timers *);
313 void	wg_timers_event_handshake_complete(struct wg_timers *);
314 void	wg_timers_event_session_derived(struct wg_timers *);
315 void	wg_timers_event_any_authenticated_packet_traversal(struct wg_timers *);
316 void	wg_timers_event_want_initiation(struct wg_timers *);
317 void	wg_timers_event_reset_handshake_last_sent(struct wg_timers *);
318 
319 void	wg_timers_run_send_initiation(void *, int);
320 void	wg_timers_run_retry_handshake(void *);
321 void	wg_timers_run_send_keepalive(void *);
322 void	wg_timers_run_new_handshake(void *);
323 void	wg_timers_run_zero_key_material(void *);
324 void	wg_timers_run_persistent_keepalive(void *);
325 
326 void	wg_peer_send_buf(struct wg_peer *, uint8_t *, size_t);
327 void	wg_send_initiation(void *);
328 void	wg_send_response(struct wg_peer *);
329 void	wg_send_cookie(struct wg_softc *, struct cookie_macs *, uint32_t,
330 	    struct wg_endpoint *);
331 void	wg_send_keepalive(void *);
332 void	wg_peer_clear_secrets(void *);
333 void	wg_handshake(struct wg_softc *, struct mbuf *);
334 void	wg_handshake_worker(void *);
335 
336 void	wg_encap(struct wg_softc *, struct mbuf *);
337 void	wg_decap(struct wg_softc *, struct mbuf *);
338 void	wg_encap_worker(void *);
339 void	wg_decap_worker(void *);
340 void	wg_deliver_out(void *);
341 void	wg_deliver_in(void *);
342 
343 int	wg_queue_in(struct wg_softc *, struct wg_peer *, struct mbuf *);
344 void	wg_queue_out(struct wg_softc *, struct wg_peer *);
345 struct mbuf *
346 	wg_ring_dequeue(struct wg_ring *);
347 struct mbuf *
348 	wg_queue_dequeue(struct wg_queue *, struct wg_tag **);
349 size_t	wg_queue_len(struct wg_queue *);
350 
351 struct noise_remote *
352 	wg_remote_get(void *, uint8_t[NOISE_PUBLIC_KEY_LEN]);
353 uint32_t
354 	wg_index_set(void *, struct noise_remote *);
355 struct noise_remote *
356 	wg_index_get(void *, uint32_t);
357 void	wg_index_drop(void *, uint32_t);
358 
359 struct mbuf *
360 	wg_input(void *, struct mbuf *, struct ip *, struct ip6_hdr *, void *,
361 	    int);
362 int	wg_output(struct ifnet *, struct mbuf *, struct sockaddr *,
363 	    struct rtentry *);
364 int	wg_ioctl_set(struct wg_softc *, struct wg_data_io *);
365 int	wg_ioctl_get(struct wg_softc *, struct wg_data_io *);
366 int	wg_ioctl(struct ifnet *, u_long, caddr_t);
367 int	wg_up(struct wg_softc *);
368 void	wg_down(struct wg_softc *);
369 
370 int	wg_clone_create(struct if_clone *, int);
371 int	wg_clone_destroy(struct ifnet *);
372 void	wgattach(int);
373 
374 uint64_t	peer_counter = 0;
375 struct pool	wg_aip_pool;
376 struct pool	wg_peer_pool;
377 struct pool	wg_ratelimit_pool;
378 struct timeval	underload_interval = { UNDERLOAD_TIMEOUT, 0 };
379 
380 size_t		 wg_counter = 0;
381 struct taskq	*wg_handshake_taskq;
382 struct taskq	*wg_crypt_taskq;
383 
384 struct if_clone	wg_cloner =
385     IF_CLONE_INITIALIZER("wg", wg_clone_create, wg_clone_destroy);
386 
387 struct wg_peer *
388 wg_peer_create(struct wg_softc *sc, uint8_t public[WG_KEY_SIZE])
389 {
390 	struct wg_peer	*peer;
391 	uint64_t	 idx;
392 
393 	rw_assert_wrlock(&sc->sc_lock);
394 
395 	if (sc->sc_peer_num >= MAX_PEERS_PER_IFACE)
396 		return NULL;
397 
398 	if ((peer = pool_get(&wg_peer_pool, PR_NOWAIT)) == NULL)
399 		return NULL;
400 
401 	peer->p_id = peer_counter++;
402 	peer->p_sc = sc;
403 
404 	noise_remote_init(&peer->p_remote, public, &sc->sc_local);
405 	cookie_maker_init(&peer->p_cookie, public);
406 	wg_timers_init(&peer->p_timers);
407 
408 	mtx_init(&peer->p_counters_mtx, IPL_NET);
409 	peer->p_counters_tx = 0;
410 	peer->p_counters_rx = 0;
411 
412 	strlcpy(peer->p_description, "", IFDESCRSIZE);
413 
414 	mtx_init(&peer->p_endpoint_mtx, IPL_NET);
415 	bzero(&peer->p_endpoint, sizeof(peer->p_endpoint));
416 
417 	task_set(&peer->p_send_initiation, wg_send_initiation, peer);
418 	task_set(&peer->p_send_keepalive, wg_send_keepalive, peer);
419 	task_set(&peer->p_clear_secrets, wg_peer_clear_secrets, peer);
420 	task_set(&peer->p_deliver_out, wg_deliver_out, peer);
421 	task_set(&peer->p_deliver_in, wg_deliver_in, peer);
422 
423 	mq_init(&peer->p_stage_queue, MAX_STAGED_PKT, IPL_NET);
424 	mtx_init(&peer->p_encap_queue.q_mtx, IPL_NET);
425 	ml_init(&peer->p_encap_queue.q_list);
426 	mtx_init(&peer->p_decap_queue.q_mtx, IPL_NET);
427 	ml_init(&peer->p_decap_queue.q_list);
428 
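	/*
	 * Pre-allocate the session index slots this peer can hand out;
	 * wg_index_set() pulls them off p_unused_index as sessions are
	 * created.
	 */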
429 	SLIST_INIT(&peer->p_unused_index);
430 	SLIST_INSERT_HEAD(&peer->p_unused_index, &peer->p_index[0],
431 	    i_unused_entry);
432 	SLIST_INSERT_HEAD(&peer->p_unused_index, &peer->p_index[1],
433 	    i_unused_entry);
434 	SLIST_INSERT_HEAD(&peer->p_unused_index, &peer->p_index[2],
435 	    i_unused_entry);
436 
437 	LIST_INIT(&peer->p_aip);
438 
439 	peer->p_start_onlist = 0;
440 
441 	idx = SipHash24(&sc->sc_secret, public, WG_KEY_SIZE);
442 	idx &= sc->sc_peer_mask;
443 
444 	rw_enter_write(&sc->sc_peer_lock);
445 	LIST_INSERT_HEAD(&sc->sc_peer[idx], peer, p_pubkey_entry);
446 	TAILQ_INSERT_TAIL(&sc->sc_peer_seq, peer, p_seq_entry);
447 	sc->sc_peer_num++;
448 	rw_exit_write(&sc->sc_peer_lock);
449 
450 	DPRINTF(sc, "Peer %llu created\n", peer->p_id);
451 	return peer;
452 }
453 
454 struct wg_peer *
455 wg_peer_lookup(struct wg_softc *sc, const uint8_t public[WG_KEY_SIZE])
456 {
457 	uint8_t		 peer_key[WG_KEY_SIZE];
458 	struct wg_peer	*peer;
459 	uint64_t	 idx;
460 
461 	idx = SipHash24(&sc->sc_secret, public, WG_KEY_SIZE);
462 	idx &= sc->sc_peer_mask;
463 
464 	rw_enter_read(&sc->sc_peer_lock);
465 	LIST_FOREACH(peer, &sc->sc_peer[idx], p_pubkey_entry) {
466 		noise_remote_keys(&peer->p_remote, peer_key, NULL);
467 		if (timingsafe_bcmp(peer_key, public, WG_KEY_SIZE) == 0)
468 			goto done;
469 	}
470 	peer = NULL;
471 done:
472 	rw_exit_read(&sc->sc_peer_lock);
473 	return peer;
474 }
475 
476 void
477 wg_peer_destroy(struct wg_peer *peer)
478 {
479 	struct wg_softc	*sc = peer->p_sc;
480 	struct wg_aip *aip, *taip;
481 
482 	rw_assert_wrlock(&sc->sc_lock);
483 
484 	/*
485 	 * Remove peer from the pubkey hashtable and disable all timeouts.
486 	 * After this, and after flushing wg_handshake_taskq, no more handshakes
487 	 * can be started.
488 	 */
489 	rw_enter_write(&sc->sc_peer_lock);
490 	LIST_REMOVE(peer, p_pubkey_entry);
491 	TAILQ_REMOVE(&sc->sc_peer_seq, peer, p_seq_entry);
492 	sc->sc_peer_num--;
493 	rw_exit_write(&sc->sc_peer_lock);
494 
495 	wg_timers_disable(&peer->p_timers);
496 
497 	taskq_barrier(wg_handshake_taskq);
498 
499 	/*
500 	 * Now we drop all allowed IPs, so that no more outgoing packets reach
501 	 * the peer. Then drop all the indexes, so that no more incoming
502 	 * packets reach it. Finally we can flush if_snd, wg_crypt_taskq and
503 	 * then nettq to ensure no more references to the peer exist.
504 	 */
505 	LIST_FOREACH_SAFE(aip, &peer->p_aip, a_entry, taip)
506 		wg_aip_remove(sc, peer, &aip->a_data);
507 
508 	noise_remote_clear(&peer->p_remote);
509 
510 	NET_LOCK();
511 	while (!ifq_empty(&sc->sc_if.if_snd)) {
512 		/*
513 		 * XXX: `if_snd' of stopped interface could still
514 		 * contain packets
515 		 */
516 		if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
517 			ifq_purge(&sc->sc_if.if_snd);
518 			continue;
519 		}
520 		NET_UNLOCK();
521 		tsleep_nsec(sc, PWAIT, "wg_ifq", 1000);
522 		NET_LOCK();
523 	}
524 	NET_UNLOCK();
525 
526 	taskq_barrier(wg_crypt_taskq);
527 	taskq_barrier(net_tq(sc->sc_if.if_index));
528 
529 	if (!mq_empty(&peer->p_stage_queue))
530 		mq_purge(&peer->p_stage_queue);
531 
532 	DPRINTF(sc, "Peer %llu destroyed\n", peer->p_id);
533 	explicit_bzero(peer, sizeof(*peer));
534 	pool_put(&wg_peer_pool, peer);
535 }
536 
537 void
538 wg_peer_set_endpoint_from_tag(struct wg_peer *peer, struct wg_tag *t)
539 {
540 	if (memcmp(&t->t_endpoint, &peer->p_endpoint,
541 	    sizeof(t->t_endpoint)) == 0)
542 		return;
543 
544 	mtx_enter(&peer->p_endpoint_mtx);
545 	peer->p_endpoint = t->t_endpoint;
546 	mtx_leave(&peer->p_endpoint_mtx);
547 }
548 
549 void
550 wg_peer_set_sockaddr(struct wg_peer *peer, struct sockaddr *remote)
551 {
552 	mtx_enter(&peer->p_endpoint_mtx);
553 	memcpy(&peer->p_endpoint.e_remote, remote,
554 	       sizeof(peer->p_endpoint.e_remote));
555 	bzero(&peer->p_endpoint.e_local, sizeof(peer->p_endpoint.e_local));
556 	mtx_leave(&peer->p_endpoint_mtx);
557 }
558 
559 int
560 wg_peer_get_sockaddr(struct wg_peer *peer, struct sockaddr *remote)
561 {
562 	int	ret = 0;
563 
564 	mtx_enter(&peer->p_endpoint_mtx);
565 	if (peer->p_endpoint.e_remote.r_sa.sa_family != AF_UNSPEC)
566 		memcpy(remote, &peer->p_endpoint.e_remote,
567 		       sizeof(peer->p_endpoint.e_remote));
568 	else
569 		ret = ENOENT;
570 	mtx_leave(&peer->p_endpoint_mtx);
571 	return ret;
572 }
573 
574 void
575 wg_peer_clear_src(struct wg_peer *peer)
576 {
577 	mtx_enter(&peer->p_endpoint_mtx);
578 	bzero(&peer->p_endpoint.e_local, sizeof(peer->p_endpoint.e_local));
579 	mtx_leave(&peer->p_endpoint_mtx);
580 }
581 
582 void
583 wg_peer_get_endpoint(struct wg_peer *peer, struct wg_endpoint *endpoint)
584 {
585 	mtx_enter(&peer->p_endpoint_mtx);
586 	memcpy(endpoint, &peer->p_endpoint, sizeof(*endpoint));
587 	mtx_leave(&peer->p_endpoint_mtx);
588 }
589 
590 void
591 wg_peer_counters_add(struct wg_peer *peer, uint64_t tx, uint64_t rx)
592 {
593 	mtx_enter(&peer->p_counters_mtx);
594 	peer->p_counters_tx += tx;
595 	peer->p_counters_rx += rx;
596 	mtx_leave(&peer->p_counters_mtx);
597 }
598 
599 int
600 wg_aip_add(struct wg_softc *sc, struct wg_peer *peer, struct wg_aip_io *d)
601 {
602 	struct art_root	*root;
603 	struct art_node	*node;
604 	struct wg_aip	*aip;
605 	int		 ret = 0;
606 
607 	switch (d->a_af) {
608 	case AF_INET:	root = sc->sc_aip4; break;
609 #ifdef INET6
610 	case AF_INET6:	root = sc->sc_aip6; break;
611 #endif
612 	default: return EAFNOSUPPORT;
613 	}
614 
615 	if ((aip = pool_get(&wg_aip_pool, PR_NOWAIT|PR_ZERO)) == NULL)
616 		return ENOBUFS;
617 
618 	rw_enter_write(&root->ar_lock);
619 	node = art_insert(root, &aip->a_node, &d->a_addr, d->a_cidr);
620 
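	/*
	 * art_insert() returns our node on success; getting back a different
	 * node means the prefix already exists, so reassign that entry to
	 * this peer instead.
	 */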
621 	if (node == &aip->a_node) {
622 		aip->a_peer = peer;
623 		aip->a_data = *d;
624 		LIST_INSERT_HEAD(&peer->p_aip, aip, a_entry);
625 		sc->sc_aip_num++;
626 	} else {
627 		pool_put(&wg_aip_pool, aip);
628 		aip = (struct wg_aip *) node;
629 		if (aip->a_peer != peer) {
630 			LIST_REMOVE(aip, a_entry);
631 			LIST_INSERT_HEAD(&peer->p_aip, aip, a_entry);
632 			aip->a_peer = peer;
633 		}
634 	}
635 	rw_exit_write(&root->ar_lock);
636 	return ret;
637 }
638 
639 struct wg_peer *
640 wg_aip_lookup(struct art_root *root, void *addr)
641 {
642 	struct srp_ref	 sr;
643 	struct art_node	*node;
644 
645 	node = art_match(root, addr, &sr);
646 	srp_leave(&sr);
647 
648 	return node == NULL ? NULL : ((struct wg_aip *) node)->a_peer;
649 }
650 
651 int
652 wg_aip_remove(struct wg_softc *sc, struct wg_peer *peer, struct wg_aip_io *d)
653 {
654 	struct srp_ref	 sr;
655 	struct art_root	*root;
656 	struct art_node	*node;
657 	struct wg_aip	*aip;
658 	int		 ret = 0;
659 
660 	switch (d->a_af) {
661 	case AF_INET:	root = sc->sc_aip4; break;
662 #ifdef INET6
663 	case AF_INET6:	root = sc->sc_aip6; break;
664 #endif
665 	default: return EAFNOSUPPORT;
666 	}
667 
668 	rw_enter_write(&root->ar_lock);
669 	if ((node = art_lookup(root, &d->a_addr, d->a_cidr, &sr)) == NULL) {
670 		ret = ENOENT;
671 	} else if (((struct wg_aip *) node)->a_peer != peer) {
672 		ret = EXDEV;
673 	} else {
674 		aip = (struct wg_aip *)node;
675 		if (art_delete(root, node, &d->a_addr, d->a_cidr) == NULL)
676 			panic("art_delete failed to delete node %p", node);
677 
678 		sc->sc_aip_num--;
679 		LIST_REMOVE(aip, a_entry);
680 		pool_put(&wg_aip_pool, aip);
681 	}
682 
683 	srp_leave(&sr);
684 	rw_exit_write(&root->ar_lock);
685 	return ret;
686 }
687 
688 int
689 wg_socket_open(struct socket **so, int af, in_port_t *port,
690     int *rtable, void *upcall_arg)
691 {
692 	struct mbuf		 mhostnam, mrtable;
693 #ifdef INET6
694 	struct sockaddr_in6	*sin6;
695 #endif
696 	struct sockaddr_in	*sin;
697 	int			 ret;
698 
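	/*
	 * sosetopt() and sobind() take their arguments as mbufs, so build two
	 * on-stack mbufs: one carrying the routing table id and one carrying
	 * the local address to bind to.
	 */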
699 	m_inithdr(&mhostnam);
700 	m_inithdr(&mrtable);
701 
702 	bzero(mtod(&mrtable, u_int *), sizeof(u_int));
703 	*mtod(&mrtable, u_int *) = *rtable;
704 	mrtable.m_len = sizeof(u_int);
705 
706 	if (af == AF_INET) {
707 		sin = mtod(&mhostnam, struct sockaddr_in *);
708 		bzero(sin, sizeof(*sin));
709 		sin->sin_len = sizeof(*sin);
710 		sin->sin_family = AF_INET;
711 		sin->sin_port = *port;
712 		sin->sin_addr.s_addr = INADDR_ANY;
713 		mhostnam.m_len = sin->sin_len;
714 #ifdef INET6
715 	} else if (af == AF_INET6) {
716 		sin6 = mtod(&mhostnam, struct sockaddr_in6 *);
717 		bzero(sin6, sizeof(*sin6));
718 		sin6->sin6_len = sizeof(*sin6);
719 		sin6->sin6_family = AF_INET6;
720 		sin6->sin6_port = *port;
721 		sin6->sin6_addr = (struct in6_addr) { .s6_addr = { 0 } };
722 		mhostnam.m_len = sin6->sin6_len;
723 #endif
724 	} else {
725 		return EAFNOSUPPORT;
726 	}
727 
728 	if ((ret = socreate(af, so, SOCK_DGRAM, 0)) != 0)
729 		return ret;
730 
731 	solock(*so);
732 	sotoinpcb(*so)->inp_upcall = wg_input;
733 	sotoinpcb(*so)->inp_upcall_arg = upcall_arg;
734 	sounlock(*so);
735 
736 	if ((ret = sosetopt(*so, SOL_SOCKET, SO_RTABLE, &mrtable)) == 0) {
737 		solock(*so);
738 		if ((ret = sobind(*so, &mhostnam, curproc)) == 0) {
739 			*port = sotoinpcb(*so)->inp_lport;
740 			*rtable = sotoinpcb(*so)->inp_rtableid;
741 		}
742 		sounlock(*so);
743 	}
744 
745 	if (ret != 0)
746 		wg_socket_close(so);
747 
748 	return ret;
749 }
750 
751 void
752 wg_socket_close(struct socket **so)
753 {
754 	if (*so != NULL && soclose(*so, 0) != 0)
755 		panic("Unable to close wg socket");
756 	*so = NULL;
757 }
758 
759 int
760 wg_bind(struct wg_softc *sc, in_port_t *portp, int *rtablep)
761 {
762 	int		 ret = 0, rtable = *rtablep;
763 	in_port_t	 port = *portp;
764 	struct socket	*so4;
765 #ifdef INET6
766 	struct socket	*so6;
767 	int		 retries = 0;
768 retry:
769 #endif
770 	if ((ret = wg_socket_open(&so4, AF_INET, &port, &rtable, sc)) != 0)
771 		return ret;
772 
773 #ifdef INET6
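	/*
	 * Both sockets must share the same port. If the port auto-selected
	 * for IPv4 turns out to be in use for IPv6, retry, but only when the
	 * caller asked for an ephemeral port (*portp == 0).
	 */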
774 	if ((ret = wg_socket_open(&so6, AF_INET6, &port, &rtable, sc)) != 0) {
775 		if (ret == EADDRINUSE && *portp == 0 && retries++ < 100)
776 			goto retry;
777 		wg_socket_close(&so4);
778 		return ret;
779 	}
780 #endif
781 
782 	rw_enter_write(&sc->sc_so_lock);
783 	wg_socket_close(&sc->sc_so4);
784 	sc->sc_so4 = so4;
785 #ifdef INET6
786 	wg_socket_close(&sc->sc_so6);
787 	sc->sc_so6 = so6;
788 #endif
789 	rw_exit_write(&sc->sc_so_lock);
790 
791 	*portp = port;
792 	*rtablep = rtable;
793 	return 0;
794 }
795 
796 void
797 wg_unbind(struct wg_softc *sc)
798 {
799 	rw_enter_write(&sc->sc_so_lock);
800 	wg_socket_close(&sc->sc_so4);
801 #ifdef INET6
802 	wg_socket_close(&sc->sc_so6);
803 #endif
804 	rw_exit_write(&sc->sc_so_lock);
805 }
806 
807 int
808 wg_send(struct wg_softc *sc, struct wg_endpoint *e, struct mbuf *m)
809 {
810 	struct mbuf	 peernam, *control = NULL;
811 	int		 ret;
812 
813 	/* Get local control address before locking */
814 	if (e->e_remote.r_sa.sa_family == AF_INET) {
815 		if (e->e_local.l_in.s_addr != INADDR_ANY)
816 			control = sbcreatecontrol(&e->e_local.l_in,
817 			    sizeof(struct in_addr), IP_SENDSRCADDR,
818 			    IPPROTO_IP);
819 #ifdef INET6
820 	} else if (e->e_remote.r_sa.sa_family == AF_INET6) {
821 		if (!IN6_IS_ADDR_UNSPECIFIED(&e->e_local.l_in6))
822 			control = sbcreatecontrol(&e->e_local.l_pktinfo6,
823 			    sizeof(struct in6_pktinfo), IPV6_PKTINFO,
824 			    IPPROTO_IPV6);
825 #endif
826 	} else {
827 		m_freem(m);
828 		return EAFNOSUPPORT;
829 	}
830 
831 	/* Get remote address */
832 	peernam.m_type = MT_SONAME;
833 	peernam.m_next = NULL;
834 	peernam.m_nextpkt = NULL;
835 	peernam.m_data = (void *)&e->e_remote.r_sa;
836 	peernam.m_len = e->e_remote.r_sa.sa_len;
837 	peernam.m_flags = 0;
838 
839 	rw_enter_read(&sc->sc_so_lock);
840 	if (e->e_remote.r_sa.sa_family == AF_INET && sc->sc_so4 != NULL)
841 		ret = sosend(sc->sc_so4, &peernam, NULL, m, control, 0);
842 #ifdef INET6
843 	else if (e->e_remote.r_sa.sa_family == AF_INET6 && sc->sc_so6 != NULL)
844 		ret = sosend(sc->sc_so6, &peernam, NULL, m, control, 0);
845 #endif
846 	else {
847 		ret = ENOTCONN;
848 		m_freem(control);
849 		m_freem(m);
850 	}
851 	rw_exit_read(&sc->sc_so_lock);
852 
853 	return ret;
854 }
855 
856 void
857 wg_send_buf(struct wg_softc *sc, struct wg_endpoint *e, uint8_t *buf,
858     size_t len)
859 {
860 	struct mbuf	*m;
861 	int		 ret = 0;
862 
863 retry:
864 	m = m_gethdr(M_WAIT, MT_DATA);
865 	m->m_len = 0;
866 	m_copyback(m, 0, len, buf, M_WAIT);
867 
868 	/* As we're sending a handshake packet here, we want high priority */
869 	m->m_pkthdr.pf.prio = IFQ_MAXPRIO;
870 
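	/*
	 * ret doubles as a "first attempt" flag: on the first pass an
	 * EADDRNOTAVAIL from wg_send() means the cached local address is no
	 * longer valid, so it is cleared and the send retried once; on the
	 * retry, failures are only logged.
	 */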
871 	if (ret == 0) {
872 		ret = wg_send(sc, e, m);
873 		/* Retry if we couldn't bind to e->e_local */
874 		if (ret == EADDRNOTAVAIL) {
875 			bzero(&e->e_local, sizeof(e->e_local));
876 			goto retry;
877 		}
878 	} else {
879 		ret = wg_send(sc, e, m);
880 		if (ret != 0)
881 			DPRINTF(sc, "Unable to send packet\n");
882 	}
883 }
884 
885 struct wg_tag *
886 wg_tag_get(struct mbuf *m)
887 {
888 	struct m_tag	*mtag;
889 
890 	if ((mtag = m_tag_find(m, PACKET_TAG_WIREGUARD, NULL)) == NULL) {
891 		mtag = m_tag_get(PACKET_TAG_WIREGUARD, sizeof(struct wg_tag),
892 		    M_NOWAIT);
893 		if (mtag == NULL)
894 			return (NULL);
895 		bzero(mtag + 1, sizeof(struct wg_tag));
896 		m_tag_prepend(m, mtag);
897 	}
898 	return ((struct wg_tag *)(mtag + 1));
899 }
900 
901 /*
902  * The following section handles the timeout callbacks for a WireGuard session.
903  * These functions provide an "event based" model for controlling wg(8) session
904  * timers. Each function is called after the corresponding event listed below.
905  *
906  * wg_timers_event_data_sent:
907  *	tx: data
908  * wg_timers_event_data_received:
909  *	rx: data
910  * wg_timers_event_any_authenticated_packet_sent:
911  *	tx: keepalive, data, handshake
912  * wg_timers_event_any_authenticated_packet_received:
913  *	rx: keepalive, data, handshake
914  * wg_timers_event_any_authenticated_packet_traversal:
915  *	tx, rx: keepalive, data, handshake
916  * wg_timers_event_handshake_initiated:
917  *	tx: initiation
918  * wg_timers_event_handshake_responded:
919  *	tx: response
920  * wg_timers_event_handshake_complete:
921  *	rx: response, confirmation data
922  * wg_timers_event_session_derived:
923  *	tx: response, rx: response
924  * wg_timers_event_want_initiation:
925  *	tx: data failed, old keys expiring
926  * wg_timers_event_reset_handshake_last_sent:
927  * 	anytime we may immediately want a new handshake
928  */
929 void
930 wg_timers_init(struct wg_timers *t)
931 {
932 	bzero(t, sizeof(*t));
933 	rw_init(&t->t_lock, "wg_timers");
934 	mtx_init(&t->t_handshake_mtx, IPL_NET);
935 
936 	timeout_set(&t->t_new_handshake, wg_timers_run_new_handshake, t);
937 	timeout_set(&t->t_send_keepalive, wg_timers_run_send_keepalive, t);
938 	timeout_set(&t->t_retry_handshake, wg_timers_run_retry_handshake, t);
939 	timeout_set(&t->t_persistent_keepalive,
940 	    wg_timers_run_persistent_keepalive, t);
941 	timeout_set(&t->t_zero_key_material,
942 	    wg_timers_run_zero_key_material, t);
943 }
944 
945 void
946 wg_timers_enable(struct wg_timers *t)
947 {
948 	rw_enter_write(&t->t_lock);
949 	t->t_disabled = 0;
950 	rw_exit_write(&t->t_lock);
951 	wg_timers_run_persistent_keepalive(t);
952 }
953 
954 void
955 wg_timers_disable(struct wg_timers *t)
956 {
957 	rw_enter_write(&t->t_lock);
958 	t->t_disabled = 1;
959 	t->t_need_another_keepalive = 0;
960 	rw_exit_write(&t->t_lock);
961 
962 	timeout_del_barrier(&t->t_new_handshake);
963 	timeout_del_barrier(&t->t_send_keepalive);
964 	timeout_del_barrier(&t->t_retry_handshake);
965 	timeout_del_barrier(&t->t_persistent_keepalive);
966 	timeout_del_barrier(&t->t_zero_key_material);
967 }
968 
969 void
970 wg_timers_set_persistent_keepalive(struct wg_timers *t, uint16_t interval)
971 {
972 	rw_enter_read(&t->t_lock);
973 	if (!t->t_disabled) {
974 		t->t_persistent_keepalive_interval = interval;
975 		wg_timers_run_persistent_keepalive(t);
976 	}
977 	rw_exit_read(&t->t_lock);
978 }
979 
980 int
981 wg_timers_get_persistent_keepalive(struct wg_timers *t, uint16_t *interval)
982 {
983 	*interval = t->t_persistent_keepalive_interval;
984 	return *interval > 0 ? 0 : ENOENT;
985 }
986 
987 void
988 wg_timers_get_last_handshake(struct wg_timers *t, struct timespec *time)
989 {
990 	mtx_enter(&t->t_handshake_mtx);
991 	*time = t->t_handshake_complete;
992 	mtx_leave(&t->t_handshake_mtx);
993 }
994 
995 int
996 wg_timers_expired_handshake_last_sent(struct wg_timers *t)
997 {
998 	struct timespec uptime;
999 	struct timespec expire = { .tv_sec = REKEY_TIMEOUT, .tv_nsec = 0 };
1000 
1001 	getnanouptime(&uptime);
1002 	timespecadd(&t->t_handshake_last_sent, &expire, &expire);
1003 	return timespeccmp(&uptime, &expire, >) ? ETIMEDOUT : 0;
1004 }
1005 
1006 int
1007 wg_timers_check_handshake_last_sent(struct wg_timers *t)
1008 {
1009 	int ret;
1010 	mtx_enter(&t->t_handshake_mtx);
1011 	if ((ret = wg_timers_expired_handshake_last_sent(t)) == ETIMEDOUT)
1012 		getnanouptime(&t->t_handshake_last_sent);
1013 	mtx_leave(&t->t_handshake_mtx);
1014 	return ret;
1015 }
1016 
1017 void
1018 wg_timers_event_data_sent(struct wg_timers *t)
1019 {
1020 	int	msecs = NEW_HANDSHAKE_TIMEOUT * 1000;
1021 	msecs += arc4random_uniform(REKEY_TIMEOUT_JITTER);
1022 
1023 	rw_enter_read(&t->t_lock);
1024 	if (!t->t_disabled && !timeout_pending(&t->t_new_handshake))
1025 		timeout_add_msec(&t->t_new_handshake, msecs);
1026 	rw_exit_read(&t->t_lock);
1027 }
1028 
1029 void
1030 wg_timers_event_data_received(struct wg_timers *t)
1031 {
1032 	rw_enter_read(&t->t_lock);
1033 	if (!t->t_disabled) {
1034 		if (!timeout_pending(&t->t_send_keepalive))
1035 			timeout_add_sec(&t->t_send_keepalive,
1036 			    KEEPALIVE_TIMEOUT);
1037 		else
1038 			t->t_need_another_keepalive = 1;
1039 	}
1040 	rw_exit_read(&t->t_lock);
1041 }
1042 
1043 void
1044 wg_timers_event_any_authenticated_packet_sent(struct wg_timers *t)
1045 {
1046 	timeout_del(&t->t_send_keepalive);
1047 }
1048 
1049 void
1050 wg_timers_event_any_authenticated_packet_received(struct wg_timers *t)
1051 {
1052 	timeout_del(&t->t_new_handshake);
1053 }
1054 
1055 void
1056 wg_timers_event_any_authenticated_packet_traversal(struct wg_timers *t)
1057 {
1058 	rw_enter_read(&t->t_lock);
1059 	if (!t->t_disabled && t->t_persistent_keepalive_interval > 0)
1060 		timeout_add_sec(&t->t_persistent_keepalive,
1061 		    t->t_persistent_keepalive_interval);
1062 	rw_exit_read(&t->t_lock);
1063 }
1064 
1065 void
1066 wg_timers_event_handshake_initiated(struct wg_timers *t)
1067 {
1068 	int	msecs = REKEY_TIMEOUT * 1000;
1069 	msecs += arc4random_uniform(REKEY_TIMEOUT_JITTER);
1070 
1071 	rw_enter_read(&t->t_lock);
1072 	if (!t->t_disabled)
1073 		timeout_add_msec(&t->t_retry_handshake, msecs);
1074 	rw_exit_read(&t->t_lock);
1075 }
1076 
1077 void
1078 wg_timers_event_handshake_responded(struct wg_timers *t)
1079 {
1080 	mtx_enter(&t->t_handshake_mtx);
1081 	getnanouptime(&t->t_handshake_last_sent);
1082 	mtx_leave(&t->t_handshake_mtx);
1083 }
1084 
1085 void
1086 wg_timers_event_handshake_complete(struct wg_timers *t)
1087 {
1088 	rw_enter_read(&t->t_lock);
1089 	if (!t->t_disabled) {
1090 		mtx_enter(&t->t_handshake_mtx);
1091 		timeout_del(&t->t_retry_handshake);
1092 		t->t_handshake_retries = 0;
1093 		getnanotime(&t->t_handshake_complete);
1094 		mtx_leave(&t->t_handshake_mtx);
1095 		wg_timers_run_send_keepalive(t);
1096 	}
1097 	rw_exit_read(&t->t_lock);
1098 }
1099 
1100 void
1101 wg_timers_event_session_derived(struct wg_timers *t)
1102 {
1103 	rw_enter_read(&t->t_lock);
1104 	if (!t->t_disabled)
1105 		timeout_add_sec(&t->t_zero_key_material, REJECT_AFTER_TIME * 3);
1106 	rw_exit_read(&t->t_lock);
1107 }
1108 
1109 void
1110 wg_timers_event_want_initiation(struct wg_timers *t)
1111 {
1112 	rw_enter_read(&t->t_lock);
1113 	if (!t->t_disabled)
1114 		wg_timers_run_send_initiation(t, 0);
1115 	rw_exit_read(&t->t_lock);
1116 }
1117 
1118 void
1119 wg_timers_event_reset_handshake_last_sent(struct wg_timers *t)
1120 {
1121 	mtx_enter(&t->t_handshake_mtx);
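	/*
	 * Back-date the last handshake so it already counts as expired; the
	 * next wg_timers_check_handshake_last_sent() will then let an
	 * initiation go out immediately.
	 */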
1122 	t->t_handshake_last_sent.tv_sec -= (REKEY_TIMEOUT + 1);
1123 	mtx_leave(&t->t_handshake_mtx);
1124 }
1125 
1126 void
1127 wg_timers_run_send_initiation(void *_t, int is_retry)
1128 {
1129 	struct wg_timers *t = _t;
1130 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1131 	if (!is_retry)
1132 		t->t_handshake_retries = 0;
1133 	if (wg_timers_expired_handshake_last_sent(t) == ETIMEDOUT)
1134 		task_add(wg_handshake_taskq, &peer->p_send_initiation);
1135 }
1136 
1137 void
1138 wg_timers_run_retry_handshake(void *_t)
1139 {
1140 	struct wg_timers *t = _t;
1141 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1142 
1143 	mtx_enter(&t->t_handshake_mtx);
1144 	if (t->t_handshake_retries <= MAX_TIMER_HANDSHAKES) {
1145 		t->t_handshake_retries++;
1146 		mtx_leave(&t->t_handshake_mtx);
1147 
1148 		DPRINTF(peer->p_sc, "Handshake for peer %llu did not complete "
1149 		    "after %d seconds, retrying (try %d)\n", peer->p_id,
1150 		    REKEY_TIMEOUT, t->t_handshake_retries + 1);
1151 		wg_peer_clear_src(peer);
1152 		wg_timers_run_send_initiation(t, 1);
1153 	} else {
1154 		mtx_leave(&t->t_handshake_mtx);
1155 
1156 		DPRINTF(peer->p_sc, "Handshake for peer %llu did not complete "
1157 		    "after %d retries, giving up\n", peer->p_id,
1158 		    MAX_TIMER_HANDSHAKES + 2);
1159 
1160 		timeout_del(&t->t_send_keepalive);
1161 		mq_purge(&peer->p_stage_queue);
1162 		if (!timeout_pending(&t->t_zero_key_material))
1163 			timeout_add_sec(&t->t_zero_key_material,
1164 			    REJECT_AFTER_TIME * 3);
1165 	}
1166 }
1167 
1168 void
1169 wg_timers_run_send_keepalive(void *_t)
1170 {
1171 	struct wg_timers *t = _t;
1172 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1173 
1174 	task_add(wg_crypt_taskq, &peer->p_send_keepalive);
1175 	if (t->t_need_another_keepalive) {
1176 		t->t_need_another_keepalive = 0;
1177 		timeout_add_sec(&t->t_send_keepalive, KEEPALIVE_TIMEOUT);
1178 	}
1179 }
1180 
1181 void
1182 wg_timers_run_new_handshake(void *_t)
1183 {
1184 	struct wg_timers *t = _t;
1185 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1186 
1187 	DPRINTF(peer->p_sc, "Retrying handshake with peer %llu because we "
1188 	    "stopped hearing back after %d seconds\n",
1189 	    peer->p_id, NEW_HANDSHAKE_TIMEOUT);
1190 	wg_peer_clear_src(peer);
1191 
1192 	wg_timers_run_send_initiation(t, 0);
1193 }
1194 
1195 void
1196 wg_timers_run_zero_key_material(void *_t)
1197 {
1198 	struct wg_timers *t = _t;
1199 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1200 
1201 	DPRINTF(peer->p_sc, "Zeroing out keys for peer %llu\n", peer->p_id);
1202 	task_add(wg_handshake_taskq, &peer->p_clear_secrets);
1203 }
1204 
1205 void
1206 wg_timers_run_persistent_keepalive(void *_t)
1207 {
1208 	struct wg_timers *t = _t;
1209 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1210 	if (t->t_persistent_keepalive_interval != 0)
1211 		task_add(wg_crypt_taskq, &peer->p_send_keepalive);
1212 }
1213 
1214 /* The following functions handle handshakes */
1215 void
1216 wg_peer_send_buf(struct wg_peer *peer, uint8_t *buf, size_t len)
1217 {
1218 	struct wg_endpoint	 endpoint;
1219 
1220 	wg_peer_counters_add(peer, len, 0);
1221 	wg_timers_event_any_authenticated_packet_traversal(&peer->p_timers);
1222 	wg_timers_event_any_authenticated_packet_sent(&peer->p_timers);
1223 	wg_peer_get_endpoint(peer, &endpoint);
1224 	wg_send_buf(peer->p_sc, &endpoint, buf, len);
1225 }
1226 
1227 void
1228 wg_send_initiation(void *_peer)
1229 {
1230 	struct wg_peer			*peer = _peer;
1231 	struct wg_pkt_initiation	 pkt;
1232 
1233 	if (wg_timers_check_handshake_last_sent(&peer->p_timers) != ETIMEDOUT)
1234 		return;
1235 
1236 	DPRINTF(peer->p_sc, "Sending handshake initiation to peer %llu\n",
1237 	    peer->p_id);
1238 
1239 	if (noise_create_initiation(&peer->p_remote, &pkt.s_idx, pkt.ue, pkt.es,
1240 				    pkt.ets) != 0)
1241 		return;
1242 	pkt.t = WG_PKT_INITIATION;
1243 	cookie_maker_mac(&peer->p_cookie, &pkt.m, &pkt,
1244 	    sizeof(pkt)-sizeof(pkt.m));
1245 	wg_peer_send_buf(peer, (uint8_t *)&pkt, sizeof(pkt));
1246 	wg_timers_event_handshake_initiated(&peer->p_timers);
1247 }
1248 
1249 void
1250 wg_send_response(struct wg_peer *peer)
1251 {
1252 	struct wg_pkt_response	 pkt;
1253 
1254 	DPRINTF(peer->p_sc, "Sending handshake response to peer %llu\n",
1255 	    peer->p_id);
1256 
1257 	if (noise_create_response(&peer->p_remote, &pkt.s_idx, &pkt.r_idx,
1258 				  pkt.ue, pkt.en) != 0)
1259 		return;
1260 	if (noise_remote_begin_session(&peer->p_remote) != 0)
1261 		return;
1262 	wg_timers_event_session_derived(&peer->p_timers);
1263 	pkt.t = WG_PKT_RESPONSE;
1264 	cookie_maker_mac(&peer->p_cookie, &pkt.m, &pkt,
1265 	    sizeof(pkt)-sizeof(pkt.m));
1266 	wg_timers_event_handshake_responded(&peer->p_timers);
1267 	wg_peer_send_buf(peer, (uint8_t *)&pkt, sizeof(pkt));
1268 }
1269 
1270 void
1271 wg_send_cookie(struct wg_softc *sc, struct cookie_macs *cm, uint32_t idx,
1272     struct wg_endpoint *e)
1273 {
1274 	struct wg_pkt_cookie	pkt;
1275 
1276 	DPRINTF(sc, "Sending cookie response for denied handshake message\n");
1277 
1278 	pkt.t = WG_PKT_COOKIE;
1279 	pkt.r_idx = idx;
1280 
1281 	cookie_checker_create_payload(&sc->sc_cookie, cm, pkt.nonce,
1282 	    pkt.ec, &e->e_remote.r_sa);
1283 
1284 	wg_send_buf(sc, e, (uint8_t *)&pkt, sizeof(pkt));
1285 }
1286 
1287 void
1288 wg_send_keepalive(void *_peer)
1289 {
1290 	struct wg_peer	*peer = _peer;
1291 	struct wg_softc	*sc = peer->p_sc;
1292 	struct wg_tag	*t;
1293 	struct mbuf	*m;
1294 
1295 	if (!mq_empty(&peer->p_stage_queue))
1296 		goto send;
1297 
1298 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1299 		return;
1300 
1301 	if ((t = wg_tag_get(m)) == NULL) {
1302 		m_freem(m);
1303 		return;
1304 	}
1305 
1306 	m->m_len = 0;
1307 	m_calchdrlen(m);
1308 
1309 	t->t_peer = peer;
1310 	t->t_mbuf = NULL;
1311 	t->t_done = 0;
1312 	t->t_mtu = 0; /* MTU == 0 OK for keepalive */
1313 
1314 	mq_push(&peer->p_stage_queue, m);
1315 send:
1316 	if (noise_remote_ready(&peer->p_remote) == 0) {
1317 		wg_queue_out(sc, peer);
1318 		task_add(wg_crypt_taskq, &sc->sc_encap);
1319 	} else {
1320 		wg_timers_event_want_initiation(&peer->p_timers);
1321 	}
1322 }
1323 
1324 void
1325 wg_peer_clear_secrets(void *_peer)
1326 {
1327 	struct wg_peer *peer = _peer;
1328 	noise_remote_clear(&peer->p_remote);
1329 }
1330 
1331 void
1332 wg_handshake(struct wg_softc *sc, struct mbuf *m)
1333 {
1334 	struct wg_tag			*t;
1335 	struct wg_pkt_initiation	*init;
1336 	struct wg_pkt_response		*resp;
1337 	struct wg_pkt_cookie		*cook;
1338 	struct wg_peer			*peer;
1339 	struct noise_remote		*remote;
1340 	int				 res, underload = 0;
1341 	static struct timeval		 wg_last_underload; /* microuptime */
1342 
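	/*
	 * Consider the interface under load while at least
	 * MAX_QUEUED_HANDSHAKES/8 handshake packets are queued, and for
	 * UNDERLOAD_TIMEOUT seconds after the queue was last that busy.
	 * Under load, cookie_checker_validate_macs() may demand a cookie
	 * round trip before the expensive handshake work is done.
	 */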
1343 	if (mq_len(&sc->sc_handshake_queue) >= MAX_QUEUED_HANDSHAKES/8) {
1344 		getmicrouptime(&wg_last_underload);
1345 		underload = 1;
1346 	} else if (wg_last_underload.tv_sec != 0) {
1347 		if (!ratecheck(&wg_last_underload, &underload_interval))
1348 			underload = 1;
1349 		else
1350 			bzero(&wg_last_underload, sizeof(wg_last_underload));
1351 	}
1352 
1353 	t = wg_tag_get(m);
1354 
1355 	switch (*mtod(m, uint32_t *)) {
1356 	case WG_PKT_INITIATION:
1357 		init = mtod(m, struct wg_pkt_initiation *);
1358 
1359 		res = cookie_checker_validate_macs(&sc->sc_cookie, &init->m,
1360 				init, sizeof(*init) - sizeof(init->m),
1361 				underload, &t->t_endpoint.e_remote.r_sa);
1362 
1363 		if (res == EINVAL) {
1364 			DPRINTF(sc, "Invalid initiation MAC\n");
1365 			goto error;
1366 		} else if (res == ECONNREFUSED) {
1367 			DPRINTF(sc, "Handshake ratelimited\n");
1368 			goto error;
1369 		} else if (res == EAGAIN) {
1370 			wg_send_cookie(sc, &init->m, init->s_idx,
1371 			    &t->t_endpoint);
1372 			goto error;
1373 		} else if (res != 0) {
1374 			panic("unexpected response: %d", res);
1375 		}
1376 
1377 		if (noise_consume_initiation(&sc->sc_local, &remote,
1378 		    init->s_idx, init->ue, init->es, init->ets) != 0) {
1379 			DPRINTF(sc, "Invalid handshake initiation\n");
1380 			goto error;
1381 		}
1382 
1383 		peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1384 
1385 		DPRINTF(sc, "Receiving handshake initiation from peer %llu\n",
1386 		    peer->p_id);
1387 
1388 		wg_peer_counters_add(peer, 0, sizeof(*init));
1389 		wg_peer_set_endpoint_from_tag(peer, t);
1390 		wg_send_response(peer);
1391 		break;
1392 	case WG_PKT_RESPONSE:
1393 		resp = mtod(m, struct wg_pkt_response *);
1394 
1395 		res = cookie_checker_validate_macs(&sc->sc_cookie, &resp->m,
1396 				resp, sizeof(*resp) - sizeof(resp->m),
1397 				underload, &t->t_endpoint.e_remote.r_sa);
1398 
1399 		if (res == EINVAL) {
1400 			DPRINTF(sc, "Invalid response MAC\n");
1401 			goto error;
1402 		} else if (res == ECONNREFUSED) {
1403 			DPRINTF(sc, "Handshake ratelimited\n");
1404 			goto error;
1405 		} else if (res == EAGAIN) {
1406 			wg_send_cookie(sc, &resp->m, resp->s_idx,
1407 			    &t->t_endpoint);
1408 			goto error;
1409 		} else if (res != 0) {
1410 			panic("unexpected response: %d", res);
1411 		}
1412 
1413 		if ((remote = wg_index_get(sc, resp->r_idx)) == NULL) {
1414 			DPRINTF(sc, "Unknown handshake response\n");
1415 			goto error;
1416 		}
1417 
1418 		peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1419 
1420 		if (noise_consume_response(remote, resp->s_idx, resp->r_idx,
1421 					   resp->ue, resp->en) != 0) {
1422 			DPRINTF(sc, "Invalid handshake response\n");
1423 			goto error;
1424 		}
1425 
1426 		DPRINTF(sc, "Receiving handshake response from peer %llu\n",
1427 				peer->p_id);
1428 
1429 		wg_peer_counters_add(peer, 0, sizeof(*resp));
1430 		wg_peer_set_endpoint_from_tag(peer, t);
1431 		if (noise_remote_begin_session(&peer->p_remote) == 0) {
1432 			wg_timers_event_session_derived(&peer->p_timers);
1433 			wg_timers_event_handshake_complete(&peer->p_timers);
1434 		}
1435 		break;
1436 	case WG_PKT_COOKIE:
1437 		cook = mtod(m, struct wg_pkt_cookie *);
1438 
1439 		if ((remote = wg_index_get(sc, cook->r_idx)) == NULL) {
1440 			DPRINTF(sc, "Unknown cookie index\n");
1441 			goto error;
1442 		}
1443 
1444 		peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1445 
1446 		if (cookie_maker_consume_payload(&peer->p_cookie,
1447 		    cook->nonce, cook->ec) != 0) {
1448 			DPRINTF(sc, "Could not decrypt cookie response\n");
1449 			goto error;
1450 		}
1451 
1452 		DPRINTF(sc, "Receiving cookie response\n");
1453 		goto error;
1454 	default:
1455 		panic("invalid packet in handshake queue");
1456 	}
1457 
1458 	wg_timers_event_any_authenticated_packet_received(&peer->p_timers);
1459 	wg_timers_event_any_authenticated_packet_traversal(&peer->p_timers);
1460 error:
1461 	m_freem(m);
1462 }
1463 
1464 void
1465 wg_handshake_worker(void *_sc)
1466 {
1467 	struct mbuf *m;
1468 	struct wg_softc *sc = _sc;
1469 	while ((m = mq_dequeue(&sc->sc_handshake_queue)) != NULL)
1470 		wg_handshake(sc, m);
1471 }
1472 
1473 /*
1474  * The following functions handle encapsulation (encryption) and
1475  * decapsulation (decryption). The wg_{en,de}cap functions will run in the
1476  * sc_crypt_taskq, while wg_deliver_{in,out} must be serialised and will run
1477  * wg_crypt_taskq, while wg_deliver_{in,out} must be serialised and will run
1478  *
1479  * The packets are tracked in two queues, a serial queue and a parallel queue.
1480  *  - The parallel queue is used to distribute the encryption across multiple
1481  *    threads.
1482  *  - The serial queue ensures that packets are not reordered and are
1483  *    delivered in sequence.
1484  * The wg_tag attached to the packet contains two fields to help the two
1485  * queues interact.
1486  *  - t_done: The parallel queue has finished with the packet, now the serial
1487  *            queue can do its work.
1488  *  - t_mbuf: Used to store the *crypted packet. In the case of encryption,
1489  *            this is a newly allocated packet, and in the case of decryption,
1490  *            it is a pointer to the same packet, which has been decrypted and
1491  *            truncated. If t_mbuf is NULL, then *cryption failed and this
1492  *            packet should not be passed.
1493  * wg_{en,de}cap work on the parallel queue, while wg_deliver_{in,out} work
1494  * on the serial queue.
1495  */
1496 void
1497 wg_encap(struct wg_softc *sc, struct mbuf *m)
1498 {
1499 	int res = 0;
1500 	struct wg_pkt_data	*data;
1501 	struct wg_peer		*peer;
1502 	struct wg_tag		*t;
1503 	struct mbuf		*mc;
1504 	size_t			 padding_len, plaintext_len, out_len;
1505 	uint64_t		 nonce;
1506 
1507 	t = wg_tag_get(m);
1508 	peer = t->t_peer;
1509 
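	/*
	 * Pad the plaintext up to a 16-byte boundary, but never beyond the
	 * MTU recorded for this packet; the padding bytes are zeroed below.
	 */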
1510 	plaintext_len = min(WG_PKT_WITH_PADDING(m->m_pkthdr.len), t->t_mtu);
1511 	padding_len = plaintext_len - m->m_pkthdr.len;
1512 	out_len = sizeof(struct wg_pkt_data) + plaintext_len + NOISE_AUTHTAG_LEN;
1513 
1514 	/*
1515 	 * For the time being we allocate a new packet with sufficient size to
1516 	 * hold the encrypted data and headers. Avoiding the copy would be
1517 	 * difficult, as p_encap_queue (an mbuf_list) holds a reference to the
1518 	 * mbuf; if we m_makespace or similar, we risk corrupting that list.
1519 	 * Additionally, we only pass a buf and buf length to
1520 	 * noise_remote_encrypt. Technically it would be possible to teach
1521 	 * noise_remote_encrypt about mbufs, but we would need to sort out the
1522 	 * p_encap_queue situation first.
1523 	 */
1524 	if ((mc = m_clget(NULL, M_NOWAIT, out_len)) == NULL)
1525 		goto error;
1526 
1527 	data = mtod(mc, struct wg_pkt_data *);
1528 	m_copydata(m, 0, m->m_pkthdr.len, data->buf);
1529 	bzero(data->buf + m->m_pkthdr.len, padding_len);
1530 	data->t = WG_PKT_DATA;
1531 
1532 	/*
1533 	 * Copy the flow hash from the inner packet to the outer packet, so
1534 	 * that fq_codel can properly separate streams, rather than falling
1535 	 * back to random buckets.
1536 	 */
1537 	mc->m_pkthdr.ph_flowid = m->m_pkthdr.ph_flowid;
1538 
1539 	mc->m_pkthdr.pf.prio = m->m_pkthdr.pf.prio;
1540 
1541 	res = noise_remote_encrypt(&peer->p_remote, &data->r_idx, &nonce,
1542 				   data->buf, plaintext_len);
1543 	nonce = htole64(nonce); /* Wire format is little endian. */
1544 	memcpy(data->nonce, &nonce, sizeof(data->nonce));
1545 
1546 	if (__predict_false(res == EINVAL)) {
1547 		m_freem(mc);
1548 		goto error;
1549 	} else if (__predict_false(res == ESTALE)) {
1550 		wg_timers_event_want_initiation(&peer->p_timers);
1551 	} else if (__predict_false(res != 0)) {
1552 		panic("unexpected result: %d", res);
1553 	}
1554 
1555 	/* A packet with length 0 is a keepalive packet */
1556 	if (__predict_false(m->m_pkthdr.len == 0))
1557 		DPRINTF(sc, "Sending keepalive packet to peer %llu\n",
1558 		    peer->p_id);
1559 
1560 	mc->m_pkthdr.ph_loopcnt = m->m_pkthdr.ph_loopcnt;
1561 	mc->m_flags &= ~(M_MCAST | M_BCAST);
1562 	mc->m_len = out_len;
1563 	m_calchdrlen(mc);
1564 
1565 	/*
1566 	 * We would count ifc_opackets, ifc_obytes of m here, except if_snd
1567 	 * already does that for us, so no need to worry about it.
1568 	counters_pkt(sc->sc_if.if_counters, ifc_opackets, ifc_obytes,
1569 	    m->m_pkthdr.len);
1570 	 */
1571 	wg_peer_counters_add(peer, mc->m_pkthdr.len, 0);
1572 
1573 	t->t_mbuf = mc;
1574 error:
1575 	t->t_done = 1;
1576 	task_add(net_tq(sc->sc_if.if_index), &peer->p_deliver_out);
1577 }
1578 
1579 void
1580 wg_decap(struct wg_softc *sc, struct mbuf *m)
1581 {
1582 	int			 res, len;
1583 	struct ip		*ip;
1584 	struct ip6_hdr		*ip6;
1585 	struct wg_pkt_data	*data;
1586 	struct wg_peer		*peer, *allowed_peer;
1587 	struct wg_tag		*t;
1588 	size_t			 payload_len;
1589 	uint64_t		 nonce;
1590 
1591 	t = wg_tag_get(m);
1592 	peer = t->t_peer;
1593 
1594 	/*
1595 	 * As in wg_encap, we pass a buf and buf length to noise_remote_decrypt.
1596 	 * Again, it would be possible to teach it about mbufs, but we would
1597 	 * need to sort out the p_decap_queue situation first. However, we do
1598 	 * not need to allocate a new mbuf here, as the decrypted packet is
1599 	 * strictly smaller than the encrypted one. We just set t_mbuf to m and
1600 	 * wg_deliver_in knows how to deal with that.
1601 	 */
1602 	data = mtod(m, struct wg_pkt_data *);
1603 	payload_len = m->m_pkthdr.len - sizeof(struct wg_pkt_data);
1604 	memcpy(&nonce, data->nonce, sizeof(nonce));
1605 	nonce = le64toh(nonce); /* Wire format is little endian. */
1606 	res = noise_remote_decrypt(&peer->p_remote, data->r_idx, nonce,
1607 				   data->buf, payload_len);
1608 
1609 	if (__predict_false(res == EINVAL)) {
1610 		goto error;
1611 	} else if (__predict_false(res == ECONNRESET)) {
1612 		wg_timers_event_handshake_complete(&peer->p_timers);
1613 	} else if (__predict_false(res == ESTALE)) {
1614 		wg_timers_event_want_initiation(&peer->p_timers);
1615 	} else if (__predict_false(res != 0)) {
1616 		panic("unexpected response: %d", res);
1617 	}
1618 
1619 	wg_peer_set_endpoint_from_tag(peer, t);
1620 
1621 	wg_peer_counters_add(peer, 0, m->m_pkthdr.len);
1622 
1623 	m_adj(m, sizeof(struct wg_pkt_data));
1624 	m_adj(m, -NOISE_AUTHTAG_LEN);
1625 
1626 	counters_pkt(sc->sc_if.if_counters, ifc_ipackets, ifc_ibytes,
1627 	    m->m_pkthdr.len);
1628 
1629 	/* A packet with length 0 is a keepalive packet */
1630 	if (__predict_false(m->m_pkthdr.len == 0)) {
1631 		DPRINTF(sc, "Receiving keepalive packet from peer "
1632 		    "%llu\n", peer->p_id);
1633 		goto done;
1634 	}
1635 
1636 	/*
1637 	 * We can let the network stack handle the intricate validation of the
1638 	 * IP header; we only check the size and the version, so that we can
1639 	 * read the source address in wg_aip_lookup.
1640 	 *
1641 	 * We also need to trim the packet, as it was likely padded before
1642 	 * encryption. While we could drop it here, it will be more helpful to
1643 	 * pass it to bpf_mtap and use the counters that people are expecting
1644 	 * in ipv4_input and ipv6_input. We can rely on ipv4_input and
1645 	 * ipv6_input to properly validate the headers.
1646 	 */
1647 	ip = mtod(m, struct ip *);
1648 	ip6 = mtod(m, struct ip6_hdr *);
1649 
1650 	if (m->m_pkthdr.len >= sizeof(struct ip) && ip->ip_v == IPVERSION) {
1651 		m->m_pkthdr.ph_family = AF_INET;
1652 
1653 		len = ntohs(ip->ip_len);
1654 		if (len >= sizeof(struct ip) && len < m->m_pkthdr.len)
1655 			m_adj(m, len - m->m_pkthdr.len);
1656 
1657 		allowed_peer = wg_aip_lookup(sc->sc_aip4, &ip->ip_src);
1658 #ifdef INET6
1659 	} else if (m->m_pkthdr.len >= sizeof(struct ip6_hdr) &&
1660 	    (ip6->ip6_vfc & IPV6_VERSION_MASK) == IPV6_VERSION) {
1661 		m->m_pkthdr.ph_family = AF_INET6;
1662 
1663 		len = ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr);
1664 		if (len < m->m_pkthdr.len)
1665 			m_adj(m, len - m->m_pkthdr.len);
1666 
1667 		allowed_peer = wg_aip_lookup(sc->sc_aip6, &ip6->ip6_src);
1668 #endif
1669 	} else {
1670 		DPRINTF(sc, "Packet is neither ipv4 nor ipv6 from "
1671 		    "peer %llu\n", peer->p_id);
1672 		goto error;
1673 	}
1674 
1675 	if (__predict_false(peer != allowed_peer)) {
1676 		DPRINTF(sc, "Packet has unallowed src IP from peer "
1677 		    "%llu\n", peer->p_id);
1678 		goto error;
1679 	}
1680 
1681 	/* tunneled packet was not offloaded */
1682 	m->m_pkthdr.csum_flags = 0;
1683 
1684 	m->m_pkthdr.ph_ifidx = sc->sc_if.if_index;
1685 	m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
1686 	m->m_flags &= ~(M_MCAST | M_BCAST);
1687 #if NPF > 0
1688 	pf_pkt_addr_changed(m);
1689 #endif /* NPF > 0 */
1690 
1691 done:
1692 	t->t_mbuf = m;
1693 error:
1694 	t->t_done = 1;
1695 	task_add(net_tq(sc->sc_if.if_index), &peer->p_deliver_in);
1696 }
1697 
1698 void
1699 wg_encap_worker(void *_sc)
1700 {
1701 	struct mbuf *m;
1702 	struct wg_softc *sc = _sc;
1703 	while ((m = wg_ring_dequeue(&sc->sc_encap_ring)) != NULL)
1704 		wg_encap(sc, m);
1705 }
1706 
1707 void
1708 wg_decap_worker(void *_sc)
1709 {
1710 	struct mbuf *m;
1711 	struct wg_softc *sc = _sc;
1712 	while ((m = wg_ring_dequeue(&sc->sc_decap_ring)) != NULL)
1713 		wg_decap(sc, m);
1714 }
1715 
1716 void
1717 wg_deliver_out(void *_peer)
1718 {
1719 	struct wg_peer		*peer = _peer;
1720 	struct wg_softc		*sc = peer->p_sc;
1721 	struct wg_endpoint	 endpoint;
1722 	struct wg_tag		*t;
1723 	struct mbuf		*m;
1724 	int			 ret;
1725 
1726 	wg_peer_get_endpoint(peer, &endpoint);
1727 
1728 	while ((m = wg_queue_dequeue(&peer->p_encap_queue, &t)) != NULL) {
1729 		/* t_mbuf will contain the encrypted packet */
1730 		if (t->t_mbuf == NULL){
1731 			counters_inc(sc->sc_if.if_counters, ifc_oerrors);
1732 			m_freem(m);
1733 			continue;
1734 		}
1735 
1736 		ret = wg_send(sc, &endpoint, t->t_mbuf);
1737 
1738 		if (ret == 0) {
1739 			wg_timers_event_any_authenticated_packet_traversal(
1740 			    &peer->p_timers);
1741 			wg_timers_event_any_authenticated_packet_sent(
1742 			    &peer->p_timers);
1743 
1744 			if (m->m_pkthdr.len != 0)
1745 				wg_timers_event_data_sent(&peer->p_timers);
1746 		} else if (ret == EADDRNOTAVAIL) {
1747 			wg_peer_clear_src(peer);
1748 			wg_peer_get_endpoint(peer, &endpoint);
1749 		}
1750 
1751 		m_freem(m);
1752 	}
1753 }
1754 
1755 void
1756 wg_deliver_in(void *_peer)
1757 {
1758 	struct wg_peer	*peer = _peer;
1759 	struct wg_softc	*sc = peer->p_sc;
1760 	struct wg_tag	*t;
1761 	struct mbuf	*m;
1762 
1763 	while ((m = wg_queue_dequeue(&peer->p_decap_queue, &t)) != NULL) {
1764 		/* t_mbuf will contain the decrypted packet */
1765 		if (t->t_mbuf == NULL) {
1766 			counters_inc(sc->sc_if.if_counters, ifc_ierrors);
1767 			m_freem(m);
1768 			continue;
1769 		}
1770 
1771 		/* From here on m == t->t_mbuf */
1772 		KASSERT(m == t->t_mbuf);
1773 
1774 		wg_timers_event_any_authenticated_packet_received(
1775 		    &peer->p_timers);
1776 		wg_timers_event_any_authenticated_packet_traversal(
1777 		    &peer->p_timers);
1778 
1779 		if (m->m_pkthdr.len == 0) {
1780 			m_freem(m);
1781 			continue;
1782 		}
1783 
1784 #if NBPFILTER > 0
1785 		if (sc->sc_if.if_bpf != NULL)
1786 			bpf_mtap_af(sc->sc_if.if_bpf,
1787 			    m->m_pkthdr.ph_family, m, BPF_DIRECTION_IN);
1788 #endif
1789 
1790 		NET_LOCK();
1791 		if (m->m_pkthdr.ph_family == AF_INET)
1792 			ipv4_input(&sc->sc_if, m);
1793 #ifdef INET6
1794 		else if (m->m_pkthdr.ph_family == AF_INET6)
1795 			ipv6_input(&sc->sc_if, m);
1796 #endif
1797 		else
1798 			panic("invalid ph_family");
1799 		NET_UNLOCK();
1800 
1801 		wg_timers_event_data_received(&peer->p_timers);
1802 	}
1803 }
1804 
1805 int
1806 wg_queue_in(struct wg_softc *sc, struct wg_peer *peer, struct mbuf *m)
1807 {
1808 	struct wg_ring		*parallel = &sc->sc_decap_ring;
1809 	struct wg_queue		*serial = &peer->p_decap_queue;
1810 	struct wg_tag		*t;
1811 
1812 	mtx_enter(&serial->q_mtx);
1813 	if (serial->q_list.ml_len < MAX_QUEUED_PKT) {
1814 		ml_enqueue(&serial->q_list, m);
1815 		mtx_leave(&serial->q_mtx);
1816 	} else {
1817 		mtx_leave(&serial->q_mtx);
1818 		m_freem(m);
1819 		return ENOBUFS;
1820 	}
1821 
1822 	mtx_enter(&parallel->r_mtx);
1823 	if (parallel->r_tail - parallel->r_head < MAX_QUEUED_PKT) {
1824 		parallel->r_buf[parallel->r_tail & MAX_QUEUED_PKT_MASK] = m;
1825 		parallel->r_tail++;
1826 		mtx_leave(&parallel->r_mtx);
1827 	} else {
1828 		mtx_leave(&parallel->r_mtx);
1829 		t = wg_tag_get(m);
1830 		t->t_done = 1;
1831 		return ENOBUFS;
1832 	}
1833 
1834 	return 0;
1835 }
1836 
1837 void
1838 wg_queue_out(struct wg_softc *sc, struct wg_peer *peer)
1839 {
1840 	struct wg_ring		*parallel = &sc->sc_encap_ring;
1841 	struct wg_queue		*serial = &peer->p_encap_queue;
1842 	struct mbuf_list 	 ml, ml_free;
1843 	struct mbuf		*m;
1844 	struct wg_tag		*t;
1845 	int			 dropped;
1846 
1847 	/*
1848 	 * We delist all staged packets and then add them to the queues. This
1849 	 * can race with wg_qstart when called from wg_send_keepalive; however,
1850 	 * wg_qstart itself will not race, as it is serialised.
1851 	 */
1852 	mq_delist(&peer->p_stage_queue, &ml);
1853 	ml_init(&ml_free);
1854 
1855 	while ((m = ml_dequeue(&ml)) != NULL) {
1856 		mtx_enter(&serial->q_mtx);
1857 		if (serial->q_list.ml_len < MAX_QUEUED_PKT) {
1858 			ml_enqueue(&serial->q_list, m);
1859 			mtx_leave(&serial->q_mtx);
1860 		} else {
1861 			mtx_leave(&serial->q_mtx);
1862 			ml_enqueue(&ml_free, m);
1863 			continue;
1864 		}
1865 
1866 		mtx_enter(&parallel->r_mtx);
1867 		if (parallel->r_tail - parallel->r_head < MAX_QUEUED_PKT) {
1868 			parallel->r_buf[parallel->r_tail & MAX_QUEUED_PKT_MASK] = m;
1869 			parallel->r_tail++;
1870 			mtx_leave(&parallel->r_mtx);
1871 		} else {
1872 			mtx_leave(&parallel->r_mtx);
1873 			t = wg_tag_get(m);
1874 			t->t_done = 1;
1875 		}
1876 	}
1877 
1878 	if ((dropped = ml_purge(&ml_free)) > 0)
1879 		counters_add(sc->sc_if.if_counters, ifc_oqdrops, dropped);
1880 }
1881 
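/*
 * The rings use free-running head/tail counters; slots are addressed
 * modulo MAX_QUEUED_PKT via MAX_QUEUED_PKT_MASK, so a ring never holds
 * more than MAX_QUEUED_PKT mbufs. All accesses take r_mtx.
 */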
1882 struct mbuf *
1883 wg_ring_dequeue(struct wg_ring *r)
1884 {
1885 	struct mbuf *m = NULL;
1886 	mtx_enter(&r->r_mtx);
1887 	if (r->r_head != r->r_tail) {
1888 		m = r->r_buf[r->r_head & MAX_QUEUED_PKT_MASK];
1889 		r->r_head++;
1890 	}
1891 	mtx_leave(&r->r_mtx);
1892 	return m;
1893 }
1894 
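/*
 * wg_queue_dequeue() only returns the mbuf at the head of a serial
 * queue, and only once its tag has been marked done by the crypto
 * worker. This re-serialises packets that may have been encrypted or
 * decrypted out of order on different CPUs.
 */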
1895 struct mbuf *
1896 wg_queue_dequeue(struct wg_queue *q, struct wg_tag **t)
1897 {
1898 	struct mbuf *m;
1899 	mtx_enter(&q->q_mtx);
1900 	if ((m = q->q_list.ml_head) != NULL && (*t = wg_tag_get(m))->t_done)
1901 		ml_dequeue(&q->q_list);
1902 	else
1903 		m = NULL;
1904 	mtx_leave(&q->q_mtx);
1905 	return m;
1906 }
1907 
1908 size_t
1909 wg_queue_len(struct wg_queue *q)
1910 {
1911 	size_t len;
1912 	mtx_enter(&q->q_mtx);
1913 	len = q->q_list.ml_len;
1914 	mtx_leave(&q->q_mtx);
1915 	return len;
1916 }
1917 
1918 struct noise_remote *
1919 wg_remote_get(void *_sc, uint8_t public[NOISE_PUBLIC_KEY_LEN])
1920 {
1921 	struct wg_peer	*peer;
1922 	struct wg_softc	*sc = _sc;
1923 	if ((peer = wg_peer_lookup(sc, public)) == NULL)
1924 		return NULL;
1925 	return &peer->p_remote;
1926 }
1927 
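/*
 * The index hashtable maps the random 32 bit local index carried in
 * handshake and data packets back to a noise_remote. wg_index_set()
 * draws a fresh arc4random() value, retrying if it is already in use,
 * and wg_index_drop() returns the entry to the peer's unused list.
 */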
1928 uint32_t
1929 wg_index_set(void *_sc, struct noise_remote *remote)
1930 {
1931 	struct wg_peer	*peer;
1932 	struct wg_softc	*sc = _sc;
1933 	struct wg_index *index, *iter;
1934 	uint32_t	 key;
1935 
1936 	/*
1937 	 * We can modify this without a lock, as wg_index_set and wg_index_drop are
1938 	 * guaranteed to be serialised (per remote).
1939 	 */
1940 	peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1941 	index = SLIST_FIRST(&peer->p_unused_index);
1942 	KASSERT(index != NULL);
1943 	SLIST_REMOVE_HEAD(&peer->p_unused_index, i_unused_entry);
1944 
1945 	index->i_value = remote;
1946 
1947 	mtx_enter(&sc->sc_index_mtx);
1948 assign_id:
1949 	key = index->i_key = arc4random();
1950 	key &= sc->sc_index_mask;
1951 	LIST_FOREACH(iter, &sc->sc_index[key], i_entry)
1952 		if (iter->i_key == index->i_key)
1953 			goto assign_id;
1954 
1955 	LIST_INSERT_HEAD(&sc->sc_index[key], index, i_entry);
1956 
1957 	mtx_leave(&sc->sc_index_mtx);
1958 
1959 	/* Likewise, no need to lock for index here. */
1960 	return index->i_key;
1961 }
1962 
1963 struct noise_remote *
1964 wg_index_get(void *_sc, uint32_t key0)
1965 {
1966 	struct wg_softc		*sc = _sc;
1967 	struct wg_index		*iter;
1968 	struct noise_remote	*remote = NULL;
1969 	uint32_t		 key = key0 & sc->sc_index_mask;
1970 
1971 	mtx_enter(&sc->sc_index_mtx);
1972 	LIST_FOREACH(iter, &sc->sc_index[key], i_entry)
1973 		if (iter->i_key == key0) {
1974 			remote = iter->i_value;
1975 			break;
1976 		}
1977 	mtx_leave(&sc->sc_index_mtx);
1978 	return remote;
1979 }
1980 
1981 void
1982 wg_index_drop(void *_sc, uint32_t key0)
1983 {
1984 	struct wg_softc	*sc = _sc;
1985 	struct wg_index	*iter;
1986 	struct wg_peer	*peer = NULL;
1987 	uint32_t	 key = key0 & sc->sc_index_mask;
1988 
1989 	mtx_enter(&sc->sc_index_mtx);
1990 	LIST_FOREACH(iter, &sc->sc_index[key], i_entry)
1991 		if (iter->i_key == key0) {
1992 			LIST_REMOVE(iter, i_entry);
1993 			break;
1994 		}
1995 	mtx_leave(&sc->sc_index_mtx);
1996 
1997 	/* We expect a peer */
1998 	peer = CONTAINER_OF(iter->i_value, struct wg_peer, p_remote);
1999 	KASSERT(peer != NULL);
2000 	SLIST_INSERT_HEAD(&peer->p_unused_index, iter, i_unused_entry);
2001 }
2002 
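/*
 * wg_input() handles UDP datagrams arriving on the tunnel's socket.
 * The sender's address and the local address are recorded in the
 * packet tag so replies can use the same path, the IP/UDP headers are
 * stripped, and the packet is classified: handshake messages go to the
 * handshake taskq, data messages are matched to a peer by their
 * receiver index and queued for decryption.
 */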
2003 struct mbuf *
2004 wg_input(void *_sc, struct mbuf *m, struct ip *ip, struct ip6_hdr *ip6,
2005     void *_uh, int hlen)
2006 {
2007 	struct wg_pkt_data	*data;
2008 	struct noise_remote	*remote;
2009 	struct wg_tag		*t;
2010 	struct wg_softc		*sc = _sc;
2011 	struct udphdr		*uh = _uh;
2012 
2013 	NET_ASSERT_LOCKED();
2014 
2015 	if ((t = wg_tag_get(m)) == NULL) {
2016 		m_freem(m);
2017 		return NULL;
2018 	}
2019 
2020 	if (ip != NULL) {
2021 		t->t_endpoint.e_remote.r_sa.sa_len = sizeof(struct sockaddr_in);
2022 		t->t_endpoint.e_remote.r_sa.sa_family = AF_INET;
2023 		t->t_endpoint.e_remote.r_sin.sin_port = uh->uh_sport;
2024 		t->t_endpoint.e_remote.r_sin.sin_addr = ip->ip_src;
2025 		t->t_endpoint.e_local.l_in = ip->ip_dst;
2026 #ifdef INET6
2027 	} else if (ip6 != NULL) {
2028 		t->t_endpoint.e_remote.r_sa.sa_len = sizeof(struct sockaddr_in6);
2029 		t->t_endpoint.e_remote.r_sa.sa_family = AF_INET6;
2030 		t->t_endpoint.e_remote.r_sin6.sin6_port = uh->uh_sport;
2031 		t->t_endpoint.e_remote.r_sin6.sin6_addr = ip6->ip6_src;
2032 		t->t_endpoint.e_local.l_in6 = ip6->ip6_dst;
2033 #endif
2034 	} else {
2035 		m_freem(m);
2036 		return NULL;
2037 	}
2038 
2039 	/* m has an IP/IPv6 header of hlen length; we don't need it anymore. */
2040 	m_adj(m, hlen);
2041 
2042 	/*
2043 	 * Ensure mbuf is contiguous over full length of packet. This is done
2044 	 * so we can directly read the handshake values in wg_handshake, and so
2045 	 * we can decrypt a transport packet by passing a single buffer to
2046 	 * noise_remote_decrypt in wg_decap.
2047 	 */
2048 	if ((m = m_pullup(m, m->m_pkthdr.len)) == NULL)
2049 		return NULL;
2050 
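	/*
	 * Handshake messages are recognised by their exact length plus the
	 * leading 32 bit type field; data messages only have a minimum
	 * length, as they carry a variable amount of ciphertext.
	 */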
2051 	if ((m->m_pkthdr.len == sizeof(struct wg_pkt_initiation) &&
2052 		*mtod(m, uint32_t *) == WG_PKT_INITIATION) ||
2053 	    (m->m_pkthdr.len == sizeof(struct wg_pkt_response) &&
2054 		*mtod(m, uint32_t *) == WG_PKT_RESPONSE) ||
2055 	    (m->m_pkthdr.len == sizeof(struct wg_pkt_cookie) &&
2056 		*mtod(m, uint32_t *) == WG_PKT_COOKIE)) {
2057 
2058 		if (mq_enqueue(&sc->sc_handshake_queue, m) != 0)
2059 			DPRINTF(sc, "Dropping handshake packet\n");
2060 		task_add(wg_handshake_taskq, &sc->sc_handshake);
2061 
2062 	} else if (m->m_pkthdr.len >= sizeof(struct wg_pkt_data) +
2063 	    NOISE_AUTHTAG_LEN && *mtod(m, uint32_t *) == WG_PKT_DATA) {
2064 
2065 		data = mtod(m, struct wg_pkt_data *);
2066 
2067 		if ((remote = wg_index_get(sc, data->r_idx)) != NULL) {
2068 			t->t_peer = CONTAINER_OF(remote, struct wg_peer,
2069 			    p_remote);
2070 			t->t_mbuf = NULL;
2071 			t->t_done = 0;
2072 
2073 			if (wg_queue_in(sc, t->t_peer, m) != 0)
2074 				counters_inc(sc->sc_if.if_counters,
2075 				    ifc_iqdrops);
2076 			task_add(wg_crypt_taskq, &sc->sc_decap);
2077 		} else {
2078 			counters_inc(sc->sc_if.if_counters, ifc_ierrors);
2079 			m_freem(m);
2080 		}
2081 	} else {
2082 		counters_inc(sc->sc_if.if_counters, ifc_ierrors);
2083 		m_freem(m);
2084 	}
2085 
2086 	return NULL;
2087 }
2088 
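/*
 * wg_qstart() is the interface's (serialised) start routine. Dequeued
 * packets are staged on their peer's stage queue and each peer seen is
 * collected on a local list; peers with a ready session have their
 * staged packets pushed to the encryption queues, others get a
 * handshake initiation requested. The encryption task is kicked once
 * at the end.
 */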
2089 void
2090 wg_qstart(struct ifqueue *ifq)
2091 {
2092 	struct ifnet		*ifp = ifq->ifq_if;
2093 	struct wg_softc		*sc = ifp->if_softc;
2094 	struct wg_peer		*peer;
2095 	struct wg_tag		*t;
2096 	struct mbuf		*m;
2097 	SLIST_HEAD(,wg_peer)	 start_list;
2098 
2099 	SLIST_INIT(&start_list);
2100 
2101 	/*
2102 	 * We should be OK to modify p_start_list, p_start_onlist in this
2103 	 * function as there should only be one ifp->if_qstart invoked at a
2104 	 * time.
2105 	 */
2106 	while ((m = ifq_dequeue(ifq)) != NULL) {
2107 		t = wg_tag_get(m);
2108 		peer = t->t_peer;
2109 		if (mq_push(&peer->p_stage_queue, m) != 0)
2110 			counters_inc(ifp->if_counters, ifc_oqdrops);
2111 		if (!peer->p_start_onlist) {
2112 			SLIST_INSERT_HEAD(&start_list, peer, p_start_list);
2113 			peer->p_start_onlist = 1;
2114 		}
2115 	}
2116 	SLIST_FOREACH(peer, &start_list, p_start_list) {
2117 		if (noise_remote_ready(&peer->p_remote) == 0)
2118 			wg_queue_out(sc, peer);
2119 		else
2120 			wg_timers_event_want_initiation(&peer->p_timers);
2121 		peer->p_start_onlist = 0;
2122 	}
2123 	task_add(wg_crypt_taskq, &sc->sc_encap);
2124 }
2125 
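/*
 * wg_output() runs in the network stack's output path. The destination
 * peer is looked up in the allowed-IPs table for the packet's address
 * family, checked for a usable endpoint, and stored in the packet tag
 * together with the interface MTU before the mbuf is handed to the
 * interface queue for wg_qstart().
 */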
2126 int
2127 wg_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
2128     struct rtentry *rt)
2129 {
2130 	struct wg_softc	*sc = ifp->if_softc;
2131 	struct wg_peer	*peer;
2132 	struct wg_tag	*t;
2133 	int		 af, ret = EINVAL;
2134 
2135 	NET_ASSERT_LOCKED();
2136 
2137 	if ((t = wg_tag_get(m)) == NULL) {
2138 		ret = ENOBUFS;
2139 		goto error;
2140 	}
2141 
2142 	m->m_pkthdr.ph_family = sa->sa_family;
2143 	if (sa->sa_family == AF_INET) {
2144 		peer = wg_aip_lookup(sc->sc_aip4,
2145 		    &mtod(m, struct ip *)->ip_dst);
2146 #ifdef INET6
2147 	} else if (sa->sa_family == AF_INET6) {
2148 		peer = wg_aip_lookup(sc->sc_aip6,
2149 		    &mtod(m, struct ip6_hdr *)->ip6_dst);
2150 #endif
2151 	} else {
2152 		ret = EAFNOSUPPORT;
2153 		goto error;
2154 	}
2155 
2156 #if NBPFILTER > 0
2157 	if (sc->sc_if.if_bpf)
2158 		bpf_mtap_af(sc->sc_if.if_bpf, sa->sa_family, m,
2159 		    BPF_DIRECTION_OUT);
2160 #endif
2161 
2162 	if (peer == NULL) {
2163 		ret = ENETUNREACH;
2164 		goto error;
2165 	}
2166 
2167 	af = peer->p_endpoint.e_remote.r_sa.sa_family;
2168 	if (af != AF_INET && af != AF_INET6) {
2169 		DPRINTF(sc, "No valid endpoint has been configured or "
2170 				"discovered for peer %llu\n", peer->p_id);
2171 		ret = EDESTADDRREQ;
2172 		goto error;
2173 	}
2174 
2175 	if (m->m_pkthdr.ph_loopcnt++ > M_MAXLOOP) {
2176 		DPRINTF(sc, "Packet looped\n");
2177 		ret = ELOOP;
2178 		goto error;
2179 	}
2180 
2181 	/*
2182 	 * As we hold a reference to peer in the mbuf, we can't handle a
2183 	 * delayed packet without doing some refcnting. If a peer is removed
2184 	 * while a delayed packet holds one, bad things will happen. For the
2185 	 * time being, delayed packets are unsupported. This may be fixed with
2186 	 * another aip_lookup in wg_qstart, or refcnting as mentioned before.
2187 	 */
2188 	if (m->m_pkthdr.pf.delay > 0) {
2189 		DPRINTF(sc, "PF delay unsupported\n");
2190 		ret = EOPNOTSUPP;
2191 		goto error;
2192 	}
2193 
2194 	t->t_peer = peer;
2195 	t->t_mbuf = NULL;
2196 	t->t_done = 0;
2197 	t->t_mtu = ifp->if_mtu;
2198 
2199 	/*
2200 	 * We still have an issue with ifq: it counts a packet even if it later
2201 	 * gets dropped in wg_qstart or is never encrypted. Such packets are
2202 	 * also counted as ofails or oqdrops, so they end up counted twice.
2203 	 */
2204 	return if_enqueue(ifp, m);
2205 error:
2206 	counters_inc(ifp->if_counters, ifc_oerrors);
2207 	m_freem(m);
2208 	return ret;
2209 }
2210 
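/*
 * SIOCSWG: apply a configuration from userland. The wg_data_io buffer
 * is laid out as a wg_interface_io followed by i_peers_count
 * wg_peer_io structures, each immediately followed by its own array of
 * wg_aip_io structures; this is why peer_p is recomputed from aip_p
 * after every peer.
 */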
2211 int
2212 wg_ioctl_set(struct wg_softc *sc, struct wg_data_io *data)
2213 {
2214 	struct wg_interface_io	*iface_p, iface_o;
2215 	struct wg_peer_io	*peer_p, peer_o;
2216 	struct wg_aip_io	*aip_p, aip_o;
2217 
2218 	struct wg_peer		*peer, *tpeer;
2219 	struct wg_aip		*aip, *taip;
2220 
2221 	in_port_t		 port;
2222 	int			 rtable;
2223 
2224 	uint8_t			 public[WG_KEY_SIZE], private[WG_KEY_SIZE];
2225 	size_t			 i, j;
2226 	int			 ret, has_identity;
2227 
2228 	if ((ret = suser(curproc)) != 0)
2229 		return ret;
2230 
2231 	rw_enter_write(&sc->sc_lock);
2232 
2233 	iface_p = data->wgd_interface;
2234 	if ((ret = copyin(iface_p, &iface_o, sizeof(iface_o))) != 0)
2235 		goto error;
2236 
2237 	if (iface_o.i_flags & WG_INTERFACE_REPLACE_PEERS)
2238 		TAILQ_FOREACH_SAFE(peer, &sc->sc_peer_seq, p_seq_entry, tpeer)
2239 			wg_peer_destroy(peer);
2240 
2241 	if (iface_o.i_flags & WG_INTERFACE_HAS_PRIVATE &&
2242 	    (noise_local_keys(&sc->sc_local, NULL, private) ||
2243 	     timingsafe_bcmp(private, iface_o.i_private, WG_KEY_SIZE))) {
2244 		if (curve25519_generate_public(public, iface_o.i_private)) {
2245 			if ((peer = wg_peer_lookup(sc, public)) != NULL)
2246 				wg_peer_destroy(peer);
2247 		}
2248 		noise_local_lock_identity(&sc->sc_local);
2249 		has_identity = noise_local_set_private(&sc->sc_local,
2250 						       iface_o.i_private);
2251 		TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2252 			noise_remote_precompute(&peer->p_remote);
2253 			wg_timers_event_reset_handshake_last_sent(&peer->p_timers);
2254 			noise_remote_expire_current(&peer->p_remote);
2255 		}
2256 		cookie_checker_update(&sc->sc_cookie,
2257 				      has_identity == 0 ? public : NULL);
2258 		noise_local_unlock_identity(&sc->sc_local);
2259 	}
2260 
2261 	if (iface_o.i_flags & WG_INTERFACE_HAS_PORT)
2262 		port = htons(iface_o.i_port);
2263 	else
2264 		port = sc->sc_udp_port;
2265 
2266 	if (iface_o.i_flags & WG_INTERFACE_HAS_RTABLE)
2267 		rtable = iface_o.i_rtable;
2268 	else
2269 		rtable = sc->sc_udp_rtable;
2270 
2271 	if (port != sc->sc_udp_port || rtable != sc->sc_udp_rtable) {
2272 		TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry)
2273 			wg_peer_clear_src(peer);
2274 
2275 		if (sc->sc_if.if_flags & IFF_RUNNING)
2276 			if ((ret = wg_bind(sc, &port, &rtable)) != 0)
2277 				goto error;
2278 
2279 		sc->sc_udp_port = port;
2280 		sc->sc_udp_rtable = rtable;
2281 	}
2282 
2283 	peer_p = &iface_p->i_peers[0];
2284 	for (i = 0; i < iface_o.i_peers_count; i++) {
2285 		if ((ret = copyin(peer_p, &peer_o, sizeof(peer_o))) != 0)
2286 			goto error;
2287 
2288 		/* Peer must have public key */
2289 		if (!(peer_o.p_flags & WG_PEER_HAS_PUBLIC))
2290 			goto next_peer;
2291 
2292 		/* 0 = latest protocol, 1 = this protocol */
2293 		if (peer_o.p_protocol_version != 0) {
2294 			if (peer_o.p_protocol_version > 1) {
2295 				ret = EPFNOSUPPORT;
2296 				goto error;
2297 			}
2298 		}
2299 
2300 		/* Get local public and check that peer key doesn't match */
2301 		if (noise_local_keys(&sc->sc_local, public, NULL) == 0 &&
2302 		    bcmp(public, peer_o.p_public, WG_KEY_SIZE) == 0)
2303 			goto next_peer;
2304 
2305 		/* Lookup peer, or create if it doesn't exist */
2306 		if ((peer = wg_peer_lookup(sc, peer_o.p_public)) == NULL) {
2307 			/* If we want to delete, there is no need to create a
2308 			 * new one. Also, don't create a new one if we only
2309 			 * want to update. */
2310 			if (peer_o.p_flags & (WG_PEER_REMOVE|WG_PEER_UPDATE))
2311 				goto next_peer;
2312 
2313 			if ((peer = wg_peer_create(sc,
2314 			    peer_o.p_public)) == NULL) {
2315 				ret = ENOMEM;
2316 				goto error;
2317 			}
2318 		}
2319 
2320 		/* Remove peer and continue if specified */
2321 		if (peer_o.p_flags & WG_PEER_REMOVE) {
2322 			wg_peer_destroy(peer);
2323 			goto next_peer;
2324 		}
2325 
2326 		if (peer_o.p_flags & WG_PEER_HAS_ENDPOINT)
2327 			wg_peer_set_sockaddr(peer, &peer_o.p_sa);
2328 
2329 		if (peer_o.p_flags & WG_PEER_HAS_PSK)
2330 			noise_remote_set_psk(&peer->p_remote, peer_o.p_psk);
2331 
2332 		if (peer_o.p_flags & WG_PEER_HAS_PKA)
2333 			wg_timers_set_persistent_keepalive(&peer->p_timers,
2334 			    peer_o.p_pka);
2335 
2336 		if (peer_o.p_flags & WG_PEER_REPLACE_AIPS) {
2337 			LIST_FOREACH_SAFE(aip, &peer->p_aip, a_entry, taip) {
2338 				wg_aip_remove(sc, peer, &aip->a_data);
2339 			}
2340 		}
2341 
2342 		if (peer_o.p_flags & WG_PEER_SET_DESCRIPTION)
2343 			strlcpy(peer->p_description, peer_o.p_description,
2344 			    IFDESCRSIZE);
2345 
2346 		aip_p = &peer_p->p_aips[0];
2347 		for (j = 0; j < peer_o.p_aips_count; j++) {
2348 			if ((ret = copyin(aip_p, &aip_o, sizeof(aip_o))) != 0)
2349 				goto error;
2350 			ret = wg_aip_add(sc, peer, &aip_o);
2351 			if (ret != 0)
2352 				goto error;
2353 			aip_p++;
2354 		}
2355 
2356 		peer_p = (struct wg_peer_io *)aip_p;
2357 		continue;
2358 next_peer:
2359 		aip_p = &peer_p->p_aips[0];
2360 		aip_p += peer_o.p_aips_count;
2361 		peer_p = (struct wg_peer_io *)aip_p;
2362 	}
2363 
2364 error:
2365 	rw_exit_write(&sc->sc_lock);
2366 	explicit_bzero(&iface_o, sizeof(iface_o));
2367 	explicit_bzero(&peer_o, sizeof(peer_o));
2368 	explicit_bzero(&aip_o, sizeof(aip_o));
2369 	explicit_bzero(public, sizeof(public));
2370 	explicit_bzero(private, sizeof(private));
2371 	return ret;
2372 }
2373 
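/*
 * SIOCGWG: export the configuration to userland using the same layout
 * as wg_ioctl_set(). If the supplied buffer is too small, the size
 * needed is reported back in wgd_size so userland can retry. Keys and
 * per-peer state are only returned to the superuser.
 */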
2374 int
2375 wg_ioctl_get(struct wg_softc *sc, struct wg_data_io *data)
2376 {
2377 	struct wg_interface_io	*iface_p, iface_o;
2378 	struct wg_peer_io	*peer_p, peer_o;
2379 	struct wg_aip_io	*aip_p;
2380 
2381 	struct wg_peer		*peer;
2382 	struct wg_aip		*aip;
2383 
2384 	size_t			 size, peer_count, aip_count;
2385 	int			 ret = 0, is_suser = suser(curproc) == 0;
2386 
2387 	size = sizeof(struct wg_interface_io);
2388 	if (data->wgd_size < size && !is_suser)
2389 		goto ret_size;
2390 
2391 	iface_p = data->wgd_interface;
2392 	bzero(&iface_o, sizeof(iface_o));
2393 
2394 	rw_enter_read(&sc->sc_lock);
2395 
2396 	if (sc->sc_udp_port != 0) {
2397 		iface_o.i_port = ntohs(sc->sc_udp_port);
2398 		iface_o.i_flags |= WG_INTERFACE_HAS_PORT;
2399 	}
2400 
2401 	if (sc->sc_udp_rtable != 0) {
2402 		iface_o.i_rtable = sc->sc_udp_rtable;
2403 		iface_o.i_flags |= WG_INTERFACE_HAS_RTABLE;
2404 	}
2405 
2406 	if (!is_suser)
2407 		goto copy_out_iface;
2408 
2409 	if (noise_local_keys(&sc->sc_local, iface_o.i_public,
2410 	    iface_o.i_private) == 0) {
2411 		iface_o.i_flags |= WG_INTERFACE_HAS_PUBLIC;
2412 		iface_o.i_flags |= WG_INTERFACE_HAS_PRIVATE;
2413 	}
2414 
2415 	size += sizeof(struct wg_peer_io) * sc->sc_peer_num;
2416 	size += sizeof(struct wg_aip_io) * sc->sc_aip_num;
2417 	if (data->wgd_size < size)
2418 		goto unlock_and_ret_size;
2419 
2420 	peer_count = 0;
2421 	peer_p = &iface_p->i_peers[0];
2422 	TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2423 		bzero(&peer_o, sizeof(peer_o));
2424 		peer_o.p_flags = WG_PEER_HAS_PUBLIC;
2425 		peer_o.p_protocol_version = 1;
2426 
2427 		if (noise_remote_keys(&peer->p_remote, peer_o.p_public,
2428 		    peer_o.p_psk) == 0)
2429 			peer_o.p_flags |= WG_PEER_HAS_PSK;
2430 
2431 		if (wg_timers_get_persistent_keepalive(&peer->p_timers,
2432 		    &peer_o.p_pka) == 0)
2433 			peer_o.p_flags |= WG_PEER_HAS_PKA;
2434 
2435 		if (wg_peer_get_sockaddr(peer, &peer_o.p_sa) == 0)
2436 			peer_o.p_flags |= WG_PEER_HAS_ENDPOINT;
2437 
2438 		mtx_enter(&peer->p_counters_mtx);
2439 		peer_o.p_txbytes = peer->p_counters_tx;
2440 		peer_o.p_rxbytes = peer->p_counters_rx;
2441 		mtx_leave(&peer->p_counters_mtx);
2442 
2443 		wg_timers_get_last_handshake(&peer->p_timers,
2444 		    &peer_o.p_last_handshake);
2445 
2446 		aip_count = 0;
2447 		aip_p = &peer_p->p_aips[0];
2448 		LIST_FOREACH(aip, &peer->p_aip, a_entry) {
2449 			if ((ret = copyout(&aip->a_data, aip_p, sizeof(*aip_p))) != 0)
2450 				goto unlock_and_ret_size;
2451 			aip_p++;
2452 			aip_count++;
2453 		}
2454 		peer_o.p_aips_count = aip_count;
2455 
2456 		strlcpy(peer_o.p_description, peer->p_description, IFDESCRSIZE);
2457 
2458 		if ((ret = copyout(&peer_o, peer_p, sizeof(peer_o))) != 0)
2459 			goto unlock_and_ret_size;
2460 
2461 		peer_p = (struct wg_peer_io *)aip_p;
2462 		peer_count++;
2463 	}
2464 	iface_o.i_peers_count = peer_count;
2465 
2466 copy_out_iface:
2467 	ret = copyout(&iface_o, iface_p, sizeof(iface_o));
2468 unlock_and_ret_size:
2469 	rw_exit_read(&sc->sc_lock);
2470 	explicit_bzero(&iface_o, sizeof(iface_o));
2471 	explicit_bzero(&peer_o, sizeof(peer_o));
2472 ret_size:
2473 	data->wgd_size = size;
2474 	return ret;
2475 }
2476 
2477 int
2478 wg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2479 {
2480 	struct ifreq	*ifr = (struct ifreq *) data;
2481 	struct wg_softc	*sc = ifp->if_softc;
2482 	int		 ret = 0;
2483 
2484 	switch (cmd) {
2485 	case SIOCSWG:
2486 		NET_UNLOCK();
2487 		ret = wg_ioctl_set(sc, (struct wg_data_io *) data);
2488 		NET_LOCK();
2489 		break;
2490 	case SIOCGWG:
2491 		NET_UNLOCK();
2492 		ret = wg_ioctl_get(sc, (struct wg_data_io *) data);
2493 		NET_LOCK();
2494 		break;
2495 	/* Interface IOCTLs */
2496 	case SIOCSIFADDR:
2497 		SET(ifp->if_flags, IFF_UP);
2498 		/* FALLTHROUGH */
2499 	case SIOCSIFFLAGS:
2500 		if (ISSET(ifp->if_flags, IFF_UP))
2501 			ret = wg_up(sc);
2502 		else
2503 			wg_down(sc);
2504 		break;
2505 	case SIOCSIFMTU:
2506 		/* Arbitrary limits */
2507 		if (ifr->ifr_mtu <= 0 || ifr->ifr_mtu > 9000)
2508 			ret = EINVAL;
2509 		else
2510 			ifp->if_mtu = ifr->ifr_mtu;
2511 		break;
2512 	case SIOCADDMULTI:
2513 	case SIOCDELMULTI:
2514 		break;
2515 	default:
2516 		ret = ENOTTY;
2517 	}
2518 
2519 	return ret;
2520 }
2521 
2522 int
2523 wg_up(struct wg_softc *sc)
2524 {
2525 	struct wg_peer	*peer;
2526 	int		 ret = 0;
2527 
2528 	NET_ASSERT_LOCKED();
2529 	/*
2530 	 * We use IFF_RUNNING for exclusive access here. We also may want
2531 	 * an exclusive sc_lock as wg_bind may write to sc_udp_port. We also
2532 	 * want to drop NET_LOCK as we want to call socreate, sobind, etc. Once
2533 	 * solock is no longer === NET_LOCK, we may be able to avoid this.
2534 	 */
2535 	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
2536 		SET(sc->sc_if.if_flags, IFF_RUNNING);
2537 		NET_UNLOCK();
2538 
2539 		rw_enter_write(&sc->sc_lock);
2540 		/*
2541 		 * If we successfully bind the socket, then enable the timers
2542 		 * for the peer. This will send all staged packets and a
2543 		 * keepalive if necessary.
2544 		 */
2545 		ret = wg_bind(sc, &sc->sc_udp_port, &sc->sc_udp_rtable);
2546 		if (ret == 0) {
2547 			TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2548 				wg_timers_enable(&peer->p_timers);
2549 				wg_queue_out(sc, peer);
2550 			}
2551 		}
2552 		rw_exit_write(&sc->sc_lock);
2553 
2554 		NET_LOCK();
2555 		if (ret != 0)
2556 			CLR(sc->sc_if.if_flags, IFF_RUNNING);
2557 	}
2558 	return ret;
2559 }
2560 
2561 void
2562 wg_down(struct wg_softc *sc)
2563 {
2564 	struct wg_peer	*peer;
2565 
2566 	NET_ASSERT_LOCKED();
2567 	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING))
2568 		return;
2569 	CLR(sc->sc_if.if_flags, IFF_RUNNING);
2570 	NET_UNLOCK();
2571 
2572 	/*
2573 	 * We only need a read lock here, as we aren't writing to anything
2574 	 * that isn't granularly locked.
2575 	 */
2576 	rw_enter_read(&sc->sc_lock);
2577 	TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2578 		mq_purge(&peer->p_stage_queue);
2579 		wg_timers_disable(&peer->p_timers);
2580 	}
2581 
2582 	taskq_barrier(wg_handshake_taskq);
2583 	TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2584 		noise_remote_clear(&peer->p_remote);
2585 		wg_timers_event_reset_handshake_last_sent(&peer->p_timers);
2586 	}
2587 
2588 	wg_unbind(sc);
2589 	rw_exit_read(&sc->sc_lock);
2590 	NET_LOCK();
2591 }
2592 
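/*
 * The handshake and crypto taskqs are shared by all wg(4) interfaces:
 * they are created when the first interface is cloned and destroyed
 * again when the last one goes away, tracked by wg_counter.
 */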
2593 int
2594 wg_clone_create(struct if_clone *ifc, int unit)
2595 {
2596 	struct ifnet		*ifp;
2597 	struct wg_softc		*sc;
2598 	struct noise_upcall	 local_upcall;
2599 
2600 	KERNEL_ASSERT_LOCKED();
2601 
2602 	if (wg_counter == 0) {
2603 		wg_handshake_taskq = taskq_create("wg_handshake",
2604 		    2, IPL_NET, TASKQ_MPSAFE);
2605 		wg_crypt_taskq = taskq_create("wg_crypt",
2606 		    ncpus, IPL_NET, TASKQ_MPSAFE);
2607 
2608 		if (wg_handshake_taskq == NULL || wg_crypt_taskq == NULL) {
2609 			if (wg_handshake_taskq != NULL)
2610 				taskq_destroy(wg_handshake_taskq);
2611 			if (wg_crypt_taskq != NULL)
2612 				taskq_destroy(wg_crypt_taskq);
2613 			wg_handshake_taskq = NULL;
2614 			wg_crypt_taskq = NULL;
2615 			return ENOTRECOVERABLE;
2616 		}
2617 	}
2618 	wg_counter++;
2619 
2620 	if ((sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL)
2621 		goto ret_00;
2622 
2623 	local_upcall.u_arg = sc;
2624 	local_upcall.u_remote_get = wg_remote_get;
2625 	local_upcall.u_index_set = wg_index_set;
2626 	local_upcall.u_index_drop = wg_index_drop;
2627 
2628 	TAILQ_INIT(&sc->sc_peer_seq);
2629 
2630 	/* sc_if is initialised after everything else */
2631 	arc4random_buf(&sc->sc_secret, sizeof(sc->sc_secret));
2632 
2633 	rw_init(&sc->sc_lock, "wg");
2634 	noise_local_init(&sc->sc_local, &local_upcall);
2635 	if (cookie_checker_init(&sc->sc_cookie, &wg_ratelimit_pool) != 0)
2636 		goto ret_01;
2637 	sc->sc_udp_port = 0;
2638 	sc->sc_udp_rtable = 0;
2639 
2640 	rw_init(&sc->sc_so_lock, "wg_so");
2641 	sc->sc_so4 = NULL;
2642 #ifdef INET6
2643 	sc->sc_so6 = NULL;
2644 #endif
2645 
2646 	sc->sc_aip_num = 0;
2647 	if ((sc->sc_aip4 = art_alloc(0, 32, 0)) == NULL)
2648 		goto ret_02;
2649 #ifdef INET6
2650 	if ((sc->sc_aip6 = art_alloc(0, 128, 0)) == NULL)
2651 		goto ret_03;
2652 #endif
2653 
2654 	rw_init(&sc->sc_peer_lock, "wg_peer");
2655 	sc->sc_peer_num = 0;
2656 	if ((sc->sc_peer = hashinit(HASHTABLE_PEER_SIZE, M_DEVBUF,
2657 	    M_NOWAIT, &sc->sc_peer_mask)) == NULL)
2658 		goto ret_04;
2659 
2660 	mtx_init(&sc->sc_index_mtx, IPL_NET);
2661 	if ((sc->sc_index = hashinit(HASHTABLE_INDEX_SIZE, M_DEVBUF,
2662 	    M_NOWAIT, &sc->sc_index_mask)) == NULL)
2663 		goto ret_05;
2664 
2665 	task_set(&sc->sc_handshake, wg_handshake_worker, sc);
2666 	mq_init(&sc->sc_handshake_queue, MAX_QUEUED_HANDSHAKES, IPL_NET);
2667 
2668 	task_set(&sc->sc_encap, wg_encap_worker, sc);
2669 	task_set(&sc->sc_decap, wg_decap_worker, sc);
2670 
2671 	bzero(&sc->sc_encap_ring, sizeof(sc->sc_encap_ring));
2672 	mtx_init(&sc->sc_encap_ring.r_mtx, IPL_NET);
2673 	bzero(&sc->sc_decap_ring, sizeof(sc->sc_decap_ring));
2674 	mtx_init(&sc->sc_decap_ring.r_mtx, IPL_NET);
2675 
2676 	/* We've set up the softc, now we can set up the ifnet */
2677 	ifp = &sc->sc_if;
2678 	ifp->if_softc = sc;
2679 
2680 	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "wg%d", unit);
2681 
2682 	ifp->if_mtu = DEFAULT_MTU;
2683 	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_NOARP;
2684 	ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
2685 	ifp->if_txmit = 64; /* Keep our workers active for longer. */
2686 
2687 	ifp->if_ioctl = wg_ioctl;
2688 	ifp->if_qstart = wg_qstart;
2689 	ifp->if_output = wg_output;
2690 
2691 	ifp->if_type = IFT_WIREGUARD;
2692 	ifp->if_rtrequest = p2p_rtrequest;
2693 
2694 	if_attach(ifp);
2695 	if_alloc_sadl(ifp);
2696 	if_counters_alloc(ifp);
2697 
2698 #if NBPFILTER > 0
2699 	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
2700 #endif
2701 
2702 	DPRINTF(sc, "Interface created\n");
2703 
2704 	return 0;
2705 ret_05:
2706 	hashfree(sc->sc_peer, HASHTABLE_PEER_SIZE, M_DEVBUF);
2707 ret_04:
2708 #ifdef INET6
2709 	free(sc->sc_aip6, M_RTABLE, sizeof(*sc->sc_aip6));
2710 ret_03:
2711 #endif
2712 	free(sc->sc_aip4, M_RTABLE, sizeof(*sc->sc_aip4));
2713 ret_02:
2714 	cookie_checker_deinit(&sc->sc_cookie);
2715 ret_01:
2716 	free(sc, M_DEVBUF, sizeof(*sc));
2717 ret_00:
2718 	return ENOBUFS;
2719 }

2720 int
2721 wg_clone_destroy(struct ifnet *ifp)
2722 {
2723 	struct wg_softc	*sc = ifp->if_softc;
2724 	struct wg_peer	*peer, *tpeer;
2725 
2726 	KERNEL_ASSERT_LOCKED();
2727 
2728 	rw_enter_write(&sc->sc_lock);
2729 	TAILQ_FOREACH_SAFE(peer, &sc->sc_peer_seq, p_seq_entry, tpeer)
2730 		wg_peer_destroy(peer);
2731 	rw_exit_write(&sc->sc_lock);
2732 
2733 	wg_unbind(sc);
2734 	if_detach(ifp);
2735 
2736 	wg_counter--;
2737 	if (wg_counter == 0) {
2738 		KASSERT(wg_handshake_taskq != NULL && wg_crypt_taskq != NULL);
2739 		taskq_destroy(wg_handshake_taskq);
2740 		taskq_destroy(wg_crypt_taskq);
2741 		wg_handshake_taskq = NULL;
2742 		wg_crypt_taskq = NULL;
2743 	}
2744 
2745 	DPRINTF(sc, "Destroyed interface\n");
2746 
2747 	hashfree(sc->sc_index, HASHTABLE_INDEX_SIZE, M_DEVBUF);
2748 	hashfree(sc->sc_peer, HASHTABLE_PEER_SIZE, M_DEVBUF);
2749 #ifdef INET6
2750 	free(sc->sc_aip6, M_RTABLE, sizeof(*sc->sc_aip6));
2751 #endif
2752 	free(sc->sc_aip4, M_RTABLE, sizeof(*sc->sc_aip4));
2753 	cookie_checker_deinit(&sc->sc_cookie);
2754 	free(sc, M_DEVBUF, sizeof(*sc));
2755 	return 0;
2756 }
2757 
2758 void
2759 wgattach(int nwg)
2760 {
2761 #ifdef WGTEST
2762 	cookie_test();
2763 	noise_test();
2764 #endif
2765 	if_clone_attach(&wg_cloner);
2766 
2767 	pool_init(&wg_aip_pool, sizeof(struct wg_aip), 0,
2768 			IPL_NET, 0, "wgaip", NULL);
2769 	pool_init(&wg_peer_pool, sizeof(struct wg_peer), 0,
2770 			IPL_NET, 0, "wgpeer", NULL);
2771 	pool_init(&wg_ratelimit_pool, sizeof(struct ratelimit_entry), 0,
2772 			IPL_NET, 0, "wgratelimit", NULL);
2773 }
2774