1 /*	$OpenBSD: if_wg.c,v 1.28 2023/06/01 18:57:53 kn Exp $ */
2 
3 /*
4  * Copyright (C) 2015-2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
5  * Copyright (C) 2019-2020 Matt Dunwoodie <ncon@noconroy.net>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include "bpfilter.h"
21 #include "pf.h"
22 
23 #include <sys/types.h>
24 #include <sys/systm.h>
25 #include <sys/param.h>
26 #include <sys/pool.h>
27 
28 #include <sys/socket.h>
29 #include <sys/socketvar.h>
30 #include <sys/percpu.h>
31 #include <sys/ioctl.h>
32 #include <sys/mbuf.h>
33 
34 #include <net/if.h>
35 #include <net/if_var.h>
36 #include <net/if_types.h>
37 #include <net/if_wg.h>
38 
39 #include <net/wg_noise.h>
40 #include <net/wg_cookie.h>
41 
42 #include <net/pfvar.h>
43 #include <net/route.h>
44 #include <net/bpf.h>
45 
46 #include <netinet/ip.h>
47 #include <netinet/ip6.h>
48 #include <netinet/udp.h>
49 #include <netinet/in_pcb.h>
50 
51 #include <crypto/siphash.h>
52 
53 #define DEFAULT_MTU		1420
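/*
 * For reference: 1420 leaves room for WireGuard encapsulation over an IPv6
 * path on a standard 1500 byte Ethernet MTU.  The overhead is 40 (IPv6) +
 * 8 (UDP) + 16 (wg_pkt_data header: type, receiver index, counter) +
 * 16 (authentication tag) = 80 bytes, and 1500 - 80 = 1420.
 */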
54 
55 #define MAX_STAGED_PKT		128
56 #define MAX_QUEUED_PKT		1024
57 #define MAX_QUEUED_PKT_MASK	(MAX_QUEUED_PKT - 1)
58 
59 #define MAX_QUEUED_HANDSHAKES	4096
60 
61 #define HASHTABLE_PEER_SIZE	(1 << 11)
62 #define HASHTABLE_INDEX_SIZE	(1 << 13)
63 #define MAX_PEERS_PER_IFACE	(1 << 20)
64 
65 #define REKEY_TIMEOUT		5
66 #define REKEY_TIMEOUT_JITTER	334 /* 1/3 sec, round for arc4random_uniform */
67 #define KEEPALIVE_TIMEOUT	10
68 #define MAX_TIMER_HANDSHAKES	(90 / REKEY_TIMEOUT)
69 #define NEW_HANDSHAKE_TIMEOUT	(REKEY_TIMEOUT + KEEPALIVE_TIMEOUT)
70 #define UNDERLOAD_TIMEOUT	1
71 
72 #define DPRINTF(sc, str, ...) do { if (ISSET((sc)->sc_if.if_flags, IFF_DEBUG))\
73     printf("%s: " str, (sc)->sc_if.if_xname, ##__VA_ARGS__); } while (0)
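/*
 * DPRINTF only logs when the interface has IFF_DEBUG set, which can be
 * toggled at runtime with "ifconfig wgN debug".
 */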
74 
75 #define CONTAINER_OF(ptr, type, member) ({			\
76 	const __typeof( ((type *)0)->member ) *__mptr = (ptr);	\
77 	(type *)( (char *)__mptr - offsetof(type,member) );})
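/*
 * Illustrative use only (the real callers are further below): given a pointer
 * to an embedded member, CONTAINER_OF walks back to the enclosing structure.
 * The timer callbacks use it to recover the owning wg_peer from its
 * wg_timers, e.g.
 *
 *	struct wg_peer *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
 */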
78 
79 /* First byte indicating packet type on the wire */
80 #define WG_PKT_INITIATION htole32(1)
81 #define WG_PKT_RESPONSE htole32(2)
82 #define WG_PKT_COOKIE htole32(3)
83 #define WG_PKT_DATA htole32(4)
84 
85 #define WG_PKT_WITH_PADDING(n)	(((n) + (16-1)) & (~(16-1)))
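/*
 * WG_PKT_WITH_PADDING rounds a plaintext length up to the next multiple of
 * 16, as required by the WireGuard data message format, e.g.
 * WG_PKT_WITH_PADDING(0) == 0, WG_PKT_WITH_PADDING(1) == 16 and
 * WG_PKT_WITH_PADDING(17) == 32.
 */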
86 #define WG_KEY_SIZE		WG_KEY_LEN
87 
88 struct wg_pkt_initiation {
89 	uint32_t		t;
90 	uint32_t		s_idx;
91 	uint8_t			ue[NOISE_PUBLIC_KEY_LEN];
92 	uint8_t			es[NOISE_PUBLIC_KEY_LEN + NOISE_AUTHTAG_LEN];
93 	uint8_t			ets[NOISE_TIMESTAMP_LEN + NOISE_AUTHTAG_LEN];
94 	struct cookie_macs	m;
95 };
96 
97 struct wg_pkt_response {
98 	uint32_t		t;
99 	uint32_t		s_idx;
100 	uint32_t		r_idx;
101 	uint8_t			ue[NOISE_PUBLIC_KEY_LEN];
102 	uint8_t			en[0 + NOISE_AUTHTAG_LEN];
103 	struct cookie_macs	m;
104 };
105 
106 struct wg_pkt_cookie {
107 	uint32_t		t;
108 	uint32_t		r_idx;
109 	uint8_t			nonce[COOKIE_NONCE_SIZE];
110 	uint8_t			ec[COOKIE_ENCRYPTED_SIZE];
111 };
112 
113 struct wg_pkt_data {
114 	uint32_t		t;
115 	uint32_t		r_idx;
116 	uint8_t			nonce[sizeof(uint64_t)];
117 	uint8_t			buf[];
118 };
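/*
 * wg_pkt_data mirrors the WireGuard data message as it appears on the wire:
 * a 32-bit type (WG_PKT_DATA), the receiver's 32-bit session index, a 64-bit
 * little-endian counter used as the nonce, then the encrypted (and padded)
 * payload followed by a NOISE_AUTHTAG_LEN byte authentication tag.  wg_encap
 * and wg_decap below build and parse exactly this layout.
 */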
119 
120 struct wg_endpoint {
121 	union {
122 		struct sockaddr		r_sa;
123 		struct sockaddr_in	r_sin;
124 #ifdef INET6
125 		struct sockaddr_in6	r_sin6;
126 #endif
127 	} e_remote;
128 	union {
129 		struct in_addr		l_in;
130 #ifdef INET6
131 		struct in6_pktinfo	l_pktinfo6;
132 #define l_in6 l_pktinfo6.ipi6_addr
133 #endif
134 	} e_local;
135 };
136 
137 struct wg_tag {
138 	struct wg_endpoint	 t_endpoint;
139 	struct wg_peer		*t_peer;
140 	struct mbuf		*t_mbuf;
141 	int			 t_done;
142 	int			 t_mtu;
143 };
144 
145 struct wg_index {
146 	LIST_ENTRY(wg_index)	 i_entry;
147 	SLIST_ENTRY(wg_index)	 i_unused_entry;
148 	uint32_t		 i_key;
149 	struct noise_remote	*i_value;
150 };
151 
152 struct wg_timers {
153 	/* t_lock is for blocking wg_timers_event_* when setting t_disabled. */
154 	struct rwlock		 t_lock;
155 
156 	int			 t_disabled;
157 	int			 t_need_another_keepalive;
158 	uint16_t		 t_persistent_keepalive_interval;
159 	struct timeout		 t_new_handshake;
160 	struct timeout		 t_send_keepalive;
161 	struct timeout		 t_retry_handshake;
162 	struct timeout		 t_zero_key_material;
163 	struct timeout		 t_persistent_keepalive;
164 
165 	struct mutex		 t_handshake_mtx;
166 	struct timespec		 t_handshake_last_sent;	/* nanouptime */
167 	struct timespec		 t_handshake_complete;	/* nanotime */
168 	int			 t_handshake_retries;
169 };
170 
171 struct wg_aip {
172 	struct art_node		 a_node;
173 	LIST_ENTRY(wg_aip)	 a_entry;
174 	struct wg_peer		*a_peer;
175 	struct wg_aip_io	 a_data;
176 };
177 
178 struct wg_queue {
179 	struct mutex		 q_mtx;
180 	struct mbuf_list	 q_list;
181 };
182 
183 struct wg_ring {
184 	struct mutex	 r_mtx;
185 	uint32_t	 r_head;
186 	uint32_t	 r_tail;
187 	struct mbuf	*r_buf[MAX_QUEUED_PKT];
188 };
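/*
 * wg_ring is a simple single-lock ring buffer.  r_head and r_tail are
 * free-running 32-bit counters: r_tail - r_head is the number of queued
 * mbufs and slots are addressed as r_buf[counter & MAX_QUEUED_PKT_MASK],
 * which is why MAX_QUEUED_PKT must stay a power of two.  A rough sketch of
 * the producer side, mirroring wg_queue_in()/wg_queue_out() below:
 *
 *	if (r->r_tail - r->r_head < MAX_QUEUED_PKT) {
 *		r->r_buf[r->r_tail & MAX_QUEUED_PKT_MASK] = m;
 *		r->r_tail++;
 *	}
 */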
189 
190 struct wg_peer {
191 	LIST_ENTRY(wg_peer)	 p_pubkey_entry;
192 	TAILQ_ENTRY(wg_peer)	 p_seq_entry;
193 	uint64_t		 p_id;
194 	struct wg_softc		*p_sc;
195 
196 	struct noise_remote	 p_remote;
197 	struct cookie_maker	 p_cookie;
198 	struct wg_timers	 p_timers;
199 
200 	struct mutex		 p_counters_mtx;
201 	uint64_t		 p_counters_tx;
202 	uint64_t		 p_counters_rx;
203 
204 	struct mutex		 p_endpoint_mtx;
205 	struct wg_endpoint	 p_endpoint;
206 
207 	struct task		 p_send_initiation;
208 	struct task		 p_send_keepalive;
209 	struct task		 p_clear_secrets;
210 	struct task		 p_deliver_out;
211 	struct task		 p_deliver_in;
212 
213 	struct mbuf_queue	 p_stage_queue;
214 	struct wg_queue		 p_encap_queue;
215 	struct wg_queue		 p_decap_queue;
216 
217 	SLIST_HEAD(,wg_index)	 p_unused_index;
218 	struct wg_index		 p_index[3];
219 
220 	LIST_HEAD(,wg_aip)	 p_aip;
221 
222 	SLIST_ENTRY(wg_peer)	 p_start_list;
223 	int			 p_start_onlist;
224 
225 	char			 p_description[IFDESCRSIZE];
226 };
227 
228 struct wg_softc {
229 	struct ifnet		 sc_if;
230 	SIPHASH_KEY		 sc_secret;
231 
232 	struct rwlock		 sc_lock;
233 	struct noise_local	 sc_local;
234 	struct cookie_checker	 sc_cookie;
235 	in_port_t		 sc_udp_port;
236 	int			 sc_udp_rtable;
237 
238 	struct rwlock		 sc_so_lock;
239 	struct socket		*sc_so4;
240 #ifdef INET6
241 	struct socket		*sc_so6;
242 #endif
243 
244 	size_t			 sc_aip_num;
245 	struct art_root		*sc_aip4;
246 #ifdef INET6
247 	struct art_root		*sc_aip6;
248 #endif
249 
250 	struct rwlock		 sc_peer_lock;
251 	size_t			 sc_peer_num;
252 	LIST_HEAD(,wg_peer)	*sc_peer;
253 	TAILQ_HEAD(,wg_peer)	 sc_peer_seq;
254 	u_long			 sc_peer_mask;
255 
256 	struct mutex		 sc_index_mtx;
257 	LIST_HEAD(,wg_index)	*sc_index;
258 	u_long			 sc_index_mask;
259 
260 	struct task		 sc_handshake;
261 	struct mbuf_queue	 sc_handshake_queue;
262 
263 	struct task		 sc_encap;
264 	struct task		 sc_decap;
265 	struct wg_ring		 sc_encap_ring;
266 	struct wg_ring		 sc_decap_ring;
267 };
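/*
 * Work is split across three task queues: sc_handshake runs on
 * wg_handshake_taskq, sc_encap and sc_decap run on wg_crypt_taskq, and the
 * per-peer p_deliver_in/p_deliver_out tasks run on the interface's net_tq,
 * which keeps delivery serialised.
 */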
268 
269 struct wg_peer *
270 	wg_peer_create(struct wg_softc *, uint8_t[WG_KEY_SIZE]);
271 struct wg_peer *
272 	wg_peer_lookup(struct wg_softc *, const uint8_t[WG_KEY_SIZE]);
273 void	wg_peer_destroy(struct wg_peer *);
274 void	wg_peer_set_endpoint_from_tag(struct wg_peer *, struct wg_tag *);
275 void	wg_peer_set_sockaddr(struct wg_peer *, struct sockaddr *);
276 int	wg_peer_get_sockaddr(struct wg_peer *, struct sockaddr *);
277 void	wg_peer_clear_src(struct wg_peer *);
278 void	wg_peer_get_endpoint(struct wg_peer *, struct wg_endpoint *);
279 void	wg_peer_counters_add(struct wg_peer *, uint64_t, uint64_t);
280 
281 int	wg_aip_add(struct wg_softc *, struct wg_peer *, struct wg_aip_io *);
282 struct wg_peer *
283 	wg_aip_lookup(struct art_root *, void *);
284 int	wg_aip_remove(struct wg_softc *, struct wg_peer *,
285 	    struct wg_aip_io *);
286 
287 int	wg_socket_open(struct socket **, int, in_port_t *, int *, void *);
288 void	wg_socket_close(struct socket **);
289 int	wg_bind(struct wg_softc *, in_port_t *, int *);
290 void	wg_unbind(struct wg_softc *);
291 int	wg_send(struct wg_softc *, struct wg_endpoint *, struct mbuf *);
292 void	wg_send_buf(struct wg_softc *, struct wg_endpoint *, uint8_t *,
293 	    size_t);
294 
295 struct wg_tag *
296 	wg_tag_get(struct mbuf *);
297 
298 void	wg_timers_init(struct wg_timers *);
299 void	wg_timers_enable(struct wg_timers *);
300 void	wg_timers_disable(struct wg_timers *);
301 void	wg_timers_set_persistent_keepalive(struct wg_timers *, uint16_t);
302 int	wg_timers_get_persistent_keepalive(struct wg_timers *, uint16_t *);
303 void	wg_timers_get_last_handshake(struct wg_timers *, struct timespec *);
304 int	wg_timers_expired_handshake_last_sent(struct wg_timers *);
305 int	wg_timers_check_handshake_last_sent(struct wg_timers *);
306 
307 void	wg_timers_event_data_sent(struct wg_timers *);
308 void	wg_timers_event_data_received(struct wg_timers *);
309 void	wg_timers_event_any_authenticated_packet_sent(struct wg_timers *);
310 void	wg_timers_event_any_authenticated_packet_received(struct wg_timers *);
311 void	wg_timers_event_handshake_initiated(struct wg_timers *);
312 void	wg_timers_event_handshake_responded(struct wg_timers *);
313 void	wg_timers_event_handshake_complete(struct wg_timers *);
314 void	wg_timers_event_session_derived(struct wg_timers *);
315 void	wg_timers_event_any_authenticated_packet_traversal(struct wg_timers *);
316 void	wg_timers_event_want_initiation(struct wg_timers *);
317 void	wg_timers_event_reset_handshake_last_sent(struct wg_timers *);
318 
319 void	wg_timers_run_send_initiation(void *, int);
320 void	wg_timers_run_retry_handshake(void *);
321 void	wg_timers_run_send_keepalive(void *);
322 void	wg_timers_run_new_handshake(void *);
323 void	wg_timers_run_zero_key_material(void *);
324 void	wg_timers_run_persistent_keepalive(void *);
325 
326 void	wg_peer_send_buf(struct wg_peer *, uint8_t *, size_t);
327 void	wg_send_initiation(void *);
328 void	wg_send_response(struct wg_peer *);
329 void	wg_send_cookie(struct wg_softc *, struct cookie_macs *, uint32_t,
330 	    struct wg_endpoint *);
331 void	wg_send_keepalive(void *);
332 void	wg_peer_clear_secrets(void *);
333 void	wg_handshake(struct wg_softc *, struct mbuf *);
334 void	wg_handshake_worker(void *);
335 
336 void	wg_encap(struct wg_softc *, struct mbuf *);
337 void	wg_decap(struct wg_softc *, struct mbuf *);
338 void	wg_encap_worker(void *);
339 void	wg_decap_worker(void *);
340 void	wg_deliver_out(void *);
341 void	wg_deliver_in(void *);
342 
343 int	wg_queue_in(struct wg_softc *, struct wg_peer *, struct mbuf *);
344 void	wg_queue_out(struct wg_softc *, struct wg_peer *);
345 struct mbuf *
346 	wg_ring_dequeue(struct wg_ring *);
347 struct mbuf *
348 	wg_queue_dequeue(struct wg_queue *, struct wg_tag **);
349 size_t	wg_queue_len(struct wg_queue *);
350 
351 struct noise_remote *
352 	wg_remote_get(void *, uint8_t[NOISE_PUBLIC_KEY_LEN]);
353 uint32_t
354 	wg_index_set(void *, struct noise_remote *);
355 struct noise_remote *
356 	wg_index_get(void *, uint32_t);
357 void	wg_index_drop(void *, uint32_t);
358 
359 struct mbuf *
360 	wg_input(void *, struct mbuf *, struct ip *, struct ip6_hdr *, void *,
361 	    int);
362 int	wg_output(struct ifnet *, struct mbuf *, struct sockaddr *,
363 	    struct rtentry *);
364 int	wg_ioctl_set(struct wg_softc *, struct wg_data_io *);
365 int	wg_ioctl_get(struct wg_softc *, struct wg_data_io *);
366 int	wg_ioctl(struct ifnet *, u_long, caddr_t);
367 int	wg_up(struct wg_softc *);
368 void	wg_down(struct wg_softc *);
369 
370 int	wg_clone_create(struct if_clone *, int);
371 int	wg_clone_destroy(struct ifnet *);
372 void	wgattach(int);
373 
374 uint64_t	peer_counter = 0;
375 struct pool	wg_aip_pool;
376 struct pool	wg_peer_pool;
377 struct pool	wg_ratelimit_pool;
378 struct timeval	underload_interval = { UNDERLOAD_TIMEOUT, 0 };
379 
380 size_t		 wg_counter = 0;
381 struct taskq	*wg_handshake_taskq;
382 struct taskq	*wg_crypt_taskq;
383 
384 struct if_clone	wg_cloner =
385     IF_CLONE_INITIALIZER("wg", wg_clone_create, wg_clone_destroy);
386 
387 struct wg_peer *
388 wg_peer_create(struct wg_softc *sc, uint8_t public[WG_KEY_SIZE])
389 {
390 	struct wg_peer	*peer;
391 	uint64_t	 idx;
392 
393 	rw_assert_wrlock(&sc->sc_lock);
394 
395 	if (sc->sc_peer_num >= MAX_PEERS_PER_IFACE)
396 		return NULL;
397 
398 	if ((peer = pool_get(&wg_peer_pool, PR_NOWAIT)) == NULL)
399 		return NULL;
400 
401 	peer->p_id = peer_counter++;
402 	peer->p_sc = sc;
403 
404 	noise_remote_init(&peer->p_remote, public, &sc->sc_local);
405 	cookie_maker_init(&peer->p_cookie, public);
406 	wg_timers_init(&peer->p_timers);
407 
408 	mtx_init(&peer->p_counters_mtx, IPL_NET);
409 	peer->p_counters_tx = 0;
410 	peer->p_counters_rx = 0;
411 
412 	strlcpy(peer->p_description, "", IFDESCRSIZE);
413 
414 	mtx_init(&peer->p_endpoint_mtx, IPL_NET);
415 	bzero(&peer->p_endpoint, sizeof(peer->p_endpoint));
416 
417 	task_set(&peer->p_send_initiation, wg_send_initiation, peer);
418 	task_set(&peer->p_send_keepalive, wg_send_keepalive, peer);
419 	task_set(&peer->p_clear_secrets, wg_peer_clear_secrets, peer);
420 	task_set(&peer->p_deliver_out, wg_deliver_out, peer);
421 	task_set(&peer->p_deliver_in, wg_deliver_in, peer);
422 
423 	mq_init(&peer->p_stage_queue, MAX_STAGED_PKT, IPL_NET);
424 	mtx_init(&peer->p_encap_queue.q_mtx, IPL_NET);
425 	ml_init(&peer->p_encap_queue.q_list);
426 	mtx_init(&peer->p_decap_queue.q_mtx, IPL_NET);
427 	ml_init(&peer->p_decap_queue.q_list);
428 
429 	SLIST_INIT(&peer->p_unused_index);
430 	SLIST_INSERT_HEAD(&peer->p_unused_index, &peer->p_index[0],
431 	    i_unused_entry);
432 	SLIST_INSERT_HEAD(&peer->p_unused_index, &peer->p_index[1],
433 	    i_unused_entry);
434 	SLIST_INSERT_HEAD(&peer->p_unused_index, &peer->p_index[2],
435 	    i_unused_entry);
436 
437 	LIST_INIT(&peer->p_aip);
438 
439 	peer->p_start_onlist = 0;
440 
441 	idx = SipHash24(&sc->sc_secret, public, WG_KEY_SIZE);
442 	idx &= sc->sc_peer_mask;
443 
444 	rw_enter_write(&sc->sc_peer_lock);
445 	LIST_INSERT_HEAD(&sc->sc_peer[idx], peer, p_pubkey_entry);
446 	TAILQ_INSERT_TAIL(&sc->sc_peer_seq, peer, p_seq_entry);
447 	sc->sc_peer_num++;
448 	rw_exit_write(&sc->sc_peer_lock);
449 
450 	DPRINTF(sc, "Peer %llu created\n", peer->p_id);
451 	return peer;
452 }
453 
454 struct wg_peer *
455 wg_peer_lookup(struct wg_softc *sc, const uint8_t public[WG_KEY_SIZE])
456 {
457 	uint8_t		 peer_key[WG_KEY_SIZE];
458 	struct wg_peer	*peer;
459 	uint64_t	 idx;
460 
461 	idx = SipHash24(&sc->sc_secret, public, WG_KEY_SIZE);
462 	idx &= sc->sc_peer_mask;
463 
464 	rw_enter_read(&sc->sc_peer_lock);
465 	LIST_FOREACH(peer, &sc->sc_peer[idx], p_pubkey_entry) {
466 		noise_remote_keys(&peer->p_remote, peer_key, NULL);
467 		if (timingsafe_bcmp(peer_key, public, WG_KEY_SIZE) == 0)
468 			goto done;
469 	}
470 	peer = NULL;
471 done:
472 	rw_exit_read(&sc->sc_peer_lock);
473 	return peer;
474 }
475 
476 void
477 wg_peer_destroy(struct wg_peer *peer)
478 {
479 	struct wg_softc	*sc = peer->p_sc;
480 	struct wg_aip *aip, *taip;
481 
482 	rw_assert_wrlock(&sc->sc_lock);
483 
484 	/*
485 	 * Remove peer from the pubkey hashtable and disable all timeouts.
486 	 * After this, and after flushing wg_handshake_taskq, no more handshakes
487 	 * can be started.
488 	 */
489 	rw_enter_write(&sc->sc_peer_lock);
490 	LIST_REMOVE(peer, p_pubkey_entry);
491 	TAILQ_REMOVE(&sc->sc_peer_seq, peer, p_seq_entry);
492 	sc->sc_peer_num--;
493 	rw_exit_write(&sc->sc_peer_lock);
494 
495 	wg_timers_disable(&peer->p_timers);
496 
497 	taskq_barrier(wg_handshake_taskq);
498 
499 	/*
500 	 * Now we drop all allowed IPs, so that no more outgoing packets are
501 	 * routed to the peer, then drop all the indexes, so that no more
502 	 * incoming packets reach the peer. Then we can flush if_snd, wg_crypt_taskq
503 	 * and then nettq to ensure no more references to the peer exist.
504 	 */
505 	LIST_FOREACH_SAFE(aip, &peer->p_aip, a_entry, taip)
506 		wg_aip_remove(sc, peer, &aip->a_data);
507 
508 	noise_remote_clear(&peer->p_remote);
509 
510 	NET_LOCK();
511 	while (!ifq_empty(&sc->sc_if.if_snd)) {
512 		NET_UNLOCK();
513 		tsleep_nsec(sc, PWAIT, "wg_ifq", 1000);
514 		NET_LOCK();
515 	}
516 	NET_UNLOCK();
517 
518 	taskq_barrier(wg_crypt_taskq);
519 	taskq_barrier(net_tq(sc->sc_if.if_index));
520 
521 	DPRINTF(sc, "Peer %llu destroyed\n", peer->p_id);
522 	explicit_bzero(peer, sizeof(*peer));
523 	pool_put(&wg_peer_pool, peer);
524 }
525 
526 void
527 wg_peer_set_endpoint_from_tag(struct wg_peer *peer, struct wg_tag *t)
528 {
529 	if (memcmp(&t->t_endpoint, &peer->p_endpoint,
530 	    sizeof(t->t_endpoint)) == 0)
531 		return;
532 
533 	mtx_enter(&peer->p_endpoint_mtx);
534 	peer->p_endpoint = t->t_endpoint;
535 	mtx_leave(&peer->p_endpoint_mtx);
536 }
537 
538 void
539 wg_peer_set_sockaddr(struct wg_peer *peer, struct sockaddr *remote)
540 {
541 	mtx_enter(&peer->p_endpoint_mtx);
542 	memcpy(&peer->p_endpoint.e_remote, remote,
543 	       sizeof(peer->p_endpoint.e_remote));
544 	bzero(&peer->p_endpoint.e_local, sizeof(peer->p_endpoint.e_local));
545 	mtx_leave(&peer->p_endpoint_mtx);
546 }
547 
548 int
549 wg_peer_get_sockaddr(struct wg_peer *peer, struct sockaddr *remote)
550 {
551 	int	ret = 0;
552 
553 	mtx_enter(&peer->p_endpoint_mtx);
554 	if (peer->p_endpoint.e_remote.r_sa.sa_family != AF_UNSPEC)
555 		memcpy(remote, &peer->p_endpoint.e_remote,
556 		       sizeof(peer->p_endpoint.e_remote));
557 	else
558 		ret = ENOENT;
559 	mtx_leave(&peer->p_endpoint_mtx);
560 	return ret;
561 }
562 
563 void
564 wg_peer_clear_src(struct wg_peer *peer)
565 {
566 	mtx_enter(&peer->p_endpoint_mtx);
567 	bzero(&peer->p_endpoint.e_local, sizeof(peer->p_endpoint.e_local));
568 	mtx_leave(&peer->p_endpoint_mtx);
569 }
570 
571 void
572 wg_peer_get_endpoint(struct wg_peer *peer, struct wg_endpoint *endpoint)
573 {
574 	mtx_enter(&peer->p_endpoint_mtx);
575 	memcpy(endpoint, &peer->p_endpoint, sizeof(*endpoint));
576 	mtx_leave(&peer->p_endpoint_mtx);
577 }
578 
579 void
580 wg_peer_counters_add(struct wg_peer *peer, uint64_t tx, uint64_t rx)
581 {
582 	mtx_enter(&peer->p_counters_mtx);
583 	peer->p_counters_tx += tx;
584 	peer->p_counters_rx += rx;
585 	mtx_leave(&peer->p_counters_mtx);
586 }
587 
588 int
589 wg_aip_add(struct wg_softc *sc, struct wg_peer *peer, struct wg_aip_io *d)
590 {
591 	struct art_root	*root;
592 	struct art_node	*node;
593 	struct wg_aip	*aip;
594 	int		 ret = 0;
595 
596 	switch (d->a_af) {
597 	case AF_INET:	root = sc->sc_aip4; break;
598 #ifdef INET6
599 	case AF_INET6:	root = sc->sc_aip6; break;
600 #endif
601 	default: return EAFNOSUPPORT;
602 	}
603 
604 	if ((aip = pool_get(&wg_aip_pool, PR_NOWAIT|PR_ZERO)) == NULL)
605 		return ENOBUFS;
606 
607 	rw_enter_write(&root->ar_lock);
608 	node = art_insert(root, &aip->a_node, &d->a_addr, d->a_cidr);
609 
610 	if (node == &aip->a_node) {
611 		aip->a_peer = peer;
612 		aip->a_data = *d;
613 		LIST_INSERT_HEAD(&peer->p_aip, aip, a_entry);
614 		sc->sc_aip_num++;
615 	} else {
616 		pool_put(&wg_aip_pool, aip);
617 		aip = (struct wg_aip *) node;
618 		if (aip->a_peer != peer) {
619 			LIST_REMOVE(aip, a_entry);
620 			LIST_INSERT_HEAD(&peer->p_aip, aip, a_entry);
621 			aip->a_peer = peer;
622 		}
623 	}
624 	rw_exit_write(&root->ar_lock);
625 	return ret;
626 }
627 
628 struct wg_peer *
629 wg_aip_lookup(struct art_root *root, void *addr)
630 {
631 	struct srp_ref	 sr;
632 	struct art_node	*node;
633 
634 	node = art_match(root, addr, &sr);
635 	srp_leave(&sr);
636 
637 	return node == NULL ? NULL : ((struct wg_aip *) node)->a_peer;
638 }
639 
640 int
641 wg_aip_remove(struct wg_softc *sc, struct wg_peer *peer, struct wg_aip_io *d)
642 {
643 	struct srp_ref	 sr;
644 	struct art_root	*root;
645 	struct art_node	*node;
646 	struct wg_aip	*aip;
647 	int		 ret = 0;
648 
649 	switch (d->a_af) {
650 	case AF_INET:	root = sc->sc_aip4; break;
651 #ifdef INET6
652 	case AF_INET6:	root = sc->sc_aip6; break;
653 #endif
654 	default: return EAFNOSUPPORT;
655 	}
656 
657 	rw_enter_write(&root->ar_lock);
658 	if ((node = art_lookup(root, &d->a_addr, d->a_cidr, &sr)) == NULL) {
659 		ret = ENOENT;
660 	} else if (((struct wg_aip *) node)->a_peer != peer) {
661 		ret = EXDEV;
662 	} else {
663 		aip = (struct wg_aip *)node;
664 		if (art_delete(root, node, &d->a_addr, d->a_cidr) == NULL)
665 			panic("art_delete failed to delete node %p", node);
666 
667 		sc->sc_aip_num--;
668 		LIST_REMOVE(aip, a_entry);
669 		pool_put(&wg_aip_pool, aip);
670 	}
671 
672 	srp_leave(&sr);
673 	rw_exit_write(&root->ar_lock);
674 	return ret;
675 }
676 
677 int
678 wg_socket_open(struct socket **so, int af, in_port_t *port,
679     int *rtable, void *upcall_arg)
680 {
681 	struct mbuf		 mhostnam, mrtable;
682 #ifdef INET6
683 	struct sockaddr_in6	*sin6;
684 #endif
685 	struct sockaddr_in	*sin;
686 	int			 ret;
687 
688 	m_inithdr(&mhostnam);
689 	m_inithdr(&mrtable);
690 
691 	bzero(mtod(&mrtable, u_int *), sizeof(u_int));
692 	*mtod(&mrtable, u_int *) = *rtable;
693 	mrtable.m_len = sizeof(u_int);
694 
695 	if (af == AF_INET) {
696 		sin = mtod(&mhostnam, struct sockaddr_in *);
697 		bzero(sin, sizeof(*sin));
698 		sin->sin_len = sizeof(*sin);
699 		sin->sin_family = AF_INET;
700 		sin->sin_port = *port;
701 		sin->sin_addr.s_addr = INADDR_ANY;
702 		mhostnam.m_len = sin->sin_len;
703 #ifdef INET6
704 	} else if (af == AF_INET6) {
705 		sin6 = mtod(&mhostnam, struct sockaddr_in6 *);
706 		bzero(sin6, sizeof(*sin6));
707 		sin6->sin6_len = sizeof(*sin6);
708 		sin6->sin6_family = AF_INET6;
709 		sin6->sin6_port = *port;
710 		sin6->sin6_addr = (struct in6_addr) { .s6_addr = { 0 } };
711 		mhostnam.m_len = sin6->sin6_len;
712 #endif
713 	} else {
714 		return EAFNOSUPPORT;
715 	}
716 
717 	if ((ret = socreate(af, so, SOCK_DGRAM, 0)) != 0)
718 		return ret;
719 
720 	solock(*so);
721 	sotoinpcb(*so)->inp_upcall = wg_input;
722 	sotoinpcb(*so)->inp_upcall_arg = upcall_arg;
723 
724 	if ((ret = sosetopt(*so, SOL_SOCKET, SO_RTABLE, &mrtable)) == 0) {
725 		if ((ret = sobind(*so, &mhostnam, curproc)) == 0) {
726 			*port = sotoinpcb(*so)->inp_lport;
727 			*rtable = sotoinpcb(*so)->inp_rtableid;
728 		}
729 	}
730 	sounlock(*so);
731 
732 	if (ret != 0)
733 		wg_socket_close(so);
734 
735 	return ret;
736 }
737 
738 void
739 wg_socket_close(struct socket **so)
740 {
741 	if (*so != NULL && soclose(*so, 0) != 0)
742 		panic("Unable to close wg socket");
743 	*so = NULL;
744 }
745 
746 int
747 wg_bind(struct wg_softc *sc, in_port_t *portp, int *rtablep)
748 {
749 	int		 ret = 0, rtable = *rtablep;
750 	in_port_t	 port = *portp;
751 	struct socket	*so4;
752 #ifdef INET6
753 	struct socket	*so6;
754 	int		 retries = 0;
755 retry:
756 #endif
757 	if ((ret = wg_socket_open(&so4, AF_INET, &port, &rtable, sc)) != 0)
758 		return ret;
759 
760 #ifdef INET6
761 	if ((ret = wg_socket_open(&so6, AF_INET6, &port, &rtable, sc)) != 0) {
762 		if (ret == EADDRINUSE && *portp == 0 && retries++ < 100)
763 			goto retry;
764 		wg_socket_close(&so4);
765 		return ret;
766 	}
767 #endif
768 
769 	rw_enter_write(&sc->sc_so_lock);
770 	wg_socket_close(&sc->sc_so4);
771 	sc->sc_so4 = so4;
772 #ifdef INET6
773 	wg_socket_close(&sc->sc_so6);
774 	sc->sc_so6 = so6;
775 #endif
776 	rw_exit_write(&sc->sc_so_lock);
777 
778 	*portp = port;
779 	*rtablep = rtable;
780 	return 0;
781 }
782 
783 void
784 wg_unbind(struct wg_softc *sc)
785 {
786 	rw_enter_write(&sc->sc_so_lock);
787 	wg_socket_close(&sc->sc_so4);
788 #ifdef INET6
789 	wg_socket_close(&sc->sc_so6);
790 #endif
791 	rw_exit_write(&sc->sc_so_lock);
792 }
793 
794 int
795 wg_send(struct wg_softc *sc, struct wg_endpoint *e, struct mbuf *m)
796 {
797 	struct mbuf	 peernam, *control = NULL;
798 	int		 ret;
799 
800 	/* Get local control address before locking */
801 	if (e->e_remote.r_sa.sa_family == AF_INET) {
802 		if (e->e_local.l_in.s_addr != INADDR_ANY)
803 			control = sbcreatecontrol(&e->e_local.l_in,
804 			    sizeof(struct in_addr), IP_SENDSRCADDR,
805 			    IPPROTO_IP);
806 #ifdef INET6
807 	} else if (e->e_remote.r_sa.sa_family == AF_INET6) {
808 		if (!IN6_IS_ADDR_UNSPECIFIED(&e->e_local.l_in6))
809 			control = sbcreatecontrol(&e->e_local.l_pktinfo6,
810 			    sizeof(struct in6_pktinfo), IPV6_PKTINFO,
811 			    IPPROTO_IPV6);
812 #endif
813 	} else {
814 		m_freem(m);
815 		return EAFNOSUPPORT;
816 	}
817 
818 	/* Get remote address */
819 	peernam.m_type = MT_SONAME;
820 	peernam.m_next = NULL;
821 	peernam.m_nextpkt = NULL;
822 	peernam.m_data = (void *)&e->e_remote.r_sa;
823 	peernam.m_len = e->e_remote.r_sa.sa_len;
824 	peernam.m_flags = 0;
825 
826 	rw_enter_read(&sc->sc_so_lock);
827 	if (e->e_remote.r_sa.sa_family == AF_INET && sc->sc_so4 != NULL)
828 		ret = sosend(sc->sc_so4, &peernam, NULL, m, control, 0);
829 #ifdef INET6
830 	else if (e->e_remote.r_sa.sa_family == AF_INET6 && sc->sc_so6 != NULL)
831 		ret = sosend(sc->sc_so6, &peernam, NULL, m, control, 0);
832 #endif
833 	else {
834 		ret = ENOTCONN;
835 		m_freem(control);
836 		m_freem(m);
837 	}
838 	rw_exit_read(&sc->sc_so_lock);
839 
840 	return ret;
841 }
842 
843 void
844 wg_send_buf(struct wg_softc *sc, struct wg_endpoint *e, uint8_t *buf,
845     size_t len)
846 {
847 	struct mbuf	*m;
848 	int		 ret = 0;
849 
850 retry:
851 	m = m_gethdr(M_WAIT, MT_DATA);
852 	m->m_len = 0;
853 	m_copyback(m, 0, len, buf, M_WAIT);
854 
855 	/* As we're sending a handshake packet here, we want high priority */
856 	m->m_pkthdr.pf.prio = IFQ_MAXPRIO;
857 
858 	if (ret == 0) {
859 		ret = wg_send(sc, e, m);
860 		/* Retry if we couldn't bind to e->e_local */
861 		if (ret == EADDRNOTAVAIL) {
862 			bzero(&e->e_local, sizeof(e->e_local));
863 			goto retry;
864 		}
865 	} else {
866 		ret = wg_send(sc, e, m);
867 		if (ret != 0)
868 			DPRINTF(sc, "Unable to send packet\n");
869 	}
870 }
871 
872 struct wg_tag *
873 wg_tag_get(struct mbuf *m)
874 {
875 	struct m_tag	*mtag;
876 
877 	if ((mtag = m_tag_find(m, PACKET_TAG_WIREGUARD, NULL)) == NULL) {
878 		mtag = m_tag_get(PACKET_TAG_WIREGUARD, sizeof(struct wg_tag),
879 		    M_NOWAIT);
880 		if (mtag == NULL)
881 			return (NULL);
882 		bzero(mtag + 1, sizeof(struct wg_tag));
883 		m_tag_prepend(m, mtag);
884 	}
885 	return ((struct wg_tag *)(mtag + 1));
886 }
887 
888 /*
889  * The following section handles the timeout callbacks for a WireGuard session.
890  * These functions provide an "event-based" model for controlling wg(8) session
891  * timers. All function calls occur after the specified event below.
892  *
893  * wg_timers_event_data_sent:
894  *	tx: data
895  * wg_timers_event_data_received:
896  *	rx: data
897  * wg_timers_event_any_authenticated_packet_sent:
898  *	tx: keepalive, data, handshake
899  * wg_timers_event_any_authenticated_packet_received:
900  *	rx: keepalive, data, handshake
901  * wg_timers_event_any_authenticated_packet_traversal:
902  *	tx, rx: keepalive, data, handshake
903  * wg_timers_event_handshake_initiated:
904  *	tx: initiation
905  * wg_timers_event_handshake_responded:
906  *	tx: response
907  * wg_timers_event_handshake_complete:
908  *	rx: response, confirmation data
909  * wg_timers_event_session_derived:
910  *	tx: response, rx: response
911  * wg_timers_event_want_initiation:
912  *	tx: data failed, old keys expiring
913  * wg_timers_event_reset_handshake_last_sent:
914  * 	anytime we may immediately want a new handshake
915  */
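/*
 * For example, wg_timers_event_data_sent (tx: data) arms t_new_handshake only
 * if it is not already pending, so the first unanswered data packet starts
 * the NEW_HANDSHAKE_TIMEOUT clock and later packets do not keep pushing it
 * back.
 */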
916 void
917 wg_timers_init(struct wg_timers *t)
918 {
919 	bzero(t, sizeof(*t));
920 	rw_init(&t->t_lock, "wg_timers");
921 	mtx_init(&t->t_handshake_mtx, IPL_NET);
922 
923 	timeout_set(&t->t_new_handshake, wg_timers_run_new_handshake, t);
924 	timeout_set(&t->t_send_keepalive, wg_timers_run_send_keepalive, t);
925 	timeout_set(&t->t_retry_handshake, wg_timers_run_retry_handshake, t);
926 	timeout_set(&t->t_persistent_keepalive,
927 	    wg_timers_run_persistent_keepalive, t);
928 	timeout_set(&t->t_zero_key_material,
929 	    wg_timers_run_zero_key_material, t);
930 }
931 
932 void
933 wg_timers_enable(struct wg_timers *t)
934 {
935 	rw_enter_write(&t->t_lock);
936 	t->t_disabled = 0;
937 	rw_exit_write(&t->t_lock);
938 	wg_timers_run_persistent_keepalive(t);
939 }
940 
941 void
942 wg_timers_disable(struct wg_timers *t)
943 {
944 	rw_enter_write(&t->t_lock);
945 	t->t_disabled = 1;
946 	t->t_need_another_keepalive = 0;
947 	rw_exit_write(&t->t_lock);
948 
949 	timeout_del_barrier(&t->t_new_handshake);
950 	timeout_del_barrier(&t->t_send_keepalive);
951 	timeout_del_barrier(&t->t_retry_handshake);
952 	timeout_del_barrier(&t->t_persistent_keepalive);
953 	timeout_del_barrier(&t->t_zero_key_material);
954 }
955 
956 void
957 wg_timers_set_persistent_keepalive(struct wg_timers *t, uint16_t interval)
958 {
959 	rw_enter_read(&t->t_lock);
960 	if (!t->t_disabled) {
961 		t->t_persistent_keepalive_interval = interval;
962 		wg_timers_run_persistent_keepalive(t);
963 	}
964 	rw_exit_read(&t->t_lock);
965 }
966 
967 int
968 wg_timers_get_persistent_keepalive(struct wg_timers *t, uint16_t *interval)
969 {
970 	*interval = t->t_persistent_keepalive_interval;
971 	return *interval > 0 ? 0 : ENOENT;
972 }
973 
974 void
975 wg_timers_get_last_handshake(struct wg_timers *t, struct timespec *time)
976 {
977 	mtx_enter(&t->t_handshake_mtx);
978 	*time = t->t_handshake_complete;
979 	mtx_leave(&t->t_handshake_mtx);
980 }
981 
982 int
983 wg_timers_expired_handshake_last_sent(struct wg_timers *t)
984 {
985 	struct timespec uptime;
986 	struct timespec expire = { .tv_sec = REKEY_TIMEOUT, .tv_nsec = 0 };
987 
988 	getnanouptime(&uptime);
989 	timespecadd(&t->t_handshake_last_sent, &expire, &expire);
990 	return timespeccmp(&uptime, &expire, >) ? ETIMEDOUT : 0;
991 }
992 
993 int
994 wg_timers_check_handshake_last_sent(struct wg_timers *t)
995 {
996 	int ret;
997 	mtx_enter(&t->t_handshake_mtx);
998 	if ((ret = wg_timers_expired_handshake_last_sent(t)) == ETIMEDOUT)
999 		getnanouptime(&t->t_handshake_last_sent);
1000 	mtx_leave(&t->t_handshake_mtx);
1001 	return ret;
1002 }
1003 
1004 void
1005 wg_timers_event_data_sent(struct wg_timers *t)
1006 {
1007 	int	msecs = NEW_HANDSHAKE_TIMEOUT * 1000;
1008 	msecs += arc4random_uniform(REKEY_TIMEOUT_JITTER);
1009 
1010 	rw_enter_read(&t->t_lock);
1011 	if (!t->t_disabled && !timeout_pending(&t->t_new_handshake))
1012 		timeout_add_msec(&t->t_new_handshake, msecs);
1013 	rw_exit_read(&t->t_lock);
1014 }
1015 
1016 void
1017 wg_timers_event_data_received(struct wg_timers *t)
1018 {
1019 	rw_enter_read(&t->t_lock);
1020 	if (!t->t_disabled) {
1021 		if (!timeout_pending(&t->t_send_keepalive))
1022 			timeout_add_sec(&t->t_send_keepalive,
1023 			    KEEPALIVE_TIMEOUT);
1024 		else
1025 			t->t_need_another_keepalive = 1;
1026 	}
1027 	rw_exit_read(&t->t_lock);
1028 }
1029 
1030 void
1031 wg_timers_event_any_authenticated_packet_sent(struct wg_timers *t)
1032 {
1033 	timeout_del(&t->t_send_keepalive);
1034 }
1035 
1036 void
1037 wg_timers_event_any_authenticated_packet_received(struct wg_timers *t)
1038 {
1039 	timeout_del(&t->t_new_handshake);
1040 }
1041 
1042 void
1043 wg_timers_event_any_authenticated_packet_traversal(struct wg_timers *t)
1044 {
1045 	rw_enter_read(&t->t_lock);
1046 	if (!t->t_disabled && t->t_persistent_keepalive_interval > 0)
1047 		timeout_add_sec(&t->t_persistent_keepalive,
1048 		    t->t_persistent_keepalive_interval);
1049 	rw_exit_read(&t->t_lock);
1050 }
1051 
1052 void
1053 wg_timers_event_handshake_initiated(struct wg_timers *t)
1054 {
1055 	int	msecs = REKEY_TIMEOUT * 1000;
1056 	msecs += arc4random_uniform(REKEY_TIMEOUT_JITTER);
1057 
1058 	rw_enter_read(&t->t_lock);
1059 	if (!t->t_disabled)
1060 		timeout_add_msec(&t->t_retry_handshake, msecs);
1061 	rw_exit_read(&t->t_lock);
1062 }
1063 
1064 void
1065 wg_timers_event_handshake_responded(struct wg_timers *t)
1066 {
1067 	mtx_enter(&t->t_handshake_mtx);
1068 	getnanouptime(&t->t_handshake_last_sent);
1069 	mtx_leave(&t->t_handshake_mtx);
1070 }
1071 
1072 void
1073 wg_timers_event_handshake_complete(struct wg_timers *t)
1074 {
1075 	rw_enter_read(&t->t_lock);
1076 	if (!t->t_disabled) {
1077 		mtx_enter(&t->t_handshake_mtx);
1078 		timeout_del(&t->t_retry_handshake);
1079 		t->t_handshake_retries = 0;
1080 		getnanotime(&t->t_handshake_complete);
1081 		mtx_leave(&t->t_handshake_mtx);
1082 		wg_timers_run_send_keepalive(t);
1083 	}
1084 	rw_exit_read(&t->t_lock);
1085 }
1086 
1087 void
1088 wg_timers_event_session_derived(struct wg_timers *t)
1089 {
1090 	rw_enter_read(&t->t_lock);
1091 	if (!t->t_disabled)
1092 		timeout_add_sec(&t->t_zero_key_material, REJECT_AFTER_TIME * 3);
1093 	rw_exit_read(&t->t_lock);
1094 }
1095 
1096 void
1097 wg_timers_event_want_initiation(struct wg_timers *t)
1098 {
1099 	rw_enter_read(&t->t_lock);
1100 	if (!t->t_disabled)
1101 		wg_timers_run_send_initiation(t, 0);
1102 	rw_exit_read(&t->t_lock);
1103 }
1104 
1105 void
1106 wg_timers_event_reset_handshake_last_sent(struct wg_timers *t)
1107 {
1108 	mtx_enter(&t->t_handshake_mtx);
1109 	t->t_handshake_last_sent.tv_sec -= (REKEY_TIMEOUT + 1);
1110 	mtx_leave(&t->t_handshake_mtx);
1111 }
1112 
1113 void
1114 wg_timers_run_send_initiation(void *_t, int is_retry)
1115 {
1116 	struct wg_timers *t = _t;
1117 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1118 	if (!is_retry)
1119 		t->t_handshake_retries = 0;
1120 	if (wg_timers_expired_handshake_last_sent(t) == ETIMEDOUT)
1121 		task_add(wg_handshake_taskq, &peer->p_send_initiation);
1122 }
1123 
1124 void
1125 wg_timers_run_retry_handshake(void *_t)
1126 {
1127 	struct wg_timers *t = _t;
1128 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1129 
1130 	mtx_enter(&t->t_handshake_mtx);
1131 	if (t->t_handshake_retries <= MAX_TIMER_HANDSHAKES) {
1132 		t->t_handshake_retries++;
1133 		mtx_leave(&t->t_handshake_mtx);
1134 
1135 		DPRINTF(peer->p_sc, "Handshake for peer %llu did not complete "
1136 		    "after %d seconds, retrying (try %d)\n", peer->p_id,
1137 		    REKEY_TIMEOUT, t->t_handshake_retries + 1);
1138 		wg_peer_clear_src(peer);
1139 		wg_timers_run_send_initiation(t, 1);
1140 	} else {
1141 		mtx_leave(&t->t_handshake_mtx);
1142 
1143 		DPRINTF(peer->p_sc, "Handshake for peer %llu did not complete "
1144 		    "after %d retries, giving up\n", peer->p_id,
1145 		    MAX_TIMER_HANDSHAKES + 2);
1146 
1147 		timeout_del(&t->t_send_keepalive);
1148 		mq_purge(&peer->p_stage_queue);
1149 		if (!timeout_pending(&t->t_zero_key_material))
1150 			timeout_add_sec(&t->t_zero_key_material,
1151 			    REJECT_AFTER_TIME * 3);
1152 	}
1153 }
1154 
1155 void
1156 wg_timers_run_send_keepalive(void *_t)
1157 {
1158 	struct wg_timers *t = _t;
1159 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1160 
1161 	task_add(wg_crypt_taskq, &peer->p_send_keepalive);
1162 	if (t->t_need_another_keepalive) {
1163 		t->t_need_another_keepalive = 0;
1164 		timeout_add_sec(&t->t_send_keepalive, KEEPALIVE_TIMEOUT);
1165 	}
1166 }
1167 
1168 void
1169 wg_timers_run_new_handshake(void *_t)
1170 {
1171 	struct wg_timers *t = _t;
1172 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1173 
1174 	DPRINTF(peer->p_sc, "Retrying handshake with peer %llu because we "
1175 	    "stopped hearing back after %d seconds\n",
1176 	    peer->p_id, NEW_HANDSHAKE_TIMEOUT);
1177 	wg_peer_clear_src(peer);
1178 
1179 	wg_timers_run_send_initiation(t, 0);
1180 }
1181 
1182 void
1183 wg_timers_run_zero_key_material(void *_t)
1184 {
1185 	struct wg_timers *t = _t;
1186 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1187 
1188 	DPRINTF(peer->p_sc, "Zeroing out keys for peer %llu\n", peer->p_id);
1189 	task_add(wg_handshake_taskq, &peer->p_clear_secrets);
1190 }
1191 
1192 void
1193 wg_timers_run_persistent_keepalive(void *_t)
1194 {
1195 	struct wg_timers *t = _t;
1196 	struct wg_peer	 *peer = CONTAINER_OF(t, struct wg_peer, p_timers);
1197 	if (t->t_persistent_keepalive_interval != 0)
1198 		task_add(wg_crypt_taskq, &peer->p_send_keepalive);
1199 }
1200 
1201 /* The following functions handle handshakes */
1202 void
1203 wg_peer_send_buf(struct wg_peer *peer, uint8_t *buf, size_t len)
1204 {
1205 	struct wg_endpoint	 endpoint;
1206 
1207 	wg_peer_counters_add(peer, len, 0);
1208 	wg_timers_event_any_authenticated_packet_traversal(&peer->p_timers);
1209 	wg_timers_event_any_authenticated_packet_sent(&peer->p_timers);
1210 	wg_peer_get_endpoint(peer, &endpoint);
1211 	wg_send_buf(peer->p_sc, &endpoint, buf, len);
1212 }
1213 
1214 void
1215 wg_send_initiation(void *_peer)
1216 {
1217 	struct wg_peer			*peer = _peer;
1218 	struct wg_pkt_initiation	 pkt;
1219 
1220 	if (wg_timers_check_handshake_last_sent(&peer->p_timers) != ETIMEDOUT)
1221 		return;
1222 
1223 	DPRINTF(peer->p_sc, "Sending handshake initiation to peer %llu\n",
1224 	    peer->p_id);
1225 
1226 	if (noise_create_initiation(&peer->p_remote, &pkt.s_idx, pkt.ue, pkt.es,
1227 				    pkt.ets) != 0)
1228 		return;
1229 	pkt.t = WG_PKT_INITIATION;
1230 	cookie_maker_mac(&peer->p_cookie, &pkt.m, &pkt,
1231 	    sizeof(pkt)-sizeof(pkt.m));
1232 	wg_peer_send_buf(peer, (uint8_t *)&pkt, sizeof(pkt));
1233 	wg_timers_event_handshake_initiated(&peer->p_timers);
1234 }
1235 
1236 void
1237 wg_send_response(struct wg_peer *peer)
1238 {
1239 	struct wg_pkt_response	 pkt;
1240 
1241 	DPRINTF(peer->p_sc, "Sending handshake response to peer %llu\n",
1242 	    peer->p_id);
1243 
1244 	if (noise_create_response(&peer->p_remote, &pkt.s_idx, &pkt.r_idx,
1245 				  pkt.ue, pkt.en) != 0)
1246 		return;
1247 	if (noise_remote_begin_session(&peer->p_remote) != 0)
1248 		return;
1249 	wg_timers_event_session_derived(&peer->p_timers);
1250 	pkt.t = WG_PKT_RESPONSE;
1251 	cookie_maker_mac(&peer->p_cookie, &pkt.m, &pkt,
1252 	    sizeof(pkt)-sizeof(pkt.m));
1253 	wg_timers_event_handshake_responded(&peer->p_timers);
1254 	wg_peer_send_buf(peer, (uint8_t *)&pkt, sizeof(pkt));
1255 }
1256 
1257 void
1258 wg_send_cookie(struct wg_softc *sc, struct cookie_macs *cm, uint32_t idx,
1259     struct wg_endpoint *e)
1260 {
1261 	struct wg_pkt_cookie	pkt;
1262 
1263 	DPRINTF(sc, "Sending cookie response for denied handshake message\n");
1264 
1265 	pkt.t = WG_PKT_COOKIE;
1266 	pkt.r_idx = idx;
1267 
1268 	cookie_checker_create_payload(&sc->sc_cookie, cm, pkt.nonce,
1269 	    pkt.ec, &e->e_remote.r_sa);
1270 
1271 	wg_send_buf(sc, e, (uint8_t *)&pkt, sizeof(pkt));
1272 }
1273 
1274 void
1275 wg_send_keepalive(void *_peer)
1276 {
1277 	struct wg_peer	*peer = _peer;
1278 	struct wg_softc	*sc = peer->p_sc;
1279 	struct wg_tag	*t;
1280 	struct mbuf	*m;
1281 
1282 	if (!mq_empty(&peer->p_stage_queue))
1283 		goto send;
1284 
1285 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1286 		return;
1287 
1288 	if ((t = wg_tag_get(m)) == NULL) {
1289 		m_freem(m);
1290 		return;
1291 	}
1292 
1293 	m->m_len = 0;
1294 	m_calchdrlen(m);
1295 
1296 	t->t_peer = peer;
1297 	t->t_mbuf = NULL;
1298 	t->t_done = 0;
1299 	t->t_mtu = 0; /* MTU == 0 OK for keepalive */
1300 
1301 	mq_push(&peer->p_stage_queue, m);
1302 send:
1303 	if (noise_remote_ready(&peer->p_remote) == 0) {
1304 		wg_queue_out(sc, peer);
1305 		task_add(wg_crypt_taskq, &sc->sc_encap);
1306 	} else {
1307 		wg_timers_event_want_initiation(&peer->p_timers);
1308 	}
1309 }
1310 
1311 void
1312 wg_peer_clear_secrets(void *_peer)
1313 {
1314 	struct wg_peer *peer = _peer;
1315 	noise_remote_clear(&peer->p_remote);
1316 }
1317 
1318 void
1319 wg_handshake(struct wg_softc *sc, struct mbuf *m)
1320 {
1321 	struct wg_tag			*t;
1322 	struct wg_pkt_initiation	*init;
1323 	struct wg_pkt_response		*resp;
1324 	struct wg_pkt_cookie		*cook;
1325 	struct wg_peer			*peer;
1326 	struct noise_remote		*remote;
1327 	int				 res, underload = 0;
1328 	static struct timeval		 wg_last_underload; /* microuptime */
1329 
1330 	if (mq_len(&sc->sc_handshake_queue) >= MAX_QUEUED_HANDSHAKES/8) {
1331 		getmicrouptime(&wg_last_underload);
1332 		underload = 1;
1333 	} else if (wg_last_underload.tv_sec != 0) {
1334 		if (!ratecheck(&wg_last_underload, &underload_interval))
1335 			underload = 1;
1336 		else
1337 			bzero(&wg_last_underload, sizeof(wg_last_underload));
1338 	}
1339 
1340 	t = wg_tag_get(m);
1341 
1342 	switch (*mtod(m, uint32_t *)) {
1343 	case WG_PKT_INITIATION:
1344 		init = mtod(m, struct wg_pkt_initiation *);
1345 
1346 		res = cookie_checker_validate_macs(&sc->sc_cookie, &init->m,
1347 				init, sizeof(*init) - sizeof(init->m),
1348 				underload, &t->t_endpoint.e_remote.r_sa);
1349 
1350 		if (res == EINVAL) {
1351 			DPRINTF(sc, "Invalid initiation MAC\n");
1352 			goto error;
1353 		} else if (res == ECONNREFUSED) {
1354 			DPRINTF(sc, "Handshake ratelimited\n");
1355 			goto error;
1356 		} else if (res == EAGAIN) {
1357 			wg_send_cookie(sc, &init->m, init->s_idx,
1358 			    &t->t_endpoint);
1359 			goto error;
1360 		} else if (res != 0) {
1361 			panic("unexpected response: %d", res);
1362 		}
1363 
1364 		if (noise_consume_initiation(&sc->sc_local, &remote,
1365 		    init->s_idx, init->ue, init->es, init->ets) != 0) {
1366 			DPRINTF(sc, "Invalid handshake initiation\n");
1367 			goto error;
1368 		}
1369 
1370 		peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1371 
1372 		DPRINTF(sc, "Receiving handshake initiation from peer %llu\n",
1373 		    peer->p_id);
1374 
1375 		wg_peer_counters_add(peer, 0, sizeof(*init));
1376 		wg_peer_set_endpoint_from_tag(peer, t);
1377 		wg_send_response(peer);
1378 		break;
1379 	case WG_PKT_RESPONSE:
1380 		resp = mtod(m, struct wg_pkt_response *);
1381 
1382 		res = cookie_checker_validate_macs(&sc->sc_cookie, &resp->m,
1383 				resp, sizeof(*resp) - sizeof(resp->m),
1384 				underload, &t->t_endpoint.e_remote.r_sa);
1385 
1386 		if (res == EINVAL) {
1387 			DPRINTF(sc, "Invalid response MAC\n");
1388 			goto error;
1389 		} else if (res == ECONNREFUSED) {
1390 			DPRINTF(sc, "Handshake ratelimited\n");
1391 			goto error;
1392 		} else if (res == EAGAIN) {
1393 			wg_send_cookie(sc, &resp->m, resp->s_idx,
1394 			    &t->t_endpoint);
1395 			goto error;
1396 		} else if (res != 0) {
1397 			panic("unexpected response: %d", res);
1398 		}
1399 
1400 		if ((remote = wg_index_get(sc, resp->r_idx)) == NULL) {
1401 			DPRINTF(sc, "Unknown handshake response\n");
1402 			goto error;
1403 		}
1404 
1405 		peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1406 
1407 		if (noise_consume_response(remote, resp->s_idx, resp->r_idx,
1408 					   resp->ue, resp->en) != 0) {
1409 			DPRINTF(sc, "Invalid handshake response\n");
1410 			goto error;
1411 		}
1412 
1413 		DPRINTF(sc, "Receiving handshake response from peer %llu\n",
1414 				peer->p_id);
1415 
1416 		wg_peer_counters_add(peer, 0, sizeof(*resp));
1417 		wg_peer_set_endpoint_from_tag(peer, t);
1418 		if (noise_remote_begin_session(&peer->p_remote) == 0) {
1419 			wg_timers_event_session_derived(&peer->p_timers);
1420 			wg_timers_event_handshake_complete(&peer->p_timers);
1421 		}
1422 		break;
1423 	case WG_PKT_COOKIE:
1424 		cook = mtod(m, struct wg_pkt_cookie *);
1425 
1426 		if ((remote = wg_index_get(sc, cook->r_idx)) == NULL) {
1427 			DPRINTF(sc, "Unknown cookie index\n");
1428 			goto error;
1429 		}
1430 
1431 		peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1432 
1433 		if (cookie_maker_consume_payload(&peer->p_cookie,
1434 		    cook->nonce, cook->ec) != 0) {
1435 			DPRINTF(sc, "Could not decrypt cookie response\n");
1436 			goto error;
1437 		}
1438 
1439 		DPRINTF(sc, "Receiving cookie response\n");
1440 		goto error;
1441 	default:
1442 		panic("invalid packet in handshake queue");
1443 	}
1444 
1445 	wg_timers_event_any_authenticated_packet_received(&peer->p_timers);
1446 	wg_timers_event_any_authenticated_packet_traversal(&peer->p_timers);
1447 error:
1448 	m_freem(m);
1449 }
1450 
1451 void
1452 wg_handshake_worker(void *_sc)
1453 {
1454 	struct mbuf *m;
1455 	struct wg_softc *sc = _sc;
1456 	while ((m = mq_dequeue(&sc->sc_handshake_queue)) != NULL)
1457 		wg_handshake(sc, m);
1458 }
1459 
1460 /*
1461  * The following functions handle encapsulation (encryption) and
1462  * decapsulation (decryption). The wg_{en,de}cap functions will run in the
1463  * wg_crypt_taskq, while wg_deliver_{in,out} must be serialised and will run
1464  * in nettq.
1465  *
1466  * The packets are tracked in two queues, a serial queue and a parallel queue.
1467  *  - The parallel queue is used to distribute the encryption across multiple
1468  *    threads.
1469  *  - The serial queue ensures that packets are not reordered and are
1470  *    delivered in sequence.
1471  * The wg_tag attached to the packet contains two flags to help the two queues
1472  * interact.
1473  *  - t_done: The parallel queue has finished with the packet, now the serial
1474  *            queue can do its work.
1475  *  - t_mbuf: Used to store the *crypted packet. In the case of encryption,
1476  *            this is a newly allocated packet, and in the case of decryption,
1477  *            it is a pointer to the same packet, that has been decrypted and
1478  *            truncated. If t_mbuf is NULL, then *cryption failed and this
1479  *            packet should not be passed.
1480  * wg_{en,de}cap work on the parallel queue, while wg_deliver_{in,out} work
1481  * on the serial queue.
1482  */
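/*
 * A minimal sketch of how in-order delivery falls out of this (illustrative
 * only; the real logic lives in wg_queue_dequeue below): the serial queue is
 * only ever popped from the head, and only once the parallel worker has
 * marked that head packet done, so a slow crypto job stalls delivery rather
 * than reordering it:
 *
 *	if ((m = q->q_list.ml_head) != NULL && (*t = wg_tag_get(m))->t_done)
 *		ml_dequeue(&q->q_list);
 *	else
 *		m = NULL;
 */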
1483 void
1484 wg_encap(struct wg_softc *sc, struct mbuf *m)
1485 {
1486 	int res = 0;
1487 	struct wg_pkt_data	*data;
1488 	struct wg_peer		*peer;
1489 	struct wg_tag		*t;
1490 	struct mbuf		*mc;
1491 	size_t			 padding_len, plaintext_len, out_len;
1492 	uint64_t		 nonce;
1493 
1494 	t = wg_tag_get(m);
1495 	peer = t->t_peer;
1496 
1497 	plaintext_len = min(WG_PKT_WITH_PADDING(m->m_pkthdr.len), t->t_mtu);
1498 	padding_len = plaintext_len - m->m_pkthdr.len;
1499 	out_len = sizeof(struct wg_pkt_data) + plaintext_len + NOISE_AUTHTAG_LEN;
1500 
1501 	/*
1502 	 * For the time being we allocate a new packet with sufficient size to
1503 	 * hold the encrypted data and headers. Reusing the original mbuf would
1504 	 * be difficult, as p_encap_queue (an mbuf_list) holds a reference to it.
1505 	 * If we were to m_makespace or similar, we would risk corrupting that list.
1506 	 * Additionally, we only pass a buf and buf length to
1507 	 * noise_remote_encrypt. Technically it would be possible to teach
1508 	 * noise_remote_encrypt about mbufs, but we would need to sort out the
1509 	 * p_encap_queue situation first.
1510 	 */
1511 	if ((mc = m_clget(NULL, M_NOWAIT, out_len)) == NULL)
1512 		goto error;
1513 
1514 	data = mtod(mc, struct wg_pkt_data *);
1515 	m_copydata(m, 0, m->m_pkthdr.len, data->buf);
1516 	bzero(data->buf + m->m_pkthdr.len, padding_len);
1517 	data->t = WG_PKT_DATA;
1518 
1519 	/*
1520 	 * Copy the flow hash from the inner packet to the outer packet, so
1521 	 * that fq_codel can properly separate streams, rather than falling
1522 	 * back to random buckets.
1523 	 */
1524 	mc->m_pkthdr.ph_flowid = m->m_pkthdr.ph_flowid;
1525 
1526 	res = noise_remote_encrypt(&peer->p_remote, &data->r_idx, &nonce,
1527 				   data->buf, plaintext_len);
1528 	nonce = htole64(nonce); /* Wire format is little endian. */
1529 	memcpy(data->nonce, &nonce, sizeof(data->nonce));
1530 
1531 	if (__predict_false(res == EINVAL)) {
1532 		m_freem(mc);
1533 		goto error;
1534 	} else if (__predict_false(res == ESTALE)) {
1535 		wg_timers_event_want_initiation(&peer->p_timers);
1536 	} else if (__predict_false(res != 0)) {
1537 		panic("unexpected result: %d", res);
1538 	}
1539 
1540 	/* A packet with length 0 is a keepalive packet */
1541 	if (__predict_false(m->m_pkthdr.len == 0))
1542 		DPRINTF(sc, "Sending keepalive packet to peer %llu\n",
1543 		    peer->p_id);
1544 
1545 	mc->m_pkthdr.ph_loopcnt = m->m_pkthdr.ph_loopcnt;
1546 	mc->m_flags &= ~(M_MCAST | M_BCAST);
1547 	mc->m_len = out_len;
1548 	m_calchdrlen(mc);
1549 
1550 	/*
1551 	 * We would count ifc_opackets, ifc_obytes of m here, except if_snd
1552 	 * already does that for us, so no need to worry about it.
1553 	counters_pkt(sc->sc_if.if_counters, ifc_opackets, ifc_obytes,
1554 	    m->m_pkthdr.len);
1555 	 */
1556 	wg_peer_counters_add(peer, mc->m_pkthdr.len, 0);
1557 
1558 	t->t_mbuf = mc;
1559 error:
1560 	t->t_done = 1;
1561 	task_add(net_tq(sc->sc_if.if_index), &peer->p_deliver_out);
1562 }
1563 
1564 void
1565 wg_decap(struct wg_softc *sc, struct mbuf *m)
1566 {
1567 	int			 res, len;
1568 	struct ip		*ip;
1569 	struct ip6_hdr		*ip6;
1570 	struct wg_pkt_data	*data;
1571 	struct wg_peer		*peer, *allowed_peer;
1572 	struct wg_tag		*t;
1573 	size_t			 payload_len;
1574 	uint64_t		 nonce;
1575 
1576 	t = wg_tag_get(m);
1577 	peer = t->t_peer;
1578 
1579 	/*
1580 	 * As in wg_encap, we pass a buf and buf length to noise_remote_decrypt.
1581 	 * Again, it would be possible to teach it about mbufs, but we would need
1582 	 * to sort out the p_decap_queue situation first. However,
1583 	 * we do not need to allocate a new mbuf as the decrypted packet is
1584 	 * strictly smaller than encrypted. We just set t_mbuf to m and
1585 	 * wg_deliver_in knows how to deal with that.
1586 	 */
1587 	data = mtod(m, struct wg_pkt_data *);
1588 	payload_len = m->m_pkthdr.len - sizeof(struct wg_pkt_data);
1589 	memcpy(&nonce, data->nonce, sizeof(nonce));
1590 	nonce = le64toh(nonce); /* Wire format is little endian. */
1591 	res = noise_remote_decrypt(&peer->p_remote, data->r_idx, nonce,
1592 				   data->buf, payload_len);
1593 
1594 	if (__predict_false(res == EINVAL)) {
1595 		goto error;
1596 	} else if (__predict_false(res == ECONNRESET)) {
1597 		wg_timers_event_handshake_complete(&peer->p_timers);
1598 	} else if (__predict_false(res == ESTALE)) {
1599 		wg_timers_event_want_initiation(&peer->p_timers);
1600 	} else if (__predict_false(res != 0)) {
1601 		panic("unexpected response: %d", res);
1602 	}
1603 
1604 	wg_peer_set_endpoint_from_tag(peer, t);
1605 
1606 	wg_peer_counters_add(peer, 0, m->m_pkthdr.len);
1607 
1608 	m_adj(m, sizeof(struct wg_pkt_data));
1609 	m_adj(m, -NOISE_AUTHTAG_LEN);
1610 
1611 	counters_pkt(sc->sc_if.if_counters, ifc_ipackets, ifc_ibytes,
1612 	    m->m_pkthdr.len);
1613 
1614 	/* A packet with length 0 is a keepalive packet */
1615 	if (__predict_false(m->m_pkthdr.len == 0)) {
1616 		DPRINTF(sc, "Receiving keepalive packet from peer "
1617 		    "%llu\n", peer->p_id);
1618 		goto done;
1619 	}
1620 
1621 	/*
1622 	 * We can let the network stack handle the intricate validation of the
1623 	 * IP header; here we only check the size and the version, so we can
1624 	 * read the source address in wg_aip_lookup.
1625 	 *
1626 	 * We also need to trim the packet, as it was likely padded before
1627 	 * encryption. While we could drop it here, it will be more helpful to
1628 	 * pass it to bpf_mtap and use the counters that people are expecting
1629 	 * in ipv4_input and ipv6_input. We can rely on ipv4_input and
1630 	 * ipv6_input to properly validate the headers.
1631 	 */
1632 	ip = mtod(m, struct ip *);
1633 	ip6 = mtod(m, struct ip6_hdr *);
1634 
1635 	if (m->m_pkthdr.len >= sizeof(struct ip) && ip->ip_v == IPVERSION) {
1636 		m->m_pkthdr.ph_family = AF_INET;
1637 
1638 		len = ntohs(ip->ip_len);
1639 		if (len >= sizeof(struct ip) && len < m->m_pkthdr.len)
1640 			m_adj(m, len - m->m_pkthdr.len);
1641 
1642 		allowed_peer = wg_aip_lookup(sc->sc_aip4, &ip->ip_src);
1643 #ifdef INET6
1644 	} else if (m->m_pkthdr.len >= sizeof(struct ip6_hdr) &&
1645 	    (ip6->ip6_vfc & IPV6_VERSION_MASK) == IPV6_VERSION) {
1646 		m->m_pkthdr.ph_family = AF_INET6;
1647 
1648 		len = ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr);
1649 		if (len < m->m_pkthdr.len)
1650 			m_adj(m, len - m->m_pkthdr.len);
1651 
1652 		allowed_peer = wg_aip_lookup(sc->sc_aip6, &ip6->ip6_src);
1653 #endif
1654 	} else {
1655 		DPRINTF(sc, "Packet is neither ipv4 nor ipv6 from "
1656 		    "peer %llu\n", peer->p_id);
1657 		goto error;
1658 	}
1659 
1660 	if (__predict_false(peer != allowed_peer)) {
1661 		DPRINTF(sc, "Packet has unallowed src IP from peer "
1662 		    "%llu\n", peer->p_id);
1663 		goto error;
1664 	}
1665 
1666 	/* tunneled packet was not offloaded */
1667 	m->m_pkthdr.csum_flags = 0;
1668 
1669 	m->m_pkthdr.ph_ifidx = sc->sc_if.if_index;
1670 	m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
1671 	m->m_flags &= ~(M_MCAST | M_BCAST);
1672 #if NPF > 0
1673 	pf_pkt_addr_changed(m);
1674 #endif /* NPF > 0 */
1675 
1676 done:
1677 	t->t_mbuf = m;
1678 error:
1679 	t->t_done = 1;
1680 	task_add(net_tq(sc->sc_if.if_index), &peer->p_deliver_in);
1681 }
1682 
1683 void
1684 wg_encap_worker(void *_sc)
1685 {
1686 	struct mbuf *m;
1687 	struct wg_softc *sc = _sc;
1688 	while ((m = wg_ring_dequeue(&sc->sc_encap_ring)) != NULL)
1689 		wg_encap(sc, m);
1690 }
1691 
1692 void
1693 wg_decap_worker(void *_sc)
1694 {
1695 	struct mbuf *m;
1696 	struct wg_softc *sc = _sc;
1697 	while ((m = wg_ring_dequeue(&sc->sc_decap_ring)) != NULL)
1698 		wg_decap(sc, m);
1699 }
1700 
1701 void
1702 wg_deliver_out(void *_peer)
1703 {
1704 	struct wg_peer		*peer = _peer;
1705 	struct wg_softc		*sc = peer->p_sc;
1706 	struct wg_endpoint	 endpoint;
1707 	struct wg_tag		*t;
1708 	struct mbuf		*m;
1709 	int			 ret;
1710 
1711 	wg_peer_get_endpoint(peer, &endpoint);
1712 
1713 	while ((m = wg_queue_dequeue(&peer->p_encap_queue, &t)) != NULL) {
1714 		/* t_mbuf will contain the encrypted packet */
1715 		if (t->t_mbuf == NULL){
1716 			counters_inc(sc->sc_if.if_counters, ifc_oerrors);
1717 			m_freem(m);
1718 			continue;
1719 		}
1720 
1721 		ret = wg_send(sc, &endpoint, t->t_mbuf);
1722 
1723 		if (ret == 0) {
1724 			wg_timers_event_any_authenticated_packet_traversal(
1725 			    &peer->p_timers);
1726 			wg_timers_event_any_authenticated_packet_sent(
1727 			    &peer->p_timers);
1728 
1729 			if (m->m_pkthdr.len != 0)
1730 				wg_timers_event_data_sent(&peer->p_timers);
1731 		} else if (ret == EADDRNOTAVAIL) {
1732 			wg_peer_clear_src(peer);
1733 			wg_peer_get_endpoint(peer, &endpoint);
1734 		}
1735 
1736 		m_freem(m);
1737 	}
1738 }
1739 
1740 void
1741 wg_deliver_in(void *_peer)
1742 {
1743 	struct wg_peer	*peer = _peer;
1744 	struct wg_softc	*sc = peer->p_sc;
1745 	struct wg_tag	*t;
1746 	struct mbuf	*m;
1747 
1748 	while ((m = wg_queue_dequeue(&peer->p_decap_queue, &t)) != NULL) {
1749 		/* t_mbuf will contain the decrypted packet */
1750 		if (t->t_mbuf == NULL) {
1751 			counters_inc(sc->sc_if.if_counters, ifc_ierrors);
1752 			m_freem(m);
1753 			continue;
1754 		}
1755 
1756 		/* From here on m == t->t_mbuf */
1757 		KASSERT(m == t->t_mbuf);
1758 
1759 		wg_timers_event_any_authenticated_packet_received(
1760 		    &peer->p_timers);
1761 		wg_timers_event_any_authenticated_packet_traversal(
1762 		    &peer->p_timers);
1763 
1764 		if (m->m_pkthdr.len == 0) {
1765 			m_freem(m);
1766 			continue;
1767 		}
1768 
1769 #if NBPFILTER > 0
1770 		if (sc->sc_if.if_bpf != NULL)
1771 			bpf_mtap_af(sc->sc_if.if_bpf,
1772 			    m->m_pkthdr.ph_family, m, BPF_DIRECTION_IN);
1773 #endif
1774 
1775 		NET_LOCK();
1776 		if (m->m_pkthdr.ph_family == AF_INET)
1777 			ipv4_input(&sc->sc_if, m);
1778 #ifdef INET6
1779 		else if (m->m_pkthdr.ph_family == AF_INET6)
1780 			ipv6_input(&sc->sc_if, m);
1781 #endif
1782 		else
1783 			panic("invalid ph_family");
1784 		NET_UNLOCK();
1785 
1786 		wg_timers_event_data_received(&peer->p_timers);
1787 	}
1788 }
1789 
1790 int
1791 wg_queue_in(struct wg_softc *sc, struct wg_peer *peer, struct mbuf *m)
1792 {
1793 	struct wg_ring		*parallel = &sc->sc_decap_ring;
1794 	struct wg_queue		*serial = &peer->p_decap_queue;
1795 	struct wg_tag		*t;
1796 
1797 	mtx_enter(&serial->q_mtx);
1798 	if (serial->q_list.ml_len < MAX_QUEUED_PKT) {
1799 		ml_enqueue(&serial->q_list, m);
1800 		mtx_leave(&serial->q_mtx);
1801 	} else {
1802 		mtx_leave(&serial->q_mtx);
1803 		m_freem(m);
1804 		return ENOBUFS;
1805 	}
1806 
1807 	mtx_enter(&parallel->r_mtx);
1808 	if (parallel->r_tail - parallel->r_head < MAX_QUEUED_PKT) {
1809 		parallel->r_buf[parallel->r_tail & MAX_QUEUED_PKT_MASK] = m;
1810 		parallel->r_tail++;
1811 		mtx_leave(&parallel->r_mtx);
1812 	} else {
1813 		mtx_leave(&parallel->r_mtx);
1814 		t = wg_tag_get(m);
1815 		t->t_done = 1;
1816 		return ENOBUFS;
1817 	}
1818 
1819 	return 0;
1820 }
1821 
1822 void
1823 wg_queue_out(struct wg_softc *sc, struct wg_peer *peer)
1824 {
1825 	struct wg_ring		*parallel = &sc->sc_encap_ring;
1826 	struct wg_queue		*serial = &peer->p_encap_queue;
1827 	struct mbuf_list 	 ml, ml_free;
1828 	struct mbuf		*m;
1829 	struct wg_tag		*t;
1830 	int			 dropped;
1831 
1832 	/*
1833 	 * We delist all staged packets and then add them to the queues. This
1834 	 * can race with wg_qstart when called from wg_send_keepalive; however,
1835 	 * wg_qstart will not race as it is serialised.
1836 	 */
1837 	mq_delist(&peer->p_stage_queue, &ml);
1838 	ml_init(&ml_free);
1839 
1840 	while ((m = ml_dequeue(&ml)) != NULL) {
1841 		mtx_enter(&serial->q_mtx);
1842 		if (serial->q_list.ml_len < MAX_QUEUED_PKT) {
1843 			ml_enqueue(&serial->q_list, m);
1844 			mtx_leave(&serial->q_mtx);
1845 		} else {
1846 			mtx_leave(&serial->q_mtx);
1847 			ml_enqueue(&ml_free, m);
1848 			continue;
1849 		}
1850 
1851 		mtx_enter(&parallel->r_mtx);
1852 		if (parallel->r_tail - parallel->r_head < MAX_QUEUED_PKT) {
1853 			parallel->r_buf[parallel->r_tail & MAX_QUEUED_PKT_MASK] = m;
1854 			parallel->r_tail++;
1855 			mtx_leave(&parallel->r_mtx);
1856 		} else {
1857 			mtx_leave(&parallel->r_mtx);
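			/*
			 * Encap ring full: the mbuf stays on the serial queue;
			 * mark its tag done with no ciphertext so
			 * wg_deliver_out will drop it and count an error.
			 */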
1858 			t = wg_tag_get(m);
1859 			t->t_done = 1;
1860 		}
1861 	}
1862 
1863 	if ((dropped = ml_purge(&ml_free)) > 0)
1864 		counters_add(sc->sc_if.if_counters, ifc_oqdrops, dropped);
1865 }
1866 
1867 struct mbuf *
1868 wg_ring_dequeue(struct wg_ring *r)
1869 {
1870 	struct mbuf *m = NULL;
1871 	mtx_enter(&r->r_mtx);
1872 	if (r->r_head != r->r_tail) {
1873 		m = r->r_buf[r->r_head & MAX_QUEUED_PKT_MASK];
1874 		r->r_head++;
1875 	}
1876 	mtx_leave(&r->r_mtx);
1877 	return m;
1878 }
1879 
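/*
 * Dequeue the head of a serial queue, but only once the crypto worker has
 * marked its tag done. This preserves per-peer packet ordering even though
 * encryption and decryption run in parallel.
 */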
1880 struct mbuf *
1881 wg_queue_dequeue(struct wg_queue *q, struct wg_tag **t)
1882 {
1883 	struct mbuf *m;
1884 	mtx_enter(&q->q_mtx);
1885 	if ((m = q->q_list.ml_head) != NULL && (*t = wg_tag_get(m))->t_done)
1886 		ml_dequeue(&q->q_list);
1887 	else
1888 		m = NULL;
1889 	mtx_leave(&q->q_mtx);
1890 	return m;
1891 }
1892 
1893 size_t
1894 wg_queue_len(struct wg_queue *q)
1895 {
1896 	size_t len;
1897 	mtx_enter(&q->q_mtx);
1898 	len = q->q_list.ml_len;
1899 	mtx_leave(&q->q_mtx);
1900 	return len;
1901 }
1902 
1903 struct noise_remote *
1904 wg_remote_get(void *_sc, uint8_t public[NOISE_PUBLIC_KEY_LEN])
1905 {
1906 	struct wg_peer	*peer;
1907 	struct wg_softc	*sc = _sc;
1908 	if ((peer = wg_peer_lookup(sc, public)) == NULL)
1909 		return NULL;
1910 	return &peer->p_remote;
1911 }
1912 
1913 uint32_t
1914 wg_index_set(void *_sc, struct noise_remote *remote)
1915 {
1916 	struct wg_peer	*peer;
1917 	struct wg_softc	*sc = _sc;
1918 	struct wg_index *index, *iter;
1919 	uint32_t	 key;
1920 
1921 	/*
1922 	 * We can modify this without a lock as wg_index_set and wg_index_drop
1923 	 * are guaranteed to be serialised (per remote).
1924 	 */
1925 	peer = CONTAINER_OF(remote, struct wg_peer, p_remote);
1926 	index = SLIST_FIRST(&peer->p_unused_index);
1927 	KASSERT(index != NULL);
1928 	SLIST_REMOVE_HEAD(&peer->p_unused_index, i_unused_entry);
1929 
1930 	index->i_value = remote;
1931 
1932 	mtx_enter(&sc->sc_index_mtx);
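	/*
	 * Pick a random 32-bit index for this handshake, retrying in the
	 * unlikely case the chosen value is already in use.
	 */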
1933 assign_id:
1934 	key = index->i_key = arc4random();
1935 	key &= sc->sc_index_mask;
1936 	LIST_FOREACH(iter, &sc->sc_index[key], i_entry)
1937 		if (iter->i_key == index->i_key)
1938 			goto assign_id;
1939 
1940 	LIST_INSERT_HEAD(&sc->sc_index[key], index, i_entry);
1941 
1942 	mtx_leave(&sc->sc_index_mtx);
1943 
1944 	/* Likewise, no lock is needed to read index->i_key here. */
1945 	return index->i_key;
1946 }
1947 
1948 struct noise_remote *
1949 wg_index_get(void *_sc, uint32_t key0)
1950 {
1951 	struct wg_softc		*sc = _sc;
1952 	struct wg_index		*iter;
1953 	struct noise_remote	*remote = NULL;
1954 	uint32_t		 key = key0 & sc->sc_index_mask;
1955 
1956 	mtx_enter(&sc->sc_index_mtx);
1957 	LIST_FOREACH(iter, &sc->sc_index[key], i_entry)
1958 		if (iter->i_key == key0) {
1959 			remote = iter->i_value;
1960 			break;
1961 		}
1962 	mtx_leave(&sc->sc_index_mtx);
1963 	return remote;
1964 }
1965 
1966 void
1967 wg_index_drop(void *_sc, uint32_t key0)
1968 {
1969 	struct wg_softc	*sc = _sc;
1970 	struct wg_index	*iter;
1971 	struct wg_peer	*peer = NULL;
1972 	uint32_t	 key = key0 & sc->sc_index_mask;
1973 
1974 	mtx_enter(&sc->sc_index_mtx);
1975 	LIST_FOREACH(iter, &sc->sc_index[key], i_entry)
1976 		if (iter->i_key == key0) {
1977 			LIST_REMOVE(iter, i_entry);
1978 			break;
1979 		}
1980 	mtx_leave(&sc->sc_index_mtx);
1981 
1982 	/* We expect a peer */
1983 	peer = CONTAINER_OF(iter->i_value, struct wg_peer, p_remote);
1984 	KASSERT(peer != NULL);
1985 	SLIST_INSERT_HEAD(&peer->p_unused_index, iter, i_unused_entry);
1986 }
1987 
1988 struct mbuf *
1989 wg_input(void *_sc, struct mbuf *m, struct ip *ip, struct ip6_hdr *ip6,
1990     void *_uh, int hlen)
1991 {
1992 	struct wg_pkt_data	*data;
1993 	struct noise_remote	*remote;
1994 	struct wg_tag		*t;
1995 	struct wg_softc		*sc = _sc;
1996 	struct udphdr		*uh = _uh;
1997 
1998 	NET_ASSERT_LOCKED();
1999 
2000 	if ((t = wg_tag_get(m)) == NULL) {
2001 		m_freem(m);
2002 		return NULL;
2003 	}
2004 
2005 	if (ip != NULL) {
2006 		t->t_endpoint.e_remote.r_sa.sa_len = sizeof(struct sockaddr_in);
2007 		t->t_endpoint.e_remote.r_sa.sa_family = AF_INET;
2008 		t->t_endpoint.e_remote.r_sin.sin_port = uh->uh_sport;
2009 		t->t_endpoint.e_remote.r_sin.sin_addr = ip->ip_src;
2010 		t->t_endpoint.e_local.l_in = ip->ip_dst;
2011 #ifdef INET6
2012 	} else if (ip6 != NULL) {
2013 		t->t_endpoint.e_remote.r_sa.sa_len = sizeof(struct sockaddr_in6);
2014 		t->t_endpoint.e_remote.r_sa.sa_family = AF_INET6;
2015 		t->t_endpoint.e_remote.r_sin6.sin6_port = uh->uh_sport;
2016 		t->t_endpoint.e_remote.r_sin6.sin6_addr = ip6->ip6_src;
2017 		t->t_endpoint.e_local.l_in6 = ip6->ip6_dst;
2018 #endif
2019 	} else {
2020 		m_freem(m);
2021 		return NULL;
2022 	}
2023 
2024 	/* m has an IP/IPv6 header of hlen length; we don't need it anymore. */
2025 	m_adj(m, hlen);
2026 
2027 	/*
2028 	 * Ensure mbuf is contiguous over full length of packet. This is done
2029 	 * so we can directly read the handshake values in wg_handshake, and so
2030 	 * we can decrypt a transport packet by passing a single buffer to
2031 	 * noise_remote_decrypt in wg_decap.
2032 	 */
2033 	if ((m = m_pullup(m, m->m_pkthdr.len)) == NULL)
2034 		return NULL;
2035 
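	/*
	 * Handshake messages have fixed sizes and are handed off to the
	 * handshake taskq. Data packets are matched to their peer via the
	 * receiver index and queued for parallel decryption.
	 */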
2036 	if ((m->m_pkthdr.len == sizeof(struct wg_pkt_initiation) &&
2037 		*mtod(m, uint32_t *) == WG_PKT_INITIATION) ||
2038 	    (m->m_pkthdr.len == sizeof(struct wg_pkt_response) &&
2039 		*mtod(m, uint32_t *) == WG_PKT_RESPONSE) ||
2040 	    (m->m_pkthdr.len == sizeof(struct wg_pkt_cookie) &&
2041 		*mtod(m, uint32_t *) == WG_PKT_COOKIE)) {
2042 
2043 		if (mq_enqueue(&sc->sc_handshake_queue, m) != 0)
2044 			DPRINTF(sc, "Dropping handshake packet\n");
2045 		task_add(wg_handshake_taskq, &sc->sc_handshake);
2046 
2047 	} else if (m->m_pkthdr.len >= sizeof(struct wg_pkt_data) +
2048 	    NOISE_AUTHTAG_LEN && *mtod(m, uint32_t *) == WG_PKT_DATA) {
2049 
2050 		data = mtod(m, struct wg_pkt_data *);
2051 
2052 		if ((remote = wg_index_get(sc, data->r_idx)) != NULL) {
2053 			t->t_peer = CONTAINER_OF(remote, struct wg_peer,
2054 			    p_remote);
2055 			t->t_mbuf = NULL;
2056 			t->t_done = 0;
2057 
2058 			if (wg_queue_in(sc, t->t_peer, m) != 0)
2059 				counters_inc(sc->sc_if.if_counters,
2060 				    ifc_iqdrops);
2061 			task_add(wg_crypt_taskq, &sc->sc_decap);
2062 		} else {
2063 			counters_inc(sc->sc_if.if_counters, ifc_ierrors);
2064 			m_freem(m);
2065 		}
2066 	} else {
2067 		counters_inc(sc->sc_if.if_counters, ifc_ierrors);
2068 		m_freem(m);
2069 	}
2070 
2071 	return NULL;
2072 }
2073 
2074 void
2075 wg_qstart(struct ifqueue *ifq)
2076 {
2077 	struct ifnet		*ifp = ifq->ifq_if;
2078 	struct wg_softc		*sc = ifp->if_softc;
2079 	struct wg_peer		*peer;
2080 	struct wg_tag		*t;
2081 	struct mbuf		*m;
2082 	SLIST_HEAD(,wg_peer)	 start_list;
2083 
2084 	SLIST_INIT(&start_list);
2085 
2086 	/*
2087 	 * We should be OK to modify p_start_list and p_start_onlist in this
2088 	 * function as there should only be one ifp->if_qstart invoked at a
2089 	 * time.
2090 	 */
2091 	while ((m = ifq_dequeue(ifq)) != NULL) {
2092 		t = wg_tag_get(m);
2093 		peer = t->t_peer;
2094 		if (mq_push(&peer->p_stage_queue, m) != 0)
2095 			counters_inc(ifp->if_counters, ifc_oqdrops);
2096 		if (!peer->p_start_onlist) {
2097 			SLIST_INSERT_HEAD(&start_list, peer, p_start_list);
2098 			peer->p_start_onlist = 1;
2099 		}
2100 	}
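	/*
	 * For each peer staged above: if a session is already established,
	 * move its staged packets onto the crypto queues, otherwise ask the
	 * timers to initiate a handshake.
	 */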
2101 	SLIST_FOREACH(peer, &start_list, p_start_list) {
2102 		if (noise_remote_ready(&peer->p_remote) == 0)
2103 			wg_queue_out(sc, peer);
2104 		else
2105 			wg_timers_event_want_initiation(&peer->p_timers);
2106 		peer->p_start_onlist = 0;
2107 	}
2108 	task_add(wg_crypt_taskq, &sc->sc_encap);
2109 }
2110 
2111 int
2112 wg_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
2113     struct rtentry *rt)
2114 {
2115 	struct wg_softc	*sc = ifp->if_softc;
2116 	struct wg_peer	*peer;
2117 	struct wg_tag	*t;
2118 	int		 af, ret = EINVAL;
2119 
2120 	NET_ASSERT_LOCKED();
2121 
2122 	if ((t = wg_tag_get(m)) == NULL) {
2123 		ret = ENOBUFS;
2124 		goto error;
2125 	}
2126 
2127 	m->m_pkthdr.ph_family = sa->sa_family;
2128 	if (sa->sa_family == AF_INET) {
2129 		peer = wg_aip_lookup(sc->sc_aip4,
2130 		    &mtod(m, struct ip *)->ip_dst);
2131 #ifdef INET6
2132 	} else if (sa->sa_family == AF_INET6) {
2133 		peer = wg_aip_lookup(sc->sc_aip6,
2134 		    &mtod(m, struct ip6_hdr *)->ip6_dst);
2135 #endif
2136 	} else {
2137 		ret = EAFNOSUPPORT;
2138 		goto error;
2139 	}
2140 
2141 #if NBPFILTER > 0
2142 	if (sc->sc_if.if_bpf)
2143 		bpf_mtap_af(sc->sc_if.if_bpf, sa->sa_family, m,
2144 		    BPF_DIRECTION_OUT);
2145 #endif
2146 
2147 	if (peer == NULL) {
2148 		ret = ENETUNREACH;
2149 		goto error;
2150 	}
2151 
2152 	af = peer->p_endpoint.e_remote.r_sa.sa_family;
2153 	if (af != AF_INET && af != AF_INET6) {
2154 		DPRINTF(sc, "No valid endpoint has been configured or "
2155 				"discovered for peer %llu\n", peer->p_id);
2156 		ret = EDESTADDRREQ;
2157 		goto error;
2158 	}
2159 
2160 	if (m->m_pkthdr.ph_loopcnt++ > M_MAXLOOP) {
2161 		DPRINTF(sc, "Packet looped\n");
2162 		ret = ELOOP;
2163 		goto error;
2164 	}
2165 
2166 	/*
2167 	 * As we hold a reference to the peer in the mbuf, we can't handle a
2168 	 * delayed packet without doing some refcnting. If a peer is removed
2169 	 * while a delayed packet holds a reference, bad things will happen.
2170 	 * For the time being, delayed packets are unsupported. This may be
2171 	 * fixed with another aip_lookup in wg_qstart, or refcnting as above.
2172 	 */
2173 	if (m->m_pkthdr.pf.delay > 0) {
2174 		DPRINTF(sc, "PF delay unsupported\n");
2175 		ret = EOPNOTSUPP;
2176 		goto error;
2177 	}
2178 
2179 	t->t_peer = peer;
2180 	t->t_mbuf = NULL;
2181 	t->t_done = 0;
2182 	t->t_mtu = ifp->if_mtu;
2183 
2184 	/*
2185 	 * We still have an issue with ifq: it counts a packet even if it is
2186 	 * later dropped in wg_qstart or never encrypted. Such packets are
2187 	 * also counted as ofails or oqdrops, so they end up counted twice.
2188 	 */
2189 	return if_enqueue(ifp, m);
2190 error:
2191 	counters_inc(ifp->if_counters, ifc_oerrors);
2192 	m_freem(m);
2193 	return ret;
2194 }
2195 
2196 int
2197 wg_ioctl_set(struct wg_softc *sc, struct wg_data_io *data)
2198 {
2199 	struct wg_interface_io	*iface_p, iface_o;
2200 	struct wg_peer_io	*peer_p, peer_o;
2201 	struct wg_aip_io	*aip_p, aip_o;
2202 
2203 	struct wg_peer		*peer, *tpeer;
2204 	struct wg_aip		*aip, *taip;
2205 
2206 	in_port_t		 port;
2207 	int			 rtable;
2208 
2209 	uint8_t			 public[WG_KEY_SIZE], private[WG_KEY_SIZE];
2210 	size_t			 i, j;
2211 	int			 ret, has_identity;
2212 
2213 	if ((ret = suser(curproc)) != 0)
2214 		return ret;
2215 
2216 	rw_enter_write(&sc->sc_lock);
2217 
2218 	iface_p = data->wgd_interface;
2219 	if ((ret = copyin(iface_p, &iface_o, sizeof(iface_o))) != 0)
2220 		goto error;
2221 
2222 	if (iface_o.i_flags & WG_INTERFACE_REPLACE_PEERS)
2223 		TAILQ_FOREACH_SAFE(peer, &sc->sc_peer_seq, p_seq_entry, tpeer)
2224 			wg_peer_destroy(peer);
2225 
2226 	if (iface_o.i_flags & WG_INTERFACE_HAS_PRIVATE &&
2227 	    (noise_local_keys(&sc->sc_local, NULL, private) ||
2228 	     timingsafe_bcmp(private, iface_o.i_private, WG_KEY_SIZE))) {
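		/*
		 * The private key is changing. If one of our peers happens to
		 * use the matching public key, remove it, as an interface
		 * cannot be a peer of itself.
		 */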
2229 		if (curve25519_generate_public(public, iface_o.i_private)) {
2230 			if ((peer = wg_peer_lookup(sc, public)) != NULL)
2231 				wg_peer_destroy(peer);
2232 		}
2233 		noise_local_lock_identity(&sc->sc_local);
2234 		has_identity = noise_local_set_private(&sc->sc_local,
2235 						       iface_o.i_private);
2236 		TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2237 			noise_remote_precompute(&peer->p_remote);
2238 			wg_timers_event_reset_handshake_last_sent(&peer->p_timers);
2239 			noise_remote_expire_current(&peer->p_remote);
2240 		}
2241 		cookie_checker_update(&sc->sc_cookie,
2242 				      has_identity == 0 ? public : NULL);
2243 		noise_local_unlock_identity(&sc->sc_local);
2244 	}
2245 
2246 	if (iface_o.i_flags & WG_INTERFACE_HAS_PORT)
2247 		port = htons(iface_o.i_port);
2248 	else
2249 		port = sc->sc_udp_port;
2250 
2251 	if (iface_o.i_flags & WG_INTERFACE_HAS_RTABLE)
2252 		rtable = iface_o.i_rtable;
2253 	else
2254 		rtable = sc->sc_udp_rtable;
2255 
2256 	if (port != sc->sc_udp_port || rtable != sc->sc_udp_rtable) {
2257 		TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry)
2258 			wg_peer_clear_src(peer);
2259 
2260 		if (sc->sc_if.if_flags & IFF_RUNNING)
2261 			if ((ret = wg_bind(sc, &port, &rtable)) != 0)
2262 				goto error;
2263 
2264 		sc->sc_udp_port = port;
2265 		sc->sc_udp_rtable = rtable;
2266 	}
2267 
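	/*
	 * The userland buffer is a packed sequence: each wg_peer_io is
	 * immediately followed by its array of wg_aip_io entries, so the next
	 * peer record begins right after the current peer's AIPs.
	 */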
2268 	peer_p = &iface_p->i_peers[0];
2269 	for (i = 0; i < iface_o.i_peers_count; i++) {
2270 		if ((ret = copyin(peer_p, &peer_o, sizeof(peer_o))) != 0)
2271 			goto error;
2272 
2273 		/* Peer must have public key */
2274 		if (!(peer_o.p_flags & WG_PEER_HAS_PUBLIC))
2275 			goto next_peer;
2276 
2277 		/* 0 = latest protocol, 1 = this protocol */
2278 		if (peer_o.p_protocol_version != 0) {
2279 			if (peer_o.p_protocol_version > 1) {
2280 				ret = EPFNOSUPPORT;
2281 				goto error;
2282 			}
2283 		}
2284 
2285 		/* Get local public and check that peer key doesn't match */
2286 		if (noise_local_keys(&sc->sc_local, public, NULL) == 0 &&
2287 		    bcmp(public, peer_o.p_public, WG_KEY_SIZE) == 0)
2288 			goto next_peer;
2289 
2290 		/* Lookup peer, or create if it doesn't exist */
2291 		if ((peer = wg_peer_lookup(sc, peer_o.p_public)) == NULL) {
2292 			/* If we want to delete, there is no need to create a
2293 			 * new one. Also, don't create a new one if we only
2294 			 * want to update. */
2295 			if (peer_o.p_flags & (WG_PEER_REMOVE|WG_PEER_UPDATE))
2296 				goto next_peer;
2297 
2298 			if ((peer = wg_peer_create(sc,
2299 			    peer_o.p_public)) == NULL) {
2300 				ret = ENOMEM;
2301 				goto error;
2302 			}
2303 		}
2304 
2305 		/* Remove peer and continue if specified */
2306 		if (peer_o.p_flags & WG_PEER_REMOVE) {
2307 			wg_peer_destroy(peer);
2308 			goto next_peer;
2309 		}
2310 
2311 		if (peer_o.p_flags & WG_PEER_HAS_ENDPOINT)
2312 			wg_peer_set_sockaddr(peer, &peer_o.p_sa);
2313 
2314 		if (peer_o.p_flags & WG_PEER_HAS_PSK)
2315 			noise_remote_set_psk(&peer->p_remote, peer_o.p_psk);
2316 
2317 		if (peer_o.p_flags & WG_PEER_HAS_PKA)
2318 			wg_timers_set_persistent_keepalive(&peer->p_timers,
2319 			    peer_o.p_pka);
2320 
2321 		if (peer_o.p_flags & WG_PEER_REPLACE_AIPS) {
2322 			LIST_FOREACH_SAFE(aip, &peer->p_aip, a_entry, taip) {
2323 				wg_aip_remove(sc, peer, &aip->a_data);
2324 			}
2325 		}
2326 
2327 		if (peer_o.p_flags & WG_PEER_SET_DESCRIPTION)
2328 			strlcpy(peer->p_description, peer_o.p_description,
2329 			    IFDESCRSIZE);
2330 
2331 		aip_p = &peer_p->p_aips[0];
2332 		for (j = 0; j < peer_o.p_aips_count; j++) {
2333 			if ((ret = copyin(aip_p, &aip_o, sizeof(aip_o))) != 0)
2334 				goto error;
2335 			ret = wg_aip_add(sc, peer, &aip_o);
2336 			if (ret != 0)
2337 				goto error;
2338 			aip_p++;
2339 		}
2340 
2341 		peer_p = (struct wg_peer_io *)aip_p;
2342 		continue;
2343 next_peer:
2344 		aip_p = &peer_p->p_aips[0];
2345 		aip_p += peer_o.p_aips_count;
2346 		peer_p = (struct wg_peer_io *)aip_p;
2347 	}
2348 
2349 error:
2350 	rw_exit_write(&sc->sc_lock);
2351 	explicit_bzero(&iface_o, sizeof(iface_o));
2352 	explicit_bzero(&peer_o, sizeof(peer_o));
2353 	explicit_bzero(&aip_o, sizeof(aip_o));
2354 	explicit_bzero(public, sizeof(public));
2355 	explicit_bzero(private, sizeof(private));
2356 	return ret;
2357 }
2358 
2359 int
2360 wg_ioctl_get(struct wg_softc *sc, struct wg_data_io *data)
2361 {
2362 	struct wg_interface_io	*iface_p, iface_o;
2363 	struct wg_peer_io	*peer_p, peer_o;
2364 	struct wg_aip_io	*aip_p;
2365 
2366 	struct wg_peer		*peer;
2367 	struct wg_aip		*aip;
2368 
2369 	size_t			 size, peer_count, aip_count;
2370 	int			 ret = 0, is_suser = suser(curproc) == 0;
2371 
2372 	size = sizeof(struct wg_interface_io);
2373 	if (data->wgd_size < size && !is_suser)
2374 		goto ret_size;
2375 
2376 	iface_p = data->wgd_interface;
2377 	bzero(&iface_o, sizeof(iface_o));
2378 
2379 	rw_enter_read(&sc->sc_lock);
2380 
2381 	if (sc->sc_udp_port != 0) {
2382 		iface_o.i_port = ntohs(sc->sc_udp_port);
2383 		iface_o.i_flags |= WG_INTERFACE_HAS_PORT;
2384 	}
2385 
2386 	if (sc->sc_udp_rtable != 0) {
2387 		iface_o.i_rtable = sc->sc_udp_rtable;
2388 		iface_o.i_flags |= WG_INTERFACE_HAS_RTABLE;
2389 	}
2390 
2391 	if (!is_suser)
2392 		goto copy_out_iface;
2393 
2394 	if (noise_local_keys(&sc->sc_local, iface_o.i_public,
2395 	    iface_o.i_private) == 0) {
2396 		iface_o.i_flags |= WG_INTERFACE_HAS_PUBLIC;
2397 		iface_o.i_flags |= WG_INTERFACE_HAS_PRIVATE;
2398 	}
2399 
2400 	size += sizeof(struct wg_peer_io) * sc->sc_peer_num;
2401 	size += sizeof(struct wg_aip_io) * sc->sc_aip_num;
2402 	if (data->wgd_size < size)
2403 		goto unlock_and_ret_size;
2404 
2405 	peer_count = 0;
2406 	peer_p = &iface_p->i_peers[0];
2407 	TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2408 		bzero(&peer_o, sizeof(peer_o));
2409 		peer_o.p_flags = WG_PEER_HAS_PUBLIC;
2410 		peer_o.p_protocol_version = 1;
2411 
2412 		if (noise_remote_keys(&peer->p_remote, peer_o.p_public,
2413 		    peer_o.p_psk) == 0)
2414 			peer_o.p_flags |= WG_PEER_HAS_PSK;
2415 
2416 		if (wg_timers_get_persistent_keepalive(&peer->p_timers,
2417 		    &peer_o.p_pka) == 0)
2418 			peer_o.p_flags |= WG_PEER_HAS_PKA;
2419 
2420 		if (wg_peer_get_sockaddr(peer, &peer_o.p_sa) == 0)
2421 			peer_o.p_flags |= WG_PEER_HAS_ENDPOINT;
2422 
2423 		mtx_enter(&peer->p_counters_mtx);
2424 		peer_o.p_txbytes = peer->p_counters_tx;
2425 		peer_o.p_rxbytes = peer->p_counters_rx;
2426 		mtx_leave(&peer->p_counters_mtx);
2427 
2428 		wg_timers_get_last_handshake(&peer->p_timers,
2429 		    &peer_o.p_last_handshake);
2430 
2431 		aip_count = 0;
2432 		aip_p = &peer_p->p_aips[0];
2433 		LIST_FOREACH(aip, &peer->p_aip, a_entry) {
2434 			if ((ret = copyout(&aip->a_data, aip_p, sizeof(*aip_p))) != 0)
2435 				goto unlock_and_ret_size;
2436 			aip_p++;
2437 			aip_count++;
2438 		}
2439 		peer_o.p_aips_count = aip_count;
2440 
2441 		strlcpy(peer_o.p_description, peer->p_description, IFDESCRSIZE);
2442 
2443 		if ((ret = copyout(&peer_o, peer_p, sizeof(peer_o))) != 0)
2444 			goto unlock_and_ret_size;
2445 
2446 		peer_p = (struct wg_peer_io *)aip_p;
2447 		peer_count++;
2448 	}
2449 	iface_o.i_peers_count = peer_count;
2450 
2451 copy_out_iface:
2452 	ret = copyout(&iface_o, iface_p, sizeof(iface_o));
2453 unlock_and_ret_size:
2454 	rw_exit_read(&sc->sc_lock);
2455 	explicit_bzero(&iface_o, sizeof(iface_o));
2456 	explicit_bzero(&peer_o, sizeof(peer_o));
2457 ret_size:
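	/* Always report the size required so the caller can retry. */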
2458 	data->wgd_size = size;
2459 	return ret;
2460 }
2461 
2462 int
2463 wg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2464 {
2465 	struct ifreq	*ifr = (struct ifreq *) data;
2466 	struct wg_softc	*sc = ifp->if_softc;
2467 	int		 ret = 0;
2468 
2469 	switch (cmd) {
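	/*
	 * The wg(4)-specific ioctls serialise on sc_lock and copy large
	 * buffers to and from userland (and SIOCSWG may rebind the socket),
	 * so the net lock is released around them.
	 */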
2470 	case SIOCSWG:
2471 		NET_UNLOCK();
2472 		ret = wg_ioctl_set(sc, (struct wg_data_io *) data);
2473 		NET_LOCK();
2474 		break;
2475 	case SIOCGWG:
2476 		NET_UNLOCK();
2477 		ret = wg_ioctl_get(sc, (struct wg_data_io *) data);
2478 		NET_LOCK();
2479 		break;
2480 	/* Interface IOCTLs */
2481 	case SIOCSIFADDR:
2482 		SET(ifp->if_flags, IFF_UP);
2483 		/* FALLTHROUGH */
2484 	case SIOCSIFFLAGS:
2485 		if (ISSET(ifp->if_flags, IFF_UP))
2486 			ret = wg_up(sc);
2487 		else
2488 			wg_down(sc);
2489 		break;
2490 	case SIOCSIFMTU:
2491 		/* Arbitrary limits */
2492 		if (ifr->ifr_mtu <= 0 || ifr->ifr_mtu > 9000)
2493 			ret = EINVAL;
2494 		else
2495 			ifp->if_mtu = ifr->ifr_mtu;
2496 		break;
2497 	case SIOCADDMULTI:
2498 	case SIOCDELMULTI:
2499 		break;
2500 	default:
2501 		ret = ENOTTY;
2502 	}
2503 
2504 	return ret;
2505 }
2506 
2507 int
2508 wg_up(struct wg_softc *sc)
2509 {
2510 	struct wg_peer	*peer;
2511 	int		 ret = 0;
2512 
2513 	NET_ASSERT_LOCKED();
2514 	/*
2515 	 * We use IFF_RUNNING for exclusive access here. We also want an
2516 	 * exclusive sc_lock as wg_bind may write to sc_udp_port. We also
2517 	 * want to drop NET_LOCK as we want to call socreate, sobind, etc.
2518 	 * Once solock is no longer the same as NET_LOCK, we may avoid this.
2519 	 */
2520 	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
2521 		SET(sc->sc_if.if_flags, IFF_RUNNING);
2522 		NET_UNLOCK();
2523 
2524 		rw_enter_write(&sc->sc_lock);
2525 		/*
2526 		 * If we successfully bind the socket, then enable the timers
2527 		 * for the peer. This will send all staged packets and a
2528 		 * keepalive if necessary.
2529 		 */
2530 		ret = wg_bind(sc, &sc->sc_udp_port, &sc->sc_udp_rtable);
2531 		if (ret == 0) {
2532 			TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2533 				wg_timers_enable(&peer->p_timers);
2534 				wg_queue_out(sc, peer);
2535 			}
2536 		}
2537 		rw_exit_write(&sc->sc_lock);
2538 
2539 		NET_LOCK();
2540 		if (ret != 0)
2541 			CLR(sc->sc_if.if_flags, IFF_RUNNING);
2542 	}
2543 	return ret;
2544 }
2545 
2546 void
2547 wg_down(struct wg_softc *sc)
2548 {
2549 	struct wg_peer	*peer;
2550 
2551 	NET_ASSERT_LOCKED();
2552 	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING))
2553 		return;
2554 	CLR(sc->sc_if.if_flags, IFF_RUNNING);
2555 	NET_UNLOCK();
2556 
2557 	/*
2558 	 * We only need a read lock here, as we aren't writing to anything
2559 	 * that isn't granularly locked.
2560 	 */
2561 	rw_enter_read(&sc->sc_lock);
2562 	TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2563 		mq_purge(&peer->p_stage_queue);
2564 		wg_timers_disable(&peer->p_timers);
2565 	}
2566 
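	/*
	 * Wait for any handshake work still in flight to finish before
	 * clearing each peer's keys and handshake state.
	 */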
2567 	taskq_barrier(wg_handshake_taskq);
2568 	TAILQ_FOREACH(peer, &sc->sc_peer_seq, p_seq_entry) {
2569 		noise_remote_clear(&peer->p_remote);
2570 		wg_timers_event_reset_handshake_last_sent(&peer->p_timers);
2571 	}
2572 
2573 	wg_unbind(sc);
2574 	rw_exit_read(&sc->sc_lock);
2575 	NET_LOCK();
2576 }
2577 
2578 int
2579 wg_clone_create(struct if_clone *ifc, int unit)
2580 {
2581 	struct ifnet		*ifp;
2582 	struct wg_softc		*sc;
2583 	struct noise_upcall	 local_upcall;
2584 
2585 	KERNEL_ASSERT_LOCKED();
2586 
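	/*
	 * The first wg(4) interface creates the taskqs shared by all
	 * interfaces: a two-thread handshake queue and one crypto thread per
	 * CPU.
	 */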
2587 	if (wg_counter == 0) {
2588 		wg_handshake_taskq = taskq_create("wg_handshake",
2589 		    2, IPL_NET, TASKQ_MPSAFE);
2590 		wg_crypt_taskq = taskq_create("wg_crypt",
2591 		    ncpus, IPL_NET, TASKQ_MPSAFE);
2592 
2593 		if (wg_handshake_taskq == NULL || wg_crypt_taskq == NULL) {
2594 			if (wg_handshake_taskq != NULL)
2595 				taskq_destroy(wg_handshake_taskq);
2596 			if (wg_crypt_taskq != NULL)
2597 				taskq_destroy(wg_crypt_taskq);
2598 			wg_handshake_taskq = NULL;
2599 			wg_crypt_taskq = NULL;
2600 			return ENOTRECOVERABLE;
2601 		}
2602 	}
2603 	wg_counter++;
2604 
2605 	if ((sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL)
2606 		goto ret_00;
2607 
2608 	local_upcall.u_arg = sc;
2609 	local_upcall.u_remote_get = wg_remote_get;
2610 	local_upcall.u_index_set = wg_index_set;
2611 	local_upcall.u_index_drop = wg_index_drop;
2612 
2613 	TAILQ_INIT(&sc->sc_peer_seq);
2614 
2615 	/* sc_if is initialised after everything else */
2616 	arc4random_buf(&sc->sc_secret, sizeof(sc->sc_secret));
2617 
2618 	rw_init(&sc->sc_lock, "wg");
2619 	noise_local_init(&sc->sc_local, &local_upcall);
2620 	if (cookie_checker_init(&sc->sc_cookie, &wg_ratelimit_pool) != 0)
2621 		goto ret_01;
2622 	sc->sc_udp_port = 0;
2623 	sc->sc_udp_rtable = 0;
2624 
2625 	rw_init(&sc->sc_so_lock, "wg_so");
2626 	sc->sc_so4 = NULL;
2627 #ifdef INET6
2628 	sc->sc_so6 = NULL;
2629 #endif
2630 
2631 	sc->sc_aip_num = 0;
2632 	if ((sc->sc_aip4 = art_alloc(0, 32, 0)) == NULL)
2633 		goto ret_02;
2634 #ifdef INET6
2635 	if ((sc->sc_aip6 = art_alloc(0, 128, 0)) == NULL)
2636 		goto ret_03;
2637 #endif
2638 
2639 	rw_init(&sc->sc_peer_lock, "wg_peer");
2640 	sc->sc_peer_num = 0;
2641 	if ((sc->sc_peer = hashinit(HASHTABLE_PEER_SIZE, M_DEVBUF,
2642 	    M_NOWAIT, &sc->sc_peer_mask)) == NULL)
2643 		goto ret_04;
2644 
2645 	mtx_init(&sc->sc_index_mtx, IPL_NET);
2646 	if ((sc->sc_index = hashinit(HASHTABLE_INDEX_SIZE, M_DEVBUF,
2647 	    M_NOWAIT, &sc->sc_index_mask)) == NULL)
2648 		goto ret_05;
2649 
2650 	task_set(&sc->sc_handshake, wg_handshake_worker, sc);
2651 	mq_init(&sc->sc_handshake_queue, MAX_QUEUED_HANDSHAKES, IPL_NET);
2652 
2653 	task_set(&sc->sc_encap, wg_encap_worker, sc);
2654 	task_set(&sc->sc_decap, wg_decap_worker, sc);
2655 
2656 	bzero(&sc->sc_encap_ring, sizeof(sc->sc_encap_ring));
2657 	mtx_init(&sc->sc_encap_ring.r_mtx, IPL_NET);
2658 	bzero(&sc->sc_decap_ring, sizeof(sc->sc_decap_ring));
2659 	mtx_init(&sc->sc_decap_ring.r_mtx, IPL_NET);
2660 
2661 	/* We've set up the softc, now we can set up the ifnet. */
2662 	ifp = &sc->sc_if;
2663 	ifp->if_softc = sc;
2664 
2665 	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "wg%d", unit);
2666 
2667 	ifp->if_mtu = DEFAULT_MTU;
2668 	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_NOARP;
2669 	ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
2670 	ifp->if_txmit = 64; /* Keep our workers active for longer. */
2671 
2672 	ifp->if_ioctl = wg_ioctl;
2673 	ifp->if_qstart = wg_qstart;
2674 	ifp->if_output = wg_output;
2675 
2676 	ifp->if_type = IFT_WIREGUARD;
2677 	ifp->if_rtrequest = p2p_rtrequest;
2678 
2679 	if_attach(ifp);
2680 	if_alloc_sadl(ifp);
2681 	if_counters_alloc(ifp);
2682 
2683 #if NBPFILTER > 0
2684 	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
2685 #endif
2686 
2687 	DPRINTF(sc, "Interface created\n");
2688 
2689 	return 0;
2690 ret_05:
2691 	hashfree(sc->sc_peer, HASHTABLE_PEER_SIZE, M_DEVBUF);
2692 ret_04:
2693 #ifdef INET6
2694 	free(sc->sc_aip6, M_RTABLE, sizeof(*sc->sc_aip6));
2695 ret_03:
2696 #endif
2697 	free(sc->sc_aip4, M_RTABLE, sizeof(*sc->sc_aip4));
2698 ret_02:
2699 	cookie_checker_deinit(&sc->sc_cookie);
2700 ret_01:
2701 	free(sc, M_DEVBUF, sizeof(*sc));
2702 ret_00:
2703 	return ENOBUFS;
2704 }
2705 int
2706 wg_clone_destroy(struct ifnet *ifp)
2707 {
2708 	struct wg_softc	*sc = ifp->if_softc;
2709 	struct wg_peer	*peer, *tpeer;
2710 
2711 	KERNEL_ASSERT_LOCKED();
2712 
2713 	rw_enter_write(&sc->sc_lock);
2714 	TAILQ_FOREACH_SAFE(peer, &sc->sc_peer_seq, p_seq_entry, tpeer)
2715 		wg_peer_destroy(peer);
2716 	rw_exit_write(&sc->sc_lock);
2717 
2718 	wg_unbind(sc);
2719 	if_detach(ifp);
2720 
2721 	wg_counter--;
2722 	if (wg_counter == 0) {
2723 		KASSERT(wg_handshake_taskq != NULL && wg_crypt_taskq != NULL);
2724 		taskq_destroy(wg_handshake_taskq);
2725 		taskq_destroy(wg_crypt_taskq);
2726 		wg_handshake_taskq = NULL;
2727 		wg_crypt_taskq = NULL;
2728 	}
2729 
2730 	DPRINTF(sc, "Destroyed interface\n");
2731 
2732 	hashfree(sc->sc_index, HASHTABLE_INDEX_SIZE, M_DEVBUF);
2733 	hashfree(sc->sc_peer, HASHTABLE_PEER_SIZE, M_DEVBUF);
2734 #ifdef INET6
2735 	free(sc->sc_aip6, M_RTABLE, sizeof(*sc->sc_aip6));
2736 #endif
2737 	free(sc->sc_aip4, M_RTABLE, sizeof(*sc->sc_aip4));
2738 	cookie_checker_deinit(&sc->sc_cookie);
2739 	free(sc, M_DEVBUF, sizeof(*sc));
2740 	return 0;
2741 }
2742 
2743 void
2744 wgattach(int nwg)
2745 {
2746 #ifdef WGTEST
2747 	cookie_test();
2748 	noise_test();
2749 #endif
2750 	if_clone_attach(&wg_cloner);
2751 
2752 	pool_init(&wg_aip_pool, sizeof(struct wg_aip), 0,
2753 			IPL_NET, 0, "wgaip", NULL);
2754 	pool_init(&wg_peer_pool, sizeof(struct wg_peer), 0,
2755 			IPL_NET, 0, "wgpeer", NULL);
2756 	pool_init(&wg_ratelimit_pool, sizeof(struct ratelimit_entry), 0,
2757 			IPL_NET, 0, "wgratelimit", NULL);
2758 }
2759