xref: /netbsd-src/sys/net/if_wg.c (revision 3117ece4fc4a4ca4489ba793710b60b0d26bab6c)
1 /*	$NetBSD: if_wg.c,v 1.132 2024/10/08 02:29:40 riastradh Exp $	*/
2 
3 /*
4  * Copyright (C) Ryota Ozaki <ozaki.ryota@gmail.com>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the project nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*
33  * This network interface aims to implement the WireGuard protocol.
 * The implementation is based on the WireGuard paper as of
 * 2018-06-30 [1], which is referred to in the source code by the
 * label [W].  The specification of the Noise protocol framework as of
 * 2018-07-11 [2] is likewise referred to by the label [N].
38  *
39  * [1] https://www.wireguard.com/papers/wireguard.pdf
40  *     https://web.archive.org/web/20180805103233/https://www.wireguard.com/papers/wireguard.pdf
41  * [2] http://noiseprotocol.org/noise.pdf
42  *     https://web.archive.org/web/20180727193154/https://noiseprotocol.org/noise.pdf
43  */
44 
45 #include <sys/cdefs.h>
46 __KERNEL_RCSID(0, "$NetBSD: if_wg.c,v 1.132 2024/10/08 02:29:40 riastradh Exp $");
47 
48 #ifdef _KERNEL_OPT
49 #include "opt_altq_enabled.h"
50 #include "opt_inet.h"
51 #endif
52 
53 #include <sys/param.h>
54 #include <sys/types.h>
55 
56 #include <sys/atomic.h>
57 #include <sys/callout.h>
58 #include <sys/cprng.h>
59 #include <sys/cpu.h>
60 #include <sys/device.h>
61 #include <sys/domain.h>
62 #include <sys/errno.h>
63 #include <sys/intr.h>
64 #include <sys/ioctl.h>
65 #include <sys/kernel.h>
66 #include <sys/kmem.h>
67 #include <sys/mbuf.h>
68 #include <sys/module.h>
69 #include <sys/mutex.h>
70 #include <sys/once.h>
71 #include <sys/percpu.h>
72 #include <sys/pserialize.h>
73 #include <sys/psref.h>
74 #include <sys/queue.h>
75 #include <sys/rwlock.h>
76 #include <sys/socket.h>
77 #include <sys/socketvar.h>
78 #include <sys/sockio.h>
79 #include <sys/sysctl.h>
80 #include <sys/syslog.h>
81 #include <sys/systm.h>
82 #include <sys/thmap.h>
83 #include <sys/threadpool.h>
84 #include <sys/time.h>
85 #include <sys/timespec.h>
86 #include <sys/workqueue.h>
87 
88 #include <lib/libkern/libkern.h>
89 
90 #include <net/bpf.h>
91 #include <net/if.h>
92 #include <net/if_types.h>
93 #include <net/if_wg.h>
94 #include <net/pktqueue.h>
95 #include <net/route.h>
96 
97 #ifdef INET
98 #include <netinet/in.h>
99 #include <netinet/in_pcb.h>
100 #include <netinet/in_var.h>
101 #include <netinet/ip.h>
102 #include <netinet/ip_var.h>
103 #include <netinet/udp.h>
104 #include <netinet/udp_var.h>
105 #endif	/* INET */
106 
107 #ifdef INET6
108 #include <netinet/ip6.h>
109 #include <netinet6/in6_pcb.h>
110 #include <netinet6/in6_var.h>
111 #include <netinet6/ip6_var.h>
112 #include <netinet6/udp6_var.h>
113 #endif	/* INET6 */
114 
115 #include <prop/proplib.h>
116 
117 #include <crypto/blake2/blake2s.h>
118 #include <crypto/sodium/crypto_aead_chacha20poly1305.h>
119 #include <crypto/sodium/crypto_aead_xchacha20poly1305.h>
120 #include <crypto/sodium/crypto_scalarmult.h>
121 
122 #include "ioconf.h"
123 
124 #ifdef WG_RUMPKERNEL
125 #include "wg_user.h"
126 #endif
127 
/*
 * Fallback for environments that do not already provide time_uptime32:
 * time_uptime truncated to 32 bits.
 */
#ifndef time_uptime32
#define	time_uptime32	((uint32_t)time_uptime)
#endif
131 
132 /*
133  * Data structures
134  * - struct wg_softc is an instance of wg interfaces
135  *   - It has a list of peers (struct wg_peer)
136  *   - It has a threadpool job that sends/receives handshake messages and
137  *     runs event handlers
138  *   - It has its own two routing tables: one is for IPv4 and the other IPv6
139  * - struct wg_peer is a representative of a peer
140  *   - It has a struct work to handle handshakes and timer tasks
141  *   - It has a pair of session instances (struct wg_session)
142  *   - It has a pair of endpoint instances (struct wg_sockaddr)
143  *     - Normally one endpoint is used and the second one is used only on
144  *       a peer migration (a change of peer's IP address)
145  *   - It has a list of IP addresses and sub networks called allowedips
146  *     (struct wg_allowedip)
 *     - A packet sent over a session is allowed if its destination
 *       matches any IP address or subnet in the list
149  * - struct wg_session represents a session of a secure tunnel with a peer
 *   - Two session instances belong to a peer: a stable session and an
 *     unstable session
 *   - The handshake for a session always starts on the unstable instance
153  *   - Once a session is established, its instance becomes stable and the
154  *     other becomes unstable instead
155  *   - Data messages are always sent via a stable session
156  *
157  * Locking notes:
158  * - Each wg has a mutex(9) wg_lock, and a rwlock(9) wg_rwlock
159  *   - Changes to the peer list are serialized by wg_lock
160  *   - The peer list may be read with pserialize(9) and psref(9)
161  *   - The rwlock (wg_rwlock) protects the routing tables (wg_rtable_ipv[46])
162  *     => XXX replace by pserialize when routing table is psz-safe
163  * - Each peer (struct wg_peer, wgp) has a mutex wgp_lock, which can be taken
164  *   only in thread context and serializes:
165  *   - the stable and unstable session pointers
166  *   - all unstable session state
167  * - Packet processing may be done in softint context:
168  *   - The stable session can be read under pserialize(9) or psref(9)
169  *     - The stable session is always ESTABLISHED
170  *     - On a session swap, we must wait for all readers to release a
171  *       reference to a stable session before changing wgs_state and
172  *       session states
173  * - Lock order: wg_lock -> wgp_lock
174  */
175 
176 
/*
 * WGLOG(level, fmt, ...)
 *
 *	Log a message at the given level, prefixed with the name of the
 *	calling function.  Not gated by any debug flag.
 */
#define WGLOG(level, fmt, args...)					      \
	log(level, "%s: " fmt, __func__, ##args)

/*
 * WG_DEBUG is defined unconditionally here, so every debug sub-option
 * below is compiled in unless this definition is removed.
 */
#define WG_DEBUG

/* Debug options */
#ifdef WG_DEBUG
/* Output debug logs */
#ifndef WG_DEBUG_LOG
#define WG_DEBUG_LOG
#endif
/* Output trace logs */
#ifndef WG_DEBUG_TRACE
#define WG_DEBUG_TRACE
#endif
/* Output hash values, etc. */
#ifndef WG_DEBUG_DUMP
#define WG_DEBUG_DUMP
#endif
/* Make some internal parameters configurable for testing and debugging */
#ifndef WG_DEBUG_PARAMS
#define WG_DEBUG_PARAMS
#endif
#endif /* WG_DEBUG */

/*
 * Conversely, enabling any single sub-option implies WG_DEBUG.  This
 * only has an effect when WG_DEBUG is not already defined above.
 */
#ifndef WG_DEBUG
# if defined(WG_DEBUG_LOG) || defined(WG_DEBUG_TRACE) ||		    \
	defined(WG_DEBUG_DUMP) || defined(WG_DEBUG_PARAMS)
#   define WG_DEBUG
# endif
#endif

#ifdef WG_DEBUG
/*
 * Run-time bitmask of WG_DEBUG_FLAGS_*; the debug macros and dump
 * helpers in this file test it before producing any output.
 */
int wg_debug;
#define WG_DEBUG_FLAGS_LOG	1
#define WG_DEBUG_FLAGS_TRACE	2
#define WG_DEBUG_FLAGS_DUMP	4
#endif
215 
/*
 * WG_TRACE(msg)
 *
 *	If tracing is compiled in and WG_DEBUG_FLAGS_TRACE is set in
 *	wg_debug, log msg at LOG_DEBUG tagged with the calling function
 *	and line number.  Otherwise expands to nothing.
 */
#ifdef WG_DEBUG_TRACE
#define WG_TRACE(msg)	 do {						\
	if (wg_debug & WG_DEBUG_FLAGS_TRACE)				\
	    log(LOG_DEBUG, "%s:%d: %s\n", __func__, __LINE__, (msg));	\
} while (0)
#else
#define WG_TRACE(msg)	__nothing
#endif

/*
 * WG_DLOG(fmt, ...)
 *
 *	If debug logging is compiled in and WG_DEBUG_FLAGS_LOG is set in
 *	wg_debug, log a printf-style message at LOG_DEBUG prefixed with
 *	the calling function's name.  Otherwise expands to nothing.
 */
#ifdef WG_DEBUG_LOG
#define WG_DLOG(fmt, args...)	 do {					\
	if (wg_debug & WG_DEBUG_FLAGS_LOG)				\
	    log(LOG_DEBUG, "%s: " fmt, __func__, ##args);		\
} while (0)
#else
#define WG_DLOG(fmt, args...)	__nothing
#endif

/*
 * WG_LOG_RATECHECK(wgprc, level, fmt, ...)
 *
 *	Log a message at the given level only when ppsratecheck(), called
 *	on the state in *wgprc (a struct wg_ppsratecheck) with a limit
 *	argument of 1, permits it.
 */
#define WG_LOG_RATECHECK(wgprc, level, fmt, args...)	do {		\
	if (ppsratecheck(&(wgprc)->wgprc_lasttime,			\
	    &(wgprc)->wgprc_curpps, 1)) {				\
		log(level, fmt, ##args);				\
	}								\
} while (0)
240 
#ifdef WG_DEBUG_PARAMS
/*
 * Debug-only knob, off by default.
 * NOTE(review): presumably makes wg_is_underload() report an under-load
 * condition unconditionally -- confirm against its use.
 */
static bool wg_force_underload = false;
#endif
244 
#ifdef WG_DEBUG_DUMP

/*
 * Placeholder string returned by gethexdump when its allocation fails;
 * puthexdump recognizes this pointer and does not free it.
 */
static char enomem[10] = "[enomem]";

/* Maximum number of input bytes that gethexdump will render */
#define	MAX_HDUMP_LEN	10000	/* large enough */
250 
251 /*
252  * gethexdump(p, n)
253  *
254  *	Allocate a string returning a hexdump of bytes p[0..n),
255  *	truncated to MAX_HDUMP_LEN.  Must be freed with puthexdump.
256  *
257  *	We use this instead of libkern hexdump() because the result is
258  *	logged with log(LOG_DEBUG, ...), which puts a priority tag on
259  *	every message, so it can't be done incrementally.
260  */
261 static char *
262 gethexdump(const void *vp, size_t n)
263 {
264 	char *buf;
265 	const uint8_t *p = vp;
266 	size_t i, alloc;
267 
268 	alloc = n;
269 	if (n > MAX_HDUMP_LEN)
270 		alloc = MAX_HDUMP_LEN;
271 	buf = kmem_alloc(3*alloc + 5, KM_NOSLEEP);
272 	if (buf == NULL)
273 		return enomem;
274 	for (i = 0; i < alloc; i++)
275 		snprintf(buf + 3*i, 3 + 1, " %02hhx", p[i]);
276 	if (alloc != n)
277 		snprintf(buf + 3*i, 4 + 1, " ...");
278 	return buf;
279 }
280 
281 static void
282 puthexdump(char *buf, const void *p, size_t n)
283 {
284 
285 	if (buf == NULL || buf == enomem)
286 		return;
287 	if (n > MAX_HDUMP_LEN)
288 		n = MAX_HDUMP_LEN;
289 	kmem_free(buf, 3*n + 5);
290 }
291 
#ifdef WG_RUMPKERNEL
/*
 * wg_dump_buf(func, buf, size)
 *
 *	If WG_DEBUG_FLAGS_DUMP is set in wg_debug, log a hexdump of
 *	buf[0..size) at LOG_DEBUG, prefixed with func.  Otherwise do
 *	nothing.
 */
static void
wg_dump_buf(const char *func, const char *buf, const size_t size)
{
	char *hex;

	if (!(wg_debug & WG_DEBUG_FLAGS_DUMP))
		return;

	hex = gethexdump(buf, size);
	log(LOG_DEBUG, "%s: %s\n", func, hex);
	puthexdump(hex, buf, size);
}
#endif
305 
306 static void
307 wg_dump_hash(const uint8_t *func, const uint8_t *name, const uint8_t *hash,
308     const size_t size)
309 {
310 	if ((wg_debug & WG_DEBUG_FLAGS_DUMP) == 0)
311 		return;
312 
313 	char *hex = gethexdump(hash, size);
314 
315 	log(LOG_DEBUG, "%s: %s: %s\n", func, name, hex);
316 	puthexdump(hex, hash, size);
317 }
318 
/*
 * Convenience wrappers that supply the calling function's name.  With
 * WG_DEBUG_DUMP they forward to wg_dump_hash/wg_dump_buf; without it
 * they expand to nothing.
 *
 * Note that wg_dump_buf is only defined under WG_RUMPKERNEL, so
 * WG_DUMP_BUF can only be used in code built with that option.
 */
/* Dump a WG_HASH_LEN-byte value */
#define WG_DUMP_HASH(name, hash) \
	wg_dump_hash(__func__, name, hash, WG_HASH_LEN)
/* Dump a 48-byte value */
#define WG_DUMP_HASH48(name, hash) \
	wg_dump_hash(__func__, name, hash, 48)
/* Dump an arbitrary buffer of the given size */
#define WG_DUMP_BUF(buf, size) \
	wg_dump_buf(__func__, buf, size)
#else
#define WG_DUMP_HASH(name, hash)	__nothing
#define WG_DUMP_HASH48(name, hash)	__nothing
#define WG_DUMP_BUF(buf, size)	__nothing
#endif /* WG_DEBUG_DUMP */
330 
/* chosen somewhat arbitrarily -- fits in signed 16 bits NUL-terminated */
#define	WG_MAX_PROPLEN		32766

#define WG_MTU			1420
/* Capacity of the per-peer allowedips array (wgp_allowedips) */
#define WG_ALLOWEDIPS		16

/* Sizes, in bytes, of the underlying primitives' keys, tags, etc. */
#define CURVE25519_KEY_LEN	32
#define TAI64N_LEN		(sizeof(uint32_t) * 3)
#define POLY1305_AUTHTAG_LEN	16
#define HMAC_BLOCK_LEN		64

/* [N] 4.1: "DHLEN must be 32 or greater."  WireGuard chooses 32. */
#define NOISE_DHLEN		32
/* [N] 4.3: Hash functions */
/* [N] 4.3: "Must be 32 or 64."  WireGuard chooses 32. */
#define NOISE_HASHLEN		32
#define NOISE_BLOCKLEN		64
#define NOISE_HKDF_OUTPUT_LEN	NOISE_HASHLEN
/* [N] 5.1: "k" */
#define NOISE_CIPHER_KEY_LEN	32
/*
 * [N] 9.2: "psk"
 *          "... psk is a 32-byte secret value provided by the application."
 */
#define NOISE_PRESHARED_KEY_LEN	32

/* WireGuard-level names for the sizes above */
#define WG_STATIC_KEY_LEN	CURVE25519_KEY_LEN
#define WG_TIMESTAMP_LEN	TAI64N_LEN

#define WG_PRESHARED_KEY_LEN	NOISE_PRESHARED_KEY_LEN

#define WG_COOKIE_LEN		16
#define WG_MAC_LEN		16
#define WG_COOKIESECRET_LEN	32

#define WG_EPHEMERAL_KEY_LEN	CURVE25519_KEY_LEN
/* [N] 5.2: "ck: A chaining key of HASHLEN bytes" */
#define WG_CHAINING_KEY_LEN	NOISE_HASHLEN
/* [N] 5.2: "h: A hash output of HASHLEN bytes" */
#define WG_HASH_LEN		NOISE_HASHLEN
#define WG_CIPHER_KEY_LEN	NOISE_CIPHER_KEY_LEN
#define WG_DH_OUTPUT_LEN	NOISE_DHLEN
#define WG_KDF_OUTPUT_LEN	NOISE_HKDF_OUTPUT_LEN
#define WG_AUTHTAG_LEN		POLY1305_AUTHTAG_LEN
#define WG_DATA_KEY_LEN		32
#define WG_SALT_LEN		24
377 
/*
 * The protocol messages
 *
 * All message structures are __packed.  Each of the specific message
 * structures below also begins with a 32-bit type field, so struct
 * wg_msg overlays the first word of any of them.
 */
struct wg_msg {
	uint32_t	wgm_type;	/* one of WG_MSG_TYPE_* */
} __packed;
384 
/* [W] 5.4.2 First Message: Initiator to Responder */
struct wg_msg_init {
	uint32_t	wgmi_type;
	uint32_t	wgmi_sender;
	uint8_t		wgmi_ephemeral[WG_EPHEMERAL_KEY_LEN];
	/* sized for a static key plus an authentication tag */
	uint8_t		wgmi_static[WG_STATIC_KEY_LEN + WG_AUTHTAG_LEN];
	/* sized for a timestamp plus an authentication tag */
	uint8_t		wgmi_timestamp[WG_TIMESTAMP_LEN + WG_AUTHTAG_LEN];
	uint8_t		wgmi_mac1[WG_MAC_LEN];
	uint8_t		wgmi_mac2[WG_MAC_LEN];
} __packed;
395 
/* [W] 5.4.3 Second Message: Responder to Initiator */
struct wg_msg_resp {
	uint32_t	wgmr_type;
	uint32_t	wgmr_sender;
	uint32_t	wgmr_receiver;
	uint8_t		wgmr_ephemeral[WG_EPHEMERAL_KEY_LEN];
	/* zero bytes of payload plus an authentication tag */
	uint8_t		wgmr_empty[0 + WG_AUTHTAG_LEN];
	uint8_t		wgmr_mac1[WG_MAC_LEN];
	uint8_t		wgmr_mac2[WG_MAC_LEN];
} __packed;
406 
/* [W] 5.4.6 Subsequent Messages: Transport Data Messages */
struct wg_msg_data {
	uint32_t	wgmd_type;
	uint32_t	wgmd_receiver;
	uint64_t	wgmd_counter;
	/* variable-length payload (flexible array member) */
	uint32_t	wgmd_packet[];
} __packed;
414 
/* [W] 5.4.7 Under Load: Cookie Reply Message */
struct wg_msg_cookie {
	uint32_t	wgmc_type;
	uint32_t	wgmc_receiver;
	uint8_t		wgmc_salt[WG_SALT_LEN];
	/* sized for a cookie plus an authentication tag */
	uint8_t		wgmc_cookie[WG_COOKIE_LEN + WG_AUTHTAG_LEN];
} __packed;
422 
/*
 * Message type codes, one per message structure above.  WG_MSG_TYPE_MAX
 * is the largest valid code.
 */
#define WG_MSG_TYPE_INIT		1
#define WG_MSG_TYPE_RESP		2
#define WG_MSG_TYPE_COOKIE		3
#define WG_MSG_TYPE_DATA		4
#define WG_MSG_TYPE_MAX			WG_MSG_TYPE_DATA
428 
/* Sliding windows */

/* Total number of bits in the window bitmap */
#define	SLIWIN_BITS	2048u
/* Type of one bitmap word */
#define	SLIWIN_TYPE	uint32_t
/* Bits per bitmap word */
#define	SLIWIN_BPW	(NBBY*sizeof(SLIWIN_TYPE))
/* Number of words in the bitmap */
#define	SLIWIN_WORDS	howmany(SLIWIN_BITS, SLIWIN_BPW)
/*
 * How far behind the highest sequence number seen a sequence number
 * may be and still be considered: one word less than the full bitmap.
 */
#define	SLIWIN_NPKT	(SLIWIN_BITS - NBBY*sizeof(SLIWIN_TYPE))
436 
/*
 * Sliding-window state for validating sequence numbers; see
 * sliwin_reset, sliwin_check_fast, and sliwin_update.
 */
struct sliwin {
	/* circular bitmap: sequence number S maps to word (S/BPW) % WORDS */
	SLIWIN_TYPE	B[SLIWIN_WORDS];
	/* highest sequence number seen so far */
	uint64_t	T;
};
441 
442 /*
443  * sliwin_reset(W)
444  *
445  *	Reset sliding window state to a blank history with no observed
446  *	sequence numbers.
447  *
448  *	Caller must have exclusive access to W.
449  */
450 static void
451 sliwin_reset(struct sliwin *W)
452 {
453 
454 	memset(W, 0, sizeof(*W));
455 }
456 
/*
 * sliwin_check_fast(W, S)
 *
 *	Cheap, lock-free pre-check of sequence number S against the
 *	sliding window W.  Records nothing.  Returns EAUTH if S is
 *	definitely too old, and 0 if S might be acceptable -- in which
 *	case the caller must still make a serialized call to
 *	sliwin_update to get the real verdict.
 *
 *	On platforms without 64-bit atomic load/store this always
 *	returns 0.
 *
 *	May be called concurrently with other calls to
 *	sliwin_check_fast and sliwin_update.
 */
static int
sliwin_check_fast(const volatile struct sliwin *W, uint64_t S)
{
#ifdef __HAVE_ATOMIC64_LOADSTORE
	const uint64_t newest = atomic_load_relaxed(&W->T);

	/*
	 * More than one window older than the highest sequence number
	 * we've seen: reject without taking the lock.
	 */
	if (newest > S + SLIWIN_NPKT)
		return EAUTH;
#endif

	/* Can't tell without the lock, so don't reject just yet. */
	return 0;
}
487 
488 /*
489  * sliwin_update(W, S)
490  *
491  *	Check the sliding window W to validate sequence number S, and
492  *	if accepted, update it to reflect having observed S.  Return 0
493  *	on accept, nonzero error code on reject.
494  *
495  *	May be called concurrently with other calls to
496  *	sliwin_check_fast, but caller must exclude other calls to
497  *	sliwin_update.
498  */
499 static int
500 sliwin_update(struct sliwin *W, uint64_t S)
501 {
502 	unsigned word, bit;
503 
504 	/*
505 	 * If it's more than one window older than the highest sequence
506 	 * number we've seen, reject.
507 	 */
508 	if (S + SLIWIN_NPKT < W->T)
509 		return EAUTH;
510 
511 	/*
512 	 * If it's higher than the highest sequence number we've seen,
513 	 * advance the window.
514 	 */
515 	if (S > W->T) {
516 		uint64_t i = W->T / SLIWIN_BPW;
517 		uint64_t j = S / SLIWIN_BPW;
518 		unsigned k;
519 
520 		for (k = 0; k < MIN(j - i, SLIWIN_WORDS); k++)
521 			W->B[(i + k + 1) % SLIWIN_WORDS] = 0;
522 #ifdef __HAVE_ATOMIC64_LOADSTORE
523 		atomic_store_relaxed(&W->T, S);
524 #else
525 		W->T = S;
526 #endif
527 	}
528 
529 	/* Test and set the bit -- if already set, reject.  */
530 	word = (S / SLIWIN_BPW) % SLIWIN_WORDS;
531 	bit = S % SLIWIN_BPW;
532 	if (W->B[word] & (1UL << bit))
533 		return EAUTH;
534 	W->B[word] |= 1U << bit;
535 
536 	/* Accept!  */
537 	return 0;
538 }
539 
/*
 * One session of a secure tunnel with a peer.  Each peer has two of
 * these, a stable one and an unstable one; see the "Data structures"
 * and "Locking notes" comments near the top of this file.
 */
struct wg_session {
	struct wg_peer	*wgs_peer;	/* peer this session belongs to */
	struct psref_target
			wgs_psref;

	/* one of WGS_STATE_* */
	volatile int	wgs_state;
#define WGS_STATE_UNKNOWN	0
#define WGS_STATE_INIT_ACTIVE	1
#define WGS_STATE_INIT_PASSIVE	2
#define WGS_STATE_ESTABLISHED	3
#define WGS_STATE_DESTROYING	4

	uint32_t	wgs_time_established;
	volatile uint32_t
			wgs_time_last_data_sent;
	volatile bool	wgs_force_rekey;
	bool		wgs_is_initiator;

	uint32_t	wgs_local_index;
	uint32_t	wgs_remote_index;
	/*
	 * Where 64-bit atomic load/store is unavailable, a mutex is
	 * provided alongside the send counter.
	 */
#ifdef __HAVE_ATOMIC64_LOADSTORE
	volatile uint64_t
			wgs_send_counter;
#else
	kmutex_t	wgs_send_counter_lock;
	uint64_t	wgs_send_counter;
#endif

	/* sliding window (see struct sliwin) paired with its own mutex */
	struct {
		kmutex_t	lock;
		struct sliwin	window;
	}		*wgs_recvwin;

	/* handshake and transport key material */
	uint8_t		wgs_handshake_hash[WG_HASH_LEN];
	uint8_t		wgs_chaining_key[WG_CHAINING_KEY_LEN];
	uint8_t		wgs_ephemeral_key_pub[WG_EPHEMERAL_KEY_LEN];
	uint8_t		wgs_ephemeral_key_priv[WG_EPHEMERAL_KEY_LEN];
	uint8_t		wgs_ephemeral_key_peer[WG_EPHEMERAL_KEY_LEN];
	uint8_t		wgs_tkey_send[WG_DATA_KEY_LEN];
	uint8_t		wgs_tkey_recv[WG_DATA_KEY_LEN];
};
581 
/*
 * A socket address viewable as generic, IPv4, or IPv6 (all views share
 * storage through the anonymous union), together with a psref target.
 */
struct wg_sockaddr {
	union {
		struct sockaddr_storage _ss;
		struct sockaddr _sa;
		struct sockaddr_in _sin;
		struct sockaddr_in6 _sin6;
	};
	struct psref_target	wgsa_psref;
};

/* Accessors returning a pointer to the requested view of the address */
#define wgsatoss(wgsa)		(&(wgsa)->_ss)
#define wgsatosa(wgsa)		(&(wgsa)->_sa)
#define wgsatosin(wgsa)		(&(wgsa)->_sin)
#define wgsatosin6(wgsa)	(&(wgsa)->_sin6)

/* Address family of the stored address */
#define	wgsa_family(wgsa)	(wgsatosa(wgsa)->sa_family)
598 
struct wg_peer;
/*
 * One entry of a peer's allowedips list (see "Data structures" at the
 * top of this file).  It embeds a pair of radix nodes, so it can be
 * linked directly into a radix tree.
 */
struct wg_allowedip {
	struct radix_node	wga_nodes[2];
	/* address and mask in sockaddr form */
	struct wg_sockaddr	_wga_sa_addr;
	struct wg_sockaddr	_wga_sa_mask;
#define wga_sa_addr		_wga_sa_addr._sa
#define wga_sa_mask		_wga_sa_mask._sa

	int			wga_family;
	uint8_t			wga_cidr;
	/* raw address; which member is valid presumably follows wga_family */
	union {
		struct in_addr _ip4;
		struct in6_addr _ip6;
	} wga_addr;
#define wga_addr4	wga_addr._ip4
#define wga_addr6	wga_addr._ip6

	struct wg_peer		*wga_peer;
};
618 
/* Raw timestamp of WG_TIMESTAMP_LEN (TAI64N_LEN) bytes */
typedef uint8_t wg_timestamp_t[WG_TIMESTAMP_LEN];
620 
/* State handed to ppsratecheck() by WG_LOG_RATECHECK */
struct wg_ppsratecheck {
	struct timeval		wgprc_lasttime;
	int			wgprc_curpps;
};
625 
struct wg_softc;
/*
 * A peer of a wg interface.  See the "Data structures" and "Locking
 * notes" comments near the top of this file for the overall design.
 */
struct wg_peer {
	struct wg_softc		*wgp_sc;
	char			wgp_name[WG_PEER_NAME_MAXLEN + 1];
	struct pslist_entry	wgp_peerlist_entry;
	pserialize_t		wgp_psz;
	struct psref_target	wgp_psref;
	/*
	 * Per the locking notes: taken only in thread context; serializes
	 * the stable/unstable session pointers and all unstable session
	 * state.
	 */
	kmutex_t		*wgp_lock;
	kmutex_t		*wgp_intr_lock;

	uint8_t	wgp_pubkey[WG_STATIC_KEY_LEN];
	/*
	 * Pair of endpoint instances; per the notes above, the second is
	 * used only while the peer's address is changing.
	 */
	struct wg_sockaddr	*volatile wgp_endpoint;
	struct wg_sockaddr	*wgp_endpoint0;
	volatile unsigned	wgp_endpoint_changing;
	volatile bool		wgp_endpoint_available;

			/* The preshared key (optional) */
	uint8_t		wgp_psk[WG_PRESHARED_KEY_LEN];

	/* Pair of sessions; data messages always use the stable one */
	struct wg_session	*volatile wgp_session_stable;
	struct wg_session	*wgp_session_unstable;

	/* first outgoing packet awaiting session initiation */
	struct mbuf		*volatile wgp_pending;

	/* timestamp in big-endian */
	wg_timestamp_t	wgp_timestamp_latest_init;

	struct timespec		wgp_last_handshake_time;

	callout_t		wgp_handshake_timeout_timer;
	callout_t		wgp_session_dtor_timer;

	time_t			wgp_handshake_start_time;

	/* allowedips storage; presumably the first wgp_n_allowedips are used */
	int			wgp_n_allowedips;
	struct wg_allowedip	wgp_allowedips[WG_ALLOWEDIPS];

	time_t			wgp_latest_cookie_time;
	uint8_t			wgp_latest_cookie[WG_COOKIE_LEN];
	uint8_t			wgp_last_sent_mac1[WG_MAC_LEN];
	bool			wgp_last_sent_mac1_valid;
	uint8_t			wgp_last_sent_cookie[WG_COOKIE_LEN];
	bool			wgp_last_sent_cookie_valid;

	/*
	 * One slot per message type.
	 * NOTE(review): types run 1..WG_MSG_TYPE_MAX, so this is
	 * presumably indexed by type - 1 -- confirm at the use sites.
	 */
	time_t			wgp_last_msg_received_time[WG_MSG_TYPE_MAX];

	time_t			wgp_last_cookiesecret_time;
	uint8_t			wgp_cookiesecret[WG_COOKIESECRET_LEN];

	struct wg_ppsratecheck	wgp_ppsratecheck;

	/* Work item plus bitmask of pending WGP_TASK_* */
	struct work		wgp_work;
	unsigned int		wgp_tasks;
#define WGP_TASK_SEND_INIT_MESSAGE		__BIT(0)
#define WGP_TASK_RETRY_HANDSHAKE		__BIT(1)
#define WGP_TASK_ESTABLISH_SESSION		__BIT(2)
#define WGP_TASK_ENDPOINT_CHANGED		__BIT(3)
#define WGP_TASK_SEND_KEEPALIVE_MESSAGE		__BIT(4)
#define WGP_TASK_DESTROY_PREV_SESSION		__BIT(5)
};
687 
struct wg_ops;

/*
 * Per-interface state of a wg interface.  See the "Data structures"
 * and "Locking notes" comments near the top of this file.
 */
struct wg_softc {
	struct ifnet	wg_if;
	LIST_ENTRY(wg_softc) wg_list;
	/* Per the locking notes: serializes changes to the peer list */
	kmutex_t	*wg_lock;
	kmutex_t	*wg_intr_lock;
	/* Per the locking notes: protects the routing tables below */
	krwlock_t	*wg_rwlock;

	/* This interface's static key pair */
	uint8_t		wg_privkey[WG_STATIC_KEY_LEN];
	uint8_t		wg_pubkey[WG_STATIC_KEY_LEN];

	/* Peer list, plus maps for lookup by public key, name, and index */
	int		wg_npeers;
	struct pslist_head	wg_peers;
	struct thmap	*wg_peers_bypubkey;
	struct thmap	*wg_peers_byname;
	struct thmap	*wg_sessions_byindex;
	uint16_t	wg_listen_port;

	struct threadpool	*wg_threadpool;

	struct threadpool_job	wg_job;
	/* Bitmask of WG_UPCALL_* */
	int			wg_upcalls;
#define	WG_UPCALL_INET	__BIT(0)
#define	WG_UPCALL_INET6	__BIT(1)

	/* Per-address-family socket and routing table */
#ifdef INET
	struct socket		*wg_so4;
	struct radix_node_head	*wg_rtable_ipv4;
#endif
#ifdef INET6
	struct socket		*wg_so6;
	struct radix_node_head	*wg_rtable_ipv6;
#endif

	struct wg_ppsratecheck	wg_ppsratecheck;

	/* Operations table (see struct wg_ops) */
	struct wg_ops		*wg_ops;

#ifdef WG_RUMPKERNEL
	struct wg_user		*wg_user;
#endif
};
731 
/*
 * [W] 6.1 Preliminaries
 *
 * The *_MESSAGES values are message counts.
 * NOTE(review): the *_TIME and *_TIMEOUT values are presumably in
 * seconds, as in [W] 6.1 -- confirm at the use sites.
 */
#define WG_REKEY_AFTER_MESSAGES		(1ULL << 60)
#define WG_REJECT_AFTER_MESSAGES	(UINT64_MAX - (1 << 13))
#define WG_REKEY_AFTER_TIME		120
#define WG_REJECT_AFTER_TIME		180
#define WG_REKEY_ATTEMPT_TIME		 90
#define WG_REKEY_TIMEOUT		  5
#define WG_KEEPALIVE_TIMEOUT		 10

#define WG_COOKIE_TIME			120
#define WG_COOKIESECRET_TIME		(2 * 60)

/*
 * Mutable copies of the protocol limits above, initialized to the
 * specified values.
 * NOTE(review): kept as variables presumably so they can be adjusted
 * for testing under WG_DEBUG_PARAMS -- confirm.
 */
static uint64_t wg_rekey_after_messages = WG_REKEY_AFTER_MESSAGES;
static uint64_t wg_reject_after_messages = WG_REJECT_AFTER_MESSAGES;
static unsigned wg_rekey_after_time = WG_REKEY_AFTER_TIME;
static unsigned wg_reject_after_time = WG_REJECT_AFTER_TIME;
static unsigned wg_rekey_attempt_time = WG_REKEY_ATTEMPT_TIME;
static unsigned wg_rekey_timeout = WG_REKEY_TIMEOUT;
static unsigned wg_keepalive_timeout = WG_KEEPALIVE_TIMEOUT;
751 
/*
 * Forward declarations of file-local functions.
 */

/* mbuf allocation */
static struct mbuf *
		wg_get_mbuf(size_t, size_t);

/* Sending protocol messages */
static void	wg_send_data_msg(struct wg_peer *, struct wg_session *,
		    struct mbuf *);
static void	wg_send_cookie_msg(struct wg_softc *, struct wg_peer *,
		    const uint32_t, const uint8_t[static WG_MAC_LEN],
		    const struct sockaddr *);
static void	wg_send_handshake_msg_resp(struct wg_softc *, struct wg_peer *,
		    struct wg_session *, const struct wg_msg_init *);
static void	wg_send_keepalive_msg(struct wg_peer *, struct wg_session *);

/* Peer and session lookup; each takes a psref for the result */
static struct wg_peer *
		wg_pick_peer_by_sa(struct wg_softc *, const struct sockaddr *,
		    struct psref *);
static struct wg_peer *
		wg_lookup_peer_by_pubkey(struct wg_softc *,
		    const uint8_t[static WG_STATIC_KEY_LEN], struct psref *);

static struct wg_session *
		wg_lookup_session_by_index(struct wg_softc *,
		    const uint32_t, struct psref *);

static void	wg_update_endpoint_if_necessary(struct wg_peer *,
		    const struct sockaddr *);

static void	wg_schedule_session_dtor_timer(struct wg_peer *);

static bool	wg_is_underload(struct wg_softc *, struct wg_peer *, int);
static void	wg_calculate_keys(struct wg_session *, const bool);

static void	wg_clear_states(struct wg_session *);

static void	wg_get_peer(struct wg_peer *, struct psref *);
static void	wg_put_peer(struct wg_peer *, struct psref *);

/* Interface entry points and transmit/receive paths */
static int	wg_send_so(struct wg_peer *, struct mbuf *);
static int	wg_send_udp(struct wg_peer *, struct mbuf *);
static int	wg_output(struct ifnet *, struct mbuf *,
			   const struct sockaddr *, const struct rtentry *);
static void	wg_input(struct ifnet *, struct mbuf *, const int);
static int	wg_ioctl(struct ifnet *, u_long, void *);
static int	wg_bind_port(struct wg_softc *, const uint16_t);
static int	wg_init(struct ifnet *);
#ifdef ALTQ
static void	wg_start(struct ifnet *);
#endif
static void	wg_stop(struct ifnet *, int);

/* Deferred work: workqueue, threadpool job, and packet-queue handlers */
static void	wg_peer_work(struct work *, void *);
static void	wg_job(struct threadpool_job *);
static void	wgintr(void *);
static void	wg_purge_pending_packets(struct wg_peer *);

/* Interface cloner callbacks */
static int	wg_clone_create(struct if_clone *, int);
static int	wg_clone_destroy(struct ifnet *);
808 
/*
 * Table of operations selected per interface through wg_softc's wg_ops
 * pointer: sending handshake and data messages, delivering input, and
 * binding the listen port.
 */
struct wg_ops {
	int (*send_hs_msg)(struct wg_peer *, struct mbuf *);
	int (*send_data_msg)(struct wg_peer *, struct mbuf *);
	void (*input)(struct ifnet *, struct mbuf *, const int);
	int (*bind_port)(struct wg_softc *, const uint16_t);
};
815 
/*
 * Operations table built from the in-file wg_send_so, wg_send_udp,
 * wg_input, and wg_bind_port routines.  Defined unconditionally, unlike
 * wg_ops_rumpuser.
 */
struct wg_ops wg_ops_rumpkernel = {
	.send_hs_msg	= wg_send_so,
	.send_data_msg	= wg_send_udp,
	.input		= wg_input,
	.bind_port	= wg_bind_port,
};
822 
#ifdef WG_RUMPKERNEL
static bool	wg_user_mode(struct wg_softc *);
static int	wg_ioctl_linkstr(struct wg_softc *, struct ifdrv *);

static int	wg_send_user(struct wg_peer *, struct mbuf *);
static void	wg_input_user(struct ifnet *, struct mbuf *, const int);
static int	wg_bind_port_user(struct wg_softc *, const uint16_t);

/*
 * Alternative operations table, available only with WG_RUMPKERNEL, in
 * which both handshake and data messages are sent with wg_send_user.
 */
struct wg_ops wg_ops_rumpuser = {
	.send_hs_msg	= wg_send_user,
	.send_data_msg	= wg_send_user,
	.input		= wg_input_user,
	.bind_port	= wg_bind_port_user,
};
#endif
838 
/*
 * Wrappers around the pslist(9) macros for a wg_softc's peer list
 * (wg_peers), which links peers through wgp_peerlist_entry.
 */
/* Iterate over the peers of wg using the reader variant */
#define WG_PEER_READER_FOREACH(wgp, wg)					\
	PSLIST_READER_FOREACH((wgp), &(wg)->wg_peers, struct wg_peer,	\
	    wgp_peerlist_entry)
/* Iterate over the peers of wg using the writer variant */
#define WG_PEER_WRITER_FOREACH(wgp, wg)					\
	PSLIST_WRITER_FOREACH((wgp), &(wg)->wg_peers, struct wg_peer,	\
	    wgp_peerlist_entry)
/* Insert wgp at the head of wg's peer list */
#define WG_PEER_WRITER_INSERT_HEAD(wgp, wg)				\
	PSLIST_WRITER_INSERT_HEAD(&(wg)->wg_peers, (wgp), wgp_peerlist_entry)
/* Remove wgp from the peer list it is on */
#define WG_PEER_WRITER_REMOVE(wgp)					\
	PSLIST_WRITER_REMOVE((wgp), wgp_peerlist_entry)
849 
/*
 * A pair of radix nodes together with the peer they refer to.
 * NOTE(review): not referenced in the visible part of this file --
 * confirm whether it is still used.
 */
struct wg_route {
	struct radix_node	wgr_nodes[2];
	struct wg_peer		*wgr_peer;
};
854 
/*
 * wg_rnh(wg, family)
 *
 *	Return wg's routing table for the given address family:
 *	wg_rtable_ipv4 for AF_INET, wg_rtable_ipv6 for AF_INET6.
 *	Returns NULL for any other family, or for a family whose
 *	support (INET/INET6) is not compiled in.
 */
static struct radix_node_head *
wg_rnh(struct wg_softc *wg, const int family)
{

#ifdef INET
	if (family == AF_INET)
		return wg->wg_rtable_ipv4;
#endif
#ifdef INET6
	if (family == AF_INET6)
		return wg->wg_rtable_ipv6;
#endif
	return NULL;
}
872 
873 
/*
 * Global variables
 */
/* Reference count maintained by wg_count_inc/wg_count_dec; wgdetach
 * refuses to proceed while it is nonzero. */
static volatile unsigned wg_count __cacheline_aligned;

/* psref class created in wginit and destroyed in wgdetach */
struct psref_class *wg_psref_class __read_mostly;

/* Cloner for "wg" interfaces */
static struct if_clone wg_cloner =
    IF_CLONE_INITIALIZER("wg", wg_clone_create, wg_clone_destroy);

/* Packet queue and workqueue, created lazily by wginitqueues */
static struct pktqueue *wg_pktq __read_mostly;
static struct workqueue *wg_wq __read_mostly;
886 
/*
 * wgattach(count)
 *
 *	Attach entry point.  Intentionally empty; count is unused.
 */
void wgattach(int);
/* ARGSUSED */
void
wgattach(int count)
{
	/*
	 * Nothing to do here; initialization is handled by the
	 * module initialization code in wginit() below.
	 */
}
897 
/*
 * wginit()
 *
 *	Create the psref class used by this driver (at IPL_SOFTNET) and
 *	register the "wg" interface cloner.  The packet queue and
 *	workqueue are not created here; see wginitqueues.
 */
static void
wginit(void)
{

	wg_psref_class = psref_class_create("wg", IPL_SOFTNET);

	if_clone_attach(&wg_cloner);
}
906 
/*
 * wginitqueues()
 *
 *	Create the global packet queue (serviced by wgintr) and the
 *	MP-safe, per-CPU workqueue that runs wg_peer_work.  Run once
 *	via wg_guarantee_initialized.
 *
 *	Always returns 0: failures are checked only by assertion.
 *
 * XXX Kludge: This should just happen in wginit, but workqueue_create
 * cannot be run until after CPUs have been detected, and wginit runs
 * before configure.
 */
static int
wginitqueues(void)
{
	int error __diagused;

	wg_pktq = pktq_create(IFQ_MAXLEN, wgintr, NULL);
	KASSERT(wg_pktq != NULL);

	error = workqueue_create(&wg_wq, "wgpeer", wg_peer_work, NULL,
	    PRI_NONE, IPL_SOFTNET, WQ_MPSAFE|WQ_PERCPU);
	KASSERTMSG(error == 0, "error=%d", error);

	return 0;
}
926 
/*
 * wg_guarantee_initialized()
 *
 *	Make sure wginitqueues has been run, using a function-local
 *	once-control so that it runs at most once no matter how many
 *	times this is called.  The result is checked only by assertion.
 */
static void
wg_guarantee_initialized(void)
{
	static ONCE_DECL(init);
	int error __diagused;

	error = RUN_ONCE(&init, wginitqueues);
	KASSERTMSG(error == 0, "error=%d", error);
}
936 
937 static int
938 wg_count_inc(void)
939 {
940 	unsigned o, n;
941 
942 	do {
943 		o = atomic_load_relaxed(&wg_count);
944 		if (o == UINT_MAX)
945 			return ENFILE;
946 		n = o + 1;
947 	} while (atomic_cas_uint(&wg_count, o, n) != o);
948 
949 	return 0;
950 }
951 
/*
 * wg_count_dec()
 *
 *	Atomically decrement wg_count.  The release barrier pairs with
 *	the acquire load in wgdetach.  The assertion catches a decrement
 *	below zero, which would wrap the new value to UINT_MAX.
 */
static void
wg_count_dec(void)
{
	unsigned c __diagused;

	membar_release();	/* match atomic_load_acquire in wgdetach */
	c = atomic_dec_uint_nv(&wg_count);
	KASSERT(c != UINT_MAX);
}
961 
962 static int
963 wgdetach(void)
964 {
965 
966 	/* Prevent new interface creation.  */
967 	if_clone_detach(&wg_cloner);
968 
969 	/*
970 	 * Check whether there are any existing interfaces.  Matches
971 	 * membar_release and atomic_dec_uint_nv in wg_count_dec.
972 	 */
973 	if (atomic_load_acquire(&wg_count)) {
974 		/* Back out -- reattach the cloner.  */
975 		if_clone_attach(&wg_cloner);
976 		return EBUSY;
977 	}
978 
979 	/* No interfaces left.  Nuke it.  */
980 	if (wg_wq)
981 		workqueue_destroy(wg_wq);
982 	if (wg_pktq)
983 		pktq_destroy(wg_pktq);
984 	psref_class_destroy(wg_psref_class);
985 
986 	return 0;
987 }
988 
989 static void
990 wg_init_key_and_hash(uint8_t ckey[static WG_CHAINING_KEY_LEN],
991     uint8_t hash[static WG_HASH_LEN])
992 {
993 	/* [W] 5.4: CONSTRUCTION */
994 	const char *signature = "Noise_IKpsk2_25519_ChaChaPoly_BLAKE2s";
995 	/* [W] 5.4: IDENTIFIER */
996 	const char *id = "WireGuard v1 zx2c4 Jason@zx2c4.com";
997 	struct blake2s state;
998 
999 	blake2s(ckey, WG_CHAINING_KEY_LEN, NULL, 0,
1000 	    signature, strlen(signature));
1001 
1002 	CTASSERT(WG_HASH_LEN == WG_CHAINING_KEY_LEN);
1003 	memcpy(hash, ckey, WG_CHAINING_KEY_LEN);
1004 
1005 	blake2s_init(&state, WG_HASH_LEN, NULL, 0);
1006 	blake2s_update(&state, ckey, WG_CHAINING_KEY_LEN);
1007 	blake2s_update(&state, id, strlen(id));
1008 	blake2s_final(&state, hash);
1009 
1010 	WG_DUMP_HASH("ckey", ckey);
1011 	WG_DUMP_HASH("hash", hash);
1012 }
1013 
1014 static void
1015 wg_algo_hash(uint8_t hash[static WG_HASH_LEN], const uint8_t input[],
1016     const size_t inputsize)
1017 {
1018 	struct blake2s state;
1019 
1020 	blake2s_init(&state, WG_HASH_LEN, NULL, 0);
1021 	blake2s_update(&state, hash, WG_HASH_LEN);
1022 	blake2s_update(&state, input, inputsize);
1023 	blake2s_final(&state, hash);
1024 }
1025 
1026 static void
1027 wg_algo_mac(uint8_t out[], const size_t outsize,
1028     const uint8_t key[], const size_t keylen,
1029     const uint8_t input1[], const size_t input1len,
1030     const uint8_t input2[], const size_t input2len)
1031 {
1032 	struct blake2s state;
1033 
1034 	blake2s_init(&state, outsize, key, keylen);
1035 
1036 	blake2s_update(&state, input1, input1len);
1037 	if (input2 != NULL)
1038 		blake2s_update(&state, input2, input2len);
1039 	blake2s_final(&state, out);
1040 }
1041 
1042 static void
1043 wg_algo_mac_mac1(uint8_t out[], const size_t outsize,
1044     const uint8_t input1[], const size_t input1len,
1045     const uint8_t input2[], const size_t input2len)
1046 {
1047 	struct blake2s state;
1048 	/* [W] 5.4: LABEL-MAC1 */
1049 	const char *label = "mac1----";
1050 	uint8_t key[WG_HASH_LEN];
1051 
1052 	blake2s_init(&state, sizeof(key), NULL, 0);
1053 	blake2s_update(&state, label, strlen(label));
1054 	blake2s_update(&state, input1, input1len);
1055 	blake2s_final(&state, key);
1056 
1057 	blake2s_init(&state, outsize, key, sizeof(key));
1058 	if (input2 != NULL)
1059 		blake2s_update(&state, input2, input2len);
1060 	blake2s_final(&state, out);
1061 }
1062 
1063 static void
1064 wg_algo_mac_cookie(uint8_t out[], const size_t outsize,
1065     const uint8_t input1[], const size_t input1len)
1066 {
1067 	struct blake2s state;
1068 	/* [W] 5.4: LABEL-COOKIE */
1069 	const char *label = "cookie--";
1070 
1071 	blake2s_init(&state, outsize, NULL, 0);
1072 	blake2s_update(&state, label, strlen(label));
1073 	blake2s_update(&state, input1, input1len);
1074 	blake2s_final(&state, out);
1075 }
1076 
1077 static void
1078 wg_algo_generate_keypair(uint8_t pubkey[static WG_EPHEMERAL_KEY_LEN],
1079     uint8_t privkey[static WG_EPHEMERAL_KEY_LEN])
1080 {
1081 
1082 	CTASSERT(WG_EPHEMERAL_KEY_LEN == crypto_scalarmult_curve25519_BYTES);
1083 
1084 	cprng_strong(kern_cprng, privkey, WG_EPHEMERAL_KEY_LEN, 0);
1085 	crypto_scalarmult_base(pubkey, privkey);
1086 }
1087 
1088 static void
1089 wg_algo_dh(uint8_t out[static WG_DH_OUTPUT_LEN],
1090     const uint8_t privkey[static WG_STATIC_KEY_LEN],
1091     const uint8_t pubkey[static WG_STATIC_KEY_LEN])
1092 {
1093 
1094 	CTASSERT(WG_STATIC_KEY_LEN == crypto_scalarmult_curve25519_BYTES);
1095 
1096 	int ret __diagused = crypto_scalarmult(out, privkey, pubkey);
1097 	KASSERT(ret == 0);
1098 }
1099 
1100 static void
1101 wg_algo_hmac(uint8_t out[], const size_t outlen,
1102     const uint8_t key[], const size_t keylen,
1103     const uint8_t in[], const size_t inlen)
1104 {
1105 #define IPAD	0x36
1106 #define OPAD	0x5c
1107 	uint8_t hmackey[HMAC_BLOCK_LEN] = {0};
1108 	uint8_t ipad[HMAC_BLOCK_LEN];
1109 	uint8_t opad[HMAC_BLOCK_LEN];
1110 	size_t i;
1111 	struct blake2s state;
1112 
1113 	KASSERT(outlen == WG_HASH_LEN);
1114 	KASSERT(keylen <= HMAC_BLOCK_LEN);
1115 
1116 	memcpy(hmackey, key, keylen);
1117 
1118 	for (i = 0; i < sizeof(hmackey); i++) {
1119 		ipad[i] = hmackey[i] ^ IPAD;
1120 		opad[i] = hmackey[i] ^ OPAD;
1121 	}
1122 
1123 	blake2s_init(&state, WG_HASH_LEN, NULL, 0);
1124 	blake2s_update(&state, ipad, sizeof(ipad));
1125 	blake2s_update(&state, in, inlen);
1126 	blake2s_final(&state, out);
1127 
1128 	blake2s_init(&state, WG_HASH_LEN, NULL, 0);
1129 	blake2s_update(&state, opad, sizeof(opad));
1130 	blake2s_update(&state, out, WG_HASH_LEN);
1131 	blake2s_final(&state, out);
1132 #undef IPAD
1133 #undef OPAD
1134 }
1135 
1136 static void
1137 wg_algo_kdf(uint8_t out1[static WG_KDF_OUTPUT_LEN],
1138     uint8_t out2[WG_KDF_OUTPUT_LEN],
1139     uint8_t out3[WG_KDF_OUTPUT_LEN],
1140     const uint8_t ckey[static WG_CHAINING_KEY_LEN],
1141     const uint8_t input[], const size_t inputlen)
1142 {
1143 	uint8_t tmp1[WG_KDF_OUTPUT_LEN], tmp2[WG_KDF_OUTPUT_LEN + 1];
1144 	uint8_t one[1];
1145 
1146 	/*
1147 	 * [N] 4.3: "an input_key_material byte sequence with length
1148 	 * either zero bytes, 32 bytes, or DHLEN bytes."
1149 	 */
1150 	KASSERT(inputlen == 0 || inputlen == 32 || inputlen == NOISE_DHLEN);
1151 
1152 	WG_DUMP_HASH("ckey", ckey);
1153 	if (input != NULL)
1154 		WG_DUMP_HASH("input", input);
1155 	wg_algo_hmac(tmp1, sizeof(tmp1), ckey, WG_CHAINING_KEY_LEN,
1156 	    input, inputlen);
1157 	WG_DUMP_HASH("tmp1", tmp1);
1158 	one[0] = 1;
1159 	wg_algo_hmac(out1, WG_KDF_OUTPUT_LEN, tmp1, sizeof(tmp1),
1160 	    one, sizeof(one));
1161 	WG_DUMP_HASH("out1", out1);
1162 	if (out2 == NULL)
1163 		return;
1164 	memcpy(tmp2, out1, WG_KDF_OUTPUT_LEN);
1165 	tmp2[WG_KDF_OUTPUT_LEN] = 2;
1166 	wg_algo_hmac(out2, WG_KDF_OUTPUT_LEN, tmp1, sizeof(tmp1),
1167 	    tmp2, sizeof(tmp2));
1168 	WG_DUMP_HASH("out2", out2);
1169 	if (out3 == NULL)
1170 		return;
1171 	memcpy(tmp2, out2, WG_KDF_OUTPUT_LEN);
1172 	tmp2[WG_KDF_OUTPUT_LEN] = 3;
1173 	wg_algo_hmac(out3, WG_KDF_OUTPUT_LEN, tmp1, sizeof(tmp1),
1174 	    tmp2, sizeof(tmp2));
1175 	WG_DUMP_HASH("out3", out3);
1176 }
1177 
1178 static void __noinline
1179 wg_algo_dh_kdf(uint8_t ckey[static WG_CHAINING_KEY_LEN],
1180     uint8_t cipher_key[WG_CIPHER_KEY_LEN],
1181     const uint8_t local_key[static WG_STATIC_KEY_LEN],
1182     const uint8_t remote_key[static WG_STATIC_KEY_LEN])
1183 {
1184 	uint8_t dhout[WG_DH_OUTPUT_LEN];
1185 
1186 	wg_algo_dh(dhout, local_key, remote_key);
1187 	wg_algo_kdf(ckey, cipher_key, NULL, ckey, dhout, sizeof(dhout));
1188 
1189 	WG_DUMP_HASH("dhout", dhout);
1190 	WG_DUMP_HASH("ckey", ckey);
1191 	if (cipher_key != NULL)
1192 		WG_DUMP_HASH("cipher_key", cipher_key);
1193 }
1194 
1195 static void
1196 wg_algo_aead_enc(uint8_t out[], size_t expected_outsize,
1197     const uint8_t key[static crypto_aead_chacha20poly1305_ietf_KEYBYTES],
1198     const uint64_t counter,
1199     const uint8_t plain[], const size_t plainsize,
1200     const uint8_t auth[], size_t authlen)
1201 {
1202 	uint8_t nonce[(32 + 64) / 8] = {0};
1203 	long long unsigned int outsize;
1204 	int error __diagused;
1205 
1206 	le64enc(&nonce[4], counter);
1207 
1208 	error = crypto_aead_chacha20poly1305_ietf_encrypt(out, &outsize, plain,
1209 	    plainsize, auth, authlen, NULL, nonce, key);
1210 	KASSERT(error == 0);
1211 	KASSERT(outsize == expected_outsize);
1212 }
1213 
1214 static int
1215 wg_algo_aead_dec(uint8_t out[], size_t expected_outsize,
1216     const uint8_t key[static crypto_aead_chacha20poly1305_ietf_KEYBYTES],
1217     const uint64_t counter,
1218     const uint8_t encrypted[], const size_t encryptedsize,
1219     const uint8_t auth[], size_t authlen)
1220 {
1221 	uint8_t nonce[(32 + 64) / 8] = {0};
1222 	long long unsigned int outsize;
1223 	int error;
1224 
1225 	le64enc(&nonce[4], counter);
1226 
1227 	error = crypto_aead_chacha20poly1305_ietf_decrypt(out, &outsize, NULL,
1228 	    encrypted, encryptedsize, auth, authlen, nonce, key);
1229 	if (error == 0)
1230 		KASSERT(outsize == expected_outsize);
1231 	return error;
1232 }
1233 
1234 static void
1235 wg_algo_xaead_enc(uint8_t out[], const size_t expected_outsize,
1236     const uint8_t key[static crypto_aead_xchacha20poly1305_ietf_KEYBYTES],
1237     const uint8_t plain[], const size_t plainsize,
1238     const uint8_t auth[], size_t authlen,
1239     const uint8_t nonce[static WG_SALT_LEN])
1240 {
1241 	long long unsigned int outsize;
1242 	int error __diagused;
1243 
1244 	CTASSERT(WG_SALT_LEN == crypto_aead_xchacha20poly1305_ietf_NPUBBYTES);
1245 	error = crypto_aead_xchacha20poly1305_ietf_encrypt(out, &outsize,
1246 	    plain, plainsize, auth, authlen, NULL, nonce, key);
1247 	KASSERT(error == 0);
1248 	KASSERT(outsize == expected_outsize);
1249 }
1250 
1251 static int
1252 wg_algo_xaead_dec(uint8_t out[], const size_t expected_outsize,
1253     const uint8_t key[static crypto_aead_xchacha20poly1305_ietf_KEYBYTES],
1254     const uint8_t encrypted[], const size_t encryptedsize,
1255     const uint8_t auth[], size_t authlen,
1256     const uint8_t nonce[static WG_SALT_LEN])
1257 {
1258 	long long unsigned int outsize;
1259 	int error;
1260 
1261 	error = crypto_aead_xchacha20poly1305_ietf_decrypt(out, &outsize, NULL,
1262 	    encrypted, encryptedsize, auth, authlen, nonce, key);
1263 	if (error == 0)
1264 		KASSERT(outsize == expected_outsize);
1265 	return error;
1266 }
1267 
1268 static void
1269 wg_algo_tai64n(wg_timestamp_t timestamp)
1270 {
1271 	struct timespec ts;
1272 
1273 	/* FIXME strict TAI64N (https://cr.yp.to/libtai/tai64.html) */
1274 	getnanotime(&ts);
1275 	/* TAI64 label in external TAI64 format */
1276 	be32enc(timestamp, 0x40000000U + (uint32_t)(ts.tv_sec >> 32));
1277 	/* second beginning from 1970 TAI */
1278 	be32enc(timestamp + 4, (uint32_t)(ts.tv_sec & 0xffffffffU));
1279 	/* nanosecond in big-endian format */
1280 	be32enc(timestamp + 8, (uint32_t)ts.tv_nsec);
1281 }
1282 
1283 /*
1284  * wg_get_stable_session(wgp, psref)
1285  *
1286  *	Get a passive reference to the current stable session, or
1287  *	return NULL if there is no current stable session.
1288  *
1289  *	The pointer is always there but the session is not necessarily
1290  *	ESTABLISHED; if it is not ESTABLISHED, return NULL.  However,
1291  *	the session may transition from ESTABLISHED to DESTROYING while
1292  *	holding the passive reference.
1293  */
1294 static struct wg_session *
1295 wg_get_stable_session(struct wg_peer *wgp, struct psref *psref)
1296 {
1297 	int s;
1298 	struct wg_session *wgs;
1299 
1300 	s = pserialize_read_enter();
1301 	wgs = atomic_load_consume(&wgp->wgp_session_stable);
1302 	if (__predict_false(atomic_load_relaxed(&wgs->wgs_state) !=
1303 		WGS_STATE_ESTABLISHED))
1304 		wgs = NULL;
1305 	else
1306 		psref_acquire(psref, &wgs->wgs_psref, wg_psref_class);
1307 	pserialize_read_exit(s);
1308 
1309 	return wgs;
1310 }
1311 
1312 static void
1313 wg_put_session(struct wg_session *wgs, struct psref *psref)
1314 {
1315 
1316 	psref_release(psref, &wgs->wgs_psref, wg_psref_class);
1317 }
1318 
1319 static void
1320 wg_destroy_session(struct wg_softc *wg, struct wg_session *wgs)
1321 {
1322 	struct wg_peer *wgp = wgs->wgs_peer;
1323 	struct wg_session *wgs0 __diagused;
1324 	void *garbage;
1325 
1326 	KASSERT(mutex_owned(wgp->wgp_lock));
1327 	KASSERT(wgs->wgs_state != WGS_STATE_UNKNOWN);
1328 
1329 	/* Remove the session from the table.  */
1330 	wgs0 = thmap_del(wg->wg_sessions_byindex,
1331 	    &wgs->wgs_local_index, sizeof(wgs->wgs_local_index));
1332 	KASSERT(wgs0 == wgs);
1333 	garbage = thmap_stage_gc(wg->wg_sessions_byindex);
1334 
1335 	/* Wait for passive references to drain.  */
1336 	pserialize_perform(wgp->wgp_psz);
1337 	psref_target_destroy(&wgs->wgs_psref, wg_psref_class);
1338 
1339 	/*
1340 	 * Free memory, zero state, and transition to UNKNOWN.  We have
1341 	 * exclusive access to the session now, so there is no need for
1342 	 * an atomic store.
1343 	 */
1344 	thmap_gc(wg->wg_sessions_byindex, garbage);
1345 	WG_DLOG("session[L=%"PRIx32" R=%"PRIx32"] -> WGS_STATE_UNKNOWN\n",
1346 	    wgs->wgs_local_index, wgs->wgs_remote_index);
1347 	wgs->wgs_local_index = 0;
1348 	wgs->wgs_remote_index = 0;
1349 	wg_clear_states(wgs);
1350 	wgs->wgs_state = WGS_STATE_UNKNOWN;
1351 	wgs->wgs_force_rekey = false;
1352 }
1353 
1354 /*
1355  * wg_get_session_index(wg, wgs)
1356  *
1357  *	Choose a session index for wgs->wgs_local_index, and store it
1358  *	in wg's table of sessions by index.
1359  *
1360  *	wgs must be the unstable session of its peer, and must be
1361  *	transitioning out of the UNKNOWN state.
1362  */
1363 static void
1364 wg_get_session_index(struct wg_softc *wg, struct wg_session *wgs)
1365 {
1366 	struct wg_peer *wgp __diagused = wgs->wgs_peer;
1367 	struct wg_session *wgs0;
1368 	uint32_t index;
1369 
1370 	KASSERT(mutex_owned(wgp->wgp_lock));
1371 	KASSERT(wgs == wgp->wgp_session_unstable);
1372 	KASSERTMSG(wgs->wgs_state == WGS_STATE_UNKNOWN, "state=%d",
1373 	    wgs->wgs_state);
1374 
1375 	do {
1376 		/* Pick a uniform random index.  */
1377 		index = cprng_strong32();
1378 
1379 		/* Try to take it.  */
1380 		wgs->wgs_local_index = index;
1381 		wgs0 = thmap_put(wg->wg_sessions_byindex,
1382 		    &wgs->wgs_local_index, sizeof wgs->wgs_local_index, wgs);
1383 
1384 		/* If someone else beat us, start over.  */
1385 	} while (__predict_false(wgs0 != wgs));
1386 }
1387 
1388 /*
1389  * wg_put_session_index(wg, wgs)
1390  *
1391  *	Remove wgs from the table of sessions by index, wait for any
1392  *	passive references to drain, and transition the session to the
1393  *	UNKNOWN state.
1394  *
1395  *	wgs must be the unstable session of its peer, and must not be
1396  *	UNKNOWN or ESTABLISHED.
1397  */
1398 static void
1399 wg_put_session_index(struct wg_softc *wg, struct wg_session *wgs)
1400 {
1401 	struct wg_peer *wgp __diagused = wgs->wgs_peer;
1402 
1403 	KASSERT(mutex_owned(wgp->wgp_lock));
1404 	KASSERT(wgs->wgs_state != WGS_STATE_UNKNOWN);
1405 	KASSERT(wgs->wgs_state != WGS_STATE_ESTABLISHED);
1406 
1407 	wg_destroy_session(wg, wgs);
1408 	psref_target_init(&wgs->wgs_psref, wg_psref_class);
1409 }
1410 
1411 /*
1412  * Handshake patterns
1413  *
1414  * [W] 5: "These messages use the "IK" pattern from Noise"
1415  * [N] 7.5. Interactive handshake patterns (fundamental)
1416  *     "The first character refers to the initiator’s static key:"
1417  *     "I = Static key for initiator Immediately transmitted to responder,
1418  *          despite reduced or absent identity hiding"
1419  *     "The second character refers to the responder’s static key:"
1420  *     "K = Static key for responder Known to initiator"
1421  *     "IK:
1422  *        <- s
1423  *        ...
1424  *        -> e, es, s, ss
1425  *        <- e, ee, se"
1426  * [N] 9.4. Pattern modifiers
1427  *     "IKpsk2:
1428  *        <- s
1429  *        ...
1430  *        -> e, es, s, ss
1431  *        <- e, ee, se, psk"
1432  */
1433 static void
1434 wg_fill_msg_init(struct wg_softc *wg, struct wg_peer *wgp,
1435     struct wg_session *wgs, struct wg_msg_init *wgmi)
1436 {
1437 	uint8_t ckey[WG_CHAINING_KEY_LEN]; /* [W] 5.4.2: Ci */
1438 	uint8_t hash[WG_HASH_LEN]; /* [W] 5.4.2: Hi */
1439 	uint8_t cipher_key[WG_CIPHER_KEY_LEN];
1440 	uint8_t pubkey[WG_EPHEMERAL_KEY_LEN];
1441 	uint8_t privkey[WG_EPHEMERAL_KEY_LEN];
1442 
1443 	KASSERT(mutex_owned(wgp->wgp_lock));
1444 	KASSERT(wgs == wgp->wgp_session_unstable);
1445 	KASSERTMSG(wgs->wgs_state == WGS_STATE_INIT_ACTIVE, "state=%d",
1446 	    wgs->wgs_state);
1447 
1448 	wgmi->wgmi_type = htole32(WG_MSG_TYPE_INIT);
1449 	wgmi->wgmi_sender = wgs->wgs_local_index;
1450 
1451 	/* [W] 5.4.2: First Message: Initiator to Responder */
1452 
1453 	/* Ci := HASH(CONSTRUCTION) */
1454 	/* Hi := HASH(Ci || IDENTIFIER) */
1455 	wg_init_key_and_hash(ckey, hash);
1456 	/* Hi := HASH(Hi || Sr^pub) */
1457 	wg_algo_hash(hash, wgp->wgp_pubkey, sizeof(wgp->wgp_pubkey));
1458 
1459 	WG_DUMP_HASH("hash", hash);
1460 
1461 	/* [N] 2.2: "e" */
1462 	/* Ei^priv, Ei^pub := DH-GENERATE() */
1463 	wg_algo_generate_keypair(pubkey, privkey);
1464 	/* Ci := KDF1(Ci, Ei^pub) */
1465 	wg_algo_kdf(ckey, NULL, NULL, ckey, pubkey, sizeof(pubkey));
1466 	/* msg.ephemeral := Ei^pub */
1467 	memcpy(wgmi->wgmi_ephemeral, pubkey, sizeof(wgmi->wgmi_ephemeral));
1468 	/* Hi := HASH(Hi || msg.ephemeral) */
1469 	wg_algo_hash(hash, pubkey, sizeof(pubkey));
1470 
1471 	WG_DUMP_HASH("ckey", ckey);
1472 	WG_DUMP_HASH("hash", hash);
1473 
1474 	/* [N] 2.2: "es" */
1475 	/* Ci, k := KDF2(Ci, DH(Ei^priv, Sr^pub)) */
1476 	wg_algo_dh_kdf(ckey, cipher_key, privkey, wgp->wgp_pubkey);
1477 
1478 	/* [N] 2.2: "s" */
1479 	/* msg.static := AEAD(k, 0, Si^pub, Hi) */
1480 	wg_algo_aead_enc(wgmi->wgmi_static, sizeof(wgmi->wgmi_static),
1481 	    cipher_key, 0, wg->wg_pubkey, sizeof(wg->wg_pubkey),
1482 	    hash, sizeof(hash));
1483 	/* Hi := HASH(Hi || msg.static) */
1484 	wg_algo_hash(hash, wgmi->wgmi_static, sizeof(wgmi->wgmi_static));
1485 
1486 	WG_DUMP_HASH48("wgmi_static", wgmi->wgmi_static);
1487 
1488 	/* [N] 2.2: "ss" */
1489 	/* Ci, k := KDF2(Ci, DH(Si^priv, Sr^pub)) */
1490 	wg_algo_dh_kdf(ckey, cipher_key, wg->wg_privkey, wgp->wgp_pubkey);
1491 
1492 	/* msg.timestamp := AEAD(k, TIMESTAMP(), Hi) */
1493 	wg_timestamp_t timestamp;
1494 	wg_algo_tai64n(timestamp);
1495 	wg_algo_aead_enc(wgmi->wgmi_timestamp, sizeof(wgmi->wgmi_timestamp),
1496 	    cipher_key, 0, timestamp, sizeof(timestamp), hash, sizeof(hash));
1497 	/* Hi := HASH(Hi || msg.timestamp) */
1498 	wg_algo_hash(hash, wgmi->wgmi_timestamp, sizeof(wgmi->wgmi_timestamp));
1499 
1500 	/* [W] 5.4.4 Cookie MACs */
1501 	wg_algo_mac_mac1(wgmi->wgmi_mac1, sizeof(wgmi->wgmi_mac1),
1502 	    wgp->wgp_pubkey, sizeof(wgp->wgp_pubkey),
1503 	    (const uint8_t *)wgmi, offsetof(struct wg_msg_init, wgmi_mac1));
1504 	/* Need mac1 to decrypt a cookie from a cookie message */
1505 	memcpy(wgp->wgp_last_sent_mac1, wgmi->wgmi_mac1,
1506 	    sizeof(wgp->wgp_last_sent_mac1));
1507 	wgp->wgp_last_sent_mac1_valid = true;
1508 
1509 	if (wgp->wgp_latest_cookie_time == 0 ||
1510 	    (time_uptime - wgp->wgp_latest_cookie_time) >= WG_COOKIE_TIME)
1511 		memset(wgmi->wgmi_mac2, 0, sizeof(wgmi->wgmi_mac2));
1512 	else {
1513 		wg_algo_mac(wgmi->wgmi_mac2, sizeof(wgmi->wgmi_mac2),
1514 		    wgp->wgp_latest_cookie, WG_COOKIE_LEN,
1515 		    (const uint8_t *)wgmi,
1516 		    offsetof(struct wg_msg_init, wgmi_mac2),
1517 		    NULL, 0);
1518 	}
1519 
1520 	memcpy(wgs->wgs_ephemeral_key_pub, pubkey, sizeof(pubkey));
1521 	memcpy(wgs->wgs_ephemeral_key_priv, privkey, sizeof(privkey));
1522 	memcpy(wgs->wgs_handshake_hash, hash, sizeof(hash));
1523 	memcpy(wgs->wgs_chaining_key, ckey, sizeof(ckey));
1524 	WG_DLOG("%s: sender=%x\n", __func__, wgs->wgs_local_index);
1525 }
1526 
1527 /*
1528  * wg_initiator_priority(wg, wgp)
1529  *
1530  *	Return true if we claim priority over peer wgp as initiator at
1531  *	the moment, false if not.  That is, if we and our peer are
1532  *	trying to initiate a session, do we ignore the peer's attempt
1533  *	and barge ahead with ours, or discard our attempt and accept
1534  *	the peer's?
1535  *
1536  *	We jointly flip a coin by computing
1537  *
1538  *		H(pubkey A) ^ H(pubkey B) ^ H(posix minutes as le64),
1539  *
1540  *	and taking the low-order bit.  If our public key hash, as a
1541  *	256-bit integer in little-endian, is less than the peer's
1542  *	public key hash, also as a 256-bit integer in little-endian, we
1543  *	claim priority iff the bit is 0; otherwise we claim priority
1544  *	iff the bit is 1.
1545  *
1546  *	This way, it is essentially arbitrary who claims priority, and
1547  *	it may change (by a coin toss) minute to minute, but both
1548  *	parties agree at any given moment -- except possibly at the
1549  *	boundary of a minute -- who will take priority.
1550  *
1551  *	This is an extension to the WireGuard protocol -- as far as I
1552  *	can tell, the protocol whitepaper has no resolution to this
1553  *	deadlock scenario.  According to the author, `the deadlock
1554  *	doesn't happen because of some additional state machine logic,
1555  *	and on very small chances that it does, it quickly undoes
1556  *	itself.', but this additional state machine logic does not
1557  *	appear to be anywhere in the whitepaper, and I don't see how it
1558  *	can undo itself until both sides have given up and one side is
1559  *	quicker to initiate the next time around.
1560  *
1561  *	XXX It might be prudent to put a prefix in the hash input, so
1562  *	we avoid accidentally colliding with any other uses of the same
1563  *	hash on the same input.  But it's best if any changes are
1564  *	coordinated, so that peers generally agree on what coin is
1565  *	being tossed, instead of tossing their own independent coins
1566  *	(which will also converge to working but more slowly over more
1567  *	handshake retries).
1568  */
1569 static bool
1570 wg_initiator_priority(struct wg_softc *wg, struct wg_peer *wgp)
1571 {
1572 	const uint64_t now = time_second/60, now_le = htole64(now);
1573 	uint8_t h_min;
1574 	uint8_t h_local[BLAKE2S_MAX_DIGEST];
1575 	uint8_t h_peer[BLAKE2S_MAX_DIGEST];
1576 	int borrow;
1577 	unsigned i;
1578 
1579 	blake2s(&h_min, 1, NULL, 0, &now_le, sizeof(now_le));
1580 	blake2s(h_local, sizeof(h_local), NULL, 0,
1581 	    wg->wg_pubkey, sizeof(wg->wg_pubkey));
1582 	blake2s(h_peer, sizeof(h_peer), NULL, 0,
1583 	    wgp->wgp_pubkey, sizeof(wgp->wgp_pubkey));
1584 
1585 	for (borrow = 0, i = 0; i < BLAKE2S_MAX_DIGEST; i++)
1586 		borrow = (h_local[i] - h_peer[i] + borrow) >> 8;
1587 
1588 	return 1 & (h_local[0] ^ h_peer[0] ^ h_min ^ borrow);
1589 }
1590 
1591 static void __noinline
1592 wg_handle_msg_init(struct wg_softc *wg, const struct wg_msg_init *wgmi,
1593     const struct sockaddr *src)
1594 {
1595 	uint8_t ckey[WG_CHAINING_KEY_LEN]; /* [W] 5.4.2: Ci */
1596 	uint8_t hash[WG_HASH_LEN]; /* [W] 5.4.2: Hi */
1597 	uint8_t cipher_key[WG_CIPHER_KEY_LEN];
1598 	uint8_t peer_pubkey[WG_STATIC_KEY_LEN];
1599 	struct wg_peer *wgp;
1600 	struct wg_session *wgs;
1601 	int error, ret;
1602 	struct psref psref_peer;
1603 	uint8_t mac1[WG_MAC_LEN];
1604 
1605 	WG_TRACE("init msg received");
1606 
1607 	wg_algo_mac_mac1(mac1, sizeof(mac1),
1608 	    wg->wg_pubkey, sizeof(wg->wg_pubkey),
1609 	    (const uint8_t *)wgmi, offsetof(struct wg_msg_init, wgmi_mac1));
1610 
1611 	/*
1612 	 * [W] 5.3: Denial of Service Mitigation & Cookies
1613 	 * "the responder, ..., must always reject messages with an invalid
1614 	 *  msg.mac1"
1615 	 */
1616 	if (!consttime_memequal(mac1, wgmi->wgmi_mac1, sizeof(mac1))) {
1617 		WG_DLOG("mac1 is invalid\n");
1618 		return;
1619 	}
1620 
1621 	/*
1622 	 * [W] 5.4.2: First Message: Initiator to Responder
1623 	 * "When the responder receives this message, it does the same
1624 	 *  operations so that its final state variables are identical,
1625 	 *  replacing the operands of the DH function to produce equivalent
1626 	 *  values."
1627 	 *  Note that the following comments of operations are just copies of
1628 	 *  the initiator's ones.
1629 	 */
1630 
1631 	/* Ci := HASH(CONSTRUCTION) */
1632 	/* Hi := HASH(Ci || IDENTIFIER) */
1633 	wg_init_key_and_hash(ckey, hash);
1634 	/* Hi := HASH(Hi || Sr^pub) */
1635 	wg_algo_hash(hash, wg->wg_pubkey, sizeof(wg->wg_pubkey));
1636 
1637 	/* [N] 2.2: "e" */
1638 	/* Ci := KDF1(Ci, Ei^pub) */
1639 	wg_algo_kdf(ckey, NULL, NULL, ckey, wgmi->wgmi_ephemeral,
1640 	    sizeof(wgmi->wgmi_ephemeral));
1641 	/* Hi := HASH(Hi || msg.ephemeral) */
1642 	wg_algo_hash(hash, wgmi->wgmi_ephemeral, sizeof(wgmi->wgmi_ephemeral));
1643 
1644 	WG_DUMP_HASH("ckey", ckey);
1645 
1646 	/* [N] 2.2: "es" */
1647 	/* Ci, k := KDF2(Ci, DH(Ei^priv, Sr^pub)) */
1648 	wg_algo_dh_kdf(ckey, cipher_key, wg->wg_privkey, wgmi->wgmi_ephemeral);
1649 
1650 	WG_DUMP_HASH48("wgmi_static", wgmi->wgmi_static);
1651 
1652 	/* [N] 2.2: "s" */
1653 	/* msg.static := AEAD(k, 0, Si^pub, Hi) */
1654 	error = wg_algo_aead_dec(peer_pubkey, WG_STATIC_KEY_LEN, cipher_key, 0,
1655 	    wgmi->wgmi_static, sizeof(wgmi->wgmi_static), hash, sizeof(hash));
1656 	if (error != 0) {
1657 		WG_LOG_RATECHECK(&wg->wg_ppsratecheck, LOG_DEBUG,
1658 		    "%s: wg_algo_aead_dec for secret key failed\n",
1659 		    if_name(&wg->wg_if));
1660 		return;
1661 	}
1662 	/* Hi := HASH(Hi || msg.static) */
1663 	wg_algo_hash(hash, wgmi->wgmi_static, sizeof(wgmi->wgmi_static));
1664 
1665 	wgp = wg_lookup_peer_by_pubkey(wg, peer_pubkey, &psref_peer);
1666 	if (wgp == NULL) {
1667 		WG_DLOG("peer not found\n");
1668 		return;
1669 	}
1670 
1671 	/*
1672 	 * Lock the peer to serialize access to cookie state.
1673 	 *
1674 	 * XXX Can we safely avoid holding the lock across DH?  Take it
1675 	 * just to verify mac2 and then unlock/DH/lock?
1676 	 */
1677 	mutex_enter(wgp->wgp_lock);
1678 
1679 	if (__predict_false(wg_is_underload(wg, wgp, WG_MSG_TYPE_INIT))) {
1680 		WG_TRACE("under load");
1681 		/*
1682 		 * [W] 5.3: Denial of Service Mitigation & Cookies
1683 		 * "the responder, ..., and when under load may reject messages
1684 		 *  with an invalid msg.mac2.  If the responder receives a
1685 		 *  message with a valid msg.mac1 yet with an invalid msg.mac2,
1686 		 *  and is under load, it may respond with a cookie reply
1687 		 *  message"
1688 		 */
1689 		uint8_t zero[WG_MAC_LEN] = {0};
1690 		if (consttime_memequal(wgmi->wgmi_mac2, zero, sizeof(zero))) {
1691 			WG_TRACE("sending a cookie message: no cookie included");
1692 			wg_send_cookie_msg(wg, wgp, wgmi->wgmi_sender,
1693 			    wgmi->wgmi_mac1, src);
1694 			goto out;
1695 		}
1696 		if (!wgp->wgp_last_sent_cookie_valid) {
1697 			WG_TRACE("sending a cookie message: no cookie sent ever");
1698 			wg_send_cookie_msg(wg, wgp, wgmi->wgmi_sender,
1699 			    wgmi->wgmi_mac1, src);
1700 			goto out;
1701 		}
1702 		uint8_t mac2[WG_MAC_LEN];
1703 		wg_algo_mac(mac2, sizeof(mac2), wgp->wgp_last_sent_cookie,
1704 		    WG_COOKIE_LEN, (const uint8_t *)wgmi,
1705 		    offsetof(struct wg_msg_init, wgmi_mac2), NULL, 0);
1706 		if (!consttime_memequal(mac2, wgmi->wgmi_mac2, sizeof(mac2))) {
1707 			WG_DLOG("mac2 is invalid\n");
1708 			goto out;
1709 		}
1710 		WG_TRACE("under load, but continue to sending");
1711 	}
1712 
1713 	/* [N] 2.2: "ss" */
1714 	/* Ci, k := KDF2(Ci, DH(Si^priv, Sr^pub)) */
1715 	wg_algo_dh_kdf(ckey, cipher_key, wg->wg_privkey, wgp->wgp_pubkey);
1716 
1717 	/* msg.timestamp := AEAD(k, TIMESTAMP(), Hi) */
1718 	wg_timestamp_t timestamp;
1719 	error = wg_algo_aead_dec(timestamp, sizeof(timestamp), cipher_key, 0,
1720 	    wgmi->wgmi_timestamp, sizeof(wgmi->wgmi_timestamp),
1721 	    hash, sizeof(hash));
1722 	if (error != 0) {
1723 		WG_LOG_RATECHECK(&wgp->wgp_ppsratecheck, LOG_DEBUG,
1724 		    "%s: peer %s: wg_algo_aead_dec for timestamp failed\n",
1725 		    if_name(&wg->wg_if), wgp->wgp_name);
1726 		goto out;
1727 	}
1728 	/* Hi := HASH(Hi || msg.timestamp) */
1729 	wg_algo_hash(hash, wgmi->wgmi_timestamp, sizeof(wgmi->wgmi_timestamp));
1730 
1731 	/*
1732 	 * [W] 5.1 "The responder keeps track of the greatest timestamp
1733 	 *      received per peer and discards packets containing
1734 	 *      timestamps less than or equal to it."
1735 	 */
1736 	ret = memcmp(timestamp, wgp->wgp_timestamp_latest_init,
1737 	    sizeof(timestamp));
1738 	if (ret <= 0) {
1739 		WG_LOG_RATECHECK(&wgp->wgp_ppsratecheck, LOG_DEBUG,
1740 		    "%s: peer %s: invalid init msg: timestamp is old\n",
1741 		    if_name(&wg->wg_if), wgp->wgp_name);
1742 		goto out;
1743 	}
1744 	memcpy(wgp->wgp_timestamp_latest_init, timestamp, sizeof(timestamp));
1745 
1746 	/*
1747 	 * Message is good -- we're committing to handle it now, unless
1748 	 * we were already initiating a session.
1749 	 */
1750 	wgs = wgp->wgp_session_unstable;
1751 	switch (wgs->wgs_state) {
1752 	case WGS_STATE_UNKNOWN:		/* new session initiated by peer */
1753 		break;
1754 	case WGS_STATE_INIT_ACTIVE:	/* we're already initiating */
1755 		if (wg_initiator_priority(wg, wgp)) {
1756 			WG_TRACE("Session already initializing,"
1757 			    " ignoring the message");
1758 			goto out;
1759 		}
1760 		WG_TRACE("Yielding session initiation to peer");
1761 		wg_put_session_index(wg, wgs);
1762 		KASSERTMSG(wgs->wgs_state == WGS_STATE_UNKNOWN, "state=%d",
1763 		    wgs->wgs_state);
1764 		break;
1765 	case WGS_STATE_INIT_PASSIVE:	/* peer is retrying, start over */
1766 		WG_TRACE("Session already initializing, destroying old states");
1767 		/*
1768 		 * XXX Avoid this -- just resend our response -- if the
1769 		 * INIT message is identical to the previous one.
1770 		 */
1771 		wg_put_session_index(wg, wgs);
1772 		KASSERTMSG(wgs->wgs_state == WGS_STATE_UNKNOWN, "state=%d",
1773 		    wgs->wgs_state);
1774 		break;
1775 	case WGS_STATE_ESTABLISHED:	/* can't happen */
1776 		panic("unstable session can't be established");
1777 	case WGS_STATE_DESTROYING:	/* rekey initiated by peer */
1778 		WG_TRACE("Session destroying, but force to clear");
1779 		wg_put_session_index(wg, wgs);
1780 		KASSERTMSG(wgs->wgs_state == WGS_STATE_UNKNOWN, "state=%d",
1781 		    wgs->wgs_state);
1782 		break;
1783 	default:
1784 		panic("invalid session state: %d", wgs->wgs_state);
1785 	}
1786 
1787 	/*
1788 	 * Assign a fresh session index.
1789 	 */
1790 	KASSERTMSG(wgs->wgs_state == WGS_STATE_UNKNOWN, "state=%d",
1791 	    wgs->wgs_state);
1792 	wg_get_session_index(wg, wgs);
1793 
1794 	memcpy(wgs->wgs_handshake_hash, hash, sizeof(hash));
1795 	memcpy(wgs->wgs_chaining_key, ckey, sizeof(ckey));
1796 	memcpy(wgs->wgs_ephemeral_key_peer, wgmi->wgmi_ephemeral,
1797 	    sizeof(wgmi->wgmi_ephemeral));
1798 
1799 	/*
1800 	 * The packet is genuine.  Update the peer's endpoint if the
1801 	 * source address changed.
1802 	 *
1803 	 * XXX How to prevent DoS by replaying genuine packets from the
1804 	 * wrong source address?
1805 	 */
1806 	wg_update_endpoint_if_necessary(wgp, src);
1807 
1808 	/*
1809 	 * Even though we don't transition from INIT_PASSIVE to
1810 	 * ESTABLISHED until we receive the first data packet from the
1811 	 * initiator, we count the time of the INIT message as the time
1812 	 * of establishment -- this is used to decide when to erase
1813 	 * keys, and we want to start counting as soon as we have
1814 	 * generated keys.
1815 	 */
1816 	wgs->wgs_time_established = time_uptime32;
1817 	wg_schedule_session_dtor_timer(wgp);
1818 
1819 	/*
1820 	 * Respond to the initiator with our ephemeral public key.
1821 	 */
1822 	wg_send_handshake_msg_resp(wg, wgp, wgs, wgmi);
1823 
1824 	WG_DLOG("session[L=%"PRIx32" R=%"PRIx32"]:"
1825 	    " calculate keys as responder\n",
1826 	    wgs->wgs_local_index, wgs->wgs_remote_index);
1827 	wg_calculate_keys(wgs, false);
1828 	wg_clear_states(wgs);
1829 
1830 	/*
1831 	 * Session is ready to receive data now that we have received
1832 	 * the peer initiator's ephemeral key pair, generated our
1833 	 * responder's ephemeral key pair, and derived a session key.
1834 	 *
1835 	 * Transition from UNKNOWN to INIT_PASSIVE to publish it to the
1836 	 * data rx path, wg_handle_msg_data, where the
1837 	 * atomic_load_acquire matching this atomic_store_release
1838 	 * happens.
1839 	 *
1840 	 * (Session is not, however, ready to send data until the peer
1841 	 * has acknowledged our response by sending its first data
1842 	 * packet.  So don't swap the sessions yet.)
1843 	 */
1844 	WG_DLOG("session[L=%"PRIx32" R=%"PRIx32"] -> WGS_STATE_INIT_PASSIVE\n",
1845 	    wgs->wgs_local_index, wgs->wgs_remote_index);
1846 	atomic_store_release(&wgs->wgs_state, WGS_STATE_INIT_PASSIVE);
1847 	WG_TRACE("WGS_STATE_INIT_PASSIVE");
1848 
1849 out:
1850 	mutex_exit(wgp->wgp_lock);
1851 	wg_put_peer(wgp, &psref_peer);
1852 }
1853 
/*
 * wg_get_so_by_af(wg, af)
 *
 *	Return the socket of interface wg for address family af:
 *	wg_so4 for AF_INET, wg_so6 for AF_INET6, each only if the
 *	kernel is built with that family.  Panics on any other family.
 */
static struct socket *
wg_get_so_by_af(struct wg_softc *wg, const int af)
{

#ifdef INET
	if (af == AF_INET)
		return wg->wg_so4;
#endif
#ifdef INET6
	if (af == AF_INET6)
		return wg->wg_so6;
#endif
	panic("wg: no such af: %d", af);
}
1871 
1872 static struct socket *
1873 wg_get_so_by_peer(struct wg_peer *wgp, struct wg_sockaddr *wgsa)
1874 {
1875 
1876 	return wg_get_so_by_af(wgp->wgp_sc, wgsa_family(wgsa));
1877 }
1878 
1879 static struct wg_sockaddr *
1880 wg_get_endpoint_sa(struct wg_peer *wgp, struct psref *psref)
1881 {
1882 	struct wg_sockaddr *wgsa;
1883 	int s;
1884 
1885 	s = pserialize_read_enter();
1886 	wgsa = atomic_load_consume(&wgp->wgp_endpoint);
1887 	psref_acquire(psref, &wgsa->wgsa_psref, wg_psref_class);
1888 	pserialize_read_exit(s);
1889 
1890 	return wgsa;
1891 }
1892 
1893 static void
1894 wg_put_sa(struct wg_peer *wgp, struct wg_sockaddr *wgsa, struct psref *psref)
1895 {
1896 
1897 	psref_release(psref, &wgsa->wgsa_psref, wg_psref_class);
1898 }
1899 
1900 static int
1901 wg_send_so(struct wg_peer *wgp, struct mbuf *m)
1902 {
1903 	int error;
1904 	struct socket *so;
1905 	struct psref psref;
1906 	struct wg_sockaddr *wgsa;
1907 
1908 	wgsa = wg_get_endpoint_sa(wgp, &psref);
1909 	so = wg_get_so_by_peer(wgp, wgsa);
1910 	error = sosend(so, wgsatosa(wgsa), NULL, m, NULL, 0, curlwp);
1911 	wg_put_sa(wgp, wgsa, &psref);
1912 
1913 	return error;
1914 }
1915 
1916 static void
1917 wg_send_handshake_msg_init(struct wg_softc *wg, struct wg_peer *wgp)
1918 {
1919 	int error;
1920 	struct mbuf *m;
1921 	struct wg_msg_init *wgmi;
1922 	struct wg_session *wgs;
1923 
1924 	KASSERT(mutex_owned(wgp->wgp_lock));
1925 
1926 	wgs = wgp->wgp_session_unstable;
1927 	/* XXX pull dispatch out into wg_task_send_init_message */
1928 	switch (wgs->wgs_state) {
1929 	case WGS_STATE_UNKNOWN:		/* new session initiated by us */
1930 		break;
1931 	case WGS_STATE_INIT_ACTIVE:	/* we're already initiating, stop */
1932 		WG_TRACE("Session already initializing, skip starting new one");
1933 		return;
1934 	case WGS_STATE_INIT_PASSIVE:	/* peer was trying -- XXX what now? */
1935 		WG_TRACE("Session already initializing, waiting for peer");
1936 		return;
1937 	case WGS_STATE_ESTABLISHED:	/* can't happen */
1938 		panic("unstable session can't be established");
1939 	case WGS_STATE_DESTROYING:	/* rekey initiated by us too early */
1940 		WG_TRACE("Session destroying");
1941 		wg_put_session_index(wg, wgs);
1942 		KASSERTMSG(wgs->wgs_state == WGS_STATE_UNKNOWN, "state=%d",
1943 		    wgs->wgs_state);
1944 		break;
1945 	}
1946 
1947 	/*
1948 	 * Assign a fresh session index.
1949 	 */
1950 	KASSERTMSG(wgs->wgs_state == WGS_STATE_UNKNOWN, "state=%d",
1951 	    wgs->wgs_state);
1952 	wg_get_session_index(wg, wgs);
1953 
1954 	/*
1955 	 * We have initiated a session.  Transition to INIT_ACTIVE.
1956 	 * This doesn't publish it for use in the data rx path,
1957 	 * wg_handle_msg_data, or in the data tx path, wg_output -- we
1958 	 * have to wait for the peer to respond with their ephemeral
1959 	 * public key before we can derive a session key for tx/rx.
1960 	 * Hence only atomic_store_relaxed.
1961 	 */
1962 	WG_DLOG("session[L=%"PRIx32" R=(unknown)] -> WGS_STATE_INIT_ACTIVE\n",
1963 	    wgs->wgs_local_index);
1964 	atomic_store_relaxed(&wgs->wgs_state, WGS_STATE_INIT_ACTIVE);
1965 
1966 	m = m_gethdr(M_WAIT, MT_DATA);
1967 	if (sizeof(*wgmi) > MHLEN) {
1968 		m_clget(m, M_WAIT);
1969 		CTASSERT(sizeof(*wgmi) <= MCLBYTES);
1970 	}
1971 	m->m_pkthdr.len = m->m_len = sizeof(*wgmi);
1972 	wgmi = mtod(m, struct wg_msg_init *);
1973 	wg_fill_msg_init(wg, wgp, wgs, wgmi);
1974 
1975 	error = wg->wg_ops->send_hs_msg(wgp, m); /* consumes m */
1976 	if (error) {
1977 		/*
1978 		 * Sending out an initiation packet failed; give up on
1979 		 * this session and toss packet waiting for it if any.
1980 		 *
1981 		 * XXX Why don't we just let the periodic handshake
1982 		 * retry logic work in this case?
1983 		 */
1984 		WG_DLOG("send_hs_msg failed, error=%d\n", error);
1985 		wg_put_session_index(wg, wgs);
1986 		m = atomic_swap_ptr(&wgp->wgp_pending, NULL);
1987 		membar_acquire(); /* matches membar_release in wgintr */
1988 		m_freem(m);
1989 		return;
1990 	}
1991 
1992 	WG_TRACE("init msg sent");
1993 	if (wgp->wgp_handshake_start_time == 0)
1994 		wgp->wgp_handshake_start_time = time_uptime;
1995 	callout_schedule(&wgp->wgp_handshake_timeout_timer,
1996 	    MIN(wg_rekey_timeout, (unsigned)(INT_MAX / hz)) * hz);
1997 }
1998 
1999 static void
2000 wg_fill_msg_resp(struct wg_softc *wg, struct wg_peer *wgp,
2001     struct wg_session *wgs, struct wg_msg_resp *wgmr,
2002     const struct wg_msg_init *wgmi)
2003 {
2004 	uint8_t ckey[WG_CHAINING_KEY_LEN]; /* [W] 5.4.3: Cr */
2005 	uint8_t hash[WG_HASH_LEN]; /* [W] 5.4.3: Hr */
2006 	uint8_t cipher_key[WG_KDF_OUTPUT_LEN];
2007 	uint8_t pubkey[WG_EPHEMERAL_KEY_LEN];
2008 	uint8_t privkey[WG_EPHEMERAL_KEY_LEN];
2009 
2010 	KASSERT(mutex_owned(wgp->wgp_lock));
2011 	KASSERT(wgs == wgp->wgp_session_unstable);
2012 	KASSERTMSG(wgs->wgs_state == WGS_STATE_UNKNOWN, "state=%d",
2013 	    wgs->wgs_state);
2014 
2015 	memcpy(hash, wgs->wgs_handshake_hash, sizeof(hash));
2016 	memcpy(ckey, wgs->wgs_chaining_key, sizeof(ckey));
2017 
2018 	wgmr->wgmr_type = htole32(WG_MSG_TYPE_RESP);
2019 	wgmr->wgmr_sender = wgs->wgs_local_index;
2020 	wgmr->wgmr_receiver = wgmi->wgmi_sender;
2021 
2022 	/* [W] 5.4.3 Second Message: Responder to Initiator */
2023 
2024 	/* [N] 2.2: "e" */
2025 	/* Er^priv, Er^pub := DH-GENERATE() */
2026 	wg_algo_generate_keypair(pubkey, privkey);
2027 	/* Cr := KDF1(Cr, Er^pub) */
2028 	wg_algo_kdf(ckey, NULL, NULL, ckey, pubkey, sizeof(pubkey));
2029 	/* msg.ephemeral := Er^pub */
2030 	memcpy(wgmr->wgmr_ephemeral, pubkey, sizeof(wgmr->wgmr_ephemeral));
2031 	/* Hr := HASH(Hr || msg.ephemeral) */
2032 	wg_algo_hash(hash, pubkey, sizeof(pubkey));
2033 
2034 	WG_DUMP_HASH("ckey", ckey);
2035 	WG_DUMP_HASH("hash", hash);
2036 
2037 	/* [N] 2.2: "ee" */
2038 	/* Cr := KDF1(Cr, DH(Er^priv, Ei^pub)) */
2039 	wg_algo_dh_kdf(ckey, NULL, privkey, wgs->wgs_ephemeral_key_peer);
2040 
2041 	/* [N] 2.2: "se" */
2042 	/* Cr := KDF1(Cr, DH(Er^priv, Si^pub)) */
2043 	wg_algo_dh_kdf(ckey, NULL, privkey, wgp->wgp_pubkey);
2044 
2045 	/* [N] 9.2: "psk" */
2046     {
2047 	uint8_t kdfout[WG_KDF_OUTPUT_LEN];
2048 	/* Cr, r, k := KDF3(Cr, Q) */
2049 	wg_algo_kdf(ckey, kdfout, cipher_key, ckey, wgp->wgp_psk,
2050 	    sizeof(wgp->wgp_psk));
2051 	/* Hr := HASH(Hr || r) */
2052 	wg_algo_hash(hash, kdfout, sizeof(kdfout));
2053     }
2054 
2055 	/* msg.empty := AEAD(k, 0, e, Hr) */
2056 	wg_algo_aead_enc(wgmr->wgmr_empty, sizeof(wgmr->wgmr_empty),
2057 	    cipher_key, 0, NULL, 0, hash, sizeof(hash));
2058 	/* Hr := HASH(Hr || msg.empty) */
2059 	wg_algo_hash(hash, wgmr->wgmr_empty, sizeof(wgmr->wgmr_empty));
2060 
2061 	WG_DUMP_HASH("wgmr_empty", wgmr->wgmr_empty);
2062 
2063 	/* [W] 5.4.4: Cookie MACs */
2064 	/* msg.mac1 := MAC(HASH(LABEL-MAC1 || Sm'^pub), msg_a) */
2065 	wg_algo_mac_mac1(wgmr->wgmr_mac1, sizeof(wgmi->wgmi_mac1),
2066 	    wgp->wgp_pubkey, sizeof(wgp->wgp_pubkey),
2067 	    (const uint8_t *)wgmr, offsetof(struct wg_msg_resp, wgmr_mac1));
2068 	/* Need mac1 to decrypt a cookie from a cookie message */
2069 	memcpy(wgp->wgp_last_sent_mac1, wgmr->wgmr_mac1,
2070 	    sizeof(wgp->wgp_last_sent_mac1));
2071 	wgp->wgp_last_sent_mac1_valid = true;
2072 
2073 	if (wgp->wgp_latest_cookie_time == 0 ||
2074 	    (time_uptime - wgp->wgp_latest_cookie_time) >= WG_COOKIE_TIME)
2075 		/* msg.mac2 := 0^16 */
2076 		memset(wgmr->wgmr_mac2, 0, sizeof(wgmr->wgmr_mac2));
2077 	else {
2078 		/* msg.mac2 := MAC(Lm, msg_b) */
2079 		wg_algo_mac(wgmr->wgmr_mac2, sizeof(wgmi->wgmi_mac2),
2080 		    wgp->wgp_latest_cookie, WG_COOKIE_LEN,
2081 		    (const uint8_t *)wgmr,
2082 		    offsetof(struct wg_msg_resp, wgmr_mac2),
2083 		    NULL, 0);
2084 	}
2085 
2086 	memcpy(wgs->wgs_handshake_hash, hash, sizeof(hash));
2087 	memcpy(wgs->wgs_chaining_key, ckey, sizeof(ckey));
2088 	memcpy(wgs->wgs_ephemeral_key_pub, pubkey, sizeof(pubkey));
2089 	memcpy(wgs->wgs_ephemeral_key_priv, privkey, sizeof(privkey));
2090 	wgs->wgs_remote_index = wgmi->wgmi_sender;
2091 	WG_DLOG("sender=%x\n", wgs->wgs_local_index);
2092 	WG_DLOG("receiver=%x\n", wgs->wgs_remote_index);
2093 }
2094 
2095 /*
2096  * wg_swap_sessions(wg, wgp)
2097  *
2098  *	Caller has just finished establishing the unstable session in
2099  *	wg for peer wgp.  Publish it as the stable session, send queued
2100  *	packets or keepalives as necessary to kick off the session,
2101  *	move the previously stable session to unstable, and begin
2102  *	destroying it.
2103  */
2104 static void
2105 wg_swap_sessions(struct wg_softc *wg, struct wg_peer *wgp)
2106 {
2107 	struct wg_session *wgs, *wgs_prev;
2108 	struct mbuf *m;
2109 
2110 	KASSERT(mutex_owned(wgp->wgp_lock));
2111 
2112 	/*
2113 	 * Get the newly established session, to become the new
2114 	 * session.  Caller must have transitioned from INIT_ACTIVE to
2115 	 * INIT_PASSIVE or to ESTABLISHED already.  This will become
2116 	 * the stable session.
2117 	 */
2118 	wgs = wgp->wgp_session_unstable;
2119 	KASSERTMSG(wgs->wgs_state == WGS_STATE_ESTABLISHED, "state=%d",
2120 	    wgs->wgs_state);
2121 
2122 	/*
2123 	 * Get the stable session, which is either the previously
2124 	 * established session in the ESTABLISHED state, or has not
2125 	 * been established at all and is UNKNOWN.  This will become
2126 	 * the unstable session.
2127 	 */
2128 	wgs_prev = wgp->wgp_session_stable;
2129 	KASSERTMSG((wgs_prev->wgs_state == WGS_STATE_ESTABLISHED ||
2130 		wgs_prev->wgs_state == WGS_STATE_UNKNOWN),
2131 	    "state=%d", wgs_prev->wgs_state);
2132 
2133 	/*
2134 	 * Publish the newly established session for the tx path to use
2135 	 * and make the other one the unstable session to handle
2136 	 * stragglers in the rx path and later be used for the next
2137 	 * session's handshake.
2138 	 */
2139 	atomic_store_release(&wgp->wgp_session_stable, wgs);
2140 	wgp->wgp_session_unstable = wgs_prev;
2141 
2142 	/*
2143 	 * Record the handshake time and reset the handshake state.
2144 	 */
2145 	getnanotime(&wgp->wgp_last_handshake_time);
2146 	wgp->wgp_handshake_start_time = 0;
2147 	wgp->wgp_last_sent_mac1_valid = false;
2148 	wgp->wgp_last_sent_cookie_valid = false;
2149 
2150 	/*
2151 	 * If we had a data packet queued up, send it.
2152 	 *
2153 	 * If not, but we're the initiator, send a keepalive message --
2154 	 * if we're the initiator we have to send something immediately
2155 	 * or else the responder will never answer.
2156 	 */
2157 	if ((m = atomic_swap_ptr(&wgp->wgp_pending, NULL)) != NULL) {
2158 		membar_acquire(); /* matches membar_release in wgintr */
2159 		wg_send_data_msg(wgp, wgs, m); /* consumes m */
2160 		m = NULL;
2161 	} else if (wgs->wgs_is_initiator) {
2162 		wg_send_keepalive_msg(wgp, wgs);
2163 	}
2164 
2165 	/*
2166 	 * If the previous stable session was established, begin to
2167 	 * destroy it.
2168 	 */
2169 	if (wgs_prev->wgs_state == WGS_STATE_ESTABLISHED) {
2170 		/*
2171 		 * Transition ESTABLISHED->DESTROYING.  The session
2172 		 * will remain usable for the data rx path to process
2173 		 * packets still in flight to us, but we won't use it
2174 		 * for data tx.
2175 		 */
2176 		WG_DLOG("session[L=%"PRIx32" R=%"PRIx32"]"
2177 		    " -> WGS_STATE_DESTROYING\n",
2178 		    wgs_prev->wgs_local_index, wgs_prev->wgs_remote_index);
2179 		atomic_store_relaxed(&wgs_prev->wgs_state,
2180 		    WGS_STATE_DESTROYING);
2181 	} else {
2182 		KASSERTMSG(wgs_prev->wgs_state == WGS_STATE_UNKNOWN,
2183 		    "state=%d", wgs_prev->wgs_state);
2184 		wgs_prev->wgs_local_index = 0; /* paranoia */
2185 		wgs_prev->wgs_remote_index = 0; /* paranoia */
2186 		wg_clear_states(wgs_prev); /* paranoia */
2187 		wgs_prev->wgs_state = WGS_STATE_UNKNOWN;
2188 	}
2189 }
2190 
2191 static void __noinline
2192 wg_handle_msg_resp(struct wg_softc *wg, const struct wg_msg_resp *wgmr,
2193     const struct sockaddr *src)
2194 {
2195 	uint8_t ckey[WG_CHAINING_KEY_LEN]; /* [W] 5.4.3: Cr */
2196 	uint8_t hash[WG_HASH_LEN]; /* [W] 5.4.3: Kr */
2197 	uint8_t cipher_key[WG_KDF_OUTPUT_LEN];
2198 	struct wg_peer *wgp;
2199 	struct wg_session *wgs;
2200 	struct psref psref;
2201 	int error;
2202 	uint8_t mac1[WG_MAC_LEN];
2203 
2204 	wg_algo_mac_mac1(mac1, sizeof(mac1),
2205 	    wg->wg_pubkey, sizeof(wg->wg_pubkey),
2206 	    (const uint8_t *)wgmr, offsetof(struct wg_msg_resp, wgmr_mac1));
2207 
2208 	/*
2209 	 * [W] 5.3: Denial of Service Mitigation & Cookies
2210 	 * "the responder, ..., must always reject messages with an invalid
2211 	 *  msg.mac1"
2212 	 */
2213 	if (!consttime_memequal(mac1, wgmr->wgmr_mac1, sizeof(mac1))) {
2214 		WG_DLOG("mac1 is invalid\n");
2215 		return;
2216 	}
2217 
2218 	WG_TRACE("resp msg received");
2219 	wgs = wg_lookup_session_by_index(wg, wgmr->wgmr_receiver, &psref);
2220 	if (wgs == NULL) {
2221 		WG_TRACE("No session found");
2222 		return;
2223 	}
2224 
2225 	wgp = wgs->wgs_peer;
2226 
2227 	mutex_enter(wgp->wgp_lock);
2228 
2229 	/* If we weren't waiting for a handshake response, drop it.  */
2230 	if (wgs->wgs_state != WGS_STATE_INIT_ACTIVE) {
2231 		WG_TRACE("peer sent spurious handshake response, ignoring");
2232 		goto out;
2233 	}
2234 
2235 	if (__predict_false(wg_is_underload(wg, wgp, WG_MSG_TYPE_RESP))) {
2236 		WG_TRACE("under load");
2237 		/*
2238 		 * [W] 5.3: Denial of Service Mitigation & Cookies
2239 		 * "the responder, ..., and when under load may reject messages
2240 		 *  with an invalid msg.mac2.  If the responder receives a
2241 		 *  message with a valid msg.mac1 yet with an invalid msg.mac2,
2242 		 *  and is under load, it may respond with a cookie reply
2243 		 *  message"
2244 		 */
2245 		uint8_t zero[WG_MAC_LEN] = {0};
2246 		if (consttime_memequal(wgmr->wgmr_mac2, zero, sizeof(zero))) {
2247 			WG_TRACE("sending a cookie message: no cookie included");
2248 			wg_send_cookie_msg(wg, wgp, wgmr->wgmr_sender,
2249 			    wgmr->wgmr_mac1, src);
2250 			goto out;
2251 		}
2252 		if (!wgp->wgp_last_sent_cookie_valid) {
2253 			WG_TRACE("sending a cookie message: no cookie sent ever");
2254 			wg_send_cookie_msg(wg, wgp, wgmr->wgmr_sender,
2255 			    wgmr->wgmr_mac1, src);
2256 			goto out;
2257 		}
2258 		uint8_t mac2[WG_MAC_LEN];
2259 		wg_algo_mac(mac2, sizeof(mac2), wgp->wgp_last_sent_cookie,
2260 		    WG_COOKIE_LEN, (const uint8_t *)wgmr,
2261 		    offsetof(struct wg_msg_resp, wgmr_mac2), NULL, 0);
2262 		if (!consttime_memequal(mac2, wgmr->wgmr_mac2, sizeof(mac2))) {
2263 			WG_DLOG("mac2 is invalid\n");
2264 			goto out;
2265 		}
2266 		WG_TRACE("under load, but continue to sending");
2267 	}
2268 
2269 	memcpy(hash, wgs->wgs_handshake_hash, sizeof(hash));
2270 	memcpy(ckey, wgs->wgs_chaining_key, sizeof(ckey));
2271 
2272 	/*
2273 	 * [W] 5.4.3 Second Message: Responder to Initiator
2274 	 * "When the initiator receives this message, it does the same
2275 	 *  operations so that its final state variables are identical,
2276 	 *  replacing the operands of the DH function to produce equivalent
2277 	 *  values."
2278 	 *  Note that the following comments of operations are just copies of
2279 	 *  the initiator's ones.
2280 	 */
2281 
2282 	/* [N] 2.2: "e" */
2283 	/* Cr := KDF1(Cr, Er^pub) */
2284 	wg_algo_kdf(ckey, NULL, NULL, ckey, wgmr->wgmr_ephemeral,
2285 	    sizeof(wgmr->wgmr_ephemeral));
2286 	/* Hr := HASH(Hr || msg.ephemeral) */
2287 	wg_algo_hash(hash, wgmr->wgmr_ephemeral, sizeof(wgmr->wgmr_ephemeral));
2288 
2289 	WG_DUMP_HASH("ckey", ckey);
2290 	WG_DUMP_HASH("hash", hash);
2291 
2292 	/* [N] 2.2: "ee" */
2293 	/* Cr := KDF1(Cr, DH(Er^priv, Ei^pub)) */
2294 	wg_algo_dh_kdf(ckey, NULL, wgs->wgs_ephemeral_key_priv,
2295 	    wgmr->wgmr_ephemeral);
2296 
2297 	/* [N] 2.2: "se" */
2298 	/* Cr := KDF1(Cr, DH(Er^priv, Si^pub)) */
2299 	wg_algo_dh_kdf(ckey, NULL, wg->wg_privkey, wgmr->wgmr_ephemeral);
2300 
2301 	/* [N] 9.2: "psk" */
2302     {
2303 	uint8_t kdfout[WG_KDF_OUTPUT_LEN];
2304 	/* Cr, r, k := KDF3(Cr, Q) */
2305 	wg_algo_kdf(ckey, kdfout, cipher_key, ckey, wgp->wgp_psk,
2306 	    sizeof(wgp->wgp_psk));
2307 	/* Hr := HASH(Hr || r) */
2308 	wg_algo_hash(hash, kdfout, sizeof(kdfout));
2309     }
2310 
2311     {
2312 	uint8_t out[sizeof(wgmr->wgmr_empty)]; /* for safety */
2313 	/* msg.empty := AEAD(k, 0, e, Hr) */
2314 	error = wg_algo_aead_dec(out, 0, cipher_key, 0, wgmr->wgmr_empty,
2315 	    sizeof(wgmr->wgmr_empty), hash, sizeof(hash));
2316 	WG_DUMP_HASH("wgmr_empty", wgmr->wgmr_empty);
2317 	if (error != 0) {
2318 		WG_LOG_RATECHECK(&wgp->wgp_ppsratecheck, LOG_DEBUG,
2319 		    "%s: peer %s: wg_algo_aead_dec for empty message failed\n",
2320 		    if_name(&wg->wg_if), wgp->wgp_name);
2321 		goto out;
2322 	}
2323 	/* Hr := HASH(Hr || msg.empty) */
2324 	wg_algo_hash(hash, wgmr->wgmr_empty, sizeof(wgmr->wgmr_empty));
2325     }
2326 
2327 	memcpy(wgs->wgs_handshake_hash, hash, sizeof(wgs->wgs_handshake_hash));
2328 	memcpy(wgs->wgs_chaining_key, ckey, sizeof(wgs->wgs_chaining_key));
2329 	wgs->wgs_remote_index = wgmr->wgmr_sender;
2330 	WG_DLOG("receiver=%x\n", wgs->wgs_remote_index);
2331 
2332 	/*
2333 	 * The packet is genuine.  Update the peer's endpoint if the
2334 	 * source address changed.
2335 	 *
2336 	 * XXX How to prevent DoS by replaying genuine packets from the
2337 	 * wrong source address?
2338 	 */
2339 	wg_update_endpoint_if_necessary(wgp, src);
2340 
2341 	KASSERTMSG(wgs->wgs_state == WGS_STATE_INIT_ACTIVE, "state=%d",
2342 	    wgs->wgs_state);
2343 	wgs->wgs_time_established = time_uptime32;
2344 	wg_schedule_session_dtor_timer(wgp);
2345 	wgs->wgs_time_last_data_sent = 0;
2346 	wgs->wgs_is_initiator = true;
2347 	WG_DLOG("session[L=%"PRIx32" R=%"PRIx32"]:"
2348 	    " calculate keys as initiator\n",
2349 	    wgs->wgs_local_index, wgs->wgs_remote_index);
2350 	wg_calculate_keys(wgs, true);
2351 	wg_clear_states(wgs);
2352 
2353 	/*
2354 	 * Session is ready to receive data now that we have received
2355 	 * the responder's response.
2356 	 *
2357 	 * Transition from INIT_ACTIVE to ESTABLISHED to publish it to
2358 	 * the data rx path, wg_handle_msg_data.
2359 	 */
2360 	WG_DLOG("session[L=%"PRIx32" R=%"PRIx32" -> WGS_STATE_ESTABLISHED\n",
2361 	    wgs->wgs_local_index, wgs->wgs_remote_index);
2362 	atomic_store_release(&wgs->wgs_state, WGS_STATE_ESTABLISHED);
2363 	WG_TRACE("WGS_STATE_ESTABLISHED");
2364 
2365 	callout_halt(&wgp->wgp_handshake_timeout_timer, NULL);
2366 
2367 	/*
2368 	 * Session is ready to send data now that we have received the
2369 	 * responder's response.
2370 	 *
2371 	 * Swap the sessions to publish the new one as the stable
2372 	 * session for the data tx path, wg_output.
2373 	 */
2374 	wg_swap_sessions(wg, wgp);
2375 	KASSERT(wgs == wgp->wgp_session_stable);
2376 
2377 out:
2378 	mutex_exit(wgp->wgp_lock);
2379 	wg_put_session(wgs, &psref);
2380 }
2381 
2382 static void
2383 wg_send_handshake_msg_resp(struct wg_softc *wg, struct wg_peer *wgp,
2384     struct wg_session *wgs, const struct wg_msg_init *wgmi)
2385 {
2386 	int error;
2387 	struct mbuf *m;
2388 	struct wg_msg_resp *wgmr;
2389 
2390 	KASSERT(mutex_owned(wgp->wgp_lock));
2391 	KASSERT(wgs == wgp->wgp_session_unstable);
2392 	KASSERTMSG(wgs->wgs_state == WGS_STATE_UNKNOWN, "state=%d",
2393 	    wgs->wgs_state);
2394 
2395 	m = m_gethdr(M_WAIT, MT_DATA);
2396 	if (sizeof(*wgmr) > MHLEN) {
2397 		m_clget(m, M_WAIT);
2398 		CTASSERT(sizeof(*wgmr) <= MCLBYTES);
2399 	}
2400 	m->m_pkthdr.len = m->m_len = sizeof(*wgmr);
2401 	wgmr = mtod(m, struct wg_msg_resp *);
2402 	wg_fill_msg_resp(wg, wgp, wgs, wgmr, wgmi);
2403 
2404 	error = wg->wg_ops->send_hs_msg(wgp, m); /* consumes m */
2405 	if (error) {
2406 		WG_DLOG("send_hs_msg failed, error=%d\n", error);
2407 		return;
2408 	}
2409 
2410 	WG_TRACE("resp msg sent");
2411 }
2412 
2413 static struct wg_peer *
2414 wg_lookup_peer_by_pubkey(struct wg_softc *wg,
2415     const uint8_t pubkey[static WG_STATIC_KEY_LEN], struct psref *psref)
2416 {
2417 	struct wg_peer *wgp;
2418 
2419 	int s = pserialize_read_enter();
2420 	wgp = thmap_get(wg->wg_peers_bypubkey, pubkey, WG_STATIC_KEY_LEN);
2421 	if (wgp != NULL)
2422 		wg_get_peer(wgp, psref);
2423 	pserialize_read_exit(s);
2424 
2425 	return wgp;
2426 }
2427 
2428 static void
2429 wg_fill_msg_cookie(struct wg_softc *wg, struct wg_peer *wgp,
2430     struct wg_msg_cookie *wgmc, const uint32_t sender,
2431     const uint8_t mac1[static WG_MAC_LEN], const struct sockaddr *src)
2432 {
2433 	uint8_t cookie[WG_COOKIE_LEN];
2434 	uint8_t key[WG_HASH_LEN];
2435 	uint8_t addr[sizeof(struct in6_addr)];
2436 	size_t addrlen;
2437 	uint16_t uh_sport; /* be */
2438 
2439 	KASSERT(mutex_owned(wgp->wgp_lock));
2440 
2441 	wgmc->wgmc_type = htole32(WG_MSG_TYPE_COOKIE);
2442 	wgmc->wgmc_receiver = sender;
2443 	cprng_fast(wgmc->wgmc_salt, sizeof(wgmc->wgmc_salt));
2444 
2445 	/*
2446 	 * [W] 5.4.7: Under Load: Cookie Reply Message
2447 	 * "The secret variable, Rm, changes every two minutes to a
2448 	 * random value"
2449 	 */
2450 	if ((time_uptime - wgp->wgp_last_cookiesecret_time) >
2451 	    WG_COOKIESECRET_TIME) {
2452 		cprng_strong(kern_cprng, wgp->wgp_cookiesecret,
2453 		    sizeof(wgp->wgp_cookiesecret), 0);
2454 		wgp->wgp_last_cookiesecret_time = time_uptime;
2455 	}
2456 
2457 	switch (src->sa_family) {
2458 #ifdef INET
2459 	case AF_INET: {
2460 		const struct sockaddr_in *sin = satocsin(src);
2461 		addrlen = sizeof(sin->sin_addr);
2462 		memcpy(addr, &sin->sin_addr, addrlen);
2463 		uh_sport = sin->sin_port;
2464 		break;
2465 	    }
2466 #endif
2467 #ifdef INET6
2468 	case AF_INET6: {
2469 		const struct sockaddr_in6 *sin6 = satocsin6(src);
2470 		addrlen = sizeof(sin6->sin6_addr);
2471 		memcpy(addr, &sin6->sin6_addr, addrlen);
2472 		uh_sport = sin6->sin6_port;
2473 		break;
2474 	    }
2475 #endif
2476 	default:
2477 		panic("invalid af=%d", src->sa_family);
2478 	}
2479 
2480 	wg_algo_mac(cookie, sizeof(cookie),
2481 	    wgp->wgp_cookiesecret, sizeof(wgp->wgp_cookiesecret),
2482 	    addr, addrlen, (const uint8_t *)&uh_sport, sizeof(uh_sport));
2483 	wg_algo_mac_cookie(key, sizeof(key), wg->wg_pubkey,
2484 	    sizeof(wg->wg_pubkey));
2485 	wg_algo_xaead_enc(wgmc->wgmc_cookie, sizeof(wgmc->wgmc_cookie), key,
2486 	    cookie, sizeof(cookie), mac1, WG_MAC_LEN, wgmc->wgmc_salt);
2487 
2488 	/* Need to store to calculate mac2 */
2489 	memcpy(wgp->wgp_last_sent_cookie, cookie, sizeof(cookie));
2490 	wgp->wgp_last_sent_cookie_valid = true;
2491 }
2492 
2493 static void
2494 wg_send_cookie_msg(struct wg_softc *wg, struct wg_peer *wgp,
2495     const uint32_t sender, const uint8_t mac1[static WG_MAC_LEN],
2496     const struct sockaddr *src)
2497 {
2498 	int error;
2499 	struct mbuf *m;
2500 	struct wg_msg_cookie *wgmc;
2501 
2502 	KASSERT(mutex_owned(wgp->wgp_lock));
2503 
2504 	m = m_gethdr(M_WAIT, MT_DATA);
2505 	if (sizeof(*wgmc) > MHLEN) {
2506 		m_clget(m, M_WAIT);
2507 		CTASSERT(sizeof(*wgmc) <= MCLBYTES);
2508 	}
2509 	m->m_pkthdr.len = m->m_len = sizeof(*wgmc);
2510 	wgmc = mtod(m, struct wg_msg_cookie *);
2511 	wg_fill_msg_cookie(wg, wgp, wgmc, sender, mac1, src);
2512 
2513 	error = wg->wg_ops->send_hs_msg(wgp, m); /* consumes m */
2514 	if (error) {
2515 		WG_DLOG("send_hs_msg failed, error=%d\n", error);
2516 		return;
2517 	}
2518 
2519 	WG_TRACE("cookie msg sent");
2520 }
2521 
2522 static bool
2523 wg_is_underload(struct wg_softc *wg, struct wg_peer *wgp, int msgtype)
2524 {
2525 #ifdef WG_DEBUG_PARAMS
2526 	if (wg_force_underload)
2527 		return true;
2528 #endif
2529 
2530 	/*
2531 	 * XXX we don't have a means of a load estimation.  The purpose of
2532 	 * the mechanism is a DoS mitigation, so we consider frequent handshake
2533 	 * messages as (a kind of) load; if a message of the same type comes
2534 	 * to a peer within 1 second, we consider we are under load.
2535 	 */
2536 	time_t last = wgp->wgp_last_msg_received_time[msgtype];
2537 	wgp->wgp_last_msg_received_time[msgtype] = time_uptime;
2538 	return (time_uptime - last) == 0;
2539 }
2540 
2541 static void
2542 wg_calculate_keys(struct wg_session *wgs, const bool initiator)
2543 {
2544 
2545 	KASSERT(mutex_owned(wgs->wgs_peer->wgp_lock));
2546 
2547 	/*
2548 	 * [W] 5.4.5: Ti^send = Tr^recv, Ti^recv = Tr^send := KDF2(Ci = Cr, e)
2549 	 */
2550 	if (initiator) {
2551 		wg_algo_kdf(wgs->wgs_tkey_send, wgs->wgs_tkey_recv, NULL,
2552 		    wgs->wgs_chaining_key, NULL, 0);
2553 	} else {
2554 		wg_algo_kdf(wgs->wgs_tkey_recv, wgs->wgs_tkey_send, NULL,
2555 		    wgs->wgs_chaining_key, NULL, 0);
2556 	}
2557 	WG_DUMP_HASH("wgs_tkey_send", wgs->wgs_tkey_send);
2558 	WG_DUMP_HASH("wgs_tkey_recv", wgs->wgs_tkey_recv);
2559 }
2560 
2561 static uint64_t
2562 wg_session_get_send_counter(struct wg_session *wgs)
2563 {
2564 #ifdef __HAVE_ATOMIC64_LOADSTORE
2565 	return atomic_load_relaxed(&wgs->wgs_send_counter);
2566 #else
2567 	uint64_t send_counter;
2568 
2569 	mutex_enter(&wgs->wgs_send_counter_lock);
2570 	send_counter = wgs->wgs_send_counter;
2571 	mutex_exit(&wgs->wgs_send_counter_lock);
2572 
2573 	return send_counter;
2574 #endif
2575 }
2576 
2577 static uint64_t
2578 wg_session_inc_send_counter(struct wg_session *wgs)
2579 {
2580 #ifdef __HAVE_ATOMIC64_LOADSTORE
2581 	return atomic_inc_64_nv(&wgs->wgs_send_counter) - 1;
2582 #else
2583 	uint64_t send_counter;
2584 
2585 	mutex_enter(&wgs->wgs_send_counter_lock);
2586 	send_counter = wgs->wgs_send_counter++;
2587 	mutex_exit(&wgs->wgs_send_counter_lock);
2588 
2589 	return send_counter;
2590 #endif
2591 }
2592 
2593 static void
2594 wg_clear_states(struct wg_session *wgs)
2595 {
2596 
2597 	KASSERT(mutex_owned(wgs->wgs_peer->wgp_lock));
2598 
2599 	wgs->wgs_send_counter = 0;
2600 	sliwin_reset(&wgs->wgs_recvwin->window);
2601 
2602 #define wgs_clear(v)	explicit_memset(wgs->wgs_##v, 0, sizeof(wgs->wgs_##v))
2603 	wgs_clear(handshake_hash);
2604 	wgs_clear(chaining_key);
2605 	wgs_clear(ephemeral_key_pub);
2606 	wgs_clear(ephemeral_key_priv);
2607 	wgs_clear(ephemeral_key_peer);
2608 #undef wgs_clear
2609 }
2610 
2611 static struct wg_session *
2612 wg_lookup_session_by_index(struct wg_softc *wg, const uint32_t index,
2613     struct psref *psref)
2614 {
2615 	struct wg_session *wgs;
2616 
2617 	int s = pserialize_read_enter();
2618 	wgs = thmap_get(wg->wg_sessions_byindex, &index, sizeof index);
2619 	if (wgs != NULL) {
2620 		KASSERTMSG(index == wgs->wgs_local_index,
2621 		    "index=%"PRIx32" wgs->wgs_local_index=%"PRIx32,
2622 		    index, wgs->wgs_local_index);
2623 		psref_acquire(psref, &wgs->wgs_psref, wg_psref_class);
2624 	}
2625 	pserialize_read_exit(s);
2626 
2627 	return wgs;
2628 }
2629 
2630 static void
2631 wg_send_keepalive_msg(struct wg_peer *wgp, struct wg_session *wgs)
2632 {
2633 	struct mbuf *m;
2634 
2635 	/*
2636 	 * [W] 6.5 Passive Keepalive
2637 	 * "A keepalive message is simply a transport data message with
2638 	 *  a zero-length encapsulated encrypted inner-packet."
2639 	 */
2640 	WG_TRACE("");
2641 	m = m_gethdr(M_WAIT, MT_DATA);
2642 	wg_send_data_msg(wgp, wgs, m);
2643 }
2644 
2645 static bool
2646 wg_need_to_send_init_message(struct wg_session *wgs)
2647 {
2648 	/*
2649 	 * [W] 6.2 Transport Message Limits
2650 	 * "if a peer is the initiator of a current secure session,
2651 	 *  WireGuard will send a handshake initiation message to begin
2652 	 *  a new secure session ... if after receiving a transport data
2653 	 *  message, the current secure session is (REJECT-AFTER-TIME −
2654 	 *  KEEPALIVE-TIMEOUT − REKEY-TIMEOUT) seconds old and it has
2655 	 *  not yet acted upon this event."
2656 	 */
2657 	return wgs->wgs_is_initiator &&
2658 	    atomic_load_relaxed(&wgs->wgs_time_last_data_sent) == 0 &&
2659 	    (time_uptime32 - wgs->wgs_time_established >=
2660 		(wg_reject_after_time - wg_keepalive_timeout -
2661 		    wg_rekey_timeout));
2662 }
2663 
2664 static void
2665 wg_schedule_peer_task(struct wg_peer *wgp, unsigned int task)
2666 {
2667 
2668 	mutex_enter(wgp->wgp_intr_lock);
2669 	WG_DLOG("tasks=%d, task=%d\n", wgp->wgp_tasks, task);
2670 	if (wgp->wgp_tasks == 0)
2671 		/*
2672 		 * XXX If the current CPU is already loaded -- e.g., if
2673 		 * there's already a bunch of handshakes queued up --
2674 		 * consider tossing this over to another CPU to
2675 		 * distribute the load.
2676 		 */
2677 		workqueue_enqueue(wg_wq, &wgp->wgp_work, NULL);
2678 	wgp->wgp_tasks |= task;
2679 	mutex_exit(wgp->wgp_intr_lock);
2680 }
2681 
2682 static void
2683 wg_change_endpoint(struct wg_peer *wgp, const struct sockaddr *new)
2684 {
2685 	struct wg_sockaddr *wgsa_prev;
2686 
2687 	WG_TRACE("Changing endpoint");
2688 
2689 	memcpy(wgp->wgp_endpoint0, new, new->sa_len);
2690 	wgsa_prev = wgp->wgp_endpoint;
2691 	atomic_store_release(&wgp->wgp_endpoint, wgp->wgp_endpoint0);
2692 	wgp->wgp_endpoint0 = wgsa_prev;
2693 	atomic_store_release(&wgp->wgp_endpoint_available, true);
2694 
2695 	wg_schedule_peer_task(wgp, WGP_TASK_ENDPOINT_CHANGED);
2696 }
2697 
2698 static bool
2699 wg_validate_inner_packet(const char *packet, size_t decrypted_len, int *af)
2700 {
2701 	uint16_t packet_len;
2702 	const struct ip *ip;
2703 
2704 	if (__predict_false(decrypted_len < sizeof(*ip))) {
2705 		WG_DLOG("decrypted_len=%zu < %zu\n", decrypted_len,
2706 		    sizeof(*ip));
2707 		return false;
2708 	}
2709 
2710 	ip = (const struct ip *)packet;
2711 	if (ip->ip_v == 4)
2712 		*af = AF_INET;
2713 	else if (ip->ip_v == 6)
2714 		*af = AF_INET6;
2715 	else {
2716 		WG_DLOG("ip_v=%d\n", ip->ip_v);
2717 		return false;
2718 	}
2719 
2720 	WG_DLOG("af=%d\n", *af);
2721 
2722 	switch (*af) {
2723 #ifdef INET
2724 	case AF_INET:
2725 		packet_len = ntohs(ip->ip_len);
2726 		break;
2727 #endif
2728 #ifdef INET6
2729 	case AF_INET6: {
2730 		const struct ip6_hdr *ip6;
2731 
2732 		if (__predict_false(decrypted_len < sizeof(*ip6))) {
2733 			WG_DLOG("decrypted_len=%zu < %zu\n", decrypted_len,
2734 			    sizeof(*ip6));
2735 			return false;
2736 		}
2737 
2738 		ip6 = (const struct ip6_hdr *)packet;
2739 		packet_len = sizeof(*ip6) + ntohs(ip6->ip6_plen);
2740 		break;
2741 	}
2742 #endif
2743 	default:
2744 		return false;
2745 	}
2746 
2747 	if (packet_len > decrypted_len) {
2748 		WG_DLOG("packet_len %u > decrypted_len %zu\n", packet_len,
2749 		    decrypted_len);
2750 		return false;
2751 	}
2752 
2753 	return true;
2754 }
2755 
2756 static bool
2757 wg_validate_route(struct wg_softc *wg, struct wg_peer *wgp_expected,
2758     int af, char *packet)
2759 {
2760 	struct sockaddr_storage ss;
2761 	struct sockaddr *sa;
2762 	struct psref psref;
2763 	struct wg_peer *wgp;
2764 	bool ok;
2765 
2766 	/*
2767 	 * II CRYPTOKEY ROUTING
2768 	 * "it will only accept it if its source IP resolves in the
2769 	 *  table to the public key used in the secure session for
2770 	 *  decrypting it."
2771 	 */
2772 
2773 	switch (af) {
2774 #ifdef INET
2775 	case AF_INET: {
2776 		const struct ip *ip = (const struct ip *)packet;
2777 		struct sockaddr_in *sin = (struct sockaddr_in *)&ss;
2778 		sockaddr_in_init(sin, &ip->ip_src, 0);
2779 		sa = sintosa(sin);
2780 		break;
2781 	}
2782 #endif
2783 #ifdef INET6
2784 	case AF_INET6: {
2785 		const struct ip6_hdr *ip6 = (const struct ip6_hdr *)packet;
2786 		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&ss;
2787 		sockaddr_in6_init(sin6, &ip6->ip6_src, 0, 0, 0);
2788 		sa = sin6tosa(sin6);
2789 		break;
2790 	}
2791 #endif
2792 	default:
2793 		__USE(ss);
2794 		return false;
2795 	}
2796 
2797 	wgp = wg_pick_peer_by_sa(wg, sa, &psref);
2798 	ok = (wgp == wgp_expected);
2799 	if (wgp != NULL)
2800 		wg_put_peer(wgp, &psref);
2801 
2802 	return ok;
2803 }
2804 
2805 static void
2806 wg_session_dtor_timer(void *arg)
2807 {
2808 	struct wg_peer *wgp = arg;
2809 
2810 	WG_TRACE("enter");
2811 
2812 	wg_schedule_session_dtor_timer(wgp);
2813 	wg_schedule_peer_task(wgp, WGP_TASK_DESTROY_PREV_SESSION);
2814 }
2815 
2816 static void
2817 wg_schedule_session_dtor_timer(struct wg_peer *wgp)
2818 {
2819 
2820 	/*
2821 	 * If the periodic session destructor is already pending to
2822 	 * handle the previous session, that's fine -- leave it in
2823 	 * place; it will be scheduled again.
2824 	 */
2825 	if (callout_pending(&wgp->wgp_session_dtor_timer)) {
2826 		WG_DLOG("session dtor already pending\n");
2827 		return;
2828 	}
2829 
2830 	WG_DLOG("scheduling session dtor in %u secs\n", wg_reject_after_time);
2831 	callout_schedule(&wgp->wgp_session_dtor_timer,
2832 	    wg_reject_after_time*hz);
2833 }
2834 
2835 static bool
2836 sockaddr_port_match(const struct sockaddr *sa1, const struct sockaddr *sa2)
2837 {
2838 	if (sa1->sa_family != sa2->sa_family)
2839 		return false;
2840 
2841 	switch (sa1->sa_family) {
2842 #ifdef INET
2843 	case AF_INET:
2844 		return satocsin(sa1)->sin_port == satocsin(sa2)->sin_port;
2845 #endif
2846 #ifdef INET6
2847 	case AF_INET6:
2848 		return satocsin6(sa1)->sin6_port == satocsin6(sa2)->sin6_port;
2849 #endif
2850 	default:
2851 		return false;
2852 	}
2853 }
2854 
2855 static void
2856 wg_update_endpoint_if_necessary(struct wg_peer *wgp,
2857     const struct sockaddr *src)
2858 {
2859 	struct wg_sockaddr *wgsa;
2860 	struct psref psref;
2861 
2862 	wgsa = wg_get_endpoint_sa(wgp, &psref);
2863 
2864 #ifdef WG_DEBUG_LOG
2865 	char oldaddr[128], newaddr[128];
2866 	sockaddr_format(wgsatosa(wgsa), oldaddr, sizeof(oldaddr));
2867 	sockaddr_format(src, newaddr, sizeof(newaddr));
2868 	WG_DLOG("old=%s, new=%s\n", oldaddr, newaddr);
2869 #endif
2870 
2871 	/*
2872 	 * III: "Since the packet has authenticated correctly, the source IP of
2873 	 * the outer UDP/IP packet is used to update the endpoint for peer..."
2874 	 */
2875 	if (__predict_false(sockaddr_cmp(src, wgsatosa(wgsa)) != 0 ||
2876 		!sockaddr_port_match(src, wgsatosa(wgsa)))) {
2877 		/* XXX We can't change the endpoint twice in a short period */
2878 		if (atomic_swap_uint(&wgp->wgp_endpoint_changing, 1) == 0) {
2879 			wg_change_endpoint(wgp, src);
2880 		}
2881 	}
2882 
2883 	wg_put_sa(wgp, wgsa, &psref);
2884 }
2885 
/*
 * wg_handle_msg_data(wg, m, src)
 *
 *	Process one transport data message held in the mbuf chain m,
 *	received from source address src.  The first mbuf must already
 *	hold a contiguous struct wg_msg_data header.
 *
 *	Steps, in order: look up the session by the receiver index,
 *	check its state and age, run the cheap replay-window check,
 *	decrypt and authenticate the payload into a fresh mbuf n, run
 *	the real replay-window update, update the peer endpoint from
 *	src, validate the inner packet and its route, and hand n to
 *	wg_ops->input.  Finally schedule any peer task the session
 *	state calls for (establish, re-key, keepalive).
 *
 *	Takes ownership of m: it is freed on every path.  n is either
 *	passed to wg_ops->input or freed here.
 */
static void __noinline
wg_handle_msg_data(struct wg_softc *wg, struct mbuf *m,
    const struct sockaddr *src)
{
	struct wg_msg_data *wgmd;
	char *encrypted_buf = NULL, *decrypted_buf;
	size_t encrypted_len, decrypted_len;
	struct wg_session *wgs;
	struct wg_peer *wgp;
	int state;
	uint32_t age;
	size_t mlen;
	struct psref psref;
	int error, af;
	bool success, free_encrypted_buf = false, ok;
	struct mbuf *n;

	KASSERT(m->m_len >= sizeof(struct wg_msg_data));
	wgmd = mtod(m, struct wg_msg_data *);

	KASSERT(wgmd->wgmd_type == htole32(WG_MSG_TYPE_DATA));
	WG_TRACE("data");

	/* Find the putative session, or drop.  */
	wgs = wg_lookup_session_by_index(wg, wgmd->wgmd_receiver, &psref);
	if (wgs == NULL) {
		WG_TRACE("No session found");
		/* No session reference was taken, so skip the out: path.  */
		m_freem(m);
		return;
	}

	/*
	 * We are only ready to handle data when in INIT_PASSIVE,
	 * ESTABLISHED, or DESTROYING.  All transitions out of that
	 * state dissociate the session index and drain psrefs.
	 *
	 * atomic_load_acquire matches atomic_store_release in either
	 * wg_handle_msg_init or wg_handle_msg_resp.  (The transition
	 * INIT_PASSIVE to ESTABLISHED in wg_task_establish_session
	 * doesn't make a difference for this rx path.)
	 */
	state = atomic_load_acquire(&wgs->wgs_state);
	switch (state) {
	case WGS_STATE_UNKNOWN:
	case WGS_STATE_INIT_ACTIVE:
		WG_TRACE("not yet ready for data");
		goto out;
	case WGS_STATE_INIT_PASSIVE:
	case WGS_STATE_ESTABLISHED:
	case WGS_STATE_DESTROYING:
		break;
	}

	/*
	 * Reject if the session is too old.
	 */
	age = time_uptime32 - wgs->wgs_time_established;
	if (__predict_false(age >= wg_reject_after_time)) {
		WG_DLOG("session %"PRIx32" too old, %"PRIu32" sec\n",
		    wgmd->wgmd_receiver, age);
	       goto out;
	}

	/*
	 * Get the peer, for rate-limited logs (XXX MPSAFE, dtrace) and
	 * to update the endpoint if authentication succeeds.
	 */
	wgp = wgs->wgs_peer;

	/*
	 * Reject outrageously wrong sequence numbers before doing any
	 * crypto work or taking any locks.
	 */
	error = sliwin_check_fast(&wgs->wgs_recvwin->window,
	    le64toh(wgmd->wgmd_counter));
	if (error) {
		WG_LOG_RATECHECK(&wgp->wgp_ppsratecheck, LOG_DEBUG,
		    "%s: peer %s: out-of-window packet: %"PRIu64"\n",
		    if_name(&wg->wg_if), wgp->wgp_name,
		    le64toh(wgmd->wgmd_counter));
		goto out;
	}

	/* Ensure the payload and authenticator are contiguous.  */
	mlen = m_length(m);
	encrypted_len = mlen - sizeof(*wgmd);
	if (encrypted_len < WG_AUTHTAG_LEN) {
		WG_DLOG("Short encrypted_len: %zu\n", encrypted_len);
		goto out;
	}
	success = m_ensure_contig(&m, sizeof(*wgmd) + encrypted_len);
	if (success) {
		/* Decrypt straight out of the (now contiguous) mbuf.  */
		encrypted_buf = mtod(m, char *) + sizeof(*wgmd);
	} else {
		/*
		 * Could not make the chain contiguous: fall back to a
		 * temporary flat copy, released at out:.
		 */
		encrypted_buf = kmem_intr_alloc(encrypted_len, KM_NOSLEEP);
		if (encrypted_buf == NULL) {
			WG_DLOG("failed to allocate encrypted_buf\n");
			goto out;
		}
		m_copydata(m, sizeof(*wgmd), encrypted_len, encrypted_buf);
		free_encrypted_buf = true;
	}
	/* m_ensure_contig may change m regardless of its result */
	KASSERT(m->m_len >= sizeof(*wgmd));
	wgmd = mtod(m, struct wg_msg_data *);

	/*
	 * Get a buffer for the plaintext.  Add WG_AUTHTAG_LEN to avoid
	 * a zero-length buffer (XXX).  Drop if plaintext is longer
	 * than MCLBYTES (XXX).
	 */
	decrypted_len = encrypted_len - WG_AUTHTAG_LEN;
	if (decrypted_len > MCLBYTES) {
		/* FIXME handle larger data than MCLBYTES */
		WG_DLOG("couldn't handle larger data than MCLBYTES\n");
		goto out;
	}
	n = wg_get_mbuf(0, decrypted_len + WG_AUTHTAG_LEN);
	if (n == NULL) {
		WG_DLOG("wg_get_mbuf failed\n");
		goto out;
	}
	decrypted_buf = mtod(n, char *);

	/* Decrypt and verify the packet.  */
	WG_DLOG("mlen=%zu, encrypted_len=%zu\n", mlen, encrypted_len);
	error = wg_algo_aead_dec(decrypted_buf,
	    encrypted_len - WG_AUTHTAG_LEN /* can be 0 */,
	    wgs->wgs_tkey_recv, le64toh(wgmd->wgmd_counter), encrypted_buf,
	    encrypted_len, NULL, 0);
	if (error != 0) {
		WG_LOG_RATECHECK(&wgp->wgp_ppsratecheck, LOG_DEBUG,
		    "%s: peer %s: failed to wg_algo_aead_dec\n",
		    if_name(&wg->wg_if), wgp->wgp_name);
		m_freem(n);
		goto out;
	}
	WG_DLOG("outsize=%u\n", (u_int)decrypted_len);

	/* Packet is genuine.  Reject it if a replay or just too old.  */
	mutex_enter(&wgs->wgs_recvwin->lock);
	error = sliwin_update(&wgs->wgs_recvwin->window,
	    le64toh(wgmd->wgmd_counter));
	mutex_exit(&wgs->wgs_recvwin->lock);
	if (error) {
		WG_LOG_RATECHECK(&wgp->wgp_ppsratecheck, LOG_DEBUG,
		    "%s: peer %s: replay or out-of-window packet: %"PRIu64"\n",
		    if_name(&wg->wg_if), wgp->wgp_name,
		    le64toh(wgmd->wgmd_counter));
		m_freem(n);
		goto out;
	}

	/*
	 * We're done with m now; free it and chuck the pointers.
	 * (When no flat copy was made, encrypted_buf pointed into m
	 * and must not be dereferenced past this point either.)
	 */
	m_freem(m);
	m = NULL;
	wgmd = NULL;

	/*
	 * The packet is genuine.  Update the peer's endpoint if the
	 * source address changed.
	 *
	 * XXX How to prevent DoS by replaying genuine packets from the
	 * wrong source address?
	 */
	wg_update_endpoint_if_necessary(wgp, src);

	/*
	 * Validate the encapsulated packet header and get the address
	 * family, or drop.
	 */
	ok = wg_validate_inner_packet(decrypted_buf, decrypted_len, &af);
	if (!ok) {
		/*
		 * The payload is dropped, but the message authenticated,
		 * so the session state machine is still advanced.
		 */
		m_freem(n);
		goto update_state;
	}

	/* Submit it into our network stack if routable.  */
	ok = wg_validate_route(wg, wgp, af, decrypted_buf);
	if (ok) {
		/* n is handed to the input routine here.  */
		wg->wg_ops->input(&wg->wg_if, n, af);
	} else {
		/* Format the inner source address for the log message.  */
		char addrstr[INET6_ADDRSTRLEN];
		memset(addrstr, 0, sizeof(addrstr));
		switch (af) {
#ifdef INET
		case AF_INET: {
			const struct ip *ip = (const struct ip *)decrypted_buf;
			IN_PRINT(addrstr, &ip->ip_src);
			break;
		}
#endif
#ifdef INET6
		case AF_INET6: {
			const struct ip6_hdr *ip6 =
			    (const struct ip6_hdr *)decrypted_buf;
			IN6_PRINT(addrstr, &ip6->ip6_src);
			break;
		}
#endif
		default:
			panic("invalid af=%d", af);
		}
		WG_LOG_RATECHECK(&wgp->wgp_ppsratecheck, LOG_DEBUG,
		    "%s: peer %s: invalid source address (%s)\n",
		    if_name(&wg->wg_if), wgp->wgp_name, addrstr);
		m_freem(n);
		/*
		 * The inner address is invalid however the session is valid
		 * so continue the session processing below.
		 */
	}
	n = NULL;

update_state:
	/* Update the state machine if necessary.  */
	if (__predict_false(state == WGS_STATE_INIT_PASSIVE)) {
		/*
		 * We were waiting for the initiator to send their
		 * first data transport message, and that has happened.
		 * Schedule a task to establish this session.
		 */
		wg_schedule_peer_task(wgp, WGP_TASK_ESTABLISH_SESSION);
	} else {
		if (__predict_false(wg_need_to_send_init_message(wgs))) {
			wg_schedule_peer_task(wgp, WGP_TASK_SEND_INIT_MESSAGE);
		}
		/*
		 * [W] 6.5 Passive Keepalive
		 * "If a peer has received a validly-authenticated transport
		 *  data message (section 5.4.6), but does not have any packets
		 *  itself to send back for KEEPALIVE-TIMEOUT seconds, it sends
		 *  a keepalive message."
		 */
		const uint32_t now = time_uptime32;
		const uint32_t time_last_data_sent =
		    atomic_load_relaxed(&wgs->wgs_time_last_data_sent);
		WG_DLOG("time_uptime32=%"PRIu32
		    " wgs_time_last_data_sent=%"PRIu32"\n",
		    now, time_last_data_sent);
		if ((now - time_last_data_sent) >= wg_keepalive_timeout) {
			WG_TRACE("Schedule sending keepalive message");
			/*
			 * We can't send a keepalive message here to avoid
			 * a deadlock;  we already hold the solock of a socket
			 * that is used to send the message.
			 */
			wg_schedule_peer_task(wgp,
			    WGP_TASK_SEND_KEEPALIVE_MESSAGE);
		}
	}
out:
	/*
	 * Common exit: drop the session reference, free whatever is
	 * left of m (m is NULL once it was consumed above), and release
	 * the temporary flat copy if one was made.
	 */
	wg_put_session(wgs, &psref);
	m_freem(m);
	if (free_encrypted_buf)
		kmem_intr_free(encrypted_buf, encrypted_len);
}
3143 
3144 static void __noinline
3145 wg_handle_msg_cookie(struct wg_softc *wg, const struct wg_msg_cookie *wgmc)
3146 {
3147 	struct wg_session *wgs;
3148 	struct wg_peer *wgp;
3149 	struct psref psref;
3150 	int error;
3151 	uint8_t key[WG_HASH_LEN];
3152 	uint8_t cookie[WG_COOKIE_LEN];
3153 
3154 	WG_TRACE("cookie msg received");
3155 
3156 	/* Find the putative session.  */
3157 	wgs = wg_lookup_session_by_index(wg, wgmc->wgmc_receiver, &psref);
3158 	if (wgs == NULL) {
3159 		WG_TRACE("No session found");
3160 		return;
3161 	}
3162 
3163 	/* Lock the peer so we can update the cookie state.  */
3164 	wgp = wgs->wgs_peer;
3165 	mutex_enter(wgp->wgp_lock);
3166 
3167 	if (!wgp->wgp_last_sent_mac1_valid) {
3168 		WG_TRACE("No valid mac1 sent (or expired)");
3169 		goto out;
3170 	}
3171 
3172 	/*
3173 	 * wgp_last_sent_mac1_valid is only set to true when we are
3174 	 * transitioning to INIT_ACTIVE or INIT_PASSIVE, and always
3175 	 * cleared on transition out of them.
3176 	 */
3177 	KASSERTMSG((wgs->wgs_state == WGS_STATE_INIT_ACTIVE ||
3178 		wgs->wgs_state == WGS_STATE_INIT_PASSIVE),
3179 	    "state=%d", wgs->wgs_state);
3180 
3181 	/* Decrypt the cookie and store it for later handshake retry.  */
3182 	wg_algo_mac_cookie(key, sizeof(key), wgp->wgp_pubkey,
3183 	    sizeof(wgp->wgp_pubkey));
3184 	error = wg_algo_xaead_dec(cookie, sizeof(cookie), key,
3185 	    wgmc->wgmc_cookie, sizeof(wgmc->wgmc_cookie),
3186 	    wgp->wgp_last_sent_mac1, sizeof(wgp->wgp_last_sent_mac1),
3187 	    wgmc->wgmc_salt);
3188 	if (error != 0) {
3189 		WG_LOG_RATECHECK(&wgp->wgp_ppsratecheck, LOG_DEBUG,
3190 		    "%s: peer %s: wg_algo_aead_dec for cookie failed: "
3191 		    "error=%d\n", if_name(&wg->wg_if), wgp->wgp_name, error);
3192 		goto out;
3193 	}
3194 	/*
3195 	 * [W] 6.6: Interaction with Cookie Reply System
3196 	 * "it should simply store the decrypted cookie value from the cookie
3197 	 *  reply message, and wait for the expiration of the REKEY-TIMEOUT
3198 	 *  timer for retrying a handshake initiation message."
3199 	 */
3200 	wgp->wgp_latest_cookie_time = time_uptime;
3201 	memcpy(wgp->wgp_latest_cookie, cookie, sizeof(wgp->wgp_latest_cookie));
3202 out:
3203 	mutex_exit(wgp->wgp_lock);
3204 	wg_put_session(wgs, &psref);
3205 }
3206 
3207 static struct mbuf *
3208 wg_validate_msg_header(struct wg_softc *wg, struct mbuf *m)
3209 {
3210 	struct wg_msg wgm;
3211 	size_t mbuflen;
3212 	size_t msglen;
3213 
3214 	/*
3215 	 * Get the mbuf chain length.  It is already guaranteed, by
3216 	 * wg_overudp_cb, to be large enough for a struct wg_msg.
3217 	 */
3218 	mbuflen = m_length(m);
3219 	KASSERT(mbuflen >= sizeof(struct wg_msg));
3220 
3221 	/*
3222 	 * Copy the message header (32-bit message type) out -- we'll
3223 	 * worry about contiguity and alignment later.
3224 	 */
3225 	m_copydata(m, 0, sizeof(wgm), &wgm);
3226 	switch (le32toh(wgm.wgm_type)) {
3227 	case WG_MSG_TYPE_INIT:
3228 		msglen = sizeof(struct wg_msg_init);
3229 		break;
3230 	case WG_MSG_TYPE_RESP:
3231 		msglen = sizeof(struct wg_msg_resp);
3232 		break;
3233 	case WG_MSG_TYPE_COOKIE:
3234 		msglen = sizeof(struct wg_msg_cookie);
3235 		break;
3236 	case WG_MSG_TYPE_DATA:
3237 		msglen = sizeof(struct wg_msg_data);
3238 		break;
3239 	default:
3240 		WG_LOG_RATECHECK(&wg->wg_ppsratecheck, LOG_DEBUG,
3241 		    "%s: Unexpected msg type: %u\n", if_name(&wg->wg_if),
3242 		    le32toh(wgm.wgm_type));
3243 		goto error;
3244 	}
3245 
3246 	/* Verify the mbuf chain is long enough for this type of message.  */
3247 	if (__predict_false(mbuflen < msglen)) {
3248 		WG_DLOG("Invalid msg size: mbuflen=%zu type=%u\n", mbuflen,
3249 		    le32toh(wgm.wgm_type));
3250 		goto error;
3251 	}
3252 
3253 	/* Make the message header contiguous if necessary.  */
3254 	if (__predict_false(m->m_len < msglen)) {
3255 		m = m_pullup(m, msglen);
3256 		if (m == NULL)
3257 			return NULL;
3258 	}
3259 
3260 	return m;
3261 
3262 error:
3263 	m_freem(m);
3264 	return NULL;
3265 }
3266 
3267 static void
3268 wg_handle_packet(struct wg_softc *wg, struct mbuf *m,
3269     const struct sockaddr *src)
3270 {
3271 	struct wg_msg *wgm;
3272 
3273 	KASSERT(curlwp->l_pflag & LP_BOUND);
3274 
3275 	m = wg_validate_msg_header(wg, m);
3276 	if (__predict_false(m == NULL))
3277 		return;
3278 
3279 	KASSERT(m->m_len >= sizeof(struct wg_msg));
3280 	wgm = mtod(m, struct wg_msg *);
3281 	switch (le32toh(wgm->wgm_type)) {
3282 	case WG_MSG_TYPE_INIT:
3283 		wg_handle_msg_init(wg, (struct wg_msg_init *)wgm, src);
3284 		break;
3285 	case WG_MSG_TYPE_RESP:
3286 		wg_handle_msg_resp(wg, (struct wg_msg_resp *)wgm, src);
3287 		break;
3288 	case WG_MSG_TYPE_COOKIE:
3289 		wg_handle_msg_cookie(wg, (struct wg_msg_cookie *)wgm);
3290 		break;
3291 	case WG_MSG_TYPE_DATA:
3292 		wg_handle_msg_data(wg, m, src);
3293 		/* wg_handle_msg_data frees m for us */
3294 		return;
3295 	default:
3296 		panic("invalid message type: %d", le32toh(wgm->wgm_type));
3297 	}
3298 
3299 	m_freem(m);
3300 }
3301 
3302 static void
3303 wg_receive_packets(struct wg_softc *wg, const int af)
3304 {
3305 
3306 	for (;;) {
3307 		int error, flags;
3308 		struct socket *so;
3309 		struct mbuf *m = NULL;
3310 		struct uio dummy_uio;
3311 		struct mbuf *paddr = NULL;
3312 		struct sockaddr *src;
3313 
3314 		so = wg_get_so_by_af(wg, af);
3315 		flags = MSG_DONTWAIT;
3316 		dummy_uio.uio_resid = 1000000000;
3317 
3318 		error = so->so_receive(so, &paddr, &dummy_uio, &m, NULL,
3319 		    &flags);
3320 		if (error || m == NULL) {
3321 			//if (error == EWOULDBLOCK)
3322 			return;
3323 		}
3324 
3325 		KASSERT(paddr != NULL);
3326 		KASSERT(paddr->m_len >= sizeof(struct sockaddr));
3327 		src = mtod(paddr, struct sockaddr *);
3328 
3329 		wg_handle_packet(wg, m, src);
3330 	}
3331 }
3332 
3333 static void
3334 wg_get_peer(struct wg_peer *wgp, struct psref *psref)
3335 {
3336 
3337 	psref_acquire(psref, &wgp->wgp_psref, wg_psref_class);
3338 }
3339 
3340 static void
3341 wg_put_peer(struct wg_peer *wgp, struct psref *psref)
3342 {
3343 
3344 	psref_release(psref, &wgp->wgp_psref, wg_psref_class);
3345 }
3346 
3347 static void
3348 wg_task_send_init_message(struct wg_softc *wg, struct wg_peer *wgp)
3349 {
3350 	struct wg_session *wgs;
3351 
3352 	WG_TRACE("WGP_TASK_SEND_INIT_MESSAGE");
3353 
3354 	KASSERT(mutex_owned(wgp->wgp_lock));
3355 
3356 	if (!atomic_load_acquire(&wgp->wgp_endpoint_available)) {
3357 		WGLOG(LOG_DEBUG, "%s: No endpoint available\n",
3358 		    if_name(&wg->wg_if));
3359 		/* XXX should do something? */
3360 		return;
3361 	}
3362 
3363 	/*
3364 	 * If we already have an established session, there's no need
3365 	 * to initiate a new one -- unless the rekey-after-time or
3366 	 * rekey-after-messages limits have passed.
3367 	 */
3368 	wgs = wgp->wgp_session_stable;
3369 	if (wgs->wgs_state == WGS_STATE_ESTABLISHED &&
3370 	    !atomic_load_relaxed(&wgs->wgs_force_rekey))
3371 		return;
3372 
3373 	/*
3374 	 * Ensure we're initiating a new session.  If the unstable
3375 	 * session is already INIT_ACTIVE or INIT_PASSIVE, this does
3376 	 * nothing.
3377 	 */
3378 	wg_send_handshake_msg_init(wg, wgp);
3379 }
3380 
3381 static void
3382 wg_task_retry_handshake(struct wg_softc *wg, struct wg_peer *wgp)
3383 {
3384 	struct wg_session *wgs;
3385 
3386 	WG_TRACE("WGP_TASK_RETRY_HANDSHAKE");
3387 
3388 	KASSERT(mutex_owned(wgp->wgp_lock));
3389 	KASSERT(wgp->wgp_handshake_start_time != 0);
3390 
3391 	wgs = wgp->wgp_session_unstable;
3392 	if (wgs->wgs_state != WGS_STATE_INIT_ACTIVE)
3393 		return;
3394 
3395 	/*
3396 	 * XXX no real need to assign a new index here, but we do need
3397 	 * to transition to UNKNOWN temporarily
3398 	 */
3399 	wg_put_session_index(wg, wgs);
3400 
3401 	/* [W] 6.4 Handshake Initiation Retransmission */
3402 	if ((time_uptime - wgp->wgp_handshake_start_time) >
3403 	    wg_rekey_attempt_time) {
3404 		/* Give up handshaking */
3405 		wgp->wgp_handshake_start_time = 0;
3406 		WG_TRACE("give up");
3407 
3408 		/*
3409 		 * If a new data packet comes, handshaking will be retried
3410 		 * and a new session would be established at that time,
3411 		 * however we don't want to send pending packets then.
3412 		 */
3413 		wg_purge_pending_packets(wgp);
3414 		return;
3415 	}
3416 
3417 	wg_task_send_init_message(wg, wgp);
3418 }
3419 
3420 static void
3421 wg_task_establish_session(struct wg_softc *wg, struct wg_peer *wgp)
3422 {
3423 	struct wg_session *wgs;
3424 
3425 	KASSERT(mutex_owned(wgp->wgp_lock));
3426 
3427 	wgs = wgp->wgp_session_unstable;
3428 	if (wgs->wgs_state != WGS_STATE_INIT_PASSIVE)
3429 		/* XXX Can this happen?  */
3430 		return;
3431 
3432 	wgs->wgs_time_last_data_sent = 0;
3433 	wgs->wgs_is_initiator = false;
3434 
3435 	/*
3436 	 * Session was already ready to receive data.  Transition from
3437 	 * INIT_PASSIVE to ESTABLISHED just so we can swap the
3438 	 * sessions.
3439 	 *
3440 	 * atomic_store_relaxed because this doesn't affect the data rx
3441 	 * path, wg_handle_msg_data -- changing from INIT_PASSIVE to
3442 	 * ESTABLISHED makes no difference to the data rx path, and the
3443 	 * transition to INIT_PASSIVE with store-release already
3444 	 * published the state needed by the data rx path.
3445 	 */
3446 	WG_DLOG("session[L=%"PRIx32" R=%"PRIx32"] -> WGS_STATE_ESTABLISHED\n",
3447 	    wgs->wgs_local_index, wgs->wgs_remote_index);
3448 	atomic_store_relaxed(&wgs->wgs_state, WGS_STATE_ESTABLISHED);
3449 	WG_TRACE("WGS_STATE_ESTABLISHED");
3450 
3451 	/*
3452 	 * Session is ready to send data too now that we have received
3453 	 * the peer initiator's first data packet.
3454 	 *
3455 	 * Swap the sessions to publish the new one as the stable
3456 	 * session for the data tx path, wg_output.
3457 	 */
3458 	wg_swap_sessions(wg, wgp);
3459 	KASSERT(wgs == wgp->wgp_session_stable);
3460 }
3461 
3462 static void
3463 wg_task_endpoint_changed(struct wg_softc *wg, struct wg_peer *wgp)
3464 {
3465 
3466 	WG_TRACE("WGP_TASK_ENDPOINT_CHANGED");
3467 
3468 	KASSERT(mutex_owned(wgp->wgp_lock));
3469 
3470 	if (atomic_load_relaxed(&wgp->wgp_endpoint_changing)) {
3471 		pserialize_perform(wgp->wgp_psz);
3472 		mutex_exit(wgp->wgp_lock);
3473 		psref_target_destroy(&wgp->wgp_endpoint0->wgsa_psref,
3474 		    wg_psref_class);
3475 		psref_target_init(&wgp->wgp_endpoint0->wgsa_psref,
3476 		    wg_psref_class);
3477 		mutex_enter(wgp->wgp_lock);
3478 		atomic_store_release(&wgp->wgp_endpoint_changing, 0);
3479 	}
3480 }
3481 
3482 static void
3483 wg_task_send_keepalive_message(struct wg_softc *wg, struct wg_peer *wgp)
3484 {
3485 	struct wg_session *wgs;
3486 
3487 	WG_TRACE("WGP_TASK_SEND_KEEPALIVE_MESSAGE");
3488 
3489 	KASSERT(mutex_owned(wgp->wgp_lock));
3490 
3491 	wgs = wgp->wgp_session_stable;
3492 	if (wgs->wgs_state != WGS_STATE_ESTABLISHED)
3493 		return;
3494 
3495 	wg_send_keepalive_msg(wgp, wgs);
3496 }
3497 
3498 static void
3499 wg_task_destroy_prev_session(struct wg_softc *wg, struct wg_peer *wgp)
3500 {
3501 	struct wg_session *wgs;
3502 	uint32_t age;
3503 
3504 	WG_TRACE("WGP_TASK_DESTROY_PREV_SESSION");
3505 
3506 	KASSERT(mutex_owned(wgp->wgp_lock));
3507 
3508 	/*
3509 	 * If theres's any previous unstable session, i.e., one that
3510 	 * was ESTABLISHED and is now DESTROYING, older than
3511 	 * reject-after-time, destroy it.  Upcoming sessions are still
3512 	 * in INIT_ACTIVE or INIT_PASSIVE -- we don't touch those here.
3513 	 */
3514 	wgs = wgp->wgp_session_unstable;
3515 	KASSERT(wgs->wgs_state != WGS_STATE_ESTABLISHED);
3516 	if (wgs->wgs_state == WGS_STATE_DESTROYING &&
3517 	    ((age = (time_uptime32 - wgs->wgs_time_established)) >=
3518 		wg_reject_after_time)) {
3519 		WG_DLOG("destroying past session %"PRIu32" sec old\n", age);
3520 		wg_put_session_index(wg, wgs);
3521 		KASSERTMSG(wgs->wgs_state == WGS_STATE_UNKNOWN, "state=%d",
3522 		    wgs->wgs_state);
3523 	}
3524 
3525 	/*
3526 	 * If theres's any ESTABLISHED stable session older than
3527 	 * reject-after-time, destroy it.  (The stable session can also
3528 	 * be in UNKNOWN state -- nothing to do in that case)
3529 	 */
3530 	wgs = wgp->wgp_session_stable;
3531 	KASSERT(wgs->wgs_state != WGS_STATE_INIT_ACTIVE);
3532 	KASSERT(wgs->wgs_state != WGS_STATE_INIT_PASSIVE);
3533 	KASSERT(wgs->wgs_state != WGS_STATE_DESTROYING);
3534 	if (wgs->wgs_state == WGS_STATE_ESTABLISHED &&
3535 	    ((age = (time_uptime32 - wgs->wgs_time_established)) >=
3536 		wg_reject_after_time)) {
3537 		WG_DLOG("destroying current session %"PRIu32" sec old\n", age);
3538 		atomic_store_relaxed(&wgs->wgs_state, WGS_STATE_DESTROYING);
3539 		wg_put_session_index(wg, wgs);
3540 		KASSERTMSG(wgs->wgs_state == WGS_STATE_UNKNOWN, "state=%d",
3541 		    wgs->wgs_state);
3542 	}
3543 
3544 	/*
3545 	 * If there's no sessions left, no need to have the timer run
3546 	 * until the next time around -- halt it.
3547 	 *
3548 	 * It is only ever scheduled with wgp_lock held or in the
3549 	 * callout itself, and callout_halt prevents rescheudling
3550 	 * itself, so this never races with rescheduling.
3551 	 */
3552 	if (wgp->wgp_session_unstable->wgs_state == WGS_STATE_UNKNOWN &&
3553 	    wgp->wgp_session_stable->wgs_state == WGS_STATE_UNKNOWN)
3554 		callout_halt(&wgp->wgp_session_dtor_timer, NULL);
3555 }
3556 
3557 static void
3558 wg_peer_work(struct work *wk, void *cookie)
3559 {
3560 	struct wg_peer *wgp = container_of(wk, struct wg_peer, wgp_work);
3561 	struct wg_softc *wg = wgp->wgp_sc;
3562 	unsigned int tasks;
3563 
3564 	mutex_enter(wgp->wgp_intr_lock);
3565 	while ((tasks = wgp->wgp_tasks) != 0) {
3566 		wgp->wgp_tasks = 0;
3567 		mutex_exit(wgp->wgp_intr_lock);
3568 
3569 		mutex_enter(wgp->wgp_lock);
3570 		if (ISSET(tasks, WGP_TASK_SEND_INIT_MESSAGE))
3571 			wg_task_send_init_message(wg, wgp);
3572 		if (ISSET(tasks, WGP_TASK_RETRY_HANDSHAKE))
3573 			wg_task_retry_handshake(wg, wgp);
3574 		if (ISSET(tasks, WGP_TASK_ESTABLISH_SESSION))
3575 			wg_task_establish_session(wg, wgp);
3576 		if (ISSET(tasks, WGP_TASK_ENDPOINT_CHANGED))
3577 			wg_task_endpoint_changed(wg, wgp);
3578 		if (ISSET(tasks, WGP_TASK_SEND_KEEPALIVE_MESSAGE))
3579 			wg_task_send_keepalive_message(wg, wgp);
3580 		if (ISSET(tasks, WGP_TASK_DESTROY_PREV_SESSION))
3581 			wg_task_destroy_prev_session(wg, wgp);
3582 		mutex_exit(wgp->wgp_lock);
3583 
3584 		mutex_enter(wgp->wgp_intr_lock);
3585 	}
3586 	mutex_exit(wgp->wgp_intr_lock);
3587 }
3588 
3589 static void
3590 wg_job(struct threadpool_job *job)
3591 {
3592 	struct wg_softc *wg = container_of(job, struct wg_softc, wg_job);
3593 	int bound, upcalls;
3594 
3595 	mutex_enter(wg->wg_intr_lock);
3596 	while ((upcalls = wg->wg_upcalls) != 0) {
3597 		wg->wg_upcalls = 0;
3598 		mutex_exit(wg->wg_intr_lock);
3599 		bound = curlwp_bind();
3600 		if (ISSET(upcalls, WG_UPCALL_INET))
3601 			wg_receive_packets(wg, AF_INET);
3602 		if (ISSET(upcalls, WG_UPCALL_INET6))
3603 			wg_receive_packets(wg, AF_INET6);
3604 		curlwp_bindx(bound);
3605 		mutex_enter(wg->wg_intr_lock);
3606 	}
3607 	threadpool_job_done(job);
3608 	mutex_exit(wg->wg_intr_lock);
3609 }
3610 
3611 static int
3612 wg_bind_port(struct wg_softc *wg, const uint16_t port)
3613 {
3614 	int error = 0;
3615 	uint16_t old_port = wg->wg_listen_port;
3616 
3617 	if (port != 0 && old_port == port)
3618 		return 0;
3619 
3620 #ifdef INET
3621 	struct sockaddr_in _sin, *sin = &_sin;
3622 	sin->sin_len = sizeof(*sin);
3623 	sin->sin_family = AF_INET;
3624 	sin->sin_addr.s_addr = INADDR_ANY;
3625 	sin->sin_port = htons(port);
3626 
3627 	error = sobind(wg->wg_so4, sintosa(sin), curlwp);
3628 	if (error)
3629 		return error;
3630 #endif
3631 
3632 #ifdef INET6
3633 	struct sockaddr_in6 _sin6, *sin6 = &_sin6;
3634 	sin6->sin6_len = sizeof(*sin6);
3635 	sin6->sin6_family = AF_INET6;
3636 	sin6->sin6_addr = in6addr_any;
3637 	sin6->sin6_port = htons(port);
3638 
3639 	error = sobind(wg->wg_so6, sin6tosa(sin6), curlwp);
3640 	if (error)
3641 		return error;
3642 #endif
3643 
3644 	wg->wg_listen_port = port;
3645 
3646 	return error;
3647 }
3648 
3649 static void
3650 wg_so_upcall(struct socket *so, void *cookie, int events, int waitflag)
3651 {
3652 	struct wg_softc *wg = cookie;
3653 	int reason;
3654 
3655 	reason = (so->so_proto->pr_domain->dom_family == AF_INET) ?
3656 	    WG_UPCALL_INET :
3657 	    WG_UPCALL_INET6;
3658 
3659 	mutex_enter(wg->wg_intr_lock);
3660 	wg->wg_upcalls |= reason;
3661 	threadpool_schedule_job(wg->wg_threadpool, &wg->wg_job);
3662 	mutex_exit(wg->wg_intr_lock);
3663 }
3664 
3665 /*
3666  * wg_overudp_cb(&m, offset, so, src, arg)
3667  *
3668  *	Callback for incoming UDP packets in high-priority
3669  *	packet-processing path.
3670  *
3671  *	Three cases:
3672  *
3673  *	- Data packet.  Consumed here for high-priority handling.
3674  *	  => Returns 1 and takes ownership of m.
3675  *
3676  *	- Handshake packet.  Defer to thread context via so_receive in
3677  *	  wg_receive_packets.
3678  *	  => Returns 0 and leaves caller with ownership of m.
3679  *
3680  *	- Invalid.  Dropped on the floor and freed.
3681  *	  => Returns -1 and takes ownership of m (frees m).
3682  */
3683 static int
3684 wg_overudp_cb(struct mbuf **mp, int offset, struct socket *so,
3685     struct sockaddr *src, void *arg)
3686 {
3687 	struct wg_softc *wg = arg;
3688 	struct wg_msg wgm;
3689 	struct mbuf *m = *mp;
3690 
3691 	WG_TRACE("enter");
3692 
3693 	/* Verify the mbuf chain is long enough to have a wg msg header.  */
3694 	KASSERT(offset <= m_length(m));
3695 	if (__predict_false(m_length(m) - offset < sizeof(struct wg_msg))) {
3696 		/* drop on the floor */
3697 		m_freem(m);
3698 		*mp = NULL;
3699 		return -1;	/* dropped */
3700 	}
3701 
3702 	/*
3703 	 * Copy the message header (32-bit message type) out -- we'll
3704 	 * worry about contiguity and alignment later.
3705 	 */
3706 	m_copydata(m, offset, sizeof(struct wg_msg), &wgm);
3707 	WG_DLOG("type=%d\n", le32toh(wgm.wgm_type));
3708 
3709 	/*
3710 	 * Handle DATA packets promptly as they arrive, if they are in
3711 	 * an active session.  Other packets may require expensive
3712 	 * public-key crypto and are not as sensitive to latency, so
3713 	 * defer them to the worker thread.
3714 	 */
3715 	switch (le32toh(wgm.wgm_type)) {
3716 	case WG_MSG_TYPE_DATA:
3717 		/* handle immediately */
3718 		m_adj(m, offset);
3719 		if (__predict_false(m->m_len < sizeof(struct wg_msg_data))) {
3720 			m = m_pullup(m, sizeof(struct wg_msg_data));
3721 			if (m == NULL) {
3722 				*mp = NULL;
3723 				return -1; /* dropped */
3724 			}
3725 		}
3726 		wg_handle_msg_data(wg, m, src);
3727 		*mp = NULL;
3728 		return 1;	/* consumed */
3729 	case WG_MSG_TYPE_INIT:
3730 	case WG_MSG_TYPE_RESP:
3731 	case WG_MSG_TYPE_COOKIE:
3732 		/* pass through to so_receive in wg_receive_packets */
3733 		return 0;	/* passthrough */
3734 	default:
3735 		/* drop on the floor */
3736 		m_freem(m);
3737 		*mp = NULL;
3738 		return -1;	/* dropped */
3739 	}
3740 }
3741 
3742 static int
3743 wg_socreate(struct wg_softc *wg, int af, struct socket **sop)
3744 {
3745 	int error;
3746 	struct socket *so;
3747 
3748 	error = socreate(af, &so, SOCK_DGRAM, 0, curlwp, NULL);
3749 	if (error != 0)
3750 		return error;
3751 
3752 	solock(so);
3753 	so->so_upcallarg = wg;
3754 	so->so_upcall = wg_so_upcall;
3755 	so->so_rcv.sb_flags |= SB_UPCALL;
3756 	inpcb_register_overudp_cb(sotoinpcb(so), wg_overudp_cb, wg);
3757 	sounlock(so);
3758 
3759 	*sop = so;
3760 
3761 	return 0;
3762 }
3763 
3764 static bool
3765 wg_session_hit_limits(struct wg_session *wgs)
3766 {
3767 
3768 	/*
3769 	 * [W] 6.2: Transport Message Limits
3770 	 * "After REJECT-AFTER-MESSAGES transport data messages or after the
3771 	 *  current secure session is REJECT-AFTER-TIME seconds old, whichever
3772 	 *  comes first, WireGuard will refuse to send or receive any more
3773 	 *  transport data messages using the current secure session, ..."
3774 	 */
3775 	KASSERT(wgs->wgs_time_established != 0 || time_uptime > UINT32_MAX);
3776 	if (time_uptime32 - wgs->wgs_time_established > wg_reject_after_time) {
3777 		WG_DLOG("The session hits REJECT_AFTER_TIME\n");
3778 		return true;
3779 	} else if (wg_session_get_send_counter(wgs) >
3780 	    wg_reject_after_messages) {
3781 		WG_DLOG("The session hits REJECT_AFTER_MESSAGES\n");
3782 		return true;
3783 	}
3784 
3785 	return false;
3786 }
3787 
3788 static void
3789 wgintr(void *cookie)
3790 {
3791 	struct wg_peer *wgp;
3792 	struct wg_session *wgs;
3793 	struct mbuf *m;
3794 	struct psref psref;
3795 
3796 	while ((m = pktq_dequeue(wg_pktq)) != NULL) {
3797 		wgp = M_GETCTX(m, struct wg_peer *);
3798 		if ((wgs = wg_get_stable_session(wgp, &psref)) == NULL) {
3799 			/*
3800 			 * No established session.  If we're the first
3801 			 * to try sending data, schedule a handshake
3802 			 * and queue the packet for when the handshake
3803 			 * is done; otherwise just drop the packet and
3804 			 * let the ongoing handshake attempt continue.
3805 			 * We could queue more data packets but it's
3806 			 * not clear that's worthwhile.
3807 			 */
3808 			WG_TRACE("no stable session");
3809 			membar_release();
3810 			if ((m = atomic_swap_ptr(&wgp->wgp_pending, m)) ==
3811 			    NULL) {
3812 				WG_TRACE("queued first packet;"
3813 				    " init handshake");
3814 				wg_schedule_peer_task(wgp,
3815 				    WGP_TASK_SEND_INIT_MESSAGE);
3816 			} else {
3817 				membar_acquire();
3818 				WG_TRACE("first packet already queued,"
3819 				    " dropping");
3820 			}
3821 			goto next0;
3822 		}
3823 		if (__predict_false(wg_session_hit_limits(wgs))) {
3824 			WG_TRACE("stable session hit limits");
3825 			membar_release();
3826 			if ((m = atomic_swap_ptr(&wgp->wgp_pending, m)) ==
3827 			    NULL) {
3828 				WG_TRACE("queued first packet in a while;"
3829 				    " reinit handshake");
3830 				atomic_store_relaxed(&wgs->wgs_force_rekey,
3831 				    true);
3832 				wg_schedule_peer_task(wgp,
3833 				    WGP_TASK_SEND_INIT_MESSAGE);
3834 			} else {
3835 				membar_acquire();
3836 				WG_TRACE("first packet in already queued,"
3837 				    " dropping");
3838 			}
3839 			goto next1;
3840 		}
3841 		wg_send_data_msg(wgp, wgs, m);
3842 		m = NULL;	/* consumed */
3843 next1:		wg_put_session(wgs, &psref);
3844 next0:		m_freem(m);
3845 		/* XXX Yield to avoid userland starvation?  */
3846 	}
3847 }
3848 
3849 static void
3850 wg_purge_pending_packets(struct wg_peer *wgp)
3851 {
3852 	struct mbuf *m;
3853 
3854 	m = atomic_swap_ptr(&wgp->wgp_pending, NULL);
3855 	membar_acquire();     /* matches membar_release in wgintr */
3856 	m_freem(m);
3857 #ifdef ALTQ
3858 	wg_start(&wgp->wgp_sc->wg_if);
3859 #endif
3860 	pktq_barrier(wg_pktq);
3861 }
3862 
3863 static void
3864 wg_handshake_timeout_timer(void *arg)
3865 {
3866 	struct wg_peer *wgp = arg;
3867 
3868 	WG_TRACE("enter");
3869 
3870 	wg_schedule_peer_task(wgp, WGP_TASK_RETRY_HANDSHAKE);
3871 }
3872 
3873 static struct wg_peer *
3874 wg_alloc_peer(struct wg_softc *wg)
3875 {
3876 	struct wg_peer *wgp;
3877 
3878 	wgp = kmem_zalloc(sizeof(*wgp), KM_SLEEP);
3879 
3880 	wgp->wgp_sc = wg;
3881 	callout_init(&wgp->wgp_handshake_timeout_timer, CALLOUT_MPSAFE);
3882 	callout_setfunc(&wgp->wgp_handshake_timeout_timer,
3883 	    wg_handshake_timeout_timer, wgp);
3884 	callout_init(&wgp->wgp_session_dtor_timer, CALLOUT_MPSAFE);
3885 	callout_setfunc(&wgp->wgp_session_dtor_timer,
3886 	    wg_session_dtor_timer, wgp);
3887 	PSLIST_ENTRY_INIT(wgp, wgp_peerlist_entry);
3888 	wgp->wgp_endpoint_changing = false;
3889 	wgp->wgp_endpoint_available = false;
3890 	wgp->wgp_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
3891 	wgp->wgp_intr_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SOFTNET);
3892 	wgp->wgp_psz = pserialize_create();
3893 	psref_target_init(&wgp->wgp_psref, wg_psref_class);
3894 
3895 	wgp->wgp_endpoint = kmem_zalloc(sizeof(*wgp->wgp_endpoint), KM_SLEEP);
3896 	wgp->wgp_endpoint0 = kmem_zalloc(sizeof(*wgp->wgp_endpoint0), KM_SLEEP);
3897 	psref_target_init(&wgp->wgp_endpoint->wgsa_psref, wg_psref_class);
3898 	psref_target_init(&wgp->wgp_endpoint0->wgsa_psref, wg_psref_class);
3899 
3900 	struct wg_session *wgs;
3901 	wgp->wgp_session_stable =
3902 	    kmem_zalloc(sizeof(*wgp->wgp_session_stable), KM_SLEEP);
3903 	wgp->wgp_session_unstable =
3904 	    kmem_zalloc(sizeof(*wgp->wgp_session_unstable), KM_SLEEP);
3905 	wgs = wgp->wgp_session_stable;
3906 	wgs->wgs_peer = wgp;
3907 	wgs->wgs_state = WGS_STATE_UNKNOWN;
3908 	psref_target_init(&wgs->wgs_psref, wg_psref_class);
3909 #ifndef __HAVE_ATOMIC64_LOADSTORE
3910 	mutex_init(&wgs->wgs_send_counter_lock, MUTEX_DEFAULT, IPL_SOFTNET);
3911 #endif
3912 	wgs->wgs_recvwin = kmem_zalloc(sizeof(*wgs->wgs_recvwin), KM_SLEEP);
3913 	mutex_init(&wgs->wgs_recvwin->lock, MUTEX_DEFAULT, IPL_SOFTNET);
3914 
3915 	wgs = wgp->wgp_session_unstable;
3916 	wgs->wgs_peer = wgp;
3917 	wgs->wgs_state = WGS_STATE_UNKNOWN;
3918 	psref_target_init(&wgs->wgs_psref, wg_psref_class);
3919 #ifndef __HAVE_ATOMIC64_LOADSTORE
3920 	mutex_init(&wgs->wgs_send_counter_lock, MUTEX_DEFAULT, IPL_SOFTNET);
3921 #endif
3922 	wgs->wgs_recvwin = kmem_zalloc(sizeof(*wgs->wgs_recvwin), KM_SLEEP);
3923 	mutex_init(&wgs->wgs_recvwin->lock, MUTEX_DEFAULT, IPL_SOFTNET);
3924 
3925 	return wgp;
3926 }
3927 
3928 static void
3929 wg_destroy_peer(struct wg_peer *wgp)
3930 {
3931 	struct wg_session *wgs;
3932 	struct wg_softc *wg = wgp->wgp_sc;
3933 
3934 	/* Prevent new packets from this peer on any source address.  */
3935 	rw_enter(wg->wg_rwlock, RW_WRITER);
3936 	for (int i = 0; i < wgp->wgp_n_allowedips; i++) {
3937 		struct wg_allowedip *wga = &wgp->wgp_allowedips[i];
3938 		struct radix_node_head *rnh = wg_rnh(wg, wga->wga_family);
3939 		struct radix_node *rn;
3940 
3941 		KASSERT(rnh != NULL);
3942 		rn = rnh->rnh_deladdr(&wga->wga_sa_addr,
3943 		    &wga->wga_sa_mask, rnh);
3944 		if (rn == NULL) {
3945 			char addrstr[128];
3946 			sockaddr_format(&wga->wga_sa_addr, addrstr,
3947 			    sizeof(addrstr));
3948 			WGLOG(LOG_WARNING, "%s: Couldn't delete %s",
3949 			    if_name(&wg->wg_if), addrstr);
3950 		}
3951 	}
3952 	rw_exit(wg->wg_rwlock);
3953 
3954 	/* Purge pending packets.  */
3955 	wg_purge_pending_packets(wgp);
3956 
3957 	/* Halt all packet processing and timeouts.  */
3958 	callout_halt(&wgp->wgp_handshake_timeout_timer, NULL);
3959 	callout_halt(&wgp->wgp_session_dtor_timer, NULL);
3960 
3961 	/* Wait for any queued work to complete.  */
3962 	workqueue_wait(wg_wq, &wgp->wgp_work);
3963 
3964 	wgs = wgp->wgp_session_unstable;
3965 	if (wgs->wgs_state != WGS_STATE_UNKNOWN) {
3966 		mutex_enter(wgp->wgp_lock);
3967 		wg_destroy_session(wg, wgs);
3968 		mutex_exit(wgp->wgp_lock);
3969 	}
3970 	mutex_destroy(&wgs->wgs_recvwin->lock);
3971 	kmem_free(wgs->wgs_recvwin, sizeof(*wgs->wgs_recvwin));
3972 #ifndef __HAVE_ATOMIC64_LOADSTORE
3973 	mutex_destroy(&wgs->wgs_send_counter_lock);
3974 #endif
3975 	kmem_free(wgs, sizeof(*wgs));
3976 
3977 	wgs = wgp->wgp_session_stable;
3978 	if (wgs->wgs_state != WGS_STATE_UNKNOWN) {
3979 		mutex_enter(wgp->wgp_lock);
3980 		wg_destroy_session(wg, wgs);
3981 		mutex_exit(wgp->wgp_lock);
3982 	}
3983 	mutex_destroy(&wgs->wgs_recvwin->lock);
3984 	kmem_free(wgs->wgs_recvwin, sizeof(*wgs->wgs_recvwin));
3985 #ifndef __HAVE_ATOMIC64_LOADSTORE
3986 	mutex_destroy(&wgs->wgs_send_counter_lock);
3987 #endif
3988 	kmem_free(wgs, sizeof(*wgs));
3989 
3990 	psref_target_destroy(&wgp->wgp_endpoint->wgsa_psref, wg_psref_class);
3991 	psref_target_destroy(&wgp->wgp_endpoint0->wgsa_psref, wg_psref_class);
3992 	kmem_free(wgp->wgp_endpoint, sizeof(*wgp->wgp_endpoint));
3993 	kmem_free(wgp->wgp_endpoint0, sizeof(*wgp->wgp_endpoint0));
3994 
3995 	pserialize_destroy(wgp->wgp_psz);
3996 	mutex_obj_free(wgp->wgp_intr_lock);
3997 	mutex_obj_free(wgp->wgp_lock);
3998 
3999 	kmem_free(wgp, sizeof(*wgp));
4000 }
4001 
4002 static void
4003 wg_destroy_all_peers(struct wg_softc *wg)
4004 {
4005 	struct wg_peer *wgp, *wgp0 __diagused;
4006 	void *garbage_byname, *garbage_bypubkey;
4007 
4008 restart:
4009 	garbage_byname = garbage_bypubkey = NULL;
4010 	mutex_enter(wg->wg_lock);
4011 	WG_PEER_WRITER_FOREACH(wgp, wg) {
4012 		if (wgp->wgp_name[0]) {
4013 			wgp0 = thmap_del(wg->wg_peers_byname, wgp->wgp_name,
4014 			    strlen(wgp->wgp_name));
4015 			KASSERT(wgp0 == wgp);
4016 			garbage_byname = thmap_stage_gc(wg->wg_peers_byname);
4017 		}
4018 		wgp0 = thmap_del(wg->wg_peers_bypubkey, wgp->wgp_pubkey,
4019 		    sizeof(wgp->wgp_pubkey));
4020 		KASSERT(wgp0 == wgp);
4021 		garbage_bypubkey = thmap_stage_gc(wg->wg_peers_bypubkey);
4022 		WG_PEER_WRITER_REMOVE(wgp);
4023 		wg->wg_npeers--;
4024 		mutex_enter(wgp->wgp_lock);
4025 		pserialize_perform(wgp->wgp_psz);
4026 		mutex_exit(wgp->wgp_lock);
4027 		PSLIST_ENTRY_DESTROY(wgp, wgp_peerlist_entry);
4028 		break;
4029 	}
4030 	mutex_exit(wg->wg_lock);
4031 
4032 	if (wgp == NULL)
4033 		return;
4034 
4035 	psref_target_destroy(&wgp->wgp_psref, wg_psref_class);
4036 
4037 	wg_destroy_peer(wgp);
4038 	thmap_gc(wg->wg_peers_byname, garbage_byname);
4039 	thmap_gc(wg->wg_peers_bypubkey, garbage_bypubkey);
4040 
4041 	goto restart;
4042 }
4043 
4044 static int
4045 wg_destroy_peer_name(struct wg_softc *wg, const char *name)
4046 {
4047 	struct wg_peer *wgp, *wgp0 __diagused;
4048 	void *garbage_byname, *garbage_bypubkey;
4049 
4050 	mutex_enter(wg->wg_lock);
4051 	wgp = thmap_del(wg->wg_peers_byname, name, strlen(name));
4052 	if (wgp != NULL) {
4053 		wgp0 = thmap_del(wg->wg_peers_bypubkey, wgp->wgp_pubkey,
4054 		    sizeof(wgp->wgp_pubkey));
4055 		KASSERT(wgp0 == wgp);
4056 		garbage_byname = thmap_stage_gc(wg->wg_peers_byname);
4057 		garbage_bypubkey = thmap_stage_gc(wg->wg_peers_bypubkey);
4058 		WG_PEER_WRITER_REMOVE(wgp);
4059 		wg->wg_npeers--;
4060 		if (wg->wg_npeers == 0)
4061 			if_link_state_change(&wg->wg_if, LINK_STATE_DOWN);
4062 		mutex_enter(wgp->wgp_lock);
4063 		pserialize_perform(wgp->wgp_psz);
4064 		mutex_exit(wgp->wgp_lock);
4065 		PSLIST_ENTRY_DESTROY(wgp, wgp_peerlist_entry);
4066 	}
4067 	mutex_exit(wg->wg_lock);
4068 
4069 	if (wgp == NULL)
4070 		return ENOENT;
4071 
4072 	psref_target_destroy(&wgp->wgp_psref, wg_psref_class);
4073 
4074 	wg_destroy_peer(wgp);
4075 	thmap_gc(wg->wg_peers_byname, garbage_byname);
4076 	thmap_gc(wg->wg_peers_bypubkey, garbage_bypubkey);
4077 
4078 	return 0;
4079 }
4080 
4081 static int
4082 wg_if_attach(struct wg_softc *wg)
4083 {
4084 
4085 	wg->wg_if.if_addrlen = 0;
4086 	wg->wg_if.if_mtu = WG_MTU;
4087 	wg->wg_if.if_flags = IFF_MULTICAST;
4088 	wg->wg_if.if_extflags = IFEF_MPSAFE;
4089 	wg->wg_if.if_ioctl = wg_ioctl;
4090 	wg->wg_if.if_output = wg_output;
4091 	wg->wg_if.if_init = wg_init;
4092 #ifdef ALTQ
4093 	wg->wg_if.if_start = wg_start;
4094 #endif
4095 	wg->wg_if.if_stop = wg_stop;
4096 	wg->wg_if.if_type = IFT_OTHER;
4097 	wg->wg_if.if_dlt = DLT_NULL;
4098 	wg->wg_if.if_softc = wg;
4099 #ifdef ALTQ
4100 	IFQ_SET_READY(&wg->wg_if.if_snd);
4101 #endif
4102 	if_initialize(&wg->wg_if);
4103 
4104 	wg->wg_if.if_link_state = LINK_STATE_DOWN;
4105 	if_alloc_sadl(&wg->wg_if);
4106 	if_register(&wg->wg_if);
4107 
4108 	bpf_attach(&wg->wg_if, DLT_NULL, sizeof(uint32_t));
4109 
4110 	return 0;
4111 }
4112 
4113 static void
4114 wg_if_detach(struct wg_softc *wg)
4115 {
4116 	struct ifnet *ifp = &wg->wg_if;
4117 
4118 	bpf_detach(ifp);
4119 	if_detach(ifp);
4120 }
4121 
4122 static int
4123 wg_clone_create(struct if_clone *ifc, int unit)
4124 {
4125 	struct wg_softc *wg;
4126 	int error;
4127 
4128 	wg_guarantee_initialized();
4129 
4130 	error = wg_count_inc();
4131 	if (error)
4132 		return error;
4133 
4134 	wg = kmem_zalloc(sizeof(*wg), KM_SLEEP);
4135 
4136 	if_initname(&wg->wg_if, ifc->ifc_name, unit);
4137 
4138 	PSLIST_INIT(&wg->wg_peers);
4139 	wg->wg_peers_bypubkey = thmap_create(0, NULL, THMAP_NOCOPY);
4140 	wg->wg_peers_byname = thmap_create(0, NULL, THMAP_NOCOPY);
4141 	wg->wg_sessions_byindex = thmap_create(0, NULL, THMAP_NOCOPY);
4142 	wg->wg_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
4143 	wg->wg_intr_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SOFTNET);
4144 	wg->wg_rwlock = rw_obj_alloc();
4145 	threadpool_job_init(&wg->wg_job, wg_job, wg->wg_intr_lock,
4146 	    "%s", if_name(&wg->wg_if));
4147 	wg->wg_ops = &wg_ops_rumpkernel;
4148 
4149 	error = threadpool_get(&wg->wg_threadpool, PRI_NONE);
4150 	if (error)
4151 		goto fail0;
4152 
4153 #ifdef INET
4154 	error = wg_socreate(wg, AF_INET, &wg->wg_so4);
4155 	if (error)
4156 		goto fail1;
4157 	rn_inithead((void **)&wg->wg_rtable_ipv4,
4158 	    offsetof(struct sockaddr_in, sin_addr) * NBBY);
4159 #endif
4160 #ifdef INET6
4161 	error = wg_socreate(wg, AF_INET6, &wg->wg_so6);
4162 	if (error)
4163 		goto fail2;
4164 	rn_inithead((void **)&wg->wg_rtable_ipv6,
4165 	    offsetof(struct sockaddr_in6, sin6_addr) * NBBY);
4166 #endif
4167 
4168 	error = wg_if_attach(wg);
4169 	if (error)
4170 		goto fail3;
4171 
4172 	return 0;
4173 
4174 fail4: __unused
4175 	wg_destroy_all_peers(wg);
4176 	wg_if_detach(wg);
4177 fail3:
4178 #ifdef INET6
4179 	solock(wg->wg_so6);
4180 	wg->wg_so6->so_rcv.sb_flags &= ~SB_UPCALL;
4181 	sounlock(wg->wg_so6);
4182 #endif
4183 #ifdef INET
4184 	solock(wg->wg_so4);
4185 	wg->wg_so4->so_rcv.sb_flags &= ~SB_UPCALL;
4186 	sounlock(wg->wg_so4);
4187 #endif
4188 	mutex_enter(wg->wg_intr_lock);
4189 	threadpool_cancel_job(wg->wg_threadpool, &wg->wg_job);
4190 	mutex_exit(wg->wg_intr_lock);
4191 #ifdef INET6
4192 	if (wg->wg_rtable_ipv6 != NULL)
4193 		free(wg->wg_rtable_ipv6, M_RTABLE);
4194 	soclose(wg->wg_so6);
4195 fail2:
4196 #endif
4197 #ifdef INET
4198 	if (wg->wg_rtable_ipv4 != NULL)
4199 		free(wg->wg_rtable_ipv4, M_RTABLE);
4200 	soclose(wg->wg_so4);
4201 fail1:
4202 #endif
4203 	threadpool_put(wg->wg_threadpool, PRI_NONE);
4204 fail0:	threadpool_job_destroy(&wg->wg_job);
4205 	rw_obj_free(wg->wg_rwlock);
4206 	mutex_obj_free(wg->wg_intr_lock);
4207 	mutex_obj_free(wg->wg_lock);
4208 	thmap_destroy(wg->wg_sessions_byindex);
4209 	thmap_destroy(wg->wg_peers_byname);
4210 	thmap_destroy(wg->wg_peers_bypubkey);
4211 	PSLIST_DESTROY(&wg->wg_peers);
4212 	kmem_free(wg, sizeof(*wg));
4213 	wg_count_dec();
4214 	return error;
4215 }
4216 
4217 static int
4218 wg_clone_destroy(struct ifnet *ifp)
4219 {
4220 	struct wg_softc *wg = container_of(ifp, struct wg_softc, wg_if);
4221 
4222 #ifdef WG_RUMPKERNEL
4223 	if (wg_user_mode(wg)) {
4224 		rumpuser_wg_destroy(wg->wg_user);
4225 		wg->wg_user = NULL;
4226 	}
4227 #endif
4228 
4229 	wg_destroy_all_peers(wg);
4230 	wg_if_detach(wg);
4231 #ifdef INET6
4232 	solock(wg->wg_so6);
4233 	wg->wg_so6->so_rcv.sb_flags &= ~SB_UPCALL;
4234 	sounlock(wg->wg_so6);
4235 #endif
4236 #ifdef INET
4237 	solock(wg->wg_so4);
4238 	wg->wg_so4->so_rcv.sb_flags &= ~SB_UPCALL;
4239 	sounlock(wg->wg_so4);
4240 #endif
4241 	mutex_enter(wg->wg_intr_lock);
4242 	threadpool_cancel_job(wg->wg_threadpool, &wg->wg_job);
4243 	mutex_exit(wg->wg_intr_lock);
4244 #ifdef INET6
4245 	if (wg->wg_rtable_ipv6 != NULL)
4246 		free(wg->wg_rtable_ipv6, M_RTABLE);
4247 	soclose(wg->wg_so6);
4248 #endif
4249 #ifdef INET
4250 	if (wg->wg_rtable_ipv4 != NULL)
4251 		free(wg->wg_rtable_ipv4, M_RTABLE);
4252 	soclose(wg->wg_so4);
4253 #endif
4254 	threadpool_put(wg->wg_threadpool, PRI_NONE);
4255 	threadpool_job_destroy(&wg->wg_job);
4256 	rw_obj_free(wg->wg_rwlock);
4257 	mutex_obj_free(wg->wg_intr_lock);
4258 	mutex_obj_free(wg->wg_lock);
4259 	thmap_destroy(wg->wg_sessions_byindex);
4260 	thmap_destroy(wg->wg_peers_byname);
4261 	thmap_destroy(wg->wg_peers_bypubkey);
4262 	PSLIST_DESTROY(&wg->wg_peers);
4263 	kmem_free(wg, sizeof(*wg));
4264 	wg_count_dec();
4265 
4266 	return 0;
4267 }
4268 
4269 static struct wg_peer *
4270 wg_pick_peer_by_sa(struct wg_softc *wg, const struct sockaddr *sa,
4271     struct psref *psref)
4272 {
4273 	struct radix_node_head *rnh;
4274 	struct radix_node *rn;
4275 	struct wg_peer *wgp = NULL;
4276 	struct wg_allowedip *wga;
4277 
4278 #ifdef WG_DEBUG_LOG
4279 	char addrstr[128];
4280 	sockaddr_format(sa, addrstr, sizeof(addrstr));
4281 	WG_DLOG("sa=%s\n", addrstr);
4282 #endif
4283 
4284 	rw_enter(wg->wg_rwlock, RW_READER);
4285 
4286 	rnh = wg_rnh(wg, sa->sa_family);
4287 	if (rnh == NULL)
4288 		goto out;
4289 
4290 	rn = rnh->rnh_matchaddr(sa, rnh);
4291 	if (rn == NULL || (rn->rn_flags & RNF_ROOT) != 0)
4292 		goto out;
4293 
4294 	WG_TRACE("success");
4295 
4296 	wga = container_of(rn, struct wg_allowedip, wga_nodes[0]);
4297 	wgp = wga->wga_peer;
4298 	wg_get_peer(wgp, psref);
4299 
4300 out:
4301 	rw_exit(wg->wg_rwlock);
4302 	return wgp;
4303 }
4304 
4305 static void
4306 wg_fill_msg_data(struct wg_softc *wg, struct wg_peer *wgp,
4307     struct wg_session *wgs, struct wg_msg_data *wgmd)
4308 {
4309 
4310 	memset(wgmd, 0, sizeof(*wgmd));
4311 	wgmd->wgmd_type = htole32(WG_MSG_TYPE_DATA);
4312 	wgmd->wgmd_receiver = wgs->wgs_remote_index;
4313 	/* [W] 5.4.6: msg.counter := Nm^send */
4314 	/* [W] 5.4.6: Nm^send := Nm^send + 1 */
4315 	wgmd->wgmd_counter = htole64(wg_session_inc_send_counter(wgs));
4316 	WG_DLOG("counter=%"PRIu64"\n", le64toh(wgmd->wgmd_counter));
4317 }
4318 
4319 static int
4320 wg_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
4321     const struct rtentry *rt)
4322 {
4323 	struct wg_softc *wg = ifp->if_softc;
4324 	struct wg_peer *wgp = NULL;
4325 	struct psref wgp_psref;
4326 	int bound;
4327 	int error;
4328 
4329 	bound = curlwp_bind();
4330 
4331 	/* TODO make the nest limit configurable via sysctl */
4332 	error = if_tunnel_check_nesting(ifp, m, 1);
4333 	if (error) {
4334 		WGLOG(LOG_ERR,
4335 		    "%s: tunneling loop detected and packet dropped\n",
4336 		    if_name(&wg->wg_if));
4337 		goto out0;
4338 	}
4339 
4340 #ifdef ALTQ
4341 	bool altq = atomic_load_relaxed(&ifp->if_snd.altq_flags)
4342 	    & ALTQF_ENABLED;
4343 	if (altq)
4344 		IFQ_CLASSIFY(&ifp->if_snd, m, dst->sa_family);
4345 #endif
4346 
4347 	bpf_mtap_af(ifp, dst->sa_family, m, BPF_D_OUT);
4348 
4349 	m->m_flags &= ~(M_BCAST|M_MCAST);
4350 
4351 	wgp = wg_pick_peer_by_sa(wg, dst, &wgp_psref);
4352 	if (wgp == NULL) {
4353 		WG_TRACE("peer not found");
4354 		error = EHOSTUNREACH;
4355 		goto out0;
4356 	}
4357 
4358 	/* Clear checksum-offload flags. */
4359 	m->m_pkthdr.csum_flags = 0;
4360 	m->m_pkthdr.csum_data = 0;
4361 
4362 	/* Toss it in the queue.  */
4363 #ifdef ALTQ
4364 	if (altq) {
4365 		mutex_enter(ifp->if_snd.ifq_lock);
4366 		if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
4367 			M_SETCTX(m, wgp);
4368 			ALTQ_ENQUEUE(&ifp->if_snd, m, error);
4369 			m = NULL; /* consume */
4370 		}
4371 		mutex_exit(ifp->if_snd.ifq_lock);
4372 		if (m == NULL) {
4373 			wg_start(ifp);
4374 			goto out1;
4375 		}
4376 	}
4377 #endif
4378 	kpreempt_disable();
4379 	const uint32_t h = curcpu()->ci_index;	// pktq_rps_hash(m)
4380 	M_SETCTX(m, wgp);
4381 	if (__predict_false(!pktq_enqueue(wg_pktq, m, h))) {
4382 		WGLOG(LOG_ERR, "%s: pktq full, dropping\n",
4383 		    if_name(&wg->wg_if));
4384 		error = ENOBUFS;
4385 		goto out2;
4386 	}
4387 	m = NULL;		/* consumed */
4388 	error = 0;
4389 out2:	kpreempt_enable();
4390 
4391 #ifdef ALTQ
4392 out1:
4393 #endif
4394 	wg_put_peer(wgp, &wgp_psref);
4395 out0:	m_freem(m);
4396 	curlwp_bindx(bound);
4397 	return error;
4398 }
4399 
4400 static int
4401 wg_send_udp(struct wg_peer *wgp, struct mbuf *m)
4402 {
4403 	struct psref psref;
4404 	struct wg_sockaddr *wgsa;
4405 	int error;
4406 	struct socket *so;
4407 
4408 	wgsa = wg_get_endpoint_sa(wgp, &psref);
4409 	so = wg_get_so_by_peer(wgp, wgsa);
4410 	solock(so);
4411 	switch (wgsatosa(wgsa)->sa_family) {
4412 #ifdef INET
4413 	case AF_INET:
4414 		error = udp_send(so, m, wgsatosa(wgsa), NULL, curlwp);
4415 		break;
4416 #endif
4417 #ifdef INET6
4418 	case AF_INET6:
4419 		error = udp6_output(sotoinpcb(so), m, wgsatosin6(wgsa),
4420 		    NULL, curlwp);
4421 		break;
4422 #endif
4423 	default:
4424 		m_freem(m);
4425 		error = EPFNOSUPPORT;
4426 	}
4427 	sounlock(so);
4428 	wg_put_sa(wgp, wgsa, &psref);
4429 
4430 	return error;
4431 }
4432 
4433 /* Inspired by pppoe_get_mbuf */
4434 static struct mbuf *
4435 wg_get_mbuf(size_t leading_len, size_t len)
4436 {
4437 	struct mbuf *m;
4438 
4439 	KASSERT(leading_len <= MCLBYTES);
4440 	KASSERT(len <= MCLBYTES - leading_len);
4441 
4442 	m = m_gethdr(M_DONTWAIT, MT_DATA);
4443 	if (m == NULL)
4444 		return NULL;
4445 	if (len + leading_len > MHLEN) {
4446 		m_clget(m, M_DONTWAIT);
4447 		if ((m->m_flags & M_EXT) == 0) {
4448 			m_free(m);
4449 			return NULL;
4450 		}
4451 	}
4452 	m->m_data += leading_len;
4453 	m->m_pkthdr.len = m->m_len = len;
4454 
4455 	return m;
4456 }
4457 
4458 static void
4459 wg_send_data_msg(struct wg_peer *wgp, struct wg_session *wgs, struct mbuf *m)
4460 {
4461 	struct wg_softc *wg = wgp->wgp_sc;
4462 	int error;
4463 	size_t inner_len, padded_len, encrypted_len;
4464 	char *padded_buf = NULL;
4465 	size_t mlen;
4466 	struct wg_msg_data *wgmd;
4467 	bool free_padded_buf = false;
4468 	struct mbuf *n;
4469 	size_t leading_len = max_hdr + sizeof(struct udphdr);
4470 
4471 	mlen = m_length(m);
4472 	inner_len = mlen;
4473 	padded_len = roundup(mlen, 16);
4474 	encrypted_len = padded_len + WG_AUTHTAG_LEN;
4475 	WG_DLOG("inner=%zu, padded=%zu, encrypted_len=%zu\n",
4476 	    inner_len, padded_len, encrypted_len);
4477 	if (mlen != 0) {
4478 		bool success;
4479 		success = m_ensure_contig(&m, padded_len);
4480 		if (success) {
4481 			padded_buf = mtod(m, char *);
4482 		} else {
4483 			padded_buf = kmem_intr_alloc(padded_len, KM_NOSLEEP);
4484 			if (padded_buf == NULL) {
4485 				error = ENOBUFS;
4486 				goto out;
4487 			}
4488 			free_padded_buf = true;
4489 			m_copydata(m, 0, mlen, padded_buf);
4490 		}
4491 		memset(padded_buf + mlen, 0, padded_len - inner_len);
4492 	}
4493 
4494 	n = wg_get_mbuf(leading_len, sizeof(*wgmd) + encrypted_len);
4495 	if (n == NULL) {
4496 		error = ENOBUFS;
4497 		goto out;
4498 	}
4499 	KASSERT(n->m_len >= sizeof(*wgmd));
4500 	wgmd = mtod(n, struct wg_msg_data *);
4501 	wg_fill_msg_data(wg, wgp, wgs, wgmd);
4502 
4503 	/* [W] 5.4.6: AEAD(Tm^send, Nm^send, P, e) */
4504 	wg_algo_aead_enc((char *)wgmd + sizeof(*wgmd), encrypted_len,
4505 	    wgs->wgs_tkey_send, le64toh(wgmd->wgmd_counter),
4506 	    padded_buf, padded_len,
4507 	    NULL, 0);
4508 
4509 	error = wg->wg_ops->send_data_msg(wgp, n); /* consumes n */
4510 	if (error) {
4511 		WG_DLOG("send_data_msg failed, error=%d\n", error);
4512 		goto out;
4513 	}
4514 
4515 	/*
4516 	 * Packet was sent out -- count it in the interface statistics.
4517 	 */
4518 	if_statadd(&wg->wg_if, if_obytes, mlen);
4519 	if_statinc(&wg->wg_if, if_opackets);
4520 
4521 	/*
4522 	 * Record when we last sent data, for determining when we need
4523 	 * to send a passive keepalive.
4524 	 *
4525 	 * Other logic assumes that wgs_time_last_data_sent is zero iff
4526 	 * we have never sent data on this session.  Early at boot, if
4527 	 * wg(4) starts operating within <1sec, or after 136 years of
4528 	 * uptime, we may observe time_uptime32 = 0.  In that case,
4529 	 * pretend we observed 1 instead.  That way, we correctly
4530 	 * indicate we have sent data on this session; the only logic
4531 	 * this might adversely affect is the keepalive timeout
4532 	 * detection, which might spuriously send a keepalive during
4533 	 * one second every 136 years.  All of this is very silly, of
4534 	 * course, but the cost to guaranteeing wgs_time_last_data_sent
4535 	 * is nonzero is negligible here.
4536 	 */
4537 	const uint32_t now = time_uptime32;
4538 	atomic_store_relaxed(&wgs->wgs_time_last_data_sent, MAX(now, 1));
4539 
4540 	/*
4541 	 * Check rekey-after-time.
4542 	 */
4543 	if (wgs->wgs_is_initiator &&
4544 	    now - wgs->wgs_time_established >= wg_rekey_after_time) {
4545 		/*
4546 		 * [W] 6.2 Transport Message Limits
4547 		 * "if a peer is the initiator of a current secure
4548 		 *  session, WireGuard will send a handshake initiation
4549 		 *  message to begin a new secure session if, after
4550 		 *  transmitting a transport data message, the current
4551 		 *  secure session is REKEY-AFTER-TIME seconds old,"
4552 		 */
4553 		WG_TRACE("rekey after time");
4554 		atomic_store_relaxed(&wgs->wgs_force_rekey, true);
4555 		wg_schedule_peer_task(wgp, WGP_TASK_SEND_INIT_MESSAGE);
4556 	}
4557 
4558 	/*
4559 	 * Check rekey-after-messages.
4560 	 */
4561 	if (wg_session_get_send_counter(wgs) >= wg_rekey_after_messages) {
4562 		/*
4563 		 * [W] 6.2 Transport Message Limits
4564 		 * "WireGuard will try to create a new session, by
4565 		 *  sending a handshake initiation message (section
4566 		 *  5.4.2), after it has sent REKEY-AFTER-MESSAGES
4567 		 *  transport data messages..."
4568 		 */
4569 		WG_TRACE("rekey after messages");
4570 		atomic_store_relaxed(&wgs->wgs_force_rekey, true);
4571 		wg_schedule_peer_task(wgp, WGP_TASK_SEND_INIT_MESSAGE);
4572 	}
4573 
4574 out:	m_freem(m);
4575 	if (free_padded_buf)
4576 		kmem_intr_free(padded_buf, padded_len);
4577 }
4578 
4579 static void
4580 wg_input(struct ifnet *ifp, struct mbuf *m, const int af)
4581 {
4582 	pktqueue_t *pktq;
4583 	size_t pktlen;
4584 
4585 	KASSERT(af == AF_INET || af == AF_INET6);
4586 
4587 	WG_TRACE("");
4588 
4589 	m_set_rcvif(m, ifp);
4590 	pktlen = m->m_pkthdr.len;
4591 
4592 	bpf_mtap_af(ifp, af, m, BPF_D_IN);
4593 
4594 	switch (af) {
4595 #ifdef INET
4596 	case AF_INET:
4597 		pktq = ip_pktq;
4598 		break;
4599 #endif
4600 #ifdef INET6
4601 	case AF_INET6:
4602 		pktq = ip6_pktq;
4603 		break;
4604 #endif
4605 	default:
4606 		panic("invalid af=%d", af);
4607 	}
4608 
4609 	kpreempt_disable();
4610 	const u_int h = curcpu()->ci_index;
4611 	if (__predict_true(pktq_enqueue(pktq, m, h))) {
4612 		if_statadd(ifp, if_ibytes, pktlen);
4613 		if_statinc(ifp, if_ipackets);
4614 	} else {
4615 		m_freem(m);
4616 	}
4617 	kpreempt_enable();
4618 }
4619 
4620 static void
4621 wg_calc_pubkey(uint8_t pubkey[static WG_STATIC_KEY_LEN],
4622     const uint8_t privkey[static WG_STATIC_KEY_LEN])
4623 {
4624 
4625 	crypto_scalarmult_base(pubkey, privkey);
4626 }
4627 
4628 static int
4629 wg_rtable_add_route(struct wg_softc *wg, struct wg_allowedip *wga)
4630 {
4631 	struct radix_node_head *rnh;
4632 	struct radix_node *rn;
4633 	int error = 0;
4634 
4635 	rw_enter(wg->wg_rwlock, RW_WRITER);
4636 	rnh = wg_rnh(wg, wga->wga_family);
4637 	KASSERT(rnh != NULL);
4638 	rn = rnh->rnh_addaddr(&wga->wga_sa_addr, &wga->wga_sa_mask, rnh,
4639 	    wga->wga_nodes);
4640 	rw_exit(wg->wg_rwlock);
4641 
4642 	if (rn == NULL)
4643 		error = EEXIST;
4644 
4645 	return error;
4646 }
4647 
4648 static int
4649 wg_handle_prop_peer(struct wg_softc *wg, prop_dictionary_t peer,
4650     struct wg_peer **wgpp)
4651 {
4652 	int error = 0;
4653 	const void *pubkey;
4654 	size_t pubkey_len;
4655 	const void *psk;
4656 	size_t psk_len;
4657 	const char *name = NULL;
4658 
4659 	if (prop_dictionary_get_string(peer, "name", &name)) {
4660 		if (strlen(name) > WG_PEER_NAME_MAXLEN) {
4661 			error = EINVAL;
4662 			goto out;
4663 		}
4664 	}
4665 
4666 	if (!prop_dictionary_get_data(peer, "public_key",
4667 		&pubkey, &pubkey_len)) {
4668 		error = EINVAL;
4669 		goto out;
4670 	}
4671 #ifdef WG_DEBUG_DUMP
4672         if (wg_debug & WG_DEBUG_FLAGS_DUMP) {
4673 		char *hex = gethexdump(pubkey, pubkey_len);
4674 		log(LOG_DEBUG, "pubkey=%p, pubkey_len=%zu\n%s\n",
4675 		    pubkey, pubkey_len, hex);
4676 		puthexdump(hex, pubkey, pubkey_len);
4677 	}
4678 #endif
4679 
4680 	struct wg_peer *wgp = wg_alloc_peer(wg);
4681 	memcpy(wgp->wgp_pubkey, pubkey, sizeof(wgp->wgp_pubkey));
4682 	if (name != NULL)
4683 		strncpy(wgp->wgp_name, name, sizeof(wgp->wgp_name));
4684 
4685 	if (prop_dictionary_get_data(peer, "preshared_key", &psk, &psk_len)) {
4686 		if (psk_len != sizeof(wgp->wgp_psk)) {
4687 			error = EINVAL;
4688 			goto out;
4689 		}
4690 		memcpy(wgp->wgp_psk, psk, sizeof(wgp->wgp_psk));
4691 	}
4692 
4693 	const void *addr;
4694 	size_t addr_len;
4695 	struct wg_sockaddr *wgsa = wgp->wgp_endpoint;
4696 
4697 	if (!prop_dictionary_get_data(peer, "endpoint", &addr, &addr_len))
4698 		goto skip_endpoint;
4699 	if (addr_len < sizeof(*wgsatosa(wgsa)) ||
4700 	    addr_len > sizeof(*wgsatoss(wgsa))) {
4701 		error = EINVAL;
4702 		goto out;
4703 	}
4704 	memcpy(wgsatoss(wgsa), addr, addr_len);
4705 	switch (wgsa_family(wgsa)) {
4706 #ifdef INET
4707 	case AF_INET:
4708 		break;
4709 #endif
4710 #ifdef INET6
4711 	case AF_INET6:
4712 		break;
4713 #endif
4714 	default:
4715 		error = EPFNOSUPPORT;
4716 		goto out;
4717 	}
4718 	if (addr_len != sockaddr_getsize_by_family(wgsa_family(wgsa))) {
4719 		error = EINVAL;
4720 		goto out;
4721 	}
4722     {
4723 	char addrstr[128];
4724 	sockaddr_format(wgsatosa(wgsa), addrstr, sizeof(addrstr));
4725 	WG_DLOG("addr=%s\n", addrstr);
4726     }
4727 	wgp->wgp_endpoint_available = true;
4728 
4729 	prop_array_t allowedips;
4730 skip_endpoint:
4731 	allowedips = prop_dictionary_get(peer, "allowedips");
4732 	if (allowedips == NULL)
4733 		goto skip;
4734 
4735 	prop_object_iterator_t _it = prop_array_iterator(allowedips);
4736 	prop_dictionary_t prop_allowedip;
4737 	int j = 0;
4738 	while ((prop_allowedip = prop_object_iterator_next(_it)) != NULL) {
4739 		struct wg_allowedip *wga = &wgp->wgp_allowedips[j];
4740 
4741 		if (!prop_dictionary_get_int(prop_allowedip, "family",
4742 			&wga->wga_family))
4743 			continue;
4744 		if (!prop_dictionary_get_data(prop_allowedip, "ip",
4745 			&addr, &addr_len))
4746 			continue;
4747 		if (!prop_dictionary_get_uint8(prop_allowedip, "cidr",
4748 			&wga->wga_cidr))
4749 			continue;
4750 
4751 		switch (wga->wga_family) {
4752 #ifdef INET
4753 		case AF_INET: {
4754 			struct sockaddr_in sin;
4755 			char addrstr[128];
4756 			struct in_addr mask;
4757 			struct sockaddr_in sin_mask;
4758 
4759 			if (addr_len != sizeof(struct in_addr))
4760 				return EINVAL;
4761 			memcpy(&wga->wga_addr4, addr, addr_len);
4762 
4763 			sockaddr_in_init(&sin, (const struct in_addr *)addr,
4764 			    0);
4765 			sockaddr_copy(&wga->wga_sa_addr,
4766 			    sizeof(sin), sintosa(&sin));
4767 
4768 			sockaddr_format(sintosa(&sin),
4769 			    addrstr, sizeof(addrstr));
4770 			WG_DLOG("addr=%s/%d\n", addrstr, wga->wga_cidr);
4771 
4772 			in_len2mask(&mask, wga->wga_cidr);
4773 			sockaddr_in_init(&sin_mask, &mask, 0);
4774 			sockaddr_copy(&wga->wga_sa_mask,
4775 			    sizeof(sin_mask), sintosa(&sin_mask));
4776 
4777 			break;
4778 		    }
4779 #endif
4780 #ifdef INET6
4781 		case AF_INET6: {
4782 			struct sockaddr_in6 sin6;
4783 			char addrstr[128];
4784 			struct in6_addr mask;
4785 			struct sockaddr_in6 sin6_mask;
4786 
4787 			if (addr_len != sizeof(struct in6_addr))
4788 				return EINVAL;
4789 			memcpy(&wga->wga_addr6, addr, addr_len);
4790 
4791 			sockaddr_in6_init(&sin6, (const struct in6_addr *)addr,
4792 			    0, 0, 0);
4793 			sockaddr_copy(&wga->wga_sa_addr,
4794 			    sizeof(sin6), sin6tosa(&sin6));
4795 
4796 			sockaddr_format(sin6tosa(&sin6),
4797 			    addrstr, sizeof(addrstr));
4798 			WG_DLOG("addr=%s/%d\n", addrstr, wga->wga_cidr);
4799 
4800 			in6_prefixlen2mask(&mask, wga->wga_cidr);
4801 			sockaddr_in6_init(&sin6_mask, &mask, 0, 0, 0);
4802 			sockaddr_copy(&wga->wga_sa_mask,
4803 			    sizeof(sin6_mask), sin6tosa(&sin6_mask));
4804 
4805 			break;
4806 		    }
4807 #endif
4808 		default:
4809 			error = EINVAL;
4810 			goto out;
4811 		}
4812 		wga->wga_peer = wgp;
4813 
4814 		error = wg_rtable_add_route(wg, wga);
4815 		if (error != 0)
4816 			goto out;
4817 
4818 		j++;
4819 	}
4820 	wgp->wgp_n_allowedips = j;
4821 skip:
4822 	*wgpp = wgp;
4823 out:
4824 	return error;
4825 }
4826 
4827 static int
4828 wg_alloc_prop_buf(char **_buf, struct ifdrv *ifd)
4829 {
4830 	int error;
4831 	char *buf;
4832 
4833 	WG_DLOG("buf=%p, len=%zu\n", ifd->ifd_data, ifd->ifd_len);
4834 	if (ifd->ifd_len >= WG_MAX_PROPLEN)
4835 		return E2BIG;
4836 	buf = kmem_alloc(ifd->ifd_len + 1, KM_SLEEP);
4837 	error = copyin(ifd->ifd_data, buf, ifd->ifd_len);
4838 	if (error != 0)
4839 		return error;
4840 	buf[ifd->ifd_len] = '\0';
4841 #ifdef WG_DEBUG_DUMP
4842 	if (wg_debug & WG_DEBUG_FLAGS_DUMP) {
4843 		log(LOG_DEBUG, "%.*s\n", (int)MIN(INT_MAX, ifd->ifd_len),
4844 		    (const char *)buf);
4845 	}
4846 #endif
4847 	*_buf = buf;
4848 	return 0;
4849 }
4850 
4851 static int
4852 wg_ioctl_set_private_key(struct wg_softc *wg, struct ifdrv *ifd)
4853 {
4854 	int error;
4855 	prop_dictionary_t prop_dict;
4856 	char *buf = NULL;
4857 	const void *privkey;
4858 	size_t privkey_len;
4859 
4860 	error = wg_alloc_prop_buf(&buf, ifd);
4861 	if (error != 0)
4862 		return error;
4863 	error = EINVAL;
4864 	prop_dict = prop_dictionary_internalize(buf);
4865 	if (prop_dict == NULL)
4866 		goto out;
4867 	if (!prop_dictionary_get_data(prop_dict, "private_key",
4868 		&privkey, &privkey_len))
4869 		goto out;
4870 #ifdef WG_DEBUG_DUMP
4871 	if (wg_debug & WG_DEBUG_FLAGS_DUMP) {
4872 		char *hex = gethexdump(privkey, privkey_len);
4873 		log(LOG_DEBUG, "privkey=%p, privkey_len=%zu\n%s\n",
4874 		    privkey, privkey_len, hex);
4875 		puthexdump(hex, privkey, privkey_len);
4876 	}
4877 #endif
4878 	if (privkey_len != WG_STATIC_KEY_LEN)
4879 		goto out;
4880 	memcpy(wg->wg_privkey, privkey, WG_STATIC_KEY_LEN);
4881 	wg_calc_pubkey(wg->wg_pubkey, wg->wg_privkey);
4882 	error = 0;
4883 
4884 out:
4885 	kmem_free(buf, ifd->ifd_len + 1);
4886 	return error;
4887 }
4888 
4889 static int
4890 wg_ioctl_set_listen_port(struct wg_softc *wg, struct ifdrv *ifd)
4891 {
4892 	int error;
4893 	prop_dictionary_t prop_dict;
4894 	char *buf = NULL;
4895 	uint16_t port;
4896 
4897 	error = wg_alloc_prop_buf(&buf, ifd);
4898 	if (error != 0)
4899 		return error;
4900 	error = EINVAL;
4901 	prop_dict = prop_dictionary_internalize(buf);
4902 	if (prop_dict == NULL)
4903 		goto out;
4904 	if (!prop_dictionary_get_uint16(prop_dict, "listen_port", &port))
4905 		goto out;
4906 
4907 	error = wg->wg_ops->bind_port(wg, (uint16_t)port);
4908 
4909 out:
4910 	kmem_free(buf, ifd->ifd_len + 1);
4911 	return error;
4912 }
4913 
4914 static int
4915 wg_ioctl_add_peer(struct wg_softc *wg, struct ifdrv *ifd)
4916 {
4917 	int error;
4918 	prop_dictionary_t prop_dict;
4919 	char *buf = NULL;
4920 	struct wg_peer *wgp = NULL, *wgp0 __diagused;
4921 
4922 	error = wg_alloc_prop_buf(&buf, ifd);
4923 	if (error != 0)
4924 		return error;
4925 	error = EINVAL;
4926 	prop_dict = prop_dictionary_internalize(buf);
4927 	if (prop_dict == NULL)
4928 		goto out;
4929 
4930 	error = wg_handle_prop_peer(wg, prop_dict, &wgp);
4931 	if (error != 0)
4932 		goto out;
4933 
4934 	mutex_enter(wg->wg_lock);
4935 	if (thmap_get(wg->wg_peers_bypubkey, wgp->wgp_pubkey,
4936 		sizeof(wgp->wgp_pubkey)) != NULL ||
4937 	    (wgp->wgp_name[0] &&
4938 		thmap_get(wg->wg_peers_byname, wgp->wgp_name,
4939 		    strlen(wgp->wgp_name)) != NULL)) {
4940 		mutex_exit(wg->wg_lock);
4941 		wg_destroy_peer(wgp);
4942 		error = EEXIST;
4943 		goto out;
4944 	}
4945 	wgp0 = thmap_put(wg->wg_peers_bypubkey, wgp->wgp_pubkey,
4946 	    sizeof(wgp->wgp_pubkey), wgp);
4947 	KASSERT(wgp0 == wgp);
4948 	if (wgp->wgp_name[0]) {
4949 		wgp0 = thmap_put(wg->wg_peers_byname, wgp->wgp_name,
4950 		    strlen(wgp->wgp_name), wgp);
4951 		KASSERT(wgp0 == wgp);
4952 	}
4953 	WG_PEER_WRITER_INSERT_HEAD(wgp, wg);
4954 	wg->wg_npeers++;
4955 	mutex_exit(wg->wg_lock);
4956 
4957 	if_link_state_change(&wg->wg_if, LINK_STATE_UP);
4958 
4959 out:
4960 	kmem_free(buf, ifd->ifd_len + 1);
4961 	return error;
4962 }
4963 
4964 static int
4965 wg_ioctl_delete_peer(struct wg_softc *wg, struct ifdrv *ifd)
4966 {
4967 	int error;
4968 	prop_dictionary_t prop_dict;
4969 	char *buf = NULL;
4970 	const char *name;
4971 
4972 	error = wg_alloc_prop_buf(&buf, ifd);
4973 	if (error != 0)
4974 		return error;
4975 	error = EINVAL;
4976 	prop_dict = prop_dictionary_internalize(buf);
4977 	if (prop_dict == NULL)
4978 		goto out;
4979 
4980 	if (!prop_dictionary_get_string(prop_dict, "name", &name))
4981 		goto out;
4982 	if (strlen(name) > WG_PEER_NAME_MAXLEN)
4983 		goto out;
4984 
4985 	error = wg_destroy_peer_name(wg, name);
4986 out:
4987 	kmem_free(buf, ifd->ifd_len + 1);
4988 	return error;
4989 }
4990 
4991 static bool
4992 wg_is_authorized(struct wg_softc *wg, u_long cmd)
4993 {
4994 	int au = cmd == SIOCGDRVSPEC ?
4995 	    KAUTH_REQ_NETWORK_INTERFACE_WG_GETPRIV :
4996 	    KAUTH_REQ_NETWORK_INTERFACE_WG_SETPRIV;
4997 	return kauth_authorize_network(kauth_cred_get(),
4998 	    KAUTH_NETWORK_INTERFACE_WG, au, &wg->wg_if,
4999 	    (void *)cmd, NULL) == 0;
5000 }
5001 
5002 static int
5003 wg_ioctl_get(struct wg_softc *wg, struct ifdrv *ifd)
5004 {
5005 	int error = ENOMEM;
5006 	prop_dictionary_t prop_dict;
5007 	prop_array_t peers = NULL;
5008 	char *buf;
5009 	struct wg_peer *wgp;
5010 	int s, i;
5011 
5012 	prop_dict = prop_dictionary_create();
5013 	if (prop_dict == NULL)
5014 		goto error;
5015 
5016 	if (wg_is_authorized(wg, SIOCGDRVSPEC)) {
5017 		if (!prop_dictionary_set_data(prop_dict, "private_key",
5018 			wg->wg_privkey, WG_STATIC_KEY_LEN))
5019 			goto error;
5020 	}
5021 
5022 	if (wg->wg_listen_port != 0) {
5023 		if (!prop_dictionary_set_uint16(prop_dict, "listen_port",
5024 			wg->wg_listen_port))
5025 			goto error;
5026 	}
5027 
5028 	if (wg->wg_npeers == 0)
5029 		goto skip_peers;
5030 
5031 	peers = prop_array_create();
5032 	if (peers == NULL)
5033 		goto error;
5034 
5035 	s = pserialize_read_enter();
5036 	i = 0;
5037 	WG_PEER_READER_FOREACH(wgp, wg) {
5038 		struct wg_sockaddr *wgsa;
5039 		struct psref wgp_psref, wgsa_psref;
5040 		prop_dictionary_t prop_peer;
5041 
5042 		wg_get_peer(wgp, &wgp_psref);
5043 		pserialize_read_exit(s);
5044 
5045 		prop_peer = prop_dictionary_create();
5046 		if (prop_peer == NULL)
5047 			goto next;
5048 
5049 		if (strlen(wgp->wgp_name) > 0) {
5050 			if (!prop_dictionary_set_string(prop_peer, "name",
5051 				wgp->wgp_name))
5052 				goto next;
5053 		}
5054 
5055 		if (!prop_dictionary_set_data(prop_peer, "public_key",
5056 			wgp->wgp_pubkey, sizeof(wgp->wgp_pubkey)))
5057 			goto next;
5058 
5059 		uint8_t psk_zero[WG_PRESHARED_KEY_LEN] = {0};
5060 		if (!consttime_memequal(wgp->wgp_psk, psk_zero,
5061 			sizeof(wgp->wgp_psk))) {
5062 			if (wg_is_authorized(wg, SIOCGDRVSPEC)) {
5063 				if (!prop_dictionary_set_data(prop_peer,
5064 					"preshared_key",
5065 					wgp->wgp_psk, sizeof(wgp->wgp_psk)))
5066 					goto next;
5067 			}
5068 		}
5069 
5070 		wgsa = wg_get_endpoint_sa(wgp, &wgsa_psref);
5071 		CTASSERT(AF_UNSPEC == 0);
5072 		if (wgsa_family(wgsa) != 0 /*AF_UNSPEC*/ &&
5073 		    !prop_dictionary_set_data(prop_peer, "endpoint",
5074 			wgsatoss(wgsa),
5075 			sockaddr_getsize_by_family(wgsa_family(wgsa)))) {
5076 			wg_put_sa(wgp, wgsa, &wgsa_psref);
5077 			goto next;
5078 		}
5079 		wg_put_sa(wgp, wgsa, &wgsa_psref);
5080 
5081 		const struct timespec *t = &wgp->wgp_last_handshake_time;
5082 
5083 		if (!prop_dictionary_set_uint64(prop_peer,
5084 			"last_handshake_time_sec", (uint64_t)t->tv_sec))
5085 			goto next;
5086 		if (!prop_dictionary_set_uint32(prop_peer,
5087 			"last_handshake_time_nsec", (uint32_t)t->tv_nsec))
5088 			goto next;
5089 
5090 		if (wgp->wgp_n_allowedips == 0)
5091 			goto skip_allowedips;
5092 
5093 		prop_array_t allowedips = prop_array_create();
5094 		if (allowedips == NULL)
5095 			goto next;
5096 		for (int j = 0; j < wgp->wgp_n_allowedips; j++) {
5097 			struct wg_allowedip *wga = &wgp->wgp_allowedips[j];
5098 			prop_dictionary_t prop_allowedip;
5099 
5100 			prop_allowedip = prop_dictionary_create();
5101 			if (prop_allowedip == NULL)
5102 				break;
5103 
5104 			if (!prop_dictionary_set_int(prop_allowedip, "family",
5105 				wga->wga_family))
5106 				goto _next;
5107 			if (!prop_dictionary_set_uint8(prop_allowedip, "cidr",
5108 				wga->wga_cidr))
5109 				goto _next;
5110 
5111 			switch (wga->wga_family) {
5112 #ifdef INET
5113 			case AF_INET:
5114 				if (!prop_dictionary_set_data(prop_allowedip,
5115 					"ip", &wga->wga_addr4,
5116 					sizeof(wga->wga_addr4)))
5117 					goto _next;
5118 				break;
5119 #endif
5120 #ifdef INET6
5121 			case AF_INET6:
5122 				if (!prop_dictionary_set_data(prop_allowedip,
5123 					"ip", &wga->wga_addr6,
5124 					sizeof(wga->wga_addr6)))
5125 					goto _next;
5126 				break;
5127 #endif
5128 			default:
5129 				panic("invalid af=%d", wga->wga_family);
5130 			}
5131 			prop_array_set(allowedips, j, prop_allowedip);
5132 		_next:
5133 			prop_object_release(prop_allowedip);
5134 		}
5135 		prop_dictionary_set(prop_peer, "allowedips", allowedips);
5136 		prop_object_release(allowedips);
5137 
5138 	skip_allowedips:
5139 
5140 		prop_array_set(peers, i, prop_peer);
5141 	next:
5142 		if (prop_peer)
5143 			prop_object_release(prop_peer);
5144 		i++;
5145 
5146 		s = pserialize_read_enter();
5147 		wg_put_peer(wgp, &wgp_psref);
5148 	}
5149 	pserialize_read_exit(s);
5150 
5151 	prop_dictionary_set(prop_dict, "peers", peers);
5152 	prop_object_release(peers);
5153 	peers = NULL;
5154 
5155 skip_peers:
5156 	buf = prop_dictionary_externalize(prop_dict);
5157 	if (buf == NULL)
5158 		goto error;
5159 	if (ifd->ifd_len < (strlen(buf) + 1)) {
5160 		error = EINVAL;
5161 		goto error;
5162 	}
5163 	error = copyout(buf, ifd->ifd_data, strlen(buf) + 1);
5164 
5165 	free(buf, 0);
5166 error:
5167 	if (peers != NULL)
5168 		prop_object_release(peers);
5169 	if (prop_dict != NULL)
5170 		prop_object_release(prop_dict);
5171 
5172 	return error;
5173 }
5174 
5175 static int
5176 wg_ioctl(struct ifnet *ifp, u_long cmd, void *data)
5177 {
5178 	struct wg_softc *wg = ifp->if_softc;
5179 	struct ifreq *ifr = data;
5180 	struct ifaddr *ifa = data;
5181 	struct ifdrv *ifd = data;
5182 	int error = 0;
5183 
5184 	switch (cmd) {
5185 	case SIOCINITIFADDR:
5186 		if (ifa->ifa_addr->sa_family != AF_LINK &&
5187 		    (ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
5188 		    (IFF_UP | IFF_RUNNING)) {
5189 			ifp->if_flags |= IFF_UP;
5190 			error = if_init(ifp);
5191 		}
5192 		return error;
5193 	case SIOCADDMULTI:
5194 	case SIOCDELMULTI:
5195 		switch (ifr->ifr_addr.sa_family) {
5196 #ifdef INET
5197 		case AF_INET:	/* IP supports Multicast */
5198 			break;
5199 #endif
5200 #ifdef INET6
5201 		case AF_INET6:	/* IP6 supports Multicast */
5202 			break;
5203 #endif
5204 		default:  /* Other protocols doesn't support Multicast */
5205 			error = EAFNOSUPPORT;
5206 			break;
5207 		}
5208 		return error;
5209 	case SIOCSDRVSPEC:
5210 		if (!wg_is_authorized(wg, cmd)) {
5211 			return EPERM;
5212 		}
5213 		switch (ifd->ifd_cmd) {
5214 		case WG_IOCTL_SET_PRIVATE_KEY:
5215 			error = wg_ioctl_set_private_key(wg, ifd);
5216 			break;
5217 		case WG_IOCTL_SET_LISTEN_PORT:
5218 			error = wg_ioctl_set_listen_port(wg, ifd);
5219 			break;
5220 		case WG_IOCTL_ADD_PEER:
5221 			error = wg_ioctl_add_peer(wg, ifd);
5222 			break;
5223 		case WG_IOCTL_DELETE_PEER:
5224 			error = wg_ioctl_delete_peer(wg, ifd);
5225 			break;
5226 		default:
5227 			error = EINVAL;
5228 			break;
5229 		}
5230 		return error;
5231 	case SIOCGDRVSPEC:
5232 		return wg_ioctl_get(wg, ifd);
5233 	case SIOCSIFFLAGS:
5234 		if ((error = ifioctl_common(ifp, cmd, data)) != 0)
5235 			break;
5236 		switch (ifp->if_flags & (IFF_UP|IFF_RUNNING)) {
5237 		case IFF_RUNNING:
5238 			/*
5239 			 * If interface is marked down and it is running,
5240 			 * then stop and disable it.
5241 			 */
5242 			if_stop(ifp, 1);
5243 			break;
5244 		case IFF_UP:
5245 			/*
5246 			 * If interface is marked up and it is stopped, then
5247 			 * start it.
5248 			 */
5249 			error = if_init(ifp);
5250 			break;
5251 		default:
5252 			break;
5253 		}
5254 		return error;
5255 #ifdef WG_RUMPKERNEL
5256 	case SIOCSLINKSTR:
5257 		error = wg_ioctl_linkstr(wg, ifd);
5258 		if (error)
5259 			return error;
5260 		wg->wg_ops = &wg_ops_rumpuser;
5261 		return 0;
5262 #endif
5263 	default:
5264 		break;
5265 	}
5266 
5267 	error = ifioctl_common(ifp, cmd, data);
5268 
5269 #ifdef WG_RUMPKERNEL
5270 	if (!wg_user_mode(wg))
5271 		return error;
5272 
5273 	/* Do the same to the corresponding tun device on the host */
5274 	/*
5275 	 * XXX Actually the command has not been handled yet.  It
5276 	 *     will be handled via pr_ioctl form doifioctl later.
5277 	 */
5278 	switch (cmd) {
5279 #ifdef INET
5280 	case SIOCAIFADDR:
5281 	case SIOCDIFADDR: {
5282 		struct in_aliasreq _ifra = *(const struct in_aliasreq *)data;
5283 		struct in_aliasreq *ifra = &_ifra;
5284 		KASSERT(error == ENOTTY);
5285 		strncpy(ifra->ifra_name, rumpuser_wg_get_tunname(wg->wg_user),
5286 		    IFNAMSIZ);
5287 		error = rumpuser_wg_ioctl(wg->wg_user, cmd, ifra, AF_INET);
5288 		if (error == 0)
5289 			error = ENOTTY;
5290 		break;
5291 	}
5292 #endif
5293 #ifdef INET6
5294 	case SIOCAIFADDR_IN6:
5295 	case SIOCDIFADDR_IN6: {
5296 		struct in6_aliasreq _ifra = *(const struct in6_aliasreq *)data;
5297 		struct in6_aliasreq *ifra = &_ifra;
5298 		KASSERT(error == ENOTTY);
5299 		strncpy(ifra->ifra_name, rumpuser_wg_get_tunname(wg->wg_user),
5300 		    IFNAMSIZ);
5301 		error = rumpuser_wg_ioctl(wg->wg_user, cmd, ifra, AF_INET6);
5302 		if (error == 0)
5303 			error = ENOTTY;
5304 		break;
5305 	}
5306 #endif
5307 	default:
5308 		break;
5309 	}
5310 #endif /* WG_RUMPKERNEL */
5311 
5312 	return error;
5313 }
5314 
5315 static int
5316 wg_init(struct ifnet *ifp)
5317 {
5318 
5319 	ifp->if_flags |= IFF_RUNNING;
5320 
5321 	/* TODO flush pending packets. */
5322 	return 0;
5323 }
5324 
5325 #ifdef ALTQ
5326 static void
5327 wg_start(struct ifnet *ifp)
5328 {
5329 	struct mbuf *m;
5330 
5331 	for (;;) {
5332 		IFQ_DEQUEUE(&ifp->if_snd, m);
5333 		if (m == NULL)
5334 			break;
5335 
5336 		kpreempt_disable();
5337 		const uint32_t h = curcpu()->ci_index;	// pktq_rps_hash(m)
5338 		if (__predict_false(!pktq_enqueue(wg_pktq, m, h))) {
5339 			WGLOG(LOG_ERR, "%s: pktq full, dropping\n",
5340 			    if_name(ifp));
5341 			m_freem(m);
5342 		}
5343 		kpreempt_enable();
5344 	}
5345 }
5346 #endif
5347 
5348 static void
5349 wg_stop(struct ifnet *ifp, int disable)
5350 {
5351 
5352 	KASSERT((ifp->if_flags & IFF_RUNNING) != 0);
5353 	ifp->if_flags &= ~IFF_RUNNING;
5354 
5355 	/* Need to do something? */
5356 }
5357 
5358 #ifdef WG_DEBUG_PARAMS
5359 SYSCTL_SETUP(sysctl_net_wg_setup, "sysctl net.wg setup")
5360 {
5361 	const struct sysctlnode *node = NULL;
5362 
5363 	sysctl_createv(clog, 0, NULL, &node,
5364 	    CTLFLAG_PERMANENT,
5365 	    CTLTYPE_NODE, "wg",
5366 	    SYSCTL_DESCR("wg(4)"),
5367 	    NULL, 0, NULL, 0,
5368 	    CTL_NET, CTL_CREATE, CTL_EOL);
5369 	sysctl_createv(clog, 0, &node, NULL,
5370 	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
5371 	    CTLTYPE_QUAD, "rekey_after_messages",
5372 	    SYSCTL_DESCR("session liftime by messages"),
5373 	    NULL, 0, &wg_rekey_after_messages, 0, CTL_CREATE, CTL_EOL);
5374 	sysctl_createv(clog, 0, &node, NULL,
5375 	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
5376 	    CTLTYPE_INT, "rekey_after_time",
5377 	    SYSCTL_DESCR("session liftime"),
5378 	    NULL, 0, &wg_rekey_after_time, 0, CTL_CREATE, CTL_EOL);
5379 	sysctl_createv(clog, 0, &node, NULL,
5380 	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
5381 	    CTLTYPE_INT, "rekey_timeout",
5382 	    SYSCTL_DESCR("session handshake retry time"),
5383 	    NULL, 0, &wg_rekey_timeout, 0, CTL_CREATE, CTL_EOL);
5384 	sysctl_createv(clog, 0, &node, NULL,
5385 	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
5386 	    CTLTYPE_INT, "rekey_attempt_time",
5387 	    SYSCTL_DESCR("session handshake timeout"),
5388 	    NULL, 0, &wg_rekey_attempt_time, 0, CTL_CREATE, CTL_EOL);
5389 	sysctl_createv(clog, 0, &node, NULL,
5390 	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
5391 	    CTLTYPE_INT, "keepalive_timeout",
5392 	    SYSCTL_DESCR("keepalive timeout"),
5393 	    NULL, 0, &wg_keepalive_timeout, 0, CTL_CREATE, CTL_EOL);
5394 	sysctl_createv(clog, 0, &node, NULL,
5395 	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
5396 	    CTLTYPE_BOOL, "force_underload",
5397 	    SYSCTL_DESCR("force to detemine under load"),
5398 	    NULL, 0, &wg_force_underload, 0, CTL_CREATE, CTL_EOL);
5399 	sysctl_createv(clog, 0, &node, NULL,
5400 	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
5401 	    CTLTYPE_INT, "debug",
5402 	    SYSCTL_DESCR("set debug flags 1=log 2=trace 4=dump 8=packet"),
5403 	    NULL, 0, &wg_debug, 0, CTL_CREATE, CTL_EOL);
5404 }
5405 #endif
5406 
5407 #ifdef WG_RUMPKERNEL
5408 static bool
5409 wg_user_mode(struct wg_softc *wg)
5410 {
5411 
5412 	return wg->wg_user != NULL;
5413 }
5414 
5415 static int
5416 wg_ioctl_linkstr(struct wg_softc *wg, struct ifdrv *ifd)
5417 {
5418 	struct ifnet *ifp = &wg->wg_if;
5419 	int error;
5420 
5421 	if (ifp->if_flags & IFF_UP)
5422 		return EBUSY;
5423 
5424 	if (ifd->ifd_cmd == IFLINKSTR_UNSET) {
5425 		/* XXX do nothing */
5426 		return 0;
5427 	} else if (ifd->ifd_cmd != 0) {
5428 		return EINVAL;
5429 	} else if (wg->wg_user != NULL) {
5430 		return EBUSY;
5431 	}
5432 
5433 	/* Assume \0 included */
5434 	if (ifd->ifd_len > IFNAMSIZ) {
5435 		return E2BIG;
5436 	} else if (ifd->ifd_len < 1) {
5437 		return EINVAL;
5438 	}
5439 
5440 	char tun_name[IFNAMSIZ];
5441 	error = copyinstr(ifd->ifd_data, tun_name, ifd->ifd_len, NULL);
5442 	if (error != 0)
5443 		return error;
5444 
5445 	if (strncmp(tun_name, "tun", 3) != 0)
5446 		return EINVAL;
5447 
5448 	error = rumpuser_wg_create(tun_name, wg, &wg->wg_user);
5449 
5450 	return error;
5451 }
5452 
5453 static int
5454 wg_send_user(struct wg_peer *wgp, struct mbuf *m)
5455 {
5456 	int error;
5457 	struct psref psref;
5458 	struct wg_sockaddr *wgsa;
5459 	struct wg_softc *wg = wgp->wgp_sc;
5460 	struct iovec iov[1];
5461 
5462 	wgsa = wg_get_endpoint_sa(wgp, &psref);
5463 
5464 	iov[0].iov_base = mtod(m, void *);
5465 	iov[0].iov_len = m->m_len;
5466 
5467 	/* Send messages to a peer via an ordinary socket. */
5468 	error = rumpuser_wg_send_peer(wg->wg_user, wgsatosa(wgsa), iov, 1);
5469 
5470 	wg_put_sa(wgp, wgsa, &psref);
5471 
5472 	m_freem(m);
5473 
5474 	return error;
5475 }
5476 
5477 static void
5478 wg_input_user(struct ifnet *ifp, struct mbuf *m, const int af)
5479 {
5480 	struct wg_softc *wg = ifp->if_softc;
5481 	struct iovec iov[2];
5482 	struct sockaddr_storage ss;
5483 
5484 	KASSERT(af == AF_INET || af == AF_INET6);
5485 
5486 	WG_TRACE("");
5487 
5488 	switch (af) {
5489 #ifdef INET
5490 	case AF_INET: {
5491 		struct sockaddr_in *sin = (struct sockaddr_in *)&ss;
5492 		struct ip *ip;
5493 
5494 		KASSERT(m->m_len >= sizeof(struct ip));
5495 		ip = mtod(m, struct ip *);
5496 		sockaddr_in_init(sin, &ip->ip_dst, 0);
5497 		break;
5498 	}
5499 #endif
5500 #ifdef INET6
5501 	case AF_INET6: {
5502 		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&ss;
5503 		struct ip6_hdr *ip6;
5504 
5505 		KASSERT(m->m_len >= sizeof(struct ip6_hdr));
5506 		ip6 = mtod(m, struct ip6_hdr *);
5507 		sockaddr_in6_init(sin6, &ip6->ip6_dst, 0, 0, 0);
5508 		break;
5509 	}
5510 #endif
5511 	default:
5512 		goto out;
5513 	}
5514 
5515 	iov[0].iov_base = &ss;
5516 	iov[0].iov_len = ss.ss_len;
5517 	iov[1].iov_base = mtod(m, void *);
5518 	iov[1].iov_len = m->m_len;
5519 
5520 	WG_DUMP_BUF(iov[1].iov_base, iov[1].iov_len);
5521 
5522 	/* Send decrypted packets to users via a tun. */
5523 	rumpuser_wg_send_user(wg->wg_user, iov, 2);
5524 
5525 out:	m_freem(m);
5526 }
5527 
5528 static int
5529 wg_bind_port_user(struct wg_softc *wg, const uint16_t port)
5530 {
5531 	int error;
5532 	uint16_t old_port = wg->wg_listen_port;
5533 
5534 	if (port != 0 && old_port == port)
5535 		return 0;
5536 
5537 	error = rumpuser_wg_sock_bind(wg->wg_user, port);
5538 	if (error)
5539 		return error;
5540 
5541 	wg->wg_listen_port = port;
5542 	return 0;
5543 }
5544 
5545 /*
5546  * Receive user packets.
5547  */
5548 void
5549 rumpkern_wg_recv_user(struct wg_softc *wg, struct iovec *iov, size_t iovlen)
5550 {
5551 	struct ifnet *ifp = &wg->wg_if;
5552 	struct mbuf *m;
5553 	const struct sockaddr *dst;
5554 	int error;
5555 
5556 	WG_TRACE("");
5557 
5558 	dst = iov[0].iov_base;
5559 
5560 	m = m_gethdr(M_DONTWAIT, MT_DATA);
5561 	if (m == NULL)
5562 		return;
5563 	m->m_len = m->m_pkthdr.len = 0;
5564 	m_copyback(m, 0, iov[1].iov_len, iov[1].iov_base);
5565 
5566 	WG_DLOG("iov_len=%zu\n", iov[1].iov_len);
5567 	WG_DUMP_BUF(iov[1].iov_base, iov[1].iov_len);
5568 
5569 	error = wg_output(ifp, m, dst, NULL); /* consumes m */
5570 	if (error)
5571 		WG_DLOG("wg_output failed, error=%d\n", error);
5572 }
5573 
5574 /*
5575  * Receive packets from a peer.
5576  */
5577 void
5578 rumpkern_wg_recv_peer(struct wg_softc *wg, struct iovec *iov, size_t iovlen)
5579 {
5580 	struct mbuf *m;
5581 	const struct sockaddr *src;
5582 	int bound;
5583 
5584 	WG_TRACE("");
5585 
5586 	src = iov[0].iov_base;
5587 
5588 	m = m_gethdr(M_DONTWAIT, MT_DATA);
5589 	if (m == NULL)
5590 		return;
5591 	m->m_len = m->m_pkthdr.len = 0;
5592 	m_copyback(m, 0, iov[1].iov_len, iov[1].iov_base);
5593 
5594 	WG_DLOG("iov_len=%zu\n", iov[1].iov_len);
5595 	WG_DUMP_BUF(iov[1].iov_base, iov[1].iov_len);
5596 
5597 	bound = curlwp_bind();
5598 	wg_handle_packet(wg, m, src);
5599 	curlwp_bindx(bound);
5600 }
5601 #endif /* WG_RUMPKERNEL */
5602 
5603 /*
5604  * Module infrastructure
5605  */
5606 #include "if_module.h"
5607 
5608 IF_MODULE(MODULE_CLASS_DRIVER, wg, "sodium,blake2s")
5609