1 /*	$OpenBSD: if_pfsync.c,v 1.257 2018/02/19 08:59:52 mpi Exp $	*/
2 
3 /*
4  * Copyright (c) 2002 Michael Shalayeff
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
20  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
24  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
25  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26  * THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
31  *
32  * Permission to use, copy, modify, and distribute this software for any
33  * purpose with or without fee is hereby granted, provided that the above
34  * copyright notice and this permission notice appear in all copies.
35  *
36  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
37  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
38  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
39  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
40  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
41  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
42  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
43  */
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/time.h>
48 #include <sys/malloc.h>
49 #include <sys/mbuf.h>
50 #include <sys/socket.h>
51 #include <sys/ioctl.h>
52 #include <sys/timeout.h>
53 #include <sys/kernel.h>
54 #include <sys/sysctl.h>
55 #include <sys/pool.h>
56 #include <sys/syslog.h>
57 
58 #include <net/if.h>
59 #include <net/if_types.h>
60 #include <net/bpf.h>
61 #include <net/netisr.h>
62 
63 #include <netinet/in.h>
64 #include <netinet/if_ether.h>
65 #include <netinet/ip.h>
66 #include <netinet/in_var.h>
67 #include <netinet/ip_var.h>
68 #include <netinet/ip_ipsp.h>
69 #include <netinet/ip_icmp.h>
70 #include <netinet/icmp6.h>
71 #include <netinet/tcp.h>
72 #include <netinet/tcp_seq.h>
73 #include <netinet/tcp_fsm.h>
74 #include <netinet/udp.h>
75 
76 #ifdef INET6
77 #include <netinet6/in6_var.h>
78 #include <netinet/ip6.h>
79 #include <netinet6/ip6_var.h>
80 #include <netinet6/nd6.h>
81 #endif /* INET6 */
82 
83 #include "carp.h"
84 #if NCARP > 0
85 #include <netinet/ip_carp.h>
86 #endif
87 
88 #define PF_DEBUGNAME	"pfsync: "
89 #include <net/pfvar.h>
90 #include <net/pfvar_priv.h>
91 #include <net/if_pfsync.h>
92 
93 #include "bpfilter.h"
94 #include "pfsync.h"
95 
96 #define PFSYNC_MINPKT ( \
97 	sizeof(struct ip) + \
98 	sizeof(struct pfsync_header))
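/*
 * A pfsync frame, as assembled by pfsync_sendout(), is laid out as:
 *
 *	struct ip
 *	struct pfsync_header
 *	struct pfsync_subheader		(action, len >> 2, count)
 *	count * message			(e.g. struct pfsync_state)
 *	... further subheader/message groups ...
 *
 * PFSYNC_MINPKT is the fixed cost of a frame before any
 * subheader/message groups are appended.
 */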
99 
100 int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
101 	    struct pfsync_state_peer *);
102 
103 int	pfsync_in_clr(caddr_t, int, int, int);
104 int	pfsync_in_iack(caddr_t, int, int, int);
105 int	pfsync_in_upd_c(caddr_t, int, int, int);
106 int	pfsync_in_ureq(caddr_t, int, int, int);
107 int	pfsync_in_del(caddr_t, int, int, int);
108 int	pfsync_in_del_c(caddr_t, int, int, int);
109 int	pfsync_in_bus(caddr_t, int, int, int);
110 int	pfsync_in_tdb(caddr_t, int, int, int);
111 int	pfsync_in_ins(caddr_t, int, int, int);
112 int	pfsync_in_upd(caddr_t, int, int, int);
113 int	pfsync_in_eof(caddr_t, int, int, int);
114 
115 int	pfsync_in_error(caddr_t, int, int, int);
116 
117 struct {
118 	int	(*in)(caddr_t, int, int, int);
119 	size_t	len;
120 } pfsync_acts[] = {
121 	/* PFSYNC_ACT_CLR */
122 	{ pfsync_in_clr,	sizeof(struct pfsync_clr) },
123 	 /* PFSYNC_ACT_OINS */
124 	{ pfsync_in_error,	0 },
125 	/* PFSYNC_ACT_INS_ACK */
126 	{ pfsync_in_iack,	sizeof(struct pfsync_ins_ack) },
127 	/* PFSYNC_ACT_OUPD */
128 	{ pfsync_in_error,	0 },
129 	/* PFSYNC_ACT_UPD_C */
130 	{ pfsync_in_upd_c,	sizeof(struct pfsync_upd_c) },
131 	/* PFSYNC_ACT_UPD_REQ */
132 	{ pfsync_in_ureq,	sizeof(struct pfsync_upd_req) },
133 	/* PFSYNC_ACT_DEL */
134 	{ pfsync_in_del,	sizeof(struct pfsync_state) },
135 	/* PFSYNC_ACT_DEL_C */
136 	{ pfsync_in_del_c,	sizeof(struct pfsync_del_c) },
137 	/* PFSYNC_ACT_INS_F */
138 	{ pfsync_in_error,	0 },
139 	/* PFSYNC_ACT_DEL_F */
140 	{ pfsync_in_error,	0 },
141 	/* PFSYNC_ACT_BUS */
142 	{ pfsync_in_bus,	sizeof(struct pfsync_bus) },
143 	/* PFSYNC_ACT_OTDB */
144 	{ pfsync_in_error,	0 },
145 	/* PFSYNC_ACT_EOF */
146 	{ pfsync_in_error,	0 },
147 	/* PFSYNC_ACT_INS */
148 	{ pfsync_in_ins,	sizeof(struct pfsync_state) },
149 	/* PFSYNC_ACT_UPD */
150 	{ pfsync_in_upd,	sizeof(struct pfsync_state) },
151 	/* PFSYNC_ACT_TDB */
152 	{ pfsync_in_tdb,	sizeof(struct pfsync_tdb) },
153 };
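/*
 * pfsync_acts is indexed by the subheader action code; pfsync_input()
 * bounds-checks the action and the advertised message length against
 * .len before dispatching to .in.  The pfsync_in_error() slots
 * presumably correspond to message formats from older protocol
 * revisions (OINS, OUPD, OTDB) that are rejected rather than parsed.
 */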
154 
155 struct pfsync_q {
156 	void		(*write)(struct pf_state *, void *);
157 	size_t		len;
158 	u_int8_t	action;
159 };
160 
161 /* we have one of these for every PFSYNC_S_ */
162 void	pfsync_out_state(struct pf_state *, void *);
163 void	pfsync_out_iack(struct pf_state *, void *);
164 void	pfsync_out_upd_c(struct pf_state *, void *);
165 void	pfsync_out_del(struct pf_state *, void *);
166 
167 struct pfsync_q pfsync_qs[] = {
168 	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
169 	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
170 	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C },
171 	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
172 	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD }
173 };
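/*
 * The order of pfsync_qs must match the PFSYNC_S_* queue indexes
 * (IACK, UPD_C, DEL, INS, UPD): pfsync_q_ins() files a state under
 * sc_qs[q] and pfsync_sendout() uses pfsync_qs[q] to size and write
 * the matching messages.
 */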
174 
175 void	pfsync_q_ins(struct pf_state *, int);
176 void	pfsync_q_del(struct pf_state *);
177 
178 struct pfsync_upd_req_item {
179 	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
180 	struct pfsync_upd_req			ur_msg;
181 };
182 TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);
183 
184 struct pfsync_deferral {
185 	TAILQ_ENTRY(pfsync_deferral)		 pd_entry;
186 	struct pf_state				*pd_st;
187 	struct mbuf				*pd_m;
188 	struct timeout				 pd_tmo;
189 };
190 TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);
191 
192 #define PFSYNC_PLSIZE	MAX(sizeof(struct pfsync_upd_req_item), \
193 			    sizeof(struct pfsync_deferral))
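/*
 * A single pool (sc_pool) backs both update request items and
 * deferrals, so every item must be large enough for either type;
 * see pfsync_request_update() and pfsync_defer().
 */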
194 
195 void	pfsync_out_tdb(struct tdb *, void *);
196 
197 struct pfsync_softc {
198 	struct ifnet		 sc_if;
199 	struct ifnet		*sc_sync_if;
200 
201 	struct pool		 sc_pool;
202 
203 	struct ip_moptions	 sc_imo;
204 
205 	struct in_addr		 sc_sync_peer;
206 	u_int8_t		 sc_maxupdates;
207 
208 	struct ip		 sc_template;
209 
210 	struct pf_state_queue	 sc_qs[PFSYNC_S_COUNT];
211 	size_t			 sc_len;
212 
213 	struct pfsync_upd_reqs	 sc_upd_req_list;
214 
215 	int			 sc_initial_bulk;
216 	int			 sc_link_demoted;
217 
218 	int			 sc_defer;
219 	struct pfsync_deferrals	 sc_deferrals;
220 	u_int			 sc_deferred;
221 
222 	void			*sc_plus;
223 	size_t			 sc_pluslen;
224 
225 	u_int32_t		 sc_ureq_sent;
226 	int			 sc_bulk_tries;
227 	struct timeout		 sc_bulkfail_tmo;
228 
229 	u_int32_t		 sc_ureq_received;
230 	struct pf_state		*sc_bulk_next;
231 	struct pf_state		*sc_bulk_last;
232 	struct timeout		 sc_bulk_tmo;
233 
234 	TAILQ_HEAD(, tdb)	 sc_tdb_q;
235 
236 	void			*sc_lhcookie;
237 	void			*sc_dhcookie;
238 
239 	struct timeout		 sc_tmo;
240 };
241 
242 struct pfsync_softc	*pfsyncif = NULL;
243 struct cpumem		*pfsynccounters;
244 
245 void	pfsyncattach(int);
246 int	pfsync_clone_create(struct if_clone *, int);
247 int	pfsync_clone_destroy(struct ifnet *);
248 int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
249 	    struct pf_state_peer *);
250 void	pfsync_update_net_tdb(struct pfsync_tdb *);
251 int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
252 	    struct rtentry *);
253 int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
254 void	pfsyncstart(struct ifnet *);
255 void	pfsync_syncdev_state(void *);
256 void	pfsync_ifdetach(void *);
257 
258 void	pfsync_deferred(struct pf_state *, int);
259 void	pfsync_undefer(struct pfsync_deferral *, int);
260 void	pfsync_defer_tmo(void *);
261 
262 void	pfsync_cancel_full_update(struct pfsync_softc *);
263 void	pfsync_request_full_update(struct pfsync_softc *);
264 void	pfsync_request_update(u_int32_t, u_int64_t);
265 void	pfsync_update_state_req(struct pf_state *);
266 
267 void	pfsync_drop(struct pfsync_softc *);
268 void	pfsync_sendout(void);
269 void	pfsync_send_plus(void *, size_t);
270 void	pfsync_timeout(void *);
271 void	pfsync_tdb_timeout(void *);
272 
273 void	pfsync_bulk_start(void);
274 void	pfsync_bulk_status(u_int8_t);
275 void	pfsync_bulk_update(void *);
276 void	pfsync_bulk_fail(void *);
277 
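/*
 * pfsync_sync_ok is clear while this host is waiting on a bulk
 * update from its peer and set once the bulk completes or is
 * cancelled.  While it is clear, the carp demotion taken at bulk
 * start is left in place so we do not preempt with an incomplete
 * state table.
 */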
278 #define PFSYNC_MAX_BULKTRIES	12
279 int	pfsync_sync_ok;
280 
281 struct if_clone	pfsync_cloner =
282     IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);
283 
284 void
285 pfsyncattach(int npfsync)
286 {
287 	if_clone_attach(&pfsync_cloner);
288 	pfsynccounters = counters_alloc(pfsyncs_ncounters);
289 }
290 
291 int
292 pfsync_clone_create(struct if_clone *ifc, int unit)
293 {
294 	struct pfsync_softc *sc;
295 	struct ifnet *ifp;
296 	int q;
297 
298 	if (unit != 0)
299 		return (EINVAL);
300 
301 	pfsync_sync_ok = 1;
302 
303 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
304 	for (q = 0; q < PFSYNC_S_COUNT; q++)
305 		TAILQ_INIT(&sc->sc_qs[q]);
306 
307 	pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, IPL_SOFTNET, 0, "pfsync",
308 	    NULL);
309 	TAILQ_INIT(&sc->sc_upd_req_list);
310 	TAILQ_INIT(&sc->sc_deferrals);
311 	sc->sc_deferred = 0;
312 
313 	TAILQ_INIT(&sc->sc_tdb_q);
314 
315 	sc->sc_len = PFSYNC_MINPKT;
316 	sc->sc_maxupdates = 128;
317 
318 	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
319 	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
320 	    M_WAITOK | M_ZERO);
321 	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
322 
323 	ifp = &sc->sc_if;
324 	snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
325 	ifp->if_softc = sc;
326 	ifp->if_ioctl = pfsyncioctl;
327 	ifp->if_output = pfsyncoutput;
328 	ifp->if_start = pfsyncstart;
329 	ifp->if_type = IFT_PFSYNC;
330 	IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
331 	ifp->if_hdrlen = sizeof(struct pfsync_header);
332 	ifp->if_mtu = ETHERMTU;
333 	ifp->if_xflags = IFXF_CLONED;
334 	timeout_set_proc(&sc->sc_tmo, pfsync_timeout, sc);
335 	timeout_set_proc(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
336 	timeout_set_proc(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);
337 
338 	if_attach(ifp);
339 	if_alloc_sadl(ifp);
340 
341 #if NCARP > 0
342 	if_addgroup(ifp, "carp");
343 #endif
344 
345 #if NBPFILTER > 0
346 	bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
347 #endif
348 
349 	pfsyncif = sc;
350 
351 	return (0);
352 }
353 
354 int
355 pfsync_clone_destroy(struct ifnet *ifp)
356 {
357 	struct pfsync_softc *sc = ifp->if_softc;
358 	struct pfsync_deferral *pd;
359 
360 	timeout_del(&sc->sc_bulkfail_tmo);
361 	timeout_del(&sc->sc_bulk_tmo);
362 	timeout_del(&sc->sc_tmo);
363 #if NCARP > 0
364 	if (!pfsync_sync_ok)
365 		carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
366 	if (sc->sc_link_demoted)
367 		carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
368 #endif
369 	if (sc->sc_sync_if) {
370 		hook_disestablish(
371 		    sc->sc_sync_if->if_linkstatehooks,
372 		    sc->sc_lhcookie);
373 		hook_disestablish(sc->sc_sync_if->if_detachhooks,
374 		    sc->sc_dhcookie);
375 	}
376 	if_detach(ifp);
377 
378 	pfsync_drop(sc);
379 
380 	while (sc->sc_deferred > 0) {
381 		pd = TAILQ_FIRST(&sc->sc_deferrals);
382 		timeout_del(&pd->pd_tmo);
383 		pfsync_undefer(pd, 0);
384 	}
385 
386 	pool_destroy(&sc->sc_pool);
387 	free(sc->sc_imo.imo_membership, M_IPMOPTS, 0);
388 	free(sc, M_DEVBUF, sizeof(*sc));
389 
390 	pfsyncif = NULL;
391 
392 	return (0);
393 }
394 
395 /*
396  * Start output on the pfsync interface.
397  */
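/*
 * pfsync never drains its if_snd queue: pfsync_sendout() hands
 * finished frames straight to ip_output().  Anything that does get
 * queued is simply thrown away here.
 */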
398 void
399 pfsyncstart(struct ifnet *ifp)
400 {
401 	IFQ_PURGE(&ifp->if_snd);
402 }
403 
404 void
405 pfsync_syncdev_state(void *arg)
406 {
407 	struct pfsync_softc *sc = arg;
408 
409 	if (!sc->sc_sync_if || !(sc->sc_if.if_flags & IFF_UP))
410 		return;
411 
412 	if (sc->sc_sync_if->if_link_state == LINK_STATE_DOWN) {
413 		sc->sc_if.if_flags &= ~IFF_RUNNING;
414 		if (!sc->sc_link_demoted) {
415 #if NCARP > 0
416 			carp_group_demote_adj(&sc->sc_if, 1,
417 			    "pfsync link state down");
418 #endif
419 			sc->sc_link_demoted = 1;
420 		}
421 
422 		/* drop everything */
423 		timeout_del(&sc->sc_tmo);
424 		pfsync_drop(sc);
425 
426 		pfsync_cancel_full_update(sc);
427 	} else if (sc->sc_link_demoted) {
428 		sc->sc_if.if_flags |= IFF_RUNNING;
429 
430 		pfsync_request_full_update(sc);
431 	}
432 }
433 
434 void
435 pfsync_ifdetach(void *arg)
436 {
437 	struct pfsync_softc *sc = arg;
438 
439 	sc->sc_sync_if = NULL;
440 }
441 
442 int
443 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
444     struct pf_state_peer *d)
445 {
446 	if (s->scrub.scrub_flag && d->scrub == NULL) {
447 		d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
448 		if (d->scrub == NULL)
449 			return (ENOMEM);
450 	}
451 
452 	return (0);
453 }
454 
455 void
456 pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
457 {
458 	pf_state_export(sp, st);
459 }
460 
461 int
462 pfsync_state_import(struct pfsync_state *sp, int flags)
463 {
464 	struct pf_state	*st = NULL;
465 	struct pf_state_key *skw = NULL, *sks = NULL;
466 	struct pf_rule *r = NULL;
467 	struct pfi_kif	*kif;
468 	int pool_flags;
469 	int error;
470 
471 	if (sp->creatorid == 0) {
472 		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
473 		    "invalid creator id: %08x", ntohl(sp->creatorid));
474 		return (EINVAL);
475 	}
476 
477 	if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
478 		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
479 		    "unknown interface: %s", sp->ifname);
480 		if (flags & PFSYNC_SI_IOCTL)
481 			return (EINVAL);
482 		return (0);	/* skip this state */
483 	}
484 
485 	if (sp->af == 0)
486 		return (0);	/* skip this state */
487 
488 	/*
489 	 * If the ruleset checksums match or the state is coming from the ioctl,
490 	 * it's safe to associate the state with the rule of that number.
491 	 */
492 	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
493 	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
494 	    pf_main_ruleset.rules.active.rcount)
495 		r = pf_main_ruleset.rules.active.ptr_array[ntohl(sp->rule)];
496 	else
497 		r = &pf_default_rule;
498 
499 	if ((r->max_states && r->states_cur >= r->max_states))
500 		goto cleanup;
501 
502 	if (flags & PFSYNC_SI_IOCTL)
503 		pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
504 	else
505 		pool_flags = PR_NOWAIT | PR_LIMITFAIL | PR_ZERO;
506 
507 	if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
508 		goto cleanup;
509 
510 	if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
511 		goto cleanup;
512 
513 	if ((sp->key[PF_SK_WIRE].af &&
514 	    (sp->key[PF_SK_WIRE].af != sp->key[PF_SK_STACK].af)) ||
515 	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
516 	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
517 	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
518 	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
519 	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
520 	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] ||
521 	    sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) {
522 		if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
523 			goto cleanup;
524 	} else
525 		sks = skw;
526 
527 	/* allocate memory for scrub info */
528 	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
529 	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
530 		goto cleanup;
531 
532 	/* copy to state key(s) */
533 	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
534 	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
535 	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
536 	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
537 	skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain);
538 	PF_REF_INIT(skw->refcnt);
539 	skw->proto = sp->proto;
540 	if (!(skw->af = sp->key[PF_SK_WIRE].af))
541 		skw->af = sp->af;
542 	if (sks != skw) {
543 		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
544 		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
545 		sks->port[0] = sp->key[PF_SK_STACK].port[0];
546 		sks->port[1] = sp->key[PF_SK_STACK].port[1];
547 		sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain);
548 		PF_REF_INIT(sks->refcnt);
549 		if (!(sks->af = sp->key[PF_SK_STACK].af))
550 			sks->af = sp->af;
551 		if (sks->af != skw->af) {
552 			switch (sp->proto) {
553 			case IPPROTO_ICMP:
554 				sks->proto = IPPROTO_ICMPV6;
555 				break;
556 			case IPPROTO_ICMPV6:
557 				sks->proto = IPPROTO_ICMP;
558 				break;
559 			default:
560 				sks->proto = sp->proto;
561 			}
562 		} else
563 			sks->proto = sp->proto;
564 	}
565 	st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]);
566 	st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]);
567 
568 	/* copy to state */
569 	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
570 	st->creation = time_uptime - ntohl(sp->creation);
571 	st->expire = time_uptime;
572 	if (ntohl(sp->expire)) {
573 		u_int32_t timeout;
574 
575 		timeout = r->timeout[sp->timeout];
576 		if (!timeout)
577 			timeout = pf_default_rule.timeout[sp->timeout];
578 
579 		/* sp->expire may have been adaptively scaled by export. */
580 		st->expire -= timeout - ntohl(sp->expire);
581 	}
582 
583 	st->direction = sp->direction;
584 	st->log = sp->log;
585 	st->timeout = sp->timeout;
586 	st->state_flags = ntohs(sp->state_flags);
587 	st->max_mss = ntohs(sp->max_mss);
588 	st->min_ttl = sp->min_ttl;
589 	st->set_tos = sp->set_tos;
590 	st->set_prio[0] = sp->set_prio[0];
591 	st->set_prio[1] = sp->set_prio[1];
592 
593 	st->id = sp->id;
594 	st->creatorid = sp->creatorid;
595 	pf_state_peer_ntoh(&sp->src, &st->src);
596 	pf_state_peer_ntoh(&sp->dst, &st->dst);
597 
598 	st->rule.ptr = r;
599 	st->anchor.ptr = NULL;
600 	st->rt_kif = NULL;
601 
602 	st->pfsync_time = time_uptime;
603 	st->sync_state = PFSYNC_S_NONE;
604 
605 	/* XXX when we have anchors, use STATE_INC_COUNTERS */
606 	r->states_cur++;
607 	r->states_tot++;
608 
609 	if (!ISSET(flags, PFSYNC_SI_IOCTL))
610 		SET(st->state_flags, PFSTATE_NOSYNC);
611 
612 	if (pf_state_insert(kif, &skw, &sks, st) != 0) {
613 		/* XXX when we have anchors, use STATE_DEC_COUNTERS */
614 		r->states_cur--;
615 		error = EEXIST;
616 		goto cleanup_state;
617 	}
618 
619 	if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
620 		CLR(st->state_flags, PFSTATE_NOSYNC);
621 		if (ISSET(st->state_flags, PFSTATE_ACK)) {
622 			pfsync_q_ins(st, PFSYNC_S_IACK);
623 			schednetisr(NETISR_PFSYNC);
624 		}
625 	}
626 	CLR(st->state_flags, PFSTATE_ACK);
627 
628 	return (0);
629 
630  cleanup:
631 	error = ENOMEM;
632 	if (skw == sks)
633 		sks = NULL;
634 	if (skw != NULL)
635 		pool_put(&pf_state_key_pl, skw);
636 	if (sks != NULL)
637 		pool_put(&pf_state_key_pl, sks);
638 
639  cleanup_state:	/* pf_state_insert frees the state keys */
640 	if (st) {
641 		if (st->dst.scrub)
642 			pool_put(&pf_state_scrub_pl, st->dst.scrub);
643 		if (st->src.scrub)
644 			pool_put(&pf_state_scrub_pl, st->src.scrub);
645 		pool_put(&pf_state_pl, st);
646 	}
647 	return (error);
648 }
649 
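/*
 * pfsync_input() is called with the net lock held.  After checking
 * the receive interface, TTL, version and length, it walks the chain
 * of subheaders in the frame, pulling each group of messages into
 * contiguous memory and dispatching it via pfsync_acts[].in() under
 * the pf lock.
 */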
650 int
651 pfsync_input(struct mbuf **mp, int *offp, int proto, int af)
652 {
653 	struct mbuf *n, *m = *mp;
654 	struct pfsync_softc *sc = pfsyncif;
655 	struct ip *ip = mtod(m, struct ip *);
656 	struct pfsync_header *ph;
657 	struct pfsync_subheader subh;
658 	int offset, noff, len, count, mlen, flags = 0;
659 	int e;
660 
661 	NET_ASSERT_LOCKED();
662 
663 	pfsyncstat_inc(pfsyncs_ipackets);
664 
665 	/* verify that we have a sync interface configured */
666 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
667 	    sc->sc_sync_if == NULL || !pf_status.running)
668 		goto done;
669 
670 	/* verify that the packet came in on the right interface */
671 	if (sc->sc_sync_if->if_index != m->m_pkthdr.ph_ifidx) {
672 		pfsyncstat_inc(pfsyncs_badif);
673 		goto done;
674 	}
675 
676 	sc->sc_if.if_ipackets++;
677 	sc->sc_if.if_ibytes += m->m_pkthdr.len;
678 
679 	/* verify that the IP TTL is 255. */
680 	if (ip->ip_ttl != PFSYNC_DFLTTL) {
681 		pfsyncstat_inc(pfsyncs_badttl);
682 		goto done;
683 	}
684 
685 	offset = ip->ip_hl << 2;
686 	n = m_pulldown(m, offset, sizeof(*ph), &noff);
687 	if (n == NULL) {
688 		pfsyncstat_inc(pfsyncs_hdrops);
689 		return IPPROTO_DONE;
690 	}
691 	ph = (struct pfsync_header *)(n->m_data + noff);
692 
693 	/* verify the version */
694 	if (ph->version != PFSYNC_VERSION) {
695 		pfsyncstat_inc(pfsyncs_badver);
696 		goto done;
697 	}
698 	len = ntohs(ph->len) + offset;
699 	if (m->m_pkthdr.len < len) {
700 		pfsyncstat_inc(pfsyncs_badlen);
701 		goto done;
702 	}
703 
704 	if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
705 		flags = PFSYNC_SI_CKSUM;
706 
707 	offset += sizeof(*ph);
708 	while (offset <= len - sizeof(subh)) {
709 		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
710 		offset += sizeof(subh);
711 
712 		mlen = subh.len << 2;
713 		count = ntohs(subh.count);
714 
715 		if (subh.action >= PFSYNC_ACT_MAX ||
716 		    subh.action >= nitems(pfsync_acts) ||
717 		    mlen < pfsync_acts[subh.action].len) {
718 			/*
719 			 * Subheaders are always followed by at least one
720 			 * message, so if the peer is new enough to tell us
721 			 * how big its messages are, we know enough to skip
722 			 * over them.
723 			 */
724 			if (count > 0 && mlen > 0) {
725 				offset += count * mlen;
726 				continue;
727 			}
728 			pfsyncstat_inc(pfsyncs_badact);
729 			goto done;
730 		}
731 
732 		n = m_pulldown(m, offset, mlen * count, &noff);
733 		if (n == NULL) {
734 			pfsyncstat_inc(pfsyncs_badlen);
735 			return IPPROTO_DONE;
736 		}
737 
738 		PF_LOCK();
739 		e = pfsync_acts[subh.action].in(n->m_data + noff, mlen, count,
740 		    flags);
741 		PF_UNLOCK();
742 		if (e != 0)
743 			goto done;
744 
745 		offset += mlen * count;
746 	}
747 
748 done:
749 	m_freem(m);
750 	return IPPROTO_DONE;
751 }
752 
753 int
754 pfsync_in_clr(caddr_t buf, int len, int count, int flags)
755 {
756 	struct pfsync_clr *clr;
757 	struct pf_state *st, *nexts;
758 	struct pfi_kif *kif;
759 	u_int32_t creatorid;
760 	int i;
761 
762 	for (i = 0; i < count; i++) {
763 		clr = (struct pfsync_clr *)(buf + len * i);
764 		kif = NULL;
765 		creatorid = clr->creatorid;
766 		if (strlen(clr->ifname) &&
767 		    (kif = pfi_kif_find(clr->ifname)) == NULL)
768 			continue;
769 
770 		for (st = RB_MIN(pf_state_tree_id, &tree_id); st; st = nexts) {
771 			nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
772 			if (st->creatorid == creatorid &&
773 			    ((kif && st->kif == kif) || !kif)) {
774 				SET(st->state_flags, PFSTATE_NOSYNC);
775 				pf_remove_state(st);
776 			}
777 		}
778 	}
779 
780 	return (0);
781 }
782 
783 int
784 pfsync_in_ins(caddr_t buf, int len, int count, int flags)
785 {
786 	struct pfsync_state *sp;
787 	sa_family_t af1, af2;
788 	int i;
789 
790 	for (i = 0; i < count; i++) {
791 		sp = (struct pfsync_state *)(buf + len * i);
792 		af1 = sp->key[0].af;
793 		af2 = sp->key[1].af;
794 
795 		/* check for invalid values */
796 		if (sp->timeout >= PFTM_MAX ||
797 		    sp->src.state > PF_TCPS_PROXY_DST ||
798 		    sp->dst.state > PF_TCPS_PROXY_DST ||
799 		    sp->direction > PF_OUT ||
800 		    (((af1 || af2) &&
801 		     ((af1 != AF_INET && af1 != AF_INET6) ||
802 		      (af2 != AF_INET && af2 != AF_INET6))) ||
803 		    (sp->af != AF_INET && sp->af != AF_INET6))) {
804 			DPFPRINTF(LOG_NOTICE,
805 			    "pfsync_input: PFSYNC5_ACT_INS: invalid value");
806 			pfsyncstat_inc(pfsyncs_badval);
807 			continue;
808 		}
809 
810 		if (pfsync_state_import(sp, flags) == ENOMEM) {
811 			/* drop out, but process the rest of the actions */
812 			break;
813 		}
814 	}
815 
816 	return (0);
817 }
818 
819 int
820 pfsync_in_iack(caddr_t buf, int len, int count, int flags)
821 {
822 	struct pfsync_ins_ack *ia;
823 	struct pf_state_cmp id_key;
824 	struct pf_state *st;
825 	int i;
826 
827 	for (i = 0; i < count; i++) {
828 		ia = (struct pfsync_ins_ack *)(buf + len * i);
829 
830 		id_key.id = ia->id;
831 		id_key.creatorid = ia->creatorid;
832 
833 		st = pf_find_state_byid(&id_key);
834 		if (st == NULL)
835 			continue;
836 
837 		if (ISSET(st->state_flags, PFSTATE_ACK))
838 			pfsync_deferred(st, 0);
839 	}
840 
841 	return (0);
842 }
843 
844 int
845 pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
846     struct pfsync_state_peer *dst)
847 {
848 	int sync = 0;
849 
850 	/*
851 	 * The state should never go backwards except
852 	 * for syn-proxy states.  Neither should the
853 	 * sequence window slide backwards.
854 	 */
855 	if ((st->src.state > src->state &&
856 	    (st->src.state < PF_TCPS_PROXY_SRC ||
857 	    src->state >= PF_TCPS_PROXY_SRC)) ||
858 
859 	    (st->src.state == src->state &&
860 	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
861 		sync++;
862 	else
863 		pf_state_peer_ntoh(src, &st->src);
864 
865 	if ((st->dst.state > dst->state) ||
866 
867 	    (st->dst.state >= TCPS_SYN_SENT &&
868 	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
869 		sync++;
870 	else
871 		pf_state_peer_ntoh(dst, &st->dst);
872 
873 	return (sync);
874 }
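/*
 * pfsync_upd_tcp() returns the number of peers (0-2) for which our
 * local state is fresher than the received update.  Callers apply
 * the peer's timeout/expiry only when the result is < 2, and a
 * non-zero result makes them push our own copy back out, since the
 * sender appears to be stale.
 */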
875 
876 int
877 pfsync_in_upd(caddr_t buf, int len, int count, int flags)
878 {
879 	struct pfsync_state *sp;
880 	struct pf_state_cmp id_key;
881 	struct pf_state *st;
882 	int sync;
883 
884 	int i;
885 
886 	for (i = 0; i < count; i++) {
887 		sp = (struct pfsync_state *)(buf + len * i);
888 
889 		/* check for invalid values */
890 		if (sp->timeout >= PFTM_MAX ||
891 		    sp->src.state > PF_TCPS_PROXY_DST ||
892 		    sp->dst.state > PF_TCPS_PROXY_DST) {
893 			DPFPRINTF(LOG_NOTICE,
894 			    "pfsync_input: PFSYNC_ACT_UPD: invalid value");
895 			pfsyncstat_inc(pfsyncs_badval);
896 			continue;
897 		}
898 
899 		id_key.id = sp->id;
900 		id_key.creatorid = sp->creatorid;
901 
902 		st = pf_find_state_byid(&id_key);
903 		if (st == NULL) {
904 			/* insert the update */
905 			if (pfsync_state_import(sp, flags))
906 				pfsyncstat_inc(pfsyncs_badstate);
907 			continue;
908 		}
909 
910 		if (ISSET(st->state_flags, PFSTATE_ACK))
911 			pfsync_deferred(st, 1);
912 
913 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
914 			sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
915 		else {
916 			sync = 0;
917 
918 			/*
919 			 * Non-TCP protocol state machines always go
920 			 * forward.
921 			 */
922 			if (st->src.state > sp->src.state)
923 				sync++;
924 			else
925 				pf_state_peer_ntoh(&sp->src, &st->src);
926 
927 			if (st->dst.state > sp->dst.state)
928 				sync++;
929 			else
930 				pf_state_peer_ntoh(&sp->dst, &st->dst);
931 		}
932 
933 		if (sync < 2) {
934 			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
935 			pf_state_peer_ntoh(&sp->dst, &st->dst);
936 			st->expire = time_uptime;
937 			st->timeout = sp->timeout;
938 		}
939 		st->pfsync_time = time_uptime;
940 
941 		if (sync) {
942 			pfsyncstat_inc(pfsyncs_stale);
943 
944 			pfsync_update_state(st);
945 			schednetisr(NETISR_PFSYNC);
946 		}
947 	}
948 
949 	return (0);
950 }
951 
952 int
953 pfsync_in_upd_c(caddr_t buf, int len, int count, int flags)
954 {
955 	struct pfsync_upd_c *up;
956 	struct pf_state_cmp id_key;
957 	struct pf_state *st;
958 
959 	int sync;
960 
961 	int i;
962 
963 	for (i = 0; i < count; i++) {
964 		up = (struct pfsync_upd_c *)(buf + len * i);
965 
966 		/* check for invalid values */
967 		if (up->timeout >= PFTM_MAX ||
968 		    up->src.state > PF_TCPS_PROXY_DST ||
969 		    up->dst.state > PF_TCPS_PROXY_DST) {
970 			DPFPRINTF(LOG_NOTICE,
971 			    "pfsync_input: PFSYNC_ACT_UPD_C: invalid value");
972 			pfsyncstat_inc(pfsyncs_badval);
973 			continue;
974 		}
975 
976 		id_key.id = up->id;
977 		id_key.creatorid = up->creatorid;
978 
979 		st = pf_find_state_byid(&id_key);
980 		if (st == NULL) {
981 			/* We don't have this state. Ask for it. */
982 			pfsync_request_update(id_key.creatorid, id_key.id);
983 			continue;
984 		}
985 
986 		if (ISSET(st->state_flags, PFSTATE_ACK))
987 			pfsync_deferred(st, 1);
988 
989 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
990 			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
991 		else {
992 			sync = 0;
993 			/*
994 			 * Non-TCP protocol state machines always go
995 			 * forward.
996 			 */
997 			if (st->src.state > up->src.state)
998 				sync++;
999 			else
1000 				pf_state_peer_ntoh(&up->src, &st->src);
1001 
1002 			if (st->dst.state > up->dst.state)
1003 				sync++;
1004 			else
1005 				pf_state_peer_ntoh(&up->dst, &st->dst);
1006 		}
1007 		if (sync < 2) {
1008 			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
1009 			pf_state_peer_ntoh(&up->dst, &st->dst);
1010 			st->expire = time_uptime;
1011 			st->timeout = up->timeout;
1012 		}
1013 		st->pfsync_time = time_uptime;
1014 
1015 		if (sync) {
1016 			pfsyncstat_inc(pfsyncs_stale);
1017 
1018 			pfsync_update_state(st);
1019 			schednetisr(NETISR_PFSYNC);
1020 		}
1021 	}
1022 
1023 	return (0);
1024 }
1025 
1026 int
1027 pfsync_in_ureq(caddr_t buf, int len, int count, int flags)
1028 {
1029 	struct pfsync_upd_req *ur;
1030 	int i;
1031 
1032 	struct pf_state_cmp id_key;
1033 	struct pf_state *st;
1034 
1035 	for (i = 0; i < count; i++) {
1036 		ur = (struct pfsync_upd_req *)(buf + len * i);
1037 
1038 		id_key.id = ur->id;
1039 		id_key.creatorid = ur->creatorid;
1040 
1041 		if (id_key.id == 0 && id_key.creatorid == 0)
1042 			pfsync_bulk_start();
1043 		else {
1044 			st = pf_find_state_byid(&id_key);
1045 			if (st == NULL) {
1046 				pfsyncstat_inc(pfsyncs_badstate);
1047 				continue;
1048 			}
1049 			if (ISSET(st->state_flags, PFSTATE_NOSYNC))
1050 				continue;
1051 
1052 			pfsync_update_state_req(st);
1053 		}
1054 	}
1055 
1056 	return (0);
1057 }
1058 
1059 int
1060 pfsync_in_del(caddr_t buf, int len, int count, int flags)
1061 {
1062 	struct pfsync_state *sp;
1063 	struct pf_state_cmp id_key;
1064 	struct pf_state *st;
1065 	int i;
1066 
1067 	for (i = 0; i < count; i++) {
1068 		sp = (struct pfsync_state *)(buf + len * i);
1069 
1070 		id_key.id = sp->id;
1071 		id_key.creatorid = sp->creatorid;
1072 
1073 		st = pf_find_state_byid(&id_key);
1074 		if (st == NULL) {
1075 			pfsyncstat_inc(pfsyncs_badstate);
1076 			continue;
1077 		}
1078 		SET(st->state_flags, PFSTATE_NOSYNC);
1079 		pf_remove_state(st);
1080 	}
1081 
1082 	return (0);
1083 }
1084 
1085 int
1086 pfsync_in_del_c(caddr_t buf, int len, int count, int flags)
1087 {
1088 	struct pfsync_del_c *sp;
1089 	struct pf_state_cmp id_key;
1090 	struct pf_state *st;
1091 	int i;
1092 
1093 	for (i = 0; i < count; i++) {
1094 		sp = (struct pfsync_del_c *)(buf + len * i);
1095 
1096 		id_key.id = sp->id;
1097 		id_key.creatorid = sp->creatorid;
1098 
1099 		st = pf_find_state_byid(&id_key);
1100 		if (st == NULL) {
1101 			pfsyncstat_inc(pfsyncs_badstate);
1102 			continue;
1103 		}
1104 
1105 		SET(st->state_flags, PFSTATE_NOSYNC);
1106 		pf_remove_state(st);
1107 	}
1108 
1109 	return (0);
1110 }
1111 
1112 int
1113 pfsync_in_bus(caddr_t buf, int len, int count, int flags)
1114 {
1115 	struct pfsync_softc *sc = pfsyncif;
1116 	struct pfsync_bus *bus;
1117 
1118 	/* If we're not waiting for a bulk update, who cares. */
1119 	if (sc->sc_ureq_sent == 0)
1120 		return (0);
1121 
1122 	bus = (struct pfsync_bus *)buf;
1123 
1124 	switch (bus->status) {
1125 	case PFSYNC_BUS_START:
1126 		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
1127 		    pf_pool_limits[PF_LIMIT_STATES].limit /
1128 		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
1129 		    sizeof(struct pfsync_state)));
1130 		DPFPRINTF(LOG_INFO, "received bulk update start");
1131 		break;
1132 
1133 	case PFSYNC_BUS_END:
1134 		if (time_uptime - ntohl(bus->endtime) >=
1135 		    sc->sc_ureq_sent) {
1136 			/* that's it, we're happy */
1137 			sc->sc_ureq_sent = 0;
1138 			sc->sc_bulk_tries = 0;
1139 			timeout_del(&sc->sc_bulkfail_tmo);
1140 #if NCARP > 0
1141 			if (!pfsync_sync_ok)
1142 				carp_group_demote_adj(&sc->sc_if, -1,
1143 				    sc->sc_link_demoted ?
1144 				    "pfsync link state up" :
1145 				    "pfsync bulk done");
1146 			if (sc->sc_initial_bulk) {
1147 				carp_group_demote_adj(&sc->sc_if, -32,
1148 				    "pfsync init");
1149 				sc->sc_initial_bulk = 0;
1150 			}
1151 #endif
1152 			pfsync_sync_ok = 1;
1153 			sc->sc_link_demoted = 0;
1154 			DPFPRINTF(LOG_INFO, "received valid bulk update end");
1155 		} else {
1156 			DPFPRINTF(LOG_WARNING, "received invalid "
1157 			    "bulk update end: bad timestamp");
1158 		}
1159 		break;
1160 	}
1161 
1162 	return (0);
1163 }
1164 
1165 int
1166 pfsync_in_tdb(caddr_t buf, int len, int count, int flags)
1167 {
1168 #if defined(IPSEC)
1169 	struct pfsync_tdb *tp;
1170 	int i;
1171 
1172 	for (i = 0; i < count; i++) {
1173 		tp = (struct pfsync_tdb *)(buf + len * i);
1174 		pfsync_update_net_tdb(tp);
1175 	}
1176 #endif
1177 
1178 	return (0);
1179 }
1180 
1181 #if defined(IPSEC)
1182 /* Update an in-kernel tdb. Silently fail if no tdb is found. */
1183 void
1184 pfsync_update_net_tdb(struct pfsync_tdb *pt)
1185 {
1186 	struct tdb		*tdb;
1187 
1188 	NET_ASSERT_LOCKED();
1189 
1190 	/* check for invalid values */
1191 	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
1192 	    (pt->dst.sa.sa_family != AF_INET &&
1193 	     pt->dst.sa.sa_family != AF_INET6))
1194 		goto bad;
1195 
1196 	tdb = gettdb(ntohs(pt->rdomain), pt->spi,
1197 	    (union sockaddr_union *)&pt->dst, pt->sproto);
1198 	if (tdb) {
1199 		pt->rpl = betoh64(pt->rpl);
1200 		pt->cur_bytes = betoh64(pt->cur_bytes);
1201 
1202 		/* Neither replay nor byte counter should ever decrease. */
1203 		if (pt->rpl < tdb->tdb_rpl ||
1204 		    pt->cur_bytes < tdb->tdb_cur_bytes) {
1205 			goto bad;
1206 		}
1207 
1208 		tdb->tdb_rpl = pt->rpl;
1209 		tdb->tdb_cur_bytes = pt->cur_bytes;
1210 	}
1211 	return;
1212 
1213  bad:
1214 	DPFPRINTF(LOG_WARNING, "pfsync_insert: PFSYNC_ACT_TDB_UPD: "
1215 	    "invalid value");
1216 	pfsyncstat_inc(pfsyncs_badstate);
1217 	return;
1218 }
1219 #endif
1220 
1221 
1222 int
1223 pfsync_in_eof(caddr_t buf, int len, int count, int flags)
1224 {
1225 	if (len > 0 || count > 0)
1226 		pfsyncstat_inc(pfsyncs_badact);
1227 
1228 	/* we're done. let the caller return */
1229 	return (1);
1230 }
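/*
 * Note that pfsync_acts[] maps PFSYNC_ACT_EOF to pfsync_in_error()
 * in this revision, so pfsync_in_eof() appears to be unused; the end
 * of a frame is determined from the header length instead.
 */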
1231 
1232 int
1233 pfsync_in_error(caddr_t buf, int len, int count, int flags)
1234 {
1235 	pfsyncstat_inc(pfsyncs_badact);
1236 	return (-1);
1237 }
1238 
1239 int
1240 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
1241 	struct rtentry *rt)
1242 {
1243 	m_freem(m);	/* drop packet */
1244 	return (EAFNOSUPPORT);
1245 }
1246 
1247 int
1248 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1249 {
1250 	struct proc *p = curproc;
1251 	struct pfsync_softc *sc = ifp->if_softc;
1252 	struct ifreq *ifr = (struct ifreq *)data;
1253 	struct ip_moptions *imo = &sc->sc_imo;
1254 	struct pfsyncreq pfsyncr;
1255 	struct ifnet    *sifp;
1256 	struct ip *ip;
1257 	int error;
1258 
1259 	switch (cmd) {
1260 	case SIOCSIFFLAGS:
1261 		if ((ifp->if_flags & IFF_RUNNING) == 0 &&
1262 		    (ifp->if_flags & IFF_UP)) {
1263 			ifp->if_flags |= IFF_RUNNING;
1264 
1265 #if NCARP > 0
1266 			sc->sc_initial_bulk = 1;
1267 			carp_group_demote_adj(&sc->sc_if, 32, "pfsync init");
1268 #endif
1269 
1270 			pfsync_request_full_update(sc);
1271 		}
1272 		if ((ifp->if_flags & IFF_RUNNING) &&
1273 		    (ifp->if_flags & IFF_UP) == 0) {
1274 			ifp->if_flags &= ~IFF_RUNNING;
1275 
1276 			/* drop everything */
1277 			timeout_del(&sc->sc_tmo);
1278 			pfsync_drop(sc);
1279 
1280 			pfsync_cancel_full_update(sc);
1281 		}
1282 		break;
1283 	case SIOCSIFMTU:
1284 		if (!sc->sc_sync_if ||
1285 		    ifr->ifr_mtu <= PFSYNC_MINPKT ||
1286 		    ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
1287 			return (EINVAL);
1288 		if (ifr->ifr_mtu < ifp->if_mtu)
1289 			pfsync_sendout();
1290 		ifp->if_mtu = ifr->ifr_mtu;
1291 		break;
1292 	case SIOCGETPFSYNC:
1293 		bzero(&pfsyncr, sizeof(pfsyncr));
1294 		if (sc->sc_sync_if) {
1295 			strlcpy(pfsyncr.pfsyncr_syncdev,
1296 			    sc->sc_sync_if->if_xname, IFNAMSIZ);
1297 		}
1298 		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
1299 		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
1300 		pfsyncr.pfsyncr_defer = sc->sc_defer;
1301 		return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));
1302 
1303 	case SIOCSETPFSYNC:
1304 		if ((error = suser(p)) != 0)
1305 			return (error);
1306 		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
1307 			return (error);
1308 
1309 		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
1310 			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
1311 		else
1312 			sc->sc_sync_peer.s_addr =
1313 			    pfsyncr.pfsyncr_syncpeer.s_addr;
1314 
1315 		if (pfsyncr.pfsyncr_maxupdates > 255)
1316 			return (EINVAL);
1317 		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
1318 
1319 		sc->sc_defer = pfsyncr.pfsyncr_defer;
1320 
1321 		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
1322 			if (sc->sc_sync_if) {
1323 				hook_disestablish(
1324 				    sc->sc_sync_if->if_linkstatehooks,
1325 				    sc->sc_lhcookie);
1326 				hook_disestablish(
1327 				    sc->sc_sync_if->if_detachhooks,
1328 				    sc->sc_dhcookie);
1329 			}
1330 			sc->sc_sync_if = NULL;
1331 			if (imo->imo_num_memberships > 0) {
1332 				in_delmulti(imo->imo_membership[
1333 				    --imo->imo_num_memberships]);
1334 				imo->imo_ifidx = 0;
1335 			}
1336 			break;
1337 		}
1338 
1339 		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL)
1340 			return (EINVAL);
1341 
1342 		if (sifp->if_mtu < sc->sc_if.if_mtu ||
1343 		    (sc->sc_sync_if != NULL &&
1344 		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
1345 		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
1346 			pfsync_sendout();
1347 
1348 		if (sc->sc_sync_if) {
1349 			hook_disestablish(
1350 			    sc->sc_sync_if->if_linkstatehooks,
1351 			    sc->sc_lhcookie);
1352 			hook_disestablish(
1353 			    sc->sc_sync_if->if_detachhooks,
1354 			    sc->sc_dhcookie);
1355 		}
1356 		sc->sc_sync_if = sifp;
1357 
1358 		if (imo->imo_num_memberships > 0) {
1359 			in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1360 			imo->imo_ifidx = 0;
1361 		}
1362 
1363 		if (sc->sc_sync_if &&
1364 		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
1365 			struct in_addr addr;
1366 
1367 			if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) {
1368 				sc->sc_sync_if = NULL;
1369 				return (EADDRNOTAVAIL);
1370 			}
1371 
1372 			addr.s_addr = INADDR_PFSYNC_GROUP;
1373 
1374 			if ((imo->imo_membership[0] =
1375 			    in_addmulti(&addr, sc->sc_sync_if)) == NULL) {
1376 				sc->sc_sync_if = NULL;
1377 				return (ENOBUFS);
1378 			}
1379 			imo->imo_num_memberships++;
1380 			imo->imo_ifidx = sc->sc_sync_if->if_index;
1381 			imo->imo_ttl = PFSYNC_DFLTTL;
1382 			imo->imo_loop = 0;
1383 		}
1384 
1385 		ip = &sc->sc_template;
1386 		bzero(ip, sizeof(*ip));
1387 		ip->ip_v = IPVERSION;
1388 		ip->ip_hl = sizeof(sc->sc_template) >> 2;
1389 		ip->ip_tos = IPTOS_LOWDELAY;
1390 		/* len and id are set later */
1391 		ip->ip_off = htons(IP_DF);
1392 		ip->ip_ttl = PFSYNC_DFLTTL;
1393 		ip->ip_p = IPPROTO_PFSYNC;
1394 		ip->ip_src.s_addr = INADDR_ANY;
1395 		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;
1396 
1397 		sc->sc_lhcookie =
1398 		    hook_establish(sc->sc_sync_if->if_linkstatehooks, 1,
1399 		    pfsync_syncdev_state, sc);
1400 		sc->sc_dhcookie = hook_establish(sc->sc_sync_if->if_detachhooks,
1401 		    0, pfsync_ifdetach, sc);
1402 
1403 		pfsync_request_full_update(sc);
1404 
1405 		break;
1406 
1407 	default:
1408 		return (ENOTTY);
1409 	}
1410 
1411 	return (0);
1412 }
1413 
1414 void
1415 pfsync_out_state(struct pf_state *st, void *buf)
1416 {
1417 	struct pfsync_state *sp = buf;
1418 
1419 	pfsync_state_export(sp, st);
1420 }
1421 
1422 void
1423 pfsync_out_iack(struct pf_state *st, void *buf)
1424 {
1425 	struct pfsync_ins_ack *iack = buf;
1426 
1427 	iack->id = st->id;
1428 	iack->creatorid = st->creatorid;
1429 }
1430 
1431 void
1432 pfsync_out_upd_c(struct pf_state *st, void *buf)
1433 {
1434 	struct pfsync_upd_c *up = buf;
1435 
1436 	bzero(up, sizeof(*up));
1437 	up->id = st->id;
1438 	pf_state_peer_hton(&st->src, &up->src);
1439 	pf_state_peer_hton(&st->dst, &up->dst);
1440 	up->creatorid = st->creatorid;
1441 	up->timeout = st->timeout;
1442 }
1443 
1444 void
1445 pfsync_out_del(struct pf_state *st, void *buf)
1446 {
1447 	struct pfsync_del_c *dp = buf;
1448 
1449 	dp->id = st->id;
1450 	dp->creatorid = st->creatorid;
1451 
1452 	SET(st->state_flags, PFSTATE_NOSYNC);
1453 }
1454 
1455 void
1456 pfsync_drop(struct pfsync_softc *sc)
1457 {
1458 	struct pf_state *st;
1459 	struct pfsync_upd_req_item *ur;
1460 	struct tdb *t;
1461 	int q;
1462 
1463 	for (q = 0; q < PFSYNC_S_COUNT; q++) {
1464 		if (TAILQ_EMPTY(&sc->sc_qs[q]))
1465 			continue;
1466 
1467 		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
1468 #ifdef PFSYNC_DEBUG
1469 			KASSERT(st->sync_state == q);
1470 #endif
1471 			st->sync_state = PFSYNC_S_NONE;
1472 		}
1473 		TAILQ_INIT(&sc->sc_qs[q]);
1474 	}
1475 
1476 	while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
1477 		TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
1478 		pool_put(&sc->sc_pool, ur);
1479 	}
1480 
1481 	sc->sc_plus = NULL;
1482 
1483 	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
1484 		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry)
1485 			CLR(t->tdb_flags, TDBF_PFSYNC);
1486 
1487 		TAILQ_INIT(&sc->sc_tdb_q);
1488 	}
1489 
1490 	sc->sc_len = PFSYNC_MINPKT;
1491 }
1492 
1493 void
1494 pfsync_sendout(void)
1495 {
1496 	struct pfsync_softc *sc = pfsyncif;
1497 #if NBPFILTER > 0
1498 	struct ifnet *ifp = &sc->sc_if;
1499 #endif
1500 	struct mbuf *m;
1501 	struct ip *ip;
1502 	struct pfsync_header *ph;
1503 	struct pfsync_subheader *subh;
1504 	struct pf_state *st;
1505 	struct pfsync_upd_req_item *ur;
1506 	struct tdb *t;
1507 
1508 	int offset;
1509 	int q, count = 0;
1510 
1511 	if (sc == NULL || sc->sc_len == PFSYNC_MINPKT)
1512 		return;
1513 
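	/*
	 * The #if below splices the bpf-listener check into the
	 * condition: with bpfilter compiled in, the frame is built if
	 * either a bpf listener or a sync interface is present;
	 * without it, only the sync interface counts.
	 */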
1514 	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
1515 #if NBPFILTER > 0
1516 	    (ifp->if_bpf == NULL && sc->sc_sync_if == NULL)) {
1517 #else
1518 	    sc->sc_sync_if == NULL) {
1519 #endif
1520 		pfsync_drop(sc);
1521 		return;
1522 	}
1523 
1524 	MGETHDR(m, M_DONTWAIT, MT_DATA);
1525 	if (m == NULL) {
1526 		sc->sc_if.if_oerrors++;
1527 		pfsyncstat_inc(pfsyncs_onomem);
1528 		pfsync_drop(sc);
1529 		return;
1530 	}
1531 
1532 	if (max_linkhdr + sc->sc_len > MHLEN) {
1533 		MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len);
1534 		if (!ISSET(m->m_flags, M_EXT)) {
1535 			m_free(m);
1536 			sc->sc_if.if_oerrors++;
1537 			pfsyncstat_inc(pfsyncs_onomem);
1538 			pfsync_drop(sc);
1539 			return;
1540 		}
1541 	}
1542 	m->m_data += max_linkhdr;
1543 	m->m_len = m->m_pkthdr.len = sc->sc_len;
1544 
1545 	/* build the ip header */
1546 	ip = mtod(m, struct ip *);
1547 	bcopy(&sc->sc_template, ip, sizeof(*ip));
1548 	offset = sizeof(*ip);
1549 
1550 	ip->ip_len = htons(m->m_pkthdr.len);
1551 	ip->ip_id = htons(ip_randomid());
1552 
1553 	/* build the pfsync header */
1554 	ph = (struct pfsync_header *)(m->m_data + offset);
1555 	bzero(ph, sizeof(*ph));
1556 	offset += sizeof(*ph);
1557 
1558 	ph->version = PFSYNC_VERSION;
1559 	ph->len = htons(sc->sc_len - sizeof(*ip));
1560 	bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
1561 
1562 	if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
1563 		subh = (struct pfsync_subheader *)(m->m_data + offset);
1564 		offset += sizeof(*subh);
1565 
1566 		count = 0;
1567 		while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
1568 			TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
1569 
1570 			bcopy(&ur->ur_msg, m->m_data + offset,
1571 			    sizeof(ur->ur_msg));
1572 			offset += sizeof(ur->ur_msg);
1573 
1574 			pool_put(&sc->sc_pool, ur);
1575 
1576 			count++;
1577 		}
1578 
1579 		bzero(subh, sizeof(*subh));
1580 		subh->len = sizeof(ur->ur_msg) >> 2;
1581 		subh->action = PFSYNC_ACT_UPD_REQ;
1582 		subh->count = htons(count);
1583 	}
1584 
1585 	/* has someone built a custom region for us to add? */
1586 	if (sc->sc_plus != NULL) {
1587 		bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
1588 		offset += sc->sc_pluslen;
1589 
1590 		sc->sc_plus = NULL;
1591 	}
1592 
1593 	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
1594 		subh = (struct pfsync_subheader *)(m->m_data + offset);
1595 		offset += sizeof(*subh);
1596 
1597 		count = 0;
1598 		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) {
1599 			pfsync_out_tdb(t, m->m_data + offset);
1600 			offset += sizeof(struct pfsync_tdb);
1601 			CLR(t->tdb_flags, TDBF_PFSYNC);
1602 
1603 			count++;
1604 		}
1605 		TAILQ_INIT(&sc->sc_tdb_q);
1606 
1607 		bzero(subh, sizeof(*subh));
1608 		subh->action = PFSYNC_ACT_TDB;
1609 		subh->len = sizeof(struct pfsync_tdb) >> 2;
1610 		subh->count = htons(count);
1611 	}
1612 
1613 	/* walk the queues */
1614 	for (q = 0; q < PFSYNC_S_COUNT; q++) {
1615 		if (TAILQ_EMPTY(&sc->sc_qs[q]))
1616 			continue;
1617 
1618 		subh = (struct pfsync_subheader *)(m->m_data + offset);
1619 		offset += sizeof(*subh);
1620 
1621 		count = 0;
1622 		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
1623 #ifdef PFSYNC_DEBUG
1624 			KASSERT(st->sync_state == q);
1625 #endif
1626 			pfsync_qs[q].write(st, m->m_data + offset);
1627 			offset += pfsync_qs[q].len;
1628 
1629 			st->sync_state = PFSYNC_S_NONE;
1630 			count++;
1631 		}
1632 		TAILQ_INIT(&sc->sc_qs[q]);
1633 
1634 		bzero(subh, sizeof(*subh));
1635 		subh->action = pfsync_qs[q].action;
1636 		subh->len = pfsync_qs[q].len >> 2;
1637 		subh->count = htons(count);
1638 	}
1639 
1640 	/* we're done, let's put it on the wire */
1641 #if NBPFILTER > 0
1642 	if (ifp->if_bpf) {
1643 		m->m_data += sizeof(*ip);
1644 		m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
1645 		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
1646 		m->m_data -= sizeof(*ip);
1647 		m->m_len = m->m_pkthdr.len = sc->sc_len;
1648 	}
1649 
1650 	if (sc->sc_sync_if == NULL) {
1651 		sc->sc_len = PFSYNC_MINPKT;
1652 		m_freem(m);
1653 		return;
1654 	}
1655 #endif
1656 
1657 	/* start again */
1658 	sc->sc_len = PFSYNC_MINPKT;
1659 
1660 	sc->sc_if.if_opackets++;
1661 	sc->sc_if.if_obytes += m->m_pkthdr.len;
1662 
1663 	m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
1664 
1665 	if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL, 0) == 0)
1666 		pfsyncstat_inc(pfsyncs_opackets);
1667 	else
1668 		pfsyncstat_inc(pfsyncs_oerrors);
1669 }
1670 
1671 void
1672 pfsync_insert_state(struct pf_state *st)
1673 {
1674 	struct pfsync_softc *sc = pfsyncif;
1675 
1676 	NET_ASSERT_LOCKED();
1677 
1678 	if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) ||
1679 	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
1680 		SET(st->state_flags, PFSTATE_NOSYNC);
1681 		return;
1682 	}
1683 
1684 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
1685 	    ISSET(st->state_flags, PFSTATE_NOSYNC))
1686 		return;
1687 
1688 #ifdef PFSYNC_DEBUG
1689 	KASSERT(st->sync_state == PFSYNC_S_NONE);
1690 #endif
1691 
1692 	if (sc->sc_len == PFSYNC_MINPKT)
1693 		timeout_add_sec(&sc->sc_tmo, 1);
1694 
1695 	pfsync_q_ins(st, PFSYNC_S_INS);
1696 
1697 	st->sync_updates = 0;
1698 }
1699 
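/*
 * Deferral briefly holds on to the packet that created a state until
 * the peer acknowledges the insert (PFSYNC_ACT_INS_ACK via
 * pfsync_deferred()) or the 20ms timeout below fires.  This keeps
 * the peer from seeing reply traffic for a state it has not been
 * told about yet.
 */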
1700 int
1701 pfsync_defer(struct pf_state *st, struct mbuf *m)
1702 {
1703 	struct pfsync_softc *sc = pfsyncif;
1704 	struct pfsync_deferral *pd;
1705 
1706 	NET_ASSERT_LOCKED();
1707 
1708 	if (!sc->sc_defer ||
1709 	    ISSET(st->state_flags, PFSTATE_NOSYNC) ||
1710 	    m->m_flags & (M_BCAST|M_MCAST))
1711 		return (0);
1712 
1713 	if (sc->sc_deferred >= 128) {
1714 		pd = TAILQ_FIRST(&sc->sc_deferrals);
1715 		if (timeout_del(&pd->pd_tmo))
1716 			pfsync_undefer(pd, 0);
1717 	}
1718 
1719 	pd = pool_get(&sc->sc_pool, PR_NOWAIT);
1720 	if (pd == NULL)
1721 		return (0);
1722 
1723 	m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
1724 	SET(st->state_flags, PFSTATE_ACK);
1725 
1726 	pd->pd_st = st;
1727 	pd->pd_m = m;
1728 
1729 	sc->sc_deferred++;
1730 	TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
1731 
1732 	timeout_set_proc(&pd->pd_tmo, pfsync_defer_tmo, pd);
1733 	timeout_add_msec(&pd->pd_tmo, 20);
1734 
1735 	schednetisr(NETISR_PFSYNC);
1736 
1737 	return (1);
1738 }
1739 
1740 void
1741 pfsync_undefer(struct pfsync_deferral *pd, int drop)
1742 {
1743 	struct pfsync_softc *sc = pfsyncif;
1744 	struct pf_pdesc pdesc;
1745 
1746 	NET_ASSERT_LOCKED();
1747 
1748 	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
1749 	sc->sc_deferred--;
1750 
1751 	CLR(pd->pd_st->state_flags, PFSTATE_ACK);
1752 	if (drop)
1753 		m_freem(pd->pd_m);
1754 	else {
1755 		if (pd->pd_st->rule.ptr->rt == PF_ROUTETO) {
1756 			if (pf_setup_pdesc(&pdesc,
1757 			    pd->pd_st->key[PF_SK_WIRE]->af,
1758 			    pd->pd_st->direction, pd->pd_st->rt_kif,
1759 			    pd->pd_m, NULL) != PF_PASS) {
1760 				m_freem(pd->pd_m);
1761 				goto out;
1762 			}
1763 			switch (pd->pd_st->key[PF_SK_WIRE]->af) {
1764 			case AF_INET:
1765 				pf_route(&pdesc,
1766 				    pd->pd_st->rule.ptr, pd->pd_st);
1767 				break;
1768 #ifdef INET6
1769 			case AF_INET6:
1770 				pf_route6(&pdesc,
1771 				    pd->pd_st->rule.ptr, pd->pd_st);
1772 				break;
1773 #endif /* INET6 */
1774 			}
1775 			pd->pd_m = pdesc.m;
1776 		} else {
1777 			switch (pd->pd_st->key[PF_SK_WIRE]->af) {
1778 			case AF_INET:
1779 				ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL,
1780 				    0);
1781 				break;
1782 #ifdef INET6
1783 			case AF_INET6:
1784 				ip6_output(pd->pd_m, NULL, NULL, 0,
1785 				    NULL, NULL);
1786 				break;
1787 #endif /* INET6 */
1788 			}
1789 		}
1790 	}
1791  out:
1792 	pool_put(&sc->sc_pool, pd);
1793 }
1794 
1795 void
1796 pfsync_defer_tmo(void *arg)
1797 {
1798 	NET_LOCK();
1799 	pfsync_undefer(arg, 0);
1800 	NET_UNLOCK();
1801 }
1802 
1803 void
1804 pfsync_deferred(struct pf_state *st, int drop)
1805 {
1806 	struct pfsync_softc *sc = pfsyncif;
1807 	struct pfsync_deferral *pd;
1808 
1809 	NET_ASSERT_LOCKED();
1810 
1811 	TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
1812 		 if (pd->pd_st == st) {
1813 			if (timeout_del(&pd->pd_tmo))
1814 				pfsync_undefer(pd, drop);
1815 			return;
1816 		}
1817 	}
1818 
1819 	panic("pfsync_deferred: unable to find deferred state");
1820 }
1821 
1822 void
1823 pfsync_update_state(struct pf_state *st)
1824 {
1825 	struct pfsync_softc *sc = pfsyncif;
1826 	int sync = 0;
1827 
1828 	NET_ASSERT_LOCKED();
1829 
1830 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
1831 		return;
1832 
1833 	if (ISSET(st->state_flags, PFSTATE_ACK))
1834 		pfsync_deferred(st, 0);
1835 	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
1836 		if (st->sync_state != PFSYNC_S_NONE)
1837 			pfsync_q_del(st);
1838 		return;
1839 	}
1840 
1841 	if (sc->sc_len == PFSYNC_MINPKT)
1842 		timeout_add_sec(&sc->sc_tmo, 1);
1843 
1844 	switch (st->sync_state) {
1845 	case PFSYNC_S_UPD_C:
1846 	case PFSYNC_S_UPD:
1847 	case PFSYNC_S_INS:
1848 		/* we're already handling it */
1849 
1850 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
1851 			st->sync_updates++;
1852 			if (st->sync_updates >= sc->sc_maxupdates)
1853 				sync = 1;
1854 		}
1855 		break;
1856 
1857 	case PFSYNC_S_IACK:
1858 		pfsync_q_del(st);
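		/* FALLTHROUGH */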
1859 	case PFSYNC_S_NONE:
1860 		pfsync_q_ins(st, PFSYNC_S_UPD_C);
1861 		st->sync_updates = 0;
1862 		break;
1863 
1864 	default:
1865 		panic("pfsync_update_state: unexpected sync state %d",
1866 		    st->sync_state);
1867 	}
1868 
1869 	if (sync || (time_uptime - st->pfsync_time) < 2)
1870 		schednetisr(NETISR_PFSYNC);
1871 }
1872 
1873 void
1874 pfsync_cancel_full_update(struct pfsync_softc *sc)
1875 {
1876 	if (timeout_pending(&sc->sc_bulkfail_tmo) ||
1877 	    timeout_pending(&sc->sc_bulk_tmo)) {
1878 #if NCARP > 0
1879 		if (!pfsync_sync_ok)
1880 			carp_group_demote_adj(&sc->sc_if, -1,
1881 			    "pfsync bulk cancelled");
1882 		if (sc->sc_initial_bulk) {
1883 			carp_group_demote_adj(&sc->sc_if, -32,
1884 			    "pfsync init");
1885 			sc->sc_initial_bulk = 0;
1886 		}
1887 #endif
1888 		pfsync_sync_ok = 1;
1889 		DPFPRINTF(LOG_INFO, "cancelling bulk update");
1890 	}
1891 	timeout_del(&sc->sc_bulkfail_tmo);
1892 	timeout_del(&sc->sc_bulk_tmo);
1893 	sc->sc_bulk_next = NULL;
1894 	sc->sc_bulk_last = NULL;
1895 	sc->sc_ureq_sent = 0;
1896 	sc->sc_bulk_tries = 0;
1897 }
1898 
1899 void
1900 pfsync_request_full_update(struct pfsync_softc *sc)
1901 {
1902 	if (sc->sc_sync_if && ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
1903 		/* Request a full state table update. */
1904 		sc->sc_ureq_sent = time_uptime;
1905 #if NCARP > 0
1906 		if (!sc->sc_link_demoted && pfsync_sync_ok)
1907 			carp_group_demote_adj(&sc->sc_if, 1,
1908 			    "pfsync bulk start");
1909 #endif
1910 		pfsync_sync_ok = 0;
1911 		DPFPRINTF(LOG_INFO, "requesting bulk update");
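		/*
		 * Roughly: four seconds of slack plus one tick per
		 * full packet the peer would need to send to transfer
		 * the entire state table at the current MTU.
		 */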
1912 		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
1913 		    pf_pool_limits[PF_LIMIT_STATES].limit /
1914 		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
1915 		    sizeof(struct pfsync_state)));
1916 		pfsync_request_update(0, 0);
1917 	}
1918 }
1919 
1920 void
1921 pfsync_request_update(u_int32_t creatorid, u_int64_t id)
1922 {
1923 	struct pfsync_softc *sc = pfsyncif;
1924 	struct pfsync_upd_req_item *item;
1925 	size_t nlen = sizeof(struct pfsync_upd_req);
1926 
1927 	/*
1928 	 * this code does nothing to prevent multiple update requests for the
1929 	 * same state being generated.
1930 	 */
1931 
1932 	item = pool_get(&sc->sc_pool, PR_NOWAIT);
1933 	if (item == NULL) {
1934 		/* XXX stats */
1935 		return;
1936 	}
1937 
1938 	item->ur_msg.id = id;
1939 	item->ur_msg.creatorid = creatorid;
1940 
1941 	if (TAILQ_EMPTY(&sc->sc_upd_req_list))
1942 		nlen += sizeof(struct pfsync_subheader);
1943 
1944 	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
1945 		pfsync_sendout();
1946 
1947 		nlen = sizeof(struct pfsync_subheader) +
1948 		    sizeof(struct pfsync_upd_req);
1949 	}
1950 
1951 	TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
1952 	sc->sc_len += nlen;
1953 
1954 	schednetisr(NETISR_PFSYNC);
1955 }
1956 
1957 void
1958 pfsync_update_state_req(struct pf_state *st)
1959 {
1960 	struct pfsync_softc *sc = pfsyncif;
1961 
1962 	if (sc == NULL)
1963 		panic("pfsync_update_state_req: nonexistent instance");
1964 
1965 	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
1966 		if (st->sync_state != PFSYNC_S_NONE)
1967 			pfsync_q_del(st);
1968 		return;
1969 	}
1970 
1971 	switch (st->sync_state) {
1972 	case PFSYNC_S_UPD_C:
1973 	case PFSYNC_S_IACK:
1974 		pfsync_q_del(st);
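		/* FALLTHROUGH */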
1975 	case PFSYNC_S_NONE:
1976 		pfsync_q_ins(st, PFSYNC_S_UPD);
1977 		schednetisr(NETISR_PFSYNC);
1978 		return;
1979 
1980 	case PFSYNC_S_INS:
1981 	case PFSYNC_S_UPD:
1982 	case PFSYNC_S_DEL:
1983 		/* we're already handling it */
1984 		return;
1985 
1986 	default:
1987 		panic("pfsync_update_state_req: unexpected sync state %d",
1988 		    st->sync_state);
1989 	}
1990 }
1991 
1992 void
1993 pfsync_delete_state(struct pf_state *st)
1994 {
1995 	struct pfsync_softc *sc = pfsyncif;
1996 
1997 	NET_ASSERT_LOCKED();
1998 
1999 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
2000 		return;
2001 
2002 	if (ISSET(st->state_flags, PFSTATE_ACK))
2003 		pfsync_deferred(st, 1);
2004 	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
2005 		if (st->sync_state != PFSYNC_S_NONE)
2006 			pfsync_q_del(st);
2007 		return;
2008 	}
2009 
2010 	if (sc->sc_len == PFSYNC_MINPKT)
2011 		timeout_add_sec(&sc->sc_tmo, 1);
2012 
2013 	switch (st->sync_state) {
2014 	case PFSYNC_S_INS:
2015 		/* we never got to tell the world so just forget about it */
2016 		pfsync_q_del(st);
2017 		return;
2018 
2019 	case PFSYNC_S_UPD_C:
2020 	case PFSYNC_S_UPD:
2021 	case PFSYNC_S_IACK:
2022 		pfsync_q_del(st);
2023 		/* FALLTHROUGH to putting it on the del list */
2024 
2025 	case PFSYNC_S_NONE:
2026 		pfsync_q_ins(st, PFSYNC_S_DEL);
2027 		return;
2028 
2029 	default:
2030 		panic("pfsync_delete_state: unexpected sync state %d",
2031 		    st->sync_state);
2032 	}
2033 }
2034 
2035 void
2036 pfsync_clear_states(u_int32_t creatorid, const char *ifname)
2037 {
2038 	struct pfsync_softc *sc = pfsyncif;
2039 	struct {
2040 		struct pfsync_subheader subh;
2041 		struct pfsync_clr clr;
2042 	} __packed r;
2043 
2044 	NET_ASSERT_LOCKED();
2045 
2046 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
2047 		return;
2048 
2049 	bzero(&r, sizeof(r));
2050 
2051 	r.subh.action = PFSYNC_ACT_CLR;
2052 	r.subh.len = sizeof(struct pfsync_clr) >> 2;
2053 	r.subh.count = htons(1);
2054 
2055 	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
2056 	r.clr.creatorid = creatorid;
2057 
2058 	pfsync_send_plus(&r, sizeof(r));
2059 }
2060 
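/*
 * Append a state to queue q for the next packet, accounting for the
 * subheader that starts each message type and flushing the current
 * packet first if the state would not fit within the MTU.
 */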
2061 void
2062 pfsync_q_ins(struct pf_state *st, int q)
2063 {
2064 	struct pfsync_softc *sc = pfsyncif;
2065 	size_t nlen = pfsync_qs[q].len;
2066 
2067 	KASSERT(st->sync_state == PFSYNC_S_NONE);
2068 
2069 #if defined(PFSYNC_DEBUG)
2070 	if (sc->sc_len < PFSYNC_MINPKT)
 2071 		panic("pfsync pkt len is too low %zu", sc->sc_len);
2072 #endif
2073 	if (TAILQ_EMPTY(&sc->sc_qs[q]))
2074 		nlen += sizeof(struct pfsync_subheader);
2075 
2076 	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
2077 		pfsync_sendout();
2078 
2079 		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
2080 	}
2081 
2082 	sc->sc_len += nlen;
2083 	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
2084 	st->sync_state = q;
2085 }
2086 
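/*
 * Remove a state from its queue and give back the space it reserved,
 * including the subheader if the queue is now empty.
 */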
2087 void
2088 pfsync_q_del(struct pf_state *st)
2089 {
2090 	struct pfsync_softc *sc = pfsyncif;
2091 	int q = st->sync_state;
2092 
2093 	KASSERT(st->sync_state != PFSYNC_S_NONE);
2094 
2095 	sc->sc_len -= pfsync_qs[q].len;
2096 	TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
2097 	st->sync_state = PFSYNC_S_NONE;
2098 
2099 	if (TAILQ_EMPTY(&sc->sc_qs[q]))
2100 		sc->sc_len -= sizeof(struct pfsync_subheader);
2101 }
2102 
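/*
 * Queue an IPsec TDB (SA) for synchronisation, mirroring the state
 * queue accounting.  A TDB already on the queue only bumps its update
 * counter; once sc_maxupdates is reached, the netisr is scheduled to
 * push the packet out.  The RPL flag records whether the replay
 * counter should be bumped when the TDB is serialised (see
 * pfsync_out_tdb()).
 */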
2103 void
2104 pfsync_update_tdb(struct tdb *t, int output)
2105 {
2106 	struct pfsync_softc *sc = pfsyncif;
2107 	size_t nlen = sizeof(struct pfsync_tdb);
2108 
2109 	if (sc == NULL)
2110 		return;
2111 
2112 	if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
2113 		if (TAILQ_EMPTY(&sc->sc_tdb_q))
2114 			nlen += sizeof(struct pfsync_subheader);
2115 
2116 		if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
2117 			pfsync_sendout();
2118 
2119 			nlen = sizeof(struct pfsync_subheader) +
2120 			    sizeof(struct pfsync_tdb);
2121 		}
2122 
2123 		sc->sc_len += nlen;
2124 		TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
2125 		SET(t->tdb_flags, TDBF_PFSYNC);
2126 		t->tdb_updates = 0;
2127 	} else {
2128 		if (++t->tdb_updates >= sc->sc_maxupdates)
2129 			schednetisr(NETISR_PFSYNC);
2130 	}
2131 
2132 	if (output)
2133 		SET(t->tdb_flags, TDBF_PFSYNC_RPL);
2134 	else
2135 		CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
2136 }
2137 
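/*
 * Remove a TDB from the sync queue, undoing pfsync_update_tdb()'s
 * length accounting.
 */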
2138 void
2139 pfsync_delete_tdb(struct tdb *t)
2140 {
2141 	struct pfsync_softc *sc = pfsyncif;
2142 
2143 	if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
2144 		return;
2145 
2146 	sc->sc_len -= sizeof(struct pfsync_tdb);
2147 	TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
2148 	CLR(t->tdb_flags, TDBF_PFSYNC);
2149 
2150 	if (TAILQ_EMPTY(&sc->sc_tdb_q))
2151 		sc->sc_len -= sizeof(struct pfsync_subheader);
2152 }
2153 
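/*
 * Write the wire representation of t into buf, bumping the replay
 * counter for outbound TDBs as described below.
 */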
2154 void
2155 pfsync_out_tdb(struct tdb *t, void *buf)
2156 {
2157 	struct pfsync_tdb *ut = buf;
2158 
2159 	bzero(ut, sizeof(*ut));
2160 	ut->spi = t->tdb_spi;
2161 	bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
2162 	/*
2163 	 * When a failover happens, the master's rpl is probably above
2164 	 * what we see here (we may be up to a second late), so
2165 	 * increase it a bit for outbound tdbs to manage most such
2166 	 * situations.
2167 	 *
2168 	 * For now, just add an offset that is likely to be larger
2169 	 * than the number of packets we can see in one second. The RFC
2170 	 * just says the next packet must have a higher seq value.
2171 	 *
2172 	 * XXX What is a good algorithm for this? We could use
2173 	 * a rate-determined increase, but to know it, we would have
2174 	 * to extend struct tdb.
 2175 	 * XXX ut->rpl can wrap over MAXINT, but if so the real tdb
2176 	 * will soon be replaced anyway. For now, just don't handle
2177 	 * this edge case.
2178 	 */
2179 #define RPL_INCR 16384
2180 	ut->rpl = htobe64(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
2181 	    RPL_INCR : 0));
2182 	ut->cur_bytes = htobe64(t->tdb_cur_bytes);
2183 	ut->sproto = t->tdb_sproto;
2184 	ut->rdomain = htons(t->tdb_rdomain);
2185 }
2186 
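/*
 * Begin servicing a peer's bulk update request: remember where in
 * state_list the walk starts, announce PFSYNC_BUS_START, and kick off
 * pfsync_bulk_update() from a timeout.  An empty state table is
 * answered with an immediate PFSYNC_BUS_END.
 */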
2187 void
2188 pfsync_bulk_start(void)
2189 {
2190 	struct pfsync_softc *sc = pfsyncif;
2191 
2192 	DPFPRINTF(LOG_INFO, "received bulk update request");
2193 
2194 	if (TAILQ_EMPTY(&state_list))
2195 		pfsync_bulk_status(PFSYNC_BUS_END);
2196 	else {
2197 		sc->sc_ureq_received = time_uptime;
2198 
2199 		if (sc->sc_bulk_next == NULL)
2200 			sc->sc_bulk_next = TAILQ_FIRST(&state_list);
2201 		sc->sc_bulk_last = sc->sc_bulk_next;
2202 
2203 		pfsync_bulk_status(PFSYNC_BUS_START);
2204 		timeout_add(&sc->sc_bulk_tmo, 0);
2205 	}
2206 }
2207 
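/*
 * Walk state_list from sc_bulk_next, queueing a full update for every
 * live state that is not already queued and has not been touched since
 * the peer's request arrived.  The walk wraps around the list and
 * stops at sc_bulk_last; if a packet fills up along the way, the
 * position is saved and the walk resumes on the next timeout tick.
 */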
2208 void
2209 pfsync_bulk_update(void *arg)
2210 {
2211 	struct pfsync_softc *sc = arg;
2212 	struct pf_state *st;
2213 	int i = 0;
2214 
2215 	NET_LOCK();
2216 	st = sc->sc_bulk_next;
2217 
2218 	for (;;) {
2219 		if (st->sync_state == PFSYNC_S_NONE &&
2220 		    st->timeout < PFTM_MAX &&
2221 		    st->pfsync_time <= sc->sc_ureq_received) {
2222 			pfsync_update_state_req(st);
2223 			i++;
2224 		}
2225 
2226 		st = TAILQ_NEXT(st, entry_list);
2227 		if (st == NULL)
2228 			st = TAILQ_FIRST(&state_list);
2229 
2230 		if (st == sc->sc_bulk_last) {
2231 			/* we're done */
2232 			sc->sc_bulk_next = NULL;
2233 			sc->sc_bulk_last = NULL;
2234 			pfsync_bulk_status(PFSYNC_BUS_END);
2235 			break;
2236 		}
2237 
2238 		if (i > 1 && (sc->sc_if.if_mtu - sc->sc_len) <
2239 		    sizeof(struct pfsync_state)) {
2240 			/* we've filled a packet */
2241 			sc->sc_bulk_next = st;
2242 			timeout_add(&sc->sc_bulk_tmo, 1);
2243 			break;
2244 		}
2245 	}
2246 	NET_UNLOCK();
2247 }
2248 
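/*
 * Send a bulk update status message (start/end) to our peers, stamped
 * with how long the bulk transfer has been running.
 */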
2249 void
2250 pfsync_bulk_status(u_int8_t status)
2251 {
2252 	struct {
2253 		struct pfsync_subheader subh;
2254 		struct pfsync_bus bus;
2255 	} __packed r;
2256 
2257 	struct pfsync_softc *sc = pfsyncif;
2258 
2259 	bzero(&r, sizeof(r));
2260 
2261 	r.subh.action = PFSYNC_ACT_BUS;
2262 	r.subh.len = sizeof(struct pfsync_bus) >> 2;
2263 	r.subh.count = htons(1);
2264 
2265 	r.bus.creatorid = pf_status.hostid;
2266 	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
2267 	r.bus.status = status;
2268 
2269 	pfsync_send_plus(&r, sizeof(r));
2270 }
2271 
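/*
 * The bulk update we asked for did not complete in time.  Retry up to
 * PFSYNC_MAX_BULKTRIES times; after that, give up and undo the carp
 * demotion as if the transfer had succeeded, so a lone peer does not
 * stay demoted forever.
 */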
2272 void
2273 pfsync_bulk_fail(void *arg)
2274 {
2275 	struct pfsync_softc *sc = arg;
2276 
2277 	NET_LOCK();
2278 	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
2279 		/* Try again */
2280 		timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
2281 		pfsync_request_update(0, 0);
2282 	} else {
 2283 		/* Pretend the transfer was ok. */
2284 		sc->sc_ureq_sent = 0;
2285 		sc->sc_bulk_tries = 0;
2286 #if NCARP > 0
2287 		if (!pfsync_sync_ok)
2288 			carp_group_demote_adj(&sc->sc_if, -1,
2289 			    sc->sc_link_demoted ?
2290 			    "pfsync link state up" :
2291 			    "pfsync bulk fail");
2292 		if (sc->sc_initial_bulk) {
2293 			carp_group_demote_adj(&sc->sc_if, -32,
2294 			    "pfsync init");
2295 			sc->sc_initial_bulk = 0;
2296 		}
2297 #endif
2298 		pfsync_sync_ok = 1;
2299 		sc->sc_link_demoted = 0;
2300 		DPFPRINTF(LOG_ERR, "failed to receive bulk update");
2301 	}
2302 	NET_UNLOCK();
2303 }
2304 
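/*
 * Attach an out-of-band chunk (e.g. a clear or bulk status message) to
 * the current packet and transmit it immediately.
 */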
2305 void
2306 pfsync_send_plus(void *plus, size_t pluslen)
2307 {
2308 	struct pfsync_softc *sc = pfsyncif;
2309 
2310 	if (sc->sc_len + pluslen > sc->sc_if.if_mtu)
2311 		pfsync_sendout();
2312 
2313 	sc->sc_plus = plus;
2314 	sc->sc_len += (sc->sc_pluslen = pluslen);
2315 
2316 	pfsync_sendout();
2317 }
2318 
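/*
 * Report whether the pfsync interface exists and is running.
 */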
2319 int
2320 pfsync_up(void)
2321 {
2322 	struct pfsync_softc *sc = pfsyncif;
2323 
2324 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
2325 		return (0);
2326 
2327 	return (1);
2328 }
2329 
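/*
 * Report whether pfsync still holds a reference to st, either on a
 * queue or as a bulk transfer cursor, so pf knows not to free it.
 */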
2330 int
2331 pfsync_state_in_use(struct pf_state *st)
2332 {
2333 	struct pfsync_softc *sc = pfsyncif;
2334 
2335 	if (sc == NULL)
2336 		return (0);
2337 
2338 	if (st->sync_state != PFSYNC_S_NONE ||
2339 	    st == sc->sc_bulk_next ||
2340 	    st == sc->sc_bulk_last)
2341 		return (1);
2342 
2343 	return (0);
2344 }
2345 
2346 void
2347 pfsync_timeout(void *arg)
2348 {
2349 	NET_LOCK();
2350 	pfsync_sendout();
2351 	NET_UNLOCK();
2352 }
2353 
2354 /* this is a softnet/netisr handler */
2355 void
2356 pfsyncintr(void)
2357 {
2358 	pfsync_sendout();
2359 }
2360 
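/*
 * Snapshot the per-CPU pfsync counters into a struct pfsyncstats and
 * return it read-only to the sysctl caller.
 */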
2361 int
2362 pfsync_sysctl_pfsyncstat(void *oldp, size_t *oldlenp, void *newp)
2363 {
2364 	struct pfsyncstats pfsyncstat;
2365 
2366 	CTASSERT(sizeof(pfsyncstat) == (pfsyncs_ncounters * sizeof(uint64_t)));
2367 	memset(&pfsyncstat, 0, sizeof pfsyncstat);
2368 	counters_read(pfsynccounters, (uint64_t *)&pfsyncstat,
2369 	    pfsyncs_ncounters);
2370 	return (sysctl_rdstruct(oldp, oldlenp, newp,
2371 	    &pfsyncstat, sizeof(pfsyncstat)));
2372 }
2373 
2374 int
2375 pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
2376     size_t newlen)
2377 {
2378 	/* All sysctl names at this level are terminal. */
2379 	if (namelen != 1)
2380 		return (ENOTDIR);
2381 
2382 	switch (name[0]) {
2383 	case PFSYNCCTL_STATS:
2384 		return (pfsync_sysctl_pfsyncstat(oldp, oldlenp, newp));
2385 	default:
2386 		return (ENOPROTOOPT);
2387 	}
2388 }
2389