xref: /openbsd-src/sys/net/if_pfsync.c (revision 50b7afb2c2c0993b0894d4e34bf857cb13ed9c80)
1 /*	$OpenBSD: if_pfsync.c,v 1.207 2014/07/12 18:44:22 tedu Exp $	*/
2 
3 /*
4  * Copyright (c) 2002 Michael Shalayeff
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
20  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
24  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
25  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26  * THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
31  *
32  * Permission to use, copy, modify, and distribute this software for any
33  * purpose with or without fee is hereby granted, provided that the above
34  * copyright notice and this permission notice appear in all copies.
35  *
36  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
37  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
38  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
39  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
40  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
41  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
42  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
43  */
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/time.h>
48 #include <sys/malloc.h>
49 #include <sys/mbuf.h>
50 #include <sys/socket.h>
51 #include <sys/ioctl.h>
52 #include <sys/timeout.h>
53 #include <sys/kernel.h>
54 #include <sys/sysctl.h>
55 #include <sys/pool.h>
56 #include <sys/syslog.h>
57 
58 #include <net/if.h>
59 #include <net/if_types.h>
60 #include <net/route.h>
61 #include <net/bpf.h>
62 #include <net/netisr.h>
63 #include <netinet/in.h>
64 #include <netinet/if_ether.h>
65 #include <netinet/tcp.h>
66 #include <netinet/tcp_seq.h>
67 
68 #ifdef	INET
69 #include <netinet/in_systm.h>
70 #include <netinet/in_var.h>
71 #include <netinet/ip.h>
72 #include <netinet/ip_var.h>
73 #endif
74 
75 #ifdef INET6
76 #include <netinet6/in6_var.h>
77 #include <netinet/ip6.h>
78 #include <netinet/in_pcb.h>
79 #include <netinet/icmp6.h>
80 #include <netinet6/nd6.h>
81 #include <netinet6/ip6_divert.h>
82 #endif /* INET6 */
83 
84 #include "carp.h"
85 #if NCARP > 0
86 #include <netinet/ip_carp.h>
87 #endif
88 
89 #define PF_DEBUGNAME	"pfsync: "
90 #include <net/pfvar.h>
91 #include <net/if_pfsync.h>
92 
93 #include "bpfilter.h"
94 #include "pfsync.h"
95 
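/*
 * The smallest packet pfsync will emit: an IP header plus the pfsync
 * header, with no subheaders or messages.  sc_len is reset to this
 * after every pfsync_sendout().
 */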
96 #define PFSYNC_MINPKT ( \
97 	sizeof(struct ip) + \
98 	sizeof(struct pfsync_header))
99 
100 int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
101 	    struct pfsync_state_peer *);
102 
103 int	pfsync_in_clr(caddr_t, int, int, int);
104 int	pfsync_in_iack(caddr_t, int, int, int);
105 int	pfsync_in_upd_c(caddr_t, int, int, int);
106 int	pfsync_in_ureq(caddr_t, int, int, int);
107 int	pfsync_in_del(caddr_t, int, int, int);
108 int	pfsync_in_del_c(caddr_t, int, int, int);
109 int	pfsync_in_bus(caddr_t, int, int, int);
110 int	pfsync_in_tdb(caddr_t, int, int, int);
111 int	pfsync_in_ins(caddr_t, int, int, int);
112 int	pfsync_in_upd(caddr_t, int, int, int);
113 int	pfsync_in_eof(caddr_t, int, int, int);
114 
115 int	pfsync_in_error(caddr_t, int, int, int);
116 
117 struct {
118 	int	(*in)(caddr_t, int, int, int);
119 	size_t	len;
120 } pfsync_acts[] = {
121 	/* PFSYNC_ACT_CLR */
122 	{ pfsync_in_clr,	sizeof(struct pfsync_clr) },
123 	/* PFSYNC_ACT_OINS */
124 	{ pfsync_in_error,	0 },
125 	/* PFSYNC_ACT_INS_ACK */
126 	{ pfsync_in_iack,	sizeof(struct pfsync_ins_ack) },
127 	/* PFSYNC_ACT_OUPD */
128 	{ pfsync_in_error,	0 },
129 	/* PFSYNC_ACT_UPD_C */
130 	{ pfsync_in_upd_c,	sizeof(struct pfsync_upd_c) },
131 	/* PFSYNC_ACT_UPD_REQ */
132 	{ pfsync_in_ureq,	sizeof(struct pfsync_upd_req) },
133 	/* PFSYNC_ACT_DEL */
134 	{ pfsync_in_del,	sizeof(struct pfsync_state) },
135 	/* PFSYNC_ACT_DEL_C */
136 	{ pfsync_in_del_c,	sizeof(struct pfsync_del_c) },
137 	/* PFSYNC_ACT_INS_F */
138 	{ pfsync_in_error,	0 },
139 	/* PFSYNC_ACT_DEL_F */
140 	{ pfsync_in_error,	0 },
141 	/* PFSYNC_ACT_BUS */
142 	{ pfsync_in_bus,	sizeof(struct pfsync_bus) },
143 	/* PFSYNC_ACT_OTDB */
144 	{ pfsync_in_error,	0 },
145 	/* PFSYNC_ACT_EOF */
146 	{ pfsync_in_error,	0 },
147 	/* PFSYNC_ACT_INS */
148 	{ pfsync_in_ins,	sizeof(struct pfsync_state) },
149 	/* PFSYNC_ACT_UPD */
150 	{ pfsync_in_upd,	sizeof(struct pfsync_state) },
151 	/* PFSYNC_ACT_TDB */
152 	{ pfsync_in_tdb,	sizeof(struct pfsync_tdb) },
153 };
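
/*
 * pfsync_input() dispatches through this table using the subheader
 * action as the index; .len is the minimum size of one message body
 * for that action.  Obsolete actions are routed to pfsync_in_error().
 */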
154 
155 struct pfsync_q {
156 	void		(*write)(struct pf_state *, void *);
157 	size_t		len;
158 	u_int8_t	action;
159 };
160 
161 /* we have one of these for every PFSYNC_S_ */
162 void	pfsync_out_state(struct pf_state *, void *);
163 void	pfsync_out_iack(struct pf_state *, void *);
164 void	pfsync_out_upd_c(struct pf_state *, void *);
165 void	pfsync_out_del(struct pf_state *, void *);
166 
167 struct pfsync_q pfsync_qs[] = {
168 	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
169 	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
170 	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C },
171 	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
172 	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD }
173 };
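
/*
 * The order of these entries must line up with the PFSYNC_S_* queue
 * constants that pfsync_q_ins() uses to index both pfsync_qs[] and
 * the softc's sc_qs[].
 */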
174 
175 void	pfsync_q_ins(struct pf_state *, int);
176 void	pfsync_q_del(struct pf_state *);
177 
178 struct pfsync_upd_req_item {
179 	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
180 	struct pfsync_upd_req			ur_msg;
181 };
182 TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);
183 
184 struct pfsync_deferral {
185 	TAILQ_ENTRY(pfsync_deferral)		 pd_entry;
186 	struct pf_state				*pd_st;
187 	struct mbuf				*pd_m;
188 	struct timeout				 pd_tmo;
189 };
190 TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);
191 
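/* A single pool (sc_pool) backs both item types, so size it for the larger. */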
192 #define PFSYNC_PLSIZE	MAX(sizeof(struct pfsync_upd_req_item), \
193 			    sizeof(struct pfsync_deferral))
194 
195 void	pfsync_out_tdb(struct tdb *, void *);
196 
197 struct pfsync_softc {
198 	struct ifnet		 sc_if;
199 	struct ifnet		*sc_sync_if;
200 
201 	struct pool		 sc_pool;
202 
203 	struct ip_moptions	 sc_imo;
204 
205 	struct in_addr		 sc_sync_peer;
206 	u_int8_t		 sc_maxupdates;
207 
208 	struct ip		 sc_template;
209 
210 	struct pf_state_queue	 sc_qs[PFSYNC_S_COUNT];
211 	size_t			 sc_len;
212 
213 	struct pfsync_upd_reqs	 sc_upd_req_list;
214 
215 	int			 sc_initial_bulk;
216 	int			 sc_link_demoted;
217 
218 	int			 sc_defer;
219 	struct pfsync_deferrals	 sc_deferrals;
220 	u_int			 sc_deferred;
221 
222 	void			*sc_plus;
223 	size_t			 sc_pluslen;
224 
225 	u_int32_t		 sc_ureq_sent;
226 	int			 sc_bulk_tries;
227 	struct timeout		 sc_bulkfail_tmo;
228 
229 	u_int32_t		 sc_ureq_received;
230 	struct pf_state		*sc_bulk_next;
231 	struct pf_state		*sc_bulk_last;
232 	struct timeout		 sc_bulk_tmo;
233 
234 	TAILQ_HEAD(, tdb)	 sc_tdb_q;
235 
236 	void			*sc_lhcookie;
237 
238 	struct timeout		 sc_tmo;
239 };
240 
241 struct pfsync_softc	*pfsyncif = NULL;
242 struct pfsyncstats	 pfsyncstats;
243 
244 void	pfsyncattach(int);
245 int	pfsync_clone_create(struct if_clone *, int);
246 int	pfsync_clone_destroy(struct ifnet *);
247 int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
248 	    struct pf_state_peer *);
249 void	pfsync_update_net_tdb(struct pfsync_tdb *);
250 int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
251 	    struct rtentry *);
252 int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
253 void	pfsyncstart(struct ifnet *);
254 void	pfsync_syncdev_state(void *);
255 
256 struct mbuf *pfsync_if_dequeue(struct ifnet *);
257 
258 void	pfsync_deferred(struct pf_state *, int);
259 void	pfsync_undefer(struct pfsync_deferral *, int);
260 void	pfsync_defer_tmo(void *);
261 
262 void	pfsync_cancel_full_update(struct pfsync_softc *);
263 void	pfsync_request_full_update(struct pfsync_softc *);
264 void	pfsync_request_update(u_int32_t, u_int64_t);
265 void	pfsync_update_state_req(struct pf_state *);
266 
267 void	pfsync_drop(struct pfsync_softc *);
268 void	pfsync_sendout(void);
269 void	pfsync_send_plus(void *, size_t);
270 void	pfsync_timeout(void *);
271 void	pfsync_tdb_timeout(void *);
272 
273 void	pfsync_bulk_start(void);
274 void	pfsync_bulk_status(u_int8_t);
275 void	pfsync_bulk_update(void *);
276 void	pfsync_bulk_fail(void *);
277 
278 #define PFSYNC_MAX_BULKTRIES	12
279 int	pfsync_sync_ok;
280 
281 struct if_clone	pfsync_cloner =
282     IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);
283 
284 void
285 pfsyncattach(int npfsync)
286 {
287 	if_clone_attach(&pfsync_cloner);
288 }
289 
290 int
291 pfsync_clone_create(struct if_clone *ifc, int unit)
292 {
293 	struct pfsync_softc *sc;
294 	struct ifnet *ifp;
295 	int q;
296 
297 	if (unit != 0)
298 		return (EINVAL);
299 
300 	pfsync_sync_ok = 1;
301 
302 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
303 
304 	for (q = 0; q < PFSYNC_S_COUNT; q++)
305 		TAILQ_INIT(&sc->sc_qs[q]);
306 
307 	pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, 0, 0, "pfsync", NULL);
308 	TAILQ_INIT(&sc->sc_upd_req_list);
309 	TAILQ_INIT(&sc->sc_deferrals);
310 	sc->sc_deferred = 0;
311 
312 	TAILQ_INIT(&sc->sc_tdb_q);
313 
314 	sc->sc_len = PFSYNC_MINPKT;
315 	sc->sc_maxupdates = 128;
316 
317 	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
318 	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
319 	    M_WAITOK | M_ZERO);
320 	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
321 
322 	ifp = &sc->sc_if;
323 	snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
324 	ifp->if_softc = sc;
325 	ifp->if_ioctl = pfsyncioctl;
326 	ifp->if_output = pfsyncoutput;
327 	ifp->if_start = pfsyncstart;
328 	ifp->if_type = IFT_PFSYNC;
329 	IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
330 	ifp->if_hdrlen = sizeof(struct pfsync_header);
331 	ifp->if_mtu = ETHERMTU;
332 	timeout_set(&sc->sc_tmo, pfsync_timeout, sc);
333 	timeout_set(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
334 	timeout_set(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);
335 
336 	if_attach(ifp);
337 	if_alloc_sadl(ifp);
338 
339 #if NCARP > 0
340 	if_addgroup(ifp, "carp");
341 #endif
342 
343 #if NBPFILTER > 0
344 	bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
345 #endif
346 
347 	pfsyncif = sc;
348 
349 	return (0);
350 }
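
/*
 * Illustrative userland setup (interface unit and peer address are
 * examples only); these requests reach this driver through
 * pfsyncioctl() as SIOCSIFFLAGS and SIOCSETPFSYNC:
 *
 *	# ifconfig pfsync0 syncdev em1 up
 *	# ifconfig pfsync0 syncpeer 192.0.2.2 maxupd 64 defer
 */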
351 
352 int
353 pfsync_clone_destroy(struct ifnet *ifp)
354 {
355 	struct pfsync_softc *sc = ifp->if_softc;
356 	struct pfsync_deferral *pd;
357 	int s;
358 
359 	s = splsoftnet();
360 	timeout_del(&sc->sc_bulkfail_tmo);
361 	timeout_del(&sc->sc_bulk_tmo);
362 	timeout_del(&sc->sc_tmo);
363 #if NCARP > 0
364 	if (!pfsync_sync_ok)
365 		carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
366 	if (sc->sc_link_demoted)
367 		carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
368 #endif
369 	if (sc->sc_sync_if)
370 		hook_disestablish(
371 		    sc->sc_sync_if->if_linkstatehooks,
372 		    sc->sc_lhcookie);
373 	if_detach(ifp);
374 
375 	pfsync_drop(sc);
376 
377 	while (sc->sc_deferred > 0) {
378 		pd = TAILQ_FIRST(&sc->sc_deferrals);
379 		timeout_del(&pd->pd_tmo);
380 		pfsync_undefer(pd, 0);
381 	}
382 
383 	pool_destroy(&sc->sc_pool);
384 	free(sc->sc_imo.imo_membership, M_IPMOPTS, 0);
385 	free(sc, M_DEVBUF, 0);
386 
387 	pfsyncif = NULL;
388 	splx(s);
389 
390 	return (0);
391 }
392 
393 struct mbuf *
394 pfsync_if_dequeue(struct ifnet *ifp)
395 {
396 	struct mbuf *m;
397 
398 	IF_DEQUEUE(&ifp->if_snd, m);
399 
400 	return (m);
401 }
402 
403 /*
404  * Start output on the pfsync interface.
405  */
406 void
407 pfsyncstart(struct ifnet *ifp)
408 {
409 	struct mbuf *m;
410 	int s;
411 
412 	s = splnet();
413 	while ((m = pfsync_if_dequeue(ifp)) != NULL) {
414 		IF_DROP(&ifp->if_snd);
415 		m_freem(m);
416 	}
417 	splx(s);
418 }
419 
420 void
421 pfsync_syncdev_state(void *arg)
422 {
423 	struct pfsync_softc *sc = arg;
424 
425 	if (!sc->sc_sync_if || !(sc->sc_if.if_flags & IFF_UP))
426 		return;
427 
428 	if (sc->sc_sync_if->if_link_state == LINK_STATE_DOWN) {
429 		sc->sc_if.if_flags &= ~IFF_RUNNING;
430 		if (!sc->sc_link_demoted) {
431 #if NCARP > 0
432 			carp_group_demote_adj(&sc->sc_if, 1,
433 			    "pfsync link state down");
434 #endif
435 			sc->sc_link_demoted = 1;
436 		}
437 
438 		/* drop everything */
439 		timeout_del(&sc->sc_tmo);
440 		pfsync_drop(sc);
441 
442 		pfsync_cancel_full_update(sc);
443 	} else if (sc->sc_link_demoted) {
444 		sc->sc_if.if_flags |= IFF_RUNNING;
445 
446 		pfsync_request_full_update(sc);
447 	}
448 }
449 
450 int
451 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
452     struct pf_state_peer *d)
453 {
454 	if (s->scrub.scrub_flag && d->scrub == NULL) {
455 		d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
456 		if (d->scrub == NULL)
457 			return (ENOMEM);
458 	}
459 
460 	return (0);
461 }
462 
463 void
464 pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
465 {
466 	pf_state_export(sp, st);
467 }
468 
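/*
 * Install a state received from a peer (or via the ioctl bulk import).
 * Everything in struct pfsync_state is in network byte order and is
 * converted here before insertion into the local state table.
 */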
469 int
470 pfsync_state_import(struct pfsync_state *sp, int flags)
471 {
472 	struct pf_state	*st = NULL;
473 	struct pf_state_key *skw = NULL, *sks = NULL;
474 	struct pf_rule *r = NULL;
475 	struct pfi_kif	*kif;
476 	int pool_flags;
477 	int error;
478 
479 	if (sp->creatorid == 0) {
480 		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
481 		    "invalid creator id: %08x", ntohl(sp->creatorid));
482 		return (EINVAL);
483 	}
484 
485 	if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
486 		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
487 		    "unknown interface: %s", sp->ifname);
488 		if (flags & PFSYNC_SI_IOCTL)
489 			return (EINVAL);
490 		return (0);	/* skip this state */
491 	}
492 
493 	if (sp->af == 0)
494 		return (0);	/* skip this state */
495 
496 	/*
497 	 * If the ruleset checksums match or the state is coming from the ioctl,
498 	 * it's safe to associate the state with the rule of that number.
499 	 */
500 	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
501 	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
502 	    pf_main_ruleset.rules.active.rcount)
503 		r = pf_main_ruleset.rules.active.ptr_array[ntohl(sp->rule)];
504 	else
505 		r = &pf_default_rule;
506 
507 	if (r->max_states && r->states_cur >= r->max_states)
508 		goto cleanup;
509 
510 	if (flags & PFSYNC_SI_IOCTL)
511 		pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
512 	else
513 		pool_flags = PR_NOWAIT | PR_LIMITFAIL | PR_ZERO;
514 
515 	if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
516 		goto cleanup;
517 
518 	if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
519 		goto cleanup;
520 
521 	if ((sp->key[PF_SK_WIRE].af &&
522 	    (sp->key[PF_SK_WIRE].af != sp->key[PF_SK_STACK].af)) ||
523 	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
524 	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
525 	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
526 	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
527 	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
528 	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] ||
529 	    sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) {
530 		if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
531 			goto cleanup;
532 	} else
533 		sks = skw;
534 
535 	/* allocate memory for scrub info */
536 	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
537 	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
538 		goto cleanup;
539 
540 	/* copy to state key(s) */
541 	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
542 	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
543 	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
544 	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
545 	skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain);
546 	skw->proto = sp->proto;
547 	if (!(skw->af = sp->key[PF_SK_WIRE].af))
548 		skw->af = sp->af;
549 	if (sks != skw) {
550 		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
551 		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
552 		sks->port[0] = sp->key[PF_SK_STACK].port[0];
553 		sks->port[1] = sp->key[PF_SK_STACK].port[1];
554 		sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain);
555 		if (!(sks->af = sp->key[PF_SK_STACK].af))
556 			sks->af = sp->af;
557 		if (sks->af != skw->af) {
558 			switch (sp->proto) {
559 			case IPPROTO_ICMP:
560 				sks->proto = IPPROTO_ICMPV6;
561 				break;
562 			case IPPROTO_ICMPV6:
563 				sks->proto = IPPROTO_ICMP;
564 				break;
565 			default:
566 				sks->proto = sp->proto;
567 			}
568 		} else
569 			sks->proto = sp->proto;
570 	}
571 	st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]);
572 	st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]);
573 
574 	/* copy to state */
575 	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
576 	st->creation = time_uptime - ntohl(sp->creation);
577 	st->expire = time_uptime;
578 	if (ntohl(sp->expire)) {
579 		u_int32_t timeout;
580 
581 		timeout = r->timeout[sp->timeout];
582 		if (!timeout)
583 			timeout = pf_default_rule.timeout[sp->timeout];
584 
585 		/* sp->expire may have been adaptively scaled by export. */
586 		st->expire -= timeout - ntohl(sp->expire);
587 	}
588 
589 	st->direction = sp->direction;
590 	st->log = sp->log;
591 	st->timeout = sp->timeout;
592 	st->state_flags = ntohs(sp->state_flags);
593 	st->max_mss = ntohs(sp->max_mss);
594 	st->min_ttl = sp->min_ttl;
595 	st->set_tos = sp->set_tos;
596 
597 	st->id = sp->id;
598 	st->creatorid = sp->creatorid;
599 	pf_state_peer_ntoh(&sp->src, &st->src);
600 	pf_state_peer_ntoh(&sp->dst, &st->dst);
601 
602 	st->rule.ptr = r;
603 	st->anchor.ptr = NULL;
604 	st->rt_kif = NULL;
605 
606 	st->pfsync_time = time_uptime;
607 	st->sync_state = PFSYNC_S_NONE;
608 
609 	/* XXX when we have anchors, use STATE_INC_COUNTERS */
610 	r->states_cur++;
611 	r->states_tot++;
612 
613 	if (!ISSET(flags, PFSYNC_SI_IOCTL))
614 		SET(st->state_flags, PFSTATE_NOSYNC);
615 
616 	if (pf_state_insert(kif, &skw, &sks, st) != 0) {
617 		/* XXX when we have anchors, use STATE_DEC_COUNTERS */
618 		r->states_cur--;
619 		error = EEXIST;
620 		goto cleanup_state;
621 	}
622 
623 	if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
624 		CLR(st->state_flags, PFSTATE_NOSYNC);
625 		if (ISSET(st->state_flags, PFSTATE_ACK)) {
626 			pfsync_q_ins(st, PFSYNC_S_IACK);
627 			schednetisr(NETISR_PFSYNC);
628 		}
629 	}
630 	CLR(st->state_flags, PFSTATE_ACK);
631 
632 	return (0);
633 
634  cleanup:
635 	error = ENOMEM;
636 	if (skw == sks)
637 		sks = NULL;
638 	if (skw != NULL)
639 		pool_put(&pf_state_key_pl, skw);
640 	if (sks != NULL)
641 		pool_put(&pf_state_key_pl, sks);
642 
643  cleanup_state:	/* pf_state_insert frees the state keys */
644 	if (st) {
645 		if (st->dst.scrub)
646 			pool_put(&pf_state_scrub_pl, st->dst.scrub);
647 		if (st->src.scrub)
648 			pool_put(&pf_state_scrub_pl, st->src.scrub);
649 		pool_put(&pf_state_pl, st);
650 	}
651 	return (error);
652 }
653 
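/*
 * Wire layout of a pfsync packet, as parsed below:
 *
 *	struct ip
 *	struct pfsync_header	(version, total length, pf checksum)
 *	struct pfsync_subheader	(action, message length >> 2, count)
 *	count * message		(sized per pfsync_acts[action].len)
 *	... more subheader/message runs, up to ph->len ...
 */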
654 void
655 pfsync_input(struct mbuf *m, ...)
656 {
657 	struct pfsync_softc *sc = pfsyncif;
658 	struct ip *ip = mtod(m, struct ip *);
659 	struct mbuf *mp;
660 	struct pfsync_header *ph;
661 	struct pfsync_subheader subh;
662 
663 	int offset, offp, len, count, mlen, flags = 0;
664 
665 	pfsyncstats.pfsyncs_ipackets++;
666 
667 	/* verify that we have a sync interface configured */
668 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
669 	    sc->sc_sync_if == NULL || !pf_status.running)
670 		goto done;
671 
672 	/* verify that the packet came in on the right interface */
673 	if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
674 		pfsyncstats.pfsyncs_badif++;
675 		goto done;
676 	}
677 
678 	sc->sc_if.if_ipackets++;
679 	sc->sc_if.if_ibytes += m->m_pkthdr.len;
680 
681 	/* verify that the IP TTL is 255. */
682 	if (ip->ip_ttl != PFSYNC_DFLTTL) {
683 		pfsyncstats.pfsyncs_badttl++;
684 		goto done;
685 	}
686 
687 	offset = ip->ip_hl << 2;
688 	mp = m_pulldown(m, offset, sizeof(*ph), &offp);
689 	if (mp == NULL) {
690 		pfsyncstats.pfsyncs_hdrops++;
691 		return;
692 	}
693 	ph = (struct pfsync_header *)(mp->m_data + offp);
694 
695 	/* verify the version */
696 	if (ph->version != PFSYNC_VERSION) {
697 		pfsyncstats.pfsyncs_badver++;
698 		goto done;
699 	}
700 	len = ntohs(ph->len) + offset;
701 	if (m->m_pkthdr.len < len) {
702 		pfsyncstats.pfsyncs_badlen++;
703 		goto done;
704 	}
705 
706 	if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
707 		flags = PFSYNC_SI_CKSUM;
708 
709 	offset += sizeof(*ph);
710 	while (offset <= len - sizeof(subh)) {
711 		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
712 		offset += sizeof(subh);
713 
714 		mlen = subh.len << 2;
715 		count = ntohs(subh.count);
716 
717 		if (subh.action >= PFSYNC_ACT_MAX ||
718 		    subh.action >= nitems(pfsync_acts) ||
719 		    mlen < pfsync_acts[subh.action].len) {
720 			/*
721 			 * subheaders are always followed by at least one
722 			 * message, so if the peer is new enough to tell us
723 			 * how big its messages are then we know enough to
724 			 * skip them.
725 			 */
726 			if (count > 0 && mlen > 0) {
727 				offset += count * mlen;
728 				continue;
729 			}
730 			pfsyncstats.pfsyncs_badact++;
731 			goto done;
732 		}
733 
734 		mp = m_pulldown(m, offset, mlen * count, &offp);
735 		if (mp == NULL) {
736 			pfsyncstats.pfsyncs_badlen++;
737 			return;
738 		}
739 
740 		if (pfsync_acts[subh.action].in(mp->m_data + offp,
741 		    mlen, count, flags) != 0)
742 			goto done;
743 
744 		offset += mlen * count;
745 	}
746 
747 done:
748 	m_freem(m);
749 }
750 
751 int
752 pfsync_in_clr(caddr_t buf, int len, int count, int flags)
753 {
754 	struct pfsync_clr *clr;
755 	int i;
756 
757 	struct pf_state *st, *nexts;
758 	struct pf_state_key *sk, *nextsk;
759 	struct pf_state_item *si;
760 	u_int32_t creatorid;
761 
762 	for (i = 0; i < count; i++) {
763 		clr = (struct pfsync_clr *)(buf + len * i);
764 		creatorid = clr->creatorid;
765 
766 		if (clr->ifname[0] == '\0') {
767 			for (st = RB_MIN(pf_state_tree_id, &tree_id);
768 			    st; st = nexts) {
769 				nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
770 				if (st->creatorid == creatorid) {
771 					SET(st->state_flags, PFSTATE_NOSYNC);
772 					pf_unlink_state(st);
773 				}
774 			}
775 		} else {
776 			if (pfi_kif_get(clr->ifname) == NULL)
777 				continue;
778 
779 			/* XXX correct? */
780 			for (sk = RB_MIN(pf_state_tree, &pf_statetbl);
781 			    sk; sk = nextsk) {
782 				nextsk = RB_NEXT(pf_state_tree,
783 				    &pf_statetbl, sk);
784 				TAILQ_FOREACH(si, &sk->states, entry) {
785 					if (si->s->creatorid == creatorid) {
786 						SET(si->s->state_flags,
787 						    PFSTATE_NOSYNC);
788 						pf_unlink_state(si->s);
789 					}
790 				}
791 			}
792 		}
793 	}
794 
795 	return (0);
796 }
797 
798 int
799 pfsync_in_ins(caddr_t buf, int len, int count, int flags)
800 {
801 	struct pfsync_state *sp;
802 	sa_family_t af1, af2;
803 	int i;
804 
805 	for (i = 0; i < count; i++) {
806 		sp = (struct pfsync_state *)(buf + len * i);
807 		af1 = sp->key[0].af;
808 		af2 = sp->key[1].af;
809 
810 		/* check for invalid values */
811 		if (sp->timeout >= PFTM_MAX ||
812 		    sp->src.state > PF_TCPS_PROXY_DST ||
813 		    sp->dst.state > PF_TCPS_PROXY_DST ||
814 		    sp->direction > PF_OUT ||
815 		    (((af1 || af2) &&
816 		     ((af1 != AF_INET && af1 != AF_INET6) ||
817 		      (af2 != AF_INET && af2 != AF_INET6))) ||
818 		    (sp->af != AF_INET && sp->af != AF_INET6))) {
819 			DPFPRINTF(LOG_NOTICE,
820 			    "pfsync_input: PFSYNC5_ACT_INS: invalid value");
821 			pfsyncstats.pfsyncs_badval++;
822 			continue;
823 		}
824 
825 		if (pfsync_state_import(sp, flags) == ENOMEM) {
826 			/* drop out, but process the rest of the actions */
827 			break;
828 		}
829 	}
830 
831 	return (0);
832 }
833 
834 int
835 pfsync_in_iack(caddr_t buf, int len, int count, int flags)
836 {
837 	struct pfsync_ins_ack *ia;
838 	struct pf_state_cmp id_key;
839 	struct pf_state *st;
840 	int i;
841 
842 	for (i = 0; i < count; i++) {
843 		ia = (struct pfsync_ins_ack *)(buf + len * i);
844 
845 		id_key.id = ia->id;
846 		id_key.creatorid = ia->creatorid;
847 
848 		st = pf_find_state_byid(&id_key);
849 		if (st == NULL)
850 			continue;
851 
852 		if (ISSET(st->state_flags, PFSTATE_ACK))
853 			pfsync_deferred(st, 0);
854 	}
855 
856 	return (0);
857 }
858 
859 int
860 pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
861     struct pfsync_state_peer *dst)
862 {
863 	int sync = 0;
864 
865 	/*
866 	 * The state should never go backwards except
867 	 * for syn-proxy states.  Neither should the
868 	 * sequence window slide backwards.
869 	 */
870 	if ((st->src.state > src->state &&
871 	    (st->src.state < PF_TCPS_PROXY_SRC ||
872 	    src->state >= PF_TCPS_PROXY_SRC)) ||
873 
874 	    (st->src.state == src->state &&
875 	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
876 		sync++;
877 	else
878 		pf_state_peer_ntoh(src, &st->src);
879 
880 	if ((st->dst.state > dst->state) ||
881 
882 	    (st->dst.state >= TCPS_SYN_SENT &&
883 	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
884 		sync++;
885 	else
886 		pf_state_peer_ntoh(dst, &st->dst);
887 
888 	return (sync);
889 }
890 
891 int
892 pfsync_in_upd(caddr_t buf, int len, int count, int flags)
893 {
894 	struct pfsync_state *sp;
895 	struct pf_state_cmp id_key;
896 	struct pf_state *st;
897 	int sync;
898 
899 	int i;
900 
901 	for (i = 0; i < count; i++) {
902 		sp = (struct pfsync_state *)(buf + len * i);
903 
904 		/* check for invalid values */
905 		if (sp->timeout >= PFTM_MAX ||
906 		    sp->src.state > PF_TCPS_PROXY_DST ||
907 		    sp->dst.state > PF_TCPS_PROXY_DST) {
908 			DPFPRINTF(LOG_NOTICE,
909 			    "pfsync_input: PFSYNC_ACT_UPD: invalid value");
910 			pfsyncstats.pfsyncs_badval++;
911 			continue;
912 		}
913 
914 		id_key.id = sp->id;
915 		id_key.creatorid = sp->creatorid;
916 
917 		st = pf_find_state_byid(&id_key);
918 		if (st == NULL) {
919 			/* insert the update */
920 			if (pfsync_state_import(sp, flags))
921 				pfsyncstats.pfsyncs_badstate++;
922 			continue;
923 		}
924 
925 		if (ISSET(st->state_flags, PFSTATE_ACK))
926 			pfsync_deferred(st, 1);
927 
928 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
929 			sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
930 		else {
931 			sync = 0;
932 
933 			/*
934 			 * Non-TCP protocol state machines always go
935 			 * forward.
936 			 */
937 			if (st->src.state > sp->src.state)
938 				sync++;
939 			else
940 				pf_state_peer_ntoh(&sp->src, &st->src);
941 
942 			if (st->dst.state > sp->dst.state)
943 				sync++;
944 			else
945 				pf_state_peer_ntoh(&sp->dst, &st->dst);
946 		}
947 
948 		if (sync < 2) {
949 			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
950 			pf_state_peer_ntoh(&sp->dst, &st->dst);
951 			st->expire = time_uptime;
952 			st->timeout = sp->timeout;
953 		}
954 		st->pfsync_time = time_uptime;
955 
956 		if (sync) {
957 			pfsyncstats.pfsyncs_stale++;
958 
959 			pfsync_update_state(st);
960 			schednetisr(NETISR_PFSYNC);
961 		}
962 	}
963 
964 	return (0);
965 }
966 
967 int
968 pfsync_in_upd_c(caddr_t buf, int len, int count, int flags)
969 {
970 	struct pfsync_upd_c *up;
971 	struct pf_state_cmp id_key;
972 	struct pf_state *st;
973 
974 	int sync;
975 
976 	int i;
977 
978 	for (i = 0; i < count; i++) {
979 		up = (struct pfsync_upd_c *)(buf + len * i);
980 
981 		/* check for invalid values */
982 		if (up->timeout >= PFTM_MAX ||
983 		    up->src.state > PF_TCPS_PROXY_DST ||
984 		    up->dst.state > PF_TCPS_PROXY_DST) {
985 			DPFPRINTF(LOG_NOTICE,
986 			    "pfsync_input: PFSYNC_ACT_UPD_C: invalid value");
987 			pfsyncstats.pfsyncs_badval++;
988 			continue;
989 		}
990 
991 		id_key.id = up->id;
992 		id_key.creatorid = up->creatorid;
993 
994 		st = pf_find_state_byid(&id_key);
995 		if (st == NULL) {
996 			/* We don't have this state. Ask for it. */
997 			pfsync_request_update(id_key.creatorid, id_key.id);
998 			continue;
999 		}
1000 
1001 		if (ISSET(st->state_flags, PFSTATE_ACK))
1002 			pfsync_deferred(st, 1);
1003 
1004 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
1005 			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
1006 		else {
1007 			sync = 0;
1008 			/*
1009 			 * Non-TCP protocol state machines always go
1010 			 * forward.
1011 			 */
1012 			if (st->src.state > up->src.state)
1013 				sync++;
1014 			else
1015 				pf_state_peer_ntoh(&up->src, &st->src);
1016 
1017 			if (st->dst.state > up->dst.state)
1018 				sync++;
1019 			else
1020 				pf_state_peer_ntoh(&up->dst, &st->dst);
1021 		}
1022 		if (sync < 2) {
1023 			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
1024 			pf_state_peer_ntoh(&up->dst, &st->dst);
1025 			st->expire = time_uptime;
1026 			st->timeout = up->timeout;
1027 		}
1028 		st->pfsync_time = time_uptime;
1029 
1030 		if (sync) {
1031 			pfsyncstats.pfsyncs_stale++;
1032 
1033 			pfsync_update_state(st);
1034 			schednetisr(NETISR_PFSYNC);
1035 		}
1036 	}
1037 
1038 	return (0);
1039 }
1040 
1041 int
1042 pfsync_in_ureq(caddr_t buf, int len, int count, int flags)
1043 {
1044 	struct pfsync_upd_req *ur;
1045 	int i;
1046 
1047 	struct pf_state_cmp id_key;
1048 	struct pf_state *st;
1049 
1050 	for (i = 0; i < count; i++) {
1051 		ur = (struct pfsync_upd_req *)(buf + len * i);
1052 
1053 		id_key.id = ur->id;
1054 		id_key.creatorid = ur->creatorid;
1055 
1056 		if (id_key.id == 0 && id_key.creatorid == 0)
1057 			pfsync_bulk_start();
1058 		else {
1059 			st = pf_find_state_byid(&id_key);
1060 			if (st == NULL) {
1061 				pfsyncstats.pfsyncs_badstate++;
1062 				continue;
1063 			}
1064 			if (ISSET(st->state_flags, PFSTATE_NOSYNC))
1065 				continue;
1066 
1067 			pfsync_update_state_req(st);
1068 		}
1069 	}
1070 
1071 	return (0);
1072 }
1073 
1074 int
1075 pfsync_in_del(caddr_t buf, int len, int count, int flags)
1076 {
1077 	struct pfsync_state *sp;
1078 	struct pf_state_cmp id_key;
1079 	struct pf_state *st;
1080 	int i;
1081 
1082 	for (i = 0; i < count; i++) {
1083 		sp = (struct pfsync_state *)(buf + len * i);
1084 
1085 		id_key.id = sp->id;
1086 		id_key.creatorid = sp->creatorid;
1087 
1088 		st = pf_find_state_byid(&id_key);
1089 		if (st == NULL) {
1090 			pfsyncstats.pfsyncs_badstate++;
1091 			continue;
1092 		}
1093 		SET(st->state_flags, PFSTATE_NOSYNC);
1094 		pf_unlink_state(st);
1095 	}
1096 
1097 	return (0);
1098 }
1099 
1100 int
1101 pfsync_in_del_c(caddr_t buf, int len, int count, int flags)
1102 {
1103 	struct pfsync_del_c *sp;
1104 	struct pf_state_cmp id_key;
1105 	struct pf_state *st;
1106 	int i;
1107 
1108 	for (i = 0; i < count; i++) {
1109 		sp = (struct pfsync_del_c *)(buf + len * i);
1110 
1111 		id_key.id = sp->id;
1112 		id_key.creatorid = sp->creatorid;
1113 
1114 		st = pf_find_state_byid(&id_key);
1115 		if (st == NULL) {
1116 			pfsyncstats.pfsyncs_badstate++;
1117 			continue;
1118 		}
1119 
1120 		SET(st->state_flags, PFSTATE_NOSYNC);
1121 		pf_unlink_state(st);
1122 	}
1123 
1124 	return (0);
1125 }
1126 
1127 int
1128 pfsync_in_bus(caddr_t buf, int len, int count, int flags)
1129 {
1130 	struct pfsync_softc *sc = pfsyncif;
1131 	struct pfsync_bus *bus;
1132 
1133 	/* If we're not waiting for a bulk update, who cares. */
1134 	if (sc->sc_ureq_sent == 0)
1135 		return (0);
1136 
1137 	bus = (struct pfsync_bus *)buf;
1138 
1139 	switch (bus->status) {
1140 	case PFSYNC_BUS_START:
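		/*
		 * The peer has started its run; re-arm the failure
		 * timeout with the same slack-plus-transfer-time
		 * estimate used in pfsync_request_full_update().
		 */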
1141 		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
1142 		    pf_pool_limits[PF_LIMIT_STATES].limit /
1143 		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
1144 		    sizeof(struct pfsync_state)));
1145 		DPFPRINTF(LOG_INFO, "received bulk update start");
1146 		break;
1147 
1148 	case PFSYNC_BUS_END:
1149 		if (time_uptime - ntohl(bus->endtime) >=
1150 		    sc->sc_ureq_sent) {
1151 			/* that's it, we're happy */
1152 			sc->sc_ureq_sent = 0;
1153 			sc->sc_bulk_tries = 0;
1154 			timeout_del(&sc->sc_bulkfail_tmo);
1155 #if NCARP > 0
1156 			if (!pfsync_sync_ok)
1157 				carp_group_demote_adj(&sc->sc_if, -1,
1158 				    sc->sc_link_demoted ?
1159 				    "pfsync link state up" :
1160 				    "pfsync bulk done");
1161 			if (sc->sc_initial_bulk) {
1162 				carp_group_demote_adj(&sc->sc_if, -32,
1163 				    "pfsync init");
1164 				sc->sc_initial_bulk = 0;
1165 			}
1166 #endif
1167 			pfsync_sync_ok = 1;
1168 			sc->sc_link_demoted = 0;
1169 			DPFPRINTF(LOG_INFO, "received valid bulk update end");
1170 		} else {
1171 			DPFPRINTF(LOG_WARNING, "received invalid "
1172 			    "bulk update end: bad timestamp");
1173 		}
1174 		break;
1175 	}
1176 
1177 	return (0);
1178 }
1179 
1180 int
1181 pfsync_in_tdb(caddr_t buf, int len, int count, int flags)
1182 {
1183 #if defined(IPSEC)
1184 	struct pfsync_tdb *tp;
1185 	int i;
1186 
1187 	for (i = 0; i < count; i++) {
1188 		tp = (struct pfsync_tdb *)(buf + len * i);
1189 		pfsync_update_net_tdb(tp);
1190 	}
1191 #endif
1192 
1193 	return (0);
1194 }
1195 
1196 #if defined(IPSEC)
1197 /* Update an in-kernel tdb. Silently fail if no tdb is found. */
1198 void
1199 pfsync_update_net_tdb(struct pfsync_tdb *pt)
1200 {
1201 	struct tdb		*tdb;
1202 	int			 s;
1203 
1204 	/* check for invalid values */
1205 	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
1206 	    (pt->dst.sa.sa_family != AF_INET &&
1207 	     pt->dst.sa.sa_family != AF_INET6))
1208 		goto bad;
1209 
1210 	s = splsoftnet();
1211 	tdb = gettdb(ntohs(pt->rdomain), pt->spi, &pt->dst, pt->sproto);
1212 	if (tdb) {
1213 		pt->rpl = betoh64(pt->rpl);
1214 		pt->cur_bytes = betoh64(pt->cur_bytes);
1215 
1216 		/* Neither replay nor byte counter should ever decrease. */
1217 		if (pt->rpl < tdb->tdb_rpl ||
1218 		    pt->cur_bytes < tdb->tdb_cur_bytes) {
1219 			splx(s);
1220 			goto bad;
1221 		}
1222 
1223 		tdb->tdb_rpl = pt->rpl;
1224 		tdb->tdb_cur_bytes = pt->cur_bytes;
1225 	}
1226 	splx(s);
1227 	return;
1228 
1229  bad:
1230 	DPFPRINTF(LOG_WARNING, "pfsync_update_net_tdb: PFSYNC_ACT_TDB: "
1231 	    "invalid value");
1232 	pfsyncstats.pfsyncs_badstate++;
1233 	return;
1234 }
1235 #endif
1236 
1237 
1238 int
1239 pfsync_in_eof(caddr_t buf, int len, int count, int flags)
1240 {
1241 	if (len > 0 || count > 0)
1242 		pfsyncstats.pfsyncs_badact++;
1243 
1244 	/* we're done. let the caller return */
1245 	return (1);
1246 }
1247 
1248 int
1249 pfsync_in_error(caddr_t buf, int len, int count, int flags)
1250 {
1251 	pfsyncstats.pfsyncs_badact++;
1252 	return (-1);
1253 }
1254 
1255 int
1256 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
1257 	struct rtentry *rt)
1258 {
1259 	m_freem(m);
1260 	return (0);
1261 }
1262 
1263 /* ARGSUSED */
1264 int
1265 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1266 {
1267 	struct proc *p = curproc;
1268 	struct pfsync_softc *sc = ifp->if_softc;
1269 	struct ifreq *ifr = (struct ifreq *)data;
1270 	struct ip_moptions *imo = &sc->sc_imo;
1271 	struct pfsyncreq pfsyncr;
1272 	struct ifnet    *sifp;
1273 	struct ip *ip;
1274 	int s, error;
1275 
1276 	switch (cmd) {
1277 #if 0
1278 	case SIOCSIFADDR:
1279 	case SIOCAIFADDR:
1280 	case SIOCSIFDSTADDR:
1281 #endif
1282 	case SIOCSIFFLAGS:
1283 		s = splnet();
1284 		if ((ifp->if_flags & IFF_RUNNING) == 0 &&
1285 		    (ifp->if_flags & IFF_UP)) {
1286 			ifp->if_flags |= IFF_RUNNING;
1287 
1288 #if NCARP > 0
1289 			sc->sc_initial_bulk = 1;
1290 			carp_group_demote_adj(&sc->sc_if, 32, "pfsync init");
1291 #endif
1292 
1293 			pfsync_request_full_update(sc);
1294 		}
1295 		if ((ifp->if_flags & IFF_RUNNING) &&
1296 		    (ifp->if_flags & IFF_UP) == 0) {
1297 			ifp->if_flags &= ~IFF_RUNNING;
1298 
1299 			/* drop everything */
1300 			timeout_del(&sc->sc_tmo);
1301 			pfsync_drop(sc);
1302 
1303 			pfsync_cancel_full_update(sc);
1304 		}
1305 		splx(s);
1306 		break;
1307 	case SIOCSIFMTU:
1308 		if (!sc->sc_sync_if ||
1309 		    ifr->ifr_mtu <= PFSYNC_MINPKT ||
1310 		    ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
1311 			return (EINVAL);
1312 		s = splnet();
1313 		if (ifr->ifr_mtu < ifp->if_mtu)
1314 			pfsync_sendout();
1315 		ifp->if_mtu = ifr->ifr_mtu;
1316 		splx(s);
1317 		break;
1318 	case SIOCGETPFSYNC:
1319 		bzero(&pfsyncr, sizeof(pfsyncr));
1320 		if (sc->sc_sync_if) {
1321 			strlcpy(pfsyncr.pfsyncr_syncdev,
1322 			    sc->sc_sync_if->if_xname, IFNAMSIZ);
1323 		}
1324 		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
1325 		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
1326 		pfsyncr.pfsyncr_defer = sc->sc_defer;
1327 		return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));
1328 
1329 	case SIOCSETPFSYNC:
1330 		if ((error = suser(p, 0)) != 0)
1331 			return (error);
1332 		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
1333 			return (error);
1334 
1335 		s = splnet();
1336 
1337 		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
1338 			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
1339 		else
1340 			sc->sc_sync_peer.s_addr =
1341 			    pfsyncr.pfsyncr_syncpeer.s_addr;
1342 
1343 		if (pfsyncr.pfsyncr_maxupdates > 255) {
1344 			splx(s);
1345 			return (EINVAL);
1346 		}
1347 		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
1348 
1349 		sc->sc_defer = pfsyncr.pfsyncr_defer;
1350 
1351 		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
1352 			if (sc->sc_sync_if)
1353 				hook_disestablish(
1354 				    sc->sc_sync_if->if_linkstatehooks,
1355 				    sc->sc_lhcookie);
1356 			sc->sc_sync_if = NULL;
1357 			if (imo->imo_num_memberships > 0) {
1358 				in_delmulti(imo->imo_membership[
1359 				    --imo->imo_num_memberships]);
1360 				imo->imo_multicast_ifp = NULL;
1361 			}
1362 			splx(s);
1363 			break;
1364 		}
1365 
1366 		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) {
1367 			splx(s);
1368 			return (EINVAL);
1369 		}
1370 
1371 		if (sifp->if_mtu < sc->sc_if.if_mtu ||
1372 		    (sc->sc_sync_if != NULL &&
1373 		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
1374 		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
1375 			pfsync_sendout();
1376 
1377 		if (sc->sc_sync_if)
1378 			hook_disestablish(
1379 			    sc->sc_sync_if->if_linkstatehooks,
1380 			    sc->sc_lhcookie);
1381 		sc->sc_sync_if = sifp;
1382 
1383 		if (imo->imo_num_memberships > 0) {
1384 			in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1385 			imo->imo_multicast_ifp = NULL;
1386 		}
1387 
1388 		if (sc->sc_sync_if &&
1389 		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
1390 			struct in_addr addr;
1391 
1392 			if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) {
1393 				sc->sc_sync_if = NULL;
1394 				splx(s);
1395 				return (EADDRNOTAVAIL);
1396 			}
1397 
1398 			addr.s_addr = INADDR_PFSYNC_GROUP;
1399 
1400 			if ((imo->imo_membership[0] =
1401 			    in_addmulti(&addr, sc->sc_sync_if)) == NULL) {
1402 				sc->sc_sync_if = NULL;
1403 				splx(s);
1404 				return (ENOBUFS);
1405 			}
1406 			imo->imo_num_memberships++;
1407 			imo->imo_multicast_ifp = sc->sc_sync_if;
1408 			imo->imo_multicast_ttl = PFSYNC_DFLTTL;
1409 			imo->imo_multicast_loop = 0;
1410 		}
1411 
1412 		ip = &sc->sc_template;
1413 		bzero(ip, sizeof(*ip));
1414 		ip->ip_v = IPVERSION;
1415 		ip->ip_hl = sizeof(sc->sc_template) >> 2;
1416 		ip->ip_tos = IPTOS_LOWDELAY;
1417 		/* len and id are set later */
1418 		ip->ip_off = htons(IP_DF);
1419 		ip->ip_ttl = PFSYNC_DFLTTL;
1420 		ip->ip_p = IPPROTO_PFSYNC;
1421 		ip->ip_src.s_addr = INADDR_ANY;
1422 		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;
1423 
1424 		sc->sc_lhcookie =
1425 		    hook_establish(sc->sc_sync_if->if_linkstatehooks, 1,
1426 		    pfsync_syncdev_state, sc);
1427 
1428 		pfsync_request_full_update(sc);
1429 		splx(s);
1430 
1431 		break;
1432 
1433 	default:
1434 		return (ENOTTY);
1435 	}
1436 
1437 	return (0);
1438 }
1439 
1440 void
1441 pfsync_out_state(struct pf_state *st, void *buf)
1442 {
1443 	struct pfsync_state *sp = buf;
1444 
1445 	pfsync_state_export(sp, st);
1446 }
1447 
1448 void
1449 pfsync_out_iack(struct pf_state *st, void *buf)
1450 {
1451 	struct pfsync_ins_ack *iack = buf;
1452 
1453 	iack->id = st->id;
1454 	iack->creatorid = st->creatorid;
1455 }
1456 
1457 void
1458 pfsync_out_upd_c(struct pf_state *st, void *buf)
1459 {
1460 	struct pfsync_upd_c *up = buf;
1461 
1462 	bzero(up, sizeof(*up));
1463 	up->id = st->id;
1464 	pf_state_peer_hton(&st->src, &up->src);
1465 	pf_state_peer_hton(&st->dst, &up->dst);
1466 	up->creatorid = st->creatorid;
1467 	up->timeout = st->timeout;
1468 }
1469 
1470 void
1471 pfsync_out_del(struct pf_state *st, void *buf)
1472 {
1473 	struct pfsync_del_c *dp = buf;
1474 
1475 	dp->id = st->id;
1476 	dp->creatorid = st->creatorid;
1477 
1478 	SET(st->state_flags, PFSTATE_NOSYNC);
1479 }
1480 
1481 void
1482 pfsync_drop(struct pfsync_softc *sc)
1483 {
1484 	struct pf_state *st;
1485 	struct pfsync_upd_req_item *ur;
1486 	struct tdb *t;
1487 	int q;
1488 
1489 	for (q = 0; q < PFSYNC_S_COUNT; q++) {
1490 		if (TAILQ_EMPTY(&sc->sc_qs[q]))
1491 			continue;
1492 
1493 		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
1494 #ifdef PFSYNC_DEBUG
1495 			KASSERT(st->sync_state == q);
1496 #endif
1497 			st->sync_state = PFSYNC_S_NONE;
1498 		}
1499 		TAILQ_INIT(&sc->sc_qs[q]);
1500 	}
1501 
1502 	while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
1503 		TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
1504 		pool_put(&sc->sc_pool, ur);
1505 	}
1506 
1507 	sc->sc_plus = NULL;
1508 
1509 	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
1510 		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry)
1511 			CLR(t->tdb_flags, TDBF_PFSYNC);
1512 
1513 		TAILQ_INIT(&sc->sc_tdb_q);
1514 	}
1515 
1516 	sc->sc_len = PFSYNC_MINPKT;
1517 }
1518 
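/*
 * Build and transmit one packet carrying everything queued since the
 * last call, in this order: update requests, the optional "plus"
 * region, queued TDBs, then the per-action state queues, each run of
 * messages behind its own subheader.
 */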
1519 void
1520 pfsync_sendout(void)
1521 {
1522 	struct pfsync_softc *sc = pfsyncif;
1523 #if NBPFILTER > 0
1524 	struct ifnet *ifp = &sc->sc_if;
1525 #endif
1526 	struct mbuf *m;
1527 	struct ip *ip;
1528 	struct pfsync_header *ph;
1529 	struct pfsync_subheader *subh;
1530 	struct pf_state *st;
1531 	struct pfsync_upd_req_item *ur;
1532 	struct tdb *t;
1533 
1534 	int offset;
1535 	int q, count = 0;
1536 
1537 	if (sc == NULL || sc->sc_len == PFSYNC_MINPKT)
1538 		return;
1539 
1540 	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
1541 #if NBPFILTER > 0
1542 	    (ifp->if_bpf == NULL && sc->sc_sync_if == NULL)) {
1543 #else
1544 	    sc->sc_sync_if == NULL) {
1545 #endif
1546 		pfsync_drop(sc);
1547 		return;
1548 	}
1549 
1550 	MGETHDR(m, M_DONTWAIT, MT_DATA);
1551 	if (m == NULL) {
1552 		sc->sc_if.if_oerrors++;
1553 		pfsyncstats.pfsyncs_onomem++;
1554 		pfsync_drop(sc);
1555 		return;
1556 	}
1557 
1558 	if (max_linkhdr + sc->sc_len > MHLEN) {
1559 		MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len);
1560 		if (!ISSET(m->m_flags, M_EXT)) {
1561 			m_free(m);
1562 			sc->sc_if.if_oerrors++;
1563 			pfsyncstats.pfsyncs_onomem++;
1564 			pfsync_drop(sc);
1565 			return;
1566 		}
1567 	}
1568 	m->m_data += max_linkhdr;
1569 	m->m_len = m->m_pkthdr.len = sc->sc_len;
1570 
1571 	/* build the ip header */
1572 	ip = mtod(m, struct ip *);
1573 	bcopy(&sc->sc_template, ip, sizeof(*ip));
1574 	offset = sizeof(*ip);
1575 
1576 	ip->ip_len = htons(m->m_pkthdr.len);
1577 	ip->ip_id = htons(ip_randomid());
1578 
1579 	/* build the pfsync header */
1580 	ph = (struct pfsync_header *)(m->m_data + offset);
1581 	bzero(ph, sizeof(*ph));
1582 	offset += sizeof(*ph);
1583 
1584 	ph->version = PFSYNC_VERSION;
1585 	ph->len = htons(sc->sc_len - sizeof(*ip));
1586 	bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
1587 
1588 	if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
1589 		subh = (struct pfsync_subheader *)(m->m_data + offset);
1590 		offset += sizeof(*subh);
1591 
1592 		count = 0;
1593 		while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
1594 			TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
1595 
1596 			bcopy(&ur->ur_msg, m->m_data + offset,
1597 			    sizeof(ur->ur_msg));
1598 			offset += sizeof(ur->ur_msg);
1599 
1600 			pool_put(&sc->sc_pool, ur);
1601 
1602 			count++;
1603 		}
1604 
1605 		bzero(subh, sizeof(*subh));
1606 		subh->len = sizeof(ur->ur_msg) >> 2;
1607 		subh->action = PFSYNC_ACT_UPD_REQ;
1608 		subh->count = htons(count);
1609 	}
1610 
1611 	/* has someone built a custom region for us to add? */
1612 	if (sc->sc_plus != NULL) {
1613 		bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
1614 		offset += sc->sc_pluslen;
1615 
1616 		sc->sc_plus = NULL;
1617 	}
1618 
1619 	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
1620 		subh = (struct pfsync_subheader *)(m->m_data + offset);
1621 		offset += sizeof(*subh);
1622 
1623 		count = 0;
1624 		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) {
1625 			pfsync_out_tdb(t, m->m_data + offset);
1626 			offset += sizeof(struct pfsync_tdb);
1627 			CLR(t->tdb_flags, TDBF_PFSYNC);
1628 
1629 			count++;
1630 		}
1631 		TAILQ_INIT(&sc->sc_tdb_q);
1632 
1633 		bzero(subh, sizeof(*subh));
1634 		subh->action = PFSYNC_ACT_TDB;
1635 		subh->len = sizeof(struct pfsync_tdb) >> 2;
1636 		subh->count = htons(count);
1637 	}
1638 
1639 	/* walk the queues */
1640 	for (q = 0; q < PFSYNC_S_COUNT; q++) {
1641 		if (TAILQ_EMPTY(&sc->sc_qs[q]))
1642 			continue;
1643 
1644 		subh = (struct pfsync_subheader *)(m->m_data + offset);
1645 		offset += sizeof(*subh);
1646 
1647 		count = 0;
1648 		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
1649 #ifdef PFSYNC_DEBUG
1650 			KASSERT(st->sync_state == q);
1651 #endif
1652 			pfsync_qs[q].write(st, m->m_data + offset);
1653 			offset += pfsync_qs[q].len;
1654 
1655 			st->sync_state = PFSYNC_S_NONE;
1656 			count++;
1657 		}
1658 		TAILQ_INIT(&sc->sc_qs[q]);
1659 
1660 		bzero(subh, sizeof(*subh));
1661 		subh->action = pfsync_qs[q].action;
1662 		subh->len = pfsync_qs[q].len >> 2;
1663 		subh->count = htons(count);
1664 	}
1665 
1666 	/* we're done, let's put it on the wire */
1667 #if NBPFILTER > 0
1668 	if (ifp->if_bpf) {
1669 		m->m_data += sizeof(*ip);
1670 		m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
1671 		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
1672 		m->m_data -= sizeof(*ip);
1673 		m->m_len = m->m_pkthdr.len = sc->sc_len;
1674 	}
1675 
1676 	if (sc->sc_sync_if == NULL) {
1677 		sc->sc_len = PFSYNC_MINPKT;
1678 		m_freem(m);
1679 		return;
1680 	}
1681 #endif
1682 
1683 	sc->sc_if.if_opackets++;
1684 	sc->sc_if.if_obytes += m->m_pkthdr.len;
1685 
1686 	m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
1687 
1688 	if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL, 0) == 0)
1689 		pfsyncstats.pfsyncs_opackets++;
1690 	else
1691 		pfsyncstats.pfsyncs_oerrors++;
1692 
1693 	/* start again */
1694 	sc->sc_len = PFSYNC_MINPKT;
1695 }
1696 
1697 void
1698 pfsync_insert_state(struct pf_state *st)
1699 {
1700 	struct pfsync_softc *sc = pfsyncif;
1701 
1702 	splsoftassert(IPL_SOFTNET);
1703 
1704 	if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) ||
1705 	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
1706 		SET(st->state_flags, PFSTATE_NOSYNC);
1707 		return;
1708 	}
1709 
1710 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
1711 	    ISSET(st->state_flags, PFSTATE_NOSYNC))
1712 		return;
1713 
1714 #ifdef PFSYNC_DEBUG
1715 	KASSERT(st->sync_state == PFSYNC_S_NONE);
1716 #endif
1717 
1718 	if (sc->sc_len == PFSYNC_MINPKT)
1719 		timeout_add_sec(&sc->sc_tmo, 1);
1720 
1721 	pfsync_q_ins(st, PFSYNC_S_INS);
1722 
1723 	st->sync_updates = 0;
1724 }
1725 
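/*
 * Defer transmission of a new state's first packet until the peer
 * acks the insert (pfsync_in_iack()) or a 20ms timeout fires, so the
 * peer is unlikely to see traffic for a state it has not learned yet.
 * At most 128 packets are held; the oldest is released to make room.
 */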
1726 int
1727 pfsync_defer(struct pf_state *st, struct mbuf *m)
1728 {
1729 	struct pfsync_softc *sc = pfsyncif;
1730 	struct pfsync_deferral *pd;
1731 
1732 	splsoftassert(IPL_SOFTNET);
1733 
1734 	if (!sc->sc_defer ||
1735 	    ISSET(st->state_flags, PFSTATE_NOSYNC) ||
1736 	    m->m_flags & (M_BCAST|M_MCAST))
1737 		return (0);
1738 
1739 	if (sc->sc_deferred >= 128) {
1740 		pd = TAILQ_FIRST(&sc->sc_deferrals);
1741 		if (timeout_del(&pd->pd_tmo))
1742 			pfsync_undefer(pd, 0);
1743 	}
1744 
1745 	pd = pool_get(&sc->sc_pool, PR_NOWAIT);
1746 	if (pd == NULL)
1747 		return (0);
1748 
1749 	m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
1750 	SET(st->state_flags, PFSTATE_ACK);
1751 
1752 	pd->pd_st = st;
1753 	pd->pd_m = m;
1754 
1755 	sc->sc_deferred++;
1756 	TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
1757 
1758 	timeout_set(&pd->pd_tmo, pfsync_defer_tmo, pd);
1759 	timeout_add_msec(&pd->pd_tmo, 20);
1760 
1761 	schednetisr(NETISR_PFSYNC);
1762 
1763 	return (1);
1764 }
1765 
1766 void
1767 pfsync_undefer(struct pfsync_deferral *pd, int drop)
1768 {
1769 	struct pfsync_softc *sc = pfsyncif;
1770 
1771 	splsoftassert(IPL_SOFTNET);
1772 
1773 	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
1774 	sc->sc_deferred--;
1775 
1776 	CLR(pd->pd_st->state_flags, PFSTATE_ACK);
1777 	if (drop)
1778 		m_freem(pd->pd_m);
1779 	else {
1780 		if (pd->pd_st->rule.ptr->rt == PF_ROUTETO) {
1781 			switch (pd->pd_st->key[PF_SK_WIRE]->af) {
1782 #ifdef INET
1783 			case AF_INET:
1784 				pf_route(&pd->pd_m, pd->pd_st->rule.ptr,
1785 				    pd->pd_st->direction,
1786 				    pd->pd_st->rt_kif->pfik_ifp, pd->pd_st);
1787 				break;
1788 #endif /* INET */
1789 #ifdef INET6
1790 			case AF_INET6:
1791 				pf_route6(&pd->pd_m, pd->pd_st->rule.ptr,
1792 				    pd->pd_st->direction,
1793 				    pd->pd_st->rt_kif->pfik_ifp, pd->pd_st);
1794 				break;
1795 #endif /* INET6 */
1796 			}
1797 		} else {
1798 			switch (pd->pd_st->key[PF_SK_WIRE]->af) {
1799 #ifdef INET
1800 			case AF_INET:
1801 				ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL,
1802 				    0);
1803 				break;
1804 #endif /* INET */
1805 #ifdef INET6
1806 			case AF_INET6:
1807 				ip6_output(pd->pd_m, NULL, NULL, 0,
1808 				    NULL, NULL, NULL);
1809 				break;
1810 #endif /* INET6 */
1811 			}
1812 		}
1813 	}
1814 
1815 	pool_put(&sc->sc_pool, pd);
1816 }
1817 
1818 void
1819 pfsync_defer_tmo(void *arg)
1820 {
1821 	int s;
1822 
1823 	s = splsoftnet();
1824 	pfsync_undefer(arg, 0);
1825 	splx(s);
1826 }
1827 
1828 void
1829 pfsync_deferred(struct pf_state *st, int drop)
1830 {
1831 	struct pfsync_softc *sc = pfsyncif;
1832 	struct pfsync_deferral *pd;
1833 
1834 	splsoftassert(IPL_SOFTNET);
1835 
1836 	TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
1837 		if (pd->pd_st == st) {
1838 			if (timeout_del(&pd->pd_tmo))
1839 				pfsync_undefer(pd, drop);
1840 			return;
1841 		}
1842 	}
1843 
1844 	panic("pfsync_deferred: unable to find deferred state");
1845 }
1846 
1847 void
1848 pfsync_update_state(struct pf_state *st)
1849 {
1850 	struct pfsync_softc *sc = pfsyncif;
1851 	int sync = 0;
1852 
1853 	splsoftassert(IPL_SOFTNET);
1854 
1855 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
1856 		return;
1857 
1858 	if (ISSET(st->state_flags, PFSTATE_ACK))
1859 		pfsync_deferred(st, 0);
1860 	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
1861 		if (st->sync_state != PFSYNC_S_NONE)
1862 			pfsync_q_del(st);
1863 		return;
1864 	}
1865 
1866 	if (sc->sc_len == PFSYNC_MINPKT)
1867 		timeout_add_sec(&sc->sc_tmo, 1);
1868 
1869 	switch (st->sync_state) {
1870 	case PFSYNC_S_UPD_C:
1871 	case PFSYNC_S_UPD:
1872 	case PFSYNC_S_INS:
1873 		/* we're already handling it */
1874 
1875 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
1876 			st->sync_updates++;
1877 			if (st->sync_updates >= sc->sc_maxupdates)
1878 				sync = 1;
1879 		}
1880 		break;
1881 
1882 	case PFSYNC_S_IACK:
1883 		pfsync_q_del(st);
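		/* FALLTHROUGH */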
1884 	case PFSYNC_S_NONE:
1885 		pfsync_q_ins(st, PFSYNC_S_UPD_C);
1886 		st->sync_updates = 0;
1887 		break;
1888 
1889 	default:
1890 		panic("pfsync_update_state: unexpected sync state %d",
1891 		    st->sync_state);
1892 	}
1893 
1894 	if (sync || (time_uptime - st->pfsync_time) < 2)
1895 		schednetisr(NETISR_PFSYNC);
1896 }
1897 
1898 void
1899 pfsync_cancel_full_update(struct pfsync_softc *sc)
1900 {
1901 	if (timeout_pending(&sc->sc_bulkfail_tmo) ||
1902 	    timeout_pending(&sc->sc_bulk_tmo)) {
1903 #if NCARP > 0
1904 		if (!pfsync_sync_ok)
1905 			carp_group_demote_adj(&sc->sc_if, -1,
1906 			    "pfsync bulk cancelled");
1907 		if (sc->sc_initial_bulk) {
1908 			carp_group_demote_adj(&sc->sc_if, -32,
1909 			    "pfsync init");
1910 			sc->sc_initial_bulk = 0;
1911 		}
1912 #endif
1913 		pfsync_sync_ok = 1;
1914 		DPFPRINTF(LOG_INFO, "cancelling bulk update");
1915 	}
1916 	timeout_del(&sc->sc_bulkfail_tmo);
1917 	timeout_del(&sc->sc_bulk_tmo);
1918 	sc->sc_bulk_next = NULL;
1919 	sc->sc_bulk_last = NULL;
1920 	sc->sc_ureq_sent = 0;
1921 	sc->sc_bulk_tries = 0;
1922 }
1923 
1924 void
1925 pfsync_request_full_update(struct pfsync_softc *sc)
1926 {
1927 	if (sc->sc_sync_if && ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
1928 		/* Request a full state table update. */
1929 		sc->sc_ureq_sent = time_uptime;
1930 #if NCARP > 0
1931 		if (!sc->sc_link_demoted && pfsync_sync_ok)
1932 			carp_group_demote_adj(&sc->sc_if, 1,
1933 			    "pfsync bulk start");
1934 #endif
1935 		pfsync_sync_ok = 0;
1936 		DPFPRINTF(LOG_INFO, "requesting bulk update");
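		/*
		 * Wait four seconds of slack plus roughly the time a
		 * full state table needs on the wire: one tick per
		 * MTU-sized packet of pfsync_state structures (the
		 * bulk sender paces itself at about a packet per tick).
		 */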
1937 		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
1938 		    pf_pool_limits[PF_LIMIT_STATES].limit /
1939 		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
1940 		    sizeof(struct pfsync_state)));
1941 		pfsync_request_update(0, 0);
1942 	}
1943 }
1944 
1945 void
1946 pfsync_request_update(u_int32_t creatorid, u_int64_t id)
1947 {
1948 	struct pfsync_softc *sc = pfsyncif;
1949 	struct pfsync_upd_req_item *item;
1950 	size_t nlen = sizeof(struct pfsync_upd_req);
1951 
1952 	/*
1953 	 * this code does nothing to prevent multiple update requests for the
1954 	 * same state being generated.
1955 	 */
1956 
1957 	item = pool_get(&sc->sc_pool, PR_NOWAIT);
1958 	if (item == NULL) {
1959 		/* XXX stats */
1960 		return;
1961 	}
1962 
1963 	item->ur_msg.id = id;
1964 	item->ur_msg.creatorid = creatorid;
1965 
1966 	if (TAILQ_EMPTY(&sc->sc_upd_req_list))
1967 		nlen += sizeof(struct pfsync_subheader);
1968 
1969 	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
1970 		pfsync_sendout();
1971 
1972 		nlen = sizeof(struct pfsync_subheader) +
1973 		    sizeof(struct pfsync_upd_req);
1974 	}
1975 
1976 	TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
1977 	sc->sc_len += nlen;
1978 
1979 	schednetisr(NETISR_PFSYNC);
1980 }
1981 
1982 void
1983 pfsync_update_state_req(struct pf_state *st)
1984 {
1985 	struct pfsync_softc *sc = pfsyncif;
1986 
1987 	if (sc == NULL)
1988 		panic("pfsync_update_state_req: nonexistent instance");
1989 
1990 	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
1991 		if (st->sync_state != PFSYNC_S_NONE)
1992 			pfsync_q_del(st);
1993 		return;
1994 	}
1995 
1996 	switch (st->sync_state) {
1997 	case PFSYNC_S_UPD_C:
1998 	case PFSYNC_S_IACK:
1999 		pfsync_q_del(st);
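		/* FALLTHROUGH */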
2000 	case PFSYNC_S_NONE:
2001 		pfsync_q_ins(st, PFSYNC_S_UPD);
2002 		schednetisr(NETISR_PFSYNC);
2003 		return;
2004 
2005 	case PFSYNC_S_INS:
2006 	case PFSYNC_S_UPD:
2007 	case PFSYNC_S_DEL:
2008 		/* we're already handling it */
2009 		return;
2010 
2011 	default:
2012 		panic("pfsync_update_state_req: unexpected sync state %d",
2013 		    st->sync_state);
2014 	}
2015 }
2016 
2017 void
2018 pfsync_delete_state(struct pf_state *st)
2019 {
2020 	struct pfsync_softc *sc = pfsyncif;
2021 
2022 	splsoftassert(IPL_SOFTNET);
2023 
2024 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
2025 		return;
2026 
2027 	if (ISSET(st->state_flags, PFSTATE_ACK))
2028 		pfsync_deferred(st, 1);
2029 	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
2030 		if (st->sync_state != PFSYNC_S_NONE)
2031 			pfsync_q_del(st);
2032 		return;
2033 	}
2034 
2035 	if (sc->sc_len == PFSYNC_MINPKT)
2036 		timeout_add_sec(&sc->sc_tmo, 1);
2037 
2038 	switch (st->sync_state) {
2039 	case PFSYNC_S_INS:
2040 		/* we never got to tell the world so just forget about it */
2041 		pfsync_q_del(st);
2042 		return;
2043 
2044 	case PFSYNC_S_UPD_C:
2045 	case PFSYNC_S_UPD:
2046 	case PFSYNC_S_IACK:
2047 		pfsync_q_del(st);
2048 		/* FALLTHROUGH to putting it on the del list */
2049 
2050 	case PFSYNC_S_NONE:
2051 		pfsync_q_ins(st, PFSYNC_S_DEL);
2052 		return;
2053 
2054 	default:
2055 		panic("pfsync_delete_state: unexpected sync state %d",
2056 		    st->sync_state);
2057 	}
2058 }
2059 
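/*
 * Broadcast a PFSYNC_ACT_CLR message telling peers to flush all states
 * matching the given creatorid, optionally restricted to one interface.
 */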
2060 void
2061 pfsync_clear_states(u_int32_t creatorid, const char *ifname)
2062 {
2063 	struct pfsync_softc *sc = pfsyncif;
2064 	struct {
2065 		struct pfsync_subheader subh;
2066 		struct pfsync_clr clr;
2067 	} __packed r;
2068 
2069 	splsoftassert(IPL_SOFTNET);
2070 
2071 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
2072 		return;
2073 
2074 	bzero(&r, sizeof(r));
2075 
2076 	r.subh.action = PFSYNC_ACT_CLR;
2077 	r.subh.len = sizeof(struct pfsync_clr) >> 2;
2078 	r.subh.count = htons(1);
2079 
2080 	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
2081 	r.clr.creatorid = creatorid;
2082 
2083 	pfsync_send_plus(&r, sizeof(r));
2084 }
2085 
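/*
 * Append a state to one of the per-action output queues, charging
 * sc_len for the entry and, if the queue was empty, for the subheader
 * that introduces it.  If the addition would overflow the MTU, the
 * pending packet is sent first.
 */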
2086 void
2087 pfsync_q_ins(struct pf_state *st, int q)
2088 {
2089 	struct pfsync_softc *sc = pfsyncif;
2090 	size_t nlen = pfsync_qs[q].len;
2091 
2092 	KASSERT(st->sync_state == PFSYNC_S_NONE);
2093 
2094 #if defined(PFSYNC_DEBUG)
2095 	if (sc->sc_len < PFSYNC_MINPKT)
2096 		panic("pfsync pkt len is too low %d", sc->sc_len);
2097 #endif
2098 	if (TAILQ_EMPTY(&sc->sc_qs[q]))
2099 		nlen += sizeof(struct pfsync_subheader);
2100 
2101 	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
2102 		pfsync_sendout();
2103 
2104 		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
2105 	}
2106 
2107 	sc->sc_len += nlen;
2108 	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
2109 	st->sync_state = q;
2110 }
2111 
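/*
 * Remove a state from its output queue, refunding the bytes it was
 * charged, including the subheader once the queue drains.
 */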
2112 void
2113 pfsync_q_del(struct pf_state *st)
2114 {
2115 	struct pfsync_softc *sc = pfsyncif;
2116 	int q = st->sync_state;
2117 
2118 	KASSERT(st->sync_state != PFSYNC_S_NONE);
2119 
2120 	sc->sc_len -= pfsync_qs[q].len;
2121 	TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
2122 	st->sync_state = PFSYNC_S_NONE;
2123 
2124 	if (TAILQ_EMPTY(&sc->sc_qs[q]))
2125 		sc->sc_len -= sizeof(struct pfsync_subheader);
2126 }
2127 
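/*
 * Schedule a TDB (IPsec SA) for synchronisation.  A TDB is queued at
 * most once; subsequent calls only count updates until sc_maxupdates
 * forces a send.  TDBF_PFSYNC_RPL records whether the replay counter
 * should be bumped when the entry is serialised.
 */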
2128 void
2129 pfsync_update_tdb(struct tdb *t, int output)
2130 {
2131 	struct pfsync_softc *sc = pfsyncif;
2132 	size_t nlen = sizeof(struct pfsync_tdb);
2133 
2134 	if (sc == NULL)
2135 		return;
2136 
2137 	if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
2138 		if (TAILQ_EMPTY(&sc->sc_tdb_q))
2139 			nlen += sizeof(struct pfsync_subheader);
2140 
2141 		if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
2142 			pfsync_sendout();
2143 
2144 			nlen = sizeof(struct pfsync_subheader) +
2145 			    sizeof(struct pfsync_tdb);
2146 		}
2147 
2148 		sc->sc_len += nlen;
2149 		TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
2150 		SET(t->tdb_flags, TDBF_PFSYNC);
2151 		t->tdb_updates = 0;
2152 	} else {
2153 		if (++t->tdb_updates >= sc->sc_maxupdates)
2154 			schednetisr(NETISR_PFSYNC);
2155 	}
2156 
2157 	if (output)
2158 		SET(t->tdb_flags, TDBF_PFSYNC_RPL);
2159 	else
2160 		CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
2161 }
2162 
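/*
 * Withdraw a dying TDB from the sync queue and adjust sc_len.
 */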
2163 void
2164 pfsync_delete_tdb(struct tdb *t)
2165 {
2166 	struct pfsync_softc *sc = pfsyncif;
2167 
2168 	if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
2169 		return;
2170 
2171 	sc->sc_len -= sizeof(struct pfsync_tdb);
2172 	TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
2173 	CLR(t->tdb_flags, TDBF_PFSYNC);
2174 
2175 	if (TAILQ_EMPTY(&sc->sc_tdb_q))
2176 		sc->sc_len -= sizeof(struct pfsync_subheader);
2177 }
2178 
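/*
 * Serialise a queued TDB into its wire format at buf, with multi-byte
 * fields in network byte order.
 */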
2179 void
2180 pfsync_out_tdb(struct tdb *t, void *buf)
2181 {
2182 	struct pfsync_tdb *ut = buf;
2183 
2184 	bzero(ut, sizeof(*ut));
2185 	ut->spi = t->tdb_spi;
2186 	bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
2187 	/*
2188 	 * When a failover happens, the master's rpl is probably above
2189 	 * what we see here (we may be up to a second late), so
2190 	 * increase it a bit for outbound tdbs to manage most such
2191 	 * situations.
2192 	 *
2193 	 * For now, just add an offset that is likely to be larger
2194 	 * than the number of packets we can see in one second. The RFC
2195 	 * just says the next packet must have a higher seq value.
2196 	 *
2197 	 * XXX What is a good algorithm for this? We could use
2198 	 * a rate-determined increase, but to know it, we would have
2199 	 * to extend struct tdb.
2200 	 * XXX pt->rpl can wrap over MAXINT, but if so the real tdb
2201 	 * will soon be replaced anyway. For now, just don't handle
2202 	 * this edge case.
2203 	 */
2204 #define RPL_INCR 16384
2205 	ut->rpl = htobe64(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
2206 	    RPL_INCR : 0));
2207 	ut->cur_bytes = htobe64(t->tdb_cur_bytes);
2208 	ut->sproto = t->tdb_sproto;
2209 	ut->rdomain = htons(t->tdb_rdomain);
2210 }
2211 
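/*
 * A peer has requested a bulk transfer.  Walk the global state list
 * from sc_bulk_next around to sc_bulk_last, bracketing the run with
 * PFSYNC_BUS_START and PFSYNC_BUS_END status messages.
 */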
2212 void
2213 pfsync_bulk_start(void)
2214 {
2215 	struct pfsync_softc *sc = pfsyncif;
2216 
2217 	DPFPRINTF(LOG_INFO, "received bulk update request");
2218 
	if (TAILQ_EMPTY(&state_list)) {
		pfsync_bulk_status(PFSYNC_BUS_END);
	} else {
2222 		sc->sc_ureq_received = time_uptime;
2223 
2224 		if (sc->sc_bulk_next == NULL)
2225 			sc->sc_bulk_next = TAILQ_FIRST(&state_list);
2226 		sc->sc_bulk_last = sc->sc_bulk_next;
2227 
2228 		pfsync_bulk_status(PFSYNC_BUS_START);
2229 		timeout_add(&sc->sc_bulk_tmo, 0);
2230 	}
2231 }
2232 
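/*
 * Timeout handler that drives the bulk transfer: queue updates for as
 * many eligible states as fit in one packet, then resume on the next
 * tick until the walk comes back around to sc_bulk_last.
 */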
2233 void
2234 pfsync_bulk_update(void *arg)
2235 {
2236 	struct pfsync_softc *sc = arg;
2237 	struct pf_state *st;
2238 	int i = 0;
2239 	int s;
2240 
2241 	s = splsoftnet();
2242 
2243 	st = sc->sc_bulk_next;
2244 
2245 	for (;;) {
2246 		if (st->sync_state == PFSYNC_S_NONE &&
2247 		    st->timeout < PFTM_MAX &&
2248 		    st->pfsync_time <= sc->sc_ureq_received) {
2249 			pfsync_update_state_req(st);
2250 			i++;
2251 		}
2252 
2253 		st = TAILQ_NEXT(st, entry_list);
2254 		if (st == NULL)
2255 			st = TAILQ_FIRST(&state_list);
2256 
2257 		if (st == sc->sc_bulk_last) {
2258 			/* we're done */
2259 			sc->sc_bulk_next = NULL;
2260 			sc->sc_bulk_last = NULL;
2261 			pfsync_bulk_status(PFSYNC_BUS_END);
2262 			break;
2263 		}
2264 
2265 		if (i > 1 && (sc->sc_if.if_mtu - sc->sc_len) <
2266 		    sizeof(struct pfsync_state)) {
2267 			/* we've filled a packet */
2268 			sc->sc_bulk_next = st;
2269 			timeout_add(&sc->sc_bulk_tmo, 1);
2270 			break;
2271 		}
2272 	}
2273 
2274 	splx(s);
2275 }
2276 
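/*
 * Send a PFSYNC_ACT_BUS message announcing the start or end of a bulk
 * transfer.
 */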
2277 void
2278 pfsync_bulk_status(u_int8_t status)
2279 {
2280 	struct {
2281 		struct pfsync_subheader subh;
2282 		struct pfsync_bus bus;
2283 	} __packed r;
2284 
2285 	struct pfsync_softc *sc = pfsyncif;
2286 
2287 	bzero(&r, sizeof(r));
2288 
2289 	r.subh.action = PFSYNC_ACT_BUS;
2290 	r.subh.len = sizeof(struct pfsync_bus) >> 2;
2291 	r.subh.count = htons(1);
2292 
2293 	r.bus.creatorid = pf_status.hostid;
2294 	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
2295 	r.bus.status = status;
2296 
2297 	pfsync_send_plus(&r, sizeof(r));
2298 }
2299 
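/*
 * The bulk update we requested did not complete in time.  Retry up to
 * PFSYNC_MAX_BULKTRIES times, then carry on with the states we have
 * and lift any carp demotion we were holding.
 */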
2300 void
2301 pfsync_bulk_fail(void *arg)
2302 {
2303 	struct pfsync_softc *sc = arg;
2304 	int s;
2305 
2306 	s = splsoftnet();
2307 
2308 	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
2309 		/* Try again */
2310 		timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
2311 		pfsync_request_update(0, 0);
2312 	} else {
		/* Pretend the transfer was ok */
2314 		sc->sc_ureq_sent = 0;
2315 		sc->sc_bulk_tries = 0;
2316 #if NCARP > 0
2317 		if (!pfsync_sync_ok)
2318 			carp_group_demote_adj(&sc->sc_if, -1,
2319 			    sc->sc_link_demoted ?
2320 			    "pfsync link state up" :
2321 			    "pfsync bulk fail");
2322 		if (sc->sc_initial_bulk) {
2323 			carp_group_demote_adj(&sc->sc_if, -32,
2324 			    "pfsync init");
2325 			sc->sc_initial_bulk = 0;
2326 		}
2327 #endif
2328 		pfsync_sync_ok = 1;
2329 		sc->sc_link_demoted = 0;
2330 		DPFPRINTF(LOG_ERR, "failed to receive bulk update");
2331 	}
2332 
2333 	splx(s);
2334 }
2335 
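/*
 * Attach an extra pre-built chunk (a clear or bulk status message) to
 * the pending packet and transmit it immediately.
 */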
2336 void
2337 pfsync_send_plus(void *plus, size_t pluslen)
2338 {
2339 	struct pfsync_softc *sc = pfsyncif;
2340 
2341 	if (sc->sc_len + pluslen > sc->sc_if.if_mtu)
2342 		pfsync_sendout();
2343 
2344 	sc->sc_plus = plus;
2345 	sc->sc_len += (sc->sc_pluslen = pluslen);
2346 
2347 	pfsync_sendout();
2348 }
2349 
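/*
 * Report whether a pfsync interface exists and is running.
 */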
2350 int
2351 pfsync_up(void)
2352 {
2353 	struct pfsync_softc *sc = pfsyncif;
2354 
2355 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
2356 		return (0);
2357 
2358 	return (1);
2359 }
2360 
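/*
 * A state cannot be purged while it sits on an output queue or marks
 * the bounds of an in-progress bulk transfer.
 */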
2361 int
2362 pfsync_state_in_use(struct pf_state *st)
2363 {
2364 	struct pfsync_softc *sc = pfsyncif;
2365 
2366 	if (sc == NULL)
2367 		return (0);
2368 
2369 	if (st->sync_state != PFSYNC_S_NONE ||
2370 	    st == sc->sc_bulk_next ||
2371 	    st == sc->sc_bulk_last)
2372 		return (1);
2373 
2374 	return (0);
2375 }
2376 
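/*
 * Flush a partially filled packet so queued messages do not linger.
 */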
2377 void
2378 pfsync_timeout(void *arg)
2379 {
2380 	int s;
2381 
2382 	s = splsoftnet();
2383 	pfsync_sendout();
2384 	splx(s);
2385 }
2386 
2387 /* this is a softnet/netisr handler */
2388 void
2389 pfsyncintr(void)
2390 {
2391 	pfsync_sendout();
2392 }
2393 
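/*
 * Handle the pfsync sysctl subtree; only the statistics counters are
 * exported, read-only.
 */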
2394 int
2395 pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
2396     size_t newlen)
2397 {
2398 	/* All sysctl names at this level are terminal. */
2399 	if (namelen != 1)
2400 		return (ENOTDIR);
2401 
2402 	switch (name[0]) {
2403 	case PFSYNCCTL_STATS:
2404 		if (newp != NULL)
2405 			return (EPERM);
2406 		return (sysctl_struct(oldp, oldlenp, newp, newlen,
2407 		    &pfsyncstats, sizeof(pfsyncstats)));
2408 	default:
2409 		return (ENOPROTOOPT);
2410 	}
2411 }
2412