/*	$OpenBSD: if_pfsync.c,v 1.229 2016/04/29 08:55:03 krw Exp $	*/

/*
 * Copyright (c) 2002 Michael Shalayeff
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/timeout.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/pool.h>
#include <sys/syslog.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/bpf.h>
#include <net/netisr.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_fsm.h>

#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>

#ifdef IPSEC
#include <netinet/ip_ipsp.h>
#endif /* IPSEC */

#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/nd6.h>
#endif /* INET6 */

#include "carp.h"
#if NCARP > 0
#include <netinet/ip_carp.h>
#endif

#define PF_DEBUGNAME	"pfsync: "
#include <net/pfvar.h>
#include <netinet/ip_ipsp.h>
#include <net/if_pfsync.h>

#include "bpfilter.h"
#include "pfsync.h"

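/*
 * The smallest packet we can build is an IP header plus a bare pfsync
 * header; subheaders and their messages are accounted on top of this
 * in sc_len as updates queue up.
 */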
#define PFSYNC_MINPKT ( \
	sizeof(struct ip) + \
	sizeof(struct pfsync_header))

int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
	    struct pfsync_state_peer *);

int	pfsync_in_clr(caddr_t, int, int, int);
int	pfsync_in_iack(caddr_t, int, int, int);
int	pfsync_in_upd_c(caddr_t, int, int, int);
int	pfsync_in_ureq(caddr_t, int, int, int);
int	pfsync_in_del(caddr_t, int, int, int);
int	pfsync_in_del_c(caddr_t, int, int, int);
int	pfsync_in_bus(caddr_t, int, int, int);
int	pfsync_in_tdb(caddr_t, int, int, int);
int	pfsync_in_ins(caddr_t, int, int, int);
int	pfsync_in_upd(caddr_t, int, int, int);
int	pfsync_in_eof(caddr_t, int, int, int);

int	pfsync_in_error(caddr_t, int, int, int);

struct {
	int	(*in)(caddr_t, int, int, int);
	size_t	len;
} pfsync_acts[] = {
	/* PFSYNC_ACT_CLR */
	{ pfsync_in_clr,	sizeof(struct pfsync_clr) },
	/* PFSYNC_ACT_OINS */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_INS_ACK */
	{ pfsync_in_iack,	sizeof(struct pfsync_ins_ack) },
	/* PFSYNC_ACT_OUPD */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_UPD_C */
	{ pfsync_in_upd_c,	sizeof(struct pfsync_upd_c) },
	/* PFSYNC_ACT_UPD_REQ */
	{ pfsync_in_ureq,	sizeof(struct pfsync_upd_req) },
	/* PFSYNC_ACT_DEL */
	{ pfsync_in_del,	sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_DEL_C */
	{ pfsync_in_del_c,	sizeof(struct pfsync_del_c) },
	/* PFSYNC_ACT_INS_F */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_DEL_F */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_BUS */
	{ pfsync_in_bus,	sizeof(struct pfsync_bus) },
	/* PFSYNC_ACT_OTDB */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_EOF */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_INS */
	{ pfsync_in_ins,	sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_UPD */
	{ pfsync_in_upd,	sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_TDB */
	{ pfsync_in_tdb,	sizeof(struct pfsync_tdb) },
};
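/*
 * pfsync_input() dispatches through this table: subh.action indexes it
 * directly, and the advertised message length is checked against the
 * .len member before the .in handler is called.
 */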

struct pfsync_q {
	void		(*write)(struct pf_state *, void *);
	size_t		len;
	u_int8_t	action;
};

/* we have one of these for every PFSYNC_S_ */
void	pfsync_out_state(struct pf_state *, void *);
void	pfsync_out_iack(struct pf_state *, void *);
void	pfsync_out_upd_c(struct pf_state *, void *);
void	pfsync_out_del(struct pf_state *, void *);

struct pfsync_q pfsync_qs[] = {
	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C },
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD }
};
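/*
 * The order of entries here must match the PFSYNC_S_* constants, as
 * st->sync_state is used to index both sc_qs and this table.
 */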

void	pfsync_q_ins(struct pf_state *, int);
void	pfsync_q_del(struct pf_state *);

struct pfsync_upd_req_item {
	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
	struct pfsync_upd_req			ur_msg;
};
TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);

struct pfsync_deferral {
	TAILQ_ENTRY(pfsync_deferral)		 pd_entry;
	struct pf_state				*pd_st;
	struct mbuf				*pd_m;
	struct timeout				 pd_tmo;
};
TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);

#define PFSYNC_PLSIZE	MAX(sizeof(struct pfsync_upd_req_item), \
			    sizeof(struct pfsync_deferral))
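/*
 * sc_pool backs both update request items and deferrals, so its item
 * size must be the larger of the two structures.
 */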

void	pfsync_out_tdb(struct tdb *, void *);

struct pfsync_softc {
	struct ifnet		 sc_if;
	struct ifnet		*sc_sync_if;

	struct pool		 sc_pool;

	struct ip_moptions	 sc_imo;

	struct in_addr		 sc_sync_peer;
	u_int8_t		 sc_maxupdates;

	struct ip		 sc_template;

	struct pf_state_queue	 sc_qs[PFSYNC_S_COUNT];
	size_t			 sc_len;

	struct pfsync_upd_reqs	 sc_upd_req_list;

	int			 sc_initial_bulk;
	int			 sc_link_demoted;

	int			 sc_defer;
	struct pfsync_deferrals	 sc_deferrals;
	u_int			 sc_deferred;

	void			*sc_plus;
	size_t			 sc_pluslen;

	u_int32_t		 sc_ureq_sent;
	int			 sc_bulk_tries;
	struct timeout		 sc_bulkfail_tmo;

	u_int32_t		 sc_ureq_received;
	struct pf_state		*sc_bulk_next;
	struct pf_state		*sc_bulk_last;
	struct timeout		 sc_bulk_tmo;

	TAILQ_HEAD(, tdb)	 sc_tdb_q;

	void			*sc_lhcookie;

	struct timeout		 sc_tmo;
};

struct pfsync_softc	*pfsyncif = NULL;
struct pfsyncstats	 pfsyncstats;

void	pfsyncattach(int);
int	pfsync_clone_create(struct if_clone *, int);
int	pfsync_clone_destroy(struct ifnet *);
int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
	    struct pf_state_peer *);
void	pfsync_update_net_tdb(struct pfsync_tdb *);
int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
void	pfsyncstart(struct ifnet *);
void	pfsync_syncdev_state(void *);

void	pfsync_deferred(struct pf_state *, int);
void	pfsync_undefer(struct pfsync_deferral *, int);
void	pfsync_defer_tmo(void *);

void	pfsync_cancel_full_update(struct pfsync_softc *);
void	pfsync_request_full_update(struct pfsync_softc *);
void	pfsync_request_update(u_int32_t, u_int64_t);
void	pfsync_update_state_req(struct pf_state *);

void	pfsync_drop(struct pfsync_softc *);
void	pfsync_sendout(void);
void	pfsync_send_plus(void *, size_t);
void	pfsync_timeout(void *);
void	pfsync_tdb_timeout(void *);

void	pfsync_bulk_start(void);
void	pfsync_bulk_status(u_int8_t);
void	pfsync_bulk_update(void *);
void	pfsync_bulk_fail(void *);

#define PFSYNC_MAX_BULKTRIES	12
int	pfsync_sync_ok;

struct if_clone	pfsync_cloner =
    IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);

void
pfsyncattach(int npfsync)
{
	if_clone_attach(&pfsync_cloner);
}

int
pfsync_clone_create(struct if_clone *ifc, int unit)
{
	struct pfsync_softc *sc;
	struct ifnet *ifp;
	int q;

	if (unit != 0)
		return (EINVAL);

	pfsync_sync_ok = 1;

	sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_WAITOK | M_ZERO);

	for (q = 0; q < PFSYNC_S_COUNT; q++)
		TAILQ_INIT(&sc->sc_qs[q]);

	pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, 0, 0, "pfsync", NULL);
	TAILQ_INIT(&sc->sc_upd_req_list);
	TAILQ_INIT(&sc->sc_deferrals);
	sc->sc_deferred = 0;

	TAILQ_INIT(&sc->sc_tdb_q);

	sc->sc_len = PFSYNC_MINPKT;
	sc->sc_maxupdates = 128;

	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
	    M_WAITOK | M_ZERO);
	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;

	ifp = &sc->sc_if;
	snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
	ifp->if_softc = sc;
	ifp->if_ioctl = pfsyncioctl;
	ifp->if_output = pfsyncoutput;
	ifp->if_start = pfsyncstart;
	ifp->if_type = IFT_PFSYNC;
	IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
	ifp->if_hdrlen = sizeof(struct pfsync_header);
	ifp->if_mtu = ETHERMTU;
	timeout_set(&sc->sc_tmo, pfsync_timeout, sc);
	timeout_set(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
	timeout_set(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);

	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NCARP > 0
	if_addgroup(ifp, "carp");
#endif

#if NBPFILTER > 0
	bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
#endif

	pfsyncif = sc;

	return (0);
}

int
pfsync_clone_destroy(struct ifnet *ifp)
{
	struct pfsync_softc *sc = ifp->if_softc;
	struct pfsync_deferral *pd;
	int s;

	s = splsoftnet();
	timeout_del(&sc->sc_bulkfail_tmo);
	timeout_del(&sc->sc_bulk_tmo);
	timeout_del(&sc->sc_tmo);
#if NCARP > 0
	if (!pfsync_sync_ok)
		carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
	if (sc->sc_link_demoted)
		carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
#endif
	if (sc->sc_sync_if)
		hook_disestablish(
		    sc->sc_sync_if->if_linkstatehooks,
		    sc->sc_lhcookie);
	if_detach(ifp);

	pfsync_drop(sc);

	while (sc->sc_deferred > 0) {
		pd = TAILQ_FIRST(&sc->sc_deferrals);
		timeout_del(&pd->pd_tmo);
		pfsync_undefer(pd, 0);
	}

	pool_destroy(&sc->sc_pool);
	free(sc->sc_imo.imo_membership, M_IPMOPTS, 0);
	free(sc, M_DEVBUF, sizeof(*sc));

	pfsyncif = NULL;
	splx(s);

	return (0);
}

/*
 * Start output on the pfsync interface.
 */
void
pfsyncstart(struct ifnet *ifp)
{
	IFQ_PURGE(&ifp->if_snd);
}

void
pfsync_syncdev_state(void *arg)
{
	struct pfsync_softc *sc = arg;

	if (!sc->sc_sync_if || !(sc->sc_if.if_flags & IFF_UP))
		return;

	if (sc->sc_sync_if->if_link_state == LINK_STATE_DOWN) {
		sc->sc_if.if_flags &= ~IFF_RUNNING;
		if (!sc->sc_link_demoted) {
#if NCARP > 0
			carp_group_demote_adj(&sc->sc_if, 1,
			    "pfsync link state down");
#endif
			sc->sc_link_demoted = 1;
		}

		/* drop everything */
		timeout_del(&sc->sc_tmo);
		pfsync_drop(sc);

		pfsync_cancel_full_update(sc);
	} else if (sc->sc_link_demoted) {
		sc->sc_if.if_flags |= IFF_RUNNING;

		pfsync_request_full_update(sc);
	}
}

int
pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
    struct pf_state_peer *d)
{
	if (s->scrub.scrub_flag && d->scrub == NULL) {
		d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
		if (d->scrub == NULL)
			return (ENOMEM);
	}

	return (0);
}

void
pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
{
	pf_state_export(sp, st);
}

int
pfsync_state_import(struct pfsync_state *sp, int flags)
{
	struct pf_state	*st = NULL;
	struct pf_state_key *skw = NULL, *sks = NULL;
	struct pf_rule *r = NULL;
	struct pfi_kif	*kif;
	int pool_flags;
	int error;

	if (sp->creatorid == 0) {
		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
		    "invalid creator id: %08x", ntohl(sp->creatorid));
		return (EINVAL);
	}

	if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
		    "unknown interface: %s", sp->ifname);
		if (flags & PFSYNC_SI_IOCTL)
			return (EINVAL);
		return (0);	/* skip this state */
	}

	if (sp->af == 0)
		return (0);	/* skip this state */

	/*
	 * If the ruleset checksums match or the state is coming from the ioctl,
	 * it's safe to associate the state with the rule of that number.
	 */
	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
	    pf_main_ruleset.rules.active.rcount)
		r = pf_main_ruleset.rules.active.ptr_array[ntohl(sp->rule)];
	else
		r = &pf_default_rule;

	if ((r->max_states && r->states_cur >= r->max_states))
		goto cleanup;

	if (flags & PFSYNC_SI_IOCTL)
		pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
	else
		pool_flags = PR_NOWAIT | PR_LIMITFAIL | PR_ZERO;

	if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
		goto cleanup;

	if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
		goto cleanup;

	if ((sp->key[PF_SK_WIRE].af &&
	    (sp->key[PF_SK_WIRE].af != sp->key[PF_SK_STACK].af)) ||
	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] ||
	    sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) {
		if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
			goto cleanup;
	} else
		sks = skw;

	/* allocate memory for scrub info */
	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
		goto cleanup;

	/* copy to state key(s) */
	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
	skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain);
	PF_REF_INIT(skw->refcnt);
	skw->proto = sp->proto;
	if (!(skw->af = sp->key[PF_SK_WIRE].af))
		skw->af = sp->af;
	if (sks != skw) {
		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
		sks->port[0] = sp->key[PF_SK_STACK].port[0];
		sks->port[1] = sp->key[PF_SK_STACK].port[1];
		sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain);
		PF_REF_INIT(sks->refcnt);
		if (!(sks->af = sp->key[PF_SK_STACK].af))
			sks->af = sp->af;
		if (sks->af != skw->af) {
			switch (sp->proto) {
			case IPPROTO_ICMP:
				sks->proto = IPPROTO_ICMPV6;
				break;
			case IPPROTO_ICMPV6:
				sks->proto = IPPROTO_ICMP;
				break;
			default:
				sks->proto = sp->proto;
			}
		} else
			sks->proto = sp->proto;
	}
	st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]);
	st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]);

	/* copy to state */
	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
	st->creation = time_uptime - ntohl(sp->creation);
	st->expire = time_uptime;
	if (ntohl(sp->expire)) {
		u_int32_t timeout;

		timeout = r->timeout[sp->timeout];
		if (!timeout)
			timeout = pf_default_rule.timeout[sp->timeout];

		/* sp->expire may have been adaptively scaled by export. */
		st->expire -= timeout - ntohl(sp->expire);
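		/*
		 * E.g. with an effective timeout of 60s and the peer
		 * reporting 45s remaining, expire is backdated by 15s
		 * so the state also expires 45s from now locally.
		 */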
	}

	st->direction = sp->direction;
	st->log = sp->log;
	st->timeout = sp->timeout;
	st->state_flags = ntohs(sp->state_flags);
	st->max_mss = ntohs(sp->max_mss);
	st->min_ttl = sp->min_ttl;
	st->set_tos = sp->set_tos;
	st->set_prio[0] = sp->set_prio[0];
	st->set_prio[1] = sp->set_prio[1];

	st->id = sp->id;
	st->creatorid = sp->creatorid;
	pf_state_peer_ntoh(&sp->src, &st->src);
	pf_state_peer_ntoh(&sp->dst, &st->dst);

	st->rule.ptr = r;
	st->anchor.ptr = NULL;
	st->rt_kif = NULL;

	st->pfsync_time = time_uptime;
	st->sync_state = PFSYNC_S_NONE;

	/* XXX when we have anchors, use STATE_INC_COUNTERS */
	r->states_cur++;
	r->states_tot++;

	if (!ISSET(flags, PFSYNC_SI_IOCTL))
		SET(st->state_flags, PFSTATE_NOSYNC);

	if (pf_state_insert(kif, &skw, &sks, st) != 0) {
		/* XXX when we have anchors, use STATE_DEC_COUNTERS */
		r->states_cur--;
		error = EEXIST;
		goto cleanup_state;
	}

	if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
		CLR(st->state_flags, PFSTATE_NOSYNC);
		if (ISSET(st->state_flags, PFSTATE_ACK)) {
			pfsync_q_ins(st, PFSYNC_S_IACK);
			schednetisr(NETISR_PFSYNC);
		}
	}
	CLR(st->state_flags, PFSTATE_ACK);

	return (0);

 cleanup:
	error = ENOMEM;
	if (skw == sks)
		sks = NULL;
	if (skw != NULL)
		pool_put(&pf_state_key_pl, skw);
	if (sks != NULL)
		pool_put(&pf_state_key_pl, sks);

 cleanup_state:	/* pf_state_insert frees the state keys */
	if (st) {
		if (st->dst.scrub)
			pool_put(&pf_state_scrub_pl, st->dst.scrub);
		if (st->src.scrub)
			pool_put(&pf_state_scrub_pl, st->src.scrub);
		pool_put(&pf_state_pl, st);
	}
	return (error);
}

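/*
 * On the wire a pfsync packet is an IP header, a pfsync_header, and
 * then one or more groups of a pfsync_subheader followed by messages;
 * each subheader carries the action, the per-message length in 32-bit
 * words, and the message count for its group.
 */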
void
pfsync_input(struct mbuf *m, ...)
{
	struct pfsync_softc *sc = pfsyncif;
	struct ip *ip = mtod(m, struct ip *);
	struct mbuf *mp;
	struct pfsync_header *ph;
	struct pfsync_subheader subh;

	int offset, offp, len, count, mlen, flags = 0;

	pfsyncstats.pfsyncs_ipackets++;

	/* verify that we have a sync interface configured */
	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    sc->sc_sync_if == NULL || !pf_status.running)
		goto done;

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_if->if_index != m->m_pkthdr.ph_ifidx) {
		pfsyncstats.pfsyncs_badif++;
		goto done;
	}

	sc->sc_if.if_ipackets++;
	sc->sc_if.if_ibytes += m->m_pkthdr.len;

	/* verify that the IP TTL is 255. */
	if (ip->ip_ttl != PFSYNC_DFLTTL) {
		pfsyncstats.pfsyncs_badttl++;
		goto done;
	}

	offset = ip->ip_hl << 2;
	mp = m_pulldown(m, offset, sizeof(*ph), &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_hdrops++;
		return;
	}
	ph = (struct pfsync_header *)(mp->m_data + offp);

	/* verify the version */
	if (ph->version != PFSYNC_VERSION) {
		pfsyncstats.pfsyncs_badver++;
		goto done;
	}
	len = ntohs(ph->len) + offset;
	if (m->m_pkthdr.len < len) {
		pfsyncstats.pfsyncs_badlen++;
		goto done;
	}

	if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
		flags = PFSYNC_SI_CKSUM;

	offset += sizeof(*ph);
	while (offset <= len - sizeof(subh)) {
		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
		offset += sizeof(subh);

		mlen = subh.len << 2;
		count = ntohs(subh.count);

		if (subh.action >= PFSYNC_ACT_MAX ||
		    subh.action >= nitems(pfsync_acts) ||
		    mlen < pfsync_acts[subh.action].len) {
			/*
			 * subheaders are always followed by at least one
			 * message, so if the peer is new enough to tell
			 * us how big its messages are then we know enough
			 * to skip them.
			 */
			if (count > 0 && mlen > 0) {
				offset += count * mlen;
				continue;
			}
			pfsyncstats.pfsyncs_badact++;
			goto done;
		}

		mp = m_pulldown(m, offset, mlen * count, &offp);
		if (mp == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		if (pfsync_acts[subh.action].in(mp->m_data + offp,
		    mlen, count, flags) != 0)
			goto done;

		offset += mlen * count;
	}

done:
	m_freem(m);
}

int
pfsync_in_clr(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_clr *clr;
	struct pf_state *st, *nexts;
	struct pfi_kif *kif;
	u_int32_t creatorid;
	int i;

	for (i = 0; i < count; i++) {
		clr = (struct pfsync_clr *)(buf + len * i);
		kif = NULL;
		creatorid = clr->creatorid;
		if (strlen(clr->ifname) &&
		    (kif = pfi_kif_find(clr->ifname)) == NULL)
			continue;

		for (st = RB_MIN(pf_state_tree_id, &tree_id); st; st = nexts) {
			nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
			if (st->creatorid == creatorid &&
			    ((kif && st->kif == kif) || !kif)) {
				SET(st->state_flags, PFSTATE_NOSYNC);
				pf_remove_state(st);
			}
		}
	}

	return (0);
}

int
pfsync_in_ins(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_state *sp;
	sa_family_t af1, af2;
	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_state *)(buf + len * i);
		af1 = sp->key[0].af;
		af2 = sp->key[1].af;

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST ||
		    sp->direction > PF_OUT ||
		    (((af1 || af2) &&
		     ((af1 != AF_INET && af1 != AF_INET6) ||
		      (af2 != AF_INET && af2 != AF_INET6))) ||
		    (sp->af != AF_INET && sp->af != AF_INET6))) {
			DPFPRINTF(LOG_NOTICE,
			    "pfsync_input: PFSYNC5_ACT_INS: invalid value");
			pfsyncstats.pfsyncs_badval++;
			continue;
		}

		if (pfsync_state_import(sp, flags) == ENOMEM) {
			/* drop out, but process the rest of the actions */
			break;
		}
	}

	return (0);
}

int
pfsync_in_iack(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_ins_ack *ia;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;

	for (i = 0; i < count; i++) {
		ia = (struct pfsync_ins_ack *)(buf + len * i);

		id_key.id = ia->id;
		id_key.creatorid = ia->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL)
			continue;

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 0);
	}

	return (0);
}

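/*
 * Returns the number of peers (0-2) whose update was rejected as
 * stale.  Callers apply the peer data only when this is below 2, and
 * schedule an update carrying their own fresher copy when it is
 * non-zero.
 */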
int
pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
    struct pfsync_state_peer *dst)
{
	int sync = 0;

	/*
	 * The state should never go backwards except
	 * for syn-proxy states.  Neither should the
	 * sequence window slide backwards.
	 */
	if ((st->src.state > src->state &&
	    (st->src.state < PF_TCPS_PROXY_SRC ||
	    src->state >= PF_TCPS_PROXY_SRC)) ||

	    (st->src.state == src->state &&
	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(src, &st->src);

	if ((st->dst.state > dst->state) ||

	    (st->dst.state >= TCPS_SYN_SENT &&
	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(dst, &st->dst);

	return (sync);
}

int
pfsync_in_upd(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_state *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int sync;

	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_state *)(buf + len * i);

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST) {
			DPFPRINTF(LOG_NOTICE,
			    "pfsync_input: PFSYNC_ACT_UPD: invalid value");
			pfsyncstats.pfsyncs_badval++;
			continue;
		}

		id_key.id = sp->id;
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			/* insert the update */
			if (pfsync_state_import(sp, flags))
				pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 1);

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
		else {
			sync = 0;

			/*
			 * Non-TCP protocol state machines always go
			 * forwards.
			 */
			if (st->src.state > sp->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->src, &st->src);

			if (st->dst.state > sp->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->dst, &st->dst);
		}

		if (sync < 2) {
			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
			pf_state_peer_ntoh(&sp->dst, &st->dst);
			st->expire = time_uptime;
			st->timeout = sp->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			pfsyncstats.pfsyncs_stale++;

			pfsync_update_state(st);
			schednetisr(NETISR_PFSYNC);
		}
	}

	return (0);
}

int
pfsync_in_upd_c(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_upd_c *up;
	struct pf_state_cmp id_key;
	struct pf_state *st;

	int sync;

	int i;

	for (i = 0; i < count; i++) {
		up = (struct pfsync_upd_c *)(buf + len * i);

		/* check for invalid values */
		if (up->timeout >= PFTM_MAX ||
		    up->src.state > PF_TCPS_PROXY_DST ||
		    up->dst.state > PF_TCPS_PROXY_DST) {
			DPFPRINTF(LOG_NOTICE,
			    "pfsync_input: PFSYNC_ACT_UPD_C: invalid value");
			pfsyncstats.pfsyncs_badval++;
			continue;
		}

		id_key.id = up->id;
		id_key.creatorid = up->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			/* We don't have this state. Ask for it. */
			pfsync_request_update(id_key.creatorid, id_key.id);
			continue;
		}

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 1);

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
		else {
			sync = 0;
			/*
			 * Non-TCP protocol state machines always go
			 * forwards.
			 */
			if (st->src.state > up->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->src, &st->src);

			if (st->dst.state > up->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->dst, &st->dst);
		}
		if (sync < 2) {
			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
			pf_state_peer_ntoh(&up->dst, &st->dst);
			st->expire = time_uptime;
			st->timeout = up->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			pfsyncstats.pfsyncs_stale++;

			pfsync_update_state(st);
			schednetisr(NETISR_PFSYNC);
		}
	}

	return (0);
}

int
pfsync_in_ureq(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_upd_req *ur;
	int i;

	struct pf_state_cmp id_key;
	struct pf_state *st;

	for (i = 0; i < count; i++) {
		ur = (struct pfsync_upd_req *)(buf + len * i);

		id_key.id = ur->id;
		id_key.creatorid = ur->creatorid;

		if (id_key.id == 0 && id_key.creatorid == 0)
			pfsync_bulk_start();
		else {
			st = pf_find_state_byid(&id_key);
			if (st == NULL) {
				pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			if (ISSET(st->state_flags, PFSTATE_NOSYNC))
				continue;

			pfsync_update_state_req(st);
		}
	}

	return (0);
}

int
pfsync_in_del(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_state *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_state *)(buf + len * i);

		id_key.id = sp->id;
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			pfsyncstats.pfsyncs_badstate++;
			continue;
		}
		SET(st->state_flags, PFSTATE_NOSYNC);
		pf_remove_state(st);
	}

	return (0);
}

int
pfsync_in_del_c(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_del_c *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_del_c *)(buf + len * i);

		id_key.id = sp->id;
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		SET(st->state_flags, PFSTATE_NOSYNC);
		pf_remove_state(st);
	}

	return (0);
}

int
pfsync_in_bus(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_bus *bus;

	/* If we're not waiting for a bulk update, who cares. */
	if (sc->sc_ureq_sent == 0)
		return (0);

	bus = (struct pfsync_bus *)buf;

	switch (bus->status) {
	case PFSYNC_BUS_START:
		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
		    pf_pool_limits[PF_LIMIT_STATES].limit /
		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
		    sizeof(struct pfsync_state)));
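		/*
		 * Four seconds of slack plus roughly one tick per
		 * packet of a full bulk update: the state limit divided
		 * by the number of pfsync_state messages that fit in
		 * one MTU-sized packet.
		 */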
		DPFPRINTF(LOG_INFO, "received bulk update start");
		break;

	case PFSYNC_BUS_END:
		if (time_uptime - ntohl(bus->endtime) >=
		    sc->sc_ureq_sent) {
			/* that's it, we're happy */
			sc->sc_ureq_sent = 0;
			sc->sc_bulk_tries = 0;
			timeout_del(&sc->sc_bulkfail_tmo);
#if NCARP > 0
			if (!pfsync_sync_ok)
				carp_group_demote_adj(&sc->sc_if, -1,
				    sc->sc_link_demoted ?
				    "pfsync link state up" :
				    "pfsync bulk done");
			if (sc->sc_initial_bulk) {
				carp_group_demote_adj(&sc->sc_if, -32,
				    "pfsync init");
				sc->sc_initial_bulk = 0;
			}
#endif
			pfsync_sync_ok = 1;
			sc->sc_link_demoted = 0;
			DPFPRINTF(LOG_INFO, "received valid bulk update end");
		} else {
			DPFPRINTF(LOG_WARNING, "received invalid "
			    "bulk update end: bad timestamp");
		}
		break;
	}

	return (0);
}

int
pfsync_in_tdb(caddr_t buf, int len, int count, int flags)
{
#if defined(IPSEC)
	struct pfsync_tdb *tp;
	int i;

	for (i = 0; i < count; i++) {
		tp = (struct pfsync_tdb *)(buf + len * i);
		pfsync_update_net_tdb(tp);
	}
#endif

	return (0);
}

#if defined(IPSEC)
/* Update an in-kernel tdb. Silently fail if no tdb is found. */
void
pfsync_update_net_tdb(struct pfsync_tdb *pt)
{
	struct tdb		*tdb;
	int			 s;

	/* check for invalid values */
	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
	    (pt->dst.sa.sa_family != AF_INET &&
	     pt->dst.sa.sa_family != AF_INET6))
		goto bad;

	s = splsoftnet();
	tdb = gettdb(ntohs(pt->rdomain), pt->spi,
	    (union sockaddr_union *)&pt->dst, pt->sproto);
	if (tdb) {
		pt->rpl = betoh64(pt->rpl);
		pt->cur_bytes = betoh64(pt->cur_bytes);

		/* Neither replay nor byte counter should ever decrease. */
		if (pt->rpl < tdb->tdb_rpl ||
		    pt->cur_bytes < tdb->tdb_cur_bytes) {
			splx(s);
			goto bad;
		}

		tdb->tdb_rpl = pt->rpl;
		tdb->tdb_cur_bytes = pt->cur_bytes;
	}
	splx(s);
	return;

 bad:
	DPFPRINTF(LOG_WARNING, "pfsync_insert: PFSYNC_ACT_TDB_UPD: "
	    "invalid value");
	pfsyncstats.pfsyncs_badstate++;
	return;
}
#endif


int
pfsync_in_eof(caddr_t buf, int len, int count, int flags)
{
	if (len > 0 || count > 0)
		pfsyncstats.pfsyncs_badact++;

	/* we're done. let the caller return */
	return (1);
}

int
pfsync_in_error(caddr_t buf, int len, int count, int flags)
{
	pfsyncstats.pfsyncs_badact++;
	return (-1);
}

int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
	struct rtentry *rt)
{
	m_freem(m);	/* drop packet */
	return (EAFNOSUPPORT);
}

int
pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct proc *p = curproc;
	struct pfsync_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ip_moptions *imo = &sc->sc_imo;
	struct pfsyncreq pfsyncr;
	struct ifnet    *sifp;
	struct ip *ip;
	int s, error;

	switch (cmd) {
#if 0
	case SIOCSIFADDR:
	case SIOCAIFADDR:
	case SIOCSIFDSTADDR:
#endif
	case SIOCSIFFLAGS:
		s = splnet();
		if ((ifp->if_flags & IFF_RUNNING) == 0 &&
		    (ifp->if_flags & IFF_UP)) {
			ifp->if_flags |= IFF_RUNNING;

#if NCARP > 0
			sc->sc_initial_bulk = 1;
			carp_group_demote_adj(&sc->sc_if, 32, "pfsync init");
#endif

			pfsync_request_full_update(sc);
		}
		if ((ifp->if_flags & IFF_RUNNING) &&
		    (ifp->if_flags & IFF_UP) == 0) {
			ifp->if_flags &= ~IFF_RUNNING;

			/* drop everything */
			timeout_del(&sc->sc_tmo);
			pfsync_drop(sc);

			pfsync_cancel_full_update(sc);
		}
		splx(s);
		break;
	case SIOCSIFMTU:
		if (!sc->sc_sync_if ||
		    ifr->ifr_mtu <= PFSYNC_MINPKT ||
		    ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
			return (EINVAL);
		s = splnet();
		if (ifr->ifr_mtu < ifp->if_mtu)
			pfsync_sendout();
		ifp->if_mtu = ifr->ifr_mtu;
		splx(s);
		break;
	case SIOCGETPFSYNC:
		bzero(&pfsyncr, sizeof(pfsyncr));
		if (sc->sc_sync_if) {
			strlcpy(pfsyncr.pfsyncr_syncdev,
			    sc->sc_sync_if->if_xname, IFNAMSIZ);
		}
		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
		pfsyncr.pfsyncr_defer = sc->sc_defer;
		return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));

	case SIOCSETPFSYNC:
		if ((error = suser(p, 0)) != 0)
			return (error);
		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
			return (error);

		s = splnet();

		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
		else
			sc->sc_sync_peer.s_addr =
			    pfsyncr.pfsyncr_syncpeer.s_addr;

		if (pfsyncr.pfsyncr_maxupdates > 255) {
			splx(s);
			return (EINVAL);
		}
		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;

		sc->sc_defer = pfsyncr.pfsyncr_defer;

		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
			if (sc->sc_sync_if)
				hook_disestablish(
				    sc->sc_sync_if->if_linkstatehooks,
				    sc->sc_lhcookie);
			sc->sc_sync_if = NULL;
			if (imo->imo_num_memberships > 0) {
				in_delmulti(imo->imo_membership[
				    --imo->imo_num_memberships]);
				imo->imo_ifidx = 0;
			}
			splx(s);
			break;
		}

		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) {
			splx(s);
			return (EINVAL);
		}

		if (sifp->if_mtu < sc->sc_if.if_mtu ||
		    (sc->sc_sync_if != NULL &&
		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
			pfsync_sendout();

		if (sc->sc_sync_if)
			hook_disestablish(
			    sc->sc_sync_if->if_linkstatehooks,
			    sc->sc_lhcookie);
		sc->sc_sync_if = sifp;

		if (imo->imo_num_memberships > 0) {
			in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
			imo->imo_ifidx = 0;
		}

		if (sc->sc_sync_if &&
		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
			struct in_addr addr;

			if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) {
				sc->sc_sync_if = NULL;
				splx(s);
				return (EADDRNOTAVAIL);
			}

			addr.s_addr = INADDR_PFSYNC_GROUP;

			if ((imo->imo_membership[0] =
			    in_addmulti(&addr, sc->sc_sync_if)) == NULL) {
				sc->sc_sync_if = NULL;
				splx(s);
				return (ENOBUFS);
			}
			imo->imo_num_memberships++;
			imo->imo_ifidx = sc->sc_sync_if->if_index;
			imo->imo_ttl = PFSYNC_DFLTTL;
			imo->imo_loop = 0;
		}

		ip = &sc->sc_template;
		bzero(ip, sizeof(*ip));
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(sc->sc_template) >> 2;
		ip->ip_tos = IPTOS_LOWDELAY;
		/* len and id are set later */
		ip->ip_off = htons(IP_DF);
		ip->ip_ttl = PFSYNC_DFLTTL;
		ip->ip_p = IPPROTO_PFSYNC;
		ip->ip_src.s_addr = INADDR_ANY;
		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;

		sc->sc_lhcookie =
		    hook_establish(sc->sc_sync_if->if_linkstatehooks, 1,
		    pfsync_syncdev_state, sc);

		pfsync_request_full_update(sc);
		splx(s);

		break;

	default:
		return (ENOTTY);
	}

	return (0);
}
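/*
 * For illustration only: a typical setup pairs this ioctl with
 * ifconfig(8), e.g.
 *
 *	# ifconfig pfsync0 syncdev em0 up
 *
 * where "em0" is just an example sync device; a unicast syncpeer
 * address may be configured instead of the default multicast group.
 */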

void
pfsync_out_state(struct pf_state *st, void *buf)
{
	struct pfsync_state *sp = buf;

	pfsync_state_export(sp, st);
}

void
pfsync_out_iack(struct pf_state *st, void *buf)
{
	struct pfsync_ins_ack *iack = buf;

	iack->id = st->id;
	iack->creatorid = st->creatorid;
}

void
pfsync_out_upd_c(struct pf_state *st, void *buf)
{
	struct pfsync_upd_c *up = buf;

	bzero(up, sizeof(*up));
	up->id = st->id;
	pf_state_peer_hton(&st->src, &up->src);
	pf_state_peer_hton(&st->dst, &up->dst);
	up->creatorid = st->creatorid;
	up->timeout = st->timeout;
}

void
pfsync_out_del(struct pf_state *st, void *buf)
{
	struct pfsync_del_c *dp = buf;

	dp->id = st->id;
	dp->creatorid = st->creatorid;

	SET(st->state_flags, PFSTATE_NOSYNC);
}

void
pfsync_drop(struct pfsync_softc *sc)
{
	struct pf_state *st;
	struct pfsync_upd_req_item *ur;
	struct tdb *t;
	int q;

	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;

		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
#ifdef PFSYNC_DEBUG
			KASSERT(st->sync_state == q);
#endif
			st->sync_state = PFSYNC_S_NONE;
		}
		TAILQ_INIT(&sc->sc_qs[q]);
	}

	while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
		TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
		pool_put(&sc->sc_pool, ur);
	}

	sc->sc_plus = NULL;

	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry)
			CLR(t->tdb_flags, TDBF_PFSYNC);

		TAILQ_INIT(&sc->sc_tdb_q);
	}

	sc->sc_len = PFSYNC_MINPKT;
}

void
pfsync_sendout(void)
{
	struct pfsync_softc *sc = pfsyncif;
#if NBPFILTER > 0
	struct ifnet *ifp = &sc->sc_if;
#endif
	struct mbuf *m;
	struct ip *ip;
	struct pfsync_header *ph;
	struct pfsync_subheader *subh;
	struct pf_state *st;
	struct pfsync_upd_req_item *ur;
	struct tdb *t;

	int offset;
	int q, count = 0;

	if (sc == NULL || sc->sc_len == PFSYNC_MINPKT)
		return;

	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
#if NBPFILTER > 0
	    (ifp->if_bpf == NULL && sc->sc_sync_if == NULL)) {
#else
	    sc->sc_sync_if == NULL) {
#endif
		pfsync_drop(sc);
		return;
	}

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL) {
		sc->sc_if.if_oerrors++;
		pfsyncstats.pfsyncs_onomem++;
		pfsync_drop(sc);
		return;
	}

	if (max_linkhdr + sc->sc_len > MHLEN) {
		MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_free(m);
			sc->sc_if.if_oerrors++;
			pfsyncstats.pfsyncs_onomem++;
			pfsync_drop(sc);
			return;
		}
	}
	m->m_data += max_linkhdr;
	m->m_len = m->m_pkthdr.len = sc->sc_len;

	/* build the ip header */
	ip = mtod(m, struct ip *);
	bcopy(&sc->sc_template, ip, sizeof(*ip));
	offset = sizeof(*ip);

	ip->ip_len = htons(m->m_pkthdr.len);
	ip->ip_id = htons(ip_randomid());

	/* build the pfsync header */
	ph = (struct pfsync_header *)(m->m_data + offset);
	bzero(ph, sizeof(*ph));
	offset += sizeof(*ph);

	ph->version = PFSYNC_VERSION;
	ph->len = htons(sc->sc_len - sizeof(*ip));
	bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);

	if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
			TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);

			bcopy(&ur->ur_msg, m->m_data + offset,
			    sizeof(ur->ur_msg));
			offset += sizeof(ur->ur_msg);

			pool_put(&sc->sc_pool, ur);

			count++;
		}

		bzero(subh, sizeof(*subh));
		subh->len = sizeof(ur->ur_msg) >> 2;
		subh->action = PFSYNC_ACT_UPD_REQ;
		subh->count = htons(count);
	}

	/* has someone built a custom region for us to add? */
	if (sc->sc_plus != NULL) {
		bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
		offset += sc->sc_pluslen;

		sc->sc_plus = NULL;
	}

	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) {
			pfsync_out_tdb(t, m->m_data + offset);
			offset += sizeof(struct pfsync_tdb);
			CLR(t->tdb_flags, TDBF_PFSYNC);

			count++;
		}
		TAILQ_INIT(&sc->sc_tdb_q);

		bzero(subh, sizeof(*subh));
		subh->action = PFSYNC_ACT_TDB;
		subh->len = sizeof(struct pfsync_tdb) >> 2;
		subh->count = htons(count);
	}

	/* walk the queues */
	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;

		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
#ifdef PFSYNC_DEBUG
			KASSERT(st->sync_state == q);
#endif
			pfsync_qs[q].write(st, m->m_data + offset);
			offset += pfsync_qs[q].len;

			st->sync_state = PFSYNC_S_NONE;
			count++;
		}
		TAILQ_INIT(&sc->sc_qs[q]);

		bzero(subh, sizeof(*subh));
		subh->action = pfsync_qs[q].action;
		subh->len = pfsync_qs[q].len >> 2;
		subh->count = htons(count);
	}

	/* we're done, let's put it on the wire */
#if NBPFILTER > 0
	if (ifp->if_bpf) {
		m->m_data += sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
		m->m_data -= sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sc->sc_len;
	}

	if (sc->sc_sync_if == NULL) {
		sc->sc_len = PFSYNC_MINPKT;
		m_freem(m);
		return;
	}
#endif

	/* start again */
	sc->sc_len = PFSYNC_MINPKT;

	sc->sc_if.if_opackets++;
	sc->sc_if.if_obytes += m->m_pkthdr.len;

	m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;

	if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL, 0) == 0)
		pfsyncstats.pfsyncs_opackets++;
	else
		pfsyncstats.pfsyncs_oerrors++;
}

void
pfsync_insert_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	splsoftassert(IPL_SOFTNET);

	if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) ||
	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
		SET(st->state_flags, PFSTATE_NOSYNC);
		return;
	}

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    ISSET(st->state_flags, PFSTATE_NOSYNC))
		return;

#ifdef PFSYNC_DEBUG
	KASSERT(st->sync_state == PFSYNC_S_NONE);
#endif

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	pfsync_q_ins(st, PFSYNC_S_INS);

	st->sync_updates = 0;
}

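/*
 * Deferral briefly holds on to the packet that created a state until
 * the peer acknowledges the insert or the 20ms timeout below fires,
 * so the peer has the state before any reply traffic can reach it.
 */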
int
pfsync_defer(struct pf_state *st, struct mbuf *m)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_deferral *pd;

	splsoftassert(IPL_SOFTNET);

	if (!sc->sc_defer ||
	    ISSET(st->state_flags, PFSTATE_NOSYNC) ||
	    m->m_flags & (M_BCAST|M_MCAST))
		return (0);

	if (sc->sc_deferred >= 128) {
		pd = TAILQ_FIRST(&sc->sc_deferrals);
		if (timeout_del(&pd->pd_tmo))
			pfsync_undefer(pd, 0);
	}

	pd = pool_get(&sc->sc_pool, PR_NOWAIT);
	if (pd == NULL)
		return (0);

	m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
	SET(st->state_flags, PFSTATE_ACK);

	pd->pd_st = st;
	pd->pd_m = m;

	sc->sc_deferred++;
	TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);

	timeout_set(&pd->pd_tmo, pfsync_defer_tmo, pd);
	timeout_add_msec(&pd->pd_tmo, 20);

	schednetisr(NETISR_PFSYNC);

	return (1);
}

void
pfsync_undefer(struct pfsync_deferral *pd, int drop)
{
	struct pfsync_softc *sc = pfsyncif;

	splsoftassert(IPL_SOFTNET);

	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
	sc->sc_deferred--;

	CLR(pd->pd_st->state_flags, PFSTATE_ACK);
	if (drop)
		m_freem(pd->pd_m);
	else {
		if (pd->pd_st->rule.ptr->rt == PF_ROUTETO) {
			switch (pd->pd_st->key[PF_SK_WIRE]->af) {
			case AF_INET:
				pf_route(&pd->pd_m, pd->pd_st->rule.ptr,
				    pd->pd_st->direction,
				    pd->pd_st->rt_kif->pfik_ifp, pd->pd_st);
				break;
#ifdef INET6
			case AF_INET6:
				pf_route6(&pd->pd_m, pd->pd_st->rule.ptr,
				    pd->pd_st->direction,
				    pd->pd_st->rt_kif->pfik_ifp, pd->pd_st);
				break;
#endif /* INET6 */
			}
		} else {
			switch (pd->pd_st->key[PF_SK_WIRE]->af) {
			case AF_INET:
				ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL,
				    0);
				break;
#ifdef INET6
			case AF_INET6:
				ip6_output(pd->pd_m, NULL, NULL, 0,
				    NULL, NULL);
				break;
#endif /* INET6 */
			}
		}
	}

	pool_put(&sc->sc_pool, pd);
}

void
pfsync_defer_tmo(void *arg)
{
	int s;

	s = splsoftnet();
	pfsync_undefer(arg, 0);
	splx(s);
}

void
pfsync_deferred(struct pf_state *st, int drop)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_deferral *pd;

	splsoftassert(IPL_SOFTNET);

	TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
		if (pd->pd_st == st) {
			if (timeout_del(&pd->pd_tmo))
				pfsync_undefer(pd, drop);
			return;
		}
	}

	panic("pfsync_deferred: unable to find deferred state");
}

void
pfsync_update_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;
	int sync = 0;

	splsoftassert(IPL_SOFTNET);

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	if (ISSET(st->state_flags, PFSTATE_ACK))
		pfsync_deferred(st, 0);
	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_INS:
		/* we're already handling it */

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
			st->sync_updates++;
			if (st->sync_updates >= sc->sc_maxupdates)
				sync = 1;
		}
		break;

	case PFSYNC_S_IACK:
		pfsync_q_del(st);
	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD_C);
		st->sync_updates = 0;
		break;

	default:
		panic("pfsync_update_state: unexpected sync state %d",
		    st->sync_state);
	}

	if (sync || (time_uptime - st->pfsync_time) < 2)
		schednetisr(NETISR_PFSYNC);
}

void
pfsync_cancel_full_update(struct pfsync_softc *sc)
{
	if (timeout_pending(&sc->sc_bulkfail_tmo) ||
	    timeout_pending(&sc->sc_bulk_tmo)) {
#if NCARP > 0
		if (!pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, -1,
			    "pfsync bulk cancelled");
		if (sc->sc_initial_bulk) {
			carp_group_demote_adj(&sc->sc_if, -32,
			    "pfsync init");
			sc->sc_initial_bulk = 0;
		}
#endif
		pfsync_sync_ok = 1;
		DPFPRINTF(LOG_INFO, "cancelling bulk update");
	}
	timeout_del(&sc->sc_bulkfail_tmo);
	timeout_del(&sc->sc_bulk_tmo);
	sc->sc_bulk_next = NULL;
	sc->sc_bulk_last = NULL;
	sc->sc_ureq_sent = 0;
	sc->sc_bulk_tries = 0;
}

void
pfsync_request_full_update(struct pfsync_softc *sc)
{
	if (sc->sc_sync_if && ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
		/* Request a full state table update. */
		sc->sc_ureq_sent = time_uptime;
#if NCARP > 0
		if (!sc->sc_link_demoted && pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, 1,
			    "pfsync bulk start");
#endif
		pfsync_sync_ok = 0;
		DPFPRINTF(LOG_INFO, "requesting bulk update");
		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
		    pf_pool_limits[PF_LIMIT_STATES].limit /
		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
		    sizeof(struct pfsync_state)));
		pfsync_request_update(0, 0);
	}
}

void
pfsync_request_update(u_int32_t creatorid, u_int64_t id)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_upd_req_item *item;
	size_t nlen = sizeof(struct pfsync_upd_req);

	/*
	 * this code does nothing to prevent multiple update requests for the
	 * same state being generated.
	 */

	item = pool_get(&sc->sc_pool, PR_NOWAIT);
	if (item == NULL) {
		/* XXX stats */
		return;
	}

	item->ur_msg.id = id;
	item->ur_msg.creatorid = creatorid;

	if (TAILQ_EMPTY(&sc->sc_upd_req_list))
		nlen += sizeof(struct pfsync_subheader);

	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
		pfsync_sendout();

		nlen = sizeof(struct pfsync_subheader) +
		    sizeof(struct pfsync_upd_req);
	}

	TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
	sc->sc_len += nlen;

	schednetisr(NETISR_PFSYNC);
}

void
pfsync_update_state_req(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL)
		panic("pfsync_update_state_req: nonexistent instance");

	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD);
		schednetisr(NETISR_PFSYNC);
		return;

	case PFSYNC_S_INS:
	case PFSYNC_S_UPD:
	case PFSYNC_S_DEL:
		/* we're already handling it */
		return;

	default:
		panic("pfsync_update_state_req: unexpected sync state %d",
		    st->sync_state);
	}
}

void
pfsync_delete_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	splsoftassert(IPL_SOFTNET);

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	if (ISSET(st->state_flags, PFSTATE_ACK))
		pfsync_deferred(st, 1);
	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	switch (st->sync_state) {
	case PFSYNC_S_INS:
		/* we never got to tell the world so just forget about it */
		pfsync_q_del(st);
		return;

	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/* FALLTHROUGH to putting it on the del list */

	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_DEL);
		return;

	default:
		panic("pfsync_delete_state: unexpected sync state %d",
		    st->sync_state);
	}
}

void
pfsync_clear_states(u_int32_t creatorid, const char *ifname)
{
	struct pfsync_softc *sc = pfsyncif;
	struct {
		struct pfsync_subheader subh;
		struct pfsync_clr clr;
	} __packed r;

	splsoftassert(IPL_SOFTNET);

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_CLR;
	r.subh.len = sizeof(struct pfsync_clr) >> 2;
	r.subh.count = htons(1);

	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
	r.clr.creatorid = creatorid;

	pfsync_send_plus(&r, sizeof(r));
}

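/*
 * sc_len accounting: a queue contributes one subheader when it goes
 * from empty to non-empty, plus one message per queued state; if the
 * addition would overflow the MTU the pending packet is flushed
 * first.  pfsync_q_del() reverses the bookkeeping.
 */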
void
pfsync_q_ins(struct pf_state *st, int q)
{
	struct pfsync_softc *sc = pfsyncif;
	size_t nlen = pfsync_qs[q].len;

	KASSERT(st->sync_state == PFSYNC_S_NONE);

#if defined(PFSYNC_DEBUG)
	if (sc->sc_len < PFSYNC_MINPKT)
		panic("pfsync pkt len is too low %zu", sc->sc_len);
#endif
	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		nlen += sizeof(struct pfsync_subheader);

	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
		pfsync_sendout();

		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
	}

	sc->sc_len += nlen;
	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
	st->sync_state = q;
}

void
pfsync_q_del(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;
	int q = st->sync_state;

	KASSERT(st->sync_state != PFSYNC_S_NONE);

	sc->sc_len -= pfsync_qs[q].len;
	TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
	st->sync_state = PFSYNC_S_NONE;

	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		sc->sc_len -= sizeof(struct pfsync_subheader);
}

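/*
 * keep IPsec SA (tdb) replay counters roughly in sync on our peers.
 * the first update queues the tdb on sc_tdb_q with the usual
 * subheader/MTU accounting; subsequent updates only bump tdb_updates
 * and force a flush once sc_maxupdates is reached. TDBF_PFSYNC_RPL
 * notes whether the replay counter should be padded when serialised.
 */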
void
pfsync_update_tdb(struct tdb *t, int output)
{
	struct pfsync_softc *sc = pfsyncif;
	size_t nlen = sizeof(struct pfsync_tdb);

	if (sc == NULL)
		return;

	if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
		if (TAILQ_EMPTY(&sc->sc_tdb_q))
			nlen += sizeof(struct pfsync_subheader);

		if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
			pfsync_sendout();

			nlen = sizeof(struct pfsync_subheader) +
			    sizeof(struct pfsync_tdb);
		}

		sc->sc_len += nlen;
		TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
		SET(t->tdb_flags, TDBF_PFSYNC);
		t->tdb_updates = 0;
	} else {
		if (++t->tdb_updates >= sc->sc_maxupdates)
			schednetisr(NETISR_PFSYNC);
	}

	if (output)
		SET(t->tdb_flags, TDBF_PFSYNC_RPL);
	else
		CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
}

void
pfsync_delete_tdb(struct tdb *t)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
		return;

	sc->sc_len -= sizeof(struct pfsync_tdb);
	TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
	CLR(t->tdb_flags, TDBF_PFSYNC);

	if (TAILQ_EMPTY(&sc->sc_tdb_q))
		sc->sc_len -= sizeof(struct pfsync_subheader);
}

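/*
 * serialise a tdb into a struct pfsync_tdb wire message; see the
 * comment below for why outbound replay counters are padded.
 */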
void
pfsync_out_tdb(struct tdb *t, void *buf)
{
	struct pfsync_tdb *ut = buf;

	bzero(ut, sizeof(*ut));
	ut->spi = t->tdb_spi;
	bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
	/*
	 * When a failover happens, the master's rpl is probably above
	 * what we see here (we may be up to a second late), so
	 * increase it a bit for outbound tdbs to manage most such
	 * situations.
	 *
	 * For now, just add an offset that is likely to be larger
	 * than the number of packets we can see in one second. The RFC
	 * just says the next packet must have a higher seq value.
	 *
	 * XXX What is a good algorithm for this? We could use
	 * a rate-determined increase, but to know it, we would have
	 * to extend struct tdb.
	 * XXX ut->rpl can wrap over MAXINT, but if so the real tdb
	 * will soon be replaced anyway. For now, just don't handle
	 * this edge case.
	 */
#define RPL_INCR 16384
	ut->rpl = htobe64(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
	    RPL_INCR : 0));
	ut->cur_bytes = htobe64(t->tdb_cur_bytes);
	ut->sproto = t->tdb_sproto;
	ut->rdomain = htons(t->tdb_rdomain);
}

void
pfsync_bulk_start(void)
{
	struct pfsync_softc *sc = pfsyncif;

	DPFPRINTF(LOG_INFO, "received bulk update request");

	if (TAILQ_EMPTY(&state_list))
		pfsync_bulk_status(PFSYNC_BUS_END);
	else {
		sc->sc_ureq_received = time_uptime;

		if (sc->sc_bulk_next == NULL)
			sc->sc_bulk_next = TAILQ_FIRST(&state_list);
		sc->sc_bulk_last = sc->sc_bulk_next;

		pfsync_bulk_status(PFSYNC_BUS_START);
		timeout_add(&sc->sc_bulk_tmo, 0);
	}
}

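/*
 * walk the global state list, streaming eligible states to the peer
 * that asked for a bulk update. roughly one packet is filled per run;
 * the walk reschedules itself via sc_bulk_tmo until it wraps around
 * to sc_bulk_last, then signals the end of the bulk update.
 */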
void
pfsync_bulk_update(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct pf_state *st;
	int i = 0;
	int s;

	s = splsoftnet();

	st = sc->sc_bulk_next;

	for (;;) {
		if (st->sync_state == PFSYNC_S_NONE &&
		    st->timeout < PFTM_MAX &&
		    st->pfsync_time <= sc->sc_ureq_received) {
			pfsync_update_state_req(st);
			i++;
		}

		st = TAILQ_NEXT(st, entry_list);
		if (st == NULL)
			st = TAILQ_FIRST(&state_list);

		if (st == sc->sc_bulk_last) {
			/* we're done */
			sc->sc_bulk_next = NULL;
			sc->sc_bulk_last = NULL;
			pfsync_bulk_status(PFSYNC_BUS_END);
			break;
		}

		if (i > 1 && (sc->sc_if.if_mtu - sc->sc_len) <
		    sizeof(struct pfsync_state)) {
			/* we've filled a packet */
			sc->sc_bulk_next = st;
			timeout_add(&sc->sc_bulk_tmo, 1);
			break;
		}
	}

	splx(s);
}

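/*
 * send a PFSYNC_ACT_BUS message marking the start or end of a bulk
 * update, including how long ago the triggering request arrived.
 */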
void
pfsync_bulk_status(u_int8_t status)
{
	struct {
		struct pfsync_subheader subh;
		struct pfsync_bus bus;
	} __packed r;

	struct pfsync_softc *sc = pfsyncif;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_BUS;
	r.subh.len = sizeof(struct pfsync_bus) >> 2;	/* in 32-bit words */
	r.subh.count = htons(1);

	r.bus.creatorid = pf_status.hostid;
	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
	r.bus.status = status;

	pfsync_send_plus(&r, sizeof(r));
}

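/*
 * bulk update failure handler: retry the update request up to
 * PFSYNC_MAX_BULKTRIES times, then give up, declare the sync ok
 * anyway and lift any carp demotion so this host may take over.
 */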
void
pfsync_bulk_fail(void *arg)
{
	struct pfsync_softc *sc = arg;
	int s;

	s = splsoftnet();

	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
		/* Try again */
		timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
		pfsync_request_update(0, 0);
	} else {
		/* Pretend the transfer was ok */
		sc->sc_ureq_sent = 0;
		sc->sc_bulk_tries = 0;
#if NCARP > 0
		if (!pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, -1,
			    sc->sc_link_demoted ?
			    "pfsync link state up" :
			    "pfsync bulk fail");
		if (sc->sc_initial_bulk) {
			carp_group_demote_adj(&sc->sc_if, -32,
			    "pfsync init");
			sc->sc_initial_bulk = 0;
		}
#endif
		pfsync_sync_ok = 1;
		sc->sc_link_demoted = 0;
		DPFPRINTF(LOG_ERR, "failed to receive bulk update");
	}

	splx(s);
}

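/*
 * attach an out-of-band chunk ("plus") to the pending packet and
 * transmit immediately; used for messages such as clear and bulk
 * status that bypass the normal state queues.
 */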
void
pfsync_send_plus(void *plus, size_t pluslen)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc->sc_len + pluslen > sc->sc_if.if_mtu)
		pfsync_sendout();

	sc->sc_plus = plus;
	sc->sc_len += (sc->sc_pluslen = pluslen);

	pfsync_sendout();
}

int
pfsync_up(void)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return (0);

	return (1);
}

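/*
 * report whether pfsync still references a state, i.e. whether it
 * sits on a transmit queue or is one of the bulk walk's cursors;
 * callers use this to decide if the state may be freed.
 */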
int
pfsync_state_in_use(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL)
		return (0);

	if (st->sync_state != PFSYNC_S_NONE ||
	    st == sc->sc_bulk_next ||
	    st == sc->sc_bulk_last)
		return (1);

	return (0);
}

void
pfsync_timeout(void *arg)
{
	int s;

	s = splsoftnet();
	pfsync_sendout();
	splx(s);
}

/* this is a softnet/netisr handler */
void
pfsyncintr(void)
{
	pfsync_sendout();
}

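/*
 * export pfsyncstats to userland (this is, e.g., where netstat(1)
 * gets its pfsync counters). the node is read-only; attempts to set
 * it fail with EPERM.
 */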
int
pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	/* All sysctl names at this level are terminal. */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case PFSYNCCTL_STATS:
		if (newp != NULL)
			return (EPERM);
		return (sysctl_struct(oldp, oldlenp, newp, newlen,
		    &pfsyncstats, sizeof(pfsyncstats)));
	default:
		return (ENOPROTOOPT);
	}
}