/*	$OpenBSD: if_pfsync.c,v 1.231 2016/09/15 02:00:18 dlg Exp $	*/

/*
 * Copyright (c) 2002 Michael Shalayeff
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/timeout.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/pool.h>
#include <sys/syslog.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/bpf.h>
#include <net/netisr.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_fsm.h>

#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>

#ifdef IPSEC
#include <netinet/ip_ipsp.h>
#endif /* IPSEC */

#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/nd6.h>
#endif /* INET6 */

#include "carp.h"
#if NCARP > 0
#include <netinet/ip_carp.h>
#endif

#define PF_DEBUGNAME	"pfsync: "
#include <net/pfvar.h>
#include <netinet/ip_ipsp.h>
#include <net/if_pfsync.h>

#include "bpfilter.h"
#include "pfsync.h"

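/*
 * A pfsync packet is at minimum an IP header followed by a pfsync
 * header.  sc_len in the softc starts at this size and grows as
 * subheaders and messages are queued for the next transmission.
 */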
#define PFSYNC_MINPKT ( \
	sizeof(struct ip) + \
	sizeof(struct pfsync_header))

int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
	    struct pfsync_state_peer *);

int	pfsync_in_clr(caddr_t, int, int, int);
int	pfsync_in_iack(caddr_t, int, int, int);
int	pfsync_in_upd_c(caddr_t, int, int, int);
int	pfsync_in_ureq(caddr_t, int, int, int);
int	pfsync_in_del(caddr_t, int, int, int);
int	pfsync_in_del_c(caddr_t, int, int, int);
int	pfsync_in_bus(caddr_t, int, int, int);
int	pfsync_in_tdb(caddr_t, int, int, int);
int	pfsync_in_ins(caddr_t, int, int, int);
int	pfsync_in_upd(caddr_t, int, int, int);
int	pfsync_in_eof(caddr_t, int, int, int);

int	pfsync_in_error(caddr_t, int, int, int);

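/*
 * Table of input handlers, indexed by the action code carried in each
 * pfsync_subheader.  Actions this kernel does not accept map to
 * pfsync_in_error, which counts the packet as bad and stops processing
 * of the rest of it.
 */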
struct {
	int	(*in)(caddr_t, int, int, int);
	size_t	len;
} pfsync_acts[] = {
	/* PFSYNC_ACT_CLR */
	{ pfsync_in_clr,	sizeof(struct pfsync_clr) },
	/* PFSYNC_ACT_OINS */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_INS_ACK */
	{ pfsync_in_iack,	sizeof(struct pfsync_ins_ack) },
	/* PFSYNC_ACT_OUPD */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_UPD_C */
	{ pfsync_in_upd_c,	sizeof(struct pfsync_upd_c) },
	/* PFSYNC_ACT_UPD_REQ */
	{ pfsync_in_ureq,	sizeof(struct pfsync_upd_req) },
	/* PFSYNC_ACT_DEL */
	{ pfsync_in_del,	sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_DEL_C */
	{ pfsync_in_del_c,	sizeof(struct pfsync_del_c) },
	/* PFSYNC_ACT_INS_F */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_DEL_F */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_BUS */
	{ pfsync_in_bus,	sizeof(struct pfsync_bus) },
	/* PFSYNC_ACT_OTDB */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_EOF */
	{ pfsync_in_error,	0 },
	/* PFSYNC_ACT_INS */
	{ pfsync_in_ins,	sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_UPD */
	{ pfsync_in_upd,	sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_TDB */
	{ pfsync_in_tdb,	sizeof(struct pfsync_tdb) },
};

struct pfsync_q {
	void		(*write)(struct pf_state *, void *);
	size_t		len;
	u_int8_t	action;
};

/* we have one of these for every PFSYNC_S_ */
void	pfsync_out_state(struct pf_state *, void *);
void	pfsync_out_iack(struct pf_state *, void *);
void	pfsync_out_upd_c(struct pf_state *, void *);
void	pfsync_out_del(struct pf_state *, void *);

struct pfsync_q pfsync_qs[] = {
	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C },
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD }
};

void	pfsync_q_ins(struct pf_state *, int);
void	pfsync_q_del(struct pf_state *);

struct pfsync_upd_req_item {
	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
	struct pfsync_upd_req			ur_msg;
};
TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);

struct pfsync_deferral {
	TAILQ_ENTRY(pfsync_deferral)		 pd_entry;
	struct pf_state				*pd_st;
	struct mbuf				*pd_m;
	struct timeout				 pd_tmo;
};
TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);

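/*
 * Update requests and deferrals are both allocated from the per-softc
 * pool, so size the pool items for whichever of the two is larger.
 */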
#define PFSYNC_PLSIZE	MAX(sizeof(struct pfsync_upd_req_item), \
			    sizeof(struct pfsync_deferral))

void	pfsync_out_tdb(struct tdb *, void *);

struct pfsync_softc {
	struct ifnet		 sc_if;
	struct ifnet		*sc_sync_if;

	struct pool		 sc_pool;

	struct ip_moptions	 sc_imo;

	struct in_addr		 sc_sync_peer;
	u_int8_t		 sc_maxupdates;

	struct ip		 sc_template;

	struct pf_state_queue	 sc_qs[PFSYNC_S_COUNT];
	size_t			 sc_len;

	struct pfsync_upd_reqs	 sc_upd_req_list;

	int			 sc_initial_bulk;
	int			 sc_link_demoted;

	int			 sc_defer;
	struct pfsync_deferrals	 sc_deferrals;
	u_int			 sc_deferred;

	void			*sc_plus;
	size_t			 sc_pluslen;

	u_int32_t		 sc_ureq_sent;
	int			 sc_bulk_tries;
	struct timeout		 sc_bulkfail_tmo;

	u_int32_t		 sc_ureq_received;
	struct pf_state		*sc_bulk_next;
	struct pf_state		*sc_bulk_last;
	struct timeout		 sc_bulk_tmo;

	TAILQ_HEAD(, tdb)	 sc_tdb_q;

	void			*sc_lhcookie;

	struct timeout		 sc_tmo;
};

struct pfsync_softc	*pfsyncif = NULL;
struct pfsyncstats	 pfsyncstats;

void	pfsyncattach(int);
int	pfsync_clone_create(struct if_clone *, int);
int	pfsync_clone_destroy(struct ifnet *);
int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
	    struct pf_state_peer *);
void	pfsync_update_net_tdb(struct pfsync_tdb *);
int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
void	pfsyncstart(struct ifnet *);
void	pfsync_syncdev_state(void *);

void	pfsync_deferred(struct pf_state *, int);
void	pfsync_undefer(struct pfsync_deferral *, int);
void	pfsync_defer_tmo(void *);

void	pfsync_cancel_full_update(struct pfsync_softc *);
void	pfsync_request_full_update(struct pfsync_softc *);
void	pfsync_request_update(u_int32_t, u_int64_t);
void	pfsync_update_state_req(struct pf_state *);

void	pfsync_drop(struct pfsync_softc *);
void	pfsync_sendout(void);
void	pfsync_send_plus(void *, size_t);
void	pfsync_timeout(void *);
void	pfsync_tdb_timeout(void *);

void	pfsync_bulk_start(void);
void	pfsync_bulk_status(u_int8_t);
void	pfsync_bulk_update(void *);
void	pfsync_bulk_fail(void *);

#define PFSYNC_MAX_BULKTRIES	12
int	pfsync_sync_ok;

struct if_clone	pfsync_cloner =
    IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);

void
pfsyncattach(int npfsync)
{
	if_clone_attach(&pfsync_cloner);
}

int
pfsync_clone_create(struct if_clone *ifc, int unit)
{
	struct pfsync_softc *sc;
	struct ifnet *ifp;
	int q;

	if (unit != 0)
		return (EINVAL);

	pfsync_sync_ok = 1;

	sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_WAITOK | M_ZERO);

	for (q = 0; q < PFSYNC_S_COUNT; q++)
		TAILQ_INIT(&sc->sc_qs[q]);

	pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, IPL_SOFTNET, 0, "pfsync",
	    NULL);
	TAILQ_INIT(&sc->sc_upd_req_list);
	TAILQ_INIT(&sc->sc_deferrals);
	sc->sc_deferred = 0;

	TAILQ_INIT(&sc->sc_tdb_q);

	sc->sc_len = PFSYNC_MINPKT;
	sc->sc_maxupdates = 128;

	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
	    M_WAITOK | M_ZERO);
	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;

	ifp = &sc->sc_if;
	snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
	ifp->if_softc = sc;
	ifp->if_ioctl = pfsyncioctl;
	ifp->if_output = pfsyncoutput;
	ifp->if_start = pfsyncstart;
	ifp->if_type = IFT_PFSYNC;
	IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
	ifp->if_hdrlen = sizeof(struct pfsync_header);
	ifp->if_mtu = ETHERMTU;
	timeout_set(&sc->sc_tmo, pfsync_timeout, sc);
	timeout_set(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
	timeout_set(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);

	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NCARP > 0
	if_addgroup(ifp, "carp");
#endif

#if NBPFILTER > 0
	bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
#endif

	pfsyncif = sc;

	return (0);
}

int
pfsync_clone_destroy(struct ifnet *ifp)
{
	struct pfsync_softc *sc = ifp->if_softc;
	struct pfsync_deferral *pd;
	int s;

	s = splsoftnet();
	timeout_del(&sc->sc_bulkfail_tmo);
	timeout_del(&sc->sc_bulk_tmo);
	timeout_del(&sc->sc_tmo);
#if NCARP > 0
	if (!pfsync_sync_ok)
		carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
	if (sc->sc_link_demoted)
		carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
#endif
	if (sc->sc_sync_if)
		hook_disestablish(
		    sc->sc_sync_if->if_linkstatehooks,
		    sc->sc_lhcookie);
	if_detach(ifp);

	pfsync_drop(sc);

	while (sc->sc_deferred > 0) {
		pd = TAILQ_FIRST(&sc->sc_deferrals);
		timeout_del(&pd->pd_tmo);
		pfsync_undefer(pd, 0);
	}

	pool_destroy(&sc->sc_pool);
	free(sc->sc_imo.imo_membership, M_IPMOPTS, 0);
	free(sc, M_DEVBUF, sizeof(*sc));

	pfsyncif = NULL;
	splx(s);

	return (0);
}

/*
 * Start output on the pfsync interface.
 */
void
pfsyncstart(struct ifnet *ifp)
{
	IFQ_PURGE(&ifp->if_snd);
}

void
pfsync_syncdev_state(void *arg)
{
	struct pfsync_softc *sc = arg;

	if (!sc->sc_sync_if || !(sc->sc_if.if_flags & IFF_UP))
		return;

	if (sc->sc_sync_if->if_link_state == LINK_STATE_DOWN) {
		sc->sc_if.if_flags &= ~IFF_RUNNING;
		if (!sc->sc_link_demoted) {
#if NCARP > 0
			carp_group_demote_adj(&sc->sc_if, 1,
			    "pfsync link state down");
#endif
			sc->sc_link_demoted = 1;
		}

		/* drop everything */
		timeout_del(&sc->sc_tmo);
		pfsync_drop(sc);

		pfsync_cancel_full_update(sc);
	} else if (sc->sc_link_demoted) {
		sc->sc_if.if_flags |= IFF_RUNNING;

		pfsync_request_full_update(sc);
	}
}

int
pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
    struct pf_state_peer *d)
{
	if (s->scrub.scrub_flag && d->scrub == NULL) {
		d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
		if (d->scrub == NULL)
			return (ENOMEM);
	}

	return (0);
}

void
pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
{
	pf_state_export(sp, st);
}

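/*
 * Build and insert a local pf_state from the wire representation in sp.
 * Returns 0 on success (or when the state is deliberately skipped),
 * EINVAL on malformed input, ENOMEM on allocation failure, and EEXIST
 * if the state is already present.
 */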
int
pfsync_state_import(struct pfsync_state *sp, int flags)
{
	struct pf_state	*st = NULL;
	struct pf_state_key *skw = NULL, *sks = NULL;
	struct pf_rule *r = NULL;
	struct pfi_kif	*kif;
	int pool_flags;
	int error;

	if (sp->creatorid == 0) {
		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
		    "invalid creator id: %08x", ntohl(sp->creatorid));
		return (EINVAL);
	}

	if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
		    "unknown interface: %s", sp->ifname);
		if (flags & PFSYNC_SI_IOCTL)
			return (EINVAL);
		return (0);	/* skip this state */
	}

	if (sp->af == 0)
		return (0);	/* skip this state */

	/*
	 * If the ruleset checksums match or the state is coming from the
	 * ioctl, it's safe to associate the state with the rule of that
	 * number.
	 */
	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
	    pf_main_ruleset.rules.active.rcount)
		r = pf_main_ruleset.rules.active.ptr_array[ntohl(sp->rule)];
	else
		r = &pf_default_rule;

	if ((r->max_states && r->states_cur >= r->max_states))
		goto cleanup;

	if (flags & PFSYNC_SI_IOCTL)
		pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
	else
		pool_flags = PR_NOWAIT | PR_LIMITFAIL | PR_ZERO;

	if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
		goto cleanup;

	if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
		goto cleanup;

	if ((sp->key[PF_SK_WIRE].af &&
	    (sp->key[PF_SK_WIRE].af != sp->key[PF_SK_STACK].af)) ||
	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] ||
	    sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) {
		if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
			goto cleanup;
	} else
		sks = skw;

	/* allocate memory for scrub info */
	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
		goto cleanup;

	/* copy to state key(s) */
	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
	skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain);
	PF_REF_INIT(skw->refcnt);
	skw->proto = sp->proto;
	if (!(skw->af = sp->key[PF_SK_WIRE].af))
		skw->af = sp->af;
	if (sks != skw) {
		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
		sks->port[0] = sp->key[PF_SK_STACK].port[0];
		sks->port[1] = sp->key[PF_SK_STACK].port[1];
		sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain);
		PF_REF_INIT(sks->refcnt);
		if (!(sks->af = sp->key[PF_SK_STACK].af))
			sks->af = sp->af;
		if (sks->af != skw->af) {
			switch (sp->proto) {
			case IPPROTO_ICMP:
				sks->proto = IPPROTO_ICMPV6;
				break;
			case IPPROTO_ICMPV6:
				sks->proto = IPPROTO_ICMP;
				break;
			default:
				sks->proto = sp->proto;
			}
		} else
			sks->proto = sp->proto;
	}
	st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]);
	st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]);

	/* copy to state */
	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
	st->creation = time_uptime - ntohl(sp->creation);
	st->expire = time_uptime;
	if (ntohl(sp->expire)) {
		u_int32_t timeout;

		timeout = r->timeout[sp->timeout];
		if (!timeout)
			timeout = pf_default_rule.timeout[sp->timeout];

		/* sp->expire may have been adaptively scaled by export. */
		st->expire -= timeout - ntohl(sp->expire);
	}

	st->direction = sp->direction;
	st->log = sp->log;
	st->timeout = sp->timeout;
	st->state_flags = ntohs(sp->state_flags);
	st->max_mss = ntohs(sp->max_mss);
	st->min_ttl = sp->min_ttl;
	st->set_tos = sp->set_tos;
	st->set_prio[0] = sp->set_prio[0];
	st->set_prio[1] = sp->set_prio[1];

	st->id = sp->id;
	st->creatorid = sp->creatorid;
	pf_state_peer_ntoh(&sp->src, &st->src);
	pf_state_peer_ntoh(&sp->dst, &st->dst);

	st->rule.ptr = r;
	st->anchor.ptr = NULL;
	st->rt_kif = NULL;

	st->pfsync_time = time_uptime;
	st->sync_state = PFSYNC_S_NONE;

	/* XXX when we have anchors, use STATE_INC_COUNTERS */
	r->states_cur++;
	r->states_tot++;

	if (!ISSET(flags, PFSYNC_SI_IOCTL))
		SET(st->state_flags, PFSTATE_NOSYNC);

	if (pf_state_insert(kif, &skw, &sks, st) != 0) {
		/* XXX when we have anchors, use STATE_DEC_COUNTERS */
		r->states_cur--;
		error = EEXIST;
		goto cleanup_state;
	}

	if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
		CLR(st->state_flags, PFSTATE_NOSYNC);
		if (ISSET(st->state_flags, PFSTATE_ACK)) {
			pfsync_q_ins(st, PFSYNC_S_IACK);
			schednetisr(NETISR_PFSYNC);
		}
	}
	CLR(st->state_flags, PFSTATE_ACK);

	return (0);

 cleanup:
	error = ENOMEM;
	if (skw == sks)
		sks = NULL;
	if (skw != NULL)
		pool_put(&pf_state_key_pl, skw);
	if (sks != NULL)
		pool_put(&pf_state_key_pl, sks);

 cleanup_state:	/* pf_state_insert frees the state keys */
	if (st) {
		if (st->dst.scrub)
			pool_put(&pf_state_scrub_pl, st->dst.scrub);
		if (st->src.scrub)
			pool_put(&pf_state_scrub_pl, st->src.scrub);
		pool_put(&pf_state_pl, st);
	}
	return (error);
}

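/*
 * Input path for pfsync packets: validate the IP and pfsync headers,
 * then walk the chain of subheaders, dispatching each batch of
 * messages through pfsync_acts[].
 */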
void
pfsync_input(struct mbuf *m, ...)
{
	struct pfsync_softc *sc = pfsyncif;
	struct ip *ip = mtod(m, struct ip *);
	struct mbuf *mp;
	struct pfsync_header *ph;
	struct pfsync_subheader subh;

	int offset, offp, len, count, mlen, flags = 0;

	pfsyncstats.pfsyncs_ipackets++;

	/* verify that we have a sync interface configured */
	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    sc->sc_sync_if == NULL || !pf_status.running)
		goto done;

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_if->if_index != m->m_pkthdr.ph_ifidx) {
		pfsyncstats.pfsyncs_badif++;
		goto done;
	}

	sc->sc_if.if_ipackets++;
	sc->sc_if.if_ibytes += m->m_pkthdr.len;

	/* verify that the IP TTL is 255. */
	if (ip->ip_ttl != PFSYNC_DFLTTL) {
		pfsyncstats.pfsyncs_badttl++;
		goto done;
	}

	offset = ip->ip_hl << 2;
	mp = m_pulldown(m, offset, sizeof(*ph), &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_hdrops++;
		return;
	}
	ph = (struct pfsync_header *)(mp->m_data + offp);

	/* verify the version */
	if (ph->version != PFSYNC_VERSION) {
		pfsyncstats.pfsyncs_badver++;
		goto done;
	}
	len = ntohs(ph->len) + offset;
	if (m->m_pkthdr.len < len) {
		pfsyncstats.pfsyncs_badlen++;
		goto done;
	}

	if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
		flags = PFSYNC_SI_CKSUM;

	offset += sizeof(*ph);
	while (offset <= len - sizeof(subh)) {
		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
		offset += sizeof(subh);

		mlen = subh.len << 2;
		count = ntohs(subh.count);

		if (subh.action >= PFSYNC_ACT_MAX ||
		    subh.action >= nitems(pfsync_acts) ||
		    mlen < pfsync_acts[subh.action].len) {
			/*
			 * Subheaders are always followed by at least one
			 * message, so if the peer is new enough to tell us
			 * how big its messages are then we know enough to
			 * skip them.
			 */
			if (count > 0 && mlen > 0) {
				offset += count * mlen;
				continue;
			}
			pfsyncstats.pfsyncs_badact++;
			goto done;
		}

		mp = m_pulldown(m, offset, mlen * count, &offp);
		if (mp == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		if (pfsync_acts[subh.action].in(mp->m_data + offp,
		    mlen, count, flags) != 0)
			goto done;

		offset += mlen * count;
	}

done:
	m_freem(m);
}

int
pfsync_in_clr(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_clr *clr;
	struct pf_state *st, *nexts;
	struct pfi_kif *kif;
	u_int32_t creatorid;
	int i;

	for (i = 0; i < count; i++) {
		clr = (struct pfsync_clr *)(buf + len * i);
		kif = NULL;
		creatorid = clr->creatorid;
		if (strlen(clr->ifname) &&
		    (kif = pfi_kif_find(clr->ifname)) == NULL)
			continue;

		for (st = RB_MIN(pf_state_tree_id, &tree_id); st; st = nexts) {
			nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
			if (st->creatorid == creatorid &&
			    ((kif && st->kif == kif) || !kif)) {
				SET(st->state_flags, PFSTATE_NOSYNC);
				pf_remove_state(st);
			}
		}
	}

	return (0);
}

int
pfsync_in_ins(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_state *sp;
	sa_family_t af1, af2;
	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_state *)(buf + len * i);
		af1 = sp->key[0].af;
		af2 = sp->key[1].af;

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST ||
		    sp->direction > PF_OUT ||
		    (((af1 || af2) &&
		     ((af1 != AF_INET && af1 != AF_INET6) ||
		      (af2 != AF_INET && af2 != AF_INET6))) ||
		    (sp->af != AF_INET && sp->af != AF_INET6))) {
			DPFPRINTF(LOG_NOTICE,
			    "pfsync_input: PFSYNC5_ACT_INS: invalid value");
			pfsyncstats.pfsyncs_badval++;
			continue;
		}

		if (pfsync_state_import(sp, flags) == ENOMEM) {
			/* drop out, but process the rest of the actions */
			break;
		}
	}

	return (0);
}

int
pfsync_in_iack(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_ins_ack *ia;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;

	for (i = 0; i < count; i++) {
		ia = (struct pfsync_ins_ack *)(buf + len * i);

		id_key.id = ia->id;
		id_key.creatorid = ia->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL)
			continue;

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 0);
	}

	return (0);
}

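/*
 * Merge the TCP peer state carried in an update into the local state.
 * Returns the number of directions in which the local state is more
 * recent than the peer's; a non-zero result makes the caller send its
 * own (newer) copy back out.
 */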
int
pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
    struct pfsync_state_peer *dst)
{
	int sync = 0;

	/*
	 * The state should never go backwards except
	 * for syn-proxy states.  Neither should the
	 * sequence window slide backwards.
	 */
	if ((st->src.state > src->state &&
	    (st->src.state < PF_TCPS_PROXY_SRC ||
	    src->state >= PF_TCPS_PROXY_SRC)) ||

	    (st->src.state == src->state &&
	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(src, &st->src);

	if ((st->dst.state > dst->state) ||

	    (st->dst.state >= TCPS_SYN_SENT &&
	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(dst, &st->dst);

	return (sync);
}

int
pfsync_in_upd(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_state *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int sync;

	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_state *)(buf + len * i);

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST) {
			DPFPRINTF(LOG_NOTICE,
			    "pfsync_input: PFSYNC_ACT_UPD: invalid value");
			pfsyncstats.pfsyncs_badval++;
			continue;
		}

		id_key.id = sp->id;
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			/* insert the update */
			if (pfsync_state_import(sp, flags))
				pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 1);

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
		else {
			sync = 0;

			/*
			 * Non-TCP protocol state machines always go
			 * forwards.
			 */
			if (st->src.state > sp->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->src, &st->src);

			if (st->dst.state > sp->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->dst, &st->dst);
		}

		if (sync < 2) {
			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
			pf_state_peer_ntoh(&sp->dst, &st->dst);
			st->expire = time_uptime;
			st->timeout = sp->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			pfsyncstats.pfsyncs_stale++;

			pfsync_update_state(st);
			schednetisr(NETISR_PFSYNC);
		}
	}

	return (0);
}

int
pfsync_in_upd_c(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_upd_c *up;
	struct pf_state_cmp id_key;
	struct pf_state *st;

	int sync;

	int i;

	for (i = 0; i < count; i++) {
		up = (struct pfsync_upd_c *)(buf + len * i);

		/* check for invalid values */
		if (up->timeout >= PFTM_MAX ||
		    up->src.state > PF_TCPS_PROXY_DST ||
		    up->dst.state > PF_TCPS_PROXY_DST) {
			DPFPRINTF(LOG_NOTICE,
			    "pfsync_input: PFSYNC_ACT_UPD_C: invalid value");
			pfsyncstats.pfsyncs_badval++;
			continue;
		}

		id_key.id = up->id;
		id_key.creatorid = up->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			/* We don't have this state. Ask for it. */
			pfsync_request_update(id_key.creatorid, id_key.id);
			continue;
		}

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 1);

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
		else {
			sync = 0;
			/*
			 * Non-TCP protocol state machines always go
			 * forwards.
			 */
			if (st->src.state > up->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->src, &st->src);

			if (st->dst.state > up->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->dst, &st->dst);
		}
		if (sync < 2) {
			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
			pf_state_peer_ntoh(&up->dst, &st->dst);
			st->expire = time_uptime;
			st->timeout = up->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			pfsyncstats.pfsyncs_stale++;

			pfsync_update_state(st);
			schednetisr(NETISR_PFSYNC);
		}
	}

	return (0);
}

int
pfsync_in_ureq(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_upd_req *ur;
	int i;

	struct pf_state_cmp id_key;
	struct pf_state *st;

	for (i = 0; i < count; i++) {
		ur = (struct pfsync_upd_req *)(buf + len * i);

		id_key.id = ur->id;
		id_key.creatorid = ur->creatorid;

		if (id_key.id == 0 && id_key.creatorid == 0)
			pfsync_bulk_start();
		else {
			st = pf_find_state_byid(&id_key);
			if (st == NULL) {
				pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			if (ISSET(st->state_flags, PFSTATE_NOSYNC))
				continue;

			pfsync_update_state_req(st);
		}
	}

	return (0);
}

int
pfsync_in_del(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_state *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_state *)(buf + len * i);

		id_key.id = sp->id;
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			pfsyncstats.pfsyncs_badstate++;
			continue;
		}
		SET(st->state_flags, PFSTATE_NOSYNC);
		pf_remove_state(st);
	}

	return (0);
}

int
pfsync_in_del_c(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_del_c *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;

	for (i = 0; i < count; i++) {
		sp = (struct pfsync_del_c *)(buf + len * i);

		id_key.id = sp->id;
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		SET(st->state_flags, PFSTATE_NOSYNC);
		pf_remove_state(st);
	}

	return (0);
}

int
pfsync_in_bus(caddr_t buf, int len, int count, int flags)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_bus *bus;

	/* If we're not waiting for a bulk update, who cares. */
	if (sc->sc_ureq_sent == 0)
		return (0);

	bus = (struct pfsync_bus *)buf;

	switch (bus->status) {
	case PFSYNC_BUS_START:
		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
		    pf_pool_limits[PF_LIMIT_STATES].limit /
		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
		    sizeof(struct pfsync_state)));
		DPFPRINTF(LOG_INFO, "received bulk update start");
		break;

	case PFSYNC_BUS_END:
		if (time_uptime - ntohl(bus->endtime) >=
		    sc->sc_ureq_sent) {
			/* that's it, we're happy */
			sc->sc_ureq_sent = 0;
			sc->sc_bulk_tries = 0;
			timeout_del(&sc->sc_bulkfail_tmo);
#if NCARP > 0
			if (!pfsync_sync_ok)
				carp_group_demote_adj(&sc->sc_if, -1,
				    sc->sc_link_demoted ?
				    "pfsync link state up" :
				    "pfsync bulk done");
			if (sc->sc_initial_bulk) {
				carp_group_demote_adj(&sc->sc_if, -32,
				    "pfsync init");
				sc->sc_initial_bulk = 0;
			}
#endif
			pfsync_sync_ok = 1;
			sc->sc_link_demoted = 0;
			DPFPRINTF(LOG_INFO, "received valid bulk update end");
		} else {
			DPFPRINTF(LOG_WARNING, "received invalid "
			    "bulk update end: bad timestamp");
		}
		break;
	}

	return (0);
}

int
pfsync_in_tdb(caddr_t buf, int len, int count, int flags)
{
#if defined(IPSEC)
	struct pfsync_tdb *tp;
	int i;

	for (i = 0; i < count; i++) {
		tp = (struct pfsync_tdb *)(buf + len * i);
		pfsync_update_net_tdb(tp);
	}
#endif

	return (0);
}

#if defined(IPSEC)
/* Update an in-kernel tdb. Silently fail if no tdb is found. */
void
pfsync_update_net_tdb(struct pfsync_tdb *pt)
{
	struct tdb		*tdb;
	int			 s;

	/* check for invalid values */
	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
	    (pt->dst.sa.sa_family != AF_INET &&
	     pt->dst.sa.sa_family != AF_INET6))
		goto bad;

	s = splsoftnet();
	tdb = gettdb(ntohs(pt->rdomain), pt->spi,
	    (union sockaddr_union *)&pt->dst, pt->sproto);
	if (tdb) {
		pt->rpl = betoh64(pt->rpl);
		pt->cur_bytes = betoh64(pt->cur_bytes);

		/* Neither replay nor byte counter should ever decrease. */
		if (pt->rpl < tdb->tdb_rpl ||
		    pt->cur_bytes < tdb->tdb_cur_bytes) {
			splx(s);
			goto bad;
		}

		tdb->tdb_rpl = pt->rpl;
		tdb->tdb_cur_bytes = pt->cur_bytes;
	}
	splx(s);
	return;

 bad:
	DPFPRINTF(LOG_WARNING, "pfsync_insert: PFSYNC_ACT_TDB_UPD: "
	    "invalid value");
	pfsyncstats.pfsyncs_badstate++;
	return;
}
#endif


int
pfsync_in_eof(caddr_t buf, int len, int count, int flags)
{
	if (len > 0 || count > 0)
		pfsyncstats.pfsyncs_badact++;

	/* we're done. let the caller return */
	return (1);
}

int
pfsync_in_error(caddr_t buf, int len, int count, int flags)
{
	pfsyncstats.pfsyncs_badact++;
	return (-1);
}

int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
	struct rtentry *rt)
{
	m_freem(m);	/* drop packet */
	return (EAFNOSUPPORT);
}

int
pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct proc *p = curproc;
	struct pfsync_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ip_moptions *imo = &sc->sc_imo;
	struct pfsyncreq pfsyncr;
	struct ifnet    *sifp;
	struct ip *ip;
	int s, error;

	switch (cmd) {
#if 0
	case SIOCSIFADDR:
	case SIOCAIFADDR:
	case SIOCSIFDSTADDR:
#endif
	case SIOCSIFFLAGS:
		s = splnet();
		if ((ifp->if_flags & IFF_RUNNING) == 0 &&
		    (ifp->if_flags & IFF_UP)) {
			ifp->if_flags |= IFF_RUNNING;

#if NCARP > 0
			sc->sc_initial_bulk = 1;
			carp_group_demote_adj(&sc->sc_if, 32, "pfsync init");
#endif

			pfsync_request_full_update(sc);
		}
		if ((ifp->if_flags & IFF_RUNNING) &&
		    (ifp->if_flags & IFF_UP) == 0) {
			ifp->if_flags &= ~IFF_RUNNING;

			/* drop everything */
			timeout_del(&sc->sc_tmo);
			pfsync_drop(sc);

			pfsync_cancel_full_update(sc);
		}
		splx(s);
		break;
	case SIOCSIFMTU:
		if (!sc->sc_sync_if ||
		    ifr->ifr_mtu <= PFSYNC_MINPKT ||
		    ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
			return (EINVAL);
		s = splnet();
		if (ifr->ifr_mtu < ifp->if_mtu)
			pfsync_sendout();
		ifp->if_mtu = ifr->ifr_mtu;
		splx(s);
		break;
	case SIOCGETPFSYNC:
		bzero(&pfsyncr, sizeof(pfsyncr));
		if (sc->sc_sync_if) {
			strlcpy(pfsyncr.pfsyncr_syncdev,
			    sc->sc_sync_if->if_xname, IFNAMSIZ);
		}
		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
		pfsyncr.pfsyncr_defer = sc->sc_defer;
		return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));

	case SIOCSETPFSYNC:
		if ((error = suser(p, 0)) != 0)
			return (error);
		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
			return (error);

		s = splnet();

		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
		else
			sc->sc_sync_peer.s_addr =
			    pfsyncr.pfsyncr_syncpeer.s_addr;

		if (pfsyncr.pfsyncr_maxupdates > 255) {
			splx(s);
			return (EINVAL);
		}
		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;

		sc->sc_defer = pfsyncr.pfsyncr_defer;

		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
			if (sc->sc_sync_if)
				hook_disestablish(
				    sc->sc_sync_if->if_linkstatehooks,
				    sc->sc_lhcookie);
			sc->sc_sync_if = NULL;
			if (imo->imo_num_memberships > 0) {
				in_delmulti(imo->imo_membership[
				    --imo->imo_num_memberships]);
				imo->imo_ifidx = 0;
			}
			splx(s);
			break;
		}

		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) {
			splx(s);
			return (EINVAL);
		}

		if (sifp->if_mtu < sc->sc_if.if_mtu ||
		    (sc->sc_sync_if != NULL &&
		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
			pfsync_sendout();

		if (sc->sc_sync_if)
			hook_disestablish(
			    sc->sc_sync_if->if_linkstatehooks,
			    sc->sc_lhcookie);
		sc->sc_sync_if = sifp;

		if (imo->imo_num_memberships > 0) {
			in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
			imo->imo_ifidx = 0;
		}

		if (sc->sc_sync_if &&
		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
			struct in_addr addr;

			if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) {
				sc->sc_sync_if = NULL;
				splx(s);
				return (EADDRNOTAVAIL);
			}

			addr.s_addr = INADDR_PFSYNC_GROUP;

			if ((imo->imo_membership[0] =
			    in_addmulti(&addr, sc->sc_sync_if)) == NULL) {
				sc->sc_sync_if = NULL;
				splx(s);
				return (ENOBUFS);
			}
			imo->imo_num_memberships++;
			imo->imo_ifidx = sc->sc_sync_if->if_index;
			imo->imo_ttl = PFSYNC_DFLTTL;
			imo->imo_loop = 0;
		}

		ip = &sc->sc_template;
		bzero(ip, sizeof(*ip));
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(sc->sc_template) >> 2;
		ip->ip_tos = IPTOS_LOWDELAY;
		/* len and id are set later */
		ip->ip_off = htons(IP_DF);
		ip->ip_ttl = PFSYNC_DFLTTL;
		ip->ip_p = IPPROTO_PFSYNC;
		ip->ip_src.s_addr = INADDR_ANY;
		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;

		sc->sc_lhcookie =
		    hook_establish(sc->sc_sync_if->if_linkstatehooks, 1,
		    pfsync_syncdev_state, sc);

		pfsync_request_full_update(sc);
		splx(s);

		break;

	default:
		return (ENOTTY);
	}

	return (0);
}

void
pfsync_out_state(struct pf_state *st, void *buf)
{
	struct pfsync_state *sp = buf;

	pfsync_state_export(sp, st);
}

void
pfsync_out_iack(struct pf_state *st, void *buf)
{
	struct pfsync_ins_ack *iack = buf;

	iack->id = st->id;
	iack->creatorid = st->creatorid;
}

void
pfsync_out_upd_c(struct pf_state *st, void *buf)
{
	struct pfsync_upd_c *up = buf;

	bzero(up, sizeof(*up));
	up->id = st->id;
	pf_state_peer_hton(&st->src, &up->src);
	pf_state_peer_hton(&st->dst, &up->dst);
	up->creatorid = st->creatorid;
	up->timeout = st->timeout;
}

void
pfsync_out_del(struct pf_state *st, void *buf)
{
	struct pfsync_del_c *dp = buf;

	dp->id = st->id;
	dp->creatorid = st->creatorid;

	SET(st->state_flags, PFSTATE_NOSYNC);
}

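/*
 * Throw away everything queued for the next packet and reset sc_len to
 * the empty-packet size.
 */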
void
pfsync_drop(struct pfsync_softc *sc)
{
	struct pf_state *st;
	struct pfsync_upd_req_item *ur;
	struct tdb *t;
	int q;

	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;

		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
#ifdef PFSYNC_DEBUG
			KASSERT(st->sync_state == q);
#endif
			st->sync_state = PFSYNC_S_NONE;
		}
		TAILQ_INIT(&sc->sc_qs[q]);
	}

	while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
		TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
		pool_put(&sc->sc_pool, ur);
	}

	sc->sc_plus = NULL;

	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry)
			CLR(t->tdb_flags, TDBF_PFSYNC);

		TAILQ_INIT(&sc->sc_tdb_q);
	}

	sc->sc_len = PFSYNC_MINPKT;
}

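/*
 * Serialize everything that has been queued (update requests, the
 * "plus" region, TDB updates, and the per-action state queues) into a
 * single mbuf and hand it to ip_output().
 */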
void
pfsync_sendout(void)
{
	struct pfsync_softc *sc = pfsyncif;
#if NBPFILTER > 0
	struct ifnet *ifp = &sc->sc_if;
#endif
	struct mbuf *m;
	struct ip *ip;
	struct pfsync_header *ph;
	struct pfsync_subheader *subh;
	struct pf_state *st;
	struct pfsync_upd_req_item *ur;
	struct tdb *t;

	int offset;
	int q, count = 0;

	if (sc == NULL || sc->sc_len == PFSYNC_MINPKT)
		return;

	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
#if NBPFILTER > 0
	    (ifp->if_bpf == NULL && sc->sc_sync_if == NULL)) {
#else
	    sc->sc_sync_if == NULL) {
#endif
		pfsync_drop(sc);
		return;
	}

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL) {
		sc->sc_if.if_oerrors++;
		pfsyncstats.pfsyncs_onomem++;
		pfsync_drop(sc);
		return;
	}

	if (max_linkhdr + sc->sc_len > MHLEN) {
		MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_free(m);
			sc->sc_if.if_oerrors++;
			pfsyncstats.pfsyncs_onomem++;
			pfsync_drop(sc);
			return;
		}
	}
	m->m_data += max_linkhdr;
	m->m_len = m->m_pkthdr.len = sc->sc_len;

	/* build the ip header */
	ip = mtod(m, struct ip *);
	bcopy(&sc->sc_template, ip, sizeof(*ip));
	offset = sizeof(*ip);

	ip->ip_len = htons(m->m_pkthdr.len);
	ip->ip_id = htons(ip_randomid());

	/* build the pfsync header */
	ph = (struct pfsync_header *)(m->m_data + offset);
	bzero(ph, sizeof(*ph));
	offset += sizeof(*ph);

	ph->version = PFSYNC_VERSION;
	ph->len = htons(sc->sc_len - sizeof(*ip));
	bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);

	if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
			TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);

			bcopy(&ur->ur_msg, m->m_data + offset,
			    sizeof(ur->ur_msg));
			offset += sizeof(ur->ur_msg);

			pool_put(&sc->sc_pool, ur);

			count++;
		}

		bzero(subh, sizeof(*subh));
		subh->len = sizeof(ur->ur_msg) >> 2;
		subh->action = PFSYNC_ACT_UPD_REQ;
		subh->count = htons(count);
	}

	/* has someone built a custom region for us to add? */
	if (sc->sc_plus != NULL) {
		bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
		offset += sc->sc_pluslen;

		sc->sc_plus = NULL;
	}

	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) {
			pfsync_out_tdb(t, m->m_data + offset);
			offset += sizeof(struct pfsync_tdb);
			CLR(t->tdb_flags, TDBF_PFSYNC);

			count++;
		}
		TAILQ_INIT(&sc->sc_tdb_q);

		bzero(subh, sizeof(*subh));
		subh->action = PFSYNC_ACT_TDB;
		subh->len = sizeof(struct pfsync_tdb) >> 2;
		subh->count = htons(count);
	}

	/* walk the queues */
	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;

		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
#ifdef PFSYNC_DEBUG
			KASSERT(st->sync_state == q);
#endif
			pfsync_qs[q].write(st, m->m_data + offset);
			offset += pfsync_qs[q].len;

			st->sync_state = PFSYNC_S_NONE;
			count++;
		}
		TAILQ_INIT(&sc->sc_qs[q]);

		bzero(subh, sizeof(*subh));
		subh->action = pfsync_qs[q].action;
		subh->len = pfsync_qs[q].len >> 2;
		subh->count = htons(count);
	}

	/* we're done, let's put it on the wire */
#if NBPFILTER > 0
	if (ifp->if_bpf) {
		m->m_data += sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
		m->m_data -= sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sc->sc_len;
	}

	if (sc->sc_sync_if == NULL) {
		sc->sc_len = PFSYNC_MINPKT;
		m_freem(m);
		return;
	}
#endif

	/* start again */
	sc->sc_len = PFSYNC_MINPKT;

	sc->sc_if.if_opackets++;
	sc->sc_if.if_obytes += m->m_pkthdr.len;

	m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;

	if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL, 0) == 0)
		pfsyncstats.pfsyncs_opackets++;
	else
		pfsyncstats.pfsyncs_oerrors++;
}

void
pfsync_insert_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	splsoftassert(IPL_SOFTNET);

	if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) ||
	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
		SET(st->state_flags, PFSTATE_NOSYNC);
		return;
	}

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    ISSET(st->state_flags, PFSTATE_NOSYNC))
		return;

#ifdef PFSYNC_DEBUG
	KASSERT(st->sync_state == PFSYNC_S_NONE);
#endif

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	pfsync_q_ins(st, PFSYNC_S_INS);

	st->sync_updates = 0;
}

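/*
 * Hold the packet that created this state until the peer acknowledges
 * the insert (or the 20ms deferral timeout fires), so the peer can
 * install the state before any reply traffic arrives.  At most 128
 * packets are held at once; the oldest is released to make room.
 */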
int
pfsync_defer(struct pf_state *st, struct mbuf *m)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_deferral *pd;

	splsoftassert(IPL_SOFTNET);

	if (!sc->sc_defer ||
	    ISSET(st->state_flags, PFSTATE_NOSYNC) ||
	    m->m_flags & (M_BCAST|M_MCAST))
		return (0);

	if (sc->sc_deferred >= 128) {
		pd = TAILQ_FIRST(&sc->sc_deferrals);
		if (timeout_del(&pd->pd_tmo))
			pfsync_undefer(pd, 0);
	}

	pd = pool_get(&sc->sc_pool, PR_NOWAIT);
	if (pd == NULL)
		return (0);

	m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
	SET(st->state_flags, PFSTATE_ACK);

	pd->pd_st = st;
	pd->pd_m = m;

	sc->sc_deferred++;
	TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);

	timeout_set(&pd->pd_tmo, pfsync_defer_tmo, pd);
	timeout_add_msec(&pd->pd_tmo, 20);

	schednetisr(NETISR_PFSYNC);

	return (1);
}

void
pfsync_undefer(struct pfsync_deferral *pd, int drop)
{
	struct pfsync_softc *sc = pfsyncif;

	splsoftassert(IPL_SOFTNET);

	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
	sc->sc_deferred--;

	CLR(pd->pd_st->state_flags, PFSTATE_ACK);
	if (drop)
		m_freem(pd->pd_m);
	else {
		if (pd->pd_st->rule.ptr->rt == PF_ROUTETO) {
			switch (pd->pd_st->key[PF_SK_WIRE]->af) {
			case AF_INET:
				pf_route(&pd->pd_m, pd->pd_st->rule.ptr,
				    pd->pd_st->direction,
				    pd->pd_st->rt_kif->pfik_ifp, pd->pd_st);
				break;
#ifdef INET6
			case AF_INET6:
				pf_route6(&pd->pd_m, pd->pd_st->rule.ptr,
				    pd->pd_st->direction,
				    pd->pd_st->rt_kif->pfik_ifp, pd->pd_st);
				break;
#endif /* INET6 */
			}
		} else {
			switch (pd->pd_st->key[PF_SK_WIRE]->af) {
			case AF_INET:
				ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL,
				    0);
				break;
#ifdef INET6
			case AF_INET6:
				ip6_output(pd->pd_m, NULL, NULL, 0,
				    NULL, NULL);
				break;
#endif /* INET6 */
			}
		}
	}

	pool_put(&sc->sc_pool, pd);
}

void
pfsync_defer_tmo(void *arg)
{
	int s;

	s = splsoftnet();
	pfsync_undefer(arg, 0);
	splx(s);
}

void
pfsync_deferred(struct pf_state *st, int drop)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_deferral *pd;

	splsoftassert(IPL_SOFTNET);

	TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
		if (pd->pd_st == st) {
			if (timeout_del(&pd->pd_tmo))
				pfsync_undefer(pd, drop);
			return;
		}
	}

	panic("pfsync_deferred: unable to find deferred state");
}

void
pfsync_update_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;
	int sync = 0;

	splsoftassert(IPL_SOFTNET);

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	if (ISSET(st->state_flags, PFSTATE_ACK))
		pfsync_deferred(st, 0);
	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_INS:
		/* we're already handling it */

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
			st->sync_updates++;
			if (st->sync_updates >= sc->sc_maxupdates)
				sync = 1;
		}
		break;

	case PFSYNC_S_IACK:
		pfsync_q_del(st);
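		/* FALLTHROUGH */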
	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD_C);
		st->sync_updates = 0;
		break;

	default:
		panic("pfsync_update_state: unexpected sync state %d",
		    st->sync_state);
	}

	if (sync || (time_uptime - st->pfsync_time) < 2)
		schednetisr(NETISR_PFSYNC);
}

void
pfsync_cancel_full_update(struct pfsync_softc *sc)
{
	if (timeout_pending(&sc->sc_bulkfail_tmo) ||
	    timeout_pending(&sc->sc_bulk_tmo)) {
#if NCARP > 0
		if (!pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, -1,
			    "pfsync bulk cancelled");
		if (sc->sc_initial_bulk) {
			carp_group_demote_adj(&sc->sc_if, -32,
			    "pfsync init");
			sc->sc_initial_bulk = 0;
		}
#endif
		pfsync_sync_ok = 1;
		DPFPRINTF(LOG_INFO, "cancelling bulk update");
	}
	timeout_del(&sc->sc_bulkfail_tmo);
	timeout_del(&sc->sc_bulk_tmo);
	sc->sc_bulk_next = NULL;
	sc->sc_bulk_last = NULL;
	sc->sc_ureq_sent = 0;
	sc->sc_bulk_tries = 0;
}

void
pfsync_request_full_update(struct pfsync_softc *sc)
{
	if (sc->sc_sync_if && ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
		/* Request a full state table update. */
		sc->sc_ureq_sent = time_uptime;
#if NCARP > 0
		if (!sc->sc_link_demoted && pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, 1,
			    "pfsync bulk start");
#endif
		pfsync_sync_ok = 0;
		DPFPRINTF(LOG_INFO, "requesting bulk update");
		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
		    pf_pool_limits[PF_LIMIT_STATES].limit /
		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
		    sizeof(struct pfsync_state)));
		pfsync_request_update(0, 0);
	}
}

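/*
 * Queue an update request message.  A request with both creatorid and
 * id set to zero asks the peer for a full bulk update.
 */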
1910 pfsync_request_update(u_int32_t creatorid, u_int64_t id)
1911 {
1912 	struct pfsync_softc *sc = pfsyncif;
1913 	struct pfsync_upd_req_item *item;
1914 	size_t nlen = sizeof(struct pfsync_upd_req);
1915 
1916 	/*
1917 	 * this code does nothing to prevent multiple update requests for the
1918 	 * same state being generated.
1919 	 */
1920 
1921 	item = pool_get(&sc->sc_pool, PR_NOWAIT);
1922 	if (item == NULL) {
1923 		/* XXX stats */
1924 		return;
1925 	}
1926 
1927 	item->ur_msg.id = id;
1928 	item->ur_msg.creatorid = creatorid;
1929 
1930 	if (TAILQ_EMPTY(&sc->sc_upd_req_list))
1931 		nlen += sizeof(struct pfsync_subheader);
1932 
1933 	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
1934 		pfsync_sendout();
1935 
1936 		nlen = sizeof(struct pfsync_subheader) +
1937 		    sizeof(struct pfsync_upd_req);
1938 	}
1939 
1940 	TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
1941 	sc->sc_len += nlen;
1942 
1943 	schednetisr(NETISR_PFSYNC);
1944 }
1945 
1946 void
1947 pfsync_update_state_req(struct pf_state *st)
1948 {
1949 	struct pfsync_softc *sc = pfsyncif;
1950 
1951 	if (sc == NULL)
1952 		panic("pfsync_update_state_req: nonexistant instance");
1953 
1954 	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
1955 		if (st->sync_state != PFSYNC_S_NONE)
1956 			pfsync_q_del(st);
1957 		return;
1958 	}
1959 
1960 	switch (st->sync_state) {
1961 	case PFSYNC_S_UPD_C:
1962 	case PFSYNC_S_IACK:
1963 		pfsync_q_del(st);
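		/* FALLTHROUGH */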
	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD);
		schednetisr(NETISR_PFSYNC);
		return;

	case PFSYNC_S_INS:
	case PFSYNC_S_UPD:
	case PFSYNC_S_DEL:
		/* we're already handling it */
		return;

	default:
		panic("pfsync_update_state_req: unexpected sync state %d",
		    st->sync_state);
	}
}

void
pfsync_delete_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	splsoftassert(IPL_SOFTNET);

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	if (ISSET(st->state_flags, PFSTATE_ACK))
		pfsync_deferred(st, 1);
	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	switch (st->sync_state) {
	case PFSYNC_S_INS:
		/* we never got to tell the world so just forget about it */
		pfsync_q_del(st);
		return;

	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/* FALLTHROUGH to putting it on the del list */

	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_DEL);
		return;

	default:
		panic("pfsync_delete_state: unexpected sync state %d",
		    st->sync_state);
	}
}

void
pfsync_clear_states(u_int32_t creatorid, const char *ifname)
{
	struct pfsync_softc *sc = pfsyncif;
	struct {
		struct pfsync_subheader subh;
		struct pfsync_clr clr;
	} __packed r;

	splsoftassert(IPL_SOFTNET);

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_CLR;
	r.subh.len = sizeof(struct pfsync_clr) >> 2;
	r.subh.count = htons(1);

	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
	r.clr.creatorid = creatorid;

	pfsync_send_plus(&r, sizeof(r));
}

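/*
 * Queue a state onto one of the per-action queues, growing sc_len by
 * the message size (plus a subheader when the queue was empty) and
 * flushing first if the pending packet would exceed the MTU.
 */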
2050 void
2051 pfsync_q_ins(struct pf_state *st, int q)
2052 {
2053 	struct pfsync_softc *sc = pfsyncif;
2054 	size_t nlen = pfsync_qs[q].len;
2055 
2056 	KASSERT(st->sync_state == PFSYNC_S_NONE);
2057 
2058 #if defined(PFSYNC_DEBUG)
2059 	if (sc->sc_len < PFSYNC_MINPKT)
2060 		panic("pfsync pkt len is too low %zu", sc->sc_len);
2061 #endif
2062 	if (TAILQ_EMPTY(&sc->sc_qs[q]))
2063 		nlen += sizeof(struct pfsync_subheader);
2064 
2065 	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
2066 		pfsync_sendout();
2067 
2068 		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
2069 	}
2070 
2071 	sc->sc_len += nlen;
2072 	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
2073 	st->sync_state = q;
2074 }
2075 
2076 void
2077 pfsync_q_del(struct pf_state *st)
2078 {
2079 	struct pfsync_softc *sc = pfsyncif;
2080 	int q = st->sync_state;
2081 
2082 	KASSERT(st->sync_state != PFSYNC_S_NONE);
2083 
2084 	sc->sc_len -= pfsync_qs[q].len;
2085 	TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
2086 	st->sync_state = PFSYNC_S_NONE;
2087 
2088 	if (TAILQ_EMPTY(&sc->sc_qs[q]))
2089 		sc->sc_len -= sizeof(struct pfsync_subheader);
2090 }
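
/*
 * Between them, pfsync_q_ins() and pfsync_q_del() keep sc_len equal to
 * PFSYNC_MINPKT plus the wire size of each queued entry plus one
 * subheader per non-empty queue.  A debug check could state the
 * invariant along these lines (a sketch only; nitems_on() standing in
 * for a hypothetical queue-length helper):
 *
 *	size_t len = PFSYNC_MINPKT;
 *	int q;
 *
 *	for (q = 0; q < PFSYNC_S_COUNT; q++) {
 *		if (TAILQ_EMPTY(&sc->sc_qs[q]))
 *			continue;
 *		len += sizeof(struct pfsync_subheader) +
 *		    nitems_on(&sc->sc_qs[q]) * pfsync_qs[q].len;
 *	}
 *	KASSERT(len <= sc->sc_len);
 */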
2091 
2092 void
2093 pfsync_update_tdb(struct tdb *t, int output)
2094 {
2095 	struct pfsync_softc *sc = pfsyncif;
2096 	size_t nlen = sizeof(struct pfsync_tdb);
2097 
2098 	if (sc == NULL)
2099 		return;
2100 
2101 	if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
2102 		if (TAILQ_EMPTY(&sc->sc_tdb_q))
2103 			nlen += sizeof(struct pfsync_subheader);
2104 
2105 		if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
2106 			pfsync_sendout();
2107 
2108 			nlen = sizeof(struct pfsync_subheader) +
2109 			    sizeof(struct pfsync_tdb);
2110 		}
2111 
2112 		sc->sc_len += nlen;
2113 		TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
2114 		SET(t->tdb_flags, TDBF_PFSYNC);
2115 		t->tdb_updates = 0;
2116 	} else {
2117 		if (++t->tdb_updates >= sc->sc_maxupdates)
2118 			schednetisr(NETISR_PFSYNC);
2119 	}
2120 
2121 	if (output)
2122 		SET(t->tdb_flags, TDBF_PFSYNC_RPL);
2123 	else
2124 		CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
2125 }
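
/*
 * A TDB already on the queue only has its update counter bumped; once
 * sc_maxupdates consecutive updates accumulate (128 by default,
 * settable with "ifconfig pfsyncN maxupd") the netisr is scheduled so
 * the peer's replay counter does not fall too far behind.
 */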
2126 
2127 void
2128 pfsync_delete_tdb(struct tdb *t)
2129 {
2130 	struct pfsync_softc *sc = pfsyncif;
2131 
2132 	if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
2133 		return;
2134 
2135 	sc->sc_len -= sizeof(struct pfsync_tdb);
2136 	TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
2137 	CLR(t->tdb_flags, TDBF_PFSYNC);
2138 
2139 	if (TAILQ_EMPTY(&sc->sc_tdb_q))
2140 		sc->sc_len -= sizeof(struct pfsync_subheader);
2141 }
2142 
2143 void
2144 pfsync_out_tdb(struct tdb *t, void *buf)
2145 {
2146 	struct pfsync_tdb *ut = buf;
2147 
2148 	bzero(ut, sizeof(*ut));
2149 	ut->spi = t->tdb_spi;
2150 	bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
2151 	/*
2152 	 * When a failover happens, the master's rpl is probably above
2153 	 * what we see here (we may be up to a second late), so
2154 	 * increase it a bit for outbound tdbs to manage most such
2155 	 * situations.
2156 	 *
2157 	 * For now, just add an offset that is likely to be larger
2158 	 * than the number of packets we can see in one second. The RFC
2159 	 * just says the next packet must have a higher seq value.
2160 	 *
2161 	 * XXX What is a good algorithm for this? We could use
2162 	 * a rate-determined increase, but to know it, we would have
2163 	 * to extend struct tdb.
2164 	 * XXX ut->rpl can wrap over MAXINT, but if so the real tdb
2165 	 * will soon be replaced anyway. For now, just don't handle
2166 	 * this edge case.
2167 	 */
2168 #define RPL_INCR 16384
2169 	ut->rpl = htobe64(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
2170 	    RPL_INCR : 0));
2171 	ut->cur_bytes = htobe64(t->tdb_cur_bytes);
2172 	ut->sproto = t->tdb_sproto;
2173 	ut->rdomain = htons(t->tdb_rdomain);
2174 }
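
/*
 * The failover arithmetic above, spelled out: if the master last
 * synced tdb_rpl = N and then kept transmitting for up to a second,
 * the peer installs N + 16384.  Provided fewer than 16384 packets
 * left in that window, the first packet sent after failover still
 * carries a strictly increasing replay value, which is all the
 * anti-replay requirement demands.
 */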
2175 
2176 void
2177 pfsync_bulk_start(void)
2178 {
2179 	struct pfsync_softc *sc = pfsyncif;
2180 
2181 	DPFPRINTF(LOG_INFO, "received bulk update request");
2182 
2183 	if (TAILQ_EMPTY(&state_list)) {
2184 		pfsync_bulk_status(PFSYNC_BUS_END);
2185 	} else {
2186 		sc->sc_ureq_received = time_uptime;
2187 
2188 		if (sc->sc_bulk_next == NULL)
2189 			sc->sc_bulk_next = TAILQ_FIRST(&state_list);
2190 		sc->sc_bulk_last = sc->sc_bulk_next;
2191 
2192 		pfsync_bulk_status(PFSYNC_BUS_START);
2193 		timeout_add(&sc->sc_bulk_tmo, 0);
2194 	}
2195 }
2196 
2197 void
2198 pfsync_bulk_update(void *arg)
2199 {
2200 	struct pfsync_softc *sc = arg;
2201 	struct pf_state *st;
2202 	int i = 0;
2203 	int s;
2204 
2205 	s = splsoftnet();
2206 
2207 	st = sc->sc_bulk_next;
2208 
2209 	for (;;) {
2210 		if (st->sync_state == PFSYNC_S_NONE &&
2211 		    st->timeout < PFTM_MAX &&
2212 		    st->pfsync_time <= sc->sc_ureq_received) {
2213 			pfsync_update_state_req(st);
2214 			i++;
2215 		}
2216 
2217 		st = TAILQ_NEXT(st, entry_list);
2218 		if (st == NULL)
2219 			st = TAILQ_FIRST(&state_list);
2220 
2221 		if (st == sc->sc_bulk_last) {
2222 			/* we're done */
2223 			sc->sc_bulk_next = NULL;
2224 			sc->sc_bulk_last = NULL;
2225 			pfsync_bulk_status(PFSYNC_BUS_END);
2226 			break;
2227 		}
2228 
2229 		if (i > 1 && (sc->sc_if.if_mtu - sc->sc_len) <
2230 		    sizeof(struct pfsync_state)) {
2231 			/* we've filled a packet */
2232 			sc->sc_bulk_next = st;
2233 			timeout_add(&sc->sc_bulk_tmo, 1);
2234 			break;
2235 		}
2236 	}
2237 
2238 	splx(s);
2239 }
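
/*
 * The walk above treats state_list as a ring: sc_bulk_last marks where
 * this pass started, TAILQ_NEXT() falling off the tail wraps to
 * TAILQ_FIRST(), and reaching sc_bulk_last again means a full lap.
 * When the packet under construction fills, the cursor is parked in
 * sc_bulk_next and the pass resumes from there one tick later.
 */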
2240 
2241 void
2242 pfsync_bulk_status(u_int8_t status)
2243 {
2244 	struct {
2245 		struct pfsync_subheader subh;
2246 		struct pfsync_bus bus;
2247 	} __packed r;
2248 
2249 	struct pfsync_softc *sc = pfsyncif;
2250 
2251 	bzero(&r, sizeof(r));
2252 
2253 	r.subh.action = PFSYNC_ACT_BUS;
2254 	r.subh.len = sizeof(struct pfsync_bus) >> 2;
2255 	r.subh.count = htons(1);
2256 
2257 	r.bus.creatorid = pf_status.hostid;
2258 	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
2259 	r.bus.status = status;
2260 
2261 	pfsync_send_plus(&r, sizeof(r));
2262 }
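
/*
 * bus.endtime carries the age of the exchange in seconds: zero in the
 * PFSYNC_BUS_START message and the elapsed transfer time in
 * PFSYNC_BUS_END, which the requesting peer can compare against the
 * time it sent its request to judge whether the bulk covers it.
 */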
2263 
2264 void
2265 pfsync_bulk_fail(void *arg)
2266 {
2267 	struct pfsync_softc *sc = arg;
2268 	int s;
2269 
2270 	s = splsoftnet();
2271 
2272 	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
2273 		/* Try again */
2274 		timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
2275 		pfsync_request_update(0, 0);
2276 	} else {
2277 		/* Pretend the transfer was ok */
2278 		sc->sc_ureq_sent = 0;
2279 		sc->sc_bulk_tries = 0;
2280 #if NCARP > 0
2281 		if (!pfsync_sync_ok)
2282 			carp_group_demote_adj(&sc->sc_if, -1,
2283 			    sc->sc_link_demoted ?
2284 			    "pfsync link state up" :
2285 			    "pfsync bulk fail");
2286 		if (sc->sc_initial_bulk) {
2287 			carp_group_demote_adj(&sc->sc_if, -32,
2288 			    "pfsync init");
2289 			sc->sc_initial_bulk = 0;
2290 		}
2291 #endif
2292 		pfsync_sync_ok = 1;
2293 		sc->sc_link_demoted = 0;
2294 		DPFPRINTF(LOG_ERR, "failed to receive bulk update");
2295 	}
2296 
2297 	splx(s);
2298 }
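
/*
 * Retry arithmetic, for reference: the bulk request is re-sent every
 * five seconds until PFSYNC_MAX_BULKTRIES attempts (12 in if_pfsync.h
 * at this revision) have failed, so pfsync gives up roughly a minute
 * after the first request and declares itself in sync regardless.
 */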
2299 
2300 void
2301 pfsync_send_plus(void *plus, size_t pluslen)
2302 {
2303 	struct pfsync_softc *sc = pfsyncif;
2304 
2305 	if (sc->sc_len + pluslen > sc->sc_if.if_mtu)
2306 		pfsync_sendout();
2307 
2308 	sc->sc_plus = plus;
2309 	sc->sc_len += (sc->sc_pluslen = pluslen);
2310 
2311 	pfsync_sendout();
2312 }
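
/*
 * sc_plus/sc_pluslen hand pfsync_sendout() a fully formed
 * subheader-plus-payload blob to append after the queued messages;
 * both callers in this file build it on the stack, e.g.
 * pfsync_clear_states() above:
 *
 *	struct {
 *		struct pfsync_subheader subh;
 *		struct pfsync_clr clr;
 *	} __packed r;
 *
 *	pfsync_send_plus(&r, sizeof(r));
 *
 * The blob must stay valid until it is copied out, which is why the
 * second pfsync_sendout() call here is unconditional.
 */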
2313 
2314 int
2315 pfsync_up(void)
2316 {
2317 	struct pfsync_softc *sc = pfsyncif;
2318 
2319 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
2320 		return (0);
2321 
2322 	return (1);
2323 }
2324 
2325 int
2326 pfsync_state_in_use(struct pf_state *st)
2327 {
2328 	struct pfsync_softc *sc = pfsyncif;
2329 
2330 	if (sc == NULL)
2331 		return (0);
2332 
2333 	if (st->sync_state != PFSYNC_S_NONE ||
2334 	    st == sc->sc_bulk_next ||
2335 	    st == sc->sc_bulk_last)
2336 		return (1);
2337 
2338 	return (0);
2339 }
2340 
2341 void
2342 pfsync_timeout(void *arg)
2343 {
2344 	int s;
2345 
2346 	s = splsoftnet();
2347 	pfsync_sendout();
2348 	splx(s);
2349 }
2350 
2351 /* this is a softnet/netisr handler */
2352 void
2353 pfsyncintr(void)
2354 {
2355 	pfsync_sendout();
2356 }
2357 
2358 int
2359 pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
2360     size_t newlen)
2361 {
2362 	/* All sysctl names at this level are terminal. */
2363 	if (namelen != 1)
2364 		return (ENOTDIR);
2365 
2366 	switch (name[0]) {
2367 	case PFSYNCCTL_STATS:
2368 		if (newp != NULL)
2369 			return (EPERM);
2370 		return (sysctl_struct(oldp, oldlenp, newp, newlen,
2371 		    &pfsyncstats, sizeof(pfsyncstats)));
2372 	default:
2373 		return (ENOPROTOOPT);
2374 	}
2375 }
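
/*
 * Userland reads these stats with a mib of the form below (a sketch;
 * netstat(1) does the equivalent for "netstat -s -p pfsync"):
 *
 *	int mib[4] = { CTL_NET, PF_INET, IPPROTO_PFSYNC, PFSYNCCTL_STATS };
 *	struct pfsyncstats stats;
 *	size_t len = sizeof(stats);
 *
 *	if (sysctl(mib, 4, &stats, &len, NULL, 0) == -1)
 *		err(1, "sysctl");
 */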
2376