/*	$OpenBSD: if_pfsync.c,v 1.131 2009/11/12 06:53:24 deraadt Exp $	*/

/*
 * Copyright (c) 2002 Michael Shalayeff
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/timeout.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/pool.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/bpf.h>
#include <net/netisr.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>

#ifdef	INET
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#endif

#ifdef INET6
#include <netinet6/nd6.h>
#endif /* INET6 */

#include "carp.h"
#if NCARP > 0
#include <netinet/ip_carp.h>
#endif

#include <net/pfvar.h>
#include <net/if_pfsync.h>

#include "bpfilter.h"
#include "pfsync.h"

#define PFSYNC_MINPKT ( \
	sizeof(struct ip) + \
	sizeof(struct pfsync_header) + \
	sizeof(struct pfsync_subheader))
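
/*
 * a sketch of the frame layout pfsync_sendout() builds below; every
 * frame carries at least PFSYNC_MINPKT bytes of overhead:
 *
 *	struct ip		 filled in from sc_template
 *	struct pfsync_header	 version, length, pf checksum
 *	struct pfsync_subheader	 action and message count
 *	 ...messages for that action...
 *	(more subheader+message blocks, terminated by PFSYNC_ACT_EOF)
 */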

struct pfsync_pkt {
	struct ip *ip;
	struct in_addr src;
	u_int8_t flags;
};

int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
	    struct pfsync_state_peer *);

int	pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int);
int	pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int);

int	pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int);

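/*
 * input handlers, indexed by the action field of the subheader;
 * actions we cannot accept (old protocol or fragment messages) map
 * to pfsync_in_error.
 */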
int	(*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = {
	pfsync_in_clr,			/* PFSYNC_ACT_CLR */
	pfsync_in_error,		/* PFSYNC_ACT_OINS */
	pfsync_in_iack,			/* PFSYNC_ACT_INS_ACK */
	pfsync_in_error,		/* PFSYNC_ACT_OUPD */
	pfsync_in_upd_c,		/* PFSYNC_ACT_UPD_C */
	pfsync_in_ureq,			/* PFSYNC_ACT_UPD_REQ */
	pfsync_in_del,			/* PFSYNC_ACT_DEL */
	pfsync_in_del_c,		/* PFSYNC_ACT_DEL_C */
	pfsync_in_error,		/* PFSYNC_ACT_INS_F */
	pfsync_in_error,		/* PFSYNC_ACT_DEL_F */
	pfsync_in_bus,			/* PFSYNC_ACT_BUS */
	pfsync_in_tdb,			/* PFSYNC_ACT_TDB */
	pfsync_in_eof,			/* PFSYNC_ACT_EOF */
	pfsync_in_ins,			/* PFSYNC_ACT_INS */
	pfsync_in_upd			/* PFSYNC_ACT_UPD */
};

struct pfsync_q {
	void		(*write)(struct pf_state *, void *);
	size_t		len;
	u_int8_t	action;
};

/* we have one of these for every PFSYNC_S_ */
void	pfsync_out_state(struct pf_state *, void *);
void	pfsync_out_iack(struct pf_state *, void *);
void	pfsync_out_upd_c(struct pf_state *, void *);
void	pfsync_out_del(struct pf_state *, void *);

struct pfsync_q pfsync_qs[] = {
	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C },
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD }
};

void	pfsync_q_ins(struct pf_state *, int);
void	pfsync_q_del(struct pf_state *);

struct pfsync_upd_req_item {
	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
	struct pfsync_upd_req			ur_msg;
};
TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);

struct pfsync_deferral {
	TAILQ_ENTRY(pfsync_deferral)		 pd_entry;
	struct pf_state				*pd_st;
	struct mbuf				*pd_m;
	struct timeout				 pd_tmo;
};
TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);

#define PFSYNC_PLSIZE	MAX(sizeof(struct pfsync_upd_req_item), \
			    sizeof(struct pfsync_deferral))

void	pfsync_out_tdb(struct tdb *, void *);

struct pfsync_softc {
	struct ifnet		 sc_if;
	struct ifnet		*sc_sync_if;

	struct pool		 sc_pool;

	struct ip_moptions	 sc_imo;

	struct in_addr		 sc_sync_peer;
	u_int8_t		 sc_maxupdates;

	struct ip		 sc_template;

	struct pf_state_queue	 sc_qs[PFSYNC_S_COUNT];
	size_t			 sc_len;

	struct pfsync_upd_reqs	 sc_upd_req_list;

	int			 sc_defer;
	struct pfsync_deferrals	 sc_deferrals;
	u_int			 sc_deferred;

	void			*sc_plus;
	size_t			 sc_pluslen;

	u_int32_t		 sc_ureq_sent;
	int			 sc_bulk_tries;
	struct timeout		 sc_bulkfail_tmo;

	u_int32_t		 sc_ureq_received;
	struct pf_state		*sc_bulk_next;
	struct pf_state		*sc_bulk_last;
	struct timeout		 sc_bulk_tmo;

	TAILQ_HEAD(, tdb)	 sc_tdb_q;

	struct timeout		 sc_tmo;
};

struct pfsync_softc	*pfsyncif = NULL;
struct pfsyncstats	 pfsyncstats;

void	pfsyncattach(int);
int	pfsync_clone_create(struct if_clone *, int);
int	pfsync_clone_destroy(struct ifnet *);
int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
	    struct pf_state_peer *);
void	pfsync_update_net_tdb(struct pfsync_tdb *);
int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
void	pfsyncstart(struct ifnet *);

struct mbuf *pfsync_if_dequeue(struct ifnet *);

void	pfsync_deferred(struct pf_state *, int);
void	pfsync_undefer(struct pfsync_deferral *, int);
void	pfsync_defer_tmo(void *);

void	pfsync_request_update(u_int32_t, u_int64_t);
void	pfsync_update_state_req(struct pf_state *);

void	pfsync_drop(struct pfsync_softc *);
void	pfsync_sendout(void);
void	pfsync_send_plus(void *, size_t);
void	pfsync_timeout(void *);
void	pfsync_tdb_timeout(void *);

void	pfsync_bulk_start(void);
void	pfsync_bulk_status(u_int8_t);
void	pfsync_bulk_update(void *);
void	pfsync_bulk_fail(void *);

#define PFSYNC_MAX_BULKTRIES	12
int	pfsync_sync_ok;

struct if_clone	pfsync_cloner =
    IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);

void
pfsyncattach(int npfsync)
{
	if_clone_attach(&pfsync_cloner);
}

int
pfsync_clone_create(struct if_clone *ifc, int unit)
{
	struct pfsync_softc *sc;
	struct ifnet *ifp;
	int q;

	if (unit != 0)
		return (EINVAL);

	pfsync_sync_ok = 1;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sc == NULL)
		return (ENOMEM);

	for (q = 0; q < PFSYNC_S_COUNT; q++)
		TAILQ_INIT(&sc->sc_qs[q]);

	pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, 0, 0, "pfsync", NULL);
	TAILQ_INIT(&sc->sc_upd_req_list);
	TAILQ_INIT(&sc->sc_deferrals);
	sc->sc_deferred = 0;

	TAILQ_INIT(&sc->sc_tdb_q);

	sc->sc_len = PFSYNC_MINPKT;
	sc->sc_maxupdates = 128;

	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
	    M_WAITOK | M_ZERO);
	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;

	ifp = &sc->sc_if;
	snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
	ifp->if_softc = sc;
	ifp->if_ioctl = pfsyncioctl;
	ifp->if_output = pfsyncoutput;
	ifp->if_start = pfsyncstart;
	ifp->if_type = IFT_PFSYNC;
	ifp->if_snd.ifq_maxlen = ifqmaxlen;
	ifp->if_hdrlen = sizeof(struct pfsync_header);
	ifp->if_mtu = 1500; /* XXX */
	ifp->if_hardmtu = MCLBYTES; /* XXX */
	timeout_set(&sc->sc_tmo, pfsync_timeout, sc);
	timeout_set(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
	timeout_set(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);

	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NCARP > 0
	if_addgroup(ifp, "carp");
#endif

#if NBPFILTER > 0
	bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
#endif

	pfsyncif = sc;

	return (0);
}

int
pfsync_clone_destroy(struct ifnet *ifp)
{
	struct pfsync_softc *sc = ifp->if_softc;

	timeout_del(&sc->sc_bulk_tmo);
	timeout_del(&sc->sc_tmo);
#if NCARP > 0
	if (!pfsync_sync_ok)
		carp_group_demote_adj(&sc->sc_if, -1);
#endif
#if NBPFILTER > 0
	bpfdetach(ifp);
#endif
	if_detach(ifp);

	pfsync_drop(sc);

	while (sc->sc_deferred > 0)
		pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);

	pool_destroy(&sc->sc_pool);
	free(sc->sc_imo.imo_membership, M_IPMOPTS);
	free(sc, M_DEVBUF);

	pfsyncif = NULL;

	return (0);
}

struct mbuf *
pfsync_if_dequeue(struct ifnet *ifp)
{
	struct mbuf *m;

	IF_DEQUEUE(&ifp->if_snd, m);

	return (m);
}

/*
 * Start output on the pfsync interface. The interface never transmits
 * packets queued on it directly, so just drop anything that shows up.
 */
void
pfsyncstart(struct ifnet *ifp)
{
	struct mbuf *m;
	int s;

	s = splnet();
	while ((m = pfsync_if_dequeue(ifp)) != NULL) {
		IF_DROP(&ifp->if_snd);
		m_freem(m);
	}
	splx(s);
}

int
pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
    struct pf_state_peer *d)
{
	if (s->scrub.scrub_flag && d->scrub == NULL) {
		d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
		if (d->scrub == NULL)
			return (ENOMEM);
	}

	return (0);
}

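/*
 * pfsync_state_export: flatten a kernel pf_state into its wire
 * representation, converting multi-byte fields to network byte order.
 */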
void
pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
{
	bzero(sp, sizeof(struct pfsync_state));

	/* copy from state key */
	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
	sp->key[PF_SK_WIRE].rdomain = htons(st->key[PF_SK_WIRE]->rdomain);
	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
	sp->key[PF_SK_STACK].rdomain = htons(st->key[PF_SK_STACK]->rdomain);
	sp->rtableid[PF_SK_WIRE] = htonl(st->rtableid[PF_SK_WIRE]);
	sp->rtableid[PF_SK_STACK] = htonl(st->rtableid[PF_SK_STACK]);
	sp->proto = st->key[PF_SK_WIRE]->proto;
	sp->af = st->key[PF_SK_WIRE]->af;

	/* copy from state */
	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
	bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
	sp->creation = htonl(time_second - st->creation);
	sp->expire = pf_state_expires(st);
	if (sp->expire <= time_second)
		sp->expire = htonl(0);
	else
		sp->expire = htonl(sp->expire - time_second);

	sp->direction = st->direction;
	sp->log = st->log;
	sp->timeout = st->timeout;
	sp->state_flags = st->state_flags;
	if (st->src_node)
		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;
	if (st->nat_src_node)
		sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE;

	bcopy(&st->id, &sp->id, sizeof(sp->id));
	sp->creatorid = st->creatorid;
	pf_state_peer_hton(&st->src, &sp->src);
	pf_state_peer_hton(&st->dst, &sp->dst);

	if (st->rule.ptr == NULL)
		sp->rule = htonl(-1);
	else
		sp->rule = htonl(st->rule.ptr->nr);
	if (st->anchor.ptr == NULL)
		sp->anchor = htonl(-1);
	else
		sp->anchor = htonl(st->anchor.ptr->nr);
	if (st->nat_rule.ptr == NULL)
		sp->nat_rule = htonl(-1);
	else
		sp->nat_rule = htonl(st->nat_rule.ptr->nr);

	pf_state_counter_hton(st->packets[0], sp->packets[0]);
	pf_state_counter_hton(st->packets[1], sp->packets[1]);
	pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
	pf_state_counter_hton(st->bytes[1], sp->bytes[1]);

	sp->max_mss = htons(st->max_mss);
	sp->min_ttl = st->min_ttl;
	sp->set_tos = st->set_tos;
}

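/*
 * pfsync_state_import: build a pf_state (plus state keys and scrub
 * memory) from a wire message and insert it into the state table.
 * Returns 0 if the state was inserted or deliberately skipped, and
 * an errno if allocation or insertion fails.
 */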
int
pfsync_state_import(struct pfsync_state *sp, u_int8_t flags)
{
	struct pf_state	*st = NULL;
	struct pf_state_key *skw = NULL, *sks = NULL;
	struct pf_rule *r = NULL;
	struct pfi_kif	*kif;
	int pool_flags;
	int error;

	if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) {
		printf("pfsync_state_import: invalid creator id:"
		    " %08x\n", ntohl(sp->creatorid));
		return (EINVAL);
	}

	if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
		if (pf_status.debug >= PF_DEBUG_MISC)
			printf("pfsync_state_import: "
			    "unknown interface: %s\n", sp->ifname);
		if (flags & PFSYNC_SI_IOCTL)
			return (EINVAL);
		return (0);	/* skip this state */
	}

	/*
	 * If the ruleset checksums match or the state is coming from the ioctl,
	 * it's safe to associate the state with the rule of that number.
	 */
	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
	    pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount)
		r = pf_main_ruleset.rules[
		    PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)];
	else
		r = &pf_default_rule;

	if (r->max_states && r->states_cur >= r->max_states)
		goto cleanup;

	if (flags & PFSYNC_SI_IOCTL)
		pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
	else
		pool_flags = PR_LIMITFAIL | PR_ZERO;

	if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
		goto cleanup;

	if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
		goto cleanup;

	if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] ||
	    sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) {
		if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
			goto cleanup;
	} else
		sks = skw;

	/* allocate memory for scrub info */
	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
		goto cleanup;

	/* copy to state key(s) */
	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
	skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain);
	skw->proto = sp->proto;
	skw->af = sp->af;
	if (sks != skw) {
		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
		sks->port[0] = sp->key[PF_SK_STACK].port[0];
		sks->port[1] = sp->key[PF_SK_STACK].port[1];
		sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain);
		sks->proto = sp->proto;
		sks->af = sp->af;
	}
	st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]);
	st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]);

	/* copy to state */
	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
	st->creation = time_second - ntohl(sp->creation);
	st->expire = time_second;
	if (sp->expire) {
		/* XXX No adaptive scaling. */
		st->expire -= r->timeout[sp->timeout] - ntohl(sp->expire);
	}

	st->direction = sp->direction;
	st->log = sp->log;
	st->timeout = sp->timeout;
	st->state_flags = sp->state_flags;
	st->max_mss = ntohs(sp->max_mss);
	st->min_ttl = sp->min_ttl;
	st->set_tos = sp->set_tos;

	bcopy(sp->id, &st->id, sizeof(st->id));
	st->creatorid = sp->creatorid;
	pf_state_peer_ntoh(&sp->src, &st->src);
	pf_state_peer_ntoh(&sp->dst, &st->dst);

	st->rule.ptr = r;
	st->nat_rule.ptr = NULL;
	st->anchor.ptr = NULL;
	st->rt_kif = NULL;

	st->pfsync_time = time_uptime;
	st->sync_state = PFSYNC_S_NONE;

	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
	r->states_cur++;
	r->states_tot++;

	if (!ISSET(flags, PFSYNC_SI_IOCTL))
		SET(st->state_flags, PFSTATE_NOSYNC);

	if (pf_state_insert(kif, skw, sks, st) != 0) {
		/* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */
		r->states_cur--;
		error = EEXIST;
		goto cleanup_state;
	}

	if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
		CLR(st->state_flags, PFSTATE_NOSYNC);
		if (ISSET(st->state_flags, PFSTATE_ACK)) {
			pfsync_q_ins(st, PFSYNC_S_IACK);
			schednetisr(NETISR_PFSYNC);
		}
	}
	CLR(st->state_flags, PFSTATE_ACK);

	return (0);

 cleanup:
	error = ENOMEM;
	if (skw == sks)
		sks = NULL;
	if (skw != NULL)
		pool_put(&pf_state_key_pl, skw);
	if (sks != NULL)
		pool_put(&pf_state_key_pl, sks);

 cleanup_state:	/* pf_state_insert frees the state keys */
	if (st) {
		if (st->dst.scrub)
			pool_put(&pf_state_scrub_pl, st->dst.scrub);
		if (st->src.scrub)
			pool_put(&pf_state_scrub_pl, st->src.scrub);
		pool_put(&pf_state_pl, st);
	}
	return (error);
}

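/*
 * pfsync_input: sanity check an incoming pfsync packet, then walk its
 * subheaders and dispatch each block of messages through pfsync_acts.
 * A handler returns the number of bytes it consumed, or -1 if it has
 * taken responsibility for (or already freed) the mbuf.
 */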
void
pfsync_input(struct mbuf *m, ...)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_pkt pkt;
	struct ip *ip = mtod(m, struct ip *);
	struct pfsync_header *ph;
	struct pfsync_subheader subh;

	int offset, len;
	int rv;

	pfsyncstats.pfsyncs_ipackets++;

	/* verify that we have a sync interface configured */
	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    sc->sc_sync_if == NULL || !pf_status.running)
		goto done;

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
		pfsyncstats.pfsyncs_badif++;
		goto done;
	}

	sc->sc_if.if_ipackets++;
	sc->sc_if.if_ibytes += m->m_pkthdr.len;

	/* verify that the IP TTL is 255. */
	if (ip->ip_ttl != PFSYNC_DFLTTL) {
		pfsyncstats.pfsyncs_badttl++;
		goto done;
	}

	offset = ip->ip_hl << 2;
	if (m->m_pkthdr.len < offset + sizeof(*ph)) {
		pfsyncstats.pfsyncs_hdrops++;
		goto done;
	}

	if (offset + sizeof(*ph) > m->m_len) {
		if (m_pullup(m, offset + sizeof(*ph)) == NULL) {
			pfsyncstats.pfsyncs_hdrops++;
			return;
		}
		ip = mtod(m, struct ip *);
	}
	ph = (struct pfsync_header *)((char *)ip + offset);

	/* verify the version */
	if (ph->version != PFSYNC_VERSION) {
		pfsyncstats.pfsyncs_badver++;
		goto done;
	}
	len = ntohs(ph->len) + offset;
	if (m->m_pkthdr.len < len) {
		pfsyncstats.pfsyncs_badlen++;
		goto done;
	}

	/* Cheaper to grab this now than having to mess with mbufs later */
	pkt.ip = ip;
	pkt.src = ip->ip_src;
	pkt.flags = 0;

	if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
		pkt.flags |= PFSYNC_SI_CKSUM;

	offset += sizeof(*ph);
	while (offset <= len - sizeof(subh)) {
		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
		offset += sizeof(subh);

		if (subh.action >= PFSYNC_ACT_MAX ||
		    subh.action >= nitems(pfsync_acts)) {
			pfsyncstats.pfsyncs_badact++;
			goto done;
		}

		rv = (*pfsync_acts[subh.action])(&pkt, m, offset,
		    ntohs(subh.count));
		if (rv == -1)
			return;

		offset += rv;
	}

done:
	m_freem(m);
}

int
pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_clr *clr;
	struct mbuf *mp;
	int len = sizeof(*clr) * count;
	int i, offp;

	struct pf_state *st, *nexts;
	struct pf_state_key *sk, *nextsk;
	struct pf_state_item *si;
	u_int32_t creatorid;
	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	clr = (struct pfsync_clr *)(mp->m_data + offp);

	s = splsoftnet();
	for (i = 0; i < count; i++) {
		creatorid = clr[i].creatorid;

		if (clr[i].ifname[0] == '\0') {
			for (st = RB_MIN(pf_state_tree_id, &tree_id);
			    st; st = nexts) {
				nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
				if (st->creatorid == creatorid) {
					SET(st->state_flags, PFSTATE_NOSYNC);
					pf_unlink_state(st);
				}
			}
		} else {
			if (pfi_kif_get(clr[i].ifname) == NULL)
				continue;

			/* XXX correct? */
			for (sk = RB_MIN(pf_state_tree, &pf_statetbl);
			    sk; sk = nextsk) {
				nextsk = RB_NEXT(pf_state_tree,
				    &pf_statetbl, sk);
				TAILQ_FOREACH(si, &sk->states, entry) {
					if (si->s->creatorid == creatorid) {
						SET(si->s->state_flags,
						    PFSTATE_NOSYNC);
						pf_unlink_state(si->s);
					}
				}
			}
		}
	}
	splx(s);

	return (len);
}

int
pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct mbuf *mp;
	struct pfsync_state *sa, *sp;
	int len = sizeof(*sp) * count;
	int i, offp;

	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_state *)(mp->m_data + offp);

	s = splsoftnet();
	for (i = 0; i < count; i++) {
		sp = &sa[i];

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST ||
		    sp->direction > PF_OUT ||
		    (sp->af != AF_INET && sp->af != AF_INET6)) {
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf("pfsync_input: PFSYNC_ACT_INS: "
				    "invalid value\n");
			}
			pfsyncstats.pfsyncs_badval++;
			continue;
		}

		if (pfsync_state_import(sp, pkt->flags) == ENOMEM) {
			/* drop out, but process the rest of the actions */
			break;
		}
	}
	splx(s);

	return (len);
}

int
pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_ins_ack *ia, *iaa;
	struct pf_state_cmp id_key;
	struct pf_state *st;

	struct mbuf *mp;
	int len = count * sizeof(*ia);
	int offp, i;
	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	iaa = (struct pfsync_ins_ack *)(mp->m_data + offp);

	s = splsoftnet();
	for (i = 0; i < count; i++) {
		ia = &iaa[i];

		bcopy(&ia->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = ia->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL)
			continue;

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 0);
	}
	splx(s);

	return (len);
}

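/*
 * pfsync_upd_tcp: merge a peer's view of a TCP state into ours.
 * Returns the number of directions in which our local copy is newer
 * than the update, i.e. nonzero means the peer is stale and should
 * be sent our version.
 */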
int
pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
    struct pfsync_state_peer *dst)
{
	int sync = 0;

	/*
	 * The state should never go backwards except
	 * for syn-proxy states.  Neither should the
	 * sequence window slide backwards.
	 */
	if ((st->src.state > src->state &&
	    (st->src.state < PF_TCPS_PROXY_SRC ||
	    src->state >= PF_TCPS_PROXY_SRC)) ||

	    (st->src.state == src->state &&
	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(src, &st->src);

	if ((st->dst.state > dst->state) ||

	    (st->dst.state >= TCPS_SYN_SENT &&
	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(dst, &st->dst);

	return (sync);
}

int
pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_state *sa, *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int sync;

	struct mbuf *mp;
	int len = count * sizeof(*sp);
	int offp, i;
	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_state *)(mp->m_data + offp);

	s = splsoftnet();
	for (i = 0; i < count; i++) {
		sp = &sa[i];

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST) {
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf("pfsync_input: PFSYNC_ACT_UPD: "
				    "invalid value\n");
			}
			pfsyncstats.pfsyncs_badval++;
			continue;
		}

		bcopy(sp->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			/* insert the update */
			if (pfsync_state_import(sp, 0))
				pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 1);

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
		else {
			sync = 0;

			/*
			 * Non-TCP protocol state machines always go
			 * forwards.
			 */
			if (st->src.state > sp->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->src, &st->src);

			if (st->dst.state > sp->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->dst, &st->dst);
		}

		if (sync < 2) {
			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
			pf_state_peer_ntoh(&sp->dst, &st->dst);
			st->expire = ntohl(sp->expire) + time_second;
			st->timeout = sp->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			pfsyncstats.pfsyncs_stale++;

			pfsync_update_state(st);
			schednetisr(NETISR_PFSYNC);
		}
	}
	splx(s);

	return (len);
}

int
pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_upd_c *ua, *up;
	struct pf_state_cmp id_key;
	struct pf_state *st;

	int len = count * sizeof(*up);
	int sync;

	struct mbuf *mp;
	int offp, i;
	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	ua = (struct pfsync_upd_c *)(mp->m_data + offp);

	s = splsoftnet();
	for (i = 0; i < count; i++) {
		up = &ua[i];

		/* check for invalid values */
		if (up->timeout >= PFTM_MAX ||
		    up->src.state > PF_TCPS_PROXY_DST ||
		    up->dst.state > PF_TCPS_PROXY_DST) {
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf("pfsync_input: "
				    "PFSYNC_ACT_UPD_C: "
				    "invalid value\n");
			}
			pfsyncstats.pfsyncs_badval++;
			continue;
		}

		bcopy(&up->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = up->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			/* We don't have this state. Ask for it. */
			pfsync_request_update(id_key.creatorid, id_key.id);
			continue;
		}

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 1);

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
		else {
			sync = 0;
			/*
			 * Non-TCP protocol state machines always go
			 * forwards.
			 */
			if (st->src.state > up->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->src, &st->src);

			if (st->dst.state > up->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->dst, &st->dst);
		}
		if (sync < 2) {
			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
			pf_state_peer_ntoh(&up->dst, &st->dst);
			st->expire = ntohl(up->expire) + time_second;
			st->timeout = up->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			pfsyncstats.pfsyncs_stale++;

			pfsync_update_state(st);
			schednetisr(NETISR_PFSYNC);
		}
	}
	splx(s);

	return (len);
}

int
pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_upd_req *ur, *ura;
	struct mbuf *mp;
	int len = count * sizeof(*ur);
	int i, offp;

	struct pf_state_cmp id_key;
	struct pf_state *st;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	ura = (struct pfsync_upd_req *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		ur = &ura[i];

		bcopy(&ur->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = ur->creatorid;

		if (id_key.id == 0 && id_key.creatorid == 0)
			pfsync_bulk_start();
		else {
			st = pf_find_state_byid(&id_key);
			if (st == NULL) {
				pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			if (ISSET(st->state_flags, PFSTATE_NOSYNC))
				continue;

			pfsync_update_state_req(st);
		}
	}

	return (len);
}

int
pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct mbuf *mp;
	struct pfsync_state *sa, *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int len = count * sizeof(*sp);
	int offp, i;
	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_state *)(mp->m_data + offp);

	s = splsoftnet();
	for (i = 0; i < count; i++) {
		sp = &sa[i];

		bcopy(sp->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			pfsyncstats.pfsyncs_badstate++;
			continue;
		}
		SET(st->state_flags, PFSTATE_NOSYNC);
		pf_unlink_state(st);
	}
	splx(s);

	return (len);
}

int
pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct mbuf *mp;
	struct pfsync_del_c *sa, *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int len = count * sizeof(*sp);
	int offp, i;
	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_del_c *)(mp->m_data + offp);

	s = splsoftnet();
	for (i = 0; i < count; i++) {
		sp = &sa[i];

		bcopy(&sp->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		SET(st->state_flags, PFSTATE_NOSYNC);
		pf_unlink_state(st);
	}
	splx(s);

	return (len);
}

int
pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_bus *bus;
	struct mbuf *mp;
	int len = count * sizeof(*bus);
	int offp;

	/* If we're not waiting for a bulk update, who cares. */
	if (sc->sc_ureq_sent == 0)
		return (len);

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	bus = (struct pfsync_bus *)(mp->m_data + offp);

	switch (bus->status) {
	case PFSYNC_BUS_START:
		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
		    pf_pool_limits[PF_LIMIT_STATES].limit /
		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
		    sizeof(struct pfsync_state)));
		if (pf_status.debug >= PF_DEBUG_MISC)
			printf("pfsync: received bulk update start\n");
		break;

	case PFSYNC_BUS_END:
		if (time_uptime - ntohl(bus->endtime) >=
		    sc->sc_ureq_sent) {
			/* that's it, we're happy */
			sc->sc_ureq_sent = 0;
			sc->sc_bulk_tries = 0;
			timeout_del(&sc->sc_bulkfail_tmo);
#if NCARP > 0
			if (!pfsync_sync_ok)
				carp_group_demote_adj(&sc->sc_if, -1);
#endif
			pfsync_sync_ok = 1;
			if (pf_status.debug >= PF_DEBUG_MISC)
				printf("pfsync: received valid "
				    "bulk update end\n");
		} else {
			if (pf_status.debug >= PF_DEBUG_MISC)
				printf("pfsync: received invalid "
				    "bulk update end: bad timestamp\n");
		}
		break;
	}

	return (len);
}

int
pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	int len = count * sizeof(struct pfsync_tdb);

#if defined(IPSEC)
	struct pfsync_tdb *tp;
	struct mbuf *mp;
	int offp;
	int i;
	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	tp = (struct pfsync_tdb *)(mp->m_data + offp);

	s = splsoftnet();
	for (i = 0; i < count; i++)
		pfsync_update_net_tdb(&tp[i]);
	splx(s);
#endif

	return (len);
}

#if defined(IPSEC)
/* Update an in-kernel tdb. Silently fail if no tdb is found. */
void
pfsync_update_net_tdb(struct pfsync_tdb *pt)
{
	struct tdb		*tdb;
	int			 s;

	/* check for invalid values */
	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
	    (pt->dst.sa.sa_family != AF_INET &&
	     pt->dst.sa.sa_family != AF_INET6))
		goto bad;

	s = spltdb();
	tdb = gettdb(pt->spi, &pt->dst, pt->sproto);
	if (tdb) {
		pt->rpl = ntohl(pt->rpl);
		pt->cur_bytes = betoh64(pt->cur_bytes);

		/* Neither replay nor byte counter should ever decrease. */
		if (pt->rpl < tdb->tdb_rpl ||
		    pt->cur_bytes < tdb->tdb_cur_bytes) {
			splx(s);
			goto bad;
		}

		tdb->tdb_rpl = pt->rpl;
		tdb->tdb_cur_bytes = pt->cur_bytes;
	}
	splx(s);
	return;

 bad:
	if (pf_status.debug >= PF_DEBUG_MISC)
		printf("pfsync_update_net_tdb: PFSYNC_ACT_TDB: "
		    "invalid value\n");
	pfsyncstats.pfsyncs_badstate++;
	return;
}
#endif

int
pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	/* check if we are at the right place in the packet */
	if (offset != m->m_pkthdr.len)
		pfsyncstats.pfsyncs_badlen++;

	/* we're done. free and let the caller return */
	m_freem(m);
	return (-1);
}

int
pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	pfsyncstats.pfsyncs_badact++;

	m_freem(m);
	return (-1);
}

int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
	struct rtentry *rt)
{
	m_freem(m);
	return (0);
}

/* ARGSUSED */
int
pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct proc *p = curproc;
	struct pfsync_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ip_moptions *imo = &sc->sc_imo;
	struct pfsyncreq pfsyncr;
	struct ifnet    *sifp;
	struct ip *ip;
	int s, error;

	switch (cmd) {
#if 0
	case SIOCSIFADDR:
	case SIOCAIFADDR:
	case SIOCSIFDSTADDR:
#endif
	case SIOCSIFFLAGS:
		s = splnet();
		if (ifp->if_flags & IFF_UP)
			ifp->if_flags |= IFF_RUNNING;
		else {
			ifp->if_flags &= ~IFF_RUNNING;

			/* drop everything */
			timeout_del(&sc->sc_tmo);
			pfsync_drop(sc);

			/* cancel bulk update */
			timeout_del(&sc->sc_bulk_tmo);
			sc->sc_bulk_next = NULL;
			sc->sc_bulk_last = NULL;
		}
		splx(s);
		break;
	case SIOCSIFMTU:
		s = splnet();
		if (ifr->ifr_mtu <= PFSYNC_MINPKT) {
			splx(s);
			return (EINVAL);
		}
		if (ifr->ifr_mtu > MCLBYTES) /* XXX could be bigger */
			ifr->ifr_mtu = MCLBYTES;
		if (ifr->ifr_mtu < ifp->if_mtu)
			pfsync_sendout();
		ifp->if_mtu = ifr->ifr_mtu;
		splx(s);
		break;
	case SIOCGETPFSYNC:
		bzero(&pfsyncr, sizeof(pfsyncr));
		if (sc->sc_sync_if) {
			strlcpy(pfsyncr.pfsyncr_syncdev,
			    sc->sc_sync_if->if_xname, IFNAMSIZ);
		}
		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
		pfsyncr.pfsyncr_defer = sc->sc_defer;
		return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));

	case SIOCSETPFSYNC:
		if ((error = suser(p, p->p_acflag)) != 0)
			return (error);
		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
			return (error);

		s = splnet();

		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
		else
			sc->sc_sync_peer.s_addr =
			    pfsyncr.pfsyncr_syncpeer.s_addr;

		if (pfsyncr.pfsyncr_maxupdates > 255) {
			splx(s);
			return (EINVAL);
		}
		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;

		sc->sc_defer = pfsyncr.pfsyncr_defer;

		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
			sc->sc_sync_if = NULL;
			if (imo->imo_num_memberships > 0) {
				in_delmulti(imo->imo_membership[
				    --imo->imo_num_memberships]);
				imo->imo_multicast_ifp = NULL;
			}
			splx(s);
			break;
		}

		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) {
			splx(s);
			return (EINVAL);
		}

		if (sifp->if_mtu < sc->sc_if.if_mtu ||
		    (sc->sc_sync_if != NULL &&
		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
			pfsync_sendout();
		sc->sc_sync_if = sifp;

		if (imo->imo_num_memberships > 0) {
			in_delmulti(imo->imo_membership[
			    --imo->imo_num_memberships]);
			imo->imo_multicast_ifp = NULL;
		}

		if (sc->sc_sync_if &&
		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
			struct in_addr addr;

			if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) {
				sc->sc_sync_if = NULL;
				splx(s);
				return (EADDRNOTAVAIL);
			}

			addr.s_addr = INADDR_PFSYNC_GROUP;

			if ((imo->imo_membership[0] =
			    in_addmulti(&addr, sc->sc_sync_if)) == NULL) {
				sc->sc_sync_if = NULL;
				splx(s);
				return (ENOBUFS);
			}
			imo->imo_num_memberships++;
			imo->imo_multicast_ifp = sc->sc_sync_if;
			imo->imo_multicast_ttl = PFSYNC_DFLTTL;
			imo->imo_multicast_loop = 0;
		}

		ip = &sc->sc_template;
		bzero(ip, sizeof(*ip));
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(sc->sc_template) >> 2;
		ip->ip_tos = IPTOS_LOWDELAY;
		/* len and id are set later */
		ip->ip_off = htons(IP_DF);
		ip->ip_ttl = PFSYNC_DFLTTL;
		ip->ip_p = IPPROTO_PFSYNC;
		ip->ip_src.s_addr = INADDR_ANY;
		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;

		if (sc->sc_sync_if) {
			/* Request a full state table update. */
			sc->sc_ureq_sent = time_uptime;
#if NCARP > 0
			if (pfsync_sync_ok)
				carp_group_demote_adj(&sc->sc_if, 1);
#endif
			pfsync_sync_ok = 0;
			if (pf_status.debug >= PF_DEBUG_MISC)
				printf("pfsync: requesting bulk update\n");
			timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
			    pf_pool_limits[PF_LIMIT_STATES].limit /
			    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
			    sizeof(struct pfsync_state)));
			pfsync_request_update(0, 0);
		}
		splx(s);

		break;

	default:
		return (ENOTTY);
	}

	return (0);
}

void
pfsync_out_state(struct pf_state *st, void *buf)
{
	struct pfsync_state *sp = buf;

	pfsync_state_export(sp, st);
}

void
pfsync_out_iack(struct pf_state *st, void *buf)
{
	struct pfsync_ins_ack *iack = buf;

	iack->id = st->id;
	iack->creatorid = st->creatorid;
}

void
pfsync_out_upd_c(struct pf_state *st, void *buf)
{
	struct pfsync_upd_c *up = buf;

	up->id = st->id;
	pf_state_peer_hton(&st->src, &up->src);
	pf_state_peer_hton(&st->dst, &up->dst);
	up->creatorid = st->creatorid;

	up->expire = pf_state_expires(st);
	if (up->expire <= time_second)
		up->expire = htonl(0);
	else
		up->expire = htonl(up->expire - time_second);
	up->timeout = st->timeout;

	bzero(up->_pad, sizeof(up->_pad)); /* XXX */
}

void
pfsync_out_del(struct pf_state *st, void *buf)
{
	struct pfsync_del_c *dp = buf;

	dp->id = st->id;
	dp->creatorid = st->creatorid;

	SET(st->state_flags, PFSTATE_NOSYNC);
}

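/*
 * pfsync_drop: forget everything queued for transmission and reset
 * sc_len, without putting anything on the wire.
 */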
void
pfsync_drop(struct pfsync_softc *sc)
{
	struct pf_state *st;
	struct pfsync_upd_req_item *ur;
	struct tdb *t;
	int q;

	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;

		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
#ifdef PFSYNC_DEBUG
			KASSERT(st->sync_state == q);
#endif
			st->sync_state = PFSYNC_S_NONE;
		}
		TAILQ_INIT(&sc->sc_qs[q]);
	}

	while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
		TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
		pool_put(&sc->sc_pool, ur);
	}

	sc->sc_plus = NULL;

	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry)
			CLR(t->tdb_flags, TDBF_PFSYNC);

		TAILQ_INIT(&sc->sc_tdb_q);
	}

	sc->sc_len = PFSYNC_MINPKT;
}

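/*
 * pfsync_sendout: gather the queued update requests, tdb messages, and
 * per-queue state messages into one frame and hand it to ip_output()
 * (and to bpf listeners, if any). Runs from the pfsync netisr and from
 * the queue inserts whenever another message would exceed the MTU.
 */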
void
pfsync_sendout(void)
{
	struct pfsync_softc *sc = pfsyncif;
#if NBPFILTER > 0
	struct ifnet *ifp = &sc->sc_if;
#endif
	struct mbuf *m;
	struct ip *ip;
	struct pfsync_header *ph;
	struct pfsync_subheader *subh;
	struct pf_state *st;
	struct pfsync_upd_req_item *ur;
	struct tdb *t;

	int offset;
	int q, count = 0;

	if (sc == NULL || sc->sc_len == PFSYNC_MINPKT)
		return;

	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
#if NBPFILTER > 0
	    (ifp->if_bpf == NULL && sc->sc_sync_if == NULL)) {
#else
	    sc->sc_sync_if == NULL) {
#endif
		pfsync_drop(sc);
		return;
	}

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL) {
		sc->sc_if.if_oerrors++;
		pfsyncstats.pfsyncs_onomem++;
		pfsync_drop(sc);
		return;
	}

	if (max_linkhdr + sc->sc_len > MHLEN) {
		MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_free(m);
			sc->sc_if.if_oerrors++;
			pfsyncstats.pfsyncs_onomem++;
			pfsync_drop(sc);
			return;
		}
	}
	m->m_data += max_linkhdr;
	m->m_len = m->m_pkthdr.len = sc->sc_len;

	/* build the ip header */
	ip = (struct ip *)m->m_data;
	bcopy(&sc->sc_template, ip, sizeof(*ip));
	offset = sizeof(*ip);

	ip->ip_len = htons(m->m_pkthdr.len);
	ip->ip_id = htons(ip_randomid());

	/* build the pfsync header */
	ph = (struct pfsync_header *)(m->m_data + offset);
	bzero(ph, sizeof(*ph));
	offset += sizeof(*ph);

	ph->version = PFSYNC_VERSION;
	ph->len = htons(sc->sc_len - sizeof(*ip));
	bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);

	if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
			TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);

			bcopy(&ur->ur_msg, m->m_data + offset,
			    sizeof(ur->ur_msg));
			offset += sizeof(ur->ur_msg);

			pool_put(&sc->sc_pool, ur);

			count++;
		}

		bzero(subh, sizeof(*subh));
		subh->action = PFSYNC_ACT_UPD_REQ;
		subh->count = htons(count);
	}

	/* has someone built a custom region for us to add? */
	if (sc->sc_plus != NULL) {
		bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
		offset += sc->sc_pluslen;

		sc->sc_plus = NULL;
	}

	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) {
			pfsync_out_tdb(t, m->m_data + offset);
			offset += sizeof(struct pfsync_tdb);
			CLR(t->tdb_flags, TDBF_PFSYNC);

			count++;
		}
		TAILQ_INIT(&sc->sc_tdb_q);

		bzero(subh, sizeof(*subh));
		subh->action = PFSYNC_ACT_TDB;
		subh->count = htons(count);
	}

	/* walk the queues */
	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;

		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
#ifdef PFSYNC_DEBUG
			KASSERT(st->sync_state == q);
#endif
			pfsync_qs[q].write(st, m->m_data + offset);
			offset += pfsync_qs[q].len;

			st->sync_state = PFSYNC_S_NONE;
			count++;
		}
		TAILQ_INIT(&sc->sc_qs[q]);

		bzero(subh, sizeof(*subh));
		subh->action = pfsync_qs[q].action;
		subh->count = htons(count);
	}

	subh = (struct pfsync_subheader *)(m->m_data + offset);
	offset += sizeof(*subh);

	bzero(subh, sizeof(*subh));
	subh->action = PFSYNC_ACT_EOF;
	subh->count = htons(1);

	/* we're done, let's put it on the wire */
#if NBPFILTER > 0
	if (ifp->if_bpf) {
		m->m_data += sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
		m->m_data -= sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sc->sc_len;
	}

	if (sc->sc_sync_if == NULL) {
		sc->sc_len = PFSYNC_MINPKT;
		m_freem(m);
		return;
	}
#endif

	sc->sc_if.if_opackets++;
	sc->sc_if.if_obytes += m->m_pkthdr.len;

	if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL) == 0)
		pfsyncstats.pfsyncs_opackets++;
	else
		pfsyncstats.pfsyncs_oerrors++;

	/* start again */
	sc->sc_len = PFSYNC_MINPKT;
}

void
pfsync_insert_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	splsoftassert(IPL_SOFTNET);

	if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) ||
	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
		SET(st->state_flags, PFSTATE_NOSYNC);
		return;
	}

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    ISSET(st->state_flags, PFSTATE_NOSYNC))
		return;

#ifdef PFSYNC_DEBUG
	KASSERT(st->sync_state == PFSYNC_S_NONE);
#endif

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	pfsync_q_ins(st, PFSYNC_S_INS);

	st->sync_updates = 0;
}

int defer = 10;

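/*
 * pfsync_defer: hold on to the packet that created this state until
 * a peer acks the insert or the timeout ("defer" ticks, above) fires,
 * so a failover peer learns of the state before the first reply comes
 * back. Returns 1 if the packet was deferred, 0 if pf may send it.
 */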
int
pfsync_defer(struct pf_state *st, struct mbuf *m)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_deferral *pd;

	splsoftassert(IPL_SOFTNET);

	if (!sc->sc_defer)
		return (0);

	if (sc->sc_deferred >= 128)
		pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);

	pd = pool_get(&sc->sc_pool, PR_NOWAIT);
	if (pd == NULL)
		return (0);
	sc->sc_deferred++;

	m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
	SET(st->state_flags, PFSTATE_ACK);

	pd->pd_st = st;
	pd->pd_m = m;

	TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
	timeout_set(&pd->pd_tmo, pfsync_defer_tmo, pd);
	timeout_add(&pd->pd_tmo, defer);

	schednetisr(NETISR_PFSYNC);

	return (1);
}

void
pfsync_undefer(struct pfsync_deferral *pd, int drop)
{
	struct pfsync_softc *sc = pfsyncif;

	splsoftassert(IPL_SOFTNET);

	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
	sc->sc_deferred--;

	CLR(pd->pd_st->state_flags, PFSTATE_ACK);
	timeout_del(&pd->pd_tmo); /* bah */
	if (drop)
		m_freem(pd->pd_m);
	else {
		ip_output(pd->pd_m, (void *)NULL, (void *)NULL, 0,
		    (void *)NULL, (void *)NULL);
	}

	pool_put(&sc->sc_pool, pd);
}

void
pfsync_defer_tmo(void *arg)
{
	int s;

	s = splsoftnet();
	pfsync_undefer(arg, 0);
	splx(s);
}

void
pfsync_deferred(struct pf_state *st, int drop)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_deferral *pd;

	TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
		if (pd->pd_st == st) {
			pfsync_undefer(pd, drop);
			return;
		}
	}

	panic("pfsync_deferred: unable to find deferred state");
}

u_int pfsync_upds = 0;

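/*
 * pfsync_update_state: note that a state has changed and queue the
 * cheapest message that will bring the peers up to date, compressing
 * repeated TCP updates until sc_maxupdates is reached.
 */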
void
pfsync_update_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;
	int sync = 0;

	splsoftassert(IPL_SOFTNET);

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	if (ISSET(st->state_flags, PFSTATE_ACK))
		pfsync_deferred(st, 0);
	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_INS:
		/* we're already handling it */

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
			st->sync_updates++;
			if (st->sync_updates >= sc->sc_maxupdates)
				sync = 1;
		}
		break;

	case PFSYNC_S_IACK:
		pfsync_q_del(st);
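		/* FALLTHROUGH */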
	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD_C);
		st->sync_updates = 0;
		break;

	default:
		panic("pfsync_update_state: unexpected sync state %d",
		    st->sync_state);
	}

	if (sync || (time_uptime - st->pfsync_time) < 2) {
		pfsync_upds++;
		schednetisr(NETISR_PFSYNC);
	}
}

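/*
 * pfsync_request_update: queue a PFSYNC_ACT_UPD_REQ message asking the
 * peers for a full copy of one state, or of every state when both id
 * and creatorid are zero (see pfsync_in_ureq() above).
 */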
void
pfsync_request_update(u_int32_t creatorid, u_int64_t id)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_upd_req_item *item;
	size_t nlen = sizeof(struct pfsync_upd_req);

	/*
	 * this code does nothing to prevent multiple update requests for the
	 * same state being generated.
	 */

	item = pool_get(&sc->sc_pool, PR_NOWAIT);
	if (item == NULL) {
		/* XXX stats */
		return;
	}

	item->ur_msg.id = id;
	item->ur_msg.creatorid = creatorid;

	if (TAILQ_EMPTY(&sc->sc_upd_req_list))
		nlen += sizeof(struct pfsync_subheader);

	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
		pfsync_sendout();

		nlen = sizeof(struct pfsync_subheader) +
		    sizeof(struct pfsync_upd_req);
	}

	TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
	sc->sc_len += nlen;

	schednetisr(NETISR_PFSYNC);
}

void
pfsync_update_state_req(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL)
		panic("pfsync_update_state_req: nonexistent instance");

	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
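		/* FALLTHROUGH */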
2007 	case PFSYNC_S_NONE:
2008 		pfsync_q_ins(st, PFSYNC_S_UPD);
2009 		schednetisr(NETISR_PFSYNC);
2010 		return;
2011 
2012 	case PFSYNC_S_INS:
2013 	case PFSYNC_S_UPD:
2014 	case PFSYNC_S_DEL:
2015 		/* we're already handling it */
2016 		return;
2017 
2018 	default:
2019 		panic("pfsync_update_state_req: unexpected sync state %d",
2020 		    st->sync_state);
2021 	}
2022 }
2023 
2024 void
2025 pfsync_delete_state(struct pf_state *st)
2026 {
2027 	struct pfsync_softc *sc = pfsyncif;
2028 
2029 	splsoftassert(IPL_SOFTNET);
2030 
2031 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
2032 		return;
2033 
2034 	if (ISSET(st->state_flags, PFSTATE_ACK))
2035 		pfsync_deferred(st, 1);
2036 	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
2037 		if (st->sync_state != PFSYNC_S_NONE)
2038 			pfsync_q_del(st);
2039 		return;
2040 	}
2041 
2042 	if (sc->sc_len == PFSYNC_MINPKT)
2043 		timeout_add_sec(&sc->sc_tmo, 1);
2044 
2045 	switch (st->sync_state) {
2046 	case PFSYNC_S_INS:
2047 		/* we never got to tell the world so just forget about it */
2048 		pfsync_q_del(st);
2049 		return;
2050 
2051 	case PFSYNC_S_UPD_C:
2052 	case PFSYNC_S_UPD:
2053 	case PFSYNC_S_IACK:
2054 		pfsync_q_del(st);
2055 		/* FALLTHROUGH to putting it on the del list */
2056 
2057 	case PFSYNC_S_NONE:
2058 		pfsync_q_ins(st, PFSYNC_S_DEL);
2059 		return;
2060 
2061 	default:
2062 		panic("pfsync_delete_state: unexpected sync state %d",
2063 		    st->sync_state);
2064 	}
2065 }
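
/*
 * Deletion transitions, summarized (informational):
 *
 *	INS              -> -	dropped; peers never saw the state
 *	UPD_C, UPD, IACK -> DEL	dequeued, then the delete is announced
 *	NONE             -> DEL
 */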
2066 
2067 void
2068 pfsync_clear_states(u_int32_t creatorid, const char *ifname)
2069 {
2070 	struct pfsync_softc *sc = pfsyncif;
2071 	struct {
2072 		struct pfsync_subheader subh;
2073 		struct pfsync_clr clr;
2074 	} __packed r;
2075 
2076 	splsoftassert(IPL_SOFTNET);
2077 
2078 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
2079 		return;
2080 
2081 	bzero(&r, sizeof(r));
2082 
2083 	r.subh.action = PFSYNC_ACT_CLR;
2084 	r.subh.count = htons(1);
2085 
2086 	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
2087 	r.clr.creatorid = creatorid;
2088 
2089 	pfsync_send_plus(&r, sizeof(r));
2090 }
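
/*
 * The record built above is a single-entry PFSYNC_ACT_CLR subheader
 * asking peers to flush every state with the given creatorid; an
 * empty ifname means all interfaces. Illustrative call, not compiled:
 */
#if 0
	pfsync_clear_states(pf_status.hostid, "");	/* flush our states everywhere */
#endif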
2091 
2092 void
2093 pfsync_q_ins(struct pf_state *st, int q)
2094 {
2095 	struct pfsync_softc *sc = pfsyncif;
2096 	size_t nlen = pfsync_qs[q].len;
2097 
2098 	KASSERT(st->sync_state == PFSYNC_S_NONE);
2099 
2100 #if 1 || defined(PFSYNC_DEBUG)
2101 	if (sc->sc_len < PFSYNC_MINPKT)
2102 		panic("pfsync pkt len is too low %zu", sc->sc_len);
2103 #endif
2104 	if (TAILQ_EMPTY(&sc->sc_qs[q]))
2105 		nlen += sizeof(struct pfsync_subheader);
2106 
2107 	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
2108 		pfsync_sendout();
2109 
2110 		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
2111 	}
2112 
2113 	sc->sc_len += nlen;
2114 	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
2115 	st->sync_state = q;
2116 }
2117 
2118 void
2119 pfsync_q_del(struct pf_state *st)
2120 {
2121 	struct pfsync_softc *sc = pfsyncif;
2122 	int q = st->sync_state;
2123 
2124 	KASSERT(st->sync_state != PFSYNC_S_NONE);
2125 
2126 	sc->sc_len -= pfsync_qs[q].len;
2127 	TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
2128 	st->sync_state = PFSYNC_S_NONE;
2129 
2130 	if (TAILQ_EMPTY(&sc->sc_qs[q]))
2131 		sc->sc_len -= sizeof(struct pfsync_subheader);
2132 }
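
/*
 * sc_len bookkeeping invariant (informational): pfsync_q_ins() and
 * pfsync_q_del() mirror each other exactly. For any queue q:
 *
 *	ins: sc_len += pfsync_qs[q].len
 *	     (+ sizeof(struct pfsync_subheader) if q was empty)
 *	del: sc_len -= pfsync_qs[q].len
 *	     (- sizeof(struct pfsync_subheader) if q is now empty)
 *
 * so sc_len returns to PFSYNC_MINPKT once every queue drains.
 */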
2133 
2134 void
2135 pfsync_update_tdb(struct tdb *t, int output)
2136 {
2137 	struct pfsync_softc *sc = pfsyncif;
2138 	size_t nlen = sizeof(struct pfsync_tdb);
2139 
2140 	if (sc == NULL)
2141 		return;
2142 
2143 	if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
2144 		if (TAILQ_EMPTY(&sc->sc_tdb_q))
2145 			nlen += sizeof(struct pfsync_subheader);
2146 
2147 		if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
2148 			pfsync_sendout();
2149 
2150 			nlen = sizeof(struct pfsync_subheader) +
2151 			    sizeof(struct pfsync_tdb);
2152 		}
2153 
2154 		sc->sc_len += nlen;
2155 		TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
2156 		SET(t->tdb_flags, TDBF_PFSYNC);
2157 		t->tdb_updates = 0;
2158 	} else {
2159 		if (++t->tdb_updates >= sc->sc_maxupdates)
2160 			schednetisr(NETISR_PFSYNC);
2161 	}
2162 
2163 	if (output)
2164 		SET(t->tdb_flags, TDBF_PFSYNC_RPL);
2165 	else
2166 		CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
2167 }
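
/*
 * Sketch of the IPsec callers (illustrative, not compiled): the first
 * call after a flush queues the tdb and pays for its slot; later
 * calls only count toward sc_maxupdates before forcing a sendout.
 * `output' records which direction the SA was last used in, so
 * pfsync_out_tdb() knows whether to pad the replay counter.
 */
#if 0
	pfsync_update_tdb(tdb, 1);	/* output path: rpl will be padded */
	pfsync_update_tdb(tdb, 0);	/* input path: rpl sent as-is */
#endif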
2168 
2169 void
2170 pfsync_delete_tdb(struct tdb *t)
2171 {
2172 	struct pfsync_softc *sc = pfsyncif;
2173 
2174 	if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
2175 		return;
2176 
2177 	sc->sc_len -= sizeof(struct pfsync_tdb);
2178 	TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
2179 	CLR(t->tdb_flags, TDBF_PFSYNC);
2180 
2181 	if (TAILQ_EMPTY(&sc->sc_tdb_q))
2182 		sc->sc_len -= sizeof(struct pfsync_subheader);
2183 }
2184 
2185 void
2186 pfsync_out_tdb(struct tdb *t, void *buf)
2187 {
2188 	struct pfsync_tdb *ut = buf;
2189 
2190 	bzero(ut, sizeof(*ut));
2191 	ut->spi = t->tdb_spi;
2192 	bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
2193 	/*
2194 	 * When a failover happens, the master's rpl is probably above
2195 	 * what we see here (we may be up to a second late), so
2196 	 * increase it a bit for outbound tdbs to manage most such
2197 	 * situations.
2198 	 *
2199 	 * For now, just add an offset that is likely to be larger
2200 	 * than the number of packets we can see in one second. The RFC
2201 	 * just says the next packet must have a higher seq value.
2202 	 *
2203 	 * XXX What is a good algorithm for this? We could use
2204 	 * a rate-determined increase, but to know it, we would have
2205 	 * to extend struct tdb.
2206 	 * XXX ut->rpl can wrap over MAXINT, but if so the real tdb
2207 	 * will soon be replaced anyway. For now, just don't handle
2208 	 * this edge case.
2209 	 */
2210 #define RPL_INCR 16384
2211 	ut->rpl = htonl(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
2212 	    RPL_INCR : 0));
2213 	ut->cur_bytes = htobe64(t->tdb_cur_bytes);
2214 	ut->sproto = t->tdb_sproto;
2215 }
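
/*
 * Worked example of the offset above (informational): if tdb_rpl is
 * 1000 when the snapshot is taken on an outbound SA, the peer installs
 * 1000 + RPL_INCR = 17384. Even if the old master sends another 16000
 * packets before failing over, the new master's replay counter still
 * starts above every sequence number already on the wire, keeping the
 * "next packet must have a higher seq" rule intact.
 */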
2216 
2217 void
2218 pfsync_bulk_start(void)
2219 {
2220 	struct pfsync_softc *sc = pfsyncif;
2221 
2222 	sc->sc_ureq_received = time_uptime;
2223 
2224 	if (sc->sc_bulk_next == NULL)
2225 		sc->sc_bulk_next = TAILQ_FIRST(&state_list);
2226 	sc->sc_bulk_last = sc->sc_bulk_next;
2227 
2228 	if (pf_status.debug >= PF_DEBUG_MISC)
2229 		printf("pfsync: received bulk update request\n");
2230 
2231 	pfsync_bulk_status(PFSYNC_BUS_START);
2232 	timeout_add(&sc->sc_bulk_tmo, 0);
2233 }
2234 
2235 void
2236 pfsync_bulk_update(void *arg)
2237 {
2238 	struct pfsync_softc *sc = arg;
2239 	struct pf_state *st;
2240 	int i = 0;
2241 	int s;
2242 
2243 	s = splsoftnet();
2244 
2245 	st = sc->sc_bulk_next;
2246 
2247 	while (st != sc->sc_bulk_last) {
2248 		if (st->sync_state == PFSYNC_S_NONE &&
2249 		    st->timeout < PFTM_MAX &&
2250 		    st->pfsync_time <= sc->sc_ureq_received) {
2251 			pfsync_update_state_req(st);
2252 			i++;
2253 		}
2254 
2255 		st = TAILQ_NEXT(st, entry_list);
2256 		if (st == NULL)
2257 			st = TAILQ_FIRST(&state_list);
2258 
2259 		if (i > 0 && TAILQ_EMPTY(&sc->sc_qs[PFSYNC_S_UPD])) {
2260 			sc->sc_bulk_next = st;
2261 			timeout_add(&sc->sc_bulk_tmo, 1);
2262 			goto out;
2263 		}
2264 	}
2265 
2266 	/* we're done */
2267 	sc->sc_bulk_next = NULL;
2268 	sc->sc_bulk_last = NULL;
2269 	pfsync_bulk_status(PFSYNC_BUS_END);
2270 
2271 out:
2272 	splx(s);
2273 }
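
/*
 * Bulk walk mechanics (informational): the scan resumes at
 * sc_bulk_next, wraps circularly over state_list, and terminates when
 * it comes back around to sc_bulk_last. Once at least one update has
 * been queued and the UPD queue has drained again (meaning a sendout
 * flushed a full packet), the walk yields and reschedules itself one
 * tick later, so a large state table cannot monopolize softnet.
 */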
2274 
2275 void
2276 pfsync_bulk_status(u_int8_t status)
2277 {
2278 	struct {
2279 		struct pfsync_subheader subh;
2280 		struct pfsync_bus bus;
2281 	} __packed r;
2282 
2283 	struct pfsync_softc *sc = pfsyncif;
2284 
2285 	bzero(&r, sizeof(r));
2286 
2287 	r.subh.action = PFSYNC_ACT_BUS;
2288 	r.subh.count = htons(1);
2289 
2290 	r.bus.creatorid = pf_status.hostid;
2291 	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
2292 	r.bus.status = status;
2293 
2294 	pfsync_send_plus(&r, sizeof(r));
2295 }
2296 
2297 void
2298 pfsync_bulk_fail(void *arg)
2299 {
2300 	struct pfsync_softc *sc = arg;
2301 
2302 	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
2303 		/* Try again */
2304 		timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
2305 		pfsync_request_update(0, 0);
2306 	} else {
2307 		/* Pretend the transfer was ok */
2308 		sc->sc_ureq_sent = 0;
2309 		sc->sc_bulk_tries = 0;
2310 #if NCARP > 0
2311 		if (!pfsync_sync_ok)
2312 			carp_group_demote_adj(&sc->sc_if, -1);
2313 #endif
2314 		pfsync_sync_ok = 1;
2315 		if (pf_status.debug >= PF_DEBUG_MISC)
2316 			printf("pfsync: failed to receive bulk update\n");
2317 	}
2318 }
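
/*
 * Retry arithmetic (informational): the bulk request is retried every
 * 5 seconds up to PFSYNC_MAX_BULKTRIES times, so a silent peer keeps
 * this host carp-demoted for roughly 5 * PFSYNC_MAX_BULKTRIES seconds
 * before it gives up and declares itself in sync anyway.
 */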
2319 
2320 void
2321 pfsync_send_plus(void *plus, size_t pluslen)
2322 {
2323 	struct pfsync_softc *sc = pfsyncif;
2324 
2325 	if (sc->sc_len + pluslen > sc->sc_if.if_mtu)
2326 		pfsync_sendout();
2327 
2328 	sc->sc_plus = plus;
2329 	sc->sc_len += (sc->sc_pluslen = pluslen);
2330 
2331 	pfsync_sendout();
2332 }
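
/*
 * Note (informational): there is a single sc_plus/sc_pluslen slot and
 * the packet is flushed synchronously, so the "plus" blob never has
 * to outlive this call; pfsync_clear_states() and
 * pfsync_bulk_status() above both pass stack memory.
 */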
2333 
2334 int
2335 pfsync_up(void)
2336 {
2337 	struct pfsync_softc *sc = pfsyncif;
2338 
2339 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
2340 		return (0);
2341 
2342 	return (1);
2343 }
2344 
2345 int
2346 pfsync_state_in_use(struct pf_state *st)
2347 {
2348 	struct pfsync_softc *sc = pfsyncif;
2349 
2350 	if (sc == NULL)
2351 		return (0);
2352 
2353 	if (st->sync_state != PFSYNC_S_NONE)
2354 		return (1);
2355 
2356 	if (sc->sc_bulk_next == NULL && sc->sc_bulk_last == NULL)
2357 		return (0);
2358 
2359 	return (1);
2360 }
2361 
2362 void
2363 pfsync_timeout(void *arg)
2364 {
2365 	int s;
2366 
2367 	s = splsoftnet();
2368 	pfsync_sendout();
2369 	splx(s);
2370 }
2371 
2372 /* this is a softnet/netisr handler */
2373 void
2374 pfsyncintr(void)
2375 {
2376 	pfsync_sendout();
2377 }
2378 
2379 int
2380 pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
2381     size_t newlen)
2382 {
2383 	/* All sysctl names at this level are terminal. */
2384 	if (namelen != 1)
2385 		return (ENOTDIR);
2386 
2387 	switch (name[0]) {
2388 	case PFSYNCCTL_STATS:
2389 		if (newp != NULL)
2390 			return (EPERM);
2391 		return (sysctl_struct(oldp, oldlenp, newp, newlen,
2392 		    &pfsyncstats, sizeof(pfsyncstats)));
2393 	default:
2394 		return (ENOPROTOOPT);
2395 	}
2396 }
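
/*
 * Userland view of the sysctl above (illustrative sketch, not part of
 * this file; assumes the usual net.inet mib wiring for
 * IPPROTO_PFSYNC): net.inet.pfsync.stats is read-only and returns a
 * raw struct pfsyncstats.
 */
#if 0
	#include <sys/param.h>
	#include <sys/socket.h>
	#include <sys/sysctl.h>
	#include <netinet/in.h>
	#include <net/if.h>
	#include <net/if_pfsync.h>
	#include <err.h>

	struct pfsyncstats ps;
	size_t len = sizeof(ps);
	int mib[] = { CTL_NET, PF_INET, IPPROTO_PFSYNC, PFSYNCCTL_STATS };

	if (sysctl(mib, 4, &ps, &len, NULL, 0) == -1)
		err(1, "sysctl");
#endif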
2397