xref: /openbsd-src/sys/net/if_pfsync.c (revision a28daedfc357b214be5c701aa8ba8adb29a7f1c2)
1 /*	$OpenBSD: if_pfsync.c,v 1.121 2009/04/15 05:11:49 david Exp $	*/
2 
3 /*
4  * Copyright (c) 2002 Michael Shalayeff
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
20  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
24  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
25  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26  * THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
31  *
32  * Permission to use, copy, modify, and distribute this software for any
33  * purpose with or without fee is hereby granted, provided that the above
34  * copyright notice and this permission notice appear in all copies.
35  *
36  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
37  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
38  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
39  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
40  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
41  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
42  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
43  */
44 
45 #include <sys/param.h>
46 #include <sys/proc.h>
47 #include <sys/systm.h>
48 #include <sys/time.h>
49 #include <sys/mbuf.h>
50 #include <sys/socket.h>
51 #include <sys/ioctl.h>
52 #include <sys/timeout.h>
53 #include <sys/kernel.h>
54 #include <sys/sysctl.h>
55 #include <sys/pool.h>
56 
57 #include <net/if.h>
58 #include <net/if_types.h>
59 #include <net/route.h>
60 #include <net/bpf.h>
61 #include <net/netisr.h>
62 #include <netinet/in.h>
63 #include <netinet/if_ether.h>
64 #include <netinet/tcp.h>
65 #include <netinet/tcp_seq.h>
66 
67 #ifdef	INET
68 #include <netinet/in_systm.h>
69 #include <netinet/in_var.h>
70 #include <netinet/ip.h>
71 #include <netinet/ip_var.h>
72 #endif
73 
74 #ifdef INET6
75 #include <netinet6/nd6.h>
76 #endif /* INET6 */
77 
78 #include "carp.h"
79 #if NCARP > 0
80 #include <netinet/ip_carp.h>
81 #endif
82 
83 #include <net/pfvar.h>
84 #include <net/if_pfsync.h>
85 
86 #include "bpfilter.h"
87 #include "pfsync.h"
88 
89 #define PFSYNC_MINPKT ( \
90 	sizeof(struct ip) + \
91 	sizeof(struct pfsync_header) + \
92 	sizeof(struct pfsync_subheader))
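
/*
 * Illustrative arithmetic (assuming the default 1500 byte MTU):
 * PFSYNC_MINPKT is the size of an "empty" pfsync packet, i.e. the IP
 * header, the pfsync header, and the one EOF subheader that
 * pfsync_sendout() always appends.  A packet can therefore carry
 * roughly
 *
 *	(1500 - PFSYNC_MINPKT) / sizeof(struct pfsync_state)
 *
 * full state records; pfsync_in_bus() and pfsyncioctl() use this same
 * expression to size the bulk-update failsafe timeout.
 */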
93 
94 struct pfsync_pkt {
95 	struct ip *ip;
96 	struct in_addr src;
97 	u_int8_t flags;
98 };
99 
100 int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
101 	    struct pfsync_state_peer *);
102 
103 int	pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int);
104 int	pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int);
105 int	pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int);
106 int	pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int);
107 int	pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int);
108 int	pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int);
109 int	pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int);
110 int	pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int);
111 int	pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int);
112 int	pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int);
113 int	pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int);
114 
115 int	pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int);
116 
117 int	(*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = {
118 	pfsync_in_clr,			/* PFSYNC_ACT_CLR */
119 	pfsync_in_ins,			/* PFSYNC_ACT_INS */
120 	pfsync_in_iack,			/* PFSYNC_ACT_INS_ACK */
121 	pfsync_in_upd,			/* PFSYNC_ACT_UPD */
122 	pfsync_in_upd_c,		/* PFSYNC_ACT_UPD_C */
123 	pfsync_in_ureq,			/* PFSYNC_ACT_UPD_REQ */
124 	pfsync_in_del,			/* PFSYNC_ACT_DEL */
125 	pfsync_in_del_c,		/* PFSYNC_ACT_DEL_C */
126 	pfsync_in_error,		/* PFSYNC_ACT_INS_F */
127 	pfsync_in_error,		/* PFSYNC_ACT_DEL_F */
128 	pfsync_in_bus,			/* PFSYNC_ACT_BUS */
129 	pfsync_in_tdb,			/* PFSYNC_ACT_TDB */
130 	pfsync_in_eof			/* PFSYNC_ACT_EOF */
131 };
132 
133 struct pfsync_q {
134 	void		(*write)(struct pf_state *, void *);
135 	size_t		len;
136 	u_int8_t	action;
137 };
138 
139 /* we have one of these for every PFSYNC_S_ */
140 void	pfsync_out_state(struct pf_state *, void *);
141 void	pfsync_out_iack(struct pf_state *, void *);
142 void	pfsync_out_upd_c(struct pf_state *, void *);
143 void	pfsync_out_del(struct pf_state *, void *);
144 
145 struct pfsync_q pfsync_qs[] = {
146 	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
147 	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
148 	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD },
149 	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
150 	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C }
151 };
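
/*
 * A sketch of how these queue descriptors are consumed (see
 * pfsync_sendout() below): st->sync_state indexes both sc_qs[] and
 * pfsync_qs[], so the table order must match the PFSYNC_S_* values.
 * For a state queued as PFSYNC_S_UPD_C, roughly:
 *
 *	struct pfsync_q *q = &pfsync_qs[st->sync_state];
 *
 *	q->write(st, ptr);		... pfsync_out_upd_c()
 *	ptr += q->len;			... sizeof(struct pfsync_upd_c)
 *	subh->action = q->action;	... PFSYNC_ACT_UPD_C
 */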
152 
153 void	pfsync_q_ins(struct pf_state *, int);
154 void	pfsync_q_del(struct pf_state *);
155 
156 struct pfsync_upd_req_item {
157 	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
158 	struct pfsync_upd_req			ur_msg;
159 };
160 TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);
161 
162 struct pfsync_deferral {
163 	TAILQ_ENTRY(pfsync_deferral)		 pd_entry;
164 	struct pf_state				*pd_st;
165 	struct mbuf				*pd_m;
166 	struct timeout				 pd_tmo;
167 };
168 TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);
169 
170 #define PFSYNC_PLSIZE	MAX(sizeof(struct pfsync_upd_req_item), \
171 			    sizeof(struct pfsync_deferral))
172 
173 void	pfsync_out_tdb(struct tdb *, void *);
174 
175 struct pfsync_softc {
176 	struct ifnet		 sc_if;
177 	struct ifnet		*sc_sync_if;
178 
179 	struct pool		 sc_pool;
180 
181 	struct ip_moptions	 sc_imo;
182 
183 	struct in_addr		 sc_sync_peer;
184 	u_int8_t		 sc_maxupdates;
185 
186 	struct ip		 sc_template;
187 
188 	struct pf_state_queue	 sc_qs[PFSYNC_S_COUNT];
189 	size_t			 sc_len;
190 
191 	struct pfsync_upd_reqs	 sc_upd_req_list;
192 
193 	struct pfsync_deferrals	 sc_deferrals;
194 	u_int			 sc_deferred;
195 
196 	void			*sc_plus;
197 	size_t			 sc_pluslen;
198 
199 	u_int32_t		 sc_ureq_sent;
200 	int			 sc_bulk_tries;
201 	struct timeout		 sc_bulkfail_tmo;
202 
203 	u_int32_t		 sc_ureq_received;
204 	struct pf_state		*sc_bulk_next;
205 	struct pf_state		*sc_bulk_last;
206 	struct timeout		 sc_bulk_tmo;
207 
208 	TAILQ_HEAD(, tdb)	 sc_tdb_q;
209 
210 	struct timeout		 sc_tmo;
211 };
212 
213 struct pfsync_softc	*pfsyncif = NULL;
214 struct pfsyncstats	 pfsyncstats;
215 
216 void	pfsyncattach(int);
217 int	pfsync_clone_create(struct if_clone *, int);
218 int	pfsync_clone_destroy(struct ifnet *);
219 int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
220 	    struct pf_state_peer *);
221 void	pfsync_update_net_tdb(struct pfsync_tdb *);
222 int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
223 	    struct rtentry *);
224 int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
225 void	pfsyncstart(struct ifnet *);
226 
227 struct mbuf *pfsync_if_dequeue(struct ifnet *);
228 struct mbuf *pfsync_get_mbuf(struct pfsync_softc *);
229 
230 void	pfsync_deferred(struct pf_state *, int);
231 void	pfsync_undefer(struct pfsync_deferral *, int);
232 void	pfsync_defer_tmo(void *);
233 
234 void	pfsync_request_update(u_int32_t, u_int64_t);
235 void	pfsync_update_state_req(struct pf_state *);
236 
237 void	pfsync_drop(struct pfsync_softc *);
238 void	pfsync_sendout(void);
239 void	pfsync_send_plus(void *, size_t);
240 int	pfsync_tdb_sendout(struct pfsync_softc *);
241 int	pfsync_sendout_mbuf(struct pfsync_softc *, struct mbuf *);
242 void	pfsync_timeout(void *);
243 void	pfsync_tdb_timeout(void *);
244 void	pfsync_send_bus(struct pfsync_softc *, u_int8_t);
245 
246 void	pfsync_bulk_start(void);
247 void	pfsync_bulk_status(u_int8_t);
248 void	pfsync_bulk_update(void *);
249 void	pfsync_bulk_fail(void *);
250 
251 #define PFSYNC_MAX_BULKTRIES	12
252 int	pfsync_sync_ok;
253 
254 struct if_clone	pfsync_cloner =
255     IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);
256 
257 void
258 pfsyncattach(int npfsync)
259 {
260 	if_clone_attach(&pfsync_cloner);
261 }
262 int
263 pfsync_clone_create(struct if_clone *ifc, int unit)
264 {
265 	struct pfsync_softc *sc;
266 	struct ifnet *ifp;
267 	int q;
268 
269 	if (unit != 0)
270 		return (EINVAL);
271 
272 	pfsync_sync_ok = 1;
273 
274 	sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_NOWAIT | M_ZERO);
275 	if (sc == NULL)
276 		return (ENOMEM);
277 
278 	for (q = 0; q < PFSYNC_S_COUNT; q++)
279 		TAILQ_INIT(&sc->sc_qs[q]);
280 
281 	pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, 0, 0, "pfsync", NULL);
282 	TAILQ_INIT(&sc->sc_upd_req_list);
283 	TAILQ_INIT(&sc->sc_deferrals);
284 	sc->sc_deferred = 0;
285 
286 	TAILQ_INIT(&sc->sc_tdb_q);
287 
288 	sc->sc_len = PFSYNC_MINPKT;
289 	sc->sc_maxupdates = 128;
290 
291 	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
292 	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
293 	    M_WAITOK | M_ZERO);
294 	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
295 
296 	ifp = &sc->sc_if;
297 	snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
298 	ifp->if_softc = sc;
299 	ifp->if_ioctl = pfsyncioctl;
300 	ifp->if_output = pfsyncoutput;
301 	ifp->if_start = pfsyncstart;
302 	ifp->if_type = IFT_PFSYNC;
303 	ifp->if_snd.ifq_maxlen = ifqmaxlen;
304 	ifp->if_hdrlen = sizeof(struct pfsync_header);
305 	ifp->if_mtu = 1500; /* XXX */
306 	ifp->if_hardmtu = MCLBYTES; /* XXX */
307 	timeout_set(&sc->sc_tmo, pfsync_timeout, sc);
308 	timeout_set(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
309 	timeout_set(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);
310 
311 	if_attach(ifp);
312 	if_alloc_sadl(ifp);
313 
314 #if NCARP > 0
315 	if_addgroup(ifp, "carp");
316 #endif
317 
318 #if NBPFILTER > 0
319 	bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
320 #endif
321 
322 	pfsyncif = sc;
323 
324 	return (0);
325 }
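
/*
 * The cloned interface is normally configured from userland with
 * something like:
 *
 *	# ifconfig pfsync0 syncdev em0 up
 *
 * which reaches this driver as SIOCSETPFSYNC in pfsyncioctl() below
 * (em0 here is just an example sync interface).
 */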
326 
327 int
328 pfsync_clone_destroy(struct ifnet *ifp)
329 {
330 	struct pfsync_softc *sc = ifp->if_softc;
331 
332 	timeout_del(&sc->sc_bulk_tmo);
333 	timeout_del(&sc->sc_tmo);
334 #if NCARP > 0
335 	if (!pfsync_sync_ok)
336 		carp_group_demote_adj(&sc->sc_if, -1);
337 #endif
338 #if NBPFILTER > 0
339 	bpfdetach(ifp);
340 #endif
341 	if_detach(ifp);
342 
343 	pfsync_drop(sc);
344 
345 	while (sc->sc_deferred > 0)
346 		pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);
347 
348 	pool_destroy(&sc->sc_pool);
349 	free(sc->sc_imo.imo_membership, M_IPMOPTS);
350 	free(sc, M_DEVBUF);
351 
352 	pfsyncif = NULL;
353 
354 	return (0);
355 }
356 
357 struct mbuf *
358 pfsync_if_dequeue(struct ifnet *ifp)
359 {
360 	struct mbuf *m;
361 
362 	IF_DEQUEUE(&ifp->if_snd, m);
363 
364 	return (m);
365 }
366 
367 /*
368  * Start output on the pfsync interface.
369  */
370 void
371 pfsyncstart(struct ifnet *ifp)
372 {
373 	struct mbuf *m;
374 	int s;
375 
376 	s = splnet();
377 	while ((m = pfsync_if_dequeue(ifp)) != NULL) {
378 		IF_DROP(&ifp->if_snd);
379 		m_freem(m);
380 	}
381 	splx(s);
382 }
383 
384 int
385 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
386     struct pf_state_peer *d)
387 {
388 	if (s->scrub.scrub_flag && d->scrub == NULL) {
389 		d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
390 		if (d->scrub == NULL)
391 			return (ENOMEM);
392 	}
393 
394 	return (0);
395 }
396 
397 void
398 pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
399 {
400 	bzero(sp, sizeof(struct pfsync_state));
401 
402 	/* copy from state key */
403 	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
404 	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
405 	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
406 	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
407 	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
408 	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
409 	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
410 	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
411 	sp->proto = st->key[PF_SK_WIRE]->proto;
412 	sp->af = st->key[PF_SK_WIRE]->af;
413 
414 	/* copy from state */
415 	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
416 	bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
417 	sp->creation = htonl(time_second - st->creation);
418 	sp->expire = pf_state_expires(st);
419 	if (sp->expire <= time_second)
420 		sp->expire = htonl(0);
421 	else
422 		sp->expire = htonl(sp->expire - time_second);
423 
424 	sp->direction = st->direction;
425 	sp->log = st->log;
426 	sp->timeout = st->timeout;
427 	sp->state_flags = st->state_flags;
428 	if (st->src_node)
429 		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;
430 	if (st->nat_src_node)
431 		sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE;
432 
433 	bcopy(&st->id, &sp->id, sizeof(sp->id));
434 	sp->creatorid = st->creatorid;
435 	pf_state_peer_hton(&st->src, &sp->src);
436 	pf_state_peer_hton(&st->dst, &sp->dst);
437 
438 	if (st->rule.ptr == NULL)
439 		sp->rule = htonl(-1);
440 	else
441 		sp->rule = htonl(st->rule.ptr->nr);
442 	if (st->anchor.ptr == NULL)
443 		sp->anchor = htonl(-1);
444 	else
445 		sp->anchor = htonl(st->anchor.ptr->nr);
446 	if (st->nat_rule.ptr == NULL)
447 		sp->nat_rule = htonl(-1);
448 	else
449 		sp->nat_rule = htonl(st->nat_rule.ptr->nr);
450 
451 	pf_state_counter_hton(st->packets[0], sp->packets[0]);
452 	pf_state_counter_hton(st->packets[1], sp->packets[1]);
453 	pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
454 	pf_state_counter_hton(st->bytes[1], sp->bytes[1]);
455 
456 }
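
/*
 * A worked example of the expire encoding above: timers travel as
 * "seconds remaining" rather than absolute times, so peers need not
 * share a clock.  If pf_state_expires(st) returns time_second + 30,
 * the wire carries htonl(30); a state that has already expired is
 * clamped to htonl(0) instead of going negative.
 */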
457 
458 int
459 pfsync_state_import(struct pfsync_state *sp, u_int8_t flags)
460 {
461 	struct pf_state	*st = NULL;
462 	struct pf_state_key *skw = NULL, *sks = NULL;
463 	struct pf_rule *r = NULL;
464 	struct pfi_kif	*kif;
465 	int pool_flags;
466 	int error;
467 
468 	if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) {
469 		printf("pfsync_state_import: invalid creator id:"
470 		    " %08x\n", ntohl(sp->creatorid));
471 		return (EINVAL);
472 	}
473 
474 	if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
475 		if (pf_status.debug >= PF_DEBUG_MISC)
476 			printf("pfsync_state_import: "
477 			    "unknown interface: %s\n", sp->ifname);
478 		if (flags & PFSYNC_SI_IOCTL)
479 			return (EINVAL);
480 		return (0);	/* skip this state */
481 	}
482 
483 	/*
484 	 * If the ruleset checksums match or the state is coming from the ioctl,
485 	 * it's safe to associate the state with the rule of that number.
486 	 */
487 	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
488 	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
489 	    pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount)
490 		r = pf_main_ruleset.rules[
491 		    PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)];
492 	else
493 		r = &pf_default_rule;
494 
495 	if ((r->max_states && r->states_cur >= r->max_states))
496 		goto cleanup;
497 
498 	if (flags & PFSYNC_SI_IOCTL)
499 		pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
500 	else
501 		pool_flags = PR_LIMITFAIL | PR_ZERO;
502 
503 	if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
504 		goto cleanup;
505 
506 	if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
507 		goto cleanup;
508 
509 	if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
510 	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
511 	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
512 	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
513 	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
514 	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1]) {
515 		if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
516 			goto cleanup;
517 	} else
518 		sks = skw;
519 
520 	/* allocate memory for scrub info */
521 	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
522 	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
523 		goto cleanup;
524 
525 	/* copy to state key(s) */
526 	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
527 	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
528 	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
529 	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
530 	skw->proto = sp->proto;
531 	skw->af = sp->af;
532 	if (sks != skw) {
533 		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
534 		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
535 		sks->port[0] = sp->key[PF_SK_STACK].port[0];
536 		sks->port[1] = sp->key[PF_SK_STACK].port[1];
537 		sks->proto = sp->proto;
538 		sks->af = sp->af;
539 	}
540 
541 	/* copy to state */
542 	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
543 	st->creation = time_second - ntohl(sp->creation);
544 	st->expire = time_second;
545 	if (sp->expire) {
546 		/* XXX No adaptive scaling. */
547 		st->expire -= r->timeout[sp->timeout] - ntohl(sp->expire);
548 	}
549 
551 	st->direction = sp->direction;
552 	st->log = sp->log;
553 	st->timeout = sp->timeout;
554 	st->state_flags = sp->state_flags;
555 
556 	bcopy(sp->id, &st->id, sizeof(st->id));
557 	st->creatorid = sp->creatorid;
558 	pf_state_peer_ntoh(&sp->src, &st->src);
559 	pf_state_peer_ntoh(&sp->dst, &st->dst);
560 
561 	st->rule.ptr = r;
562 	st->nat_rule.ptr = NULL;
563 	st->anchor.ptr = NULL;
564 	st->rt_kif = NULL;
565 
566 	st->pfsync_time = time_uptime;
567 	st->sync_state = PFSYNC_S_NONE;
568 
569 	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
570 	r->states_cur++;
571 	r->states_tot++;
572 
573 	if (!ISSET(flags, PFSYNC_SI_IOCTL))
574 		SET(st->state_flags, PFSTATE_NOSYNC);
575 
576 	if ((error = pf_state_insert(kif, skw, sks, st)) != 0) {
577 		/* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */
578 		r->states_cur--;
579 		goto cleanup_state;
580 	}
581 
582 	if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
583 		CLR(st->state_flags, PFSTATE_NOSYNC);
584 		if (ISSET(st->state_flags, PFSTATE_ACK)) {
585 			pfsync_q_ins(st, PFSYNC_S_IACK);
586 			schednetisr(NETISR_PFSYNC);
587 		}
588 	}
589 	CLR(st->state_flags, PFSTATE_ACK);
590 
591 	return (0);
592 
593  cleanup:
594 	error = ENOMEM;
595 	if (skw == sks)
596 		sks = NULL;
597 	if (skw != NULL)
598 		pool_put(&pf_state_key_pl, skw);
599 	if (sks != NULL)
600 		pool_put(&pf_state_key_pl, sks);
601 
602  cleanup_state:	/* pf_state_insert frees the state keys */
603 	if (st) {
604 		if (st->dst.scrub)
605 			pool_put(&pf_state_scrub_pl, st->dst.scrub);
606 		if (st->src.scrub)
607 			pool_put(&pf_state_scrub_pl, st->src.scrub);
608 		pool_put(&pf_state_pl, st);
609 	}
610 	return (error);
611 }
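
/*
 * A worked example of the expire import above, mirroring the export
 * side: if the peer sent expire = 30 for a state whose rule timeout
 * r->timeout[sp->timeout] is 60 seconds, the state has been idle for
 * 60 - 30 = 30 seconds, so st->expire is backdated to time_second - 30
 * and pf_state_expires() yields the same remaining lifetime on both
 * peers (modulo adaptive scaling, per the XXX above).
 */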
612 
613 void
614 pfsync_input(struct mbuf *m, ...)
615 {
616 	struct pfsync_softc *sc = pfsyncif;
617 	struct pfsync_pkt pkt;
618 	struct ip *ip = mtod(m, struct ip *);
619 	struct pfsync_header *ph;
620 	struct pfsync_subheader subh;
621 
622 	int offset, len;
623 	int rv;
624 
625 	pfsyncstats.pfsyncs_ipackets++;
626 
627 	/* verify that we have a sync interface configured */
628 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
629 	    sc->sc_sync_if == NULL || !pf_status.running)
630 		goto done;
631 
632 	/* verify that the packet came in on the right interface */
633 	if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
634 		pfsyncstats.pfsyncs_badif++;
635 		goto done;
636 	}
637 
638 	sc->sc_if.if_ipackets++;
639 	sc->sc_if.if_ibytes += m->m_pkthdr.len;
640 
641 	/* verify that the IP TTL is 255. */
642 	if (ip->ip_ttl != PFSYNC_DFLTTL) {
643 		pfsyncstats.pfsyncs_badttl++;
644 		goto done;
645 	}
646 
647 	offset = ip->ip_hl << 2;
648 	if (m->m_pkthdr.len < offset + sizeof(*ph)) {
649 		pfsyncstats.pfsyncs_hdrops++;
650 		goto done;
651 	}
652 
653 	if (offset + sizeof(*ph) > m->m_len) {
654 		if (m_pullup(m, offset + sizeof(*ph)) == NULL) {
655 			pfsyncstats.pfsyncs_hdrops++;
656 			return;
657 		}
658 		ip = mtod(m, struct ip *);
659 	}
660 	ph = (struct pfsync_header *)((char *)ip + offset);
661 
662 	/* verify the version */
663 	if (ph->version != PFSYNC_VERSION) {
664 		pfsyncstats.pfsyncs_badver++;
665 		goto done;
666 	}
667 	len = ntohs(ph->len) + offset;
668 	if (m->m_pkthdr.len < len) {
669 		pfsyncstats.pfsyncs_badlen++;
670 		goto done;
671 	}
672 
673 	/* Cheaper to grab this now than having to mess with mbufs later */
674 	pkt.ip = ip;
675 	pkt.src = ip->ip_src;
676 	pkt.flags = 0;
677 
678 	if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
679 		pkt.flags |= PFSYNC_SI_CKSUM;
680 
681 	offset += sizeof(*ph);
682 	while (offset <= len - sizeof(subh)) {
683 		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
684 		offset += sizeof(subh);
685 
686 		if (subh.action >= PFSYNC_ACT_MAX) {
687 			pfsyncstats.pfsyncs_badact++;
688 			goto done;
689 		}
690 
691 		rv = (*pfsync_acts[subh.action])(&pkt, m, offset,
692 		    ntohs(subh.count));
693 		if (rv == -1)
694 			return;
695 
696 		offset += rv;
697 	}
698 
699 done:
700 	m_freem(m);
701 }
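
/*
 * Note on the dispatch loop above: each pfsync_in_*() handler returns
 * either the number of payload bytes it consumed, so the loop can
 * advance offset to the next subheader, or -1 with the mbuf already
 * gone (m_pulldown() frees it on failure, pfsync_in_eof() and
 * pfsync_in_error() free it explicitly), in which case pfsync_input()
 * must return without touching m again.
 */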
702 
703 int
704 pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
705 {
706 	struct pfsync_clr *clr;
707 	struct mbuf *mp;
708 	int len = sizeof(*clr) * count;
709 	int i, offp;
710 
711 	struct pf_state *st, *nexts;
712 	struct pf_state_key *sk, *nextsk;
713 	struct pf_state_item *si;
714 	u_int32_t creatorid;
715 	int s;
716 
717 	mp = m_pulldown(m, offset, len, &offp);
718 	if (mp == NULL) {
719 		pfsyncstats.pfsyncs_badlen++;
720 		return (-1);
721 	}
722 	clr = (struct pfsync_clr *)(mp->m_data + offp);
723 
724 	s = splsoftnet();
725 	for (i = 0; i < count; i++) {
726 		creatorid = clr[i].creatorid;
727 
728 		if (clr[i].ifname[0] == '\0') {
729 			for (st = RB_MIN(pf_state_tree_id, &tree_id);
730 			    st; st = nexts) {
731 				nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
732 				if (st->creatorid == creatorid) {
733 					SET(st->state_flags, PFSTATE_NOSYNC);
734 					pf_unlink_state(st);
735 				}
736 			}
737 		} else {
738 			if (pfi_kif_get(clr[i].ifname) == NULL)
739 				continue;
740 
741 			/* XXX correct? */
742 			for (sk = RB_MIN(pf_state_tree, &pf_statetbl);
743 			    sk; sk = nextsk) {
744 				nextsk = RB_NEXT(pf_state_tree,
745 				    &pf_statetbl, sk);
746 				TAILQ_FOREACH(si, &sk->states, entry) {
747 					if (si->s->creatorid == creatorid) {
748 						SET(si->s->state_flags,
749 						    PFSTATE_NOSYNC);
750 						pf_unlink_state(si->s);
751 					}
752 				}
753 			}
754 		}
755 	}
756 	splx(s);
757 
758 	return (len);
759 }
760 
761 int
762 pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
763 {
764 	struct mbuf *mp;
765 	struct pfsync_state *sa, *sp;
766 	int len = sizeof(*sp) * count;
767 	int i, offp;
768 
769 	int s;
770 
771 	mp = m_pulldown(m, offset, len, &offp);
772 	if (mp == NULL) {
773 		pfsyncstats.pfsyncs_badlen++;
774 		return (-1);
775 	}
776 	sa = (struct pfsync_state *)(mp->m_data + offp);
777 
778 	s = splsoftnet();
779 	for (i = 0; i < count; i++) {
780 		sp = &sa[i];
781 
782 		/* check for invalid values */
783 		if (sp->timeout >= PFTM_MAX ||
784 		    sp->src.state > PF_TCPS_PROXY_DST ||
785 		    sp->dst.state > PF_TCPS_PROXY_DST ||
786 		    sp->direction > PF_OUT ||
787 		    (sp->af != AF_INET && sp->af != AF_INET6)) {
788 			if (pf_status.debug >= PF_DEBUG_MISC) {
789 				printf("pfsync_input: PFSYNC5_ACT_INS: "
790 				    "invalid value\n");
791 			}
792 			pfsyncstats.pfsyncs_badval++;
793 			continue;
794 		}
795 
796 		if (pfsync_state_import(sp, pkt->flags) == ENOMEM) {
797 			/* drop out, but process the rest of the actions */
798 			break;
799 		}
800 	}
801 	splx(s);
802 
803 	return (len);
804 }
805 
806 int
807 pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
808 {
809 	struct pfsync_ins_ack *ia, *iaa;
810 	struct pf_state_cmp id_key;
811 	struct pf_state *st;
812 
813 	struct mbuf *mp;
814 	int len = count * sizeof(*ia);
815 	int offp, i;
816 	int s;
817 
818 	mp = m_pulldown(m, offset, len, &offp);
819 	if (mp == NULL) {
820 		pfsyncstats.pfsyncs_badlen++;
821 		return (-1);
822 	}
823 	iaa = (struct pfsync_ins_ack *)(mp->m_data + offp);
824 
825 	s = splsoftnet();
826 	for (i = 0; i < count; i++) {
827 		ia = &iaa[i];
828 
829 		bcopy(&ia->id, &id_key.id, sizeof(id_key.id));
830 		id_key.creatorid = ia->creatorid;
831 
832 		st = pf_find_state_byid(&id_key);
833 		if (st == NULL)
834 			continue;
835 
836 		if (ISSET(st->state_flags, PFSTATE_ACK))
837 			pfsync_deferred(st, 0);
838 	}
839 	splx(s);
840 
841 	return (len);
842 }
843 
844 int
845 pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
846     struct pfsync_state_peer *dst)
847 {
848 	int sfail = 0;
849 
850 	/*
851 	 * The state should never go backwards except
852 	 * for syn-proxy states.  Neither should the
853 	 * sequence window slide backwards.
854 	 */
855 	if (st->src.state > src->state &&
856 	    (st->src.state < PF_TCPS_PROXY_SRC ||
857 	    src->state >= PF_TCPS_PROXY_SRC))
858 		sfail = 1;
859 	else if (SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))
860 		sfail = 3;
861 	else if (st->dst.state > dst->state) {
862 		/* There might still be useful
863 		 * information about the src state here,
864 		 * so import that part of the update,
865 		 * then "fail" so we send the updated
866 		 * state back to the peer who is missing
867 		 * what we know. */
868 		pf_state_peer_ntoh(src, &st->src);
869 		/* XXX do anything with timeouts? */
870 		sfail = 7;
871 	} else if (st->dst.state >= TCPS_SYN_SENT &&
872 	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))
873 		sfail = 4;
874 
875 	return (sfail);
876 }
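
/*
 * For reference, the sfail codes above (nonzero means the incoming
 * update is stale and the callers re-send our own copy of the state):
 * 1 = src state went backwards, 3 = src sequence window slid
 * backwards, 4 = dst sequence window slid backwards, 7 = dst state
 * went backwards but the src half was still imported ("partial" in
 * the callers' printf).  The non-TCP paths in the callers use 5 and 6
 * for src/dst state regressions.
 */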
877 
878 int
879 pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
880 {
881 	struct pfsync_state *sa, *sp;
882 	struct pf_state_cmp id_key;
883 	struct pf_state_key *sk;
884 	struct pf_state *st;
885 	int sfail;
886 
887 	struct mbuf *mp;
888 	int len = count * sizeof(*sp);
889 	int offp, i;
890 	int s;
891 
892 	mp = m_pulldown(m, offset, len, &offp);
893 	if (mp == NULL) {
894 		pfsyncstats.pfsyncs_badlen++;
895 		return (-1);
896 	}
897 	sa = (struct pfsync_state *)(mp->m_data + offp);
898 
899 	s = splsoftnet();
900 	for (i = 0; i < count; i++) {
901 		sp = &sa[i];
902 
903 		/* check for invalid values */
904 		if (sp->timeout >= PFTM_MAX ||
905 		    sp->src.state > PF_TCPS_PROXY_DST ||
906 		    sp->dst.state > PF_TCPS_PROXY_DST) {
907 			if (pf_status.debug >= PF_DEBUG_MISC) {
908 				printf("pfsync_input: PFSYNC_ACT_UPD: "
909 				    "invalid value\n");
910 			}
911 			pfsyncstats.pfsyncs_badval++;
912 			continue;
913 		}
914 
915 		bcopy(sp->id, &id_key.id, sizeof(id_key.id));
916 		id_key.creatorid = sp->creatorid;
917 
918 		st = pf_find_state_byid(&id_key);
919 		if (st == NULL) {
920 			/* insert the update */
921 			if (pfsync_state_import(sp, 0))
922 				pfsyncstats.pfsyncs_badstate++;
923 			continue;
924 		}
925 
926 		if (ISSET(st->state_flags, PFSTATE_ACK))
927 			pfsync_deferred(st, 1);
928 
929 		sk = st->key[PF_SK_WIRE];	/* XXX right one? */
930 		sfail = 0;
931 		if (sk->proto == IPPROTO_TCP)
932 			sfail = pfsync_upd_tcp(st, &sp->src, &sp->dst);
933 		else {
934 			/*
935 			 * Non-TCP protocol state machines always go
936 			 * forwards
937 			 */
938 			if (st->src.state > sp->src.state)
939 				sfail = 5;
940 			else if (st->dst.state > sp->dst.state)
941 				sfail = 6;
942 		}
943 
944 		if (sfail) {
945 			if (pf_status.debug >= PF_DEBUG_NOISY) {
946 				printf("pfsync: %s stale update (%d)"
947 				    " id: %016llx creatorid: %08x\n",
948 				    (sfail < 7 ?  "ignoring" : "partial"),
949 				    sfail, betoh64(st->id),
950 				    ntohl(st->creatorid));
951 			}
952 			pfsyncstats.pfsyncs_stale++;
953 
954 			pfsync_update_state(st);
955 			schednetisr(NETISR_PFSYNC);
956 			continue;
957 		}
958 		pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
959 		pf_state_peer_ntoh(&sp->src, &st->src);
960 		pf_state_peer_ntoh(&sp->dst, &st->dst);
961 		st->expire = ntohl(sp->expire) + time_second;
962 		st->timeout = sp->timeout;
963 		st->pfsync_time = time_uptime;
964 	}
965 	splx(s);
966 
967 	return (len);
968 }
969 
970 int
971 pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
972 {
973 	struct pfsync_upd_c *ua, *up;
974 	struct pf_state_key *sk;
975 	struct pf_state_cmp id_key;
976 	struct pf_state *st;
977 
978 	int len = count * sizeof(*up);
979 	int sfail;
980 
981 	struct mbuf *mp;
982 	int offp, i;
983 	int s;
984 
985 	mp = m_pulldown(m, offset, len, &offp);
986 	if (mp == NULL) {
987 		pfsyncstats.pfsyncs_badlen++;
988 		return (-1);
989 	}
990 	ua = (struct pfsync_upd_c *)(mp->m_data + offp);
991 
992 	s = splsoftnet();
993 	for (i = 0; i < count; i++) {
994 		up = &ua[i];
995 
996 		/* check for invalid values */
997 		if (up->timeout >= PFTM_MAX ||
998 		    up->src.state > PF_TCPS_PROXY_DST ||
999 		    up->dst.state > PF_TCPS_PROXY_DST) {
1000 			if (pf_status.debug >= PF_DEBUG_MISC) {
1001 				printf("pfsync_input: "
1002 				    "PFSYNC_ACT_UPD_C: "
1003 				    "invalid value\n");
1004 			}
1005 			pfsyncstats.pfsyncs_badval++;
1006 			continue;
1007 		}
1008 
1009 		bcopy(&up->id, &id_key.id, sizeof(id_key.id));
1010 		id_key.creatorid = up->creatorid;
1011 
1012 		st = pf_find_state_byid(&id_key);
1013 		if (st == NULL) {
1014 			/* We don't have this state. Ask for it. */
1015 			pfsync_request_update(id_key.creatorid, id_key.id);
1016 			continue;
1017 		}
1018 
1019 		if (ISSET(st->state_flags, PFSTATE_ACK))
1020 			pfsync_deferred(st, 1);
1021 
1022 		sk = st->key[PF_SK_WIRE]; /* XXX right one? */
1023 		sfail = 0;
1024 		if (sk->proto == IPPROTO_TCP)
1025 			sfail = pfsync_upd_tcp(st, &up->src, &up->dst);
1026 		else {
1027 			/*
1028 			 * Non-TCP protocol state machines always go forwards
1029 			 */
1030 			if (st->src.state > up->src.state)
1031 				sfail = 5;
1032 			else if (st->dst.state > up->dst.state)
1033 				sfail = 6;
1034 		}
1035 
1036 		if (sfail) {
1037 			if (pf_status.debug >= PF_DEBUG_NOISY) {
1038 				printf("pfsync: ignoring stale update "
1039 				    "(%d) id: %016llx "
1040 				    "creatorid: %08x\n", sfail,
1041 				    betoh64(st->id),
1042 				    ntohl(st->creatorid));
1043 			}
1044 			pfsyncstats.pfsyncs_stale++;
1045 
1046 			pfsync_update_state(st);
1047 			schednetisr(NETISR_PFSYNC);
1048 			continue;
1049 		}
1050 		pfsync_alloc_scrub_memory(&up->dst, &st->dst);
1051 		pf_state_peer_ntoh(&up->src, &st->src);
1052 		pf_state_peer_ntoh(&up->dst, &st->dst);
1053 		st->expire = ntohl(up->expire) + time_second;
1054 		st->timeout = up->timeout;
1055 		st->pfsync_time = time_uptime;
1056 	}
1057 	splx(s);
1058 
1059 	return (len);
1060 }
1061 
1062 int
1063 pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1064 {
1065 	struct pfsync_upd_req *ur, *ura;
1066 	struct mbuf *mp;
1067 	int len = count * sizeof(*ur);
1068 	int i, offp;
1069 
1070 	struct pf_state_cmp id_key;
1071 	struct pf_state *st;
1072 
1073 	mp = m_pulldown(m, offset, len, &offp);
1074 	if (mp == NULL) {
1075 		pfsyncstats.pfsyncs_badlen++;
1076 		return (-1);
1077 	}
1078 	ura = (struct pfsync_upd_req *)(mp->m_data + offp);
1079 
1080 	for (i = 0; i < count; i++) {
1081 		ur = &ura[i];
1082 
1083 		bcopy(&ur->id, &id_key.id, sizeof(id_key.id));
1084 		id_key.creatorid = ur->creatorid;
1085 
1086 		if (id_key.id == 0 && id_key.creatorid == 0)
1087 			pfsync_bulk_start();
1088 		else {
1089 			st = pf_find_state_byid(&id_key);
1090 			if (st == NULL) {
1091 				pfsyncstats.pfsyncs_badstate++;
1092 				continue;
1093 			}
1094 			if (ISSET(st->state_flags, PFSTATE_NOSYNC))
1095 				continue;
1096 
1097 			pfsync_update_state_req(st);
1098 		}
1099 	}
1100 
1101 	return (len);
1102 }
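
/*
 * Note on the convention above: an update request with both id and
 * creatorid zero is the wildcard form that pfsync_request_update(0, 0)
 * sends from pfsyncioctl(), and asks the peer to start a full bulk
 * transfer instead of replying with a single state.
 */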
1103 
1104 int
1105 pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1106 {
1107 	struct mbuf *mp;
1108 	struct pfsync_state *sa, *sp;
1109 	struct pf_state_cmp id_key;
1110 	struct pf_state *st;
1111 	int len = count * sizeof(*sp);
1112 	int offp, i;
1113 	int s;
1114 
1115 	mp = m_pulldown(m, offset, len, &offp);
1116 	if (mp == NULL) {
1117 		pfsyncstats.pfsyncs_badlen++;
1118 		return (-1);
1119 	}
1120 	sa = (struct pfsync_state *)(mp->m_data + offp);
1121 
1122 	s = splsoftnet();
1123 	for (i = 0; i < count; i++) {
1124 		sp = &sa[i];
1125 
1126 		bcopy(sp->id, &id_key.id, sizeof(id_key.id));
1127 		id_key.creatorid = sp->creatorid;
1128 
1129 		st = pf_find_state_byid(&id_key);
1130 		if (st == NULL) {
1131 			pfsyncstats.pfsyncs_badstate++;
1132 			continue;
1133 		}
1134 		SET(st->state_flags, PFSTATE_NOSYNC);
1135 		pf_unlink_state(st);
1136 	}
1137 	splx(s);
1138 
1139 	return (len);
1140 }
1141 
1142 int
1143 pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1144 {
1145 	struct mbuf *mp;
1146 	struct pfsync_del_c *sa, *sp;
1147 	struct pf_state_cmp id_key;
1148 	struct pf_state *st;
1149 	int len = count * sizeof(*sp);
1150 	int offp, i;
1151 	int s;
1152 
1153 	mp = m_pulldown(m, offset, len, &offp);
1154 	if (mp == NULL) {
1155 		pfsyncstats.pfsyncs_badlen++;
1156 		return (-1);
1157 	}
1158 	sa = (struct pfsync_del_c *)(mp->m_data + offp);
1159 
1160 	s = splsoftnet();
1161 	for (i = 0; i < count; i++) {
1162 		sp = &sa[i];
1163 
1164 		bcopy(&sp->id, &id_key.id, sizeof(id_key.id));
1165 		id_key.creatorid = sp->creatorid;
1166 
1167 		st = pf_find_state_byid(&id_key);
1168 		if (st == NULL) {
1169 			pfsyncstats.pfsyncs_badstate++;
1170 			continue;
1171 		}
1172 
1173 		SET(st->state_flags, PFSTATE_NOSYNC);
1174 		pf_unlink_state(st);
1175 	}
1176 	splx(s);
1177 
1178 	return (len);
1179 }
1180 
1181 int
1182 pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1183 {
1184 	struct pfsync_softc *sc = pfsyncif;
1185 	struct pfsync_bus *bus;
1186 	struct mbuf *mp;
1187 	int len = count * sizeof(*bus);
1188 	int offp;
1189 
1190 	/* If we're not waiting for a bulk update, who cares. */
1191 	if (sc->sc_ureq_sent == 0)
1192 		return (len);
1193 
1194 	mp = m_pulldown(m, offset, len, &offp);
1195 	if (mp == NULL) {
1196 		pfsyncstats.pfsyncs_badlen++;
1197 		return (-1);
1198 	}
1199 	bus = (struct pfsync_bus *)(mp->m_data + offp);
1200 
1201 	switch (bus->status) {
1202 	case PFSYNC_BUS_START:
1203 		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
1204 		    pf_pool_limits[PF_LIMIT_STATES].limit /
1205 		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
1206 		    sizeof(struct pfsync_state)));
1207 		if (pf_status.debug >= PF_DEBUG_MISC)
1208 			printf("pfsync: received bulk update start\n");
1209 		break;
1210 
1211 	case PFSYNC_BUS_END:
1212 		if (time_uptime - ntohl(bus->endtime) >=
1213 		    sc->sc_ureq_sent) {
1214 			/* that's it, we're happy */
1215 			sc->sc_ureq_sent = 0;
1216 			sc->sc_bulk_tries = 0;
1217 			timeout_del(&sc->sc_bulkfail_tmo);
1218 #if NCARP > 0
1219 			if (!pfsync_sync_ok)
1220 				carp_group_demote_adj(&sc->sc_if, -1);
1221 #endif
1222 			pfsync_sync_ok = 1;
1223 			if (pf_status.debug >= PF_DEBUG_MISC)
1224 				printf("pfsync: received valid "
1225 				    "bulk update end\n");
1226 		} else {
1227 			if (pf_status.debug >= PF_DEBUG_MISC)
1228 				printf("pfsync: received invalid "
1229 				    "bulk update end: bad timestamp\n");
1230 		}
1231 		break;
1232 	}
1233 
1234 	return (len);
1235 }
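
/*
 * Illustrative arithmetic for the failsafe timeout above, assuming the
 * default 1500 byte MTU and a state limit of 10000: each packet holds
 * about (MTU - PFSYNC_MINPKT) / sizeof(struct pfsync_state) full
 * states, so the timeout is four seconds plus one tick per packet the
 * bulk update is expected to take.  pfsyncioctl() arms the same
 * timeout when it first requests the bulk update.
 */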
1236 
1237 int
1238 pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1239 {
1240 	int len = count * sizeof(struct pfsync_tdb);
1241 
1242 #if defined(IPSEC)
1243 	struct pfsync_tdb *tp;
1244 	struct mbuf *mp;
1245 	int offp;
1246 	int i;
1247 	int s;
1248 
1249 	mp = m_pulldown(m, offset, len, &offp);
1250 	if (mp == NULL) {
1251 		pfsyncstats.pfsyncs_badlen++;
1252 		return (-1);
1253 	}
1254 	tp = (struct pfsync_tdb *)(mp->m_data + offp);
1255 
1256 	s = splsoftnet();
1257 	for (i = 0; i < count; i++)
1258 		pfsync_update_net_tdb(&tp[i]);
1259 	splx(s);
1260 #endif
1261 
1262 	return (len);
1263 }
1264 
1265 #if defined(IPSEC)
1266 /* Update an in-kernel tdb. Silently fail if no tdb is found. */
1267 void
1268 pfsync_update_net_tdb(struct pfsync_tdb *pt)
1269 {
1270 	struct tdb		*tdb;
1271 	int			 s;
1272 
1273 	/* check for invalid values */
1274 	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
1275 	    (pt->dst.sa.sa_family != AF_INET &&
1276 	     pt->dst.sa.sa_family != AF_INET6))
1277 		goto bad;
1278 
1279 	s = spltdb();
1280 	tdb = gettdb(pt->spi, &pt->dst, pt->sproto);
1281 	if (tdb) {
1282 		pt->rpl = ntohl(pt->rpl);
1283 		pt->cur_bytes = betoh64(pt->cur_bytes);
1284 
1285 		/* Neither replay nor byte counter should ever decrease. */
1286 		if (pt->rpl < tdb->tdb_rpl ||
1287 		    pt->cur_bytes < tdb->tdb_cur_bytes) {
1288 			splx(s);
1289 			goto bad;
1290 		}
1291 
1292 		tdb->tdb_rpl = pt->rpl;
1293 		tdb->tdb_cur_bytes = pt->cur_bytes;
1294 	}
1295 	splx(s);
1296 	return;
1297 
1298  bad:
1299 	if (pf_status.debug >= PF_DEBUG_MISC)
1300 		printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: "
1301 		    "invalid value\n");
1302 	pfsyncstats.pfsyncs_badstate++;
1303 	return;
1304 }
1305 #endif
1306 
1307 
1308 int
1309 pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1310 {
1311 	/* check if we are at the right place in the packet */
1312 	if (offset != m->m_pkthdr.len)
1313 		pfsyncstats.pfsyncs_badlen++;
1314 
1315 	/* we're done. free and let the caller return */
1316 	m_freem(m);
1317 	return (-1);
1318 }
1319 
1320 int
1321 pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1322 {
1323 	pfsyncstats.pfsyncs_badact++;
1324 
1325 	m_freem(m);
1326 	return (-1);
1327 }
1328 
1329 int
1330 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
1331 	struct rtentry *rt)
1332 {
1333 	m_freem(m);
1334 	return (0);
1335 }
1336 
1337 /* ARGSUSED */
1338 int
1339 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1340 {
1341 	struct proc *p = curproc;
1342 	struct pfsync_softc *sc = ifp->if_softc;
1343 	struct ifreq *ifr = (struct ifreq *)data;
1344 	struct ip_moptions *imo = &sc->sc_imo;
1345 	struct pfsyncreq pfsyncr;
1346 	struct ifnet    *sifp;
1347 	struct ip *ip;
1348 	int s, error;
1349 
1350 	switch (cmd) {
1351 #if 0
1352 	case SIOCSIFADDR:
1353 	case SIOCAIFADDR:
1354 	case SIOCSIFDSTADDR:
1355 #endif
1356 	case SIOCSIFFLAGS:
1357 		if (ifp->if_flags & IFF_UP)
1358 			ifp->if_flags |= IFF_RUNNING;
1359 		else {
1360 			ifp->if_flags &= ~IFF_RUNNING;
1361 
1362 			/* drop everything */
1363 			timeout_del(&sc->sc_tmo);
1364 			pfsync_drop(sc);
1365 
1366 			/* cancel bulk update */
1367 			timeout_del(&sc->sc_bulk_tmo);
1368 			sc->sc_bulk_next = NULL;
1369 			sc->sc_bulk_last = NULL;
1370 		}
1371 		break;
1372 	case SIOCSIFMTU:
1373 		if (ifr->ifr_mtu <= PFSYNC_MINPKT)
1374 			return (EINVAL);
1375 		if (ifr->ifr_mtu > MCLBYTES) /* XXX could be bigger */
1376 			ifr->ifr_mtu = MCLBYTES;
1377 		if (ifr->ifr_mtu < ifp->if_mtu)
1378 			pfsync_sendout();
1379 		ifp->if_mtu = ifr->ifr_mtu;
1380 		break;
1381 	case SIOCGETPFSYNC:
1382 		bzero(&pfsyncr, sizeof(pfsyncr));
1383 		if (sc->sc_sync_if) {
1384 			strlcpy(pfsyncr.pfsyncr_syncdev,
1385 			    sc->sc_sync_if->if_xname, IFNAMSIZ);
1386 		}
1387 		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
1388 		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
1389 		return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));
1390 
1391 	case SIOCSETPFSYNC:
1392 		if ((error = suser(p, p->p_acflag)) != 0)
1393 			return (error);
1394 		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
1395 			return (error);
1396 
1397 		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
1398 			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
1399 		else
1400 			sc->sc_sync_peer.s_addr =
1401 			    pfsyncr.pfsyncr_syncpeer.s_addr;
1402 
1403 		if (pfsyncr.pfsyncr_maxupdates > 255)
1404 			return (EINVAL);
1405 		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
1406 
1407 		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
1408 			sc->sc_sync_if = NULL;
1409 			if (imo->imo_num_memberships > 0) {
1410 				in_delmulti(imo->imo_membership[
1411 				    --imo->imo_num_memberships]);
1412 				imo->imo_multicast_ifp = NULL;
1413 			}
1414 			break;
1415 		}
1416 
1417 		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL)
1418 			return (EINVAL);
1419 
1420 		if (sifp->if_mtu < sc->sc_if.if_mtu ||
1421 		    (sc->sc_sync_if != NULL &&
1422 		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
1423 		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
1424 			pfsync_sendout();
1425 		sc->sc_sync_if = sifp;
1426 
1427 		if (imo->imo_num_memberships > 0) {
1428 			in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1429 			imo->imo_multicast_ifp = NULL;
1430 		}
1431 
1432 		if (sc->sc_sync_if &&
1433 		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
1434 			struct in_addr addr;
1435 
1436 			if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) {
1437 				sc->sc_sync_if = NULL;
1438 				splx(s);
1439 				return (EADDRNOTAVAIL);
1440 			}
1441 
1442 			addr.s_addr = INADDR_PFSYNC_GROUP;
1443 
1444 			if ((imo->imo_membership[0] =
1445 			    in_addmulti(&addr, sc->sc_sync_if)) == NULL) {
1446 				sc->sc_sync_if = NULL;
1447 				splx(s);
1448 				return (ENOBUFS);
1449 			}
1450 			imo->imo_num_memberships++;
1451 			imo->imo_multicast_ifp = sc->sc_sync_if;
1452 			imo->imo_multicast_ttl = PFSYNC_DFLTTL;
1453 			imo->imo_multicast_loop = 0;
1454 		}
1455 
1456 		ip = &sc->sc_template;
1457 		bzero(ip, sizeof(*ip));
1458 		ip->ip_v = IPVERSION;
1459 		ip->ip_hl = sizeof(sc->sc_template) >> 2;
1460 		ip->ip_tos = IPTOS_LOWDELAY;
1461 		/* len and id are set later */
1462 		ip->ip_off = htons(IP_DF);
1463 		ip->ip_ttl = PFSYNC_DFLTTL;
1464 		ip->ip_p = IPPROTO_PFSYNC;
1465 		ip->ip_src.s_addr = INADDR_ANY;
1466 		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;
1467 
1468 		if (sc->sc_sync_if) {
1469 			/* Request a full state table update. */
1470 			sc->sc_ureq_sent = time_uptime;
1471 #if NCARP > 0
1472 			if (pfsync_sync_ok)
1473 				carp_group_demote_adj(&sc->sc_if, 1);
1474 #endif
1475 			pfsync_sync_ok = 0;
1476 			if (pf_status.debug >= PF_DEBUG_MISC)
1477 				printf("pfsync: requesting bulk update\n");
1478 			timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
1479 			    pf_pool_limits[PF_LIMIT_STATES].limit /
1480 			    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
1481 			    sizeof(struct pfsync_state)));
1482 			pfsync_request_update(0, 0);
1483 		}
1484 
1485 		break;
1486 
1487 	default:
1488 		return (ENOTTY);
1489 	}
1490 
1491 	return (0);
1492 }
1493 
1494 void
1495 pfsync_out_state(struct pf_state *st, void *buf)
1496 {
1497 	struct pfsync_state *sp = buf;
1498 
1499 	pfsync_state_export(sp, st);
1500 }
1501 
1502 void
1503 pfsync_out_iack(struct pf_state *st, void *buf)
1504 {
1505 	struct pfsync_ins_ack *iack = buf;
1506 
1507 	iack->id = st->id;
1508 	iack->creatorid = st->creatorid;
1509 }
1510 
1511 void
1512 pfsync_out_upd_c(struct pf_state *st, void *buf)
1513 {
1514 	struct pfsync_upd_c *up = buf;
1515 
1516 	up->id = st->id;
1517 	pf_state_peer_hton(&st->src, &up->src);
1518 	pf_state_peer_hton(&st->dst, &up->dst);
1519 	up->creatorid = st->creatorid;
1520 
1521 	up->expire = pf_state_expires(st);
1522 	if (up->expire <= time_second)
1523 		up->expire = htonl(0);
1524 	else
1525 		up->expire = htonl(up->expire - time_second);
1526 	up->timeout = st->timeout;
1527 
1528 	bzero(up->_pad, sizeof(up->_pad)); /* XXX */
1529 }
1530 
1531 void
1532 pfsync_out_del(struct pf_state *st, void *buf)
1533 {
1534 	struct pfsync_del_c *dp = buf;
1535 
1536 	dp->id = st->id;
1537 	dp->creatorid = st->creatorid;
1538 
1539 	SET(st->state_flags, PFSTATE_NOSYNC);
1540 }
1541 
1542 void
1543 pfsync_drop(struct pfsync_softc *sc)
1544 {
1545 	struct pf_state *st;
1546 	struct pfsync_upd_req_item *ur;
1547 	struct tdb *t;
1548 	int q;
1549 
1550 	for (q = 0; q < PFSYNC_S_COUNT; q++) {
1551 		if (TAILQ_EMPTY(&sc->sc_qs[q]))
1552 			continue;
1553 
1554 		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
1555 #ifdef PFSYNC_DEBUG
1556 			KASSERT(st->sync_state == q);
1557 #endif
1558 			st->sync_state = PFSYNC_S_NONE;
1559 		}
1560 		TAILQ_INIT(&sc->sc_qs[q]);
1561 	}
1562 
1563 	while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
1564 		TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
1565 		pool_put(&sc->sc_pool, ur);
1566 	}
1567 
1568 	sc->sc_plus = NULL;
1569 
1570 	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
1571 		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry)
1572 			CLR(t->tdb_flags, TDBF_PFSYNC);
1573 
1574 		TAILQ_INIT(&sc->sc_tdb_q);
1575 	}
1576 
1577 	sc->sc_len = PFSYNC_MINPKT;
1578 }
1579 
1580 void
1581 pfsync_sendout(void)
1582 {
1583 	struct pfsync_softc *sc = pfsyncif;
1584 #if NBPFILTER > 0
1585 	struct ifnet *ifp = &sc->sc_if;
1586 #endif
1587 	struct mbuf *m;
1588 	struct ip *ip;
1589 	struct pfsync_header *ph;
1590 	struct pfsync_subheader *subh;
1591 	struct pf_state *st;
1592 	struct pfsync_upd_req_item *ur;
1593 	struct tdb *t;
1594 
1595 	int offset;
1596 	int q, count = 0;
1597 
1598 	if (sc == NULL || sc->sc_len == PFSYNC_MINPKT)
1599 		return;
1600 
1601 	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
1602 #if NBPFILTER > 0
1603 	    (ifp->if_bpf == NULL && sc->sc_sync_if == NULL)) {
1604 #else
1605 	    sc->sc_sync_if == NULL) {
1606 #endif
1607 		pfsync_drop(sc);
1608 		return;
1609 	}
1610 
1611 	MGETHDR(m, M_DONTWAIT, MT_DATA);
1612 	if (m == NULL) {
1613 		sc->sc_if.if_oerrors++;
1614 		pfsyncstats.pfsyncs_onomem++;
1615 		pfsync_drop(sc);
1616 		return;
1617 	}
1618 
1619 	if (max_linkhdr + sc->sc_len > MHLEN) {
1620 		MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len);
1621 		if (!ISSET(m->m_flags, M_EXT)) {
1622 			m_free(m);
1623 			sc->sc_if.if_oerrors++;
1624 			pfsyncstats.pfsyncs_onomem++;
1625 			pfsync_drop(sc);
1626 			return;
1627 		}
1628 	}
1629 	m->m_data += max_linkhdr;
1630 	m->m_len = m->m_pkthdr.len = sc->sc_len;
1631 
1632 	/* build the ip header */
1633 	ip = (struct ip *)m->m_data;
1634 	bcopy(&sc->sc_template, ip, sizeof(*ip));
1635 	offset = sizeof(*ip);
1636 
1637 	ip->ip_len = htons(m->m_pkthdr.len);
1638 	ip->ip_id = htons(ip_randomid());
1639 
1640 	/* build the pfsync header */
1641 	ph = (struct pfsync_header *)(m->m_data + offset);
1642 	bzero(ph, sizeof(*ph));
1643 	offset += sizeof(*ph);
1644 
1645 	ph->version = PFSYNC_VERSION;
1646 	ph->len = htons(sc->sc_len - sizeof(*ip));
1647 	bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
1648 
1649 	/* walk the queues */
1650 	for (q = 0; q < PFSYNC_S_COUNT; q++) {
1651 		if (TAILQ_EMPTY(&sc->sc_qs[q]))
1652 			continue;
1653 
1654 		subh = (struct pfsync_subheader *)(m->m_data + offset);
1655 		offset += sizeof(*subh);
1656 
1657 		count = 0;
1658 		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
1659 #ifdef PFSYNC_DEBUG
1660 			KASSERT(st->sync_state == q);
1661 #endif
1662 			pfsync_qs[q].write(st, m->m_data + offset);
1663 			offset += pfsync_qs[q].len;
1664 
1665 			st->sync_state = PFSYNC_S_NONE;
1666 			count++;
1667 		}
1668 		TAILQ_INIT(&sc->sc_qs[q]);
1669 
1670 		bzero(subh, sizeof(*subh));
1671 		subh->action = pfsync_qs[q].action;
1672 		subh->count = htons(count);
1673 	}
1674 
1675 	if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
1676 		subh = (struct pfsync_subheader *)(m->m_data + offset);
1677 		offset += sizeof(*subh);
1678 
1679 		count = 0;
1680 		while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
1681 			TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
1682 
1683 			bcopy(&ur->ur_msg, m->m_data + offset,
1684 			    sizeof(ur->ur_msg));
1685 			offset += sizeof(ur->ur_msg);
1686 
1687 			pool_put(&sc->sc_pool, ur);
1688 
1689 			count++;
1690 		}
1691 
1692 		bzero(subh, sizeof(*subh));
1693 		subh->action = PFSYNC_ACT_UPD_REQ;
1694 		subh->count = htons(count);
1695 	}
1696 
1697 	/* has someone built a custom region for us to add? */
1698 	if (sc->sc_plus != NULL) {
1699 		bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
1700 		offset += sc->sc_pluslen;
1701 
1702 		sc->sc_plus = NULL;
1703 	}
1704 
1705 	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
1706 		subh = (struct pfsync_subheader *)(m->m_data + offset);
1707 		offset += sizeof(*subh);
1708 
1709 		count = 0;
1710 		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) {
1711 			pfsync_out_tdb(t, m->m_data + offset);
1712 			offset += sizeof(struct pfsync_tdb);
1713 			CLR(t->tdb_flags, TDBF_PFSYNC);
1714 
1715 			count++;
1716 		}
1717 		TAILQ_INIT(&sc->sc_tdb_q);
1718 
1719 		bzero(subh, sizeof(*subh));
1720 		subh->action = PFSYNC_ACT_TDB;
1721 		subh->count = htons(count);
1722 	}
1723 
1724 	subh = (struct pfsync_subheader *)(m->m_data + offset);
1725 	offset += sizeof(*subh);
1726 
1727 	bzero(subh, sizeof(*subh));
1728 	subh->action = PFSYNC_ACT_EOF;
1729 	subh->count = htons(1);
1730 
1731 	/* we're done, let's put it on the wire */
1732 #if NBPFILTER > 0
1733 	if (ifp->if_bpf) {
1734 		m->m_data += sizeof(*ip);
1735 		m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
1736 		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
1737 		m->m_data -= sizeof(*ip);
1738 		m->m_len = m->m_pkthdr.len = sc->sc_len;
1739 	}
1740 
1741 	if (sc->sc_sync_if == NULL) {
1742 		sc->sc_len = PFSYNC_MINPKT;
1743 		m_freem(m);
1744 		return;
1745 	}
1746 #endif
1747 
1748 	sc->sc_if.if_opackets++;
1749 	sc->sc_if.if_obytes += m->m_pkthdr.len;
1750 
1751 	if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL) == 0)
1752 		pfsyncstats.pfsyncs_opackets++;
1753 	else
1754 		pfsyncstats.pfsyncs_oerrors++;
1755 
1756 	/* start again */
1757 	sc->sc_len = PFSYNC_MINPKT;
1758 }
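
/*
 * The packet assembled above is laid out as follows (a sketch; empty
 * sections are omitted entirely):
 *
 *	struct ip
 *	struct pfsync_header
 *	subheader + n entries		per non-empty sc_qs[] queue
 *	subheader + n pfsync_upd_req	queued update requests
 *	sc_plus				custom region, if any
 *	subheader + n pfsync_tdb	queued TDB updates
 *	subheader			PFSYNC_ACT_EOF, count 1
 */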
1759 
1760 void
1761 pfsync_insert_state(struct pf_state *st)
1762 {
1763 	struct pfsync_softc *sc = pfsyncif;
1764 
1765 	splsoftassert(IPL_SOFTNET);
1766 
1767 	if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) ||
1768 	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
1769 		SET(st->state_flags, PFSTATE_NOSYNC);
1770 		return;
1771 	}
1772 
1773 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
1774 	    ISSET(st->state_flags, PFSTATE_NOSYNC))
1775 		return;
1776 
1777 #ifdef PFSYNC_DEBUG
1778 	KASSERT(st->sync_state == PFSYNC_S_NONE);
1779 #endif
1780 
1781 	if (sc->sc_len == PFSYNC_MINPKT)
1782 		timeout_add_sec(&sc->sc_tmo, 1);
1783 
1784 	pfsync_q_ins(st, PFSYNC_S_INS);
1785 
1786 	if (ISSET(st->state_flags, PFSTATE_ACK))
1787 		schednetisr(NETISR_PFSYNC);
1788 	else
1789 		st->sync_updates = 0;
1790 }
1791 
1792 int defer = 10;
1793 
1794 int
1795 pfsync_defer(struct pf_state *st, struct mbuf *m)
1796 {
1797 	return (0);
1798 #ifdef notyet
1799 	struct pfsync_softc *sc = pfsyncif;
1800 	struct pfsync_deferral *pd;
1801 
1802 	splsoftassert(IPL_SOFTNET);
1803 
1804 	if (sc->sc_deferred >= 128)
1805 		pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);
1806 
1807 	pd = pool_get(&sc->sc_pool, M_NOWAIT);
1808 	if (pd == NULL)
1809 		return (0);
1810 	sc->sc_deferred++;
1811 
1812 	m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
1813 	SET(st->state_flags, PFSTATE_ACK);
1814 
1815 	pd->pd_st = st;
1816 	pd->pd_m = m;
1817 
1818 	TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
1819 	timeout_set(&pd->pd_tmo, pfsync_defer_tmo, pd);
1820 	timeout_add(&pd->pd_tmo, defer);
1821 
1822 	return (1);
1823 #endif
1824 }
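
/*
 * Note: the deferral path above is currently compiled out (the early
 * return (0) before the #ifdef notyet block).  When enabled, it holds
 * the packet that created the state until the peer acknowledges the
 * insert (pfsync_in_iack() -> pfsync_deferred()) or the timeout fires,
 * at which point pfsync_undefer() releases it via ip_output().
 */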
1825 
1826 void
1827 pfsync_undefer(struct pfsync_deferral *pd, int drop)
1828 {
1829 	struct pfsync_softc *sc = pfsyncif;
1830 
1831 	splsoftassert(IPL_SOFTNET);
1832 
1833 	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
1834 	sc->sc_deferred--;
1835 
1836 	CLR(pd->pd_st->state_flags, PFSTATE_ACK);
1837 	timeout_del(&pd->pd_tmo); /* bah */
1838 	if (drop)
1839 		m_freem(pd->pd_m);
1840 	else {
1841 		ip_output(pd->pd_m, (void *)NULL, (void *)NULL, 0,
1842 		    (void *)NULL, (void *)NULL);
1843 	}
1844 
1845 	pool_put(&sc->sc_pool, pd);
1846 }
1847 
1848 void
1849 pfsync_defer_tmo(void *arg)
1850 {
1851 	int s;
1852 
1853 	s = splsoftnet();
1854 	pfsync_undefer(arg, 0);
1855 	splx(s);
1856 }
1857 
1858 void
1859 pfsync_deferred(struct pf_state *st, int drop)
1860 {
1861 	struct pfsync_softc *sc = pfsyncif;
1862 	struct pfsync_deferral *pd;
1863 
1864 	TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
1865 		if (pd->pd_st == st) {
1866 			pfsync_undefer(pd, drop);
1867 			return;
1868 		}
1869 	}
1870 
1871 	panic("pfsync_deferred: unable to find deferred state");
1872 }
1873 
1874 u_int pfsync_upds = 0;
1875 
1876 void
1877 pfsync_update_state(struct pf_state *st)
1878 {
1879 	struct pfsync_softc *sc = pfsyncif;
1880 	int sync = 0;
1881 
1882 	splsoftassert(IPL_SOFTNET);
1883 
1884 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
1885 		return;
1886 
1887 	if (ISSET(st->state_flags, PFSTATE_ACK))
1888 		pfsync_deferred(st, 0);
1889 	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
1890 		if (st->sync_state != PFSYNC_S_NONE)
1891 			pfsync_q_del(st);
1892 		return;
1893 	}
1894 
1895 	if (sc->sc_len == PFSYNC_MINPKT)
1896 		timeout_add_sec(&sc->sc_tmo, 1);
1897 
1898 	switch (st->sync_state) {
1899 	case PFSYNC_S_UPD_C:
1900 	case PFSYNC_S_UPD:
1901 	case PFSYNC_S_INS:
1902 		/* we're already handling it */
1903 
1904 		st->sync_updates++;
1905 		if (st->sync_updates >= sc->sc_maxupdates)
1906 			sync = 1;
1907 		break;
1908 
1909 	case PFSYNC_S_IACK:
1910 		pfsync_q_del(st);
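		/* FALLTHROUGH */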
1911 	case PFSYNC_S_NONE:
1912 		pfsync_q_ins(st, PFSYNC_S_UPD_C);
1913 		st->sync_updates = 0;
1914 		break;
1915 
1916 	default:
1917 		panic("pfsync_update_state: unexpected sync state %d",
1918 		    st->sync_state);
1919 	}
1920 
1921 	if (sync || (time_uptime - st->pfsync_time) < 2) {
1922 		pfsync_upds++;
1923 		schednetisr(NETISR_PFSYNC);
1924 	}
1925 }
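
/*
 * Illustrative behaviour of the batching above: with the default
 * sc_maxupdates of 128 (set in pfsync_clone_create()), a state can
 * absorb up to 128 local updates into one queued entry before the
 * sync flag forces a send; a state that already saw pfsync activity
 * within the last two seconds also triggers an immediate send.
 */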
1926 
1927 void
1928 pfsync_request_update(u_int32_t creatorid, u_int64_t id)
1929 {
1930 	struct pfsync_softc *sc = pfsyncif;
1931 	struct pfsync_upd_req_item *item;
1932 	size_t nlen = sizeof(struct pfsync_upd_req);
1933 
1934 	/*
1935 	 * this code does nothing to prevent multiple update requests for the
1936 	 * same state being generated.
1937 	 */
1938 
1939 	item = pool_get(&sc->sc_pool, PR_NOWAIT);
1940 	if (item == NULL) {
1941 		/* XXX stats */
1942 		return;
1943 	}
1944 
1945 	item->ur_msg.id = id;
1946 	item->ur_msg.creatorid = creatorid;
1947 
1948 	if (TAILQ_EMPTY(&sc->sc_upd_req_list))
1949 		nlen += sizeof(struct pfsync_subheader);
1950 
1951 	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
1952 		pfsync_sendout();
1953 
1954 		nlen = sizeof(struct pfsync_subheader) +
1955 		    sizeof(struct pfsync_upd_req);
1956 	}
1957 
1958 	TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
1959 	sc->sc_len += nlen;
1960 
1961 	schednetisr(NETISR_PFSYNC);
1962 }
1963 
1964 void
1965 pfsync_update_state_req(struct pf_state *st)
1966 {
1967 	struct pfsync_softc *sc = pfsyncif;
1968 
1969 	if (sc == NULL)
1970 		panic("pfsync_update_state_req: nonexistent instance");
1971 
1972 	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
1973 		if (st->sync_state != PFSYNC_S_NONE)
1974 			pfsync_q_del(st);
1975 		return;
1976 	}
1977 
1978 	switch (st->sync_state) {
1979 	case PFSYNC_S_UPD_C:
1980 	case PFSYNC_S_IACK:
1981 		pfsync_q_del(st);
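		/* FALLTHROUGH */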
1982 	case PFSYNC_S_NONE:
1983 		pfsync_q_ins(st, PFSYNC_S_UPD);
1984 		schednetisr(NETISR_PFSYNC);
1985 		return;
1986 
1987 	case PFSYNC_S_INS:
1988 	case PFSYNC_S_UPD:
1989 	case PFSYNC_S_DEL:
1990 		/* we're already handling it */
1991 		return;
1992 
1993 	default:
1994 		panic("pfsync_update_state_req: unexpected sync state %d",
1995 		    st->sync_state);
1996 	}
1997 }
1998 
1999 void
2000 pfsync_delete_state(struct pf_state *st)
2001 {
2002 	struct pfsync_softc *sc = pfsyncif;
2003 
2004 	splsoftassert(IPL_SOFTNET);
2005 
2006 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
2007 		return;
2008 
2009 	if (ISSET(st->state_flags, PFSTATE_ACK))
2010 		pfsync_deferred(st, 1);
2011 	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
2012 		if (st->sync_state != PFSYNC_S_NONE)
2013 			pfsync_q_del(st);
2014 		return;
2015 	}
2016 
2017 	if (sc->sc_len == PFSYNC_MINPKT)
2018 		timeout_add_sec(&sc->sc_tmo, 1);
2019 
2020 	switch (st->sync_state) {
2021 	case PFSYNC_S_INS:
2022 		/* we never got to tell the world so just forget about it */
2023 		pfsync_q_del(st);
2024 		return;
2025 
2026 	case PFSYNC_S_UPD_C:
2027 	case PFSYNC_S_UPD:
2028 	case PFSYNC_S_IACK:
2029 		pfsync_q_del(st);
2030 		/* FALLTHROUGH to putting it on the del list */
2031 
2032 	case PFSYNC_S_NONE:
2033 		pfsync_q_ins(st, PFSYNC_S_DEL);
2034 		return;
2035 
2036 	default:
2037 		panic("pfsync_delete_state: unexpected sync state %d",
2038 		    st->sync_state);
2039 	}
2040 }
2041 
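/*
 * pfsync_clear_states: announce that all states bound to the given
 * creatorid and interface name have been flushed.
 */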
2042 void
2043 pfsync_clear_states(u_int32_t creatorid, const char *ifname)
2044 {
2045 	struct pfsync_softc *sc = pfsyncif;
2046 	struct {
2047 		struct pfsync_subheader subh;
2048 		struct pfsync_clr clr;
2049 	} __packed r;
2050 
2051 	splsoftassert(IPL_SOFTNET);
2052 
2053 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
2054 		return;
2055 
2056 	bzero(&r, sizeof(r));
2057 
2058 	r.subh.action = PFSYNC_ACT_CLR;
2059 	r.subh.count = htons(1);
2060 
2061 	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
2062 	r.clr.creatorid = creatorid;
2063 
2064 	pfsync_send_plus(&r, sizeof(r));
2065 }
2066 
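/*
 * pfsync_q_ins: append a state to the given send queue, flushing the
 * pending packet first if the addition would push it past the MTU.
 */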
2067 void
2068 pfsync_q_ins(struct pf_state *st, int q)
2069 {
2070 	struct pfsync_softc *sc = pfsyncif;
2071 	size_t nlen = pfsync_qs[q].len;
2072 
2073 	KASSERT(st->sync_state == PFSYNC_S_NONE);
2074 
2075 #if 1 || defined(PFSYNC_DEBUG)	/* sanity check is currently forced on */
2076 	if (sc->sc_len < PFSYNC_MINPKT)
2077 		panic("pfsync pkt len is too low %zu", sc->sc_len);
2078 #endif
2079 	if (TAILQ_EMPTY(&sc->sc_qs[q]))
2080 		nlen += sizeof(struct pfsync_subheader);
2081 
2082 	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
2083 		pfsync_sendout();
2084 
2085 		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
2086 	}
2087 
2088 	sc->sc_len += nlen;
2089 	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
2090 	st->sync_state = q;
2091 }
2092 
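/*
 * pfsync_q_del: take a state off its send queue and give back its share
 * of the pending packet length, including the subheader if the queue is
 * now empty.
 */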
2093 void
2094 pfsync_q_del(struct pf_state *st)
2095 {
2096 	struct pfsync_softc *sc = pfsyncif;
2097 	int q = st->sync_state;
2098 
2099 	KASSERT(st->sync_state != PFSYNC_S_NONE);
2100 
2101 	sc->sc_len -= pfsync_qs[q].len;
2102 	TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
2103 	st->sync_state = PFSYNC_S_NONE;
2104 
2105 	if (TAILQ_EMPTY(&sc->sc_qs[q]))
2106 		sc->sc_len -= sizeof(struct pfsync_subheader);
2107 }
2108 
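/*
 * pfsync_update_tdb: queue a replay counter update for an IPsec SA.  A
 * tdb already queued is marked with TDBF_PFSYNC and only has its update
 * count bumped.
 */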
2109 void
2110 pfsync_update_tdb(struct tdb *t, int output)
2111 {
2112 	struct pfsync_softc *sc = pfsyncif;
2113 	size_t nlen = sizeof(struct pfsync_tdb);
2114 
2115 	if (sc == NULL)
2116 		return;
2117 
2118 	if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
2119 		if (TAILQ_EMPTY(&sc->sc_tdb_q))
2120 			nlen += sizeof(struct pfsync_subheader);
2121 
2122 		if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
2123 			pfsync_sendout();
2124 
2125 			nlen = sizeof(struct pfsync_subheader) +
2126 			    sizeof(struct pfsync_tdb);
2127 		}
2128 
2129 		sc->sc_len += nlen;
2130 		TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
2131 		SET(t->tdb_flags, TDBF_PFSYNC);
2132 		t->tdb_updates = 0;
2133 	} else {
2134 		if (++t->tdb_updates >= sc->sc_maxupdates)
2135 			schednetisr(NETISR_PFSYNC);
2136 	}
2137 
2138 	if (output)
2139 		SET(t->tdb_flags, TDBF_PFSYNC_RPL);
2140 	else
2141 		CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
2142 }
2143 
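/* pfsync_delete_tdb: unqueue a tdb that is being torn down */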
2144 void
2145 pfsync_delete_tdb(struct tdb *t)
2146 {
2147 	struct pfsync_softc *sc = pfsyncif;
2148 
2149 	if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
2150 		return;
2151 
2152 	sc->sc_len -= sizeof(struct pfsync_tdb);
2153 	TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
2154 	CLR(t->tdb_flags, TDBF_PFSYNC);
2155 
2156 	if (TAILQ_EMPTY(&sc->sc_tdb_q))
2157 		sc->sc_len -= sizeof(struct pfsync_subheader);
2158 }
2159 
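/* pfsync_out_tdb: serialize a tdb's replay state into the outgoing packet */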
2160 void
2161 pfsync_out_tdb(struct tdb *t, void *buf)
2162 {
2163 	struct pfsync_tdb *ut = buf;
2164 
2165 	bzero(ut, sizeof(*ut));
2166 	ut->spi = t->tdb_spi;
2167 	bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
2168 	/*
2169 	 * When a failover happens, the master's rpl is probably above
2170 	 * what we see here (we may be up to a second late), so
2171 	 * increase it a bit for outbound tdbs to manage most such
2172 	 * situations.
2173 	 *
2174 	 * For now, just add an offset that is likely to be larger
2175 	 * than the number of packets we can see in one second. The RFC
2176 	 * just says the next packet must have a higher seq value.
2177 	 *
2178 	 * XXX What is a good algorithm for this? We could use
2179 	 * a rate-determined increase, but to know it, we would have
2180 	 * to extend struct tdb.
2181 	 * XXX ut->rpl can wrap over MAXINT, but if so the real tdb
2182 	 * will soon be replaced anyway. For now, just don't handle
2183 	 * this edge case.
2184 	 */
2185 #define RPL_INCR 16384
2186 	ut->rpl = htonl(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
2187 	    RPL_INCR : 0));
2188 	ut->cur_bytes = htobe64(t->tdb_cur_bytes);
2189 	ut->sproto = t->tdb_sproto;
2190 }
2191 
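/*
 * pfsync_bulk_start: begin (or resume) a walk of the state list in answer
 * to a peer's bulk update request.  sc_bulk_last marks where the walk
 * must stop once it wraps around.
 */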
2192 void
2193 pfsync_bulk_start(void)
2194 {
2195 	struct pfsync_softc *sc = pfsyncif;
2196 
2197 	sc->sc_ureq_received = time_uptime;
2198 
2199 	if (sc->sc_bulk_next == NULL)
2200 		sc->sc_bulk_next = TAILQ_FIRST(&state_list);
2201 	sc->sc_bulk_last = sc->sc_bulk_next;
2202 
2203 	if (pf_status.debug >= PF_DEBUG_MISC)
2204 		printf("pfsync: received bulk update request\n");
2205 
2206 	pfsync_bulk_status(PFSYNC_BUS_START);
2207 	pfsync_bulk_update(sc);
2208 }
2209 
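/*
 * pfsync_bulk_update: send the next chunk of a bulk transfer.  Once about
 * a packet's worth of updates has been queued and flushed, the position
 * is saved and the timeout reschedules us until the walk comes back to
 * sc_bulk_last.
 */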
2210 void
2211 pfsync_bulk_update(void *arg)
2212 {
2213 	struct pfsync_softc *sc = arg;
2214 	struct pf_state *st = sc->sc_bulk_next;
2215 	int i = 0;
2216 	int s;
2217 
2218 	s = splsoftnet();
	/*
	 * sc_bulk_next is NULL here if the state table was empty when the
	 * bulk request arrived; report completion rather than dereference it.
	 */
	if (st == NULL) {
		pfsync_bulk_status(PFSYNC_BUS_END);
		splx(s);
		return;
	}

2219 	do {
2220 		if (st->sync_state == PFSYNC_S_NONE &&
2221 		    st->timeout < PFTM_MAX &&
2222 		    st->pfsync_time <= sc->sc_ureq_received) {
2223 			pfsync_update_state_req(st);
2224 			i++;
2225 		}
2226 
2227 		st = TAILQ_NEXT(st, entry_list);
2228 		if (st == NULL)
2229 			st = TAILQ_FIRST(&state_list);
2230 
2231 		if (i > 0 && TAILQ_EMPTY(&sc->sc_qs[PFSYNC_S_UPD])) {
2232 			sc->sc_bulk_next = st;
2233 			timeout_add(&sc->sc_bulk_tmo, 1);
2234 			goto out;
2235 		}
2236 	} while (st != sc->sc_bulk_last);
2237 
2238 	/* we're done */
2239 	sc->sc_bulk_next = NULL;
2240 	sc->sc_bulk_last = NULL;
2241 	pfsync_bulk_status(PFSYNC_BUS_END);
2242 
2243 out:
2244 	splx(s);
2245 }
2246 
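/* pfsync_bulk_status: send a bulk update status (BUS) message to our peers */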
2247 void
2248 pfsync_bulk_status(u_int8_t status)
2249 {
2250 	struct {
2251 		struct pfsync_subheader subh;
2252 		struct pfsync_bus bus;
2253 	} __packed r;
2254 
2255 	struct pfsync_softc *sc = pfsyncif;
2256 
2257 	bzero(&r, sizeof(r));
2258 
2259 	r.subh.action = PFSYNC_ACT_BUS;
2260 	r.subh.count = htons(1);
2261 
2262 	r.bus.creatorid = pf_status.hostid;
2263 	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
2264 	r.bus.status = status;
2265 
2266 	pfsync_send_plus(&r, sizeof(r));
2267 }
2268 
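/*
 * pfsync_bulk_fail: the bulk update we requested never completed.  Retry
 * up to PFSYNC_MAX_BULKTRIES times, then give up and lift the carp
 * demotion as if the transfer had succeeded.
 */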
2269 void
2270 pfsync_bulk_fail(void *arg)
2271 {
2272 	struct pfsync_softc *sc = arg;
2273 
2274 	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
2275 		/* Try again */
2276 		timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
2277 		pfsync_request_update(0, 0);
2278 	} else {
2279 		/* Pretend the transfer was ok */
2280 		sc->sc_ureq_sent = 0;
2281 		sc->sc_bulk_tries = 0;
2282 #if NCARP > 0
2283 		if (!pfsync_sync_ok)
2284 			carp_group_demote_adj(&sc->sc_if, -1);
2285 #endif
2286 		pfsync_sync_ok = 1;
2287 		if (pf_status.debug >= PF_DEBUG_MISC)
2288 			printf("pfsync: failed to receive bulk update\n");
2289 	}
2290 }
2291 
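/*
 * pfsync_send_plus: append a preformatted chunk to the pending packet and
 * flush it immediately.
 */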
2292 void
2293 pfsync_send_plus(void *plus, size_t pluslen)
2294 {
2295 	struct pfsync_softc *sc = pfsyncif;
2296 
2297 	if (sc->sc_len + pluslen > sc->sc_if.if_mtu)
2298 		pfsync_sendout();
2299 
2300 	sc->sc_plus = plus;
2301 	sc->sc_pluslen = pluslen;
	sc->sc_len += pluslen;
2302 
2303 	pfsync_sendout();
2304 }
2305 
2306 int
2307 pfsync_up(void)
2308 {
2309 	struct pfsync_softc *sc = pfsyncif;
2310 
2311 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
2312 		return (0);
2313 
2314 	return (1);
2315 }
2316 
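/*
 * pfsync_state_in_use: a state is still referenced by pfsync while it is
 * on a send queue or while a bulk walk, which may visit any state, is in
 * progress.
 */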
2317 int
2318 pfsync_state_in_use(struct pf_state *st)
2319 {
2320 	struct pfsync_softc *sc = pfsyncif;
2321 
2322 	if (sc == NULL)
2323 		return (0);
2324 
2325 	if (st->sync_state != PFSYNC_S_NONE)
2326 		return (1);
2327 
2328 	if (sc->sc_bulk_next == NULL && sc->sc_bulk_last == NULL)
2329 		return (0);
2330 
2331 	return (1);
2332 }
2333 
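/* pfsync_timeout: flush whatever has accumulated since the timeout was armed */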
2334 void
2335 pfsync_timeout(void *arg)
2336 {
2337 	int s;
2338 
2339 	s = splsoftnet();
2340 	pfsync_sendout();
2341 	splx(s);
2342 }
2343 
2344 /* this is a softnet/netisr handler */
2345 void
2346 pfsyncintr(void)
2347 {
2348 	pfsync_sendout();
2349 }
2350 
2351 int
2352 pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
2353     size_t newlen)
2354 {
2355 	/* All sysctl names at this level are terminal. */
2356 	if (namelen != 1)
2357 		return (ENOTDIR);
2358 
2359 	switch (name[0]) {
2360 	case PFSYNCCTL_STATS:
2361 		if (newp != NULL)
2362 			return (EPERM);
2363 		return (sysctl_struct(oldp, oldlenp, newp, newlen,
2364 		    &pfsyncstats, sizeof(pfsyncstats)));
2365 	default:
2366 		return (ENOPROTOOPT);
2367 	}
2368 }
2369