1 /*	$OpenBSD: if_pfsync.c,v 1.167 2011/08/03 00:01:30 dlg Exp $	*/
2 
3 /*
4  * Copyright (c) 2002 Michael Shalayeff
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
20  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
24  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
25  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26  * THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
31  *
32  * Permission to use, copy, modify, and distribute this software for any
33  * purpose with or without fee is hereby granted, provided that the above
34  * copyright notice and this permission notice appear in all copies.
35  *
36  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
37  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
38  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
39  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
40  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
41  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
42  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
43  */
44 
45 #include <sys/param.h>
46 #include <sys/proc.h>
47 #include <sys/systm.h>
48 #include <sys/time.h>
49 #include <sys/malloc.h>
50 #include <sys/mbuf.h>
51 #include <sys/socket.h>
52 #include <sys/ioctl.h>
53 #include <sys/timeout.h>
54 #include <sys/kernel.h>
55 #include <sys/sysctl.h>
56 #include <sys/pool.h>
57 #include <sys/syslog.h>
58 
59 #include <net/if.h>
60 #include <net/if_types.h>
61 #include <net/route.h>
62 #include <net/bpf.h>
63 #include <net/netisr.h>
64 #include <netinet/in.h>
65 #include <netinet/if_ether.h>
66 #include <netinet/tcp.h>
67 #include <netinet/tcp_seq.h>
68 
69 #ifdef	INET
70 #include <netinet/in_systm.h>
71 #include <netinet/in_var.h>
72 #include <netinet/ip.h>
73 #include <netinet/ip_var.h>
74 #endif
75 
76 #ifdef INET6
77 #include <netinet/ip6.h>
78 #include <netinet/in_pcb.h>
79 #include <netinet/icmp6.h>
80 #include <netinet6/nd6.h>
81 #include <netinet6/ip6_divert.h>
82 #endif /* INET6 */
83 
84 #include "carp.h"
85 #if NCARP > 0
86 #include <netinet/ip_carp.h>
87 #endif
88 
89 #define PF_DEBUGNAME	"pfsync: "
90 #include <net/pfvar.h>
91 #include <net/if_pfsync.h>
92 
93 #include "bpfilter.h"
94 #include "pfsync.h"
95 
96 #define PFSYNC_MINPKT ( \
97 	sizeof(struct ip) + \
98 	sizeof(struct pfsync_header))
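
/*
 * Illustrative sketch, not compiled into the driver: a pfsync frame
 * starts at PFSYNC_MINPKT (IP header plus pfsync header) and grows by
 * one subheader per action type carried, plus the per-message length
 * for each queued item.  The helper below (hypothetical name) mirrors
 * the accounting done on sc_len throughout this file.
 */
#if 0
static size_t
pfsync_frame_len_sketch(int nsubh, int nmsgs, size_t msglen)
{
	return (PFSYNC_MINPKT +
	    nsubh * sizeof(struct pfsync_subheader) +
	    nmsgs * msglen);	/* assumes all messages are msglen bytes */
}
#endif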
99 
100 int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
101 	    struct pfsync_state_peer *);
102 
103 int	pfsync_in_clr(caddr_t, int, int, int);
104 int	pfsync_in_iack(caddr_t, int, int, int);
105 int	pfsync_in_upd_c(caddr_t, int, int, int);
106 int	pfsync_in_ureq(caddr_t, int, int, int);
107 int	pfsync_in_del(caddr_t, int, int, int);
108 int	pfsync_in_del_c(caddr_t, int, int, int);
109 int	pfsync_in_bus(caddr_t, int, int, int);
110 int	pfsync_in_tdb(caddr_t, int, int, int);
111 int	pfsync_in_ins(caddr_t, int, int, int);
112 int	pfsync_in_upd(caddr_t, int, int, int);
113 int	pfsync_in_eof(caddr_t, int, int, int);
114 
115 int	pfsync_in_error(caddr_t, int, int, int);
116 
117 struct {
118 	int	(*in)(caddr_t, int, int, int);
119 	size_t	len;
120 } pfsync_acts[] = {
121 	/* PFSYNC_ACT_CLR */
122 	{ pfsync_in_clr,	sizeof(struct pfsync_clr) },
123 	/* PFSYNC_ACT_OINS */
124 	{ pfsync_in_error,	0 },
125 	/* PFSYNC_ACT_INS_ACK */
126 	{ pfsync_in_iack,	sizeof(struct pfsync_ins_ack) },
127 	/* PFSYNC_ACT_OUPD */
128 	{ pfsync_in_error,	0 },
129 	/* PFSYNC_ACT_UPD_C */
130 	{ pfsync_in_upd_c,	sizeof(struct pfsync_upd_c) },
131 	/* PFSYNC_ACT_UPD_REQ */
132 	{ pfsync_in_ureq,	sizeof(struct pfsync_upd_req) },
133 	/* PFSYNC_ACT_DEL */
134 	{ pfsync_in_del,	sizeof(struct pfsync_state) },
135 	/* PFSYNC_ACT_DEL_C */
136 	{ pfsync_in_del_c,	sizeof(struct pfsync_del_c) },
137 	/* PFSYNC_ACT_INS_F */
138 	{ pfsync_in_error,	0 },
139 	/* PFSYNC_ACT_DEL_F */
140 	{ pfsync_in_error,	0 },
141 	/* PFSYNC_ACT_BUS */
142 	{ pfsync_in_bus,	sizeof(struct pfsync_bus) },
143 	/* PFSYNC_ACT_TDB */
144 	{ pfsync_in_tdb,	sizeof(struct pfsync_tdb) },
145 	/* PFSYNC_ACT_EOF */
146 	{ pfsync_in_eof,	0 },
147 	/* PFSYNC_ACT_INS */
148 	{ pfsync_in_ins,	sizeof(struct pfsync_state) },
149 	/* PFSYNC_ACT_UPD */
150 	{ pfsync_in_upd,	sizeof(struct pfsync_state) }
151 };
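
/*
 * Sketch, not compiled in: pfsync_input() dispatches by indexing
 * pfsync_acts[] with the subheader action, after checking that the
 * action is known and that the advertised message length (subh->len
 * is in 32-bit words) is at least what this table expects.  A
 * hypothetical standalone form of that check and dispatch:
 */
#if 0
static int
pfsync_dispatch_sketch(struct pfsync_subheader *subh, caddr_t msgs,
    int flags)
{
	int mlen = subh->len << 2;	/* words to bytes */
	int count = ntohs(subh->count);

	if (subh->action >= nitems(pfsync_acts) ||
	    mlen < pfsync_acts[subh->action].len)
		return (-1);		/* unknown action or runt message */

	return (pfsync_acts[subh->action].in(msgs, mlen, count, flags));
}
#endif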
152 
153 struct pfsync_q {
154 	void		(*write)(struct pf_state *, void *);
155 	size_t		len;
156 	u_int8_t	action;
157 };
158 
159 /* we have one of these for every PFSYNC_S_ */
160 void	pfsync_out_state(struct pf_state *, void *);
161 void	pfsync_out_iack(struct pf_state *, void *);
162 void	pfsync_out_upd_c(struct pf_state *, void *);
163 void	pfsync_out_del(struct pf_state *, void *);
164 
165 struct pfsync_q pfsync_qs[] = {
166 	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
167 	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
168 	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C },
169 	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
170 	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD }
171 };
172 
173 void	pfsync_q_ins(struct pf_state *, int);
174 void	pfsync_q_del(struct pf_state *);
175 
176 struct pfsync_upd_req_item {
177 	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
178 	struct pfsync_upd_req			ur_msg;
179 };
180 TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);
181 
182 struct pfsync_deferral {
183 	TAILQ_ENTRY(pfsync_deferral)		 pd_entry;
184 	struct pf_state				*pd_st;
185 	struct mbuf				*pd_m;
186 	struct timeout				 pd_tmo;
187 };
188 TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);
189 
190 #define PFSYNC_PLSIZE	MAX(sizeof(struct pfsync_upd_req_item), \
191 			    sizeof(struct pfsync_deferral))
192 
193 void	pfsync_out_tdb(struct tdb *, void *);
194 
195 struct pfsync_softc {
196 	struct ifnet		 sc_if;
197 	struct ifnet		*sc_sync_if;
198 
199 	struct pool		 sc_pool;
200 
201 	struct ip_moptions	 sc_imo;
202 
203 	struct in_addr		 sc_sync_peer;
204 	u_int8_t		 sc_maxupdates;
205 
206 	struct ip		 sc_template;
207 
208 	struct pf_state_queue	 sc_qs[PFSYNC_S_COUNT];
209 	size_t			 sc_len;
210 
211 	struct pfsync_upd_reqs	 sc_upd_req_list;
212 
213 	int			 sc_defer;
214 	struct pfsync_deferrals	 sc_deferrals;
215 	u_int			 sc_deferred;
216 
217 	void			*sc_plus;
218 	size_t			 sc_pluslen;
219 
220 	u_int32_t		 sc_ureq_sent;
221 	int			 sc_bulk_tries;
222 	struct timeout		 sc_bulkfail_tmo;
223 
224 	u_int32_t		 sc_ureq_received;
225 	struct pf_state		*sc_bulk_next;
226 	struct pf_state		*sc_bulk_last;
227 	struct timeout		 sc_bulk_tmo;
228 
229 	TAILQ_HEAD(, tdb)	 sc_tdb_q;
230 
231 	struct timeout		 sc_tmo;
232 };
233 
234 struct pfsync_softc	*pfsyncif = NULL;
235 struct pfsyncstats	 pfsyncstats;
236 
237 void	pfsyncattach(int);
238 int	pfsync_clone_create(struct if_clone *, int);
239 int	pfsync_clone_destroy(struct ifnet *);
240 int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
241 	    struct pf_state_peer *);
242 void	pfsync_update_net_tdb(struct pfsync_tdb *);
243 int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
244 	    struct rtentry *);
245 int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
246 void	pfsyncstart(struct ifnet *);
247 
248 struct mbuf *pfsync_if_dequeue(struct ifnet *);
249 
250 void	pfsync_deferred(struct pf_state *, int);
251 void	pfsync_undefer(struct pfsync_deferral *, int);
252 void	pfsync_defer_tmo(void *);
253 
254 void	pfsync_request_full_update(struct pfsync_softc *);
255 void	pfsync_request_update(u_int32_t, u_int64_t);
256 void	pfsync_update_state_req(struct pf_state *);
257 
258 void	pfsync_drop(struct pfsync_softc *);
259 void	pfsync_sendout(void);
260 void	pfsync_send_plus(void *, size_t);
261 void	pfsync_timeout(void *);
262 void	pfsync_tdb_timeout(void *);
263 
264 void	pfsync_bulk_start(void);
265 void	pfsync_bulk_status(u_int8_t);
266 void	pfsync_bulk_update(void *);
267 void	pfsync_bulk_fail(void *);
268 
269 #define PFSYNC_MAX_BULKTRIES	12
270 int	pfsync_sync_ok;
271 
272 struct if_clone	pfsync_cloner =
273     IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);
274 
275 void
276 pfsyncattach(int npfsync)
277 {
278 	if_clone_attach(&pfsync_cloner);
279 }
280 
281 int
282 pfsync_clone_create(struct if_clone *ifc, int unit)
283 {
284 	struct pfsync_softc *sc;
285 	struct ifnet *ifp;
286 	int q;
287 
288 	if (unit != 0)
289 		return (EINVAL);
290 
291 	pfsync_sync_ok = 1;
292 
293 	sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_WAITOK | M_ZERO);
294 
295 	for (q = 0; q < PFSYNC_S_COUNT; q++)
296 		TAILQ_INIT(&sc->sc_qs[q]);
297 
298 	pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, 0, 0, "pfsync", NULL);
299 	TAILQ_INIT(&sc->sc_upd_req_list);
300 	TAILQ_INIT(&sc->sc_deferrals);
301 	sc->sc_deferred = 0;
302 
303 	TAILQ_INIT(&sc->sc_tdb_q);
304 
305 	sc->sc_len = PFSYNC_MINPKT;
306 	sc->sc_maxupdates = 128;
307 
308 	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
309 	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
310 	    M_WAITOK | M_ZERO);
311 	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
312 
313 	ifp = &sc->sc_if;
314 	snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
315 	ifp->if_softc = sc;
316 	ifp->if_ioctl = pfsyncioctl;
317 	ifp->if_output = pfsyncoutput;
318 	ifp->if_start = pfsyncstart;
319 	ifp->if_type = IFT_PFSYNC;
320 	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
321 	ifp->if_hdrlen = sizeof(struct pfsync_header);
322 	ifp->if_mtu = 1500; /* XXX */
323 	ifp->if_hardmtu = MCLBYTES; /* XXX */
324 	timeout_set(&sc->sc_tmo, pfsync_timeout, sc);
325 	timeout_set(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
326 	timeout_set(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);
327 
328 	if_attach(ifp);
329 	if_alloc_sadl(ifp);
330 
331 #if NCARP > 0
332 	if_addgroup(ifp, "carp");
333 #endif
334 
335 #if NBPFILTER > 0
336 	bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
337 #endif
338 
339 	pfsyncif = sc;
340 
341 	return (0);
342 }
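
/*
 * Typical administrative use, for reference only (the interface name,
 * peer address and option values below are examples):
 *
 *	# ifconfig pfsync0 syncdev em1 up
 *	# ifconfig pfsync0 syncpeer 192.0.2.1 maxupd 64 defer
 */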
343 
344 int
345 pfsync_clone_destroy(struct ifnet *ifp)
346 {
347 	struct pfsync_softc *sc = ifp->if_softc;
348 	struct pfsync_deferral *pd;
349 	int s;
350 
351 	timeout_del(&sc->sc_bulk_tmo);
352 	timeout_del(&sc->sc_tmo);
353 #if NCARP > 0
354 	if (!pfsync_sync_ok)
355 		carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
356 #endif
357 	if_detach(ifp);
358 
359 	pfsync_drop(sc);
360 
361 	s = splsoftnet();
362 	while (sc->sc_deferred > 0) {
363 		pd = TAILQ_FIRST(&sc->sc_deferrals);
364 		timeout_del(&pd->pd_tmo);
365 		pfsync_undefer(pd, 0);
366 	}
367 	splx(s);
368 
369 	pool_destroy(&sc->sc_pool);
370 	free(sc->sc_imo.imo_membership, M_IPMOPTS);
371 	free(sc, M_DEVBUF);
372 
373 	pfsyncif = NULL;
374 
375 	return (0);
376 }
377 
378 struct mbuf *
379 pfsync_if_dequeue(struct ifnet *ifp)
380 {
381 	struct mbuf *m;
382 
383 	IF_DEQUEUE(&ifp->if_snd, m);
384 
385 	return (m);
386 }
387 
388 /*
389  * Start output on the pfsync interface.
390  */
391 void
392 pfsyncstart(struct ifnet *ifp)
393 {
394 	struct mbuf *m;
395 	int s;
396 
397 	s = splnet();
398 	while ((m = pfsync_if_dequeue(ifp)) != NULL) {
399 		IF_DROP(&ifp->if_snd);
400 		m_freem(m);
401 	}
402 	splx(s);
403 }
404 
405 int
406 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
407     struct pf_state_peer *d)
408 {
409 	if (s->scrub.scrub_flag && d->scrub == NULL) {
410 		d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
411 		if (d->scrub == NULL)
412 			return (ENOMEM);
413 	}
414 
415 	return (0);
416 }
417 
418 void
419 pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
420 {
421 	bzero(sp, sizeof(struct pfsync_state));
422 
423 	/* copy from state key */
424 	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
425 	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
426 	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
427 	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
428 	sp->key[PF_SK_WIRE].rdomain = htons(st->key[PF_SK_WIRE]->rdomain);
429 	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
430 	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
431 	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
432 	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
433 	sp->key[PF_SK_STACK].rdomain = htons(st->key[PF_SK_STACK]->rdomain);
434 	sp->rtableid[PF_SK_WIRE] = htonl(st->rtableid[PF_SK_WIRE]);
435 	sp->rtableid[PF_SK_STACK] = htonl(st->rtableid[PF_SK_STACK]);
436 	sp->proto = st->key[PF_SK_WIRE]->proto;
437 	sp->af = st->key[PF_SK_WIRE]->af;
438 
439 	/* copy from state */
440 	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
441 	bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
442 	sp->creation = htonl(time_second - st->creation);
443 	sp->expire = pf_state_expires(st);
444 	if (sp->expire <= time_second)
445 		sp->expire = htonl(0);
446 	else
447 		sp->expire = htonl(sp->expire - time_second);
448 
449 	sp->direction = st->direction;
450 	sp->log = st->log;
451 	sp->timeout = st->timeout;
452 	/* XXX replace state_flags post 5.0 */
453 	sp->state_flags = st->state_flags;
454 	sp->all_state_flags = htons(st->state_flags);
455 	if (!SLIST_EMPTY(&st->src_nodes))
456 		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;
457 
458 	bcopy(&st->id, &sp->id, sizeof(sp->id));
459 	sp->creatorid = st->creatorid;
460 	pf_state_peer_hton(&st->src, &sp->src);
461 	pf_state_peer_hton(&st->dst, &sp->dst);
462 
463 	if (st->rule.ptr == NULL)
464 		sp->rule = htonl(-1);
465 	else
466 		sp->rule = htonl(st->rule.ptr->nr);
467 	if (st->anchor.ptr == NULL)
468 		sp->anchor = htonl(-1);
469 	else
470 		sp->anchor = htonl(st->anchor.ptr->nr);
471 	sp->nat_rule = htonl(-1);	/* left for compat, nat_rule is gone */
472 
473 	pf_state_counter_hton(st->packets[0], sp->packets[0]);
474 	pf_state_counter_hton(st->packets[1], sp->packets[1]);
475 	pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
476 	pf_state_counter_hton(st->bytes[1], sp->bytes[1]);
477 
478 	sp->max_mss = htons(st->max_mss);
479 	sp->min_ttl = st->min_ttl;
480 	sp->set_tos = st->set_tos;
481 }
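
/*
 * Byte-order sketch, not compiled in: everything multi-byte is sent in
 * network order.  The 64-bit packet/byte counters are assumed to be
 * split into two 32-bit words by pf_state_counter_hton(), roughly as
 * follows:
 */
#if 0
static void
counter_hton_sketch(u_int64_t src, u_int32_t dst[2])
{
	dst[0] = htonl(src >> 32);		/* high word first */
	dst[1] = htonl(src & 0xffffffff);	/* then the low word */
}
#endif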
482 
483 int
484 pfsync_state_import(struct pfsync_state *sp, int flags)
485 {
486 	struct pf_state	*st = NULL;
487 	struct pf_state_key *skw = NULL, *sks = NULL;
488 	struct pf_rule *r = NULL;
489 	struct pfi_kif	*kif;
490 	int pool_flags;
491 	int error;
492 
493 	if (sp->creatorid == 0) {
494 		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
495 		    "invalid creator id: %08x", ntohl(sp->creatorid));
496 		return (EINVAL);
497 	}
498 
499 	if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
500 		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
501 		    "unknown interface: %s", sp->ifname);
502 		if (flags & PFSYNC_SI_IOCTL)
503 			return (EINVAL);
504 		return (0);	/* skip this state */
505 	}
506 
507 	if (sp->af == 0)
508 		return (0);	/* skip this state */
509 
510 	/*
511 	 * If the ruleset checksums match or the state is coming from the ioctl,
512 	 * it's safe to associate the state with the rule of that number.
513 	 */
514 	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
515 	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
516 	    pf_main_ruleset.rules.active.rcount)
517 		r = pf_main_ruleset.rules.active.ptr_array[ntohl(sp->rule)];
518 	else
519 		r = &pf_default_rule;
520 
521 	if ((r->max_states && r->states_cur >= r->max_states))
522 		goto cleanup;
523 
524 	if (flags & PFSYNC_SI_IOCTL)
525 		pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
526 	else
527 		pool_flags = PR_NOWAIT | PR_LIMITFAIL | PR_ZERO;
528 
529 	if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
530 		goto cleanup;
531 
532 	if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
533 		goto cleanup;
534 
535 	if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
536 	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
537 	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
538 	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
539 	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
540 	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] ||
541 	    sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) {
542 		if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
543 			goto cleanup;
544 	} else
545 		sks = skw;
546 
547 	/* allocate memory for scrub info */
548 	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
549 	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
550 		goto cleanup;
551 
552 	/* copy to state key(s) */
553 	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
554 	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
555 	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
556 	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
557 	skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain);
558 	skw->proto = sp->proto;
559 	skw->af = sp->af;
560 	if (sks != skw) {
561 		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
562 		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
563 		sks->port[0] = sp->key[PF_SK_STACK].port[0];
564 		sks->port[1] = sp->key[PF_SK_STACK].port[1];
565 		sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain);
566 		sks->proto = sp->proto;
567 		sks->af = sp->af;
568 	}
569 	st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]);
570 	st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]);
571 
572 	/* copy to state */
573 	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
574 	st->creation = time_second - ntohl(sp->creation);
575 	st->expire = time_second;
576 	if (sp->expire) {
577 		/* XXX No adaptive scaling. */
578 		st->expire -= r->timeout[sp->timeout] - ntohl(sp->expire);
579 	}
580 
582 	st->direction = sp->direction;
583 	st->log = sp->log;
584 	st->timeout = sp->timeout;
585 	/* XXX replace state_flags post 5.0 */
586 	st->state_flags = sp->state_flags | ntohs(sp->all_state_flags);
587 	st->max_mss = ntohs(sp->max_mss);
588 	st->min_ttl = sp->min_ttl;
589 	st->set_tos = sp->set_tos;
590 
591 	bcopy(sp->id, &st->id, sizeof(st->id));
592 	st->creatorid = sp->creatorid;
593 	pf_state_peer_ntoh(&sp->src, &st->src);
594 	pf_state_peer_ntoh(&sp->dst, &st->dst);
595 
596 	st->rule.ptr = r;
597 	st->anchor.ptr = NULL;
598 	st->rt_kif = NULL;
599 
600 	st->pfsync_time = time_uptime;
601 	st->sync_state = PFSYNC_S_NONE;
602 
603 	/* XXX when we have anchors, use STATE_INC_COUNTERS */
604 	r->states_cur++;
605 	r->states_tot++;
606 
607 	if (!ISSET(flags, PFSYNC_SI_IOCTL))
608 		SET(st->state_flags, PFSTATE_NOSYNC);
609 
610 	if (pf_state_insert(kif, skw, sks, st) != 0) {
611 		/* XXX when we have anchors, use STATE_DEC_COUNTERS */
612 		r->states_cur--;
613 		error = EEXIST;
614 		goto cleanup_state;
615 	}
616 
617 	if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
618 		CLR(st->state_flags, PFSTATE_NOSYNC);
619 		if (ISSET(st->state_flags, PFSTATE_ACK)) {
620 			pfsync_q_ins(st, PFSYNC_S_IACK);
621 			schednetisr(NETISR_PFSYNC);
622 		}
623 	}
624 	CLR(st->state_flags, PFSTATE_ACK);
625 
626 	return (0);
627 
628  cleanup:
629 	error = ENOMEM;
630 	if (skw == sks)
631 		sks = NULL;
632 	if (skw != NULL)
633 		pool_put(&pf_state_key_pl, skw);
634 	if (sks != NULL)
635 		pool_put(&pf_state_key_pl, sks);
636 
637  cleanup_state:	/* pf_state_insert frees the state keys */
638 	if (st) {
639 		if (st->dst.scrub)
640 			pool_put(&pf_state_scrub_pl, st->dst.scrub);
641 		if (st->src.scrub)
642 			pool_put(&pf_state_scrub_pl, st->src.scrub);
643 		pool_put(&pf_state_pl, st);
644 	}
645 	return (error);
646 }
647 
648 void
649 pfsync_input(struct mbuf *m, ...)
650 {
651 	struct pfsync_softc *sc = pfsyncif;
652 	struct ip *ip = mtod(m, struct ip *);
653 	struct mbuf *mp;
654 	struct pfsync_header *ph;
655 	struct pfsync_subheader subh;
656 
657 	int offset, offp, len, count, mlen, flags = 0;
658 
659 	pfsyncstats.pfsyncs_ipackets++;
660 
661 	/* verify that we have a sync interface configured */
662 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
663 	    sc->sc_sync_if == NULL || !pf_status.running)
664 		goto done;
665 
666 	/* verify that the packet came in on the right interface */
667 	if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
668 		pfsyncstats.pfsyncs_badif++;
669 		goto done;
670 	}
671 
672 	sc->sc_if.if_ipackets++;
673 	sc->sc_if.if_ibytes += m->m_pkthdr.len;
674 
675 	/* verify that the IP TTL is 255. */
676 	if (ip->ip_ttl != PFSYNC_DFLTTL) {
677 		pfsyncstats.pfsyncs_badttl++;
678 		goto done;
679 	}
680 
681 	offset = ip->ip_hl << 2;
682 	mp = m_pulldown(m, offset, sizeof(*ph), &offp);
683 	if (mp == NULL) {
684 		pfsyncstats.pfsyncs_hdrops++;
685 		return;
686 	}
687 	ph = (struct pfsync_header *)(mp->m_data + offp);
688 
689 	/* verify the version */
690 	if (ph->version != PFSYNC_VERSION) {
691 		pfsyncstats.pfsyncs_badver++;
692 		goto done;
693 	}
694 	len = ntohs(ph->len) + offset;
695 	if (m->m_pkthdr.len < len) {
696 		pfsyncstats.pfsyncs_badlen++;
697 		goto done;
698 	}
699 
700 	if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
701 		flags = PFSYNC_SI_CKSUM;
702 
703 	offset += sizeof(*ph);
704 	while (offset <= len - sizeof(subh)) {
705 		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
706 		offset += sizeof(subh);
707 
708 		mlen = subh.len << 2;
709 		count = ntohs(subh.count);
710 
711 		if (subh.action >= PFSYNC_ACT_MAX ||
712 		    subh.action >= nitems(pfsync_acts) ||
713 		    mlen < pfsync_acts[subh.action].len) {
714 			/*
715 			 * subheaders are always followed by at least one
716 			 * message, so if the peer is new enough to tell us
717 			 * how big its messages are then we know enough to
718 			 * skip them.
719 			 */
720 			if (count > 0 && mlen > 0) {
721 				offset += count * mlen;
722 				continue;
723 			}
724 			pfsyncstats.pfsyncs_badact++;
725 			goto done;
726 		}
727 
728 		mp = m_pulldown(m, offset, mlen * count, &offp);
729 		if (mp == NULL) {
730 			pfsyncstats.pfsyncs_badlen++;
731 			return;
732 		}
733 
734 		if (pfsync_acts[subh.action].in(mp->m_data + offp,
735 		    mlen, count, flags) != 0)
736 			goto done;
737 
738 		offset += mlen * count;
739 	}
740 
741 done:
742 	m_freem(m);
743 }
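
/*
 * Worked example of the frame layout parsed above, for reference: a
 * frame carrying a single insert-ack looks like
 *
 *	struct ip		 (ip_hl words, usually 20 bytes)
 *	struct pfsync_header	 (version, len, pf checksum)
 *	struct pfsync_subheader	 (action PFSYNC_ACT_INS_ACK,
 *				  len in 32-bit words, count 1)
 *	struct pfsync_ins_ack	 (id, creatorid)
 *
 * The loop advances offset by sizeof(subh), then by mlen * count for
 * the messages that follow each subheader.
 */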
744 
745 int
746 pfsync_in_clr(caddr_t buf, int len, int count, int flags)
747 {
748 	struct pfsync_clr *clr;
749 	int i;
750 
751 	struct pf_state *st, *nexts;
752 	struct pf_state_key *sk, *nextsk;
753 	struct pf_state_item *si;
754 	u_int32_t creatorid;
755 
756 	for (i = 0; i < count; i++) {
757 		clr = (struct pfsync_clr *)(buf + len * i);
758 		creatorid = clr->creatorid;
759 
760 		if (clr->ifname[0] == '\0') {
761 			for (st = RB_MIN(pf_state_tree_id, &tree_id);
762 			    st; st = nexts) {
763 				nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
764 				if (st->creatorid == creatorid) {
765 					SET(st->state_flags, PFSTATE_NOSYNC);
766 					pf_unlink_state(st);
767 				}
768 			}
769 		} else {
770 			if (pfi_kif_get(clr->ifname) == NULL)
771 				continue;
772 
773 			/* XXX correct? */
774 			for (sk = RB_MIN(pf_state_tree, &pf_statetbl);
775 			    sk; sk = nextsk) {
776 				nextsk = RB_NEXT(pf_state_tree,
777 				    &pf_statetbl, sk);
778 				TAILQ_FOREACH(si, &sk->states, entry) {
779 					if (si->s->creatorid == creatorid) {
780 						SET(si->s->state_flags,
781 						    PFSTATE_NOSYNC);
782 						pf_unlink_state(si->s);
783 					}
784 				}
785 			}
786 		}
787 	}
788 
789 	return (0);
790 }
791 
792 int
793 pfsync_in_ins(caddr_t buf, int len, int count, int flags)
794 {
795 	struct pfsync_state *sp;
796 	int i;
797 
798 	for (i = 0; i < count; i++) {
799 		sp = (struct pfsync_state *)(buf + len * i);
800 
801 		/* check for invalid values */
802 		if (sp->timeout >= PFTM_MAX ||
803 		    sp->src.state > PF_TCPS_PROXY_DST ||
804 		    sp->dst.state > PF_TCPS_PROXY_DST ||
805 		    sp->direction > PF_OUT ||
806 		    (sp->af != AF_INET && sp->af != AF_INET6)) {
807 			DPFPRINTF(LOG_NOTICE,
808 			    "pfsync_input: PFSYNC5_ACT_INS: invalid value");
809 			pfsyncstats.pfsyncs_badval++;
810 			continue;
811 		}
812 
813 		if (pfsync_state_import(sp, flags) == ENOMEM) {
814 			/* drop out, but process the rest of the actions */
815 			break;
816 		}
817 	}
818 
819 	return (0);
820 }
821 
822 int
823 pfsync_in_iack(caddr_t buf, int len, int count, int flags)
824 {
825 	struct pfsync_ins_ack *ia;
826 	struct pf_state_cmp id_key;
827 	struct pf_state *st;
828 	int i;
829 
830 	for (i = 0; i < count; i++) {
831 		ia = (struct pfsync_ins_ack *)(buf + len * i);
832 
833 		bcopy(&ia->id, &id_key.id, sizeof(id_key.id));
834 		id_key.creatorid = ia->creatorid;
835 
836 		st = pf_find_state_byid(&id_key);
837 		if (st == NULL)
838 			continue;
839 
840 		if (ISSET(st->state_flags, PFSTATE_ACK))
841 			pfsync_deferred(st, 0);
842 	}
843 
844 	return (0);
845 }
846 
847 int
848 pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
849     struct pfsync_state_peer *dst)
850 {
851 	int sync = 0;
852 
853 	/*
854 	 * The state should never go backwards except
855 	 * for syn-proxy states.  Neither should the
856 	 * sequence window slide backwards.
857 	 */
858 	if ((st->src.state > src->state &&
859 	    (st->src.state < PF_TCPS_PROXY_SRC ||
860 	    src->state >= PF_TCPS_PROXY_SRC)) ||
861 
862 	    (st->src.state == src->state &&
863 	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
864 		sync++;
865 	else
866 		pf_state_peer_ntoh(src, &st->src);
867 
868 	if ((st->dst.state > dst->state) ||
869 
870 	    (st->dst.state >= TCPS_SYN_SENT &&
871 	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
872 		sync++;
873 	else
874 		pf_state_peer_ntoh(dst, &st->dst);
875 
876 	return (sync);
877 }
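
/*
 * Worked example, not compiled in: SEQ_GT() from tcp_seq.h compares
 * sequence numbers modulo 2^32 (it is defined as (int)((a)-(b)) > 0),
 * so the window checks above keep working across wraparound:
 */
#if 0
static void
seq_gt_sketch(void)
{
	/* 0x10 is "greater" than 0xfffffff0: the window has wrapped */
	KASSERT(SEQ_GT(0x00000010, 0xfffffff0));
	/* a genuinely older sequence number is not */
	KASSERT(!SEQ_GT(0x00000010, 0x00000020));
}
#endif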
878 
879 int
880 pfsync_in_upd(caddr_t buf, int len, int count, int flags)
881 {
882 	struct pfsync_state *sp;
883 	struct pf_state_cmp id_key;
884 	struct pf_state *st;
885 	int sync;
886 
887 	int i;
888 
889 	for (i = 0; i < count; i++) {
890 		sp = (struct pfsync_state *)(buf + len * i);
891 
892 		/* check for invalid values */
893 		if (sp->timeout >= PFTM_MAX ||
894 		    sp->src.state > PF_TCPS_PROXY_DST ||
895 		    sp->dst.state > PF_TCPS_PROXY_DST) {
896 			DPFPRINTF(LOG_NOTICE,
897 			    "pfsync_input: PFSYNC_ACT_UPD: invalid value");
898 			pfsyncstats.pfsyncs_badval++;
899 			continue;
900 		}
901 
902 		bcopy(sp->id, &id_key.id, sizeof(id_key.id));
903 		id_key.creatorid = sp->creatorid;
904 
905 		st = pf_find_state_byid(&id_key);
906 		if (st == NULL) {
907 			/* insert the update */
908 			if (pfsync_state_import(sp, 0))
909 				pfsyncstats.pfsyncs_badstate++;
910 			continue;
911 		}
912 
913 		if (ISSET(st->state_flags, PFSTATE_ACK))
914 			pfsync_deferred(st, 1);
915 
937 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
938 			sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
939 		else {
940 			sync = 0;
941 
942 			/*
943 			 * Non-TCP protocol state machines always go
944 			 * forward
945 			 */
946 			if (st->src.state > sp->src.state)
947 				sync++;
948 			else
949 				pf_state_peer_ntoh(&sp->src, &st->src);
950 
951 			if (st->dst.state > sp->dst.state)
952 				sync++;
953 			else
954 				pf_state_peer_ntoh(&sp->dst, &st->dst);
955 		}
956 
957 		if (sync < 2) {
958 			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
959 			pf_state_peer_ntoh(&sp->dst, &st->dst);
960 			st->expire = ntohl(sp->expire) + time_second;
961 			st->timeout = sp->timeout;
962 		}
963 		st->pfsync_time = time_uptime;
964 
965 		if (sync) {
966 			pfsyncstats.pfsyncs_stale++;
967 
968 			pfsync_update_state(st);
969 			schednetisr(NETISR_PFSYNC);
970 		}
971 	}
972 
973 	return (0);
974 }
975 
976 int
977 pfsync_in_upd_c(caddr_t buf, int len, int count, int flags)
978 {
979 	struct pfsync_upd_c *up;
980 	struct pf_state_cmp id_key;
981 	struct pf_state *st;
982 
983 	int sync;
984 
985 	int i;
986 
987 	for (i = 0; i < count; i++) {
988 		up = (struct pfsync_upd_c *)(buf + len * i);
989 
990 		/* check for invalid values */
991 		if (up->timeout >= PFTM_MAX ||
992 		    up->src.state > PF_TCPS_PROXY_DST ||
993 		    up->dst.state > PF_TCPS_PROXY_DST) {
994 			DPFPRINTF(LOG_NOTICE,
995 			    "pfsync_input: PFSYNC_ACT_UPD_C: invalid value");
996 			pfsyncstats.pfsyncs_badval++;
997 			continue;
998 		}
999 
1000 		bcopy(&up->id, &id_key.id, sizeof(id_key.id));
1001 		id_key.creatorid = up->creatorid;
1002 
1003 		st = pf_find_state_byid(&id_key);
1004 		if (st == NULL) {
1005 			/* We don't have this state. Ask for it. */
1006 			pfsync_request_update(id_key.creatorid, id_key.id);
1007 			continue;
1008 		}
1009 
1010 		if (ISSET(st->state_flags, PFSTATE_ACK))
1011 			pfsync_deferred(st, 1);
1012 
1013 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
1014 			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
1015 		else {
1016 			sync = 0;
1017 			/*
1018 			 * Non-TCP protocol state machines always go
1019 			 * forward
1020 			 */
1021 			if (st->src.state > up->src.state)
1022 				sync++;
1023 			else
1024 				pf_state_peer_ntoh(&up->src, &st->src);
1025 
1026 			if (st->dst.state > up->dst.state)
1027 				sync++;
1028 			else
1029 				pf_state_peer_ntoh(&up->dst, &st->dst);
1030 		}
1031 		if (sync < 2) {
1032 			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
1033 			pf_state_peer_ntoh(&up->dst, &st->dst);
1034 			st->expire = ntohl(up->expire) + time_second;
1035 			st->timeout = up->timeout;
1036 		}
1037 		st->pfsync_time = time_uptime;
1038 
1039 		if (sync) {
1040 			pfsyncstats.pfsyncs_stale++;
1041 
1042 			pfsync_update_state(st);
1043 			schednetisr(NETISR_PFSYNC);
1044 		}
1045 	}
1046 
1047 	return (0);
1048 }
1049 
1050 int
1051 pfsync_in_ureq(caddr_t buf, int len, int count, int flags)
1052 {
1053 	struct pfsync_upd_req *ur;
1054 	int i;
1055 
1056 	struct pf_state_cmp id_key;
1057 	struct pf_state *st;
1058 
1059 	for (i = 0; i < count; i++) {
1060 		ur = (struct pfsync_upd_req *)(buf + len * i);
1061 
1062 		bcopy(&ur->id, &id_key.id, sizeof(id_key.id));
1063 		id_key.creatorid = ur->creatorid;
1064 
1065 		if (id_key.id == 0 && id_key.creatorid == 0)
1066 			pfsync_bulk_start();
1067 		else {
1068 			st = pf_find_state_byid(&id_key);
1069 			if (st == NULL) {
1070 				pfsyncstats.pfsyncs_badstate++;
1071 				continue;
1072 			}
1073 			if (ISSET(st->state_flags, PFSTATE_NOSYNC))
1074 				continue;
1075 
1076 			pfsync_update_state_req(st);
1077 		}
1078 	}
1079 
1080 	return (0);
1081 }
1082 
1083 int
1084 pfsync_in_del(caddr_t buf, int len, int count, int flags)
1085 {
1086 	struct pfsync_state *sp;
1087 	struct pf_state_cmp id_key;
1088 	struct pf_state *st;
1089 	int i;
1090 
1091 	for (i = 0; i < count; i++) {
1092 		sp = (struct pfsync_state *)(buf + len * i);
1093 
1094 		bcopy(sp->id, &id_key.id, sizeof(id_key.id));
1095 		id_key.creatorid = sp->creatorid;
1096 
1097 		st = pf_find_state_byid(&id_key);
1098 		if (st == NULL) {
1099 			pfsyncstats.pfsyncs_badstate++;
1100 			continue;
1101 		}
1102 		SET(st->state_flags, PFSTATE_NOSYNC);
1103 		pf_unlink_state(st);
1104 	}
1105 
1106 	return (0);
1107 }
1108 
1109 int
1110 pfsync_in_del_c(caddr_t buf, int len, int count, int flags)
1111 {
1112 	struct pfsync_del_c *sp;
1113 	struct pf_state_cmp id_key;
1114 	struct pf_state *st;
1115 	int i;
1116 
1117 	for (i = 0; i < count; i++) {
1118 		sp = (struct pfsync_del_c *)(buf + len * i);
1119 
1120 		bcopy(&sp->id, &id_key.id, sizeof(id_key.id));
1121 		id_key.creatorid = sp->creatorid;
1122 
1123 		st = pf_find_state_byid(&id_key);
1124 		if (st == NULL) {
1125 			pfsyncstats.pfsyncs_badstate++;
1126 			continue;
1127 		}
1128 
1129 		SET(st->state_flags, PFSTATE_NOSYNC);
1130 		pf_unlink_state(st);
1131 	}
1132 
1133 	return (0);
1134 }
1135 
1136 int
1137 pfsync_in_bus(caddr_t buf, int len, int count, int flags)
1138 {
1139 	struct pfsync_softc *sc = pfsyncif;
1140 	struct pfsync_bus *bus;
1141 
1142 	/* If we're not waiting for a bulk update, who cares. */
1143 	if (sc->sc_ureq_sent == 0)
1144 		return (0);
1145 
1146 	bus = (struct pfsync_bus *)buf;
1147 
1148 	switch (bus->status) {
1149 	case PFSYNC_BUS_START:
1150 		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
1151 		    pf_pool_limits[PF_LIMIT_STATES].limit /
1152 		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
1153 		    sizeof(struct pfsync_state)));
1154 		DPFPRINTF(LOG_INFO, "received bulk update start");
1155 		break;
1156 
1157 	case PFSYNC_BUS_END:
1158 		if (time_uptime - ntohl(bus->endtime) >=
1159 		    sc->sc_ureq_sent) {
1160 			/* that's it, we're happy */
1161 			sc->sc_ureq_sent = 0;
1162 			sc->sc_bulk_tries = 0;
1163 			timeout_del(&sc->sc_bulkfail_tmo);
1164 #if NCARP > 0
1165 			if (!pfsync_sync_ok)
1166 				carp_group_demote_adj(&sc->sc_if, -1,
1167 				    "pfsync bulk done");
1168 #endif
1169 			pfsync_sync_ok = 1;
1170 			DPFPRINTF(LOG_INFO, "received valid bulk update end");
1171 		} else {
1172 			DPFPRINTF(LOG_WARNING, "received invalid "
1173 			    "bulk update end: bad timestamp");
1174 		}
1175 		break;
1176 	}
1177 
1178 	return (0);
1179 }
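
/*
 * Worked example of the bulk-fail timeout above, with illustrative
 * numbers only: with a state limit of 10000, an MTU of 1500 and
 * pfsync_state records on the order of 300 bytes, roughly four states
 * fit per frame, so the timeout is about 4 * hz + 10000 / 4 ticks;
 * i.e. the peer gets four seconds plus one tick per expected bulk
 * update frame before pfsync_bulk_fail() runs.
 */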
1180 
1181 int
1182 pfsync_in_tdb(caddr_t buf, int len, int count, int flags)
1183 {
1184 #if defined(IPSEC)
1185 	struct pfsync_tdb *tp;
1186 	int i;
1187 
1188 	for (i = 0; i < count; i++) {
1189 		tp = (struct pfsync_tdb *)(buf + len * i);
1190 		pfsync_update_net_tdb(tp);
1191 	}
1192 #endif
1193 
1194 	return (0);
1195 }
1196 
1197 #if defined(IPSEC)
1198 /* Update an in-kernel tdb. Silently fail if no tdb is found. */
1199 void
1200 pfsync_update_net_tdb(struct pfsync_tdb *pt)
1201 {
1202 	struct tdb		*tdb;
1203 	int			 s;
1204 
1205 	/* check for invalid values */
1206 	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
1207 	    (pt->dst.sa.sa_family != AF_INET &&
1208 	     pt->dst.sa.sa_family != AF_INET6))
1209 		goto bad;
1210 
1211 	s = spltdb();
1212 	tdb = gettdb(ntohs(pt->rdomain), pt->spi, &pt->dst, pt->sproto);
1213 	if (tdb) {
1214 		pt->rpl = ntohl(pt->rpl);
1215 		pt->cur_bytes = betoh64(pt->cur_bytes);
1216 
1217 		/* Neither replay nor byte counter should ever decrease. */
1218 		if (pt->rpl < tdb->tdb_rpl ||
1219 		    pt->cur_bytes < tdb->tdb_cur_bytes) {
1220 			splx(s);
1221 			goto bad;
1222 		}
1223 
1224 		tdb->tdb_rpl = pt->rpl;
1225 		tdb->tdb_cur_bytes = pt->cur_bytes;
1226 	}
1227 	splx(s);
1228 	return;
1229 
1230  bad:
1231 	DPFPRINTF(LOG_WARNING, "pfsync_insert: PFSYNC_ACT_TDB_UPD: "
1232 	    "invalid value");
1233 	pfsyncstats.pfsyncs_badstate++;
1234 	return;
1235 }
1236 #endif
1237 
1239 int
1240 pfsync_in_eof(caddr_t buf, int len, int count, int flags)
1241 {
1242 	if (len > 0 || count > 0)
1243 		pfsyncstats.pfsyncs_badact++;
1244 
1245 	/* we're done. let the caller return */
1246 	return (1);
1247 }
1248 
1249 int
1250 pfsync_in_error(caddr_t buf, int len, int count, int flags)
1251 {
1252 	pfsyncstats.pfsyncs_badact++;
1253 	return (-1);
1254 }
1255 
1256 int
1257 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
1258 	struct rtentry *rt)
1259 {
1260 	m_freem(m);
1261 	return (0);
1262 }
1263 
1264 /* ARGSUSED */
1265 int
1266 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1267 {
1268 	struct proc *p = curproc;
1269 	struct pfsync_softc *sc = ifp->if_softc;
1270 	struct ifreq *ifr = (struct ifreq *)data;
1271 	struct ip_moptions *imo = &sc->sc_imo;
1272 	struct pfsyncreq pfsyncr;
1273 	struct ifnet    *sifp;
1274 	struct ip *ip;
1275 	int s, error;
1276 
1277 	switch (cmd) {
1278 #if 0
1279 	case SIOCSIFADDR:
1280 	case SIOCAIFADDR:
1281 	case SIOCSIFDSTADDR:
1282 #endif
1283 	case SIOCSIFFLAGS:
1284 		s = splnet();
1285 		if (ifp->if_flags & IFF_UP) {
1286 			ifp->if_flags |= IFF_RUNNING;
1287 			pfsync_request_full_update(sc);
1288 		} else {
1289 			ifp->if_flags &= ~IFF_RUNNING;
1290 
1291 			/* drop everything */
1292 			timeout_del(&sc->sc_tmo);
1293 			pfsync_drop(sc);
1294 
1295 			/* cancel bulk update */
1296 			timeout_del(&sc->sc_bulk_tmo);
1297 			sc->sc_bulk_next = NULL;
1298 			sc->sc_bulk_last = NULL;
1299 		}
1300 		splx(s);
1301 		break;
1302 	case SIOCSIFMTU:
1303 		if (ifr->ifr_mtu <= PFSYNC_MINPKT)
1304 			return (EINVAL);
1305 		if (ifr->ifr_mtu > MCLBYTES) /* XXX could be bigger */
1306 			ifr->ifr_mtu = MCLBYTES;
1307 		s = splnet();
1308 		if (ifr->ifr_mtu < ifp->if_mtu)
1309 			pfsync_sendout();
1310 		ifp->if_mtu = ifr->ifr_mtu;
1311 		splx(s);
1312 		break;
1313 	case SIOCGETPFSYNC:
1314 		bzero(&pfsyncr, sizeof(pfsyncr));
1315 		if (sc->sc_sync_if) {
1316 			strlcpy(pfsyncr.pfsyncr_syncdev,
1317 			    sc->sc_sync_if->if_xname, IFNAMSIZ);
1318 		}
1319 		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
1320 		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
1321 		pfsyncr.pfsyncr_defer = sc->sc_defer;
1322 		return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));
1323 
1324 	case SIOCSETPFSYNC:
1325 		if ((error = suser(p, p->p_acflag)) != 0)
1326 			return (error);
1327 		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
1328 			return (error);
1329 
1330 		s = splnet();
1331 
1332 		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
1333 			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
1334 		else
1335 			sc->sc_sync_peer.s_addr =
1336 			    pfsyncr.pfsyncr_syncpeer.s_addr;
1337 
1338 		if (pfsyncr.pfsyncr_maxupdates > 255) {
1339 			splx(s);
1340 			return (EINVAL);
1341 		}
1342 		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
1343 
1344 		sc->sc_defer = pfsyncr.pfsyncr_defer;
1345 
1346 		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
1347 			sc->sc_sync_if = NULL;
1348 			if (imo->imo_num_memberships > 0) {
1349 				in_delmulti(imo->imo_membership[
1350 				    --imo->imo_num_memberships]);
1351 				imo->imo_multicast_ifp = NULL;
1352 			}
1353 			splx(s);
1354 			break;
1355 		}
1356 
1357 		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) {
1358 			splx(s);
1359 			return (EINVAL);
1360 		}
1361 
1362 		if (sifp->if_mtu < sc->sc_if.if_mtu ||
1363 		    (sc->sc_sync_if != NULL &&
1364 		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
1365 		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
1366 			pfsync_sendout();
1367 		sc->sc_sync_if = sifp;
1368 
1369 		if (imo->imo_num_memberships > 0) {
1370 			in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1371 			imo->imo_multicast_ifp = NULL;
1372 		}
1373 
1374 		if (sc->sc_sync_if &&
1375 		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
1376 			struct in_addr addr;
1377 
1378 			if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) {
1379 				sc->sc_sync_if = NULL;
1380 				splx(s);
1381 				return (EADDRNOTAVAIL);
1382 			}
1383 
1384 			addr.s_addr = INADDR_PFSYNC_GROUP;
1385 
1386 			if ((imo->imo_membership[0] =
1387 			    in_addmulti(&addr, sc->sc_sync_if)) == NULL) {
1388 				sc->sc_sync_if = NULL;
1389 				splx(s);
1390 				return (ENOBUFS);
1391 			}
1392 			imo->imo_num_memberships++;
1393 			imo->imo_multicast_ifp = sc->sc_sync_if;
1394 			imo->imo_multicast_ttl = PFSYNC_DFLTTL;
1395 			imo->imo_multicast_loop = 0;
1396 		}
1397 
1398 		ip = &sc->sc_template;
1399 		bzero(ip, sizeof(*ip));
1400 		ip->ip_v = IPVERSION;
1401 		ip->ip_hl = sizeof(sc->sc_template) >> 2;
1402 		ip->ip_tos = IPTOS_LOWDELAY;
1403 		/* len and id are set later */
1404 		ip->ip_off = htons(IP_DF);
1405 		ip->ip_ttl = PFSYNC_DFLTTL;
1406 		ip->ip_p = IPPROTO_PFSYNC;
1407 		ip->ip_src.s_addr = INADDR_ANY;
1408 		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;
1409 
1410 		pfsync_request_full_update(sc);
1411 		splx(s);
1412 
1413 		break;
1414 
1415 	default:
1416 		return (ENOTTY);
1417 	}
1418 
1419 	return (0);
1420 }
1421 
1422 void
1423 pfsync_out_state(struct pf_state *st, void *buf)
1424 {
1425 	struct pfsync_state *sp = buf;
1426 
1427 	pfsync_state_export(sp, st);
1428 }
1429 
1430 void
1431 pfsync_out_iack(struct pf_state *st, void *buf)
1432 {
1433 	struct pfsync_ins_ack *iack = buf;
1434 
1435 	iack->id = st->id;
1436 	iack->creatorid = st->creatorid;
1437 }
1438 
1439 void
1440 pfsync_out_upd_c(struct pf_state *st, void *buf)
1441 {
1442 	struct pfsync_upd_c *up = buf;
1443 
1444 	bzero(up, sizeof(*up));
1445 	up->id = st->id;
1446 	pf_state_peer_hton(&st->src, &up->src);
1447 	pf_state_peer_hton(&st->dst, &up->dst);
1448 	up->creatorid = st->creatorid;
1449 
1450 	up->expire = pf_state_expires(st);
1451 	if (up->expire <= time_second)
1452 		up->expire = htonl(0);
1453 	else
1454 		up->expire = htonl(up->expire - time_second);
1455 	up->timeout = st->timeout;
1456 }
1457 
1458 void
1459 pfsync_out_del(struct pf_state *st, void *buf)
1460 {
1461 	struct pfsync_del_c *dp = buf;
1462 
1463 	dp->id = st->id;
1464 	dp->creatorid = st->creatorid;
1465 
1466 	SET(st->state_flags, PFSTATE_NOSYNC);
1467 }
1468 
1469 void
1470 pfsync_drop(struct pfsync_softc *sc)
1471 {
1472 	struct pf_state *st;
1473 	struct pfsync_upd_req_item *ur;
1474 	struct tdb *t;
1475 	int q;
1476 
1477 	for (q = 0; q < PFSYNC_S_COUNT; q++) {
1478 		if (TAILQ_EMPTY(&sc->sc_qs[q]))
1479 			continue;
1480 
1481 		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
1482 #ifdef PFSYNC_DEBUG
1483 			KASSERT(st->sync_state == q);
1484 #endif
1485 			st->sync_state = PFSYNC_S_NONE;
1486 		}
1487 		TAILQ_INIT(&sc->sc_qs[q]);
1488 	}
1489 
1490 	while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
1491 		TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
1492 		pool_put(&sc->sc_pool, ur);
1493 	}
1494 
1495 	sc->sc_plus = NULL;
1496 
1497 	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
1498 		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry)
1499 			CLR(t->tdb_flags, TDBF_PFSYNC);
1500 
1501 		TAILQ_INIT(&sc->sc_tdb_q);
1502 	}
1503 
1504 	sc->sc_len = PFSYNC_MINPKT;
1505 }
1506 
1507 void
1508 pfsync_sendout(void)
1509 {
1510 	struct pfsync_softc *sc = pfsyncif;
1511 #if NBPFILTER > 0
1512 	struct ifnet *ifp = &sc->sc_if;
1513 #endif
1514 	struct mbuf *m;
1515 	struct ip *ip;
1516 	struct pfsync_header *ph;
1517 	struct pfsync_subheader *subh;
1518 	struct pf_state *st;
1519 	struct pfsync_upd_req_item *ur;
1520 	struct tdb *t;
1521 
1522 	int offset;
1523 	int q, count = 0;
1524 
1525 	if (sc == NULL || sc->sc_len == PFSYNC_MINPKT)
1526 		return;
1527 
1528 	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
1529 #if NBPFILTER > 0
1530 	    (ifp->if_bpf == NULL && sc->sc_sync_if == NULL)) {
1531 #else
1532 	    sc->sc_sync_if == NULL) {
1533 #endif
1534 		pfsync_drop(sc);
1535 		return;
1536 	}
1537 
1538 	MGETHDR(m, M_DONTWAIT, MT_DATA);
1539 	if (m == NULL) {
1540 		sc->sc_if.if_oerrors++;
1541 		pfsyncstats.pfsyncs_onomem++;
1542 		pfsync_drop(sc);
1543 		return;
1544 	}
1545 
1546 	if (max_linkhdr + sc->sc_len > MHLEN) {
1547 		MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len);
1548 		if (!ISSET(m->m_flags, M_EXT)) {
1549 			m_free(m);
1550 			sc->sc_if.if_oerrors++;
1551 			pfsyncstats.pfsyncs_onomem++;
1552 			pfsync_drop(sc);
1553 			return;
1554 		}
1555 	}
1556 	m->m_data += max_linkhdr;
1557 	m->m_len = m->m_pkthdr.len = sc->sc_len;
1558 
1559 	/* build the ip header */
1560 	ip = mtod(m, struct ip *);
1561 	bcopy(&sc->sc_template, ip, sizeof(*ip));
1562 	offset = sizeof(*ip);
1563 
1564 	ip->ip_len = htons(m->m_pkthdr.len);
1565 	ip->ip_id = htons(ip_randomid());
1566 
1567 	/* build the pfsync header */
1568 	ph = (struct pfsync_header *)(m->m_data + offset);
1569 	bzero(ph, sizeof(*ph));
1570 	offset += sizeof(*ph);
1571 
1572 	ph->version = PFSYNC_VERSION;
1573 	ph->len = htons(sc->sc_len - sizeof(*ip));
1574 	bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
1575 
1576 	if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
1577 		subh = (struct pfsync_subheader *)(m->m_data + offset);
1578 		offset += sizeof(*subh);
1579 
1580 		count = 0;
1581 		while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
1582 			TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
1583 
1584 			bcopy(&ur->ur_msg, m->m_data + offset,
1585 			    sizeof(ur->ur_msg));
1586 			offset += sizeof(ur->ur_msg);
1587 
1588 			pool_put(&sc->sc_pool, ur);
1589 
1590 			count++;
1591 		}
1592 
1593 		bzero(subh, sizeof(*subh));
1594 		subh->len = sizeof(ur->ur_msg) >> 2;
1595 		subh->action = PFSYNC_ACT_UPD_REQ;
1596 		subh->count = htons(count);
1597 	}
1598 
1599 	/* has someone built a custom region for us to add? */
1600 	if (sc->sc_plus != NULL) {
1601 		bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
1602 		offset += sc->sc_pluslen;
1603 
1604 		sc->sc_plus = NULL;
1605 	}
1606 
1607 	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
1608 		subh = (struct pfsync_subheader *)(m->m_data + offset);
1609 		offset += sizeof(*subh);
1610 
1611 		count = 0;
1612 		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) {
1613 			pfsync_out_tdb(t, m->m_data + offset);
1614 			offset += sizeof(struct pfsync_tdb);
1615 			CLR(t->tdb_flags, TDBF_PFSYNC);
1616 
1617 			count++;
1618 		}
1619 		TAILQ_INIT(&sc->sc_tdb_q);
1620 
1621 		bzero(subh, sizeof(*subh));
1622 		subh->action = PFSYNC_ACT_TDB;
1623 		subh->len = sizeof(struct pfsync_tdb) >> 2;
1624 		subh->count = htons(count);
1625 	}
1626 
1627 	/* walk the queues */
1628 	for (q = 0; q < PFSYNC_S_COUNT; q++) {
1629 		if (TAILQ_EMPTY(&sc->sc_qs[q]))
1630 			continue;
1631 
1632 		subh = (struct pfsync_subheader *)(m->m_data + offset);
1633 		offset += sizeof(*subh);
1634 
1635 		count = 0;
1636 		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
1637 #ifdef PFSYNC_DEBUG
1638 			KASSERT(st->sync_state == q);
1639 #endif
1640 			pfsync_qs[q].write(st, m->m_data + offset);
1641 			offset += pfsync_qs[q].len;
1642 
1643 			st->sync_state = PFSYNC_S_NONE;
1644 			count++;
1645 		}
1646 		TAILQ_INIT(&sc->sc_qs[q]);
1647 
1648 		bzero(subh, sizeof(*subh));
1649 		subh->action = pfsync_qs[q].action;
1650 		subh->len = pfsync_qs[q].len >> 2;
1651 		subh->count = htons(count);
1652 	}
1653 
1654 	/* we're done, let's put it on the wire */
1655 #if NBPFILTER > 0
1656 	if (ifp->if_bpf) {
1657 		m->m_data += sizeof(*ip);
1658 		m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
1659 		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
1660 		m->m_data -= sizeof(*ip);
1661 		m->m_len = m->m_pkthdr.len = sc->sc_len;
1662 	}
1663 
1664 	if (sc->sc_sync_if == NULL) {
1665 		sc->sc_len = PFSYNC_MINPKT;
1666 		m_freem(m);
1667 		return;
1668 	}
1669 #endif
1670 
1671 	sc->sc_if.if_opackets++;
1672 	sc->sc_if.if_obytes += m->m_pkthdr.len;
1673 
1674 	if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL) == 0)
1675 		pfsyncstats.pfsyncs_opackets++;
1676 	else
1677 		pfsyncstats.pfsyncs_oerrors++;
1678 
1679 	/* start again */
1680 	sc->sc_len = PFSYNC_MINPKT;
1681 }
1682 
1683 void
1684 pfsync_insert_state(struct pf_state *st)
1685 {
1686 	struct pfsync_softc *sc = pfsyncif;
1687 
1688 	splsoftassert(IPL_SOFTNET);
1689 
1690 	if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) ||
1691 	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
1692 		SET(st->state_flags, PFSTATE_NOSYNC);
1693 		return;
1694 	}
1695 
1696 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
1697 	    ISSET(st->state_flags, PFSTATE_NOSYNC))
1698 		return;
1699 
1700 #ifdef PFSYNC_DEBUG
1701 	KASSERT(st->sync_state == PFSYNC_S_NONE);
1702 #endif
1703 
1704 	if (sc->sc_len == PFSYNC_MINPKT)
1705 		timeout_add_sec(&sc->sc_tmo, 1);
1706 
1707 	pfsync_q_ins(st, PFSYNC_S_INS);
1708 
1709 	st->sync_updates = 0;
1710 }
1711 
1712 int
1713 pfsync_defer(struct pf_state *st, struct mbuf *m)
1714 {
1715 	struct pfsync_softc *sc = pfsyncif;
1716 	struct pfsync_deferral *pd;
1717 
1718 	splsoftassert(IPL_SOFTNET);
1719 
1720 	if (!sc->sc_defer ||
1721 	    ISSET(st->state_flags, PFSTATE_NOSYNC) ||
1722 	    m->m_flags & (M_BCAST|M_MCAST))
1723 		return (0);
1724 
1725 	if (sc->sc_deferred >= 128) {
1726 		pd = TAILQ_FIRST(&sc->sc_deferrals);
1727 		if (timeout_del(&pd->pd_tmo))
1728 			pfsync_undefer(pd, 0);
1729 	}
1730 
1731 	pd = pool_get(&sc->sc_pool, PR_NOWAIT);
1732 	if (pd == NULL)
1733 		return (0);
1734 
1735 	m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
1736 	SET(st->state_flags, PFSTATE_ACK);
1737 
1738 	pd->pd_st = st;
1739 	pd->pd_m = m;
1740 
1741 	sc->sc_deferred++;
1742 	TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
1743 
1744 	timeout_set(&pd->pd_tmo, pfsync_defer_tmo, pd);
1745 	timeout_add_msec(&pd->pd_tmo, 20);
1746 
1747 	schednetisr(NETISR_PFSYNC);
1748 
1749 	return (1);
1750 }
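
/*
 * Deferral lifecycle, for reference: the initial packet of a new state
 * is held here until either the peer acks the insert (pfsync_in_iack
 * -> pfsync_deferred) or the 20ms timeout fires, whichever comes
 * first.  Both paths end in pfsync_undefer(), which transmits or drops
 * the held mbuf.  pfsync_deferred() below only undefers when
 * timeout_del() returns non-zero, i.e. when the timeout had not fired
 * yet, so each deferral is resolved exactly once.
 */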
1751 
1752 void
1753 pfsync_undefer(struct pfsync_deferral *pd, int drop)
1754 {
1755 	struct pfsync_softc *sc = pfsyncif;
1756 
1757 	splsoftassert(IPL_SOFTNET);
1758 
1759 	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
1760 	sc->sc_deferred--;
1761 
1762 	CLR(pd->pd_st->state_flags, PFSTATE_ACK);
1763 	if (drop)
1764 		m_freem(pd->pd_m);
1765 	else {
1766 		switch (pd->pd_st->key[PF_SK_WIRE]->af) {
1767 #ifdef INET
1768 		case AF_INET:
1769 			ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL);
1770 			break;
1771 #endif /* INET */
1772 #ifdef INET6
1773 		case AF_INET6:
1774 			ip6_output(pd->pd_m, NULL, NULL, 0, NULL, NULL, NULL);
1775 			break;
1776 #endif /* INET6 */
1777 		}
1778 	}
1779 
1780 	pool_put(&sc->sc_pool, pd);
1781 }
1782 
1783 void
1784 pfsync_defer_tmo(void *arg)
1785 {
1786 	int s;
1787 
1788 	s = splsoftnet();
1789 	pfsync_undefer(arg, 0);
1790 	splx(s);
1791 }
1792 
1793 void
1794 pfsync_deferred(struct pf_state *st, int drop)
1795 {
1796 	struct pfsync_softc *sc = pfsyncif;
1797 	struct pfsync_deferral *pd;
1798 
1799 	splsoftassert(IPL_SOFTNET);
1800 
1801 	TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
1802 		if (pd->pd_st == st) {
1803 			if (timeout_del(&pd->pd_tmo))
1804 				pfsync_undefer(pd, drop);
1805 			return;
1806 		}
1807 	}
1808 
1809 	panic("pfsync_deferred: unable to find deferred state");
1810 }
1811 
1812 void
1813 pfsync_update_state(struct pf_state *st)
1814 {
1815 	struct pfsync_softc *sc = pfsyncif;
1816 	int sync = 0;
1817 
1818 	splsoftassert(IPL_SOFTNET);
1819 
1820 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
1821 		return;
1822 
1823 	if (ISSET(st->state_flags, PFSTATE_ACK))
1824 		pfsync_deferred(st, 0);
1825 	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
1826 		if (st->sync_state != PFSYNC_S_NONE)
1827 			pfsync_q_del(st);
1828 		return;
1829 	}
1830 
1831 	if (sc->sc_len == PFSYNC_MINPKT)
1832 		timeout_add_sec(&sc->sc_tmo, 1);
1833 
1834 	switch (st->sync_state) {
1835 	case PFSYNC_S_UPD_C:
1836 	case PFSYNC_S_UPD:
1837 	case PFSYNC_S_INS:
1838 		/* we're already handling it */
1839 
1840 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
1841 			st->sync_updates++;
1842 			if (st->sync_updates >= sc->sc_maxupdates)
1843 				sync = 1;
1844 		}
1845 		break;
1846 
1847 	case PFSYNC_S_IACK:
1848 		pfsync_q_del(st);
1849 	case PFSYNC_S_NONE:
1850 		pfsync_q_ins(st, PFSYNC_S_UPD_C);
1851 		st->sync_updates = 0;
1852 		break;
1853 
1854 	default:
1855 		panic("pfsync_update_state: unexpected sync state %d",
1856 		    st->sync_state);
1857 	}
1858 
1859 	if (sync || (time_uptime - st->pfsync_time) < 2)
1860 		schednetisr(NETISR_PFSYNC);
1861 }
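
/*
 * Note on sc_maxupdates: for TCP states already queued as UPD_C,
 * sync_updates counts further changes; once it reaches sc_maxupdates
 * (128 by default, settable up to 255 via SIOCSETPFSYNC) the netisr is
 * scheduled so the accumulated frame goes out immediately instead of
 * waiting for the one-second sc_tmo timeout.
 */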
1862 
1863 void
1864 pfsync_request_full_update(struct pfsync_softc *sc)
1865 {
1866 	if (sc->sc_sync_if && ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
1867 		/* Request a full state table update. */
1868 		sc->sc_ureq_sent = time_uptime;
1869 #if NCARP > 0
1870 		if (pfsync_sync_ok)
1871 			carp_group_demote_adj(&sc->sc_if, 1,
1872 			    "pfsync bulk start");
1873 #endif
1874 		pfsync_sync_ok = 0;
1875 		DPFPRINTF(LOG_INFO, "requesting bulk update");
1876 		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
1877 		    pf_pool_limits[PF_LIMIT_STATES].limit /
1878 		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
1879 		    sizeof(struct pfsync_state)));
1880 		pfsync_request_update(0, 0);
1881 	}
1882 }
1883 
1884 void
1885 pfsync_request_update(u_int32_t creatorid, u_int64_t id)
1886 {
1887 	struct pfsync_softc *sc = pfsyncif;
1888 	struct pfsync_upd_req_item *item;
1889 	size_t nlen = sizeof(struct pfsync_upd_req);
1890 
1891 	/*
1892 	 * this code does nothing to prevent multiple update requests for the
1893 	 * same state being generated.
1894 	 */
1895 
1896 	item = pool_get(&sc->sc_pool, PR_NOWAIT);
1897 	if (item == NULL) {
1898 		/* XXX stats */
1899 		return;
1900 	}
1901 
1902 	item->ur_msg.id = id;
1903 	item->ur_msg.creatorid = creatorid;
1904 
1905 	if (TAILQ_EMPTY(&sc->sc_upd_req_list))
1906 		nlen += sizeof(struct pfsync_subheader);
1907 
1908 	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
1909 		pfsync_sendout();
1910 
1911 		nlen = sizeof(struct pfsync_subheader) +
1912 		    sizeof(struct pfsync_upd_req);
1913 	}
1914 
1915 	TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
1916 	sc->sc_len += nlen;
1917 
1918 	schednetisr(NETISR_PFSYNC);
1919 }
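
/*
 * A minimal sketch (assumption, not driver code) of how the duplicate
 * requests mentioned above could be suppressed, by scanning the
 * pending list before queueing a new item:
 */
#if 0
static int
pfsync_upd_req_queued_sketch(struct pfsync_softc *sc, u_int32_t creatorid,
    u_int64_t id)
{
	struct pfsync_upd_req_item *item;

	TAILQ_FOREACH(item, &sc->sc_upd_req_list, ur_entry) {
		if (item->ur_msg.id == id &&
		    item->ur_msg.creatorid == creatorid)
			return (1);	/* already asked for this state */
	}
	return (0);
}
#endif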
1920 
1921 void
1922 pfsync_update_state_req(struct pf_state *st)
1923 {
1924 	struct pfsync_softc *sc = pfsyncif;
1925 
1926 	if (sc == NULL)
1927 		panic("pfsync_update_state_req: nonexistent instance");
1928 
1929 	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
1930 		if (st->sync_state != PFSYNC_S_NONE)
1931 			pfsync_q_del(st);
1932 		return;
1933 	}
1934 
1935 	switch (st->sync_state) {
1936 	case PFSYNC_S_UPD_C:
1937 	case PFSYNC_S_IACK:
1938 		pfsync_q_del(st);
1939 	case PFSYNC_S_NONE:
1940 		pfsync_q_ins(st, PFSYNC_S_UPD);
1941 		schednetisr(NETISR_PFSYNC);
1942 		return;
1943 
1944 	case PFSYNC_S_INS:
1945 	case PFSYNC_S_UPD:
1946 	case PFSYNC_S_DEL:
1947 		/* we're already handling it */
1948 		return;
1949 
1950 	default:
1951 		panic("pfsync_update_state_req: unexpected sync state %d",
1952 		    st->sync_state);
1953 	}
1954 }
1955 
1956 void
1957 pfsync_delete_state(struct pf_state *st)
1958 {
1959 	struct pfsync_softc *sc = pfsyncif;
1960 
1961 	splsoftassert(IPL_SOFTNET);
1962 
1963 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
1964 		return;
1965 
1966 	if (ISSET(st->state_flags, PFSTATE_ACK))
1967 		pfsync_deferred(st, 1);
1968 	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
1969 		if (st->sync_state != PFSYNC_S_NONE)
1970 			pfsync_q_del(st);
1971 		return;
1972 	}
1973 
1974 	if (sc->sc_len == PFSYNC_MINPKT)
1975 		timeout_add_sec(&sc->sc_tmo, 1);
1976 
1977 	switch (st->sync_state) {
1978 	case PFSYNC_S_INS:
1979 		/* we never got to tell the world so just forget about it */
1980 		pfsync_q_del(st);
1981 		return;
1982 
1983 	case PFSYNC_S_UPD_C:
1984 	case PFSYNC_S_UPD:
1985 	case PFSYNC_S_IACK:
1986 		pfsync_q_del(st);
1987 		/* FALLTHROUGH to putting it on the del list */
1988 
1989 	case PFSYNC_S_NONE:
1990 		pfsync_q_ins(st, PFSYNC_S_DEL);
1991 		return;
1992 
1993 	default:
1994 		panic("pfsync_delete_state: unexpected sync state %d",
1995 		    st->sync_state);
1996 	}
1997 }
1998 
1999 void
2000 pfsync_clear_states(u_int32_t creatorid, const char *ifname)
2001 {
2002 	struct pfsync_softc *sc = pfsyncif;
2003 	struct {
2004 		struct pfsync_subheader subh;
2005 		struct pfsync_clr clr;
2006 	} __packed r;
2007 
2008 	splsoftassert(IPL_SOFTNET);
2009 
2010 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
2011 		return;
2012 
2013 	bzero(&r, sizeof(r));
2014 
2015 	r.subh.action = PFSYNC_ACT_CLR;
2016 	r.subh.len = sizeof(struct pfsync_clr) >> 2;
2017 	r.subh.count = htons(1);
2018 
2019 	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
2020 	r.clr.creatorid = creatorid;
2021 
2022 	pfsync_send_plus(&r, sizeof(r));
2023 }
2024 
2025 void
2026 pfsync_q_ins(struct pf_state *st, int q)
2027 {
2028 	struct pfsync_softc *sc = pfsyncif;
2029 	size_t nlen = pfsync_qs[q].len;
2030 
2031 	KASSERT(st->sync_state == PFSYNC_S_NONE);
2032 
2033 #if defined(PFSYNC_DEBUG)
2034 	if (sc->sc_len < PFSYNC_MINPKT)
2035 		panic("pfsync pkt len is too low %zu", sc->sc_len);
2036 #endif
2037 	if (TAILQ_EMPTY(&sc->sc_qs[q]))
2038 		nlen += sizeof(struct pfsync_subheader);
2039 
2040 	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
2041 		pfsync_sendout();
2042 
2043 		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
2044 	}
2045 
2046 	sc->sc_len += nlen;
2047 	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
2048 	st->sync_state = q;
2049 }
2050 
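/*
 * remove a state from its queue, giving back the space it was
 * charged for (including the subheader once the queue drains).
 */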
2051 void
2052 pfsync_q_del(struct pf_state *st)
2053 {
2054 	struct pfsync_softc *sc = pfsyncif;
2055 	int q = st->sync_state;
2056 
2057 	KASSERT(st->sync_state != PFSYNC_S_NONE);
2058 
2059 	sc->sc_len -= pfsync_qs[q].len;
2060 	TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
2061 	st->sync_state = PFSYNC_S_NONE;
2062 
2063 	if (TAILQ_EMPTY(&sc->sc_qs[q]))
2064 		sc->sc_len -= sizeof(struct pfsync_subheader);
2065 }
2066 
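/*
 * schedule a tdb (ipsec sa) for sync. the first update queues the
 * tdb; later ones just bump a counter, and once sc_maxupdates is
 * reached the pending packet is pushed out via the netisr.
 */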
2067 void
2068 pfsync_update_tdb(struct tdb *t, int output)
2069 {
2070 	struct pfsync_softc *sc = pfsyncif;
2071 	size_t nlen = sizeof(struct pfsync_tdb);
2072 
2073 	if (sc == NULL)
2074 		return;
2075 
2076 	if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
2077 		if (TAILQ_EMPTY(&sc->sc_tdb_q))
2078 			nlen += sizeof(struct pfsync_subheader);
2079 
2080 		if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
2081 			pfsync_sendout();
2082 
2083 			nlen = sizeof(struct pfsync_subheader) +
2084 			    sizeof(struct pfsync_tdb);
2085 		}
2086 
2087 		sc->sc_len += nlen;
2088 		TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
2089 		SET(t->tdb_flags, TDBF_PFSYNC);
2090 		t->tdb_updates = 0;
2091 	} else {
2092 		if (++t->tdb_updates >= sc->sc_maxupdates)
2093 			schednetisr(NETISR_PFSYNC);
2094 	}
2095 
2096 	if (output)
2097 		SET(t->tdb_flags, TDBF_PFSYNC_RPL);
2098 	else
2099 		CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
2100 }
2101 
2102 void
2103 pfsync_delete_tdb(struct tdb *t)
2104 {
2105 	struct pfsync_softc *sc = pfsyncif;
2106 
2107 	if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
2108 		return;
2109 
2110 	sc->sc_len -= sizeof(struct pfsync_tdb);
2111 	TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
2112 	CLR(t->tdb_flags, TDBF_PFSYNC);
2113 
2114 	if (TAILQ_EMPTY(&sc->sc_tdb_q))
2115 		sc->sc_len -= sizeof(struct pfsync_subheader);
2116 }
2117 
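/*
 * serialize a tdb into the wire format at buf; this is presumably
 * the tdb queue's output callback, with buf pointing into the
 * packet being built.
 */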
2118 void
2119 pfsync_out_tdb(struct tdb *t, void *buf)
2120 {
2121 	struct pfsync_tdb *ut = buf;
2122 
2123 	bzero(ut, sizeof(*ut));
2124 	ut->spi = t->tdb_spi;
2125 	bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
2126 	/*
2127 	 * When a failover happens, the master's rpl is probably above
2128 	 * what we see here (we may be up to a second late), so
2129 	 * increase it a bit for outbound tdbs to manage most such
2130 	 * situations.
2131 	 *
2132 	 * For now, just add an offset that is likely to be larger
2133 	 * than the number of packets we can see in one second. The RFC
2134 	 * just says the next packet must have a higher seq value.
2135 	 *
2136 	 * XXX What is a good algorithm for this? We could use
2137 	 * a rate-determined increase, but to know it, we would have
2138 	 * to extend struct tdb.
2139 	 * XXX ut->rpl can wrap over MAXINT, but if so the real tdb
2140 	 * will soon be replaced anyway. For now, just don't handle
2141 	 * this edge case.
2142 	 */
2143 #define RPL_INCR 16384
2144 	ut->rpl = htonl(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
2145 	    RPL_INCR : 0));
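	/*
	 * rough sanity check of the constant: if an assumed peer
	 * forwards ~10000 packets in the up-to-one-second window we
	 * can lag behind, the master's rpl leads ours by ~10000, so
	 * an increment of 16384 still clears it with some margin.
	 */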
2146 	ut->cur_bytes = htobe64(t->tdb_cur_bytes);
2147 	ut->sproto = t->tdb_sproto;
2148 	ut->rdomain = htons(t->tdb_rdomain);
2149 }
2150 
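/*
 * a peer asked for a full table sync. remember where the walk
 * started (sc_bulk_last), announce BUS_START, and let the bulk
 * timeout push the state list out in chunks.
 */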
2151 void
2152 pfsync_bulk_start(void)
2153 {
2154 	struct pfsync_softc *sc = pfsyncif;
2155 
2156 	DPFPRINTF(LOG_INFO, "received bulk update request");
2157 
2158 	if (TAILQ_EMPTY(&state_list))
2159 		pfsync_bulk_status(PFSYNC_BUS_END);
2160 	else {
2161 		sc->sc_ureq_received = time_uptime;
2162 
2163 		if (sc->sc_bulk_next == NULL)
2164 			sc->sc_bulk_next = TAILQ_FIRST(&state_list);
2165 		sc->sc_bulk_last = sc->sc_bulk_next;
2166 
2167 		pfsync_bulk_status(PFSYNC_BUS_START);
2168 		timeout_add(&sc->sc_bulk_tmo, 0);
2169 	}
2170 }
2171 
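/*
 * bulk-send timeout: walk state_list circularly from sc_bulk_next,
 * queueing an update for each state that is not already queued,
 * still has a valid timeout, and predates the bulk request. stop
 * when the walk comes back around to sc_bulk_last, or reschedule
 * once the packet fills up.
 */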
2172 void
2173 pfsync_bulk_update(void *arg)
2174 {
2175 	struct pfsync_softc *sc = arg;
2176 	struct pf_state *st;
2177 	int i = 0;
2178 	int s;
2179 
2180 	s = splsoftnet();
2181 
2182 	st = sc->sc_bulk_next;
2183 
2184 	for (;;) {
2185 		if (st->sync_state == PFSYNC_S_NONE &&
2186 		    st->timeout < PFTM_MAX &&
2187 		    st->pfsync_time <= sc->sc_ureq_received) {
2188 			pfsync_update_state_req(st);
2189 			i++;
2190 		}
2191 
2192 		st = TAILQ_NEXT(st, entry_list);
2193 		if (st == NULL)
2194 			st = TAILQ_FIRST(&state_list);
2195 
2196 		if (st == sc->sc_bulk_last) {
2197 			/* we're done */
2198 			sc->sc_bulk_next = NULL;
2199 			sc->sc_bulk_last = NULL;
2200 			pfsync_bulk_status(PFSYNC_BUS_END);
2201 			break;
2202 		}
2203 
2204 		if (i > 1 && (sc->sc_if.if_mtu - sc->sc_len) <
2205 		    sizeof(struct pfsync_state)) {
2206 			/* we've filled a packet */
2207 			sc->sc_bulk_next = st;
2208 			timeout_add(&sc->sc_bulk_tmo, 1);
2209 			break;
2210 		}
2211 	}
2212 
2213 	splx(s);
2214 }
2215 
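/*
 * send a one-shot BUS message so peers can track the progress of
 * our bulk transfer (start or end, plus the elapsed time).
 */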
2216 void
2217 pfsync_bulk_status(u_int8_t status)
2218 {
2219 	struct {
2220 		struct pfsync_subheader subh;
2221 		struct pfsync_bus bus;
2222 	} __packed r;
2223 
2224 	struct pfsync_softc *sc = pfsyncif;
2225 
2226 	bzero(&r, sizeof(r));
2227 
2228 	r.subh.action = PFSYNC_ACT_BUS;
2229 	r.subh.len = sizeof(struct pfsync_bus) >> 2;
2230 	r.subh.count = htons(1);
2231 
2232 	r.bus.creatorid = pf_status.hostid;
2233 	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
2234 	r.bus.status = status;
2235 
2236 	pfsync_send_plus(&r, sizeof(r));
2237 }
2238 
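/*
 * bulk-receive timeout: if our request went unanswered, retry up to
 * PFSYNC_MAX_BULKTRIES times; after that, assume we are as in sync
 * as we will get and undo the carp demotion taken for the transfer.
 */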
2239 void
2240 pfsync_bulk_fail(void *arg)
2241 {
2242 	struct pfsync_softc *sc = arg;
2243 	int s;
2244 
2245 	s = splsoftnet();
2246 
2247 	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
2248 		/* Try again */
2249 		timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
2250 		pfsync_request_update(0, 0);
2251 	} else {
2252 		/* Pretend the transfer was ok */
2253 		sc->sc_ureq_sent = 0;
2254 		sc->sc_bulk_tries = 0;
2255 #if NCARP > 0
2256 		if (!pfsync_sync_ok)
2257 			carp_group_demote_adj(&sc->sc_if, -1,
2258 			    "pfsync bulk fail");
2259 #endif
2260 		pfsync_sync_ok = 1;
2261 		DPFPRINTF(LOG_ERR, "failed to receive bulk update");
2262 	}
2263 
2264 	splx(s);
2265 }
2266 
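/*
 * tack an out-of-band chunk onto the pending packet and flush it
 * right away; plus is presumably copied out by the immediate
 * pfsync_sendout(), which is why callers can pass a stack buffer.
 */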
2267 void
2268 pfsync_send_plus(void *plus, size_t pluslen)
2269 {
2270 	struct pfsync_softc *sc = pfsyncif;
2271 
2272 	if (sc->sc_len + pluslen > sc->sc_if.if_mtu)
2273 		pfsync_sendout();
2274 
2275 	sc->sc_plus = plus;
2276 	sc->sc_len += (sc->sc_pluslen = pluslen);
2277 
2278 	pfsync_sendout();
2279 }
2280 
2281 int
2282 pfsync_up(void)
2283 {
2284 	struct pfsync_softc *sc = pfsyncif;
2285 
2286 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
2287 		return (0);
2288 
2289 	return (1);
2290 }
2291 
2292 int
2293 pfsync_state_in_use(struct pf_state *st)
2294 {
2295 	struct pfsync_softc *sc = pfsyncif;
2296 
2297 	if (sc == NULL)
2298 		return (0);
2299 
2300 	if (st->sync_state != PFSYNC_S_NONE ||
2301 	    st == sc->sc_bulk_next ||
2302 	    st == sc->sc_bulk_last)
2303 		return (1);
2304 
2305 	return (0);
2306 }
2307 
2308 void
2309 pfsync_timeout(void *arg)
2310 {
2311 	int s;
2312 
2313 	s = splsoftnet();
2314 	pfsync_sendout();
2315 	splx(s);
2316 }
2317 
2318 /* this is a softnet/netisr handler */
2319 void
2320 pfsyncintr(void)
2321 {
2322 	pfsync_sendout();
2323 }
2324 
2325 int
2326 pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
2327     size_t newlen)
2328 {
2329 	/* All sysctl names at this level are terminal. */
2330 	if (namelen != 1)
2331 		return (ENOTDIR);
2332 
2333 	switch (name[0]) {
2334 	case PFSYNCCTL_STATS:
2335 		if (newp != NULL)
2336 			return (EPERM);
2337 		return (sysctl_struct(oldp, oldlenp, newp, newlen,
2338 		    &pfsyncstats, sizeof(pfsyncstats)));
2339 	default:
2340 		return (ENOPROTOOPT);
2341 	}
2342 }
2343