xref: /openbsd-src/sys/net/if_pfsync.c (revision 6a13ef69787db04ae501a22e92fa10865b44fd7c)
1 /*	$OpenBSD: if_pfsync.c,v 1.241 2017/01/20 03:56:46 mpi Exp $	*/
2 
3 /*
4  * Copyright (c) 2002 Michael Shalayeff
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
20  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
24  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
25  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26  * THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
31  *
32  * Permission to use, copy, modify, and distribute this software for any
33  * purpose with or without fee is hereby granted, provided that the above
34  * copyright notice and this permission notice appear in all copies.
35  *
36  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
37  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
38  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
39  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
40  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
41  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
42  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
43  */
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/time.h>
48 #include <sys/malloc.h>
49 #include <sys/mbuf.h>
50 #include <sys/socket.h>
51 #include <sys/ioctl.h>
52 #include <sys/timeout.h>
53 #include <sys/kernel.h>
54 #include <sys/sysctl.h>
55 #include <sys/pool.h>
56 #include <sys/syslog.h>
57 
58 #include <net/if.h>
59 #include <net/if_types.h>
60 #include <net/bpf.h>
61 #include <net/netisr.h>
62 
63 #include <netinet/in.h>
64 #include <netinet/if_ether.h>
65 #include <netinet/ip.h>
66 #include <netinet/in_var.h>
67 #include <netinet/ip_var.h>
68 #include <netinet/ip_ipsp.h>
69 #include <netinet/ip_icmp.h>
70 #include <netinet/icmp6.h>
71 #include <netinet/tcp.h>
72 #include <netinet/tcp_seq.h>
73 #include <netinet/tcp_fsm.h>
74 #include <netinet/udp.h>
75 
76 #ifdef INET6
77 #include <netinet6/in6_var.h>
78 #include <netinet/ip6.h>
79 #include <netinet6/ip6_var.h>
80 #include <netinet6/nd6.h>
81 #endif /* INET6 */
82 
83 #include "carp.h"
84 #if NCARP > 0
85 #include <netinet/ip_carp.h>
86 #endif
87 
88 #define PF_DEBUGNAME	"pfsync: "
89 #include <net/pfvar.h>
90 #include <net/pfvar_priv.h>
91 #include <net/if_pfsync.h>
92 
93 #include "bpfilter.h"
94 #include "pfsync.h"
95 
96 #define PFSYNC_MINPKT ( \
97 	sizeof(struct ip) + \
98 	sizeof(struct pfsync_header))
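
/*
 * A pfsync packet is assembled in place as events queue up: the IP
 * header and pfsync_header above, then one pfsync_subheader per
 * message type, each followed by its messages.  sc_len tracks the
 * packet being built and starts at PFSYNC_MINPKT, i.e. just the two
 * fixed headers with nothing queued yet.
 */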
99 
100 int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
101 	    struct pfsync_state_peer *);
102 
103 int	pfsync_in_clr(caddr_t, int, int, int);
104 int	pfsync_in_iack(caddr_t, int, int, int);
105 int	pfsync_in_upd_c(caddr_t, int, int, int);
106 int	pfsync_in_ureq(caddr_t, int, int, int);
107 int	pfsync_in_del(caddr_t, int, int, int);
108 int	pfsync_in_del_c(caddr_t, int, int, int);
109 int	pfsync_in_bus(caddr_t, int, int, int);
110 int	pfsync_in_tdb(caddr_t, int, int, int);
111 int	pfsync_in_ins(caddr_t, int, int, int);
112 int	pfsync_in_upd(caddr_t, int, int, int);
113 int	pfsync_in_eof(caddr_t, int, int, int);
114 
115 int	pfsync_in_error(caddr_t, int, int, int);
116 
117 struct {
118 	int	(*in)(caddr_t, int, int, int);
119 	size_t	len;
120 } pfsync_acts[] = {
121 	/* PFSYNC_ACT_CLR */
122 	{ pfsync_in_clr,	sizeof(struct pfsync_clr) },
123 	/* PFSYNC_ACT_OINS */
124 	{ pfsync_in_error,	0 },
125 	/* PFSYNC_ACT_INS_ACK */
126 	{ pfsync_in_iack,	sizeof(struct pfsync_ins_ack) },
127 	/* PFSYNC_ACT_OUPD */
128 	{ pfsync_in_error,	0 },
129 	/* PFSYNC_ACT_UPD_C */
130 	{ pfsync_in_upd_c,	sizeof(struct pfsync_upd_c) },
131 	/* PFSYNC_ACT_UPD_REQ */
132 	{ pfsync_in_ureq,	sizeof(struct pfsync_upd_req) },
133 	/* PFSYNC_ACT_DEL */
134 	{ pfsync_in_del,	sizeof(struct pfsync_state) },
135 	/* PFSYNC_ACT_DEL_C */
136 	{ pfsync_in_del_c,	sizeof(struct pfsync_del_c) },
137 	/* PFSYNC_ACT_INS_F */
138 	{ pfsync_in_error,	0 },
139 	/* PFSYNC_ACT_DEL_F */
140 	{ pfsync_in_error,	0 },
141 	/* PFSYNC_ACT_BUS */
142 	{ pfsync_in_bus,	sizeof(struct pfsync_bus) },
143 	/* PFSYNC_ACT_OTDB */
144 	{ pfsync_in_error,	0 },
145 	/* PFSYNC_ACT_EOF */
146 	{ pfsync_in_error,	0 },
147 	/* PFSYNC_ACT_INS */
148 	{ pfsync_in_ins,	sizeof(struct pfsync_state) },
149 	/* PFSYNC_ACT_UPD */
150 	{ pfsync_in_upd,	sizeof(struct pfsync_state) },
151 	/* PFSYNC_ACT_TDB */
152 	{ pfsync_in_tdb,	sizeof(struct pfsync_tdb) },
153 };
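
/*
 * pfsync_input() dispatches through this table using the subheader
 * action as the index; len is the smallest valid size for one
 * message of that action, so undersized subheaders are dropped
 * before a handler ever runs.  The PFSYNC_ACT_O* slots are retired
 * message formats and are rejected via pfsync_in_error.
 */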
154 
155 struct pfsync_q {
156 	void		(*write)(struct pf_state *, void *);
157 	size_t		len;
158 	u_int8_t	action;
159 };
160 
161 /* we have one of these for every PFSYNC_S_ */
162 void	pfsync_out_state(struct pf_state *, void *);
163 void	pfsync_out_iack(struct pf_state *, void *);
164 void	pfsync_out_upd_c(struct pf_state *, void *);
165 void	pfsync_out_del(struct pf_state *, void *);
166 
167 struct pfsync_q pfsync_qs[] = {
168 	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
169 	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
170 	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C },
171 	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
172 	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD }
173 };
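
/*
 * This table must stay in PFSYNC_S_* order (IACK, UPD_C, DEL, INS,
 * UPD): pfsync_sendout() walks sc_qs[q] and serializes each queued
 * state with pfsync_qs[q].write, advertising pfsync_qs[q].action in
 * the subheader it emits for that queue.
 */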
174 
175 void	pfsync_q_ins(struct pf_state *, int);
176 void	pfsync_q_del(struct pf_state *);
177 
178 struct pfsync_upd_req_item {
179 	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
180 	struct pfsync_upd_req			ur_msg;
181 };
182 TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);
183 
184 struct pfsync_deferral {
185 	TAILQ_ENTRY(pfsync_deferral)		 pd_entry;
186 	struct pf_state				*pd_st;
187 	struct mbuf				*pd_m;
188 	struct timeout				 pd_tmo;
189 };
190 TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);
191 
192 #define PFSYNC_PLSIZE	MAX(sizeof(struct pfsync_upd_req_item), \
193 			    sizeof(struct pfsync_deferral))
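
/*
 * One pool backs both update request items and deferrals, so each
 * item must be big enough for whichever of the two structs is
 * larger.
 */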
194 
195 void	pfsync_out_tdb(struct tdb *, void *);
196 
197 struct pfsync_softc {
198 	struct ifnet		 sc_if;
199 	struct ifnet		*sc_sync_if;
200 
201 	struct pool		 sc_pool;
202 
203 	struct ip_moptions	 sc_imo;
204 
205 	struct in_addr		 sc_sync_peer;
206 	u_int8_t		 sc_maxupdates;
207 
208 	struct ip		 sc_template;
209 
210 	struct pf_state_queue	 sc_qs[PFSYNC_S_COUNT];
211 	size_t			 sc_len;
212 
213 	struct pfsync_upd_reqs	 sc_upd_req_list;
214 
215 	int			 sc_initial_bulk;
216 	int			 sc_link_demoted;
217 
218 	int			 sc_defer;
219 	struct pfsync_deferrals	 sc_deferrals;
220 	u_int			 sc_deferred;
221 
222 	void			*sc_plus;
223 	size_t			 sc_pluslen;
224 
225 	u_int32_t		 sc_ureq_sent;
226 	int			 sc_bulk_tries;
227 	struct timeout		 sc_bulkfail_tmo;
228 
229 	u_int32_t		 sc_ureq_received;
230 	struct pf_state		*sc_bulk_next;
231 	struct pf_state		*sc_bulk_last;
232 	struct timeout		 sc_bulk_tmo;
233 
234 	TAILQ_HEAD(, tdb)	 sc_tdb_q;
235 
236 	void			*sc_lhcookie;
237 
238 	struct timeout		 sc_tmo;
239 };
240 
241 struct pfsync_softc	*pfsyncif = NULL;
242 struct pfsyncstats	 pfsyncstats;
243 
244 void	pfsyncattach(int);
245 int	pfsync_clone_create(struct if_clone *, int);
246 int	pfsync_clone_destroy(struct ifnet *);
247 int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
248 	    struct pf_state_peer *);
249 void	pfsync_update_net_tdb(struct pfsync_tdb *);
250 int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
251 	    struct rtentry *);
252 int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
253 void	pfsyncstart(struct ifnet *);
254 void	pfsync_syncdev_state(void *);
255 
256 void	pfsync_deferred(struct pf_state *, int);
257 void	pfsync_undefer(struct pfsync_deferral *, int);
258 void	pfsync_defer_tmo(void *);
259 
260 void	pfsync_cancel_full_update(struct pfsync_softc *);
261 void	pfsync_request_full_update(struct pfsync_softc *);
262 void	pfsync_request_update(u_int32_t, u_int64_t);
263 void	pfsync_update_state_req(struct pf_state *);
264 
265 void	pfsync_drop(struct pfsync_softc *);
266 void	pfsync_sendout(void);
267 void	pfsync_send_plus(void *, size_t);
268 void	pfsync_timeout(void *);
269 void	pfsync_tdb_timeout(void *);
270 
271 void	pfsync_bulk_start(void);
272 void	pfsync_bulk_status(u_int8_t);
273 void	pfsync_bulk_update(void *);
274 void	pfsync_bulk_fail(void *);
275 
276 #define PFSYNC_MAX_BULKTRIES	12
277 int	pfsync_sync_ok;
278 
279 struct if_clone	pfsync_cloner =
280     IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);
281 
282 void
283 pfsyncattach(int npfsync)
284 {
285 	if_clone_attach(&pfsync_cloner);
286 }
287 
288 int
289 pfsync_clone_create(struct if_clone *ifc, int unit)
290 {
291 	struct pfsync_softc *sc;
292 	struct ifnet *ifp;
293 	int q;
294 
295 	if (unit != 0)
296 		return (EINVAL);
297 
298 	pfsync_sync_ok = 1;
299 
300 	sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_WAITOK | M_ZERO);
301 
302 	for (q = 0; q < PFSYNC_S_COUNT; q++)
303 		TAILQ_INIT(&sc->sc_qs[q]);
304 
305 	pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, IPL_SOFTNET, 0, "pfsync",
306 	    NULL);
307 	TAILQ_INIT(&sc->sc_upd_req_list);
308 	TAILQ_INIT(&sc->sc_deferrals);
309 	sc->sc_deferred = 0;
310 
311 	TAILQ_INIT(&sc->sc_tdb_q);
312 
313 	sc->sc_len = PFSYNC_MINPKT;
314 	sc->sc_maxupdates = 128;
315 
316 	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
317 	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
318 	    M_WAITOK | M_ZERO);
319 	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
320 
321 	ifp = &sc->sc_if;
322 	snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
323 	ifp->if_softc = sc;
324 	ifp->if_ioctl = pfsyncioctl;
325 	ifp->if_output = pfsyncoutput;
326 	ifp->if_start = pfsyncstart;
327 	ifp->if_type = IFT_PFSYNC;
328 	IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
329 	ifp->if_hdrlen = sizeof(struct pfsync_header);
330 	ifp->if_mtu = ETHERMTU;
331 	timeout_set_proc(&sc->sc_tmo, pfsync_timeout, sc);
332 	timeout_set_proc(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
333 	timeout_set_proc(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);
334 
335 	if_attach(ifp);
336 	if_alloc_sadl(ifp);
337 
338 #if NCARP > 0
339 	if_addgroup(ifp, "carp");
340 #endif
341 
342 #if NBPFILTER > 0
343 	bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
344 #endif
345 
346 	pfsyncif = sc;
347 
348 	return (0);
349 }
350 
351 int
352 pfsync_clone_destroy(struct ifnet *ifp)
353 {
354 	struct pfsync_softc *sc = ifp->if_softc;
355 	struct pfsync_deferral *pd;
356 
357 	timeout_del(&sc->sc_bulkfail_tmo);
358 	timeout_del(&sc->sc_bulk_tmo);
359 	timeout_del(&sc->sc_tmo);
360 #if NCARP > 0
361 	if (!pfsync_sync_ok)
362 		carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
363 	if (sc->sc_link_demoted)
364 		carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
365 #endif
366 	if (sc->sc_sync_if)
367 		hook_disestablish(
368 		    sc->sc_sync_if->if_linkstatehooks,
369 		    sc->sc_lhcookie);
370 	if_detach(ifp);
371 
372 	pfsync_drop(sc);
373 
374 	while (sc->sc_deferred > 0) {
375 		pd = TAILQ_FIRST(&sc->sc_deferrals);
376 		timeout_del(&pd->pd_tmo);
377 		pfsync_undefer(pd, 0);
378 	}
379 
380 	pool_destroy(&sc->sc_pool);
381 	free(sc->sc_imo.imo_membership, M_IPMOPTS, 0);
382 	free(sc, M_DEVBUF, sizeof(*sc));
383 
384 	pfsyncif = NULL;
385 
386 	return (0);
387 }
388 
389 /*
390  * Start output on the pfsync interface.
391  */
392 void
393 pfsyncstart(struct ifnet *ifp)
394 {
395 	IFQ_PURGE(&ifp->if_snd);
396 }
397 
398 void
399 pfsync_syncdev_state(void *arg)
400 {
401 	struct pfsync_softc *sc = arg;
402 
403 	if (!sc->sc_sync_if || !(sc->sc_if.if_flags & IFF_UP))
404 		return;
405 
406 	if (sc->sc_sync_if->if_link_state == LINK_STATE_DOWN) {
407 		sc->sc_if.if_flags &= ~IFF_RUNNING;
408 		if (!sc->sc_link_demoted) {
409 #if NCARP > 0
410 			carp_group_demote_adj(&sc->sc_if, 1,
411 			    "pfsync link state down");
412 #endif
413 			sc->sc_link_demoted = 1;
414 		}
415 
416 		/* drop everything */
417 		timeout_del(&sc->sc_tmo);
418 		pfsync_drop(sc);
419 
420 		pfsync_cancel_full_update(sc);
421 	} else if (sc->sc_link_demoted) {
422 		sc->sc_if.if_flags |= IFF_RUNNING;
423 
424 		pfsync_request_full_update(sc);
425 	}
426 }
427 
428 int
429 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
430     struct pf_state_peer *d)
431 {
432 	if (s->scrub.scrub_flag && d->scrub == NULL) {
433 		d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
434 		if (d->scrub == NULL)
435 			return (ENOMEM);
436 	}
437 
438 	return (0);
439 }
440 
441 void
442 pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
443 {
444 	pf_state_export(sp, st);
445 }
446 
447 int
448 pfsync_state_import(struct pfsync_state *sp, int flags)
449 {
450 	struct pf_state	*st = NULL;
451 	struct pf_state_key *skw = NULL, *sks = NULL;
452 	struct pf_rule *r = NULL;
453 	struct pfi_kif	*kif;
454 	int pool_flags;
455 	int error;
456 
457 	if (sp->creatorid == 0) {
458 		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
459 		    "invalid creator id: %08x", ntohl(sp->creatorid));
460 		return (EINVAL);
461 	}
462 
463 	if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
464 		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
465 		    "unknown interface: %s", sp->ifname);
466 		if (flags & PFSYNC_SI_IOCTL)
467 			return (EINVAL);
468 		return (0);	/* skip this state */
469 	}
470 
471 	if (sp->af == 0)
472 		return (0);	/* skip this state */
473 
474 	/*
475 	 * If the ruleset checksums match or the state is coming from the ioctl,
476 	 * it's safe to associate the state with the rule of that number.
477 	 */
478 	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
479 	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
480 	    pf_main_ruleset.rules.active.rcount)
481 		r = pf_main_ruleset.rules.active.ptr_array[ntohl(sp->rule)];
482 	else
483 		r = &pf_default_rule;
484 
485 	if (r->max_states && r->states_cur >= r->max_states)
486 		goto cleanup;
487 
488 	if (flags & PFSYNC_SI_IOCTL)
489 		pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
490 	else
491 		pool_flags = PR_NOWAIT | PR_LIMITFAIL | PR_ZERO;
492 
493 	if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
494 		goto cleanup;
495 
496 	if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
497 		goto cleanup;
498 
499 	if ((sp->key[PF_SK_WIRE].af &&
500 	    (sp->key[PF_SK_WIRE].af != sp->key[PF_SK_STACK].af)) ||
501 	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
502 	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
503 	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
504 	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
505 	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
506 	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] ||
507 	    sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) {
508 		if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
509 			goto cleanup;
510 	} else
511 		sks = skw;
512 
513 	/* allocate memory for scrub info */
514 	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
515 	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
516 		goto cleanup;
517 
518 	/* copy to state key(s) */
519 	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
520 	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
521 	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
522 	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
523 	skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain);
524 	PF_REF_INIT(skw->refcnt);
525 	skw->proto = sp->proto;
526 	if (!(skw->af = sp->key[PF_SK_WIRE].af))
527 		skw->af = sp->af;
528 	if (sks != skw) {
529 		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
530 		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
531 		sks->port[0] = sp->key[PF_SK_STACK].port[0];
532 		sks->port[1] = sp->key[PF_SK_STACK].port[1];
533 		sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain);
534 		PF_REF_INIT(sks->refcnt);
535 		if (!(sks->af = sp->key[PF_SK_STACK].af))
536 			sks->af = sp->af;
537 		if (sks->af != skw->af) {
538 			switch (sp->proto) {
539 			case IPPROTO_ICMP:
540 				sks->proto = IPPROTO_ICMPV6;
541 				break;
542 			case IPPROTO_ICMPV6:
543 				sks->proto = IPPROTO_ICMP;
544 				break;
545 			default:
546 				sks->proto = sp->proto;
547 			}
548 		} else
549 			sks->proto = sp->proto;
550 	}
551 	st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]);
552 	st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]);
553 
554 	/* copy to state */
555 	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
556 	st->creation = time_uptime - ntohl(sp->creation);
557 	st->expire = time_uptime;
558 	if (ntohl(sp->expire)) {
559 		u_int32_t timeout;
560 
561 		timeout = r->timeout[sp->timeout];
562 		if (!timeout)
563 			timeout = pf_default_rule.timeout[sp->timeout];
564 
565 		/* sp->expire may have been adaptively scaled by export. */
566 		st->expire -= timeout - ntohl(sp->expire);
567 	}
568 
569 	st->direction = sp->direction;
570 	st->log = sp->log;
571 	st->timeout = sp->timeout;
572 	st->state_flags = ntohs(sp->state_flags);
573 	st->max_mss = ntohs(sp->max_mss);
574 	st->min_ttl = sp->min_ttl;
575 	st->set_tos = sp->set_tos;
576 	st->set_prio[0] = sp->set_prio[0];
577 	st->set_prio[1] = sp->set_prio[1];
578 
579 	st->id = sp->id;
580 	st->creatorid = sp->creatorid;
581 	pf_state_peer_ntoh(&sp->src, &st->src);
582 	pf_state_peer_ntoh(&sp->dst, &st->dst);
583 
584 	st->rule.ptr = r;
585 	st->anchor.ptr = NULL;
586 	st->rt_kif = NULL;
587 
588 	st->pfsync_time = time_uptime;
589 	st->sync_state = PFSYNC_S_NONE;
590 
591 	/* XXX when we have anchors, use STATE_INC_COUNTERS */
592 	r->states_cur++;
593 	r->states_tot++;
594 
595 	if (!ISSET(flags, PFSYNC_SI_IOCTL))
596 		SET(st->state_flags, PFSTATE_NOSYNC);
597 
598 	if (pf_state_insert(kif, &skw, &sks, st) != 0) {
599 		/* XXX when we have anchors, use STATE_DEC_COUNTERS */
600 		r->states_cur--;
601 		error = EEXIST;
602 		goto cleanup_state;
603 	}
604 
605 	if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
606 		CLR(st->state_flags, PFSTATE_NOSYNC);
607 		if (ISSET(st->state_flags, PFSTATE_ACK)) {
608 			pfsync_q_ins(st, PFSYNC_S_IACK);
609 			schednetisr(NETISR_PFSYNC);
610 		}
611 	}
612 	CLR(st->state_flags, PFSTATE_ACK);
613 
614 	return (0);
615 
616  cleanup:
617 	error = ENOMEM;
618 	if (skw == sks)
619 		sks = NULL;
620 	if (skw != NULL)
621 		pool_put(&pf_state_key_pl, skw);
622 	if (sks != NULL)
623 		pool_put(&pf_state_key_pl, sks);
624 
625  cleanup_state:	/* pf_state_insert frees the state keys */
626 	if (st) {
627 		if (st->dst.scrub)
628 			pool_put(&pf_state_scrub_pl, st->dst.scrub);
629 		if (st->src.scrub)
630 			pool_put(&pf_state_scrub_pl, st->src.scrub);
631 		pool_put(&pf_state_pl, st);
632 	}
633 	return (error);
634 }
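
/*
 * A state carries two keys: PF_SK_WIRE as seen on the wire and
 * PF_SK_STACK as seen by the local stack.  They differ only when
 * the state was translated; for example, an outbound NAT state that
 * is 192.168.1.10:34567 -> 203.0.113.5:80 on the stack side may be
 * 198.51.100.1:52001 -> 203.0.113.5:80 on the wire.  The import
 * above allocates a second key only in that case and otherwise lets
 * skw and sks share one allocation.
 */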
635 
636 void
637 pfsync_input(struct mbuf *m, ...)
638 {
639 	struct pfsync_softc *sc = pfsyncif;
640 	struct ip *ip = mtod(m, struct ip *);
641 	struct mbuf *mp;
642 	struct pfsync_header *ph;
643 	struct pfsync_subheader subh;
644 
645 	int offset, offp, len, count, mlen, flags = 0;
646 
647 	pfsyncstats.pfsyncs_ipackets++;
648 
649 	/* verify that we have a sync interface configured */
650 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
651 	    sc->sc_sync_if == NULL || !pf_status.running)
652 		goto done;
653 
654 	/* verify that the packet came in on the right interface */
655 	if (sc->sc_sync_if->if_index != m->m_pkthdr.ph_ifidx) {
656 		pfsyncstats.pfsyncs_badif++;
657 		goto done;
658 	}
659 
660 	sc->sc_if.if_ipackets++;
661 	sc->sc_if.if_ibytes += m->m_pkthdr.len;
662 
663 	/* verify that the IP TTL is 255. */
664 	if (ip->ip_ttl != PFSYNC_DFLTTL) {
665 		pfsyncstats.pfsyncs_badttl++;
666 		goto done;
667 	}
668 
669 	offset = ip->ip_hl << 2;
670 	mp = m_pulldown(m, offset, sizeof(*ph), &offp);
671 	if (mp == NULL) {
672 		pfsyncstats.pfsyncs_hdrops++;
673 		return;
674 	}
675 	ph = (struct pfsync_header *)(mp->m_data + offp);
676 
677 	/* verify the version */
678 	if (ph->version != PFSYNC_VERSION) {
679 		pfsyncstats.pfsyncs_badver++;
680 		goto done;
681 	}
682 	len = ntohs(ph->len) + offset;
683 	if (m->m_pkthdr.len < len) {
684 		pfsyncstats.pfsyncs_badlen++;
685 		goto done;
686 	}
687 
688 	if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
689 		flags = PFSYNC_SI_CKSUM;
690 
691 	offset += sizeof(*ph);
692 	while (offset <= len - sizeof(subh)) {
693 		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
694 		offset += sizeof(subh);
695 
696 		mlen = subh.len << 2;
697 		count = ntohs(subh.count);
698 
699 		if (subh.action >= PFSYNC_ACT_MAX ||
700 		    subh.action >= nitems(pfsync_acts) ||
701 		    mlen < pfsync_acts[subh.action].len) {
702 			/*
703 			 * subheaders are always followed by at least one
704 			 * message, so if the peer is new enough to tell us
705 			 * how big its messages are then we know enough to
706 			 * skip them.
707 			 */
708 			if (count > 0 && mlen > 0) {
709 				offset += count * mlen;
710 				continue;
711 			}
712 			pfsyncstats.pfsyncs_badact++;
713 			goto done;
714 		}
715 
716 		mp = m_pulldown(m, offset, mlen * count, &offp);
717 		if (mp == NULL) {
718 			pfsyncstats.pfsyncs_badlen++;
719 			return;
720 		}
721 
722 		if (pfsync_acts[subh.action].in(mp->m_data + offp,
723 		    mlen, count, flags) != 0)
724 			goto done;
725 
726 		offset += mlen * count;
727 	}
728 
729 done:
730 	m_freem(m);
731 }
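
/*
 * Subheader lengths count 4-byte words, hence "subh.len << 2"
 * above: a message type whose records are 12 bytes long advertises
 * subh.len = 3, each record spans mlen = 3 << 2 = 12 bytes, and the
 * parser steps over mlen * count bytes per subheader.
 */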
732 
733 int
734 pfsync_in_clr(caddr_t buf, int len, int count, int flags)
735 {
736 	struct pfsync_clr *clr;
737 	struct pf_state *st, *nexts;
738 	struct pfi_kif *kif;
739 	u_int32_t creatorid;
740 	int i;
741 
742 	for (i = 0; i < count; i++) {
743 		clr = (struct pfsync_clr *)(buf + len * i);
744 		kif = NULL;
745 		creatorid = clr->creatorid;
746 		if (strlen(clr->ifname) &&
747 		    (kif = pfi_kif_find(clr->ifname)) == NULL)
748 			continue;
749 
750 		for (st = RB_MIN(pf_state_tree_id, &tree_id); st; st = nexts) {
751 			nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
752 			if (st->creatorid == creatorid &&
753 			    ((kif && st->kif == kif) || !kif)) {
754 				SET(st->state_flags, PFSTATE_NOSYNC);
755 				pf_remove_state(st);
756 			}
757 		}
758 	}
759 
760 	return (0);
761 }
762 
763 int
764 pfsync_in_ins(caddr_t buf, int len, int count, int flags)
765 {
766 	struct pfsync_state *sp;
767 	sa_family_t af1, af2;
768 	int i;
769 
770 	for (i = 0; i < count; i++) {
771 		sp = (struct pfsync_state *)(buf + len * i);
772 		af1 = sp->key[0].af;
773 		af2 = sp->key[1].af;
774 
775 		/* check for invalid values */
776 		if (sp->timeout >= PFTM_MAX ||
777 		    sp->src.state > PF_TCPS_PROXY_DST ||
778 		    sp->dst.state > PF_TCPS_PROXY_DST ||
779 		    sp->direction > PF_OUT ||
780 		    (((af1 || af2) &&
781 		     ((af1 != AF_INET && af1 != AF_INET6) ||
782 		      (af2 != AF_INET && af2 != AF_INET6))) ||
783 		    (sp->af != AF_INET && sp->af != AF_INET6))) {
784 			DPFPRINTF(LOG_NOTICE,
785 			    "pfsync_input: PFSYNC5_ACT_INS: invalid value");
786 			pfsyncstats.pfsyncs_badval++;
787 			continue;
788 		}
789 
790 		if (pfsync_state_import(sp, flags) == ENOMEM) {
791 			/* drop out, but process the rest of the actions */
792 			break;
793 		}
794 	}
795 
796 	return (0);
797 }
798 
799 int
800 pfsync_in_iack(caddr_t buf, int len, int count, int flags)
801 {
802 	struct pfsync_ins_ack *ia;
803 	struct pf_state_cmp id_key;
804 	struct pf_state *st;
805 	int i;
806 
807 	for (i = 0; i < count; i++) {
808 		ia = (struct pfsync_ins_ack *)(buf + len * i);
809 
810 		id_key.id = ia->id;
811 		id_key.creatorid = ia->creatorid;
812 
813 		st = pf_find_state_byid(&id_key);
814 		if (st == NULL)
815 			continue;
816 
817 		if (ISSET(st->state_flags, PFSTATE_ACK))
818 			pfsync_deferred(st, 0);
819 	}
820 
821 	return (0);
822 }
823 
824 int
825 pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
826     struct pfsync_state_peer *dst)
827 {
828 	int sync = 0;
829 
830 	/*
831 	 * The state should never go backwards except
832 	 * for syn-proxy states.  Neither should the
833 	 * sequence window slide backwards.
834 	 */
835 	if ((st->src.state > src->state &&
836 	    (st->src.state < PF_TCPS_PROXY_SRC ||
837 	    src->state >= PF_TCPS_PROXY_SRC)) ||
838 
839 	    (st->src.state == src->state &&
840 	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
841 		sync++;
842 	else
843 		pf_state_peer_ntoh(src, &st->src);
844 
845 	if ((st->dst.state > dst->state) ||
846 
847 	    (st->dst.state >= TCPS_SYN_SENT &&
848 	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
849 		sync++;
850 	else
851 		pf_state_peer_ntoh(dst, &st->dst);
852 
853 	return (sync);
854 }
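
/*
 * The return value counts the directions (src, dst) in which the
 * local state is ahead of the update: sync == 2 means both are
 * stale and the update's timeout is not applied, while any nonzero
 * sync makes the caller schedule our fresher copy for transmission.
 * SEQ_GT() compares in modular 32-bit (serial number) arithmetic,
 * so windows that wrap past 2^32 - 1 still count as moving forward.
 */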
855 
856 int
857 pfsync_in_upd(caddr_t buf, int len, int count, int flags)
858 {
859 	struct pfsync_state *sp;
860 	struct pf_state_cmp id_key;
861 	struct pf_state *st;
862 	int sync;
863 
864 	int i;
865 
866 	for (i = 0; i < count; i++) {
867 		sp = (struct pfsync_state *)(buf + len * i);
868 
869 		/* check for invalid values */
870 		if (sp->timeout >= PFTM_MAX ||
871 		    sp->src.state > PF_TCPS_PROXY_DST ||
872 		    sp->dst.state > PF_TCPS_PROXY_DST) {
873 			DPFPRINTF(LOG_NOTICE,
874 			    "pfsync_input: PFSYNC_ACT_UPD: invalid value");
875 			pfsyncstats.pfsyncs_badval++;
876 			continue;
877 		}
878 
879 		id_key.id = sp->id;
880 		id_key.creatorid = sp->creatorid;
881 
882 		st = pf_find_state_byid(&id_key);
883 		if (st == NULL) {
884 			/* insert the update */
885 			if (pfsync_state_import(sp, flags))
886 				pfsyncstats.pfsyncs_badstate++;
887 			continue;
888 		}
889 
890 		if (ISSET(st->state_flags, PFSTATE_ACK))
891 			pfsync_deferred(st, 1);
892 
893 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
894 			sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
895 		else {
896 			sync = 0;
897 
898 			/*
899 			 * Non-TCP protocol state machines always go
900 			 * forward
901 			 */
902 			if (st->src.state > sp->src.state)
903 				sync++;
904 			else
905 				pf_state_peer_ntoh(&sp->src, &st->src);
906 
907 			if (st->dst.state > sp->dst.state)
908 				sync++;
909 			else
910 				pf_state_peer_ntoh(&sp->dst, &st->dst);
911 		}
912 
913 		if (sync < 2) {
914 			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
915 			pf_state_peer_ntoh(&sp->dst, &st->dst);
916 			st->expire = time_uptime;
917 			st->timeout = sp->timeout;
918 		}
919 		st->pfsync_time = time_uptime;
920 
921 		if (sync) {
922 			pfsyncstats.pfsyncs_stale++;
923 
924 			pfsync_update_state(st);
925 			schednetisr(NETISR_PFSYNC);
926 		}
927 	}
928 
929 	return (0);
930 }
931 
932 int
933 pfsync_in_upd_c(caddr_t buf, int len, int count, int flags)
934 {
935 	struct pfsync_upd_c *up;
936 	struct pf_state_cmp id_key;
937 	struct pf_state *st;
938 
939 	int sync;
940 
941 	int i;
942 
943 	for (i = 0; i < count; i++) {
944 		up = (struct pfsync_upd_c *)(buf + len * i);
945 
946 		/* check for invalid values */
947 		if (up->timeout >= PFTM_MAX ||
948 		    up->src.state > PF_TCPS_PROXY_DST ||
949 		    up->dst.state > PF_TCPS_PROXY_DST) {
950 			DPFPRINTF(LOG_NOTICE,
951 			    "pfsync_input: PFSYNC_ACT_UPD_C: invalid value");
952 			pfsyncstats.pfsyncs_badval++;
953 			continue;
954 		}
955 
956 		id_key.id = up->id;
957 		id_key.creatorid = up->creatorid;
958 
959 		st = pf_find_state_byid(&id_key);
960 		if (st == NULL) {
961 			/* We don't have this state. Ask for it. */
962 			pfsync_request_update(id_key.creatorid, id_key.id);
963 			continue;
964 		}
965 
966 		if (ISSET(st->state_flags, PFSTATE_ACK))
967 			pfsync_deferred(st, 1);
968 
969 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
970 			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
971 		else {
972 			sync = 0;
973 			/*
974 			 * Non-TCP protocol state machines always go
975 			 * forward
976 			 */
977 			if (st->src.state > up->src.state)
978 				sync++;
979 			else
980 				pf_state_peer_ntoh(&up->src, &st->src);
981 
982 			if (st->dst.state > up->dst.state)
983 				sync++;
984 			else
985 				pf_state_peer_ntoh(&up->dst, &st->dst);
986 		}
987 		if (sync < 2) {
988 			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
989 			pf_state_peer_ntoh(&up->dst, &st->dst);
990 			st->expire = time_uptime;
991 			st->timeout = up->timeout;
992 		}
993 		st->pfsync_time = time_uptime;
994 
995 		if (sync) {
996 			pfsyncstats.pfsyncs_stale++;
997 
998 			pfsync_update_state(st);
999 			schednetisr(NETISR_PFSYNC);
1000 		}
1001 	}
1002 
1003 	return (0);
1004 }
1005 
1006 int
1007 pfsync_in_ureq(caddr_t buf, int len, int count, int flags)
1008 {
1009 	struct pfsync_upd_req *ur;
1010 	int i;
1011 
1012 	struct pf_state_cmp id_key;
1013 	struct pf_state *st;
1014 
1015 	for (i = 0; i < count; i++) {
1016 		ur = (struct pfsync_upd_req *)(buf + len * i);
1017 
1018 		id_key.id = ur->id;
1019 		id_key.creatorid = ur->creatorid;
1020 
1021 		if (id_key.id == 0 && id_key.creatorid == 0)
1022 			pfsync_bulk_start();
1023 		else {
1024 			st = pf_find_state_byid(&id_key);
1025 			if (st == NULL) {
1026 				pfsyncstats.pfsyncs_badstate++;
1027 				continue;
1028 			}
1029 			if (ISSET(st->state_flags, PFSTATE_NOSYNC))
1030 				continue;
1031 
1032 			pfsync_update_state_req(st);
1033 		}
1034 	}
1035 
1036 	return (0);
1037 }
1038 
1039 int
1040 pfsync_in_del(caddr_t buf, int len, int count, int flags)
1041 {
1042 	struct pfsync_state *sp;
1043 	struct pf_state_cmp id_key;
1044 	struct pf_state *st;
1045 	int i;
1046 
1047 	for (i = 0; i < count; i++) {
1048 		sp = (struct pfsync_state *)(buf + len * i);
1049 
1050 		id_key.id = sp->id;
1051 		id_key.creatorid = sp->creatorid;
1052 
1053 		st = pf_find_state_byid(&id_key);
1054 		if (st == NULL) {
1055 			pfsyncstats.pfsyncs_badstate++;
1056 			continue;
1057 		}
1058 		SET(st->state_flags, PFSTATE_NOSYNC);
1059 		pf_remove_state(st);
1060 	}
1061 
1062 	return (0);
1063 }
1064 
1065 int
1066 pfsync_in_del_c(caddr_t buf, int len, int count, int flags)
1067 {
1068 	struct pfsync_del_c *sp;
1069 	struct pf_state_cmp id_key;
1070 	struct pf_state *st;
1071 	int i;
1072 
1073 	for (i = 0; i < count; i++) {
1074 		sp = (struct pfsync_del_c *)(buf + len * i);
1075 
1076 		id_key.id = sp->id;
1077 		id_key.creatorid = sp->creatorid;
1078 
1079 		st = pf_find_state_byid(&id_key);
1080 		if (st == NULL) {
1081 			pfsyncstats.pfsyncs_badstate++;
1082 			continue;
1083 		}
1084 
1085 		SET(st->state_flags, PFSTATE_NOSYNC);
1086 		pf_remove_state(st);
1087 	}
1088 
1089 	return (0);
1090 }
1091 
1092 int
1093 pfsync_in_bus(caddr_t buf, int len, int count, int flags)
1094 {
1095 	struct pfsync_softc *sc = pfsyncif;
1096 	struct pfsync_bus *bus;
1097 
1098 	/* If we're not waiting for a bulk update, who cares. */
1099 	if (sc->sc_ureq_sent == 0)
1100 		return (0);
1101 
1102 	bus = (struct pfsync_bus *)buf;
1103 
1104 	switch (bus->status) {
1105 	case PFSYNC_BUS_START:
1106 		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
1107 		    pf_pool_limits[PF_LIMIT_STATES].limit /
1108 		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
1109 		    sizeof(struct pfsync_state)));
1110 		DPFPRINTF(LOG_INFO, "received bulk update start");
1111 		break;
1112 
1113 	case PFSYNC_BUS_END:
1114 		if (time_uptime - ntohl(bus->endtime) >=
1115 		    sc->sc_ureq_sent) {
1116 			/* that's it, we're happy */
1117 			sc->sc_ureq_sent = 0;
1118 			sc->sc_bulk_tries = 0;
1119 			timeout_del(&sc->sc_bulkfail_tmo);
1120 #if NCARP > 0
1121 			if (!pfsync_sync_ok)
1122 				carp_group_demote_adj(&sc->sc_if, -1,
1123 				    sc->sc_link_demoted ?
1124 				    "pfsync link state up" :
1125 				    "pfsync bulk done");
1126 			if (sc->sc_initial_bulk) {
1127 				carp_group_demote_adj(&sc->sc_if, -32,
1128 				    "pfsync init");
1129 				sc->sc_initial_bulk = 0;
1130 			}
1131 #endif
1132 			pfsync_sync_ok = 1;
1133 			sc->sc_link_demoted = 0;
1134 			DPFPRINTF(LOG_INFO, "received valid bulk update end");
1135 		} else {
1136 			DPFPRINTF(LOG_WARNING, "received invalid "
1137 			    "bulk update end: bad timestamp");
1138 		}
1139 		break;
1140 	}
1141 
1142 	return (0);
1143 }
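
/*
 * The bulk-fail deadline scales with the size of a full table walk:
 * a 4-second base plus one tick for every packet's worth of states,
 * where one packet carries (if_mtu - PFSYNC_MINPKT) /
 * sizeof(struct pfsync_state) records.  If the deadline passes,
 * pfsync_bulk_fail() runs; PFSYNC_MAX_BULKTRIES bounds its retries.
 */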
1144 
1145 int
1146 pfsync_in_tdb(caddr_t buf, int len, int count, int flags)
1147 {
1148 #if defined(IPSEC)
1149 	struct pfsync_tdb *tp;
1150 	int i;
1151 
1152 	for (i = 0; i < count; i++) {
1153 		tp = (struct pfsync_tdb *)(buf + len * i);
1154 		pfsync_update_net_tdb(tp);
1155 	}
1156 #endif
1157 
1158 	return (0);
1159 }
1160 
1161 #if defined(IPSEC)
1162 /* Update an in-kernel tdb. Silently fail if no tdb is found. */
1163 void
1164 pfsync_update_net_tdb(struct pfsync_tdb *pt)
1165 {
1166 	struct tdb		*tdb;
1167 
1168 	splsoftassert(IPL_SOFTNET);
1169 
1170 	/* check for invalid values */
1171 	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
1172 	    (pt->dst.sa.sa_family != AF_INET &&
1173 	     pt->dst.sa.sa_family != AF_INET6))
1174 		goto bad;
1175 
1176 	tdb = gettdb(ntohs(pt->rdomain), pt->spi,
1177 	    (union sockaddr_union *)&pt->dst, pt->sproto);
1178 	if (tdb) {
1179 		pt->rpl = betoh64(pt->rpl);
1180 		pt->cur_bytes = betoh64(pt->cur_bytes);
1181 
1182 		/* Neither replay nor byte counter should ever decrease. */
1183 		if (pt->rpl < tdb->tdb_rpl ||
1184 		    pt->cur_bytes < tdb->tdb_cur_bytes) {
1185 			goto bad;
1186 		}
1187 
1188 		tdb->tdb_rpl = pt->rpl;
1189 		tdb->tdb_cur_bytes = pt->cur_bytes;
1190 	}
1191 	return;
1192 
1193  bad:
1194 	DPFPRINTF(LOG_WARNING, "pfsync_insert: PFSYNC_ACT_TDB_UPD: "
1195 	    "invalid value");
1196 	pfsyncstats.pfsyncs_badstate++;
1197 	return;
1198 }
1199 #endif
1200 
1201 
1202 int
1203 pfsync_in_eof(caddr_t buf, int len, int count, int flags)
1204 {
1205 	if (len > 0 || count > 0)
1206 		pfsyncstats.pfsyncs_badact++;
1207 
1208 	/* we're done. let the caller return */
1209 	return (1);
1210 }
1211 
1212 int
1213 pfsync_in_error(caddr_t buf, int len, int count, int flags)
1214 {
1215 	pfsyncstats.pfsyncs_badact++;
1216 	return (-1);
1217 }
1218 
1219 int
1220 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
1221 	struct rtentry *rt)
1222 {
1223 	m_freem(m);	/* drop packet */
1224 	return (EAFNOSUPPORT);
1225 }
1226 
1227 int
1228 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1229 {
1230 	struct proc *p = curproc;
1231 	struct pfsync_softc *sc = ifp->if_softc;
1232 	struct ifreq *ifr = (struct ifreq *)data;
1233 	struct ip_moptions *imo = &sc->sc_imo;
1234 	struct pfsyncreq pfsyncr;
1235 	struct ifnet    *sifp;
1236 	struct ip *ip;
1237 	int s, error;
1238 
1239 	switch (cmd) {
1240 #if 0
1241 	case SIOCSIFADDR:
1242 	case SIOCSIFDSTADDR:
1243 #endif
1244 	case SIOCSIFFLAGS:
1245 		s = splnet();
1246 		if ((ifp->if_flags & IFF_RUNNING) == 0 &&
1247 		    (ifp->if_flags & IFF_UP)) {
1248 			ifp->if_flags |= IFF_RUNNING;
1249 
1250 #if NCARP > 0
1251 			sc->sc_initial_bulk = 1;
1252 			carp_group_demote_adj(&sc->sc_if, 32, "pfsync init");
1253 #endif
1254 
1255 			pfsync_request_full_update(sc);
1256 		}
1257 		if ((ifp->if_flags & IFF_RUNNING) &&
1258 		    (ifp->if_flags & IFF_UP) == 0) {
1259 			ifp->if_flags &= ~IFF_RUNNING;
1260 
1261 			/* drop everything */
1262 			timeout_del(&sc->sc_tmo);
1263 			pfsync_drop(sc);
1264 
1265 			pfsync_cancel_full_update(sc);
1266 		}
1267 		splx(s);
1268 		break;
1269 	case SIOCSIFMTU:
1270 		if (!sc->sc_sync_if ||
1271 		    ifr->ifr_mtu <= PFSYNC_MINPKT ||
1272 		    ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
1273 			return (EINVAL);
1274 		s = splnet();
1275 		if (ifr->ifr_mtu < ifp->if_mtu)
1276 			pfsync_sendout();
1277 		ifp->if_mtu = ifr->ifr_mtu;
1278 		splx(s);
1279 		break;
1280 	case SIOCGETPFSYNC:
1281 		bzero(&pfsyncr, sizeof(pfsyncr));
1282 		if (sc->sc_sync_if) {
1283 			strlcpy(pfsyncr.pfsyncr_syncdev,
1284 			    sc->sc_sync_if->if_xname, IFNAMSIZ);
1285 		}
1286 		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
1287 		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
1288 		pfsyncr.pfsyncr_defer = sc->sc_defer;
1289 		return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));
1290 
1291 	case SIOCSETPFSYNC:
1292 		if ((error = suser(p, 0)) != 0)
1293 			return (error);
1294 		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
1295 			return (error);
1296 
1297 		s = splnet();
1298 
1299 		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
1300 			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
1301 		else
1302 			sc->sc_sync_peer.s_addr =
1303 			    pfsyncr.pfsyncr_syncpeer.s_addr;
1304 
1305 		if (pfsyncr.pfsyncr_maxupdates > 255) {
1306 			splx(s);
1307 			return (EINVAL);
1308 		}
1309 		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
1310 
1311 		sc->sc_defer = pfsyncr.pfsyncr_defer;
1312 
1313 		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
1314 			if (sc->sc_sync_if)
1315 				hook_disestablish(
1316 				    sc->sc_sync_if->if_linkstatehooks,
1317 				    sc->sc_lhcookie);
1318 			sc->sc_sync_if = NULL;
1319 			if (imo->imo_num_memberships > 0) {
1320 				in_delmulti(imo->imo_membership[
1321 				    --imo->imo_num_memberships]);
1322 				imo->imo_ifidx = 0;
1323 			}
1324 			splx(s);
1325 			break;
1326 		}
1327 
1328 		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) {
1329 			splx(s);
1330 			return (EINVAL);
1331 		}
1332 
1333 		if (sifp->if_mtu < sc->sc_if.if_mtu ||
1334 		    (sc->sc_sync_if != NULL &&
1335 		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
1336 		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
1337 			pfsync_sendout();
1338 
1339 		if (sc->sc_sync_if)
1340 			hook_disestablish(
1341 			    sc->sc_sync_if->if_linkstatehooks,
1342 			    sc->sc_lhcookie);
1343 		sc->sc_sync_if = sifp;
1344 
1345 		if (imo->imo_num_memberships > 0) {
1346 			in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1347 			imo->imo_ifidx = 0;
1348 		}
1349 
1350 		if (sc->sc_sync_if &&
1351 		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
1352 			struct in_addr addr;
1353 
1354 			if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) {
1355 				sc->sc_sync_if = NULL;
1356 				splx(s);
1357 				return (EADDRNOTAVAIL);
1358 			}
1359 
1360 			addr.s_addr = INADDR_PFSYNC_GROUP;
1361 
1362 			if ((imo->imo_membership[0] =
1363 			    in_addmulti(&addr, sc->sc_sync_if)) == NULL) {
1364 				sc->sc_sync_if = NULL;
1365 				splx(s);
1366 				return (ENOBUFS);
1367 			}
1368 			imo->imo_num_memberships++;
1369 			imo->imo_ifidx = sc->sc_sync_if->if_index;
1370 			imo->imo_ttl = PFSYNC_DFLTTL;
1371 			imo->imo_loop = 0;
1372 		}
1373 
1374 		ip = &sc->sc_template;
1375 		bzero(ip, sizeof(*ip));
1376 		ip->ip_v = IPVERSION;
1377 		ip->ip_hl = sizeof(sc->sc_template) >> 2;
1378 		ip->ip_tos = IPTOS_LOWDELAY;
1379 		/* len and id are set later */
1380 		ip->ip_off = htons(IP_DF);
1381 		ip->ip_ttl = PFSYNC_DFLTTL;
1382 		ip->ip_p = IPPROTO_PFSYNC;
1383 		ip->ip_src.s_addr = INADDR_ANY;
1384 		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;
1385 
1386 		sc->sc_lhcookie =
1387 		    hook_establish(sc->sc_sync_if->if_linkstatehooks, 1,
1388 		    pfsync_syncdev_state, sc);
1389 
1390 		pfsync_request_full_update(sc);
1391 		splx(s);
1392 
1393 		break;
1394 
1395 	default:
1396 		return (ENOTTY);
1397 	}
1398 
1399 	return (0);
1400 }
1401 
1402 void
1403 pfsync_out_state(struct pf_state *st, void *buf)
1404 {
1405 	struct pfsync_state *sp = buf;
1406 
1407 	pfsync_state_export(sp, st);
1408 }
1409 
1410 void
1411 pfsync_out_iack(struct pf_state *st, void *buf)
1412 {
1413 	struct pfsync_ins_ack *iack = buf;
1414 
1415 	iack->id = st->id;
1416 	iack->creatorid = st->creatorid;
1417 }
1418 
1419 void
1420 pfsync_out_upd_c(struct pf_state *st, void *buf)
1421 {
1422 	struct pfsync_upd_c *up = buf;
1423 
1424 	bzero(up, sizeof(*up));
1425 	up->id = st->id;
1426 	pf_state_peer_hton(&st->src, &up->src);
1427 	pf_state_peer_hton(&st->dst, &up->dst);
1428 	up->creatorid = st->creatorid;
1429 	up->timeout = st->timeout;
1430 }
1431 
1432 void
1433 pfsync_out_del(struct pf_state *st, void *buf)
1434 {
1435 	struct pfsync_del_c *dp = buf;
1436 
1437 	dp->id = st->id;
1438 	dp->creatorid = st->creatorid;
1439 
1440 	SET(st->state_flags, PFSTATE_NOSYNC);
1441 }
1442 
1443 void
1444 pfsync_drop(struct pfsync_softc *sc)
1445 {
1446 	struct pf_state *st;
1447 	struct pfsync_upd_req_item *ur;
1448 	struct tdb *t;
1449 	int q;
1450 
1451 	for (q = 0; q < PFSYNC_S_COUNT; q++) {
1452 		if (TAILQ_EMPTY(&sc->sc_qs[q]))
1453 			continue;
1454 
1455 		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
1456 #ifdef PFSYNC_DEBUG
1457 			KASSERT(st->sync_state == q);
1458 #endif
1459 			st->sync_state = PFSYNC_S_NONE;
1460 		}
1461 		TAILQ_INIT(&sc->sc_qs[q]);
1462 	}
1463 
1464 	while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
1465 		TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
1466 		pool_put(&sc->sc_pool, ur);
1467 	}
1468 
1469 	sc->sc_plus = NULL;
1470 
1471 	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
1472 		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry)
1473 			CLR(t->tdb_flags, TDBF_PFSYNC);
1474 
1475 		TAILQ_INIT(&sc->sc_tdb_q);
1476 	}
1477 
1478 	sc->sc_len = PFSYNC_MINPKT;
1479 }
1480 
1481 void
1482 pfsync_sendout(void)
1483 {
1484 	struct pfsync_softc *sc = pfsyncif;
1485 #if NBPFILTER > 0
1486 	struct ifnet *ifp = &sc->sc_if;
1487 #endif
1488 	struct mbuf *m;
1489 	struct ip *ip;
1490 	struct pfsync_header *ph;
1491 	struct pfsync_subheader *subh;
1492 	struct pf_state *st;
1493 	struct pfsync_upd_req_item *ur;
1494 	struct tdb *t;
1495 
1496 	int offset;
1497 	int q, count = 0;
1498 
1499 	if (sc == NULL || sc->sc_len == PFSYNC_MINPKT)
1500 		return;
1501 
1502 	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
1503 #if NBPFILTER > 0
1504 	    (ifp->if_bpf == NULL && sc->sc_sync_if == NULL)) {
1505 #else
1506 	    sc->sc_sync_if == NULL) {
1507 #endif
1508 		pfsync_drop(sc);
1509 		return;
1510 	}
1511 
1512 	MGETHDR(m, M_DONTWAIT, MT_DATA);
1513 	if (m == NULL) {
1514 		sc->sc_if.if_oerrors++;
1515 		pfsyncstats.pfsyncs_onomem++;
1516 		pfsync_drop(sc);
1517 		return;
1518 	}
1519 
1520 	if (max_linkhdr + sc->sc_len > MHLEN) {
1521 		MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len);
1522 		if (!ISSET(m->m_flags, M_EXT)) {
1523 			m_free(m);
1524 			sc->sc_if.if_oerrors++;
1525 			pfsyncstats.pfsyncs_onomem++;
1526 			pfsync_drop(sc);
1527 			return;
1528 		}
1529 	}
1530 	m->m_data += max_linkhdr;
1531 	m->m_len = m->m_pkthdr.len = sc->sc_len;
1532 
1533 	/* build the ip header */
1534 	ip = mtod(m, struct ip *);
1535 	bcopy(&sc->sc_template, ip, sizeof(*ip));
1536 	offset = sizeof(*ip);
1537 
1538 	ip->ip_len = htons(m->m_pkthdr.len);
1539 	ip->ip_id = htons(ip_randomid());
1540 
1541 	/* build the pfsync header */
1542 	ph = (struct pfsync_header *)(m->m_data + offset);
1543 	bzero(ph, sizeof(*ph));
1544 	offset += sizeof(*ph);
1545 
1546 	ph->version = PFSYNC_VERSION;
1547 	ph->len = htons(sc->sc_len - sizeof(*ip));
1548 	bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
1549 
1550 	if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
1551 		subh = (struct pfsync_subheader *)(m->m_data + offset);
1552 		offset += sizeof(*subh);
1553 
1554 		count = 0;
1555 		while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
1556 			TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
1557 
1558 			bcopy(&ur->ur_msg, m->m_data + offset,
1559 			    sizeof(ur->ur_msg));
1560 			offset += sizeof(ur->ur_msg);
1561 
1562 			pool_put(&sc->sc_pool, ur);
1563 
1564 			count++;
1565 		}
1566 
1567 		bzero(subh, sizeof(*subh));
1568 		subh->len = sizeof(ur->ur_msg) >> 2;
1569 		subh->action = PFSYNC_ACT_UPD_REQ;
1570 		subh->count = htons(count);
1571 	}
1572 
1573 	/* has someone built a custom region for us to add? */
1574 	if (sc->sc_plus != NULL) {
1575 		bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
1576 		offset += sc->sc_pluslen;
1577 
1578 		sc->sc_plus = NULL;
1579 	}
1580 
1581 	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
1582 		subh = (struct pfsync_subheader *)(m->m_data + offset);
1583 		offset += sizeof(*subh);
1584 
1585 		count = 0;
1586 		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) {
1587 			pfsync_out_tdb(t, m->m_data + offset);
1588 			offset += sizeof(struct pfsync_tdb);
1589 			CLR(t->tdb_flags, TDBF_PFSYNC);
1590 
1591 			count++;
1592 		}
1593 		TAILQ_INIT(&sc->sc_tdb_q);
1594 
1595 		bzero(subh, sizeof(*subh));
1596 		subh->action = PFSYNC_ACT_TDB;
1597 		subh->len = sizeof(struct pfsync_tdb) >> 2;
1598 		subh->count = htons(count);
1599 	}
1600 
1601 	/* walk the queues */
1602 	for (q = 0; q < PFSYNC_S_COUNT; q++) {
1603 		if (TAILQ_EMPTY(&sc->sc_qs[q]))
1604 			continue;
1605 
1606 		subh = (struct pfsync_subheader *)(m->m_data + offset);
1607 		offset += sizeof(*subh);
1608 
1609 		count = 0;
1610 		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
1611 #ifdef PFSYNC_DEBUG
1612 			KASSERT(st->sync_state == q);
1613 #endif
1614 			pfsync_qs[q].write(st, m->m_data + offset);
1615 			offset += pfsync_qs[q].len;
1616 
1617 			st->sync_state = PFSYNC_S_NONE;
1618 			count++;
1619 		}
1620 		TAILQ_INIT(&sc->sc_qs[q]);
1621 
1622 		bzero(subh, sizeof(*subh));
1623 		subh->action = pfsync_qs[q].action;
1624 		subh->len = pfsync_qs[q].len >> 2;
1625 		subh->count = htons(count);
1626 	}
1627 
1628 	/* we're done, let's put it on the wire */
1629 #if NBPFILTER > 0
1630 	if (ifp->if_bpf) {
1631 		m->m_data += sizeof(*ip);
1632 		m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
1633 		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
1634 		m->m_data -= sizeof(*ip);
1635 		m->m_len = m->m_pkthdr.len = sc->sc_len;
1636 	}
1637 
1638 	if (sc->sc_sync_if == NULL) {
1639 		sc->sc_len = PFSYNC_MINPKT;
1640 		m_freem(m);
1641 		return;
1642 	}
1643 #endif
1644 
1645 	/* start again */
1646 	sc->sc_len = PFSYNC_MINPKT;
1647 
1648 	sc->sc_if.if_opackets++;
1649 	sc->sc_if.if_obytes += m->m_pkthdr.len;
1650 
1651 	m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
1652 
1653 	if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL, 0) == 0)
1654 		pfsyncstats.pfsyncs_opackets++;
1655 	else
1656 		pfsyncstats.pfsyncs_oerrors++;
1657 }
1658 
1659 void
1660 pfsync_insert_state(struct pf_state *st)
1661 {
1662 	struct pfsync_softc *sc = pfsyncif;
1663 
1664 	splsoftassert(IPL_SOFTNET);
1665 
1666 	if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) ||
1667 	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
1668 		SET(st->state_flags, PFSTATE_NOSYNC);
1669 		return;
1670 	}
1671 
1672 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
1673 	    ISSET(st->state_flags, PFSTATE_NOSYNC))
1674 		return;
1675 
1676 #ifdef PFSYNC_DEBUG
1677 	KASSERT(st->sync_state == PFSYNC_S_NONE);
1678 #endif
1679 
1680 	if (sc->sc_len == PFSYNC_MINPKT)
1681 		timeout_add_sec(&sc->sc_tmo, 1);
1682 
1683 	pfsync_q_ins(st, PFSYNC_S_INS);
1684 
1685 	st->sync_updates = 0;
1686 }
1687 
1688 int
1689 pfsync_defer(struct pf_state *st, struct mbuf *m)
1690 {
1691 	struct pfsync_softc *sc = pfsyncif;
1692 	struct pfsync_deferral *pd;
1693 
1694 	splsoftassert(IPL_SOFTNET);
1695 
1696 	if (!sc->sc_defer ||
1697 	    ISSET(st->state_flags, PFSTATE_NOSYNC) ||
1698 	    m->m_flags & (M_BCAST|M_MCAST))
1699 		return (0);
1700 
1701 	if (sc->sc_deferred >= 128) {
1702 		pd = TAILQ_FIRST(&sc->sc_deferrals);
1703 		if (timeout_del(&pd->pd_tmo))
1704 			pfsync_undefer(pd, 0);
1705 	}
1706 
1707 	pd = pool_get(&sc->sc_pool, PR_NOWAIT);
1708 	if (pd == NULL)
1709 		return (0);
1710 
1711 	m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
1712 	SET(st->state_flags, PFSTATE_ACK);
1713 
1714 	pd->pd_st = st;
1715 	pd->pd_m = m;
1716 
1717 	sc->sc_deferred++;
1718 	TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
1719 
1720 	timeout_set_proc(&pd->pd_tmo, pfsync_defer_tmo, pd);
1721 	timeout_add_msec(&pd->pd_tmo, 20);
1722 
1723 	schednetisr(NETISR_PFSYNC);
1724 
1725 	return (1);
1726 }
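
/*
 * Deferral holds the packet that created a state until the peer
 * acks the insert (pfsync_in_iack -> pfsync_deferred) or the 20ms
 * timeout fires, whichever comes first, so traffic triggered by
 * that packet cannot reach the peer before the state does.  At most
 * 128 packets are held; once full, the oldest deferral is released
 * early to make room.
 */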
1727 
1728 void
1729 pfsync_undefer(struct pfsync_deferral *pd, int drop)
1730 {
1731 	struct pfsync_softc *sc = pfsyncif;
1732 	struct pf_pdesc pdesc;
1733 
1734 	splsoftassert(IPL_SOFTNET);
1735 
1736 	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
1737 	sc->sc_deferred--;
1738 
1739 	CLR(pd->pd_st->state_flags, PFSTATE_ACK);
1740 	if (drop)
1741 		m_freem(pd->pd_m);
1742 	else {
1743 		if (pd->pd_st->rule.ptr->rt == PF_ROUTETO) {
1744 			if (pf_setup_pdesc(&pdesc,
1745 			    pd->pd_st->key[PF_SK_WIRE]->af,
1746 			    pd->pd_st->direction, pd->pd_st->rt_kif,
1747 			    pd->pd_m, NULL) != PF_PASS) {
1748 				m_freem(pd->pd_m);
1749 				goto out;
1750 			}
1751 			switch (pd->pd_st->key[PF_SK_WIRE]->af) {
1752 			case AF_INET:
1753 				pf_route(&pdesc,
1754 				    pd->pd_st->rule.ptr, pd->pd_st);
1755 				break;
1756 #ifdef INET6
1757 			case AF_INET6:
1758 				pf_route6(&pdesc,
1759 				    pd->pd_st->rule.ptr, pd->pd_st);
1760 				break;
1761 #endif /* INET6 */
1762 			}
1763 			pd->pd_m = pdesc.m;
1764 		} else {
1765 			switch (pd->pd_st->key[PF_SK_WIRE]->af) {
1766 			case AF_INET:
1767 				ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL,
1768 				    0);
1769 				break;
1770 #ifdef INET6
1771 			case AF_INET6:
1772 				ip6_output(pd->pd_m, NULL, NULL, 0,
1773 				    NULL, NULL);
1774 				break;
1775 #endif /* INET6 */
1776 			}
1777 		}
1778 	}
1779  out:
1780 	pool_put(&sc->sc_pool, pd);
1781 }
1782 
1783 void
1784 pfsync_defer_tmo(void *arg)
1785 {
1786 	int s;
1787 
1788 	NET_LOCK(s);
1789 	pfsync_undefer(arg, 0);
1790 	NET_UNLOCK(s);
1791 }
1792 
1793 void
1794 pfsync_deferred(struct pf_state *st, int drop)
1795 {
1796 	struct pfsync_softc *sc = pfsyncif;
1797 	struct pfsync_deferral *pd;
1798 
1799 	splsoftassert(IPL_SOFTNET);
1800 
1801 	TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
1802 		if (pd->pd_st == st) {
1803 			if (timeout_del(&pd->pd_tmo))
1804 				pfsync_undefer(pd, drop);
1805 			return;
1806 		}
1807 	}
1808 
1809 	panic("pfsync_deferred: unable to find deferred state");
1810 }
1811 
1812 void
1813 pfsync_update_state(struct pf_state *st)
1814 {
1815 	struct pfsync_softc *sc = pfsyncif;
1816 	int sync = 0;
1817 
1818 	splsoftassert(IPL_SOFTNET);
1819 
1820 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
1821 		return;
1822 
1823 	if (ISSET(st->state_flags, PFSTATE_ACK))
1824 		pfsync_deferred(st, 0);
1825 	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
1826 		if (st->sync_state != PFSYNC_S_NONE)
1827 			pfsync_q_del(st);
1828 		return;
1829 	}
1830 
1831 	if (sc->sc_len == PFSYNC_MINPKT)
1832 		timeout_add_sec(&sc->sc_tmo, 1);
1833 
1834 	switch (st->sync_state) {
1835 	case PFSYNC_S_UPD_C:
1836 	case PFSYNC_S_UPD:
1837 	case PFSYNC_S_INS:
1838 		/* we're already handling it */
1839 
1840 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
1841 			st->sync_updates++;
1842 			if (st->sync_updates >= sc->sc_maxupdates)
1843 				sync = 1;
1844 		}
1845 		break;
1846 
1847 	case PFSYNC_S_IACK:
1848 		pfsync_q_del(st);
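		/* FALLTHROUGH */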
1849 	case PFSYNC_S_NONE:
1850 		pfsync_q_ins(st, PFSYNC_S_UPD_C);
1851 		st->sync_updates = 0;
1852 		break;
1853 
1854 	default:
1855 		panic("pfsync_update_state: unexpected sync state %d",
1856 		    st->sync_state);
1857 	}
1858 
1859 	if (sync || (time_uptime - st->pfsync_time) < 2)
1860 		schednetisr(NETISR_PFSYNC);
1861 }
1862 
1863 void
1864 pfsync_cancel_full_update(struct pfsync_softc *sc)
1865 {
1866 	if (timeout_pending(&sc->sc_bulkfail_tmo) ||
1867 	    timeout_pending(&sc->sc_bulk_tmo)) {
1868 #if NCARP > 0
1869 		if (!pfsync_sync_ok)
1870 			carp_group_demote_adj(&sc->sc_if, -1,
1871 			    "pfsync bulk cancelled");
1872 		if (sc->sc_initial_bulk) {
1873 			carp_group_demote_adj(&sc->sc_if, -32,
1874 			    "pfsync init");
1875 			sc->sc_initial_bulk = 0;
1876 		}
1877 #endif
1878 		pfsync_sync_ok = 1;
1879 		DPFPRINTF(LOG_INFO, "cancelling bulk update");
1880 	}
1881 	timeout_del(&sc->sc_bulkfail_tmo);
1882 	timeout_del(&sc->sc_bulk_tmo);
1883 	sc->sc_bulk_next = NULL;
1884 	sc->sc_bulk_last = NULL;
1885 	sc->sc_ureq_sent = 0;
1886 	sc->sc_bulk_tries = 0;
1887 }
1888 
1889 void
1890 pfsync_request_full_update(struct pfsync_softc *sc)
1891 {
1892 	if (sc->sc_sync_if && ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
1893 		/* Request a full state table update. */
1894 		sc->sc_ureq_sent = time_uptime;
1895 #if NCARP > 0
1896 		if (!sc->sc_link_demoted && pfsync_sync_ok)
1897 			carp_group_demote_adj(&sc->sc_if, 1,
1898 			    "pfsync bulk start");
1899 #endif
1900 		pfsync_sync_ok = 0;
1901 		DPFPRINTF(LOG_INFO, "requesting bulk update");
1902 		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
1903 		    pf_pool_limits[PF_LIMIT_STATES].limit /
1904 		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
1905 		    sizeof(struct pfsync_state)));
1906 		pfsync_request_update(0, 0);
1907 	}
1908 }
1909 
1910 void
1911 pfsync_request_update(u_int32_t creatorid, u_int64_t id)
1912 {
1913 	struct pfsync_softc *sc = pfsyncif;
1914 	struct pfsync_upd_req_item *item;
1915 	size_t nlen = sizeof(struct pfsync_upd_req);
1916 
1917 	/*
1918 	 * this code does nothing to prevent multiple update requests for the
1919 	 * same state from being generated.
1920 	 */
1921 
1922 	item = pool_get(&sc->sc_pool, PR_NOWAIT);
1923 	if (item == NULL) {
1924 		/* XXX stats */
1925 		return;
1926 	}
1927 
1928 	item->ur_msg.id = id;
1929 	item->ur_msg.creatorid = creatorid;
1930 
1931 	if (TAILQ_EMPTY(&sc->sc_upd_req_list))
1932 		nlen += sizeof(struct pfsync_subheader);
1933 
1934 	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
1935 		pfsync_sendout();
1936 
1937 		nlen = sizeof(struct pfsync_subheader) +
1938 		    sizeof(struct pfsync_upd_req);
1939 	}
1940 
1941 	TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
1942 	sc->sc_len += nlen;
1943 
1944 	schednetisr(NETISR_PFSYNC);
1945 }
1946 
1947 void
1948 pfsync_update_state_req(struct pf_state *st)
1949 {
1950 	struct pfsync_softc *sc = pfsyncif;
1951 
1952 	if (sc == NULL)
1953 		panic("pfsync_update_state_req: nonexistent instance");
1954 
1955 	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
1956 		if (st->sync_state != PFSYNC_S_NONE)
1957 			pfsync_q_del(st);
1958 		return;
1959 	}
1960 
1961 	switch (st->sync_state) {
1962 	case PFSYNC_S_UPD_C:
1963 	case PFSYNC_S_IACK:
1964 		pfsync_q_del(st);
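		/* FALLTHROUGH */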
1965 	case PFSYNC_S_NONE:
1966 		pfsync_q_ins(st, PFSYNC_S_UPD);
1967 		schednetisr(NETISR_PFSYNC);
1968 		return;
1969 
1970 	case PFSYNC_S_INS:
1971 	case PFSYNC_S_UPD:
1972 	case PFSYNC_S_DEL:
1973 		/* we're already handling it */
1974 		return;
1975 
1976 	default:
1977 		panic("pfsync_update_state_req: unexpected sync state %d",
1978 		    st->sync_state);
1979 	}
1980 }
1981 
1982 void
1983 pfsync_delete_state(struct pf_state *st)
1984 {
1985 	struct pfsync_softc *sc = pfsyncif;
1986 
1987 	splsoftassert(IPL_SOFTNET);
1988 
1989 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
1990 		return;
1991 
1992 	if (ISSET(st->state_flags, PFSTATE_ACK))
1993 		pfsync_deferred(st, 1);
1994 	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
1995 		if (st->sync_state != PFSYNC_S_NONE)
1996 			pfsync_q_del(st);
1997 		return;
1998 	}
1999 
2000 	if (sc->sc_len == PFSYNC_MINPKT)
2001 		timeout_add_sec(&sc->sc_tmo, 1);
2002 
2003 	switch (st->sync_state) {
2004 	case PFSYNC_S_INS:
2005 		/* we never got to tell the world so just forget about it */
2006 		pfsync_q_del(st);
2007 		return;
2008 
2009 	case PFSYNC_S_UPD_C:
2010 	case PFSYNC_S_UPD:
2011 	case PFSYNC_S_IACK:
2012 		pfsync_q_del(st);
2013 		/* FALLTHROUGH to putting it on the del list */
2014 
2015 	case PFSYNC_S_NONE:
2016 		pfsync_q_ins(st, PFSYNC_S_DEL);
2017 		return;
2018 
2019 	default:
2020 		panic("pfsync_delete_state: unexpected sync state %d",
2021 		    st->sync_state);
2022 	}
2023 }
2024 
2025 void
2026 pfsync_clear_states(u_int32_t creatorid, const char *ifname)
2027 {
2028 	struct pfsync_softc *sc = pfsyncif;
2029 	struct {
2030 		struct pfsync_subheader subh;
2031 		struct pfsync_clr clr;
2032 	} __packed r;
2033 
2034 	splsoftassert(IPL_SOFTNET);
2035 
2036 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
2037 		return;
2038 
2039 	bzero(&r, sizeof(r));
2040 
2041 	r.subh.action = PFSYNC_ACT_CLR;
2042 	r.subh.len = sizeof(struct pfsync_clr) >> 2;
2043 	r.subh.count = htons(1);
2044 
2045 	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
2046 	r.clr.creatorid = creatorid;
2047 
2048 	pfsync_send_plus(&r, sizeof(r));
2049 }
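/*
 * subh.len carries the per-message payload size in 32-bit words (hence
 * the >> 2 above) and subh.count the message count in network byte
 * order.  A sketch of how a receiver recovers the byte length; the
 * helper is hypothetical and the block is not compiled.
 */
#if 0
static size_t
example_subh_bytes(const struct pfsync_subheader *subh)
{
	return (((size_t)subh->len << 2) * ntohs(subh->count));
}
#endif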
2050 
2051 void
2052 pfsync_q_ins(struct pf_state *st, int q)
2053 {
2054 	struct pfsync_softc *sc = pfsyncif;
2055 	size_t nlen = pfsync_qs[q].len;
2056 
2057 	KASSERT(st->sync_state == PFSYNC_S_NONE);
2058 
2059 #if defined(PFSYNC_DEBUG)
2060 	if (sc->sc_len < PFSYNC_MINPKT)
2061 		panic("pfsync pkt len is too low %zu", sc->sc_len);
2062 #endif
2063 	if (TAILQ_EMPTY(&sc->sc_qs[q]))
2064 		nlen += sizeof(struct pfsync_subheader);
2065 
2066 	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
2067 		pfsync_sendout();
2068 
2069 		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
2070 	}
2071 
2072 	sc->sc_len += nlen;
2073 	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
2074 	st->sync_state = q;
2075 }
2076 
2077 void
2078 pfsync_q_del(struct pf_state *st)
2079 {
2080 	struct pfsync_softc *sc = pfsyncif;
2081 	int q = st->sync_state;
2082 
2083 	KASSERT(st->sync_state != PFSYNC_S_NONE);
2084 
2085 	sc->sc_len -= pfsync_qs[q].len;
2086 	TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
2087 	st->sync_state = PFSYNC_S_NONE;
2088 
2089 	if (TAILQ_EMPTY(&sc->sc_qs[q]))
2090 		sc->sc_len -= sizeof(struct pfsync_subheader);
2091 }
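/*
 * pfsync_q_ins() and pfsync_q_del() keep sc_len in step with the queue
 * contents: one subheader per non-empty queue plus one fixed-size
 * message per queued state.  A hypothetical debug check expressing the
 * invariant (not compiled; only a lower bound, since tdbs and sc_plus
 * also count toward sc_len):
 */
#if 0
static void
example_check_len(struct pfsync_softc *sc)
{
	struct pf_state *st;
	size_t len = PFSYNC_MINPKT;
	int q;

	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;
		len += sizeof(struct pfsync_subheader);
		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list)
			len += pfsync_qs[q].len;
	}

	KASSERT(len <= sc->sc_len);
}
#endif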
2092 
2093 void
2094 pfsync_update_tdb(struct tdb *t, int output)
2095 {
2096 	struct pfsync_softc *sc = pfsyncif;
2097 	size_t nlen = sizeof(struct pfsync_tdb);
2098 
2099 	if (sc == NULL)
2100 		return;
2101 
2102 	if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
2103 		if (TAILQ_EMPTY(&sc->sc_tdb_q))
2104 			nlen += sizeof(struct pfsync_subheader);
2105 
2106 		if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
2107 			pfsync_sendout();
2108 
2109 			nlen = sizeof(struct pfsync_subheader) +
2110 			    sizeof(struct pfsync_tdb);
2111 		}
2112 
2113 		sc->sc_len += nlen;
2114 		TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
2115 		SET(t->tdb_flags, TDBF_PFSYNC);
2116 		t->tdb_updates = 0;
2117 	} else {
2118 		if (++t->tdb_updates >= sc->sc_maxupdates)
2119 			schednetisr(NETISR_PFSYNC);
2120 	}
2121 
2122 	if (output)
2123 		SET(t->tdb_flags, TDBF_PFSYNC_RPL);
2124 	else
2125 		CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
2126 }
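/*
 * A tdb is queued once and afterwards only counted: every
 * sc_maxupdates further changes force a send via the netisr.  This is
 * the same threshold set with "ifconfig pfsyncN maxupd n".
 */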
2127 
2128 void
2129 pfsync_delete_tdb(struct tdb *t)
2130 {
2131 	struct pfsync_softc *sc = pfsyncif;
2132 
2133 	if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
2134 		return;
2135 
2136 	sc->sc_len -= sizeof(struct pfsync_tdb);
2137 	TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
2138 	CLR(t->tdb_flags, TDBF_PFSYNC);
2139 
2140 	if (TAILQ_EMPTY(&sc->sc_tdb_q))
2141 		sc->sc_len -= sizeof(struct pfsync_subheader);
2142 }
2143 
2144 void
2145 pfsync_out_tdb(struct tdb *t, void *buf)
2146 {
2147 	struct pfsync_tdb *ut = buf;
2148 
2149 	bzero(ut, sizeof(*ut));
2150 	ut->spi = t->tdb_spi;
2151 	bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
2152 	/*
2153 	 * When a failover happens, the master's rpl is probably above
2154 	 * what we see here (we may be up to a second late), so
2155 	 * increase it a bit for outbound tdbs to manage most such
2156 	 * situations.
2157 	 *
2158 	 * For now, just add an offset that is likely to be larger
2159 	 * than the number of packets we can see in one second. The RFC
2160 	 * just says the next packet must have a higher seq value.
2161 	 *
2162 	 * XXX What is a good algorithm for this? We could use
2163 	 * a rate-determined increase, but to know it, we would have
2164 	 * to extend struct tdb.
2165 	 * XXX ut->rpl can wrap over MAXINT, but if so the real tdb
2166 	 * will soon be replaced anyway. For now, just don't handle
2167 	 * this edge case.
2168 	 */
2169 #define RPL_INCR 16384
2170 	ut->rpl = htobe64(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
2171 	    RPL_INCR : 0));
2172 	ut->cur_bytes = htobe64(t->tdb_cur_bytes);
2173 	ut->sproto = t->tdb_sproto;
2174 	ut->rdomain = htons(t->tdb_rdomain);
2175 }
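/*
 * Worked example for RPL_INCR: with the peer's view up to one second
 * stale, a master pushing ~10000 packets/s over an SA leads by at most
 * ~10000 on the replay counter, and 16384 > 10000 keeps the new master
 * ahead of the window after failover.  The rate is workload dependent;
 * this is only the sizing intuition behind the constant.
 */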
2176 
2177 void
2178 pfsync_bulk_start(void)
2179 {
2180 	struct pfsync_softc *sc = pfsyncif;
2181 
2182 	DPFPRINTF(LOG_INFO, "received bulk update request");
2183 
2184 	if (TAILQ_EMPTY(&state_list))
2185 		pfsync_bulk_status(PFSYNC_BUS_END);
2186 	else {
2187 		sc->sc_ureq_received = time_uptime;
2188 
2189 		if (sc->sc_bulk_next == NULL)
2190 			sc->sc_bulk_next = TAILQ_FIRST(&state_list);
2191 		sc->sc_bulk_last = sc->sc_bulk_next;
2192 
2193 		pfsync_bulk_status(PFSYNC_BUS_START);
2194 		timeout_add(&sc->sc_bulk_tmo, 0);
2195 	}
2196 }
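/*
 * Shape of the bulk update exchange implemented here, as a sketch:
 *
 *	requester			responder
 *	---------			---------
 *	upd_req id=0 creatorid=0  ->	pfsync_bulk_start()
 *				  <-	BUS_START
 *				  <-	state updates, one mtu-sized
 *					packet at a time
 *					(pfsync_bulk_update())
 *				  <-	BUS_END
 *
 * If BUS_END never arrives, pfsync_bulk_fail() re-requests every five
 * seconds, up to PFSYNC_MAX_BULKTRIES attempts, before giving up and
 * pretending the transfer completed.
 */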
2197 
2198 void
2199 pfsync_bulk_update(void *arg)
2200 {
2201 	struct pfsync_softc *sc = arg;
2202 	struct pf_state *st;
2203 	int i = 0;
2204 	int s;
2205 
2206 	NET_LOCK(s);
2207 	st = sc->sc_bulk_next;
2208 
2209 	for (;;) {
2210 		if (st->sync_state == PFSYNC_S_NONE &&
2211 		    st->timeout < PFTM_MAX &&
2212 		    st->pfsync_time <= sc->sc_ureq_received) {
2213 			pfsync_update_state_req(st);
2214 			i++;
2215 		}
2216 
2217 		st = TAILQ_NEXT(st, entry_list);
2218 		if (st == NULL)
2219 			st = TAILQ_FIRST(&state_list);
2220 
2221 		if (st == sc->sc_bulk_last) {
2222 			/* we're done */
2223 			sc->sc_bulk_next = NULL;
2224 			sc->sc_bulk_last = NULL;
2225 			pfsync_bulk_status(PFSYNC_BUS_END);
2226 			break;
2227 		}
2228 
2229 		if (i > 1 && (sc->sc_if.if_mtu - sc->sc_len) <
2230 		    sizeof(struct pfsync_state)) {
2231 			/* we've filled a packet */
2232 			sc->sc_bulk_next = st;
2233 			timeout_add(&sc->sc_bulk_tmo, 1);
2234 			break;
2235 		}
2236 	}
2237 	NET_UNLOCK(s);
2238 }
2239 
2240 void
2241 pfsync_bulk_status(u_int8_t status)
2242 {
2243 	struct {
2244 		struct pfsync_subheader subh;
2245 		struct pfsync_bus bus;
2246 	} __packed r;
2247 
2248 	struct pfsync_softc *sc = pfsyncif;
2249 
2250 	bzero(&r, sizeof(r));
2251 
2252 	r.subh.action = PFSYNC_ACT_BUS;
2253 	r.subh.len = sizeof(struct pfsync_bus) >> 2;
2254 	r.subh.count = htons(1);
2255 
2256 	r.bus.creatorid = pf_status.hostid;
2257 	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
2258 	r.bus.status = status;
2259 
2260 	pfsync_send_plus(&r, sizeof(r));
2261 }
2262 
2263 void
2264 pfsync_bulk_fail(void *arg)
2265 {
2266 	struct pfsync_softc *sc = arg;
2267 	int s;
2268 
2269 	NET_LOCK(s);
2270 
2271 	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
2272 		/* Try again */
2273 		timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
2274 		pfsync_request_update(0, 0);
2275 	} else {
2276 		/* Pretend the transfer was ok */
2277 		sc->sc_ureq_sent = 0;
2278 		sc->sc_bulk_tries = 0;
2279 #if NCARP > 0
2280 		if (!pfsync_sync_ok)
2281 			carp_group_demote_adj(&sc->sc_if, -1,
2282 			    sc->sc_link_demoted ?
2283 			    "pfsync link state up" :
2284 			    "pfsync bulk fail");
2285 		if (sc->sc_initial_bulk) {
2286 			carp_group_demote_adj(&sc->sc_if, -32,
2287 			    "pfsync init");
2288 			sc->sc_initial_bulk = 0;
2289 		}
2290 #endif
2291 		pfsync_sync_ok = 1;
2292 		sc->sc_link_demoted = 0;
2293 		DPFPRINTF(LOG_ERR, "failed to receive bulk update");
2294 	}
2295 	NET_UNLOCK(s);
2296 }
2297 
2298 void
2299 pfsync_send_plus(void *plus, size_t pluslen)
2300 {
2301 	struct pfsync_softc *sc = pfsyncif;
2302 
2303 	if (sc->sc_len + pluslen > sc->sc_if.if_mtu)
2304 		pfsync_sendout();
2305 
2306 	sc->sc_plus = plus;
2307 	sc->sc_len += (sc->sc_pluslen = pluslen);
2308 
2309 	pfsync_sendout();
2310 }
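/*
 * Both callers of pfsync_send_plus() (pfsync_clear_states() and
 * pfsync_bulk_status()) pass a struct living on their stack, so the
 * data must leave before the caller returns; that is why the final
 * pfsync_sendout() above is unconditional.
 */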
2311 
2312 int
2313 pfsync_up(void)
2314 {
2315 	struct pfsync_softc *sc = pfsyncif;
2316 
2317 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
2318 		return (0);
2319 
2320 	return (1);
2321 }
2322 
2323 int
2324 pfsync_state_in_use(struct pf_state *st)
2325 {
2326 	struct pfsync_softc *sc = pfsyncif;
2327 
2328 	if (sc == NULL)
2329 		return (0);
2330 
2331 	if (st->sync_state != PFSYNC_S_NONE ||
2332 	    st == sc->sc_bulk_next ||
2333 	    st == sc->sc_bulk_last)
2334 		return (1);
2335 
2336 	return (0);
2337 }
2338 
2339 void
2340 pfsync_timeout(void *arg)
2341 {
2342 	int s;
2343 
2344 	NET_LOCK(s);
2345 	pfsync_sendout();
2346 	NET_UNLOCK(s);
2347 }
2348 
2349 /* this is a softnet/netisr handler */
2350 void
2351 pfsyncintr(void)
2352 {
2353 	pfsync_sendout();
2354 }
2355 
2356 int
2357 pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
2358     size_t newlen)
2359 {
2360 	/* All sysctl names at this level are terminal. */
2361 	if (namelen != 1)
2362 		return (ENOTDIR);
2363 
2364 	switch (name[0]) {
2365 	case PFSYNCCTL_STATS:
2366 		if (newp != NULL)
2367 			return (EPERM);
2368 		return (sysctl_struct(oldp, oldlenp, newp, newlen,
2369 		    &pfsyncstats, sizeof(pfsyncstats)));
2370 	default:
2371 		return (ENOPROTOOPT);
2372 	}
2373 }
2374
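/*
 * Userland sketch of reading pfsyncstats through this handler, roughly
 * what netstat(1) does.  The mib path is an assumption here except for
 * PFSYNCCTL_STATS; illustration only, not compiled.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <netinet/in.h>
#include <net/if_pfsync.h>
#include <err.h>

int
main(void)
{
	int mib[4] = { CTL_NET, PF_INET, IPPROTO_PFSYNC, PFSYNCCTL_STATS };
	struct pfsyncstats stats;
	size_t len = sizeof(stats);

	if (sysctl(mib, 4, &stats, &len, NULL, 0) == -1)
		err(1, "sysctl");
	return (0);
}
#endif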