xref: /openbsd-src/sys/net/if_pfsync.c (revision f763167468dba5339ed4b14b7ecaca2a397ab0f6)
1 /*	$OpenBSD: if_pfsync.c,v 1.254 2017/08/11 21:24:19 mpi Exp $	*/
2 
3 /*
4  * Copyright (c) 2002 Michael Shalayeff
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
20  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
24  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
25  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26  * THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
31  *
32  * Permission to use, copy, modify, and distribute this software for any
33  * purpose with or without fee is hereby granted, provided that the above
34  * copyright notice and this permission notice appear in all copies.
35  *
36  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
37  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
38  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
39  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
40  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
41  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
42  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
43  */
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/time.h>
48 #include <sys/malloc.h>
49 #include <sys/mbuf.h>
50 #include <sys/socket.h>
51 #include <sys/ioctl.h>
52 #include <sys/timeout.h>
53 #include <sys/kernel.h>
54 #include <sys/sysctl.h>
55 #include <sys/pool.h>
56 #include <sys/syslog.h>
57 
58 #include <net/if.h>
59 #include <net/if_types.h>
60 #include <net/bpf.h>
61 #include <net/netisr.h>
62 
63 #include <netinet/in.h>
64 #include <netinet/if_ether.h>
65 #include <netinet/ip.h>
66 #include <netinet/in_var.h>
67 #include <netinet/ip_var.h>
68 #include <netinet/ip_ipsp.h>
69 #include <netinet/ip_icmp.h>
70 #include <netinet/icmp6.h>
71 #include <netinet/tcp.h>
72 #include <netinet/tcp_seq.h>
73 #include <netinet/tcp_fsm.h>
74 #include <netinet/udp.h>
75 
76 #ifdef INET6
77 #include <netinet6/in6_var.h>
78 #include <netinet/ip6.h>
79 #include <netinet6/ip6_var.h>
80 #include <netinet6/nd6.h>
81 #endif /* INET6 */
82 
83 #include "carp.h"
84 #if NCARP > 0
85 #include <netinet/ip_carp.h>
86 #endif
87 
88 #define PF_DEBUGNAME	"pfsync: "
89 #include <net/pfvar.h>
90 #include <net/pfvar_priv.h>
91 #include <net/if_pfsync.h>
92 
93 #include "bpfilter.h"
94 #include "pfsync.h"
95 
96 #define PFSYNC_MINPKT ( \
97 	sizeof(struct ip) + \
98 	sizeof(struct pfsync_header))
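/*
 * Layout sketch of a pfsync packet as assembled by pfsync_sendout()
 * below (a bare struct ip is 20 bytes; the pfsync header carries the
 * version, length and pf ruleset checksum):
 *
 *	+-----------+---------------+------+----------+------+-----
 *	| struct ip | pfsync_header | subh | messages | subh | ...
 *	+-----------+---------------+------+----------+------+-----
 */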
99 
100 int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
101 	    struct pfsync_state_peer *);
102 
103 int	pfsync_in_clr(caddr_t, int, int, int);
104 int	pfsync_in_iack(caddr_t, int, int, int);
105 int	pfsync_in_upd_c(caddr_t, int, int, int);
106 int	pfsync_in_ureq(caddr_t, int, int, int);
107 int	pfsync_in_del(caddr_t, int, int, int);
108 int	pfsync_in_del_c(caddr_t, int, int, int);
109 int	pfsync_in_bus(caddr_t, int, int, int);
110 int	pfsync_in_tdb(caddr_t, int, int, int);
111 int	pfsync_in_ins(caddr_t, int, int, int);
112 int	pfsync_in_upd(caddr_t, int, int, int);
113 int	pfsync_in_eof(caddr_t, int, int, int);
114 
115 int	pfsync_in_error(caddr_t, int, int, int);
116 
117 struct {
118 	int	(*in)(caddr_t, int, int, int);
119 	size_t	len;
120 } pfsync_acts[] = {
121 	/* PFSYNC_ACT_CLR */
122 	{ pfsync_in_clr,	sizeof(struct pfsync_clr) },
123 	 /* PFSYNC_ACT_OINS */
124 	{ pfsync_in_error,	0 },
125 	/* PFSYNC_ACT_INS_ACK */
126 	{ pfsync_in_iack,	sizeof(struct pfsync_ins_ack) },
127 	/* PFSYNC_ACT_OUPD */
128 	{ pfsync_in_error,	0 },
129 	/* PFSYNC_ACT_UPD_C */
130 	{ pfsync_in_upd_c,	sizeof(struct pfsync_upd_c) },
131 	/* PFSYNC_ACT_UPD_REQ */
132 	{ pfsync_in_ureq,	sizeof(struct pfsync_upd_req) },
133 	/* PFSYNC_ACT_DEL */
134 	{ pfsync_in_del,	sizeof(struct pfsync_state) },
135 	/* PFSYNC_ACT_DEL_C */
136 	{ pfsync_in_del_c,	sizeof(struct pfsync_del_c) },
137 	/* PFSYNC_ACT_INS_F */
138 	{ pfsync_in_error,	0 },
139 	/* PFSYNC_ACT_DEL_F */
140 	{ pfsync_in_error,	0 },
141 	/* PFSYNC_ACT_BUS */
142 	{ pfsync_in_bus,	sizeof(struct pfsync_bus) },
143 	/* PFSYNC_ACT_OTDB */
144 	{ pfsync_in_error,	0 },
145 	/* PFSYNC_ACT_EOF */
146 	{ pfsync_in_error,	0 },
147 	/* PFSYNC_ACT_INS */
148 	{ pfsync_in_ins,	sizeof(struct pfsync_state) },
149 	/* PFSYNC_ACT_UPD */
150 	{ pfsync_in_upd,	sizeof(struct pfsync_state) },
151 	/* PFSYNC_ACT_TDB */
152 	{ pfsync_in_tdb,	sizeof(struct pfsync_tdb) },
153 };
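/*
 * Dispatch sketch (mirroring pfsync_input() below): the subheader's
 * action indexes pfsync_acts[], and the handler is handed a flat run
 * of `count` messages of `mlen` bytes each:
 *
 *	e = pfsync_acts[subh.action].in(n->m_data + noff, mlen, count,
 *	    flags);
 */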
154 
155 struct pfsync_q {
156 	void		(*write)(struct pf_state *, void *);
157 	size_t		len;
158 	u_int8_t	action;
159 };
160 
161 /* we have one of these for every PFSYNC_S_ */
162 void	pfsync_out_state(struct pf_state *, void *);
163 void	pfsync_out_iack(struct pf_state *, void *);
164 void	pfsync_out_upd_c(struct pf_state *, void *);
165 void	pfsync_out_del(struct pf_state *, void *);
166 
167 struct pfsync_q pfsync_qs[] = {
168 	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
169 	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
170 	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C },
171 	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
172 	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD }
173 };
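/*
 * The queue index doubles as the state's sync_state, so the order of
 * pfsync_qs[] must match the PFSYNC_S_* constants; pfsync_sendout()
 * drains queue q with calls of the form
 *
 *	pfsync_qs[q].write(st, m->m_data + offset);
 */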
174 
175 void	pfsync_q_ins(struct pf_state *, int);
176 void	pfsync_q_del(struct pf_state *);
177 
178 struct pfsync_upd_req_item {
179 	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
180 	struct pfsync_upd_req			ur_msg;
181 };
182 TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);
183 
184 struct pfsync_deferral {
185 	TAILQ_ENTRY(pfsync_deferral)		 pd_entry;
186 	struct pf_state				*pd_st;
187 	struct mbuf				*pd_m;
188 	struct timeout				 pd_tmo;
189 };
190 TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);
191 
192 #define PFSYNC_PLSIZE	MAX(sizeof(struct pfsync_upd_req_item), \
193 			    sizeof(struct pfsync_deferral))
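/*
 * Update-request items and deferrals share the one sc_pool, so the
 * item size is the larger of the two structures and a single
 * pool_get() can back either type.
 */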
194 
195 void	pfsync_out_tdb(struct tdb *, void *);
196 
197 struct pfsync_softc {
198 	struct ifnet		 sc_if;
199 	struct ifnet		*sc_sync_if;
200 
201 	struct pool		 sc_pool;
202 
203 	struct ip_moptions	 sc_imo;
204 
205 	struct in_addr		 sc_sync_peer;
206 	u_int8_t		 sc_maxupdates;
207 
208 	struct ip		 sc_template;
209 
210 	struct pf_state_queue	 sc_qs[PFSYNC_S_COUNT];
211 	size_t			 sc_len;
212 
213 	struct pfsync_upd_reqs	 sc_upd_req_list;
214 
215 	int			 sc_initial_bulk;
216 	int			 sc_link_demoted;
217 
218 	int			 sc_defer;
219 	struct pfsync_deferrals	 sc_deferrals;
220 	u_int			 sc_deferred;
221 
222 	void			*sc_plus;
223 	size_t			 sc_pluslen;
224 
225 	u_int32_t		 sc_ureq_sent;
226 	int			 sc_bulk_tries;
227 	struct timeout		 sc_bulkfail_tmo;
228 
229 	u_int32_t		 sc_ureq_received;
230 	struct pf_state		*sc_bulk_next;
231 	struct pf_state		*sc_bulk_last;
232 	struct timeout		 sc_bulk_tmo;
233 
234 	TAILQ_HEAD(, tdb)	 sc_tdb_q;
235 
236 	void			*sc_lhcookie;
237 	void			*sc_dhcookie;
238 
239 	struct timeout		 sc_tmo;
240 };
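/*
 * Note on sc_len: it tracks the size of the packet pfsync_sendout()
 * would build right now.  It starts at PFSYNC_MINPKT, is charged by
 * pfsync_q_ins(), pfsync_request_update() and pfsync_update_tdb(),
 * and is reset to PFSYNC_MINPKT when the packet is sent or dropped.
 */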
241 
242 struct pfsync_softc	*pfsyncif = NULL;
243 struct cpumem		*pfsynccounters;
244 
245 void	pfsyncattach(int);
246 int	pfsync_clone_create(struct if_clone *, int);
247 int	pfsync_clone_destroy(struct ifnet *);
248 int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
249 	    struct pf_state_peer *);
250 void	pfsync_update_net_tdb(struct pfsync_tdb *);
251 int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
252 	    struct rtentry *);
253 int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
254 void	pfsyncstart(struct ifnet *);
255 void	pfsync_syncdev_state(void *);
256 void	pfsync_ifdetach(void *);
257 
258 void	pfsync_deferred(struct pf_state *, int);
259 void	pfsync_undefer(struct pfsync_deferral *, int);
260 void	pfsync_defer_tmo(void *);
261 
262 void	pfsync_cancel_full_update(struct pfsync_softc *);
263 void	pfsync_request_full_update(struct pfsync_softc *);
264 void	pfsync_request_update(u_int32_t, u_int64_t);
265 void	pfsync_update_state_req(struct pf_state *);
266 
267 void	pfsync_drop(struct pfsync_softc *);
268 void	pfsync_sendout(void);
269 void	pfsync_send_plus(void *, size_t);
270 void	pfsync_timeout(void *);
271 void	pfsync_tdb_timeout(void *);
272 
273 void	pfsync_bulk_start(void);
274 void	pfsync_bulk_status(u_int8_t);
275 void	pfsync_bulk_update(void *);
276 void	pfsync_bulk_fail(void *);
277 
278 #define PFSYNC_MAX_BULKTRIES	12
279 int	pfsync_sync_ok;
280 
281 struct if_clone	pfsync_cloner =
282     IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);
283 
284 void
285 pfsyncattach(int npfsync)
286 {
287 	if_clone_attach(&pfsync_cloner);
288 	pfsynccounters = counters_alloc(pfsyncs_ncounters);
289 }
290 
291 int
292 pfsync_clone_create(struct if_clone *ifc, int unit)
293 {
294 	struct pfsync_softc *sc;
295 	struct ifnet *ifp;
296 	int q;
297 
298 	if (unit != 0)
299 		return (EINVAL);
300 
301 	pfsync_sync_ok = 1;
302 
303 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
304 
305 	for (q = 0; q < PFSYNC_S_COUNT; q++)
306 		TAILQ_INIT(&sc->sc_qs[q]);
307 
308 	pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, IPL_SOFTNET, 0, "pfsync",
309 	    NULL);
310 	TAILQ_INIT(&sc->sc_upd_req_list);
311 	TAILQ_INIT(&sc->sc_deferrals);
312 	sc->sc_deferred = 0;
313 
314 	TAILQ_INIT(&sc->sc_tdb_q);
315 
316 	sc->sc_len = PFSYNC_MINPKT;
317 	sc->sc_maxupdates = 128;
318 
319 	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
320 	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
321 	    M_WAITOK | M_ZERO);
322 	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
323 
324 	ifp = &sc->sc_if;
325 	snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
326 	ifp->if_softc = sc;
327 	ifp->if_ioctl = pfsyncioctl;
328 	ifp->if_output = pfsyncoutput;
329 	ifp->if_start = pfsyncstart;
330 	ifp->if_type = IFT_PFSYNC;
331 	IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
332 	ifp->if_hdrlen = sizeof(struct pfsync_header);
333 	ifp->if_mtu = ETHERMTU;
334 	ifp->if_xflags = IFXF_CLONED;
335 	timeout_set_proc(&sc->sc_tmo, pfsync_timeout, sc);
336 	timeout_set_proc(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
337 	timeout_set_proc(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);
338 
339 	if_attach(ifp);
340 	if_alloc_sadl(ifp);
341 
342 #if NCARP > 0
343 	if_addgroup(ifp, "carp");
344 #endif
345 
346 #if NBPFILTER > 0
347 	bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
348 #endif
349 
350 	pfsyncif = sc;
351 
352 	return (0);
353 }
354 
355 int
356 pfsync_clone_destroy(struct ifnet *ifp)
357 {
358 	struct pfsync_softc *sc = ifp->if_softc;
359 	struct pfsync_deferral *pd;
360 
361 	timeout_del(&sc->sc_bulkfail_tmo);
362 	timeout_del(&sc->sc_bulk_tmo);
363 	timeout_del(&sc->sc_tmo);
364 #if NCARP > 0
365 	if (!pfsync_sync_ok)
366 		carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
367 	if (sc->sc_link_demoted)
368 		carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
369 #endif
370 	if (sc->sc_sync_if) {
371 		hook_disestablish(
372 		    sc->sc_sync_if->if_linkstatehooks,
373 		    sc->sc_lhcookie);
374 		hook_disestablish(sc->sc_sync_if->if_detachhooks,
375 		    sc->sc_dhcookie);
376 	}
377 	if_detach(ifp);
378 
379 	pfsync_drop(sc);
380 
381 	while (sc->sc_deferred > 0) {
382 		pd = TAILQ_FIRST(&sc->sc_deferrals);
383 		timeout_del(&pd->pd_tmo);
384 		pfsync_undefer(pd, 0);
385 	}
386 
387 	pool_destroy(&sc->sc_pool);
388 	free(sc->sc_imo.imo_membership, M_IPMOPTS, 0);
389 	free(sc, M_DEVBUF, sizeof(*sc));
390 
391 	pfsyncif = NULL;
392 
393 	return (0);
394 }
395 
396 /*
397  * Start output on the pfsync interface.
398  */
399 void
400 pfsyncstart(struct ifnet *ifp)
401 {
402 	IFQ_PURGE(&ifp->if_snd);
403 }
404 
405 void
406 pfsync_syncdev_state(void *arg)
407 {
408 	struct pfsync_softc *sc = arg;
409 
410 	if (!sc->sc_sync_if || !(sc->sc_if.if_flags & IFF_UP))
411 		return;
412 
413 	if (sc->sc_sync_if->if_link_state == LINK_STATE_DOWN) {
414 		sc->sc_if.if_flags &= ~IFF_RUNNING;
415 		if (!sc->sc_link_demoted) {
416 #if NCARP > 0
417 			carp_group_demote_adj(&sc->sc_if, 1,
418 			    "pfsync link state down");
419 #endif
420 			sc->sc_link_demoted = 1;
421 		}
422 
423 		/* drop everything */
424 		timeout_del(&sc->sc_tmo);
425 		pfsync_drop(sc);
426 
427 		pfsync_cancel_full_update(sc);
428 	} else if (sc->sc_link_demoted) {
429 		sc->sc_if.if_flags |= IFF_RUNNING;
430 
431 		pfsync_request_full_update(sc);
432 	}
433 }
434 
435 void
436 pfsync_ifdetach(void *arg)
437 {
438 	struct pfsync_softc *sc = arg;
439 
440 	sc->sc_sync_if = NULL;
441 }
442 
443 int
444 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
445     struct pf_state_peer *d)
446 {
447 	if (s->scrub.scrub_flag && d->scrub == NULL) {
448 		d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
449 		if (d->scrub == NULL)
450 			return (ENOMEM);
451 	}
452 
453 	return (0);
454 }
455 
456 void
457 pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
458 {
459 	pf_state_export(sp, st);
460 }
461 
462 int
463 pfsync_state_import(struct pfsync_state *sp, int flags)
464 {
465 	struct pf_state	*st = NULL;
466 	struct pf_state_key *skw = NULL, *sks = NULL;
467 	struct pf_rule *r = NULL;
468 	struct pfi_kif	*kif;
469 	int pool_flags;
470 	int error;
471 
472 	if (sp->creatorid == 0) {
473 		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
474 		    "invalid creator id: %08x", ntohl(sp->creatorid));
475 		return (EINVAL);
476 	}
477 
478 	if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
479 		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
480 		    "unknown interface: %s", sp->ifname);
481 		if (flags & PFSYNC_SI_IOCTL)
482 			return (EINVAL);
483 		return (0);	/* skip this state */
484 	}
485 
486 	if (sp->af == 0)
487 		return (0);	/* skip this state */
488 
489 	/*
490 	 * If the ruleset checksums match or the state is coming from the ioctl,
491 	 * it's safe to associate the state with the rule of that number.
492 	 */
493 	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
494 	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
495 	    pf_main_ruleset.rules.active.rcount)
496 		r = pf_main_ruleset.rules.active.ptr_array[ntohl(sp->rule)];
497 	else
498 		r = &pf_default_rule;
499 
500 	if (r->max_states && r->states_cur >= r->max_states)
501 		goto cleanup;
502 
503 	if (flags & PFSYNC_SI_IOCTL)
504 		pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
505 	else
506 		pool_flags = PR_NOWAIT | PR_LIMITFAIL | PR_ZERO;
507 
508 	if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
509 		goto cleanup;
510 
511 	if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
512 		goto cleanup;
513 
514 	if ((sp->key[PF_SK_WIRE].af &&
515 	    (sp->key[PF_SK_WIRE].af != sp->key[PF_SK_STACK].af)) ||
516 	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
517 	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
518 	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
519 	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
520 	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
521 	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] ||
522 	    sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) {
523 		if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
524 			goto cleanup;
525 	} else
526 		sks = skw;
527 
528 	/* allocate memory for scrub info */
529 	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
530 	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
531 		goto cleanup;
532 
533 	/* copy to state key(s) */
534 	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
535 	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
536 	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
537 	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
538 	skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain);
539 	PF_REF_INIT(skw->refcnt);
540 	skw->proto = sp->proto;
541 	if (!(skw->af = sp->key[PF_SK_WIRE].af))
542 		skw->af = sp->af;
543 	if (sks != skw) {
544 		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
545 		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
546 		sks->port[0] = sp->key[PF_SK_STACK].port[0];
547 		sks->port[1] = sp->key[PF_SK_STACK].port[1];
548 		sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain);
549 		PF_REF_INIT(sks->refcnt);
550 		if (!(sks->af = sp->key[PF_SK_STACK].af))
551 			sks->af = sp->af;
552 		if (sks->af != skw->af) {
553 			switch (sp->proto) {
554 			case IPPROTO_ICMP:
555 				sks->proto = IPPROTO_ICMPV6;
556 				break;
557 			case IPPROTO_ICMPV6:
558 				sks->proto = IPPROTO_ICMP;
559 				break;
560 			default:
561 				sks->proto = sp->proto;
562 			}
563 		} else
564 			sks->proto = sp->proto;
565 	}
566 	st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]);
567 	st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]);
568 
569 	/* copy to state */
570 	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
571 	st->creation = time_uptime - ntohl(sp->creation);
572 	st->expire = time_uptime;
573 	if (ntohl(sp->expire)) {
574 		u_int32_t timeout;
575 
576 		timeout = r->timeout[sp->timeout];
577 		if (!timeout)
578 			timeout = pf_default_rule.timeout[sp->timeout];
579 
580 		/* sp->expire may have been adaptively scaled by export. */
581 		st->expire -= timeout - ntohl(sp->expire);
582 	}
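	/*
	 * Worked example for the expire computation above, with
	 * illustrative numbers: if the rule timeout is 86400s and the
	 * peer exported 86000s remaining, 400s of the timeout had been
	 * consumed, so st->expire is backdated by 400s and the purge
	 * logic computes the same remaining lifetime the peer saw.
	 */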
583 
584 	st->direction = sp->direction;
585 	st->log = sp->log;
586 	st->timeout = sp->timeout;
587 	st->state_flags = ntohs(sp->state_flags);
588 	st->max_mss = ntohs(sp->max_mss);
589 	st->min_ttl = sp->min_ttl;
590 	st->set_tos = sp->set_tos;
591 	st->set_prio[0] = sp->set_prio[0];
592 	st->set_prio[1] = sp->set_prio[1];
593 
594 	st->id = sp->id;
595 	st->creatorid = sp->creatorid;
596 	pf_state_peer_ntoh(&sp->src, &st->src);
597 	pf_state_peer_ntoh(&sp->dst, &st->dst);
598 
599 	st->rule.ptr = r;
600 	st->anchor.ptr = NULL;
601 	st->rt_kif = NULL;
602 
603 	st->pfsync_time = time_uptime;
604 	st->sync_state = PFSYNC_S_NONE;
605 
606 	/* XXX when we have anchors, use STATE_INC_COUNTERS */
607 	r->states_cur++;
608 	r->states_tot++;
609 
610 	if (!ISSET(flags, PFSYNC_SI_IOCTL))
611 		SET(st->state_flags, PFSTATE_NOSYNC);
612 
613 	if (pf_state_insert(kif, &skw, &sks, st) != 0) {
614 		/* XXX when we have anchors, use STATE_DEC_COUNTERS */
615 		r->states_cur--;
616 		error = EEXIST;
617 		goto cleanup_state;
618 	}
619 
620 	if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
621 		CLR(st->state_flags, PFSTATE_NOSYNC);
622 		if (ISSET(st->state_flags, PFSTATE_ACK)) {
623 			pfsync_q_ins(st, PFSYNC_S_IACK);
624 			schednetisr(NETISR_PFSYNC);
625 		}
626 	}
627 	CLR(st->state_flags, PFSTATE_ACK);
628 
629 	return (0);
630 
631  cleanup:
632 	error = ENOMEM;
633 	if (skw == sks)
634 		sks = NULL;
635 	if (skw != NULL)
636 		pool_put(&pf_state_key_pl, skw);
637 	if (sks != NULL)
638 		pool_put(&pf_state_key_pl, sks);
639 
640  cleanup_state:	/* pf_state_insert frees the state keys */
641 	if (st) {
642 		if (st->dst.scrub)
643 			pool_put(&pf_state_scrub_pl, st->dst.scrub);
644 		if (st->src.scrub)
645 			pool_put(&pf_state_scrub_pl, st->src.scrub);
646 		pool_put(&pf_state_pl, st);
647 	}
648 	return (error);
649 }
650 
651 int
652 pfsync_input(struct mbuf **mp, int *offp, int proto, int af)
653 {
654 	struct mbuf *n, *m = *mp;
655 	struct pfsync_softc *sc = pfsyncif;
656 	struct ip *ip = mtod(m, struct ip *);
657 	struct pfsync_header *ph;
658 	struct pfsync_subheader subh;
659 	int offset, noff, len, count, mlen, flags = 0;
660 	int e;
661 
662 	pfsyncstat_inc(pfsyncs_ipackets);
663 
664 	/* verify that we have a sync interface configured */
665 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
666 	    sc->sc_sync_if == NULL || !pf_status.running)
667 		goto done;
668 
669 	/* verify that the packet came in on the right interface */
670 	if (sc->sc_sync_if->if_index != m->m_pkthdr.ph_ifidx) {
671 		pfsyncstat_inc(pfsyncs_badif);
672 		goto done;
673 	}
674 
675 	sc->sc_if.if_ipackets++;
676 	sc->sc_if.if_ibytes += m->m_pkthdr.len;
677 
678 	/* verify that the IP TTL is 255. */
679 	if (ip->ip_ttl != PFSYNC_DFLTTL) {
680 		pfsyncstat_inc(pfsyncs_badttl);
681 		goto done;
682 	}
683 
684 	offset = ip->ip_hl << 2;
685 	n = m_pulldown(m, offset, sizeof(*ph), &noff);
686 	if (n == NULL) {
687 		pfsyncstat_inc(pfsyncs_hdrops);
688 		return IPPROTO_DONE;
689 	}
690 	ph = (struct pfsync_header *)(n->m_data + noff);
691 
692 	/* verify the version */
693 	if (ph->version != PFSYNC_VERSION) {
694 		pfsyncstat_inc(pfsyncs_badver);
695 		goto done;
696 	}
697 	len = ntohs(ph->len) + offset;
698 	if (m->m_pkthdr.len < len) {
699 		pfsyncstat_inc(pfsyncs_badlen);
700 		goto done;
701 	}
702 
703 	if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
704 		flags = PFSYNC_SI_CKSUM;
705 
706 	offset += sizeof(*ph);
707 	while (offset <= len - sizeof(subh)) {
708 		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
709 		offset += sizeof(subh);
710 
711 		mlen = subh.len << 2;
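		/* subh.len is in 32-bit words; subh.count messages follow */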
712 		count = ntohs(subh.count);
713 
714 		if (subh.action >= PFSYNC_ACT_MAX ||
715 		    subh.action >= nitems(pfsync_acts) ||
716 		    mlen < pfsync_acts[subh.action].len) {
717 			/*
718 			 * Subheaders are always followed by at least one
719 			 * message, so if the peer is new enough to tell us
720 			 * how big its messages are then we know enough to
721 			 * skip them.
722 			 */
723 			if (count > 0 && mlen > 0) {
724 				offset += count * mlen;
725 				continue;
726 			}
727 			pfsyncstat_inc(pfsyncs_badact);
728 			goto done;
729 		}
730 
731 		n = m_pulldown(m, offset, mlen * count, &noff);
732 		if (n == NULL) {
733 			pfsyncstat_inc(pfsyncs_badlen);
734 			return IPPROTO_DONE;
735 		}
736 
737 		PF_LOCK();
738 		e = pfsync_acts[subh.action].in(n->m_data + noff, mlen, count,
739 		    flags);
740 		PF_UNLOCK();
741 		if (e != 0)
742 			goto done;
743 
744 		offset += mlen * count;
745 	}
746 
747 done:
748 	m_freem(m);
749 	return IPPROTO_DONE;
750 }
751 
752 int
753 pfsync_in_clr(caddr_t buf, int len, int count, int flags)
754 {
755 	struct pfsync_clr *clr;
756 	struct pf_state *st, *nexts;
757 	struct pfi_kif *kif;
758 	u_int32_t creatorid;
759 	int i;
760 
761 	for (i = 0; i < count; i++) {
762 		clr = (struct pfsync_clr *)(buf + len * i);
763 		kif = NULL;
764 		creatorid = clr->creatorid;
765 		if (strlen(clr->ifname) &&
766 		    (kif = pfi_kif_find(clr->ifname)) == NULL)
767 			continue;
768 
769 		for (st = RB_MIN(pf_state_tree_id, &tree_id); st; st = nexts) {
770 			nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
771 			if (st->creatorid == creatorid &&
772 			    ((kif && st->kif == kif) || !kif)) {
773 				SET(st->state_flags, PFSTATE_NOSYNC);
774 				pf_remove_state(st);
775 			}
776 		}
777 	}
778 
779 	return (0);
780 }
781 
782 int
783 pfsync_in_ins(caddr_t buf, int len, int count, int flags)
784 {
785 	struct pfsync_state *sp;
786 	sa_family_t af1, af2;
787 	int i;
788 
789 	for (i = 0; i < count; i++) {
790 		sp = (struct pfsync_state *)(buf + len * i);
791 		af1 = sp->key[0].af;
792 		af2 = sp->key[1].af;
793 
794 		/* check for invalid values */
795 		if (sp->timeout >= PFTM_MAX ||
796 		    sp->src.state > PF_TCPS_PROXY_DST ||
797 		    sp->dst.state > PF_TCPS_PROXY_DST ||
798 		    sp->direction > PF_OUT ||
799 		    (((af1 || af2) &&
800 		     ((af1 != AF_INET && af1 != AF_INET6) ||
801 		      (af2 != AF_INET && af2 != AF_INET6))) ||
802 		    (sp->af != AF_INET && sp->af != AF_INET6))) {
803 			DPFPRINTF(LOG_NOTICE,
804 			    "pfsync_input: PFSYNC5_ACT_INS: invalid value");
805 			pfsyncstat_inc(pfsyncs_badval);
806 			continue;
807 		}
808 
809 		if (pfsync_state_import(sp, flags) == ENOMEM) {
810 			/* drop out, but process the rest of the actions */
811 			break;
812 		}
813 	}
814 
815 	return (0);
816 }
817 
818 int
819 pfsync_in_iack(caddr_t buf, int len, int count, int flags)
820 {
821 	struct pfsync_ins_ack *ia;
822 	struct pf_state_cmp id_key;
823 	struct pf_state *st;
824 	int i;
825 
826 	for (i = 0; i < count; i++) {
827 		ia = (struct pfsync_ins_ack *)(buf + len * i);
828 
829 		id_key.id = ia->id;
830 		id_key.creatorid = ia->creatorid;
831 
832 		st = pf_find_state_byid(&id_key);
833 		if (st == NULL)
834 			continue;
835 
836 		if (ISSET(st->state_flags, PFSTATE_ACK))
837 			pfsync_deferred(st, 0);
838 	}
839 
840 	return (0);
841 }
842 
843 int
844 pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
845     struct pfsync_state_peer *dst)
846 {
847 	int sync = 0;
848 
849 	/*
850 	 * The state should never go backwards except
851 	 * for syn-proxy states.  Neither should the
852 	 * sequence window slide backwards.
853 	 */
854 	if ((st->src.state > src->state &&
855 	    (st->src.state < PF_TCPS_PROXY_SRC ||
856 	    src->state >= PF_TCPS_PROXY_SRC)) ||
857 
858 	    (st->src.state == src->state &&
859 	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
860 		sync++;
861 	else
862 		pf_state_peer_ntoh(src, &st->src);
863 
864 	if ((st->dst.state > dst->state) ||
865 
866 	    (st->dst.state >= TCPS_SYN_SENT &&
867 	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
868 		sync++;
869 	else
870 		pf_state_peer_ntoh(dst, &st->dst);
871 
872 	return (sync);
873 }
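/*
 * The return value counts how many peers in the received update were
 * stale relative to our local state: any nonzero value makes the
 * caller re-advertise the fresher local state, and only a fully stale
 * update (both peers, sync == 2) is kept from refreshing st->expire.
 */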
874 
875 int
876 pfsync_in_upd(caddr_t buf, int len, int count, int flags)
877 {
878 	struct pfsync_state *sp;
879 	struct pf_state_cmp id_key;
880 	struct pf_state *st;
881 	int sync;
882 
883 	int i;
884 
885 	for (i = 0; i < count; i++) {
886 		sp = (struct pfsync_state *)(buf + len * i);
887 
888 		/* check for invalid values */
889 		if (sp->timeout >= PFTM_MAX ||
890 		    sp->src.state > PF_TCPS_PROXY_DST ||
891 		    sp->dst.state > PF_TCPS_PROXY_DST) {
892 			DPFPRINTF(LOG_NOTICE,
893 			    "pfsync_input: PFSYNC_ACT_UPD: invalid value");
894 			pfsyncstat_inc(pfsyncs_badval);
895 			continue;
896 		}
897 
898 		id_key.id = sp->id;
899 		id_key.creatorid = sp->creatorid;
900 
901 		st = pf_find_state_byid(&id_key);
902 		if (st == NULL) {
903 			/* insert the update */
904 			if (pfsync_state_import(sp, flags))
905 				pfsyncstat_inc(pfsyncs_badstate);
906 			continue;
907 		}
908 
909 		if (ISSET(st->state_flags, PFSTATE_ACK))
910 			pfsync_deferred(st, 1);
911 
912 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
913 			sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
914 		else {
915 			sync = 0;
916 
917 			/*
918 			 * Non-TCP protocol state machines always go
919 			 * forward.
920 			 */
921 			if (st->src.state > sp->src.state)
922 				sync++;
923 			else
924 				pf_state_peer_ntoh(&sp->src, &st->src);
925 
926 			if (st->dst.state > sp->dst.state)
927 				sync++;
928 			else
929 				pf_state_peer_ntoh(&sp->dst, &st->dst);
930 		}
931 
932 		if (sync < 2) {
933 			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
934 			pf_state_peer_ntoh(&sp->dst, &st->dst);
935 			st->expire = time_uptime;
936 			st->timeout = sp->timeout;
937 		}
938 		st->pfsync_time = time_uptime;
939 
940 		if (sync) {
941 			pfsyncstat_inc(pfsyncs_stale);
942 
943 			pfsync_update_state(st);
944 			schednetisr(NETISR_PFSYNC);
945 		}
946 	}
947 
948 	return (0);
949 }
950 
951 int
952 pfsync_in_upd_c(caddr_t buf, int len, int count, int flags)
953 {
954 	struct pfsync_upd_c *up;
955 	struct pf_state_cmp id_key;
956 	struct pf_state *st;
957 
958 	int sync;
959 
960 	int i;
961 
962 	for (i = 0; i < count; i++) {
963 		up = (struct pfsync_upd_c *)(buf + len * i);
964 
965 		/* check for invalid values */
966 		if (up->timeout >= PFTM_MAX ||
967 		    up->src.state > PF_TCPS_PROXY_DST ||
968 		    up->dst.state > PF_TCPS_PROXY_DST) {
969 			DPFPRINTF(LOG_NOTICE,
970 			    "pfsync_input: PFSYNC_ACT_UPD_C: invalid value");
971 			pfsyncstat_inc(pfsyncs_badval);
972 			continue;
973 		}
974 
975 		id_key.id = up->id;
976 		id_key.creatorid = up->creatorid;
977 
978 		st = pf_find_state_byid(&id_key);
979 		if (st == NULL) {
980 			/* We don't have this state. Ask for it. */
981 			pfsync_request_update(id_key.creatorid, id_key.id);
982 			continue;
983 		}
984 
985 		if (ISSET(st->state_flags, PFSTATE_ACK))
986 			pfsync_deferred(st, 1);
987 
988 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
989 			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
990 		else {
991 			sync = 0;
992 			/*
993 			 * Non-TCP protocol state machines always go
994 			 * forward.
995 			 */
996 			if (st->src.state > up->src.state)
997 				sync++;
998 			else
999 				pf_state_peer_ntoh(&up->src, &st->src);
1000 
1001 			if (st->dst.state > up->dst.state)
1002 				sync++;
1003 			else
1004 				pf_state_peer_ntoh(&up->dst, &st->dst);
1005 		}
1006 		if (sync < 2) {
1007 			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
1008 			pf_state_peer_ntoh(&up->dst, &st->dst);
1009 			st->expire = time_uptime;
1010 			st->timeout = up->timeout;
1011 		}
1012 		st->pfsync_time = time_uptime;
1013 
1014 		if (sync) {
1015 			pfsyncstat_inc(pfsyncs_stale);
1016 
1017 			pfsync_update_state(st);
1018 			schednetisr(NETISR_PFSYNC);
1019 		}
1020 	}
1021 
1022 	return (0);
1023 }
1024 
1025 int
1026 pfsync_in_ureq(caddr_t buf, int len, int count, int flags)
1027 {
1028 	struct pfsync_upd_req *ur;
1029 	int i;
1030 
1031 	struct pf_state_cmp id_key;
1032 	struct pf_state *st;
1033 
1034 	for (i = 0; i < count; i++) {
1035 		ur = (struct pfsync_upd_req *)(buf + len * i);
1036 
1037 		id_key.id = ur->id;
1038 		id_key.creatorid = ur->creatorid;
1039 
1040 		if (id_key.id == 0 && id_key.creatorid == 0)
1041 			pfsync_bulk_start();
1042 		else {
1043 			st = pf_find_state_byid(&id_key);
1044 			if (st == NULL) {
1045 				pfsyncstat_inc(pfsyncs_badstate);
1046 				continue;
1047 			}
1048 			if (ISSET(st->state_flags, PFSTATE_NOSYNC))
1049 				continue;
1050 
1051 			pfsync_update_state_req(st);
1052 		}
1053 	}
1054 
1055 	return (0);
1056 }
1057 
1058 int
1059 pfsync_in_del(caddr_t buf, int len, int count, int flags)
1060 {
1061 	struct pfsync_state *sp;
1062 	struct pf_state_cmp id_key;
1063 	struct pf_state *st;
1064 	int i;
1065 
1066 	for (i = 0; i < count; i++) {
1067 		sp = (struct pfsync_state *)(buf + len * i);
1068 
1069 		id_key.id = sp->id;
1070 		id_key.creatorid = sp->creatorid;
1071 
1072 		st = pf_find_state_byid(&id_key);
1073 		if (st == NULL) {
1074 			pfsyncstat_inc(pfsyncs_badstate);
1075 			continue;
1076 		}
1077 		SET(st->state_flags, PFSTATE_NOSYNC);
1078 		pf_remove_state(st);
1079 	}
1080 
1081 	return (0);
1082 }
1083 
1084 int
1085 pfsync_in_del_c(caddr_t buf, int len, int count, int flags)
1086 {
1087 	struct pfsync_del_c *sp;
1088 	struct pf_state_cmp id_key;
1089 	struct pf_state *st;
1090 	int i;
1091 
1092 	for (i = 0; i < count; i++) {
1093 		sp = (struct pfsync_del_c *)(buf + len * i);
1094 
1095 		id_key.id = sp->id;
1096 		id_key.creatorid = sp->creatorid;
1097 
1098 		st = pf_find_state_byid(&id_key);
1099 		if (st == NULL) {
1100 			pfsyncstat_inc(pfsyncs_badstate);
1101 			continue;
1102 		}
1103 
1104 		SET(st->state_flags, PFSTATE_NOSYNC);
1105 		pf_remove_state(st);
1106 	}
1107 
1108 	return (0);
1109 }
1110 
1111 int
1112 pfsync_in_bus(caddr_t buf, int len, int count, int flags)
1113 {
1114 	struct pfsync_softc *sc = pfsyncif;
1115 	struct pfsync_bus *bus;
1116 
1117 	/* If we're not waiting for a bulk update, who cares. */
1118 	if (sc->sc_ureq_sent == 0)
1119 		return (0);
1120 
1121 	bus = (struct pfsync_bus *)buf;
1122 
1123 	switch (bus->status) {
1124 	case PFSYNC_BUS_START:
1125 		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
1126 		    pf_pool_limits[PF_LIMIT_STATES].limit /
1127 		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
1128 		    sizeof(struct pfsync_state)));
1129 		DPFPRINTF(LOG_INFO, "received bulk update start");
1130 		break;
1131 
1132 	case PFSYNC_BUS_END:
1133 		if (time_uptime - ntohl(bus->endtime) >=
1134 		    sc->sc_ureq_sent) {
1135 			/* that's it, we're happy */
1136 			sc->sc_ureq_sent = 0;
1137 			sc->sc_bulk_tries = 0;
1138 			timeout_del(&sc->sc_bulkfail_tmo);
1139 #if NCARP > 0
1140 			if (!pfsync_sync_ok)
1141 				carp_group_demote_adj(&sc->sc_if, -1,
1142 				    sc->sc_link_demoted ?
1143 				    "pfsync link state up" :
1144 				    "pfsync bulk done");
1145 			if (sc->sc_initial_bulk) {
1146 				carp_group_demote_adj(&sc->sc_if, -32,
1147 				    "pfsync init");
1148 				sc->sc_initial_bulk = 0;
1149 			}
1150 #endif
1151 			pfsync_sync_ok = 1;
1152 			sc->sc_link_demoted = 0;
1153 			DPFPRINTF(LOG_INFO, "received valid bulk update end");
1154 		} else {
1155 			DPFPRINTF(LOG_WARNING, "received invalid "
1156 			    "bulk update end: bad timestamp");
1157 		}
1158 		break;
1159 	}
1160 
1161 	return (0);
1162 }
1163 
1164 int
1165 pfsync_in_tdb(caddr_t buf, int len, int count, int flags)
1166 {
1167 #if defined(IPSEC)
1168 	struct pfsync_tdb *tp;
1169 	int i;
1170 
1171 	for (i = 0; i < count; i++) {
1172 		tp = (struct pfsync_tdb *)(buf + len * i);
1173 		pfsync_update_net_tdb(tp);
1174 	}
1175 #endif
1176 
1177 	return (0);
1178 }
1179 
1180 #if defined(IPSEC)
1181 /* Update an in-kernel tdb. Silently fail if no tdb is found. */
1182 void
1183 pfsync_update_net_tdb(struct pfsync_tdb *pt)
1184 {
1185 	struct tdb		*tdb;
1186 
1187 	NET_ASSERT_LOCKED();
1188 
1189 	/* check for invalid values */
1190 	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
1191 	    (pt->dst.sa.sa_family != AF_INET &&
1192 	     pt->dst.sa.sa_family != AF_INET6))
1193 		goto bad;
1194 
1195 	tdb = gettdb(ntohs(pt->rdomain), pt->spi,
1196 	    (union sockaddr_union *)&pt->dst, pt->sproto);
1197 	if (tdb) {
1198 		pt->rpl = betoh64(pt->rpl);
1199 		pt->cur_bytes = betoh64(pt->cur_bytes);
1200 
1201 		/* Neither replay nor byte counter should ever decrease. */
1202 		if (pt->rpl < tdb->tdb_rpl ||
1203 		    pt->cur_bytes < tdb->tdb_cur_bytes) {
1204 			goto bad;
1205 		}
1206 
1207 		tdb->tdb_rpl = pt->rpl;
1208 		tdb->tdb_cur_bytes = pt->cur_bytes;
1209 	}
1210 	return;
1211 
1212  bad:
1213 	DPFPRINTF(LOG_WARNING, "pfsync_insert: PFSYNC_ACT_TDB_UPD: "
1214 	    "invalid value");
1215 	pfsyncstat_inc(pfsyncs_badstate);
1216 	return;
1217 }
1218 #endif
1219 
1220 
1221 int
1222 pfsync_in_eof(caddr_t buf, int len, int count, int flags)
1223 {
1224 	if (len > 0 || count > 0)
1225 		pfsyncstat_inc(pfsyncs_badact);
1226 
1227 	/* we're done. let the caller return */
1228 	return (1);
1229 }
1230 
1231 int
1232 pfsync_in_error(caddr_t buf, int len, int count, int flags)
1233 {
1234 	pfsyncstat_inc(pfsyncs_badact);
1235 	return (-1);
1236 }
1237 
1238 int
1239 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
1240 	struct rtentry *rt)
1241 {
1242 	m_freem(m);	/* drop packet */
1243 	return (EAFNOSUPPORT);
1244 }
1245 
1246 int
1247 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1248 {
1249 	struct proc *p = curproc;
1250 	struct pfsync_softc *sc = ifp->if_softc;
1251 	struct ifreq *ifr = (struct ifreq *)data;
1252 	struct ip_moptions *imo = &sc->sc_imo;
1253 	struct pfsyncreq pfsyncr;
1254 	struct ifnet    *sifp;
1255 	struct ip *ip;
1256 	int error;
1257 
1258 	switch (cmd) {
1259 	case SIOCSIFFLAGS:
1260 		if ((ifp->if_flags & IFF_RUNNING) == 0 &&
1261 		    (ifp->if_flags & IFF_UP)) {
1262 			ifp->if_flags |= IFF_RUNNING;
1263 
1264 #if NCARP > 0
1265 			sc->sc_initial_bulk = 1;
1266 			carp_group_demote_adj(&sc->sc_if, 32, "pfsync init");
1267 #endif
1268 
1269 			pfsync_request_full_update(sc);
1270 		}
1271 		if ((ifp->if_flags & IFF_RUNNING) &&
1272 		    (ifp->if_flags & IFF_UP) == 0) {
1273 			ifp->if_flags &= ~IFF_RUNNING;
1274 
1275 			/* drop everything */
1276 			timeout_del(&sc->sc_tmo);
1277 			pfsync_drop(sc);
1278 
1279 			pfsync_cancel_full_update(sc);
1280 		}
1281 		break;
1282 	case SIOCSIFMTU:
1283 		if (!sc->sc_sync_if ||
1284 		    ifr->ifr_mtu <= PFSYNC_MINPKT ||
1285 		    ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
1286 			return (EINVAL);
1287 		if (ifr->ifr_mtu < ifp->if_mtu)
1288 			pfsync_sendout();
1289 		ifp->if_mtu = ifr->ifr_mtu;
1290 		break;
1291 	case SIOCGETPFSYNC:
1292 		bzero(&pfsyncr, sizeof(pfsyncr));
1293 		if (sc->sc_sync_if) {
1294 			strlcpy(pfsyncr.pfsyncr_syncdev,
1295 			    sc->sc_sync_if->if_xname, IFNAMSIZ);
1296 		}
1297 		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
1298 		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
1299 		pfsyncr.pfsyncr_defer = sc->sc_defer;
1300 		return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));
1301 
1302 	case SIOCSETPFSYNC:
1303 		if ((error = suser(p, 0)) != 0)
1304 			return (error);
1305 		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
1306 			return (error);
1307 
1308 		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
1309 			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
1310 		else
1311 			sc->sc_sync_peer.s_addr =
1312 			    pfsyncr.pfsyncr_syncpeer.s_addr;
1313 
1314 		if (pfsyncr.pfsyncr_maxupdates > 255)
1315 			return (EINVAL);
1316 		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
1317 
1318 		sc->sc_defer = pfsyncr.pfsyncr_defer;
1319 
1320 		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
1321 			if (sc->sc_sync_if) {
1322 				hook_disestablish(
1323 				    sc->sc_sync_if->if_linkstatehooks,
1324 				    sc->sc_lhcookie);
1325 				hook_disestablish(
1326 				    sc->sc_sync_if->if_detachhooks,
1327 				    sc->sc_dhcookie);
1328 			}
1329 			sc->sc_sync_if = NULL;
1330 			if (imo->imo_num_memberships > 0) {
1331 				in_delmulti(imo->imo_membership[
1332 				    --imo->imo_num_memberships]);
1333 				imo->imo_ifidx = 0;
1334 			}
1335 			break;
1336 		}
1337 
1338 		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL)
1339 			return (EINVAL);
1340 
1341 		if (sifp->if_mtu < sc->sc_if.if_mtu ||
1342 		    (sc->sc_sync_if != NULL &&
1343 		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
1344 		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
1345 			pfsync_sendout();
1346 
1347 		if (sc->sc_sync_if) {
1348 			hook_disestablish(
1349 			    sc->sc_sync_if->if_linkstatehooks,
1350 			    sc->sc_lhcookie);
1351 			hook_disestablish(
1352 			    sc->sc_sync_if->if_detachhooks,
1353 			    sc->sc_dhcookie);
1354 		}
1355 		sc->sc_sync_if = sifp;
1356 
1357 		if (imo->imo_num_memberships > 0) {
1358 			in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1359 			imo->imo_ifidx = 0;
1360 		}
1361 
1362 		if (sc->sc_sync_if &&
1363 		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
1364 			struct in_addr addr;
1365 
1366 			if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) {
1367 				sc->sc_sync_if = NULL;
1368 				return (EADDRNOTAVAIL);
1369 			}
1370 
1371 			addr.s_addr = INADDR_PFSYNC_GROUP;
1372 
1373 			if ((imo->imo_membership[0] =
1374 			    in_addmulti(&addr, sc->sc_sync_if)) == NULL) {
1375 				sc->sc_sync_if = NULL;
1376 				return (ENOBUFS);
1377 			}
1378 			imo->imo_num_memberships++;
1379 			imo->imo_ifidx = sc->sc_sync_if->if_index;
1380 			imo->imo_ttl = PFSYNC_DFLTTL;
1381 			imo->imo_loop = 0;
1382 		}
1383 
1384 		ip = &sc->sc_template;
1385 		bzero(ip, sizeof(*ip));
1386 		ip->ip_v = IPVERSION;
1387 		ip->ip_hl = sizeof(sc->sc_template) >> 2;
1388 		ip->ip_tos = IPTOS_LOWDELAY;
1389 		/* len and id are set later */
1390 		ip->ip_off = htons(IP_DF);
1391 		ip->ip_ttl = PFSYNC_DFLTTL;
1392 		ip->ip_p = IPPROTO_PFSYNC;
1393 		ip->ip_src.s_addr = INADDR_ANY;
1394 		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;
1395 
1396 		sc->sc_lhcookie =
1397 		    hook_establish(sc->sc_sync_if->if_linkstatehooks, 1,
1398 		    pfsync_syncdev_state, sc);
1399 		sc->sc_dhcookie = hook_establish(sc->sc_sync_if->if_detachhooks,
1400 		    0, pfsync_ifdetach, sc);
1401 
1402 		pfsync_request_full_update(sc);
1403 
1404 		break;
1405 
1406 	default:
1407 		return (ENOTTY);
1408 	}
1409 
1410 	return (0);
1411 }
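/*
 * Userland configures the above via SIOCSETPFSYNC; a minimal sketch
 * (includes and error handling omitted, "em0" is an example syncdev):
 *
 *	struct pfsyncreq pfsyncr;
 *	struct ifreq ifr;
 *	int s = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	memset(&pfsyncr, 0, sizeof(pfsyncr));
 *	strlcpy(pfsyncr.pfsyncr_syncdev, "em0",
 *	    sizeof(pfsyncr.pfsyncr_syncdev));
 *	pfsyncr.pfsyncr_maxupdates = 128;
 *	strlcpy(ifr.ifr_name, "pfsync0", sizeof(ifr.ifr_name));
 *	ifr.ifr_data = (caddr_t)&pfsyncr;
 *	ioctl(s, SIOCSETPFSYNC, &ifr);
 */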
1412 
1413 void
1414 pfsync_out_state(struct pf_state *st, void *buf)
1415 {
1416 	struct pfsync_state *sp = buf;
1417 
1418 	pfsync_state_export(sp, st);
1419 }
1420 
1421 void
1422 pfsync_out_iack(struct pf_state *st, void *buf)
1423 {
1424 	struct pfsync_ins_ack *iack = buf;
1425 
1426 	iack->id = st->id;
1427 	iack->creatorid = st->creatorid;
1428 }
1429 
1430 void
1431 pfsync_out_upd_c(struct pf_state *st, void *buf)
1432 {
1433 	struct pfsync_upd_c *up = buf;
1434 
1435 	bzero(up, sizeof(*up));
1436 	up->id = st->id;
1437 	pf_state_peer_hton(&st->src, &up->src);
1438 	pf_state_peer_hton(&st->dst, &up->dst);
1439 	up->creatorid = st->creatorid;
1440 	up->timeout = st->timeout;
1441 }
1442 
1443 void
1444 pfsync_out_del(struct pf_state *st, void *buf)
1445 {
1446 	struct pfsync_del_c *dp = buf;
1447 
1448 	dp->id = st->id;
1449 	dp->creatorid = st->creatorid;
1450 
1451 	SET(st->state_flags, PFSTATE_NOSYNC);
1452 }
1453 
1454 void
1455 pfsync_drop(struct pfsync_softc *sc)
1456 {
1457 	struct pf_state *st;
1458 	struct pfsync_upd_req_item *ur;
1459 	struct tdb *t;
1460 	int q;
1461 
1462 	for (q = 0; q < PFSYNC_S_COUNT; q++) {
1463 		if (TAILQ_EMPTY(&sc->sc_qs[q]))
1464 			continue;
1465 
1466 		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
1467 #ifdef PFSYNC_DEBUG
1468 			KASSERT(st->sync_state == q);
1469 #endif
1470 			st->sync_state = PFSYNC_S_NONE;
1471 		}
1472 		TAILQ_INIT(&sc->sc_qs[q]);
1473 	}
1474 
1475 	while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
1476 		TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
1477 		pool_put(&sc->sc_pool, ur);
1478 	}
1479 
1480 	sc->sc_plus = NULL;
1481 
1482 	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
1483 		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry)
1484 			CLR(t->tdb_flags, TDBF_PFSYNC);
1485 
1486 		TAILQ_INIT(&sc->sc_tdb_q);
1487 	}
1488 
1489 	sc->sc_len = PFSYNC_MINPKT;
1490 }
1491 
1492 void
1493 pfsync_sendout(void)
1494 {
1495 	struct pfsync_softc *sc = pfsyncif;
1496 #if NBPFILTER > 0
1497 	struct ifnet *ifp = &sc->sc_if;
1498 #endif
1499 	struct mbuf *m;
1500 	struct ip *ip;
1501 	struct pfsync_header *ph;
1502 	struct pfsync_subheader *subh;
1503 	struct pf_state *st;
1504 	struct pfsync_upd_req_item *ur;
1505 	struct tdb *t;
1506 
1507 	int offset;
1508 	int q, count = 0;
1509 
1510 	if (sc == NULL || sc->sc_len == PFSYNC_MINPKT)
1511 		return;
1512 
1513 	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
1514 #if NBPFILTER > 0
1515 	    (ifp->if_bpf == NULL && sc->sc_sync_if == NULL)) {
1516 #else
1517 	    sc->sc_sync_if == NULL) {
1518 #endif
1519 		pfsync_drop(sc);
1520 		return;
1521 	}
1522 
1523 	MGETHDR(m, M_DONTWAIT, MT_DATA);
1524 	if (m == NULL) {
1525 		sc->sc_if.if_oerrors++;
1526 		pfsyncstat_inc(pfsyncs_onomem);
1527 		pfsync_drop(sc);
1528 		return;
1529 	}
1530 
1531 	if (max_linkhdr + sc->sc_len > MHLEN) {
1532 		MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len);
1533 		if (!ISSET(m->m_flags, M_EXT)) {
1534 			m_free(m);
1535 			sc->sc_if.if_oerrors++;
1536 			pfsyncstat_inc(pfsyncs_onomem);
1537 			pfsync_drop(sc);
1538 			return;
1539 		}
1540 	}
1541 	m->m_data += max_linkhdr;
1542 	m->m_len = m->m_pkthdr.len = sc->sc_len;
1543 
1544 	/* build the ip header */
1545 	ip = mtod(m, struct ip *);
1546 	bcopy(&sc->sc_template, ip, sizeof(*ip));
1547 	offset = sizeof(*ip);
1548 
1549 	ip->ip_len = htons(m->m_pkthdr.len);
1550 	ip->ip_id = htons(ip_randomid());
1551 
1552 	/* build the pfsync header */
1553 	ph = (struct pfsync_header *)(m->m_data + offset);
1554 	bzero(ph, sizeof(*ph));
1555 	offset += sizeof(*ph);
1556 
1557 	ph->version = PFSYNC_VERSION;
1558 	ph->len = htons(sc->sc_len - sizeof(*ip));
1559 	bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
1560 
1561 	if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
1562 		subh = (struct pfsync_subheader *)(m->m_data + offset);
1563 		offset += sizeof(*subh);
1564 
1565 		count = 0;
1566 		while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
1567 			TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
1568 
1569 			bcopy(&ur->ur_msg, m->m_data + offset,
1570 			    sizeof(ur->ur_msg));
1571 			offset += sizeof(ur->ur_msg);
1572 
1573 			pool_put(&sc->sc_pool, ur);
1574 
1575 			count++;
1576 		}
1577 
1578 		bzero(subh, sizeof(*subh));
1579 		subh->len = sizeof(ur->ur_msg) >> 2;
1580 		subh->action = PFSYNC_ACT_UPD_REQ;
1581 		subh->count = htons(count);
1582 	}
1583 
1584 	/* has someone built a custom region for us to add? */
1585 	if (sc->sc_plus != NULL) {
1586 		bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
1587 		offset += sc->sc_pluslen;
1588 
1589 		sc->sc_plus = NULL;
1590 	}
1591 
1592 	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
1593 		subh = (struct pfsync_subheader *)(m->m_data + offset);
1594 		offset += sizeof(*subh);
1595 
1596 		count = 0;
1597 		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) {
1598 			pfsync_out_tdb(t, m->m_data + offset);
1599 			offset += sizeof(struct pfsync_tdb);
1600 			CLR(t->tdb_flags, TDBF_PFSYNC);
1601 
1602 			count++;
1603 		}
1604 		TAILQ_INIT(&sc->sc_tdb_q);
1605 
1606 		bzero(subh, sizeof(*subh));
1607 		subh->action = PFSYNC_ACT_TDB;
1608 		subh->len = sizeof(struct pfsync_tdb) >> 2;
1609 		subh->count = htons(count);
1610 	}
1611 
1612 	/* walk the queues */
1613 	for (q = 0; q < PFSYNC_S_COUNT; q++) {
1614 		if (TAILQ_EMPTY(&sc->sc_qs[q]))
1615 			continue;
1616 
1617 		subh = (struct pfsync_subheader *)(m->m_data + offset);
1618 		offset += sizeof(*subh);
1619 
1620 		count = 0;
1621 		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
1622 #ifdef PFSYNC_DEBUG
1623 			KASSERT(st->sync_state == q);
1624 #endif
1625 			pfsync_qs[q].write(st, m->m_data + offset);
1626 			offset += pfsync_qs[q].len;
1627 
1628 			st->sync_state = PFSYNC_S_NONE;
1629 			count++;
1630 		}
1631 		TAILQ_INIT(&sc->sc_qs[q]);
1632 
1633 		bzero(subh, sizeof(*subh));
1634 		subh->action = pfsync_qs[q].action;
1635 		subh->len = pfsync_qs[q].len >> 2;
1636 		subh->count = htons(count);
1637 	}
1638 
1639 	/* we're done, let's put it on the wire */
1640 #if NBPFILTER > 0
1641 	if (ifp->if_bpf) {
1642 		m->m_data += sizeof(*ip);
1643 		m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
1644 		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
1645 		m->m_data -= sizeof(*ip);
1646 		m->m_len = m->m_pkthdr.len = sc->sc_len;
1647 	}
1648 
1649 	if (sc->sc_sync_if == NULL) {
1650 		sc->sc_len = PFSYNC_MINPKT;
1651 		m_freem(m);
1652 		return;
1653 	}
1654 #endif
1655 
1656 	/* start again */
1657 	sc->sc_len = PFSYNC_MINPKT;
1658 
1659 	sc->sc_if.if_opackets++;
1660 	sc->sc_if.if_obytes += m->m_pkthdr.len;
1661 
1662 	m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
1663 
1664 	if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL, 0) == 0)
1665 		pfsyncstat_inc(pfsyncs_opackets);
1666 	else
1667 		pfsyncstat_inc(pfsyncs_oerrors);
1668 }
1669 
1670 void
1671 pfsync_insert_state(struct pf_state *st)
1672 {
1673 	struct pfsync_softc *sc = pfsyncif;
1674 
1675 	NET_ASSERT_LOCKED();
1676 
1677 	if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) ||
1678 	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
1679 		SET(st->state_flags, PFSTATE_NOSYNC);
1680 		return;
1681 	}
1682 
1683 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
1684 	    ISSET(st->state_flags, PFSTATE_NOSYNC))
1685 		return;
1686 
1687 #ifdef PFSYNC_DEBUG
1688 	KASSERT(st->sync_state == PFSYNC_S_NONE);
1689 #endif
1690 
1691 	if (sc->sc_len == PFSYNC_MINPKT)
1692 		timeout_add_sec(&sc->sc_tmo, 1);
1693 
1694 	pfsync_q_ins(st, PFSYNC_S_INS);
1695 
1696 	st->sync_updates = 0;
1697 }
1698 
1699 int
1700 pfsync_defer(struct pf_state *st, struct mbuf *m)
1701 {
1702 	struct pfsync_softc *sc = pfsyncif;
1703 	struct pfsync_deferral *pd;
1704 
1705 	NET_ASSERT_LOCKED();
1706 
1707 	if (!sc->sc_defer ||
1708 	    ISSET(st->state_flags, PFSTATE_NOSYNC) ||
1709 	    m->m_flags & (M_BCAST|M_MCAST))
1710 		return (0);
1711 
1712 	if (sc->sc_deferred >= 128) {
1713 		pd = TAILQ_FIRST(&sc->sc_deferrals);
1714 		if (timeout_del(&pd->pd_tmo))
1715 			pfsync_undefer(pd, 0);
1716 	}
1717 
1718 	pd = pool_get(&sc->sc_pool, PR_NOWAIT);
1719 	if (pd == NULL)
1720 		return (0);
1721 
1722 	m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
1723 	SET(st->state_flags, PFSTATE_ACK);
1724 
1725 	pd->pd_st = st;
1726 	pd->pd_m = m;
1727 
1728 	sc->sc_deferred++;
1729 	TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
1730 
1731 	timeout_set_proc(&pd->pd_tmo, pfsync_defer_tmo, pd);
1732 	timeout_add_msec(&pd->pd_tmo, 20);
1733 
1734 	schednetisr(NETISR_PFSYNC);
1735 
1736 	return (1);
1737 }
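/*
 * Deferral sketch: the packet that created the state is held for up
 * to 20ms so the peer can acknowledge the insert first; an incoming
 * ins_ack (pfsync_in_iack()) or the timeout releases it through
 * pfsync_undefer().  At most 128 packets are held at once; the
 * oldest is released to make room.
 */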
1738 
1739 void
1740 pfsync_undefer(struct pfsync_deferral *pd, int drop)
1741 {
1742 	struct pfsync_softc *sc = pfsyncif;
1743 	struct pf_pdesc pdesc;
1744 
1745 	NET_ASSERT_LOCKED();
1746 
1747 	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
1748 	sc->sc_deferred--;
1749 
1750 	CLR(pd->pd_st->state_flags, PFSTATE_ACK);
1751 	if (drop)
1752 		m_freem(pd->pd_m);
1753 	else {
1754 		if (pd->pd_st->rule.ptr->rt == PF_ROUTETO) {
1755 			if (pf_setup_pdesc(&pdesc,
1756 			    pd->pd_st->key[PF_SK_WIRE]->af,
1757 			    pd->pd_st->direction, pd->pd_st->rt_kif,
1758 			    pd->pd_m, NULL) != PF_PASS) {
1759 				m_freem(pd->pd_m);
1760 				goto out;
1761 			}
1762 			switch (pd->pd_st->key[PF_SK_WIRE]->af) {
1763 			case AF_INET:
1764 				pf_route(&pdesc,
1765 				    pd->pd_st->rule.ptr, pd->pd_st);
1766 				break;
1767 #ifdef INET6
1768 			case AF_INET6:
1769 				pf_route6(&pdesc,
1770 				    pd->pd_st->rule.ptr, pd->pd_st);
1771 				break;
1772 #endif /* INET6 */
1773 			}
1774 			pd->pd_m = pdesc.m;
1775 		} else {
1776 			switch (pd->pd_st->key[PF_SK_WIRE]->af) {
1777 			case AF_INET:
1778 				ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL,
1779 				    0);
1780 				break;
1781 #ifdef INET6
1782 			case AF_INET6:
1783 				ip6_output(pd->pd_m, NULL, NULL, 0,
1784 				    NULL, NULL);
1785 				break;
1786 #endif /* INET6 */
1787 			}
1788 		}
1789 	}
1790  out:
1791 	pool_put(&sc->sc_pool, pd);
1792 }
1793 
1794 void
1795 pfsync_defer_tmo(void *arg)
1796 {
1797 	NET_LOCK();
1798 	pfsync_undefer(arg, 0);
1799 	NET_UNLOCK();
1800 }
1801 
1802 void
1803 pfsync_deferred(struct pf_state *st, int drop)
1804 {
1805 	struct pfsync_softc *sc = pfsyncif;
1806 	struct pfsync_deferral *pd;
1807 
1808 	NET_ASSERT_LOCKED();
1809 
1810 	TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
1811 		if (pd->pd_st == st) {
1812 			if (timeout_del(&pd->pd_tmo))
1813 				pfsync_undefer(pd, drop);
1814 			return;
1815 		}
1816 	}
1817 
1818 	panic("pfsync_deferred: unable to find deferred state");
1819 }
1820 
1821 void
1822 pfsync_update_state(struct pf_state *st)
1823 {
1824 	struct pfsync_softc *sc = pfsyncif;
1825 	int sync = 0;
1826 
1827 	NET_ASSERT_LOCKED();
1828 
1829 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
1830 		return;
1831 
1832 	if (ISSET(st->state_flags, PFSTATE_ACK))
1833 		pfsync_deferred(st, 0);
1834 	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
1835 		if (st->sync_state != PFSYNC_S_NONE)
1836 			pfsync_q_del(st);
1837 		return;
1838 	}
1839 
1840 	if (sc->sc_len == PFSYNC_MINPKT)
1841 		timeout_add_sec(&sc->sc_tmo, 1);
1842 
1843 	switch (st->sync_state) {
1844 	case PFSYNC_S_UPD_C:
1845 	case PFSYNC_S_UPD:
1846 	case PFSYNC_S_INS:
1847 		/* we're already handling it */
1848 
1849 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
1850 			st->sync_updates++;
1851 			if (st->sync_updates >= sc->sc_maxupdates)
1852 				sync = 1;
1853 		}
1854 		break;
1855 
1856 	case PFSYNC_S_IACK:
1857 		pfsync_q_del(st);
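		/* FALLTHROUGH */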
1858 	case PFSYNC_S_NONE:
1859 		pfsync_q_ins(st, PFSYNC_S_UPD_C);
1860 		st->sync_updates = 0;
1861 		break;
1862 
1863 	default:
1864 		panic("pfsync_update_state: unexpected sync state %d",
1865 		    st->sync_state);
1866 	}
1867 
1868 	if (sync || (time_uptime - st->pfsync_time) < 2)
1869 		schednetisr(NETISR_PFSYNC);
1870 }
1871 
1872 void
1873 pfsync_cancel_full_update(struct pfsync_softc *sc)
1874 {
1875 	if (timeout_pending(&sc->sc_bulkfail_tmo) ||
1876 	    timeout_pending(&sc->sc_bulk_tmo)) {
1877 #if NCARP > 0
1878 		if (!pfsync_sync_ok)
1879 			carp_group_demote_adj(&sc->sc_if, -1,
1880 			    "pfsync bulk cancelled");
1881 		if (sc->sc_initial_bulk) {
1882 			carp_group_demote_adj(&sc->sc_if, -32,
1883 			    "pfsync init");
1884 			sc->sc_initial_bulk = 0;
1885 		}
1886 #endif
1887 		pfsync_sync_ok = 1;
1888 		DPFPRINTF(LOG_INFO, "cancelling bulk update");
1889 	}
1890 	timeout_del(&sc->sc_bulkfail_tmo);
1891 	timeout_del(&sc->sc_bulk_tmo);
1892 	sc->sc_bulk_next = NULL;
1893 	sc->sc_bulk_last = NULL;
1894 	sc->sc_ureq_sent = 0;
1895 	sc->sc_bulk_tries = 0;
1896 }
1897 
1898 void
1899 pfsync_request_full_update(struct pfsync_softc *sc)
1900 {
1901 	if (sc->sc_sync_if && ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
1902 		/* Request a full state table update. */
1903 		sc->sc_ureq_sent = time_uptime;
1904 #if NCARP > 0
1905 		if (!sc->sc_link_demoted && pfsync_sync_ok)
1906 			carp_group_demote_adj(&sc->sc_if, 1,
1907 			    "pfsync bulk start");
1908 #endif
1909 		pfsync_sync_ok = 0;
1910 		DPFPRINTF(LOG_INFO, "requesting bulk update");
1911 		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
1912 		    pf_pool_limits[PF_LIMIT_STATES].limit /
1913 		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
1914 		    sizeof(struct pfsync_state)));
1915 		pfsync_request_update(0, 0);
1916 	}
1917 }
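/*
 * Sizing note for the bulk-fail timeout above: if the state limit is
 * L and roughly M = (MTU - PFSYNC_MINPKT) / sizeof(struct
 * pfsync_state) states fit in one packet, a full bulk takes about
 * L/M packets, so we allow 4 seconds plus one tick per expected
 * packet before calling the bulk failed.
 */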
1918 
1919 void
1920 pfsync_request_update(u_int32_t creatorid, u_int64_t id)
1921 {
1922 	struct pfsync_softc *sc = pfsyncif;
1923 	struct pfsync_upd_req_item *item;
1924 	size_t nlen = sizeof(struct pfsync_upd_req);
1925 
1926 	/*
1927 	 * This code does nothing to prevent multiple update requests for the
1928 	 * same state from being generated.
1929 	 */
1930 
1931 	item = pool_get(&sc->sc_pool, PR_NOWAIT);
1932 	if (item == NULL) {
1933 		/* XXX stats */
1934 		return;
1935 	}
1936 
1937 	item->ur_msg.id = id;
1938 	item->ur_msg.creatorid = creatorid;
1939 
1940 	if (TAILQ_EMPTY(&sc->sc_upd_req_list))
1941 		nlen += sizeof(struct pfsync_subheader);
1942 
1943 	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
1944 		pfsync_sendout();
1945 
1946 		nlen = sizeof(struct pfsync_subheader) +
1947 		    sizeof(struct pfsync_upd_req);
1948 	}
1949 
1950 	TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
1951 	sc->sc_len += nlen;
1952 
1953 	schednetisr(NETISR_PFSYNC);
1954 }
1955 
1956 void
1957 pfsync_update_state_req(struct pf_state *st)
1958 {
1959 	struct pfsync_softc *sc = pfsyncif;
1960 
1961 	if (sc == NULL)
1962 		panic("pfsync_update_state_req: nonexistant instance");
1963 
1964 	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
1965 		if (st->sync_state != PFSYNC_S_NONE)
1966 			pfsync_q_del(st);
1967 		return;
1968 	}
1969 
1970 	switch (st->sync_state) {
1971 	case PFSYNC_S_UPD_C:
1972 	case PFSYNC_S_IACK:
1973 		pfsync_q_del(st);
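		/* FALLTHROUGH */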
1974 	case PFSYNC_S_NONE:
1975 		pfsync_q_ins(st, PFSYNC_S_UPD);
1976 		schednetisr(NETISR_PFSYNC);
1977 		return;
1978 
1979 	case PFSYNC_S_INS:
1980 	case PFSYNC_S_UPD:
1981 	case PFSYNC_S_DEL:
1982 		/* we're already handling it */
1983 		return;
1984 
1985 	default:
1986 		panic("pfsync_update_state_req: unexpected sync state %d",
1987 		    st->sync_state);
1988 	}
1989 }
1990 
1991 void
1992 pfsync_delete_state(struct pf_state *st)
1993 {
1994 	struct pfsync_softc *sc = pfsyncif;
1995 
1996 	NET_ASSERT_LOCKED();
1997 
1998 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
1999 		return;
2000 
2001 	if (ISSET(st->state_flags, PFSTATE_ACK))
2002 		pfsync_deferred(st, 1);
2003 	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
2004 		if (st->sync_state != PFSYNC_S_NONE)
2005 			pfsync_q_del(st);
2006 		return;
2007 	}
2008 
2009 	if (sc->sc_len == PFSYNC_MINPKT)
2010 		timeout_add_sec(&sc->sc_tmo, 1);
2011 
2012 	switch (st->sync_state) {
2013 	case PFSYNC_S_INS:
2014 		/* we never got to tell the world so just forget about it */
2015 		pfsync_q_del(st);
2016 		return;
2017 
2018 	case PFSYNC_S_UPD_C:
2019 	case PFSYNC_S_UPD:
2020 	case PFSYNC_S_IACK:
2021 		pfsync_q_del(st);
2022 		/* FALLTHROUGH to putting it on the del list */
2023 
2024 	case PFSYNC_S_NONE:
2025 		pfsync_q_ins(st, PFSYNC_S_DEL);
2026 		return;
2027 
2028 	default:
2029 		panic("pfsync_delete_state: unexpected sync state %d",
2030 		    st->sync_state);
2031 	}
2032 }
2033 
2034 void
2035 pfsync_clear_states(u_int32_t creatorid, const char *ifname)
2036 {
2037 	struct pfsync_softc *sc = pfsyncif;
2038 	struct {
2039 		struct pfsync_subheader subh;
2040 		struct pfsync_clr clr;
2041 	} __packed r;
2042 
2043 	NET_ASSERT_LOCKED();
2044 
2045 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
2046 		return;
2047 
2048 	bzero(&r, sizeof(r));
2049 
2050 	r.subh.action = PFSYNC_ACT_CLR;
2051 	r.subh.len = sizeof(struct pfsync_clr) >> 2;
2052 	r.subh.count = htons(1);
2053 
2054 	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
2055 	r.clr.creatorid = creatorid;
2056 
2057 	pfsync_send_plus(&r, sizeof(r));
2058 }
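
/*
 * subh.len is carried in 32-bit words, hence the >> 2 above: with a
 * 16-byte ifname plus the 4-byte creatorid, sizeof(struct pfsync_clr)
 * is 20 bytes and goes out as len = 5.  pfsync_bulk_status() below
 * encodes its pfsync_bus payload the same way.
 */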
2059 
2060 void
2061 pfsync_q_ins(struct pf_state *st, int q)
2062 {
2063 	struct pfsync_softc *sc = pfsyncif;
2064 	size_t nlen = pfsync_qs[q].len;
2065 
2066 	KASSERT(st->sync_state == PFSYNC_S_NONE);
2067 
2068 #if defined(PFSYNC_DEBUG)
2069 	if (sc->sc_len < PFSYNC_MINPKT)
2070 		panic("pfsync pkt len is too low %d", sc->sc_len);
2071 #endif
2072 	if (TAILQ_EMPTY(&sc->sc_qs[q]))
2073 		nlen += sizeof(struct pfsync_subheader);
2074 
2075 	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
2076 		pfsync_sendout();
2077 
2078 		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
2079 	}
2080 
2081 	sc->sc_len += nlen;
2082 	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
2083 	st->sync_state = q;
2084 }
2085 
2086 void
2087 pfsync_q_del(struct pf_state *st)
2088 {
2089 	struct pfsync_softc *sc = pfsyncif;
2090 	int q = st->sync_state;
2091 
2092 	KASSERT(st->sync_state != PFSYNC_S_NONE);
2093 
2094 	sc->sc_len -= pfsync_qs[q].len;
2095 	TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
2096 	st->sync_state = PFSYNC_S_NONE;
2097 
2098 	if (TAILQ_EMPTY(&sc->sc_qs[q]))
2099 		sc->sc_len -= sizeof(struct pfsync_subheader);
2100 }
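
/*
 * Together pfsync_q_ins() and pfsync_q_del() keep sc_len equal to
 * PFSYNC_MINPKT plus, for every non-empty queue q, one subheader plus
 * n * pfsync_qs[q].len.  E.g. the first state queued on an empty
 * PFSYNC_S_UPD queue charges sizeof(struct pfsync_subheader) +
 * pfsync_qs[PFSYNC_S_UPD].len; the second charges only the latter,
 * and draining the queue refunds the subheader again.
 */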
2101 
2102 void
2103 pfsync_update_tdb(struct tdb *t, int output)
2104 {
2105 	struct pfsync_softc *sc = pfsyncif;
2106 	size_t nlen = sizeof(struct pfsync_tdb);
2107 
2108 	if (sc == NULL)
2109 		return;
2110 
2111 	if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
2112 		if (TAILQ_EMPTY(&sc->sc_tdb_q))
2113 			nlen += sizeof(struct pfsync_subheader);
2114 
2115 		if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
2116 			pfsync_sendout();
2117 
2118 			nlen = sizeof(struct pfsync_subheader) +
2119 			    sizeof(struct pfsync_tdb);
2120 		}
2121 
2122 		sc->sc_len += nlen;
2123 		TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
2124 		SET(t->tdb_flags, TDBF_PFSYNC);
2125 		t->tdb_updates = 0;
2126 	} else {
2127 		if (++t->tdb_updates >= sc->sc_maxupdates)
2128 			schednetisr(NETISR_PFSYNC);
2129 	}
2130 
2131 	if (output)
2132 		SET(t->tdb_flags, TDBF_PFSYNC_RPL);
2133 	else
2134 		CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
2135 }
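
/*
 * Note the asymmetry with state sync: a TDB is queued at most once,
 * guarded by TDBF_PFSYNC; later calls only bump tdb_updates, and once
 * sc_maxupdates is reached the pending packet is flushed via the
 * netisr instead of queueing a second copy of the same TDB.
 */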
2136 
2137 void
2138 pfsync_delete_tdb(struct tdb *t)
2139 {
2140 	struct pfsync_softc *sc = pfsyncif;
2141 
2142 	if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
2143 		return;
2144 
2145 	sc->sc_len -= sizeof(struct pfsync_tdb);
2146 	TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
2147 	CLR(t->tdb_flags, TDBF_PFSYNC);
2148 
2149 	if (TAILQ_EMPTY(&sc->sc_tdb_q))
2150 		sc->sc_len -= sizeof(struct pfsync_subheader);
2151 }
2152 
2153 void
2154 pfsync_out_tdb(struct tdb *t, void *buf)
2155 {
2156 	struct pfsync_tdb *ut = buf;
2157 
2158 	bzero(ut, sizeof(*ut));
2159 	ut->spi = t->tdb_spi;
2160 	bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
2161 	/*
2162 	 * When a failover happens, the master's rpl is probably above
2163 	 * what we see here (we may be up to a second late), so
2164 	 * increase it a bit for outbound tdbs to manage most such
2165 	 * situations.
2166 	 *
2167 	 * For now, just add an offset that is likely to be larger
2168 	 * than the number of packets we can see in one second. The RFC
2169 	 * just says the next packet must have a higher seq value.
2170 	 *
2171 	 * XXX What is a good algorithm for this? We could use
2172 	 * a rate-determined increase, but to know it, we would have
2173 	 * to extend struct tdb.
2174 	 * XXX pt->rpl can wrap over MAXINT, but if so the real tdb
2175 	 * will soon be replaced anyway. For now, just don't handle
2176 	 * this edge case.
2177 	 */
2178 #define RPL_INCR 16384
2179 	ut->rpl = htobe64(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
2180 	    RPL_INCR : 0));
2181 	ut->cur_bytes = htobe64(t->tdb_cur_bytes);
2182 	ut->sproto = t->tdb_sproto;
2183 	ut->rdomain = htons(t->tdb_rdomain);
2184 }
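
/*
 * A worked example of the RPL_INCR bump (illustrative numbers): if
 * the master last synced tdb_rpl at 99000 but had sent up to sequence
 * 100000 by the time it died, a new master resuming at 99001 would
 * have its first thousand packets dropped by the remote peer's replay
 * window.  Advertising 99000 + 16384 = 115384 for outbound TDBs
 * instead resumes safely above anything the old master is likely to
 * have sent in the last sync interval.
 */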
2185 
2186 void
2187 pfsync_bulk_start(void)
2188 {
2189 	struct pfsync_softc *sc = pfsyncif;
2190 
2191 	DPFPRINTF(LOG_INFO, "received bulk update request");
2192 
2193 	if (TAILQ_EMPTY(&state_list))
2194 		pfsync_bulk_status(PFSYNC_BUS_END);
2195 	else {
2196 		sc->sc_ureq_received = time_uptime;
2197 
2198 		if (sc->sc_bulk_next == NULL)
2199 			sc->sc_bulk_next = TAILQ_FIRST(&state_list);
2200 		sc->sc_bulk_last = sc->sc_bulk_next;
2201 
2202 		pfsync_bulk_status(PFSYNC_BUS_START);
2203 		timeout_add(&sc->sc_bulk_tmo, 0);
2204 	}
2205 }
2206 
2207 void
2208 pfsync_bulk_update(void *arg)
2209 {
2210 	struct pfsync_softc *sc = arg;
2211 	struct pf_state *st;
2212 	int i = 0;
2213 
2214 	NET_LOCK();
2215 	st = sc->sc_bulk_next;
2216 
2217 	for (;;) {
2218 		if (st->sync_state == PFSYNC_S_NONE &&
2219 		    st->timeout < PFTM_MAX &&
2220 		    st->pfsync_time <= sc->sc_ureq_received) {
2221 			pfsync_update_state_req(st);
2222 			i++;
2223 		}
2224 
2225 		st = TAILQ_NEXT(st, entry_list);
2226 		if (st == NULL)
2227 			st = TAILQ_FIRST(&state_list);
2228 
2229 		if (st == sc->sc_bulk_last) {
2230 			/* we're done */
2231 			sc->sc_bulk_next = NULL;
2232 			sc->sc_bulk_last = NULL;
2233 			pfsync_bulk_status(PFSYNC_BUS_END);
2234 			break;
2235 		}
2236 
2237 		if (i > 1 && (sc->sc_if.if_mtu - sc->sc_len) <
2238 		    sizeof(struct pfsync_state)) {
2239 			/* we've filled a packet */
2240 			sc->sc_bulk_next = st;
2241 			timeout_add(&sc->sc_bulk_tmo, 1);
2242 			break;
2243 		}
2244 	}
2245 	NET_UNLOCK();
2246 }
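
/*
 * The loop above treats state_list as a ring: sc_bulk_last remembers
 * where this bulk pass started, so wrapping from the tail back to
 * TAILQ_FIRST() and meeting sc_bulk_last again means every state has
 * been visited once.  Only states untouched since the request
 * (pfsync_time <= sc_ureq_received) are queued, so states that normal
 * traffic already synced mid-bulk are not sent a second time.
 */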
2247 
2248 void
2249 pfsync_bulk_status(u_int8_t status)
2250 {
2251 	struct {
2252 		struct pfsync_subheader subh;
2253 		struct pfsync_bus bus;
2254 	} __packed r;
2255 
2256 	struct pfsync_softc *sc = pfsyncif;
2257 
2258 	bzero(&r, sizeof(r));
2259 
2260 	r.subh.action = PFSYNC_ACT_BUS;
2261 	r.subh.len = sizeof(struct pfsync_bus) >> 2;
2262 	r.subh.count = htons(1);
2263 
2264 	r.bus.creatorid = pf_status.hostid;
2265 	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
2266 	r.bus.status = status;
2267 
2268 	pfsync_send_plus(&r, sizeof(r));
2269 }
2270 
2271 void
2272 pfsync_bulk_fail(void *arg)
2273 {
2274 	struct pfsync_softc *sc = arg;
2275 
2276 	NET_LOCK();
2277 	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
2278 		/* Try again */
2279 		timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
2280 		pfsync_request_update(0, 0);
2281 	} else {
2282 		/* Pretend the transfer was ok */
2283 		sc->sc_ureq_sent = 0;
2284 		sc->sc_bulk_tries = 0;
2285 #if NCARP > 0
2286 		if (!pfsync_sync_ok)
2287 			carp_group_demote_adj(&sc->sc_if, -1,
2288 			    sc->sc_link_demoted ?
2289 			    "pfsync link state up" :
2290 			    "pfsync bulk fail");
2291 		if (sc->sc_initial_bulk) {
2292 			carp_group_demote_adj(&sc->sc_if, -32,
2293 			    "pfsync init");
2294 			sc->sc_initial_bulk = 0;
2295 		}
2296 #endif
2297 		pfsync_sync_ok = 1;
2298 		sc->sc_link_demoted = 0;
2299 		DPFPRINTF(LOG_ERR, "failed to receive bulk update");
2300 	}
2301 	NET_UNLOCK();
2302 }
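
/*
 * Resulting retry timeline (a sketch, assuming PFSYNC_MAX_BULKTRIES
 * keeps its usual value of 12): with the 5-second rearm above, a dead
 * or unreachable peer is given on the order of a minute of retries
 * before the transfer is written off and the carp demotion lifted.
 */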
2303 
2304 void
2305 pfsync_send_plus(void *plus, size_t pluslen)
2306 {
2307 	struct pfsync_softc *sc = pfsyncif;
2308 
2309 	if (sc->sc_len + pluslen > sc->sc_if.if_mtu)
2310 		pfsync_sendout();
2311 
2312 	sc->sc_plus = plus;
2313 	sc->sc_len += (sc->sc_pluslen = pluslen);
2314 
2315 	pfsync_sendout();
2316 }
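
/*
 * sc_plus points into the caller's stack frame (see the local struct
 * in pfsync_clear_states() and pfsync_bulk_status()), which is safe
 * only because the unconditional pfsync_sendout() above flushes the
 * extra payload before pfsync_send_plus() returns.
 */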
2317 
2318 int
2319 pfsync_up(void)
2320 {
2321 	struct pfsync_softc *sc = pfsyncif;
2322 
2323 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
2324 		return (0);
2325 
2326 	return (1);
2327 }
2328 
2329 int
2330 pfsync_state_in_use(struct pf_state *st)
2331 {
2332 	struct pfsync_softc *sc = pfsyncif;
2333 
2334 	if (sc == NULL)
2335 		return (0);
2336 
2337 	if (st->sync_state != PFSYNC_S_NONE ||
2338 	    st == sc->sc_bulk_next ||
2339 	    st == sc->sc_bulk_last)
2340 		return (1);
2341 
2342 	return (0);
2343 }
2344 
2345 void
2346 pfsync_timeout(void *arg)
2347 {
2348 	NET_LOCK();
2349 	pfsync_sendout();
2350 	NET_UNLOCK();
2351 }
2352 
2353 /* this is a softnet/netisr handler */
2354 void
2355 pfsyncintr(void)
2356 {
2357 	pfsync_sendout();
2358 }
2359 
2360 int
2361 pfsync_sysctl_pfsyncstat(void *oldp, size_t *oldlenp, void *newp)
2362 {
2363 	struct pfsyncstats pfsyncstat;
2364 
2365 	CTASSERT(sizeof(pfsyncstat) == (pfsyncs_ncounters * sizeof(uint64_t)));
2366 	memset(&pfsyncstat, 0, sizeof pfsyncstat);
2367 	counters_read(pfsynccounters, (uint64_t *)&pfsyncstat,
2368 	    pfsyncs_ncounters);
2369 	return (sysctl_rdstruct(oldp, oldlenp, newp,
2370 	    &pfsyncstat, sizeof(pfsyncstat)));
2371 }
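
/*
 * The CTASSERT above pins struct pfsyncstats to the per-CPU counter
 * array: counters_read() sums the per-CPU counters into
 * pfsyncs_ncounters uint64_t slots laid over the struct, so adding a
 * counter to one side without the other now fails at compile time
 * rather than at runtime.
 */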
2372 
2373 int
2374 pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
2375     size_t newlen)
2376 {
2377 	/* All sysctl names at this level are terminal. */
2378 	if (namelen != 1)
2379 		return (ENOTDIR);
2380 
2381 	switch (name[0]) {
2382 	case PFSYNCCTL_STATS:
2383 		return (pfsync_sysctl_pfsyncstat(oldp, oldlenp, newp));
2384 	default:
2385 		return (ENOPROTOOPT);
2386 	}
2387 }
2388