/*	$OpenBSD: pf_table.c,v 1.139 2021/11/22 12:56:04 jsg Exp $	*/

/*
 * Copyright (c) 2002 Cedric Berger
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    - Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/proc.h>

#include <net/if.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_ipsp.h>
#include <netinet/ip_icmp.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#ifdef INET6
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#endif /* INET6 */

#include <net/pfvar.h>
#include <net/pfvar_priv.h>

#define ACCEPT_FLAGS(flags, oklist)		\
	do {					\
		if ((flags & ~(oklist)) &	\
		    PFR_FLAG_ALLMASK)		\
			return (EINVAL);	\
	} while (0)

#define COPYIN(from, to, size, flags)		\
	((flags & PFR_FLAG_USERIOCTL) ?		\
	copyin((from), (to), (size)) :		\
	(bcopy((from), (to), (size)), 0))

#define COPYOUT(from, to, size, flags)		\
	((flags & PFR_FLAG_USERIOCTL) ?		\
	copyout((from), (to), (size)) :		\
	(bcopy((from), (to), (size)), 0))

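/*
 * Usage sketch for COPYIN/COPYOUT (illustrative, not kernel code):
 * with PFR_FLAG_USERIOCTL set the buffer lives in user space, so the
 * macro expands to copyin()/copyout(), which can fail; otherwise the
 * buffer is already in kernel space and the comma expression performs
 * a bcopy() and yields the constant 0 (it cannot fail).
 *
 *	struct pfr_addr ad;
 *	if (COPYIN(addr + i, &ad, sizeof(ad), flags))
 *		return (EFAULT);	/+ reachable only on the copyin() path +/
 */
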
#define YIELD(ok)				\
	do {					\
		if (ok)				\
			sched_pause(preempt);	\
	} while (0)

#define	FILLIN_SIN(sin, addr)			\
	do {					\
		(sin).sin_len = sizeof(sin);	\
		(sin).sin_family = AF_INET;	\
		(sin).sin_addr = (addr);	\
	} while (0)

#define	FILLIN_SIN6(sin6, addr)			\
	do {					\
		(sin6).sin6_len = sizeof(sin6);	\
		(sin6).sin6_family = AF_INET6;	\
		(sin6).sin6_addr = (addr);	\
	} while (0)

#define SWAP(type, a1, a2)			\
	do {					\
		type tmp = a1;			\
		a1 = a2;			\
		a2 = tmp;			\
	} while (0)

#define SUNION2PF(su, af) (((af)==AF_INET) ?	\
    (struct pf_addr *)&(su)->sin.sin_addr :	\
    (struct pf_addr *)&(su)->sin6.sin6_addr)

#define	AF_BITS(af)		(((af)==AF_INET)?32:128)
#define	ADDR_NETWORK(ad)	((ad)->pfra_net < AF_BITS((ad)->pfra_af))
#define	KENTRY_NETWORK(ke)	((ke)->pfrke_net < AF_BITS((ke)->pfrke_af))
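
/*
 * Example (illustrative): a pfr_addr holding 192.0.2.0/24 has
 * pfra_net == 24 < AF_BITS(AF_INET) == 32, so ADDR_NETWORK() is true
 * and the entry is treated as a network, i.e. looked up and routed
 * with an explicit mask; a /32 or /128 host entry is routed without
 * one.
 */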

#define NO_ADDRESSES		(-1)
#define ENQUEUE_UNMARKED_ONLY	(1)
#define INVERT_NEG_FLAG		(1)

struct pfr_walktree {
	enum pfrw_op {
		PFRW_MARK,
		PFRW_SWEEP,
		PFRW_ENQUEUE,
		PFRW_GET_ADDRS,
		PFRW_GET_ASTATS,
		PFRW_POOL_GET,
		PFRW_DYNADDR_UPDATE
	}	 pfrw_op;
	union {
		struct pfr_addr		*pfrw1_addr;
		struct pfr_astats	*pfrw1_astats;
		struct pfr_kentryworkq	*pfrw1_workq;
		struct pfr_kentry	*pfrw1_kentry;
		struct pfi_dynaddr	*pfrw1_dyn;
	}	 pfrw_1;
	int	 pfrw_free;
	int	 pfrw_flags;
};
#define pfrw_addr	pfrw_1.pfrw1_addr
#define pfrw_astats	pfrw_1.pfrw1_astats
#define pfrw_workq	pfrw_1.pfrw1_workq
#define pfrw_kentry	pfrw_1.pfrw1_kentry
#define pfrw_dyn	pfrw_1.pfrw1_dyn
#define pfrw_cnt	pfrw_free
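
/*
 * Note (illustrative): pfrw_cnt and pfrw_free alias the same field.
 * Enqueueing walks count entries up through pfrw_cnt, while
 * PFRW_GET_ADDRS/PFRW_GET_ASTATS preload it with the table size and
 * count down through pfrw_free; a nonzero remainder after the walk is
 * reported as corruption by the callers.
 */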

#define senderr(e)	do { rv = (e); goto _bad; } while (0)

struct pool		 pfr_ktable_pl;
struct pool		 pfr_kentry_pl[PFRKE_MAX];
struct pool		 pfr_kcounters_pl;
union sockaddr_union	 pfr_mask;
struct pf_addr		 pfr_ffaddr;

int			 pfr_gcd(int, int);
void			 pfr_copyout_addr(struct pfr_addr *,
			    struct pfr_kentry *ke);
int			 pfr_validate_addr(struct pfr_addr *);
void			 pfr_enqueue_addrs(struct pfr_ktable *,
			    struct pfr_kentryworkq *, int *, int);
void			 pfr_mark_addrs(struct pfr_ktable *);
struct pfr_kentry	*pfr_lookup_addr(struct pfr_ktable *,
			    struct pfr_addr *, int);
struct pfr_kentry	*pfr_lookup_kentry(struct pfr_ktable *,
			    struct pfr_kentry *, int);
struct pfr_kentry	*pfr_create_kentry(struct pfr_addr *);
struct pfr_kentry	*pfr_create_kentry_unlocked(struct pfr_addr *, int);
void			 pfr_kentry_kif_ref(struct pfr_kentry *);
void			 pfr_destroy_kentries(struct pfr_kentryworkq *);
void			 pfr_destroy_ioq(struct pfr_kentryworkq *, int);
void			 pfr_destroy_kentry(struct pfr_kentry *);
void			 pfr_insert_kentries(struct pfr_ktable *,
			    struct pfr_kentryworkq *, time_t);
void			 pfr_remove_kentries(struct pfr_ktable *,
			    struct pfr_kentryworkq *);
void			 pfr_clstats_kentries(struct pfr_kentryworkq *, time_t,
			    int);
void			 pfr_reset_feedback(struct pfr_addr *, int, int);
void			 pfr_prepare_network(union sockaddr_union *, int, int);
int			 pfr_route_kentry(struct pfr_ktable *,
			    struct pfr_kentry *);
int			 pfr_unroute_kentry(struct pfr_ktable *,
			    struct pfr_kentry *);
int			 pfr_walktree(struct radix_node *, void *, u_int);
int			 pfr_validate_table(struct pfr_table *, int, int);
int			 pfr_fix_anchor(char *);
void			 pfr_commit_ktable(struct pfr_ktable *, time_t);
void			 pfr_insert_ktables(struct pfr_ktableworkq *);
void			 pfr_insert_ktable(struct pfr_ktable *);
void			 pfr_setflags_ktables(struct pfr_ktableworkq *);
void			 pfr_setflags_ktable(struct pfr_ktable *, int);
void			 pfr_clstats_ktables(struct pfr_ktableworkq *, time_t,
			    int);
void			 pfr_clstats_ktable(struct pfr_ktable *, time_t, int);
struct pfr_ktable	*pfr_create_ktable(struct pfr_table *, time_t, int,
			    int);
void			 pfr_destroy_ktables(struct pfr_ktableworkq *, int);
void			 pfr_destroy_ktable(struct pfr_ktable *, int);
int			 pfr_ktable_compare(struct pfr_ktable *,
			    struct pfr_ktable *);
void			 pfr_ktable_winfo_update(struct pfr_ktable *,
			    struct pfr_kentry *);
struct pfr_ktable	*pfr_lookup_table(struct pfr_table *);
void			 pfr_clean_node_mask(struct pfr_ktable *,
			    struct pfr_kentryworkq *);
int			 pfr_table_count(struct pfr_table *, int);
int			 pfr_skip_table(struct pfr_table *,
			    struct pfr_ktable *, int);
struct pfr_kentry	*pfr_kentry_byidx(struct pfr_ktable *, int, int);
int			 pfr_islinklocal(sa_family_t, struct pf_addr *);

RB_PROTOTYPE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare);
RB_GENERATE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare);

struct pfr_ktablehead	 pfr_ktables;
struct pfr_table	 pfr_nulltable;
int			 pfr_ktable_cnt;

int
pfr_gcd(int m, int n)
{
	int t;

	while (m > 0) {
		t = n % m;
		n = m;
		m = t;
	}
	return (n);
}
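
/*
 * Worked example (illustrative): pfr_gcd(8, 12) iterates
 * (m,n) = (8,12) -> (4,8) -> (0,4) and returns 4.  Folding in a third
 * weight, pfr_gcd(10, pfr_gcd(8, 12)) == 2, which is how a table's
 * gcd of entry weights can be reduced as cost entries are added.
 */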

void
pfr_initialize(void)
{
	rn_init(sizeof(struct sockaddr_in6));

	pool_init(&pfr_ktable_pl, sizeof(struct pfr_ktable),
	    0, IPL_SOFTNET, 0, "pfrktable", NULL);
	pool_init(&pfr_kentry_pl[PFRKE_PLAIN], sizeof(struct pfr_kentry),
	    0, IPL_SOFTNET, 0, "pfrke_plain", NULL);
	pool_init(&pfr_kentry_pl[PFRKE_ROUTE], sizeof(struct pfr_kentry_route),
	    0, IPL_SOFTNET, 0, "pfrke_route", NULL);
	pool_init(&pfr_kentry_pl[PFRKE_COST], sizeof(struct pfr_kentry_cost),
	    0, IPL_SOFTNET, 0, "pfrke_cost", NULL);
	pool_init(&pfr_kcounters_pl, sizeof(struct pfr_kcounters),
	    0, IPL_SOFTNET, 0, "pfrkcounters", NULL);

	memset(&pfr_ffaddr, 0xff, sizeof(pfr_ffaddr));
}

int
pfr_clr_addrs(struct pfr_table *tbl, int *ndel, int flags)
{
	struct pfr_ktable	*kt;
	struct pfr_kentryworkq	 workq;

	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
	if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL))
		return (EINVAL);
	kt = pfr_lookup_table(tbl);
	if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
		return (ESRCH);
	if (kt->pfrkt_flags & PFR_TFLAG_CONST)
		return (EPERM);
	pfr_enqueue_addrs(kt, &workq, ndel, 0);

	if (!(flags & PFR_FLAG_DUMMY)) {
		pfr_remove_kentries(kt, &workq);
		if (kt->pfrkt_cnt) {
			DPFPRINTF(LOG_NOTICE,
			    "pfr_clr_addrs: corruption detected (%d).",
			    kt->pfrkt_cnt);
			kt->pfrkt_cnt = 0;
		}
	}
	return (0);
}

void
pfr_fill_feedback(struct pfr_kentry_all *ke, struct pfr_addr *ad)
{
	ad->pfra_type = ke->pfrke_type;

	switch (ke->pfrke_type) {
	case PFRKE_PLAIN:
		break;
	case PFRKE_COST:
		((struct pfr_kentry_cost *)ke)->weight = ad->pfra_weight;
		/* FALLTHROUGH */
	case PFRKE_ROUTE:
		if (ke->pfrke_rifname[0])
			strlcpy(ad->pfra_ifname, ke->pfrke_rifname, IFNAMSIZ);
		break;
	}

	switch (ke->pfrke_af) {
	case AF_INET:
		ad->pfra_ip4addr = ke->pfrke_sa.sin.sin_addr;
		break;
#ifdef	INET6
	case AF_INET6:
		ad->pfra_ip6addr = ke->pfrke_sa.sin6.sin6_addr;
		break;
#endif	/* INET6 */
	default:
		unhandled_af(ke->pfrke_af);
	}
	ad->pfra_weight = ((struct pfr_kentry_cost *)ke)->weight;
	ad->pfra_af = ke->pfrke_af;
	ad->pfra_net = ke->pfrke_net;
	if (ke->pfrke_flags & PFRKE_FLAG_NOT)
		ad->pfra_not = 1;
	ad->pfra_fback = ke->pfrke_fb;
}

int
pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
    int *nadd, int flags)
{
	struct pfr_ktable	*kt, *tmpkt;
	struct pfr_kentryworkq	 workq, ioq;
	struct pfr_kentry	*p, *q, *ke;
	struct pfr_addr		 ad;
	int			 i, rv, xadd = 0;
	time_t			 tzero = gettime();

	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK);
	if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL))
		return (EINVAL);
	tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0,
	    !(flags & PFR_FLAG_USERIOCTL));
	if (tmpkt == NULL)
		return (ENOMEM);
	SLIST_INIT(&workq);
	SLIST_INIT(&ioq);
	for (i = 0; i < size; i++) {
		YIELD(flags & PFR_FLAG_USERIOCTL);
		if (COPYIN(addr+i, &ad, sizeof(ad), flags))
			senderr(EFAULT);
		if (pfr_validate_addr(&ad))
			senderr(EINVAL);

		ke = pfr_create_kentry_unlocked(&ad, flags);
		if (ke == NULL)
			senderr(ENOMEM);
		ke->pfrke_fb = PFR_FB_NONE;
		SLIST_INSERT_HEAD(&ioq, ke, pfrke_ioq);
	}

	NET_LOCK();
	PF_LOCK();
	kt = pfr_lookup_table(tbl);
	if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) {
		PF_UNLOCK();
		NET_UNLOCK();
		senderr(ESRCH);
	}
	if (kt->pfrkt_flags & PFR_TFLAG_CONST) {
		PF_UNLOCK();
		NET_UNLOCK();
		senderr(EPERM);
	}
	SLIST_FOREACH(ke, &ioq, pfrke_ioq) {
		pfr_kentry_kif_ref(ke);
		p = pfr_lookup_kentry(kt, ke, 1);
		q = pfr_lookup_kentry(tmpkt, ke, 1);
		if (flags & PFR_FLAG_FEEDBACK) {
			if (q != NULL)
				ke->pfrke_fb = PFR_FB_DUPLICATE;
			else if (p == NULL)
				ke->pfrke_fb = PFR_FB_ADDED;
			else if ((p->pfrke_flags & PFRKE_FLAG_NOT) !=
			    (ke->pfrke_flags & PFRKE_FLAG_NOT))
				ke->pfrke_fb = PFR_FB_CONFLICT;
			else
				ke->pfrke_fb = PFR_FB_NONE;
		}
		if (p == NULL && q == NULL) {
			if (pfr_route_kentry(tmpkt, ke)) {
				/* defer destruction until feedback is processed */
				ke->pfrke_fb = PFR_FB_NONE;
			} else {
				/*
				 * mark the entry as added to the table, so we
				 * won't kill it with the rest of the ioq
				 */
				ke->pfrke_fb = PFR_FB_ADDED;
				SLIST_INSERT_HEAD(&workq, ke, pfrke_workq);
				xadd++;
			}
		}
	}
	/* remove the entries we are about to insert into kt from tmpkt */
	pfr_clean_node_mask(tmpkt, &workq);
	if (!(flags & PFR_FLAG_DUMMY))
		pfr_insert_kentries(kt, &workq, tzero);

	PF_UNLOCK();
	NET_UNLOCK();

	if (flags & PFR_FLAG_FEEDBACK) {
		i = 0;
		while ((ke = SLIST_FIRST(&ioq)) != NULL) {
			YIELD(flags & PFR_FLAG_USERIOCTL);
			pfr_fill_feedback((struct pfr_kentry_all *)ke, &ad);
			if (COPYOUT(&ad, addr+i, sizeof(ad), flags))
				senderr(EFAULT);
			i++;
			SLIST_REMOVE_HEAD(&ioq, pfrke_ioq);
			switch (ke->pfrke_fb) {
			case PFR_FB_CONFLICT:
			case PFR_FB_DUPLICATE:
			case PFR_FB_NONE:
				pfr_destroy_kentry(ke);
				break;
			case PFR_FB_ADDED:
				if (flags & PFR_FLAG_DUMMY)
					pfr_destroy_kentry(ke);
			}
		}
	} else
		pfr_destroy_ioq(&ioq, flags);

	if (nadd != NULL)
		*nadd = xadd;

	pfr_destroy_ktable(tmpkt, 0);
	return (0);
_bad:
	pfr_destroy_ioq(&ioq, flags);
	if (flags & PFR_FLAG_FEEDBACK)
		pfr_reset_feedback(addr, size, flags);
	pfr_destroy_ktable(tmpkt, 0);
	return (rv);
}

int
pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
    int *ndel, int flags)
{
	struct pfr_ktable	*kt;
	struct pfr_kentryworkq	 workq;
	struct pfr_kentry	*p;
	struct pfr_addr		 ad;
	int			 i, rv, xdel = 0, log = 1;

	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK);
	if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL))
		return (EINVAL);
	kt = pfr_lookup_table(tbl);
	if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
		return (ESRCH);
	if (kt->pfrkt_flags & PFR_TFLAG_CONST)
		return (EPERM);
	/*
	 * There are two algorithms to choose from here.
	 * With:
	 *   n: number of addresses to delete
	 *   N: number of addresses in the table
	 *
	 * one is O(N) and is better for large 'n',
	 * one is O(n*LOG(N)) and is better for small 'n'.
	 *
	 * The following code tries to decide which one is best.
	 */
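	/*
	 * Worked example (illustrative): with N == 65536 the loop below
	 * leaves log == 18, so deleting more than 65536/18 (about 3640)
	 * addresses takes the O(N) full-table scan; smaller batches use
	 * per-address radix lookups instead.
	 */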
	for (i = kt->pfrkt_cnt; i > 0; i >>= 1)
		log++;
	if (size > kt->pfrkt_cnt/log) {
		/* full table scan */
		pfr_mark_addrs(kt);
	} else {
		/* iterate over addresses to delete */
		for (i = 0; i < size; i++) {
			YIELD(flags & PFR_FLAG_USERIOCTL);
			if (COPYIN(addr+i, &ad, sizeof(ad), flags))
				return (EFAULT);
			if (pfr_validate_addr(&ad))
				return (EINVAL);
			p = pfr_lookup_addr(kt, &ad, 1);
			if (p != NULL)
				p->pfrke_flags &= ~PFRKE_FLAG_MARK;
		}
	}
	SLIST_INIT(&workq);
	for (i = 0; i < size; i++) {
		YIELD(flags & PFR_FLAG_USERIOCTL);
		if (COPYIN(addr+i, &ad, sizeof(ad), flags))
			senderr(EFAULT);
		if (pfr_validate_addr(&ad))
			senderr(EINVAL);
		p = pfr_lookup_addr(kt, &ad, 1);
		if (flags & PFR_FLAG_FEEDBACK) {
			if (p == NULL)
				ad.pfra_fback = PFR_FB_NONE;
			else if ((p->pfrke_flags & PFRKE_FLAG_NOT) !=
			    ad.pfra_not)
				ad.pfra_fback = PFR_FB_CONFLICT;
			else if (p->pfrke_flags & PFRKE_FLAG_MARK)
				ad.pfra_fback = PFR_FB_DUPLICATE;
			else
				ad.pfra_fback = PFR_FB_DELETED;
		}
		if (p != NULL &&
		    (p->pfrke_flags & PFRKE_FLAG_NOT) == ad.pfra_not &&
		    !(p->pfrke_flags & PFRKE_FLAG_MARK)) {
			p->pfrke_flags |= PFRKE_FLAG_MARK;
			SLIST_INSERT_HEAD(&workq, p, pfrke_workq);
			xdel++;
		}
		if (flags & PFR_FLAG_FEEDBACK)
			if (COPYOUT(&ad, addr+i, sizeof(ad), flags))
				senderr(EFAULT);
	}
	if (!(flags & PFR_FLAG_DUMMY)) {
		pfr_remove_kentries(kt, &workq);
	}
	if (ndel != NULL)
		*ndel = xdel;
	return (0);
_bad:
	if (flags & PFR_FLAG_FEEDBACK)
		pfr_reset_feedback(addr, size, flags);
	return (rv);
}

int
pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
    int *size2, int *nadd, int *ndel, int *nchange, int flags,
    u_int32_t ignore_pfrt_flags)
{
	struct pfr_ktable	*kt, *tmpkt;
	struct pfr_kentryworkq	 addq, delq, changeq;
	struct pfr_kentry	*p, *q;
	struct pfr_addr		 ad;
	int			 i, rv, xadd = 0, xdel = 0, xchange = 0;
	time_t			 tzero = gettime();

	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK);
	if (pfr_validate_table(tbl, ignore_pfrt_flags, flags &
	    PFR_FLAG_USERIOCTL))
		return (EINVAL);
	kt = pfr_lookup_table(tbl);
	if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
		return (ESRCH);
	if (kt->pfrkt_flags & PFR_TFLAG_CONST)
		return (EPERM);
	tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0,
	    !(flags & PFR_FLAG_USERIOCTL));
	if (tmpkt == NULL)
		return (ENOMEM);
	pfr_mark_addrs(kt);
	SLIST_INIT(&addq);
	SLIST_INIT(&delq);
	SLIST_INIT(&changeq);
	for (i = 0; i < size; i++) {
		YIELD(flags & PFR_FLAG_USERIOCTL);
		if (COPYIN(addr+i, &ad, sizeof(ad), flags))
			senderr(EFAULT);
		if (pfr_validate_addr(&ad))
			senderr(EINVAL);
		ad.pfra_fback = PFR_FB_NONE;
		p = pfr_lookup_addr(kt, &ad, 1);
		if (p != NULL) {
			if (p->pfrke_flags & PFRKE_FLAG_MARK) {
				ad.pfra_fback = PFR_FB_DUPLICATE;
				goto _skip;
			}
			p->pfrke_flags |= PFRKE_FLAG_MARK;
			if ((p->pfrke_flags & PFRKE_FLAG_NOT) != ad.pfra_not) {
				SLIST_INSERT_HEAD(&changeq, p, pfrke_workq);
				ad.pfra_fback = PFR_FB_CHANGED;
				xchange++;
			}
		} else {
			q = pfr_lookup_addr(tmpkt, &ad, 1);
			if (q != NULL) {
				ad.pfra_fback = PFR_FB_DUPLICATE;
				goto _skip;
			}
			p = pfr_create_kentry(&ad);
			if (p == NULL)
				senderr(ENOMEM);
			if (pfr_route_kentry(tmpkt, p)) {
				pfr_destroy_kentry(p);
				ad.pfra_fback = PFR_FB_NONE;
				goto _skip;
			}
			SLIST_INSERT_HEAD(&addq, p, pfrke_workq);
			ad.pfra_fback = PFR_FB_ADDED;
			xadd++;
			if (p->pfrke_type == PFRKE_COST)
				kt->pfrkt_refcntcost++;
			pfr_ktable_winfo_update(kt, p);
		}
_skip:
		if (flags & PFR_FLAG_FEEDBACK)
			if (COPYOUT(&ad, addr+i, sizeof(ad), flags))
				senderr(EFAULT);
	}
	pfr_enqueue_addrs(kt, &delq, &xdel, ENQUEUE_UNMARKED_ONLY);
	if ((flags & PFR_FLAG_FEEDBACK) && *size2) {
		if (*size2 < size+xdel) {
			*size2 = size+xdel;
			senderr(0);
		}
		i = 0;
		SLIST_FOREACH(p, &delq, pfrke_workq) {
			pfr_copyout_addr(&ad, p);
			ad.pfra_fback = PFR_FB_DELETED;
			if (COPYOUT(&ad, addr+size+i, sizeof(ad), flags))
				senderr(EFAULT);
			i++;
		}
	}
	pfr_clean_node_mask(tmpkt, &addq);
	if (!(flags & PFR_FLAG_DUMMY)) {
		pfr_insert_kentries(kt, &addq, tzero);
		pfr_remove_kentries(kt, &delq);
		pfr_clstats_kentries(&changeq, tzero, INVERT_NEG_FLAG);
	} else
		pfr_destroy_kentries(&addq);
	if (nadd != NULL)
		*nadd = xadd;
	if (ndel != NULL)
		*ndel = xdel;
	if (nchange != NULL)
		*nchange = xchange;
	if ((flags & PFR_FLAG_FEEDBACK) && size2)
		*size2 = size+xdel;
	pfr_destroy_ktable(tmpkt, 0);
	return (0);
_bad:
	pfr_clean_node_mask(tmpkt, &addq);
	pfr_destroy_kentries(&addq);
	if (flags & PFR_FLAG_FEEDBACK)
		pfr_reset_feedback(addr, size, flags);
	pfr_destroy_ktable(tmpkt, 0);
	return (rv);
}

int
pfr_tst_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
	int *nmatch, int flags)
{
	struct pfr_ktable	*kt;
	struct pfr_kentry	*p;
	struct pfr_addr		 ad;
	int			 i, xmatch = 0;

	ACCEPT_FLAGS(flags, PFR_FLAG_REPLACE);
	if (pfr_validate_table(tbl, 0, 0))
		return (EINVAL);
	kt = pfr_lookup_table(tbl);
	if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
		return (ESRCH);

	for (i = 0; i < size; i++) {
		YIELD(flags & PFR_FLAG_USERIOCTL);
		if (COPYIN(addr+i, &ad, sizeof(ad), flags))
			return (EFAULT);
		if (pfr_validate_addr(&ad))
			return (EINVAL);
		if (ADDR_NETWORK(&ad))
			return (EINVAL);
		p = pfr_lookup_addr(kt, &ad, 0);
		if (flags & PFR_FLAG_REPLACE)
			pfr_copyout_addr(&ad, p);
		ad.pfra_fback = (p == NULL) ? PFR_FB_NONE :
		    ((p->pfrke_flags & PFRKE_FLAG_NOT) ?
		    PFR_FB_NOTMATCH : PFR_FB_MATCH);
		if (p != NULL && !(p->pfrke_flags & PFRKE_FLAG_NOT))
			xmatch++;
		if (COPYOUT(&ad, addr+i, sizeof(ad), flags))
			return (EFAULT);
	}
	if (nmatch != NULL)
		*nmatch = xmatch;
	return (0);
}

int
pfr_get_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int *size,
	int flags)
{
	struct pfr_ktable	*kt;
	struct pfr_walktree	 w;
	int			 rv;

	ACCEPT_FLAGS(flags, 0);
	if (pfr_validate_table(tbl, 0, 0))
		return (EINVAL);
	kt = pfr_lookup_table(tbl);
	if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
		return (ESRCH);
	if (kt->pfrkt_cnt > *size) {
		*size = kt->pfrkt_cnt;
		return (0);
	}

	bzero(&w, sizeof(w));
	w.pfrw_op = PFRW_GET_ADDRS;
	w.pfrw_addr = addr;
	w.pfrw_free = kt->pfrkt_cnt;
	w.pfrw_flags = flags;
	rv = rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w);
	if (!rv)
		rv = rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w);
	if (rv)
		return (rv);

	if (w.pfrw_free) {
		DPFPRINTF(LOG_ERR,
		    "pfr_get_addrs: corruption detected (%d)", w.pfrw_free);
		return (ENOTTY);
	}
	*size = kt->pfrkt_cnt;
	return (0);
}

int
pfr_get_astats(struct pfr_table *tbl, struct pfr_astats *addr, int *size,
	int flags)
{
	struct pfr_ktable	*kt;
	struct pfr_walktree	 w;
	struct pfr_kentryworkq	 workq;
	int			 rv;
	time_t			 tzero = gettime();

	if (pfr_validate_table(tbl, 0, 0))
		return (EINVAL);
	kt = pfr_lookup_table(tbl);
	if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
		return (ESRCH);
	if (kt->pfrkt_cnt > *size) {
		*size = kt->pfrkt_cnt;
		return (0);
	}

	bzero(&w, sizeof(w));
	w.pfrw_op = PFRW_GET_ASTATS;
	w.pfrw_astats = addr;
	w.pfrw_free = kt->pfrkt_cnt;
	w.pfrw_flags = flags;
	rv = rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w);
	if (!rv)
		rv = rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w);
	if (!rv && (flags & PFR_FLAG_CLSTATS)) {
		pfr_enqueue_addrs(kt, &workq, NULL, 0);
		pfr_clstats_kentries(&workq, tzero, 0);
	}
	if (rv)
		return (rv);

	if (w.pfrw_free) {
		DPFPRINTF(LOG_ERR,
		    "pfr_get_astats: corruption detected (%d)", w.pfrw_free);
		return (ENOTTY);
	}
	*size = kt->pfrkt_cnt;
	return (0);
}

int
pfr_clr_astats(struct pfr_table *tbl, struct pfr_addr *addr, int size,
    int *nzero, int flags)
{
	struct pfr_ktable	*kt;
	struct pfr_kentryworkq	 workq;
	struct pfr_kentry	*p;
	struct pfr_addr		 ad;
	int			 i, rv, xzero = 0;

	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK);
	if (pfr_validate_table(tbl, 0, 0))
		return (EINVAL);
	kt = pfr_lookup_table(tbl);
	if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
		return (ESRCH);
	SLIST_INIT(&workq);
	for (i = 0; i < size; i++) {
		YIELD(flags & PFR_FLAG_USERIOCTL);
		if (COPYIN(addr+i, &ad, sizeof(ad), flags))
			senderr(EFAULT);
		if (pfr_validate_addr(&ad))
			senderr(EINVAL);
		p = pfr_lookup_addr(kt, &ad, 1);
		if (flags & PFR_FLAG_FEEDBACK) {
			ad.pfra_fback = (p != NULL) ?
			    PFR_FB_CLEARED : PFR_FB_NONE;
			if (COPYOUT(&ad, addr+i, sizeof(ad), flags))
				senderr(EFAULT);
		}
		if (p != NULL) {
			SLIST_INSERT_HEAD(&workq, p, pfrke_workq);
			xzero++;
		}
	}

	if (!(flags & PFR_FLAG_DUMMY)) {
		pfr_clstats_kentries(&workq, gettime(), 0);
	}
	if (nzero != NULL)
		*nzero = xzero;
	return (0);
_bad:
	if (flags & PFR_FLAG_FEEDBACK)
		pfr_reset_feedback(addr, size, flags);
	return (rv);
}

int
pfr_validate_addr(struct pfr_addr *ad)
{
	int i;

	switch (ad->pfra_af) {
	case AF_INET:
		if (ad->pfra_net > 32)
			return (-1);
		break;
#ifdef INET6
	case AF_INET6:
		if (ad->pfra_net > 128)
			return (-1);
		break;
#endif /* INET6 */
	default:
		return (-1);
	}
	if (ad->pfra_net < 128 &&
	    (((caddr_t)ad)[ad->pfra_net/8] & (0xFF >> (ad->pfra_net%8))))
		return (-1);
	for (i = (ad->pfra_net+7)/8; i < sizeof(ad->pfra_u); i++)
		if (((caddr_t)ad)[i])
			return (-1);
	if (ad->pfra_not && ad->pfra_not != 1)
		return (-1);
	if (ad->pfra_fback != PFR_FB_NONE)
		return (-1);
	if (ad->pfra_type >= PFRKE_MAX)
		return (-1);
	return (0);
}
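
/*
 * Worked example for the checks above (illustrative): validating
 * 10.0.0.1/24 fails because byte pfra_net/8 == 3 of the address (0x01)
 * ANDed with 0xFF >> (pfra_net%8) == 0xFF is nonzero, i.e. host bits
 * are set below the prefix; 10.0.0.0/24 passes.  The loop then insists
 * that every byte of pfra_u from offset (pfra_net+7)/8 onward is zero,
 * catching stray bits in the unused IPv6 words of an IPv4 entry.
 */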

void
pfr_enqueue_addrs(struct pfr_ktable *kt, struct pfr_kentryworkq *workq,
	int *naddr, int sweep)
{
	struct pfr_walktree	w;

	SLIST_INIT(workq);
	bzero(&w, sizeof(w));
	w.pfrw_op = sweep ? PFRW_SWEEP : PFRW_ENQUEUE;
	w.pfrw_workq = workq;
	if (kt->pfrkt_ip4 != NULL)
		if (rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w))
			DPFPRINTF(LOG_ERR,
			    "pfr_enqueue_addrs: IPv4 walktree failed.");
	if (kt->pfrkt_ip6 != NULL)
		if (rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w))
			DPFPRINTF(LOG_ERR,
			    "pfr_enqueue_addrs: IPv6 walktree failed.");
	if (naddr != NULL)
		*naddr = w.pfrw_cnt;
}

void
pfr_mark_addrs(struct pfr_ktable *kt)
{
	struct pfr_walktree	w;

	bzero(&w, sizeof(w));
	w.pfrw_op = PFRW_MARK;
	if (rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w))
		DPFPRINTF(LOG_ERR,
		    "pfr_mark_addrs: IPv4 walktree failed.");
	if (rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w))
		DPFPRINTF(LOG_ERR,
		    "pfr_mark_addrs: IPv6 walktree failed.");
}

struct pfr_kentry *
pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact)
{
	union sockaddr_union	 sa, mask;
	struct radix_node_head	*head;
	struct pfr_kentry	*ke;

	bzero(&sa, sizeof(sa));
	switch (ad->pfra_af) {
	case AF_INET:
		FILLIN_SIN(sa.sin, ad->pfra_ip4addr);
		head = kt->pfrkt_ip4;
		break;
#ifdef	INET6
	case AF_INET6:
		FILLIN_SIN6(sa.sin6, ad->pfra_ip6addr);
		head = kt->pfrkt_ip6;
		break;
#endif	/* INET6 */
	default:
		unhandled_af(ad->pfra_af);
	}
	if (ADDR_NETWORK(ad)) {
		pfr_prepare_network(&mask, ad->pfra_af, ad->pfra_net);
		ke = (struct pfr_kentry *)rn_lookup(&sa, &mask, head);
	} else {
		ke = (struct pfr_kentry *)rn_match(&sa, head);
		if (exact && ke && KENTRY_NETWORK(ke))
			ke = NULL;
	}
	return (ke);
}

struct pfr_kentry *
pfr_lookup_kentry(struct pfr_ktable *kt, struct pfr_kentry *key, int exact)
{
	union sockaddr_union	 mask;
	struct radix_node_head	*head;
	struct pfr_kentry	*ke;

	switch (key->pfrke_af) {
	case AF_INET:
		head = kt->pfrkt_ip4;
		break;
#ifdef	INET6
	case AF_INET6:
		head = kt->pfrkt_ip6;
		break;
#endif	/* INET6 */
	default:
		unhandled_af(key->pfrke_af);
	}
	if (KENTRY_NETWORK(key)) {
		pfr_prepare_network(&mask, key->pfrke_af, key->pfrke_net);
		ke = (struct pfr_kentry *)rn_lookup(&key->pfrke_sa, &mask,
		    head);
	} else {
		ke = (struct pfr_kentry *)rn_match(&key->pfrke_sa, head);
		if (exact && ke && KENTRY_NETWORK(ke))
			ke = NULL;
	}
	return (ke);
}

struct pfr_kentry *
pfr_create_kentry(struct pfr_addr *ad)
{
	struct pfr_kentry_all	*ke;

	if (ad->pfra_type >= PFRKE_MAX)
		panic("unknown pfra_type %d", ad->pfra_type);

	ke = pool_get(&pfr_kentry_pl[ad->pfra_type], PR_NOWAIT | PR_ZERO);
	if (ke == NULL)
		return (NULL);

	ke->pfrke_type = ad->pfra_type;

	/* set weight allowing implicit weights */
	if (ad->pfra_weight == 0)
		ad->pfra_weight = 1;

	switch (ke->pfrke_type) {
	case PFRKE_PLAIN:
		break;
	case PFRKE_COST:
		((struct pfr_kentry_cost *)ke)->weight = ad->pfra_weight;
		/* FALLTHROUGH */
	case PFRKE_ROUTE:
		if (ad->pfra_ifname[0])
			ke->pfrke_rkif = pfi_kif_get(ad->pfra_ifname, NULL);
		if (ke->pfrke_rkif)
			pfi_kif_ref(ke->pfrke_rkif, PFI_KIF_REF_ROUTE);
		break;
	}

	switch (ad->pfra_af) {
	case AF_INET:
		FILLIN_SIN(ke->pfrke_sa.sin, ad->pfra_ip4addr);
		break;
#ifdef	INET6
	case AF_INET6:
		FILLIN_SIN6(ke->pfrke_sa.sin6, ad->pfra_ip6addr);
		break;
#endif	/* INET6 */
	default:
		unhandled_af(ad->pfra_af);
	}
	ke->pfrke_af = ad->pfra_af;
	ke->pfrke_net = ad->pfra_net;
	if (ad->pfra_not)
		ke->pfrke_flags |= PFRKE_FLAG_NOT;
	return ((struct pfr_kentry *)ke);
}

struct pfr_kentry *
pfr_create_kentry_unlocked(struct pfr_addr *ad, int flags)
{
	struct pfr_kentry_all	*ke;
	int mflags = PR_ZERO;

	if (ad->pfra_type >= PFRKE_MAX)
		panic("unknown pfra_type %d", ad->pfra_type);

	if (flags & PFR_FLAG_USERIOCTL)
		mflags |= PR_WAITOK;
	else
		mflags |= PR_NOWAIT;

	ke = pool_get(&pfr_kentry_pl[ad->pfra_type], mflags);
	if (ke == NULL)
		return (NULL);

	ke->pfrke_type = ad->pfra_type;

	/* set weight allowing implicit weights */
	if (ad->pfra_weight == 0)
		ad->pfra_weight = 1;

	switch (ke->pfrke_type) {
	case PFRKE_PLAIN:
		break;
	case PFRKE_COST:
		((struct pfr_kentry_cost *)ke)->weight = ad->pfra_weight;
		/* FALLTHROUGH */
	case PFRKE_ROUTE:
		if (ad->pfra_ifname[0])
			(void) strlcpy(ke->pfrke_rifname, ad->pfra_ifname,
			    IFNAMSIZ);
		break;
	}

	switch (ad->pfra_af) {
	case AF_INET:
		FILLIN_SIN(ke->pfrke_sa.sin, ad->pfra_ip4addr);
		break;
#ifdef	INET6
	case AF_INET6:
		FILLIN_SIN6(ke->pfrke_sa.sin6, ad->pfra_ip6addr);
		break;
#endif	/* INET6 */
	default:
		unhandled_af(ad->pfra_af);
	}
	ke->pfrke_af = ad->pfra_af;
	ke->pfrke_net = ad->pfra_net;
	if (ad->pfra_not)
		ke->pfrke_flags |= PFRKE_FLAG_NOT;
	return ((struct pfr_kentry *)ke);
}

void
pfr_kentry_kif_ref(struct pfr_kentry *ke_all)
{
	struct pfr_kentry_all	*ke = (struct pfr_kentry_all *)ke_all;

	NET_ASSERT_LOCKED();
	switch (ke->pfrke_type) {
	case PFRKE_PLAIN:
		break;
	case PFRKE_COST:
	case PFRKE_ROUTE:
		if (ke->pfrke_rifname[0])
			ke->pfrke_rkif = pfi_kif_get(ke->pfrke_rifname, NULL);
		if (ke->pfrke_rkif)
			pfi_kif_ref(ke->pfrke_rkif, PFI_KIF_REF_ROUTE);
		break;
	}
}

void
pfr_destroy_kentries(struct pfr_kentryworkq *workq)
{
	struct pfr_kentry	*p;

	while ((p = SLIST_FIRST(workq)) != NULL) {
		YIELD(1);
		SLIST_REMOVE_HEAD(workq, pfrke_workq);
		pfr_destroy_kentry(p);
	}
}

void
pfr_destroy_ioq(struct pfr_kentryworkq *ioq, int flags)
{
	struct pfr_kentry	*p;

	while ((p = SLIST_FIRST(ioq)) != NULL) {
		YIELD(flags & PFR_FLAG_USERIOCTL);
		SLIST_REMOVE_HEAD(ioq, pfrke_ioq);
		/*
		 * we only destroy entries that did not make it into the
		 * table
		 */
		if ((p->pfrke_fb != PFR_FB_ADDED) || (flags & PFR_FLAG_DUMMY))
			pfr_destroy_kentry(p);
	}
}

void
pfr_destroy_kentry(struct pfr_kentry *ke)
{
	if (ke->pfrke_counters)
		pool_put(&pfr_kcounters_pl, ke->pfrke_counters);
	if (ke->pfrke_type == PFRKE_COST || ke->pfrke_type == PFRKE_ROUTE)
		pfi_kif_unref(((struct pfr_kentry_all *)ke)->pfrke_rkif,
		    PFI_KIF_REF_ROUTE);
	pool_put(&pfr_kentry_pl[ke->pfrke_type], ke);
}

void
pfr_insert_kentries(struct pfr_ktable *kt,
    struct pfr_kentryworkq *workq, time_t tzero)
{
	struct pfr_kentry	*p;
	int			 rv, n = 0;

	SLIST_FOREACH(p, workq, pfrke_workq) {
		rv = pfr_route_kentry(kt, p);
		if (rv) {
			DPFPRINTF(LOG_ERR,
			    "pfr_insert_kentries: cannot route entry "
			    "(code=%d).", rv);
			break;
		}
		p->pfrke_tzero = tzero;
		++n;
		if (p->pfrke_type == PFRKE_COST)
			kt->pfrkt_refcntcost++;
		pfr_ktable_winfo_update(kt, p);
		YIELD(1);
	}
	kt->pfrkt_cnt += n;
}

int
pfr_insert_kentry(struct pfr_ktable *kt, struct pfr_addr *ad, time_t tzero)
{
	struct pfr_kentry	*p;
	int			 rv;

	p = pfr_lookup_addr(kt, ad, 1);
	if (p != NULL)
		return (0);
	p = pfr_create_kentry(ad);
	if (p == NULL)
		return (EINVAL);

	rv = pfr_route_kentry(kt, p);
	if (rv)
		return (rv);

	p->pfrke_tzero = tzero;
	if (p->pfrke_type == PFRKE_COST)
		kt->pfrkt_refcntcost++;
	kt->pfrkt_cnt++;
	pfr_ktable_winfo_update(kt, p);

	return (0);
}

void
pfr_remove_kentries(struct pfr_ktable *kt,
    struct pfr_kentryworkq *workq)
{
	struct pfr_kentry	*p;
	struct pfr_kentryworkq	 addrq;
	int			 n = 0;

	SLIST_FOREACH(p, workq, pfrke_workq) {
		pfr_unroute_kentry(kt, p);
		++n;
		YIELD(1);
		if (p->pfrke_type == PFRKE_COST)
			kt->pfrkt_refcntcost--;
	}
	kt->pfrkt_cnt -= n;
	pfr_destroy_kentries(workq);

	/* update maxweight and gcd for load balancing */
	if (kt->pfrkt_refcntcost > 0) {
		kt->pfrkt_gcdweight = 0;
		kt->pfrkt_maxweight = 1;
		pfr_enqueue_addrs(kt, &addrq, NULL, 0);
		SLIST_FOREACH(p, &addrq, pfrke_workq)
			pfr_ktable_winfo_update(kt, p);
	}
}
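
/*
 * Example for the weight bookkeeping above (illustrative, assuming
 * pfr_ktable_winfo_update(), defined elsewhere, folds each entry's
 * weight into the table): with remaining cost entries weighted
 * {3, 6, 9}, the rescan leaves pfrkt_maxweight == 9 and
 * pfrkt_gcdweight == pfr_gcd(9, pfr_gcd(6, 3)) == 3, the step size a
 * weighted round-robin pass over the table can decrement by.
 */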

void
pfr_clean_node_mask(struct pfr_ktable *kt,
    struct pfr_kentryworkq *workq)
{
	struct pfr_kentry	*p;

	SLIST_FOREACH(p, workq, pfrke_workq) {
		pfr_unroute_kentry(kt, p);
	}
}

void
pfr_clstats_kentries(struct pfr_kentryworkq *workq, time_t tzero, int negchange)
{
	struct pfr_kentry	*p;

	SLIST_FOREACH(p, workq, pfrke_workq) {
		if (negchange)
			p->pfrke_flags ^= PFRKE_FLAG_NOT;
		if (p->pfrke_counters) {
			pool_put(&pfr_kcounters_pl, p->pfrke_counters);
			p->pfrke_counters = NULL;
		}
		p->pfrke_tzero = tzero;
	}
}

void
pfr_reset_feedback(struct pfr_addr *addr, int size, int flags)
{
	struct pfr_addr	ad;
	int		i;

	for (i = 0; i < size; i++) {
		YIELD(flags & PFR_FLAG_USERIOCTL);
		if (COPYIN(addr+i, &ad, sizeof(ad), flags))
			break;
		ad.pfra_fback = PFR_FB_NONE;
		if (COPYOUT(&ad, addr+i, sizeof(ad), flags))
			break;
	}
}

void
pfr_prepare_network(union sockaddr_union *sa, int af, int net)
{
#ifdef	INET6
	int	i;
#endif	/* INET6 */

	bzero(sa, sizeof(*sa));
	switch (af) {
	case AF_INET:
		sa->sin.sin_len = sizeof(sa->sin);
		sa->sin.sin_family = AF_INET;
		sa->sin.sin_addr.s_addr = net ? htonl(-1 << (32-net)) : 0;
		break;
#ifdef	INET6
	case AF_INET6:
		sa->sin6.sin6_len = sizeof(sa->sin6);
		sa->sin6.sin6_family = AF_INET6;
		for (i = 0; i < 4; i++) {
			if (net <= 32) {
				sa->sin6.sin6_addr.s6_addr32[i] =
				    net ? htonl(-1 << (32-net)) : 0;
				break;
			}
			sa->sin6.sin6_addr.s6_addr32[i] = 0xFFFFFFFF;
			net -= 32;
		}
		break;
#endif	/* INET6 */
	default:
		unhandled_af(af);
	}
}
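
/*
 * Worked examples (illustrative): for AF_INET with net == 24 the mask
 * becomes htonl(-1 << 8), i.e. 255.255.255.0.  For AF_INET6 with
 * net == 40, the loop sets s6_addr32[0] to all ones, writes
 * htonl(0xff000000) into s6_addr32[1] and stops, yielding
 * ffff:ffff:ff00::, the remaining words having been zeroed by the
 * bzero() above.
 */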

int
pfr_route_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke)
{
	union sockaddr_union	 mask;
	struct radix_node	*rn;
	struct radix_node_head	*head;

	bzero(ke->pfrke_node, sizeof(ke->pfrke_node));
	switch (ke->pfrke_af) {
	case AF_INET:
		head = kt->pfrkt_ip4;
		break;
#ifdef	INET6
	case AF_INET6:
		head = kt->pfrkt_ip6;
		break;
#endif	/* INET6 */
	default:
		unhandled_af(ke->pfrke_af);
	}

	if (KENTRY_NETWORK(ke)) {
		pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net);
		rn = rn_addroute(&ke->pfrke_sa, &mask, head, ke->pfrke_node, 0);
	} else
		rn = rn_addroute(&ke->pfrke_sa, NULL, head, ke->pfrke_node, 0);

	return (rn == NULL ? -1 : 0);
}

int
pfr_unroute_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke)
{
	union sockaddr_union	 mask;
	struct radix_node	*rn;
	struct radix_node_head	*head;

	switch (ke->pfrke_af) {
	case AF_INET:
		head = kt->pfrkt_ip4;
		break;
#ifdef	INET6
	case AF_INET6:
		head = kt->pfrkt_ip6;
		break;
#endif	/* INET6 */
	default:
		unhandled_af(ke->pfrke_af);
	}

	if (KENTRY_NETWORK(ke)) {
		pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net);
		rn = rn_delete(&ke->pfrke_sa, &mask, head, NULL);
	} else
		rn = rn_delete(&ke->pfrke_sa, NULL, head, NULL);

	if (rn == NULL) {
		DPFPRINTF(LOG_ERR, "pfr_unroute_kentry: delete failed.");
		return (-1);
	}
	return (0);
}

void
pfr_copyout_addr(struct pfr_addr *ad, struct pfr_kentry *ke)
{
	bzero(ad, sizeof(*ad));
	if (ke == NULL)
		return;
	ad->pfra_af = ke->pfrke_af;
	ad->pfra_net = ke->pfrke_net;
	ad->pfra_type = ke->pfrke_type;
	if (ke->pfrke_flags & PFRKE_FLAG_NOT)
		ad->pfra_not = 1;

	switch (ad->pfra_af) {
	case AF_INET:
		ad->pfra_ip4addr = ke->pfrke_sa.sin.sin_addr;
		break;
#ifdef	INET6
	case AF_INET6:
		ad->pfra_ip6addr = ke->pfrke_sa.sin6.sin6_addr;
		break;
#endif	/* INET6 */
	default:
		unhandled_af(ad->pfra_af);
	}
	if (ke->pfrke_counters != NULL)
		ad->pfra_states = ke->pfrke_counters->states;
	switch (ke->pfrke_type) {
	case PFRKE_COST:
		ad->pfra_weight = ((struct pfr_kentry_cost *)ke)->weight;
		/* FALLTHROUGH */
	case PFRKE_ROUTE:
		if (((struct pfr_kentry_route *)ke)->kif != NULL)
			strlcpy(ad->pfra_ifname,
			    ((struct pfr_kentry_route *)ke)->kif->pfik_name,
			    IFNAMSIZ);
		break;
	default:
		break;
	}
}

int
pfr_walktree(struct radix_node *rn, void *arg, u_int id)
{
	struct pfr_kentry	*ke = (struct pfr_kentry *)rn;
	struct pfr_walktree	*w = arg;
	union sockaddr_union	 mask;
	int			 flags = w->pfrw_flags;

	switch (w->pfrw_op) {
	case PFRW_MARK:
		ke->pfrke_flags &= ~PFRKE_FLAG_MARK;
		break;
	case PFRW_SWEEP:
		if (ke->pfrke_flags & PFRKE_FLAG_MARK)
			break;
		/* FALLTHROUGH */
	case PFRW_ENQUEUE:
		SLIST_INSERT_HEAD(w->pfrw_workq, ke, pfrke_workq);
		w->pfrw_cnt++;
		break;
	case PFRW_GET_ADDRS:
		if (w->pfrw_free-- > 0) {
			struct pfr_addr ad;

			pfr_copyout_addr(&ad, ke);
			if (copyout(&ad, w->pfrw_addr, sizeof(ad)))
				return (EFAULT);
			w->pfrw_addr++;
		}
		break;
	case PFRW_GET_ASTATS:
		if (w->pfrw_free-- > 0) {
			struct pfr_astats as;

			pfr_copyout_addr(&as.pfras_a, ke);

			if (ke->pfrke_counters) {
				bcopy(ke->pfrke_counters->pfrkc_packets,
				    as.pfras_packets, sizeof(as.pfras_packets));
				bcopy(ke->pfrke_counters->pfrkc_bytes,
				    as.pfras_bytes, sizeof(as.pfras_bytes));
			} else {
				bzero(as.pfras_packets,
				    sizeof(as.pfras_packets));
				bzero(as.pfras_bytes, sizeof(as.pfras_bytes));
				as.pfras_a.pfra_fback = PFR_FB_NOCOUNT;
			}
			as.pfras_tzero = ke->pfrke_tzero;

			if (COPYOUT(&as, w->pfrw_astats, sizeof(as), flags))
				return (EFAULT);
			w->pfrw_astats++;
		}
		break;
	case PFRW_POOL_GET:
		if (ke->pfrke_flags & PFRKE_FLAG_NOT)
			break; /* negative entries are ignored */
		if (!w->pfrw_cnt--) {
			w->pfrw_kentry = ke;
			return (1); /* finish search */
		}
		break;
	case PFRW_DYNADDR_UPDATE:
		switch (ke->pfrke_af) {
		case AF_INET:
			if (w->pfrw_dyn->pfid_acnt4++ > 0)
				break;
			pfr_prepare_network(&mask, AF_INET, ke->pfrke_net);
			w->pfrw_dyn->pfid_addr4 = *SUNION2PF(
			    &ke->pfrke_sa, AF_INET);
			w->pfrw_dyn->pfid_mask4 = *SUNION2PF(
			    &mask, AF_INET);
			break;
#ifdef	INET6
		case AF_INET6:
			if (w->pfrw_dyn->pfid_acnt6++ > 0)
				break;
			pfr_prepare_network(&mask, AF_INET6, ke->pfrke_net);
			w->pfrw_dyn->pfid_addr6 = *SUNION2PF(
			    &ke->pfrke_sa, AF_INET6);
			w->pfrw_dyn->pfid_mask6 = *SUNION2PF(
			    &mask, AF_INET6);
			break;
#endif	/* INET6 */
		default:
			unhandled_af(ke->pfrke_af);
		}
		break;
	}
	return (0);
}
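
/*
 * Example for the PFRW_POOL_GET countdown above (illustrative): with
 * pfrw_cnt preset to 2, the walk skips two non-negated entries (the
 * post-decrement tests 2, then 1) and returns the third in
 * pfrw_kentry; entries with PFRKE_FLAG_NOT set never count, so a pick
 * by index over a table only ever lands on positive addresses.
 */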

int
pfr_clr_tables(struct pfr_table *filter, int *ndel, int flags)
{
	struct pfr_ktableworkq	 workq;
	struct pfr_ktable	*p;
	int			 xdel = 0;

	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_ALLRSETS);
	if (pfr_fix_anchor(filter->pfrt_anchor))
		return (EINVAL);
	if (pfr_table_count(filter, flags) < 0)
		return (ENOENT);

	SLIST_INIT(&workq);
	RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
		if (pfr_skip_table(filter, p, flags))
			continue;
		if (!strcmp(p->pfrkt_anchor, PF_RESERVED_ANCHOR))
			continue;
		if (!(p->pfrkt_flags & PFR_TFLAG_ACTIVE))
			continue;
		p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_ACTIVE;
		SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
		xdel++;
	}
	if (!(flags & PFR_FLAG_DUMMY)) {
		pfr_setflags_ktables(&workq);
	}
	if (ndel != NULL)
		*ndel = xdel;
	return (0);
}

int
pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags)
{
	struct pfr_ktableworkq	 addq, changeq;
	struct pfr_ktable	*p, *q, *r, key;
	int			 i, rv, xadd = 0;
	time_t			 tzero = gettime();

	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
	SLIST_INIT(&addq);
	SLIST_INIT(&changeq);
	for (i = 0; i < size; i++) {
		YIELD(flags & PFR_FLAG_USERIOCTL);
		if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags))
			senderr(EFAULT);
		if (pfr_validate_table(&key.pfrkt_t, PFR_TFLAG_USRMASK,
		    flags & PFR_FLAG_USERIOCTL))
			senderr(EINVAL);
		key.pfrkt_flags |= PFR_TFLAG_ACTIVE;
		p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
		if (p == NULL) {
			p = pfr_create_ktable(&key.pfrkt_t, tzero, 1,
			    !(flags & PFR_FLAG_USERIOCTL));
			if (p == NULL)
				senderr(ENOMEM);
			SLIST_FOREACH(q, &addq, pfrkt_workq) {
				if (!pfr_ktable_compare(p, q)) {
					pfr_destroy_ktable(p, 0);
					goto _skip;
				}
			}
			SLIST_INSERT_HEAD(&addq, p, pfrkt_workq);
			xadd++;
			if (!key.pfrkt_anchor[0])
				goto _skip;

			/* find or create root table */
			bzero(key.pfrkt_anchor, sizeof(key.pfrkt_anchor));
			r = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
			if (r != NULL) {
				p->pfrkt_root = r;
				goto _skip;
			}
			SLIST_FOREACH(q, &addq, pfrkt_workq) {
				if (!pfr_ktable_compare(&key, q)) {
					p->pfrkt_root = q;
					goto _skip;
				}
			}
			key.pfrkt_flags = 0;
			r = pfr_create_ktable(&key.pfrkt_t, 0, 1,
			    !(flags & PFR_FLAG_USERIOCTL));
			if (r == NULL)
				senderr(ENOMEM);
			SLIST_INSERT_HEAD(&addq, r, pfrkt_workq);
			p->pfrkt_root = r;
		} else if (!(p->pfrkt_flags & PFR_TFLAG_ACTIVE)) {
			SLIST_FOREACH(q, &changeq, pfrkt_workq)
				if (!pfr_ktable_compare(&key, q))
					goto _skip;
			p->pfrkt_nflags = (p->pfrkt_flags &
			    ~PFR_TFLAG_USRMASK) | key.pfrkt_flags;
			SLIST_INSERT_HEAD(&changeq, p, pfrkt_workq);
			xadd++;
		}
_skip:
	;
	}
	if (!(flags & PFR_FLAG_DUMMY)) {
		pfr_insert_ktables(&addq);
		pfr_setflags_ktables(&changeq);
	} else
		pfr_destroy_ktables(&addq, 0);
	if (nadd != NULL)
		*nadd = xadd;
	return (0);
_bad:
	pfr_destroy_ktables(&addq, 0);
	return (rv);
}

int
pfr_del_tables(struct pfr_table *tbl, int size, int *ndel, int flags)
{
	struct pfr_ktableworkq	 workq;
	struct pfr_ktable	*p, *q, key;
	int			 i, xdel = 0;

	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
	SLIST_INIT(&workq);
	for (i = 0; i < size; i++) {
		YIELD(flags & PFR_FLAG_USERIOCTL);
		if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags))
			return (EFAULT);
		if (pfr_validate_table(&key.pfrkt_t, 0,
		    flags & PFR_FLAG_USERIOCTL))
			return (EINVAL);
		p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
		if (p != NULL && (p->pfrkt_flags & PFR_TFLAG_ACTIVE)) {
			SLIST_FOREACH(q, &workq, pfrkt_workq)
				if (!pfr_ktable_compare(p, q))
					goto _skip;
			p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_ACTIVE;
			SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
			xdel++;
		}
_skip:
	;
	}

	if (!(flags & PFR_FLAG_DUMMY)) {
		pfr_setflags_ktables(&workq);
	}
	if (ndel != NULL)
		*ndel = xdel;
	return (0);
}

int
pfr_get_tables(struct pfr_table *filter, struct pfr_table *tbl, int *size,
	int flags)
{
	struct pfr_ktable	*p;
	int			 n, nn;

	ACCEPT_FLAGS(flags, PFR_FLAG_ALLRSETS);
	if (pfr_fix_anchor(filter->pfrt_anchor))
		return (EINVAL);
	n = nn = pfr_table_count(filter, flags);
	if (n < 0)
		return (ENOENT);
	if (n > *size) {
		*size = n;
		return (0);
	}
	RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
		if (pfr_skip_table(filter, p, flags))
			continue;
		if (n-- <= 0)
			continue;
		if (COPYOUT(&p->pfrkt_t, tbl++, sizeof(*tbl), flags))
			return (EFAULT);
	}
	if (n) {
		DPFPRINTF(LOG_ERR,
		    "pfr_get_tables: corruption detected (%d).", n);
		return (ENOTTY);
	}
	*size = nn;
	return (0);
}

int
pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size,
	int flags)
{
	struct pfr_ktable	*p;
	struct pfr_ktableworkq	 workq;
	int			 n, nn;
	time_t			 tzero = gettime();

	/* XXX PFR_FLAG_CLSTATS disabled */
	ACCEPT_FLAGS(flags, PFR_FLAG_ALLRSETS);
	if (pfr_fix_anchor(filter->pfrt_anchor))
		return (EINVAL);
	n = nn = pfr_table_count(filter, flags);
	if (n < 0)
		return (ENOENT);
	if (n > *size) {
		*size = n;
		return (0);
	}
	SLIST_INIT(&workq);
	RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
		if (pfr_skip_table(filter, p, flags))
			continue;
		if (n-- <= 0)
			continue;
		if (COPYOUT(&p->pfrkt_ts, tbl++, sizeof(*tbl), flags))
			return (EFAULT);
		SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
	}
	if (flags & PFR_FLAG_CLSTATS)
		pfr_clstats_ktables(&workq, tzero,
		    flags & PFR_FLAG_ADDRSTOO);
	if (n) {
		DPFPRINTF(LOG_ERR,
		    "pfr_get_tstats: corruption detected (%d).", n);
		return (ENOTTY);
	}
	*size = nn;
	return (0);
}

int
pfr_clr_tstats(struct pfr_table *tbl, int size, int *nzero, int flags)
{
	struct pfr_ktableworkq	 workq;
	struct pfr_ktable	*p, key;
	int			 i, xzero = 0;
	time_t			 tzero = gettime();

	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_ADDRSTOO);
	SLIST_INIT(&workq);
	for (i = 0; i < size; i++) {
		YIELD(flags & PFR_FLAG_USERIOCTL);
		if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags))
			return (EFAULT);
		if (pfr_validate_table(&key.pfrkt_t, 0, 0))
			return (EINVAL);
		p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
		if (p != NULL) {
			SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
			xzero++;
		}
	}
	if (!(flags & PFR_FLAG_DUMMY)) {
		pfr_clstats_ktables(&workq, tzero, flags & PFR_FLAG_ADDRSTOO);
	}
	if (nzero != NULL)
		*nzero = xzero;
	return (0);
}

int
pfr_set_tflags(struct pfr_table *tbl, int size, int setflag, int clrflag,
	int *nchange, int *ndel, int flags)
{
	struct pfr_ktableworkq	 workq;
	struct pfr_ktable	*p, *q, key;
	int			 i, xchange = 0, xdel = 0;

	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
	if ((setflag & ~PFR_TFLAG_USRMASK) ||
	    (clrflag & ~PFR_TFLAG_USRMASK) ||
	    (setflag & clrflag))
		return (EINVAL);
	SLIST_INIT(&workq);
	for (i = 0; i < size; i++) {
		YIELD(flags & PFR_FLAG_USERIOCTL);
		if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags))
			return (EFAULT);
		if (pfr_validate_table(&key.pfrkt_t, 0,
		    flags & PFR_FLAG_USERIOCTL))
			return (EINVAL);
		p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
		if (p != NULL && (p->pfrkt_flags & PFR_TFLAG_ACTIVE)) {
			p->pfrkt_nflags = (p->pfrkt_flags | setflag) &
			    ~clrflag;
			if (p->pfrkt_nflags == p->pfrkt_flags)
				goto _skip;
			SLIST_FOREACH(q, &workq, pfrkt_workq)
				if (!pfr_ktable_compare(p, q))
					goto _skip;
			SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
			if ((p->pfrkt_flags & PFR_TFLAG_PERSIST) &&
			    (clrflag & PFR_TFLAG_PERSIST) &&
			    !(p->pfrkt_flags & PFR_TFLAG_REFERENCED))
				xdel++;
			else
				xchange++;
		}
_skip:
	;
	}
	if (!(flags & PFR_FLAG_DUMMY)) {
		pfr_setflags_ktables(&workq);
	}
	if (nchange != NULL)
		*nchange = xchange;
	if (ndel != NULL)
		*ndel = xdel;
	return (0);
}
1767 
1768 int
1769 pfr_ina_begin(struct pfr_table *trs, u_int32_t *ticket, int *ndel, int flags)
1770 {
1771 	struct pfr_ktableworkq	 workq;
1772 	struct pfr_ktable	*p;
1773 	struct pf_ruleset	*rs;
1774 	int			 xdel = 0;
1775 
1776 	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
1777 	rs = pf_find_or_create_ruleset(trs->pfrt_anchor);
1778 	if (rs == NULL)
1779 		return (ENOMEM);
1780 	SLIST_INIT(&workq);
1781 	RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
1782 		if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) ||
1783 		    pfr_skip_table(trs, p, 0))
1784 			continue;
1785 		p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_INACTIVE;
1786 		SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
1787 		xdel++;
1788 	}
1789 	if (!(flags & PFR_FLAG_DUMMY)) {
1790 		pfr_setflags_ktables(&workq);
1791 		if (ticket != NULL)
1792 			*ticket = ++rs->tticket;
1793 		rs->topen = 1;
1794 	} else
1795 		pf_remove_if_empty_ruleset(rs);
1796 	if (ndel != NULL)
1797 		*ndel = xdel;
1798 	return (0);
1799 }
1800 
1801 int
1802 pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size,
1803     int *nadd, int *naddr, u_int32_t ticket, int flags)
1804 {
1805 	struct pfr_ktableworkq	 tableq;
1806 	struct pfr_kentryworkq	 addrq;
1807 	struct pfr_ktable	*kt, *rt, *shadow, key;
1808 	struct pfr_kentry	*p;
1809 	struct pfr_addr		 ad;
1810 	struct pf_ruleset	*rs;
1811 	int			 i, rv, xadd = 0, xaddr = 0;
1812 
1813 	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_ADDRSTOO);
1814 	if (size && !(flags & PFR_FLAG_ADDRSTOO))
1815 		return (EINVAL);
1816 	if (pfr_validate_table(tbl, PFR_TFLAG_USRMASK,
1817 	    flags & PFR_FLAG_USERIOCTL))
1818 		return (EINVAL);
1819 	rs = pf_find_ruleset(tbl->pfrt_anchor);
1820 	if (rs == NULL || !rs->topen || ticket != rs->tticket)
1821 		return (EBUSY);
1822 	tbl->pfrt_flags |= PFR_TFLAG_INACTIVE;
1823 	SLIST_INIT(&tableq);
1824 	kt = RB_FIND(pfr_ktablehead, &pfr_ktables, (struct pfr_ktable *)tbl);
1825 	if (kt == NULL) {
1826 		kt = pfr_create_ktable(tbl, 0, 1,
1827 		    !(flags & PFR_FLAG_USERIOCTL));
1828 		if (kt == NULL)
1829 			return (ENOMEM);
1830 		SLIST_INSERT_HEAD(&tableq, kt, pfrkt_workq);
1831 		xadd++;
1832 		if (!tbl->pfrt_anchor[0])
1833 			goto _skip;
1834 
1835 		/* find or create root table */
1836 		bzero(&key, sizeof(key));
1837 		strlcpy(key.pfrkt_name, tbl->pfrt_name, sizeof(key.pfrkt_name));
1838 		rt = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
1839 		if (rt != NULL) {
1840 			kt->pfrkt_root = rt;
1841 			goto _skip;
1842 		}
1843 		rt = pfr_create_ktable(&key.pfrkt_t, 0, 1,
1844 		    !(flags & PFR_FLAG_USERIOCTL));
1845 		if (rt == NULL) {
1846 			pfr_destroy_ktables(&tableq, 0);
1847 			return (ENOMEM);
1848 		}
1849 		SLIST_INSERT_HEAD(&tableq, rt, pfrkt_workq);
1850 		kt->pfrkt_root = rt;
1851 	} else if (!(kt->pfrkt_flags & PFR_TFLAG_INACTIVE))
1852 		xadd++;
1853 _skip:
1854 	shadow = pfr_create_ktable(tbl, 0, 0, !(flags & PFR_FLAG_USERIOCTL));
1855 	if (shadow == NULL) {
1856 		pfr_destroy_ktables(&tableq, 0);
1857 		return (ENOMEM);
1858 	}
1859 	SLIST_INIT(&addrq);
1860 	for (i = 0; i < size; i++) {
1861 		YIELD(flags & PFR_FLAG_USERIOCTL);
1862 		if (COPYIN(addr+i, &ad, sizeof(ad), flags))
1863 			senderr(EFAULT);
1864 		if (pfr_validate_addr(&ad))
1865 			senderr(EINVAL);
1866 		if (pfr_lookup_addr(shadow, &ad, 1) != NULL)
1867 			continue;
1868 		p = pfr_create_kentry(&ad);
1869 		if (p == NULL)
1870 			senderr(ENOMEM);
1871 		if (pfr_route_kentry(shadow, p)) {
1872 			pfr_destroy_kentry(p);
1873 			continue;
1874 		}
1875 		SLIST_INSERT_HEAD(&addrq, p, pfrke_workq);
1876 		xaddr++;
1877 		if (p->pfrke_type == PFRKE_COST)
1878 			kt->pfrkt_refcntcost++;
1879 		pfr_ktable_winfo_update(kt, p);
1880 	}
1881 	if (!(flags & PFR_FLAG_DUMMY)) {
1882 		if (kt->pfrkt_shadow != NULL)
1883 			pfr_destroy_ktable(kt->pfrkt_shadow, 1);
1884 		kt->pfrkt_flags |= PFR_TFLAG_INACTIVE;
1885 		pfr_insert_ktables(&tableq);
1886 		shadow->pfrkt_cnt = (flags & PFR_FLAG_ADDRSTOO) ?
1887 		    xaddr : NO_ADDRESSES;
1888 		kt->pfrkt_shadow = shadow;
1889 	} else {
1890 		pfr_clean_node_mask(shadow, &addrq);
1891 		pfr_destroy_ktable(shadow, 0);
1892 		pfr_destroy_ktables(&tableq, 0);
1893 		pfr_destroy_kentries(&addrq);
1894 	}
1895 	if (nadd != NULL)
1896 		*nadd = xadd;
1897 	if (naddr != NULL)
1898 		*naddr = xaddr;
1899 	return (0);
1900 _bad:
1901 	pfr_destroy_ktable(shadow, 0);
1902 	pfr_destroy_ktables(&tableq, 0);
1903 	pfr_destroy_kentries(&addrq);
1904 	return (rv);
1905 }
1906 
1907 int
1908 pfr_ina_rollback(struct pfr_table *trs, u_int32_t ticket, int *ndel, int flags)
1909 {
1910 	struct pfr_ktableworkq	 workq;
1911 	struct pfr_ktable	*p;
1912 	struct pf_ruleset	*rs;
1913 	int			 xdel = 0;
1914 
1915 	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
1916 	rs = pf_find_ruleset(trs->pfrt_anchor);
1917 	if (rs == NULL || !rs->topen || ticket != rs->tticket)
1918 		return (0);
1919 	SLIST_INIT(&workq);
1920 	RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
1921 		if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) ||
1922 		    pfr_skip_table(trs, p, 0))
1923 			continue;
1924 		p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_INACTIVE;
1925 		SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
1926 		xdel++;
1927 	}
1928 	if (!(flags & PFR_FLAG_DUMMY)) {
1929 		pfr_setflags_ktables(&workq);
1930 		rs->topen = 0;
1931 		pf_remove_if_empty_ruleset(rs);
1932 	}
1933 	if (ndel != NULL)
1934 		*ndel = xdel;
1935 	return (0);
1936 }
1937 
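/*
 * Commit an open table transaction: each table flagged
 * PFR_TFLAG_INACTIVE under the given ruleset has its shadow contents
 * swapped in by pfr_commit_ktable().  *nadd counts tables that were
 * newly activated, *nchange tables that already were active.
 */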
1938 int
1939 pfr_ina_commit(struct pfr_table *trs, u_int32_t ticket, int *nadd,
1940     int *nchange, int flags)
1941 {
1942 	struct pfr_ktable	*p, *q;
1943 	struct pfr_ktableworkq	 workq;
1944 	struct pf_ruleset	*rs;
1945 	int			 xadd = 0, xchange = 0;
1946 	time_t			 tzero = gettime();
1947 
1948 	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
1949 	rs = pf_find_ruleset(trs->pfrt_anchor);
1950 	if (rs == NULL || !rs->topen || ticket != rs->tticket)
1951 		return (EBUSY);
1952 
1953 	SLIST_INIT(&workq);
1954 	RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
1955 		if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) ||
1956 		    pfr_skip_table(trs, p, 0))
1957 			continue;
1958 		SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
1959 		if (p->pfrkt_flags & PFR_TFLAG_ACTIVE)
1960 			xchange++;
1961 		else
1962 			xadd++;
1963 	}
1964 
1965 	if (!(flags & PFR_FLAG_DUMMY)) {
1966 		SLIST_FOREACH_SAFE(p, &workq, pfrkt_workq, q) {
1967 			pfr_commit_ktable(p, tzero);
1968 		}
1969 		rs->topen = 0;
1970 		pf_remove_if_empty_ruleset(rs);
1971 	}
1972 	if (nadd != NULL)
1973 		*nadd = xadd;
1974 	if (nchange != NULL)
1975 		*nchange = xchange;
1976 
1977 	return (0);
1978 }
1979 
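/*
 * Fold the shadow table into kt.  If the shadow carries no addresses,
 * only table statistics are touched.  If kt is already active,
 * add/change/delete work queues are computed so that counters of
 * entries common to both versions survive the commit.  Otherwise the
 * radix heads are simply swapped with the shadow's.
 */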
1980 void
1981 pfr_commit_ktable(struct pfr_ktable *kt, time_t tzero)
1982 {
1983 	struct pfr_ktable	*shadow = kt->pfrkt_shadow;
1984 	int			 nflags;
1985 
1986 	if (shadow->pfrkt_cnt == NO_ADDRESSES) {
1987 		if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
1988 			pfr_clstats_ktable(kt, tzero, 1);
1989 	} else if (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) {
1990 		/* kt might contain addresses */
1991 		struct pfr_kentryworkq	 addrq, addq, changeq, delq, garbageq;
1992 		struct pfr_kentry	*p, *q;
1993 		struct pfr_addr		 ad;
1994 
1995 		pfr_enqueue_addrs(shadow, &addrq, NULL, 0);
1996 		pfr_mark_addrs(kt);
1997 		SLIST_INIT(&addq);
1998 		SLIST_INIT(&changeq);
1999 		SLIST_INIT(&delq);
2000 		SLIST_INIT(&garbageq);
2001 		pfr_clean_node_mask(shadow, &addrq);
2002 		while ((p = SLIST_FIRST(&addrq)) != NULL) {
2003 			SLIST_REMOVE_HEAD(&addrq, pfrke_workq);
2004 			pfr_copyout_addr(&ad, p);
2005 			q = pfr_lookup_addr(kt, &ad, 1);
2006 			if (q != NULL) {
2007 				if ((q->pfrke_flags & PFRKE_FLAG_NOT) !=
2008 				    (p->pfrke_flags & PFRKE_FLAG_NOT))
2009 					SLIST_INSERT_HEAD(&changeq, q,
2010 					    pfrke_workq);
2011 				q->pfrke_flags |= PFRKE_FLAG_MARK;
2012 				SLIST_INSERT_HEAD(&garbageq, p, pfrke_workq);
2013 			} else {
2014 				p->pfrke_tzero = tzero;
2015 				SLIST_INSERT_HEAD(&addq, p, pfrke_workq);
2016 			}
2017 		}
2018 		pfr_enqueue_addrs(kt, &delq, NULL, ENQUEUE_UNMARKED_ONLY);
2019 		pfr_insert_kentries(kt, &addq, tzero);
2020 		pfr_remove_kentries(kt, &delq);
2021 		pfr_clstats_kentries(&changeq, tzero, INVERT_NEG_FLAG);
2022 		pfr_destroy_kentries(&garbageq);
2023 	} else {
2024 		/* kt cannot contain addresses */
2025 		SWAP(struct radix_node_head *, kt->pfrkt_ip4,
2026 		    shadow->pfrkt_ip4);
2027 		SWAP(struct radix_node_head *, kt->pfrkt_ip6,
2028 		    shadow->pfrkt_ip6);
2029 		SWAP(int, kt->pfrkt_cnt, shadow->pfrkt_cnt);
2030 		pfr_clstats_ktable(kt, tzero, 1);
2031 	}
2032 	nflags = ((shadow->pfrkt_flags & PFR_TFLAG_USRMASK) |
2033 	    (kt->pfrkt_flags & PFR_TFLAG_SETMASK) | PFR_TFLAG_ACTIVE)
2034 		& ~PFR_TFLAG_INACTIVE;
2035 	pfr_destroy_ktable(shadow, 0);
2036 	kt->pfrkt_shadow = NULL;
2037 	pfr_setflags_ktable(kt, nflags);
2038 }
2039 
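/*
 * Validate a table spec: the name must be non-empty, NUL-terminated
 * and NUL-padded, the anchor path well formed, and the flags limited
 * to allowedflags.  With no_reserved set, tables in
 * PF_RESERVED_ANCHOR are rejected.  Returns 0 if valid, -1 if not.
 */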
2040 int
2041 pfr_validate_table(struct pfr_table *tbl, int allowedflags, int no_reserved)
2042 {
2043 	int i;
2044 
2045 	if (!tbl->pfrt_name[0])
2046 		return (-1);
2047 	if (no_reserved && !strcmp(tbl->pfrt_anchor, PF_RESERVED_ANCHOR))
2048 		return (-1);
2049 	if (tbl->pfrt_name[PF_TABLE_NAME_SIZE-1])
2050 		return (-1);
2051 	for (i = strlen(tbl->pfrt_name); i < PF_TABLE_NAME_SIZE; i++)
2052 		if (tbl->pfrt_name[i])
2053 			return (-1);
2054 	if (pfr_fix_anchor(tbl->pfrt_anchor))
2055 		return (-1);
2056 	if (tbl->pfrt_flags & ~allowedflags)
2057 		return (-1);
2058 	return (0);
2059 }
2060 
2061 /*
2062  * Rewrite anchors referenced by tables to remove slashes
2063  * and check for validity.
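 * For example, an anchor of "//foo/bar" is shifted in place to
 * "foo/bar" and the freed tail of the buffer is zeroed.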
2064  */
2065 int
2066 pfr_fix_anchor(char *anchor)
2067 {
2068 	size_t siz = MAXPATHLEN;
2069 	int i;
2070 
2071 	if (anchor[0] == '/') {
2072 		char *path;
2073 		int off;
2074 
2075 		path = anchor;
2076 		off = 1;
2077 		while (*++path == '/')
2078 			off++;
2079 		bcopy(path, anchor, siz - off);
2080 		memset(anchor + siz - off, 0, off);
2081 	}
2082 	if (anchor[siz - 1])
2083 		return (-1);
2084 	for (i = strlen(anchor); i < siz; i++)
2085 		if (anchor[i])
2086 			return (-1);
2087 	return (0);
2088 }
2089 
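/*
 * Count the tables visible through filter; returns -1 if the
 * requested anchor does not exist.
 */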
2090 int
2091 pfr_table_count(struct pfr_table *filter, int flags)
2092 {
2093 	struct pf_ruleset *rs;
2094 
2095 	if (flags & PFR_FLAG_ALLRSETS)
2096 		return (pfr_ktable_cnt);
2097 	if (filter->pfrt_anchor[0]) {
2098 		rs = pf_find_ruleset(filter->pfrt_anchor);
2099 		return ((rs != NULL) ? rs->tables : -1);
2100 	}
2101 	return (pf_main_ruleset.tables);
2102 }
2103 
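/*
 * Return 1 if kt lies outside the anchor selected by filter;
 * PFR_FLAG_ALLRSETS matches everything.
 */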
2104 int
2105 pfr_skip_table(struct pfr_table *filter, struct pfr_ktable *kt, int flags)
2106 {
2107 	if (flags & PFR_FLAG_ALLRSETS)
2108 		return (0);
2109 	if (strcmp(filter->pfrt_anchor, kt->pfrkt_anchor))
2110 		return (1);
2111 	return (0);
2112 }
2113 
2114 void
2115 pfr_insert_ktables(struct pfr_ktableworkq *workq)
2116 {
2117 	struct pfr_ktable	*p;
2118 
2119 	SLIST_FOREACH(p, workq, pfrkt_workq)
2120 		pfr_insert_ktable(p);
2121 }
2122 
2123 void
2124 pfr_insert_ktable(struct pfr_ktable *kt)
2125 {
2126 	RB_INSERT(pfr_ktablehead, &pfr_ktables, kt);
2127 	pfr_ktable_cnt++;
2128 	if (kt->pfrkt_root != NULL)
2129 		if (!kt->pfrkt_root->pfrkt_refcnt[PFR_REFCNT_ANCHOR]++)
2130 			pfr_setflags_ktable(kt->pfrkt_root,
2131 			    kt->pfrkt_root->pfrkt_flags|PFR_TFLAG_REFDANCHOR);
2132 }
2133 
2134 void
2135 pfr_setflags_ktables(struct pfr_ktableworkq *workq)
2136 {
2137 	struct pfr_ktable	*p, *q;
2138 
2139 	SLIST_FOREACH_SAFE(p, workq, pfrkt_workq, q) {
2140 		pfr_setflags_ktable(p, p->pfrkt_nflags);
2141 	}
2142 }
2143 
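/*
 * Transition kt to the flag set newf.  A table no longer referenced
 * by rules, anchors or PERSIST loses ACTIVE, and a table left with
 * no set flags (PFR_TFLAG_SETMASK) is removed from the tree and
 * destroyed, dropping the anchor reference on its root table.
 */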
2144 void
2145 pfr_setflags_ktable(struct pfr_ktable *kt, int newf)
2146 {
2147 	struct pfr_kentryworkq	addrq;
2148 
2149 	if (!(newf & PFR_TFLAG_REFERENCED) &&
2150 	    !(newf & PFR_TFLAG_REFDANCHOR) &&
2151 	    !(newf & PFR_TFLAG_PERSIST))
2152 		newf &= ~PFR_TFLAG_ACTIVE;
2153 	if (!(newf & PFR_TFLAG_ACTIVE))
2154 		newf &= ~PFR_TFLAG_USRMASK;
2155 	if (!(newf & PFR_TFLAG_SETMASK)) {
2156 		RB_REMOVE(pfr_ktablehead, &pfr_ktables, kt);
2157 		if (kt->pfrkt_root != NULL)
2158 			if (!--kt->pfrkt_root->pfrkt_refcnt[PFR_REFCNT_ANCHOR])
2159 				pfr_setflags_ktable(kt->pfrkt_root,
2160 				    kt->pfrkt_root->pfrkt_flags &
2161 					~PFR_TFLAG_REFDANCHOR);
2162 		pfr_destroy_ktable(kt, 1);
2163 		pfr_ktable_cnt--;
2164 		return;
2165 	}
2166 	if (!(newf & PFR_TFLAG_ACTIVE) && kt->pfrkt_cnt) {
2167 		pfr_enqueue_addrs(kt, &addrq, NULL, 0);
2168 		pfr_remove_kentries(kt, &addrq);
2169 	}
2170 	if (!(newf & PFR_TFLAG_INACTIVE) && kt->pfrkt_shadow != NULL) {
2171 		pfr_destroy_ktable(kt->pfrkt_shadow, 1);
2172 		kt->pfrkt_shadow = NULL;
2173 	}
2174 	kt->pfrkt_flags = newf;
2175 }
2176 
2177 void
2178 pfr_clstats_ktables(struct pfr_ktableworkq *workq, time_t tzero, int recurse)
2179 {
2180 	struct pfr_ktable	*p;
2181 
2182 	SLIST_FOREACH(p, workq, pfrkt_workq)
2183 		pfr_clstats_ktable(p, tzero, recurse);
2184 }
2185 
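/*
 * Clear table statistics; with recurse set, the per-entry counters
 * are cleared as well.
 */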
2186 void
2187 pfr_clstats_ktable(struct pfr_ktable *kt, time_t tzero, int recurse)
2188 {
2189 	struct pfr_kentryworkq	 addrq;
2190 
2191 	if (recurse) {
2192 		pfr_enqueue_addrs(kt, &addrq, NULL, 0);
2193 		pfr_clstats_kentries(&addrq, tzero, 0);
2194 	}
2195 	bzero(kt->pfrkt_packets, sizeof(kt->pfrkt_packets));
2196 	bzero(kt->pfrkt_bytes, sizeof(kt->pfrkt_bytes));
2197 	kt->pfrkt_match = kt->pfrkt_nomatch = 0;
2198 	kt->pfrkt_tzero = tzero;
2199 }
2200 
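/*
 * Allocate and initialize a table.  With intr set the pool
 * allocation must not sleep; with attachruleset the table is bound
 * to its anchor's ruleset, creating the ruleset if necessary.
 */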
2201 struct pfr_ktable *
2202 pfr_create_ktable(struct pfr_table *tbl, time_t tzero, int attachruleset,
2203     int intr)
2204 {
2205 	struct pfr_ktable	*kt;
2206 	struct pf_ruleset	*rs;
2207 
2208 	if (intr)
2209 		kt = pool_get(&pfr_ktable_pl, PR_NOWAIT|PR_ZERO|PR_LIMITFAIL);
2210 	else
2211 		kt = pool_get(&pfr_ktable_pl, PR_WAITOK|PR_ZERO|PR_LIMITFAIL);
2212 	if (kt == NULL)
2213 		return (NULL);
2214 	kt->pfrkt_t = *tbl;
2215 
2216 	if (attachruleset) {
2217 		rs = pf_find_or_create_ruleset(tbl->pfrt_anchor);
2218 		if (!rs) {
2219 			pfr_destroy_ktable(kt, 0);
2220 			return (NULL);
2221 		}
2222 		kt->pfrkt_rs = rs;
2223 		rs->tables++;
2224 	}
2225 
2226 	if (!rn_inithead((void **)&kt->pfrkt_ip4,
2227 	    offsetof(struct sockaddr_in, sin_addr)) ||
2228 	    !rn_inithead((void **)&kt->pfrkt_ip6,
2229 	    offsetof(struct sockaddr_in6, sin6_addr))) {
2230 		pfr_destroy_ktable(kt, 0);
2231 		return (NULL);
2232 	}
2233 	kt->pfrkt_tzero = tzero;
2234 	kt->pfrkt_refcntcost = 0;
2235 	kt->pfrkt_gcdweight = 0;
2236 	kt->pfrkt_maxweight = 1;
2237 
2238 	return (kt);
2239 }
2240 
2241 void
2242 pfr_destroy_ktables(struct pfr_ktableworkq *workq, int flushaddr)
2243 {
2244 	struct pfr_ktable	*p;
2245 
2246 	while ((p = SLIST_FIRST(workq)) != NULL) {
2247 		SLIST_REMOVE_HEAD(workq, pfrkt_workq);
2248 		pfr_destroy_ktable(p, flushaddr);
2249 	}
2250 }
2251 
2252 void
2253 pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr)
2254 {
2255 	struct pfr_kentryworkq	 addrq;
2256 
2257 	if (flushaddr) {
2258 		pfr_enqueue_addrs(kt, &addrq, NULL, 0);
2259 		pfr_clean_node_mask(kt, &addrq);
2260 		pfr_destroy_kentries(&addrq);
2261 	}
2262 	if (kt->pfrkt_ip4 != NULL)
2263 		free(kt->pfrkt_ip4, M_RTABLE, sizeof(*kt->pfrkt_ip4));
2264 	if (kt->pfrkt_ip6 != NULL)
2265 		free(kt->pfrkt_ip6, M_RTABLE, sizeof(*kt->pfrkt_ip6));
2266 	if (kt->pfrkt_shadow != NULL)
2267 		pfr_destroy_ktable(kt->pfrkt_shadow, flushaddr);
2268 	if (kt->pfrkt_rs != NULL) {
2269 		kt->pfrkt_rs->tables--;
2270 		pf_remove_if_empty_ruleset(kt->pfrkt_rs);
2271 	}
2272 	pool_put(&pfr_ktable_pl, kt);
2273 }
2274 
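/* RB tree ordering: by table name first, then by anchor path. */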
2275 int
2276 pfr_ktable_compare(struct pfr_ktable *p, struct pfr_ktable *q)
2277 {
2278 	int d;
2279 
2280 	if ((d = strncmp(p->pfrkt_name, q->pfrkt_name, PF_TABLE_NAME_SIZE)))
2281 		return (d);
2282 	return (strcmp(p->pfrkt_anchor, q->pfrkt_anchor));
2283 }
2284 
2285 struct pfr_ktable *
2286 pfr_lookup_table(struct pfr_table *tbl)
2287 {
2288 	/* a struct pfr_ktable starts like a struct pfr_table */
2289 	return (RB_FIND(pfr_ktablehead, &pfr_ktables,
2290 	    (struct pfr_ktable *)tbl));
2291 }
2292 
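/*
 * Packet-path lookup: returns 1 if a matches a non-negated entry of
 * kt, bumping the table's match/nomatch counters either way.
 */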
2293 int
2294 pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af)
2295 {
2296 	struct pfr_kentry	*ke = NULL;
2297 	int			 match;
2298 
2299 	ke = pfr_kentry_byaddr(kt, a, af, 0);
2300 
2301 	match = (ke && !(ke->pfrke_flags & PFRKE_FLAG_NOT));
2302 	if (match)
2303 		kt->pfrkt_match++;
2304 	else
2305 		kt->pfrkt_nomatch++;
2306 
2307 	return (match);
2308 }
2309 
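/*
 * Longest-prefix lookup of a in kt (or in kt's active root table).
 * With exact set, network entries are discarded so that only host
 * entries match.
 */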
2310 struct pfr_kentry *
2311 pfr_kentry_byaddr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af,
2312     int exact)
2313 {
2314 	struct pfr_kentry	*ke = NULL;
2315 	struct sockaddr_in	 tmp4;
2316 #ifdef INET6
2317 	struct sockaddr_in6	 tmp6;
2318 #endif /* INET6 */
2319 
2320 	kt = pfr_ktable_select_active(kt);
2321 	if (kt == NULL)
2322 		return (NULL);
2323 
2324 	switch (af) {
2325 	case AF_INET:
2326 		bzero(&tmp4, sizeof(tmp4));
2327 		tmp4.sin_len = sizeof(tmp4);
2328 		tmp4.sin_family = AF_INET;
2329 		tmp4.sin_addr.s_addr = a->addr32[0];
2330 		ke = (struct pfr_kentry *)rn_match(&tmp4, kt->pfrkt_ip4);
2331 		break;
2332 #ifdef INET6
2333 	case AF_INET6:
2334 		bzero(&tmp6, sizeof(tmp6));
2335 		tmp6.sin6_len = sizeof(tmp6);
2336 		tmp6.sin6_family = AF_INET6;
2337 		bcopy(a, &tmp6.sin6_addr, sizeof(tmp6.sin6_addr));
2338 		ke = (struct pfr_kentry *)rn_match(&tmp6, kt->pfrkt_ip6);
2339 		break;
2340 #endif /* INET6 */
2341 	default:
2342 		unhandled_af(af);
2343 	}
2344 	if (exact && ke && KENTRY_NETWORK(ke))
2345 		ke = NULL;
2346 
2347 	return (ke);
2348 }
2349 
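/*
 * Charge a packet to the table and, if PFR_TFLAG_COUNTERS is set, to
 * the matching entry, allocating its counters lazily.  When the
 * lookup disagrees with what the rule expected (notrule), the packet
 * is accounted as XPASS instead.
 */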
2350 void
2351 pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, struct pf_pdesc *pd,
2352     int op, int notrule)
2353 {
2354 	struct pfr_kentry	*ke = NULL;
2355 	struct sockaddr_in	 tmp4;
2356 #ifdef INET6
2357 	struct sockaddr_in6	 tmp6;
2358 #endif /* INET6 */
2359 	sa_family_t		 af = pd->af;
2360 	u_int64_t		 len = pd->tot_len;
2361 	int			 dir_idx = (pd->dir == PF_OUT);
2362 	int			 op_idx;
2363 
2364 	kt = pfr_ktable_select_active(kt);
2365 	if (kt == NULL)
2366 		return;
2367 
2368 	switch (af) {
2369 	case AF_INET:
2370 		bzero(&tmp4, sizeof(tmp4));
2371 		tmp4.sin_len = sizeof(tmp4);
2372 		tmp4.sin_family = AF_INET;
2373 		tmp4.sin_addr.s_addr = a->addr32[0];
2374 		ke = (struct pfr_kentry *)rn_match(&tmp4, kt->pfrkt_ip4);
2375 		break;
2376 #ifdef INET6
2377 	case AF_INET6:
2378 		bzero(&tmp6, sizeof(tmp6));
2379 		tmp6.sin6_len = sizeof(tmp6);
2380 		tmp6.sin6_family = AF_INET6;
2381 		bcopy(a, &tmp6.sin6_addr, sizeof(tmp6.sin6_addr));
2382 		ke = (struct pfr_kentry *)rn_match(&tmp6, kt->pfrkt_ip6);
2383 		break;
2384 #endif /* INET6 */
2385 	default:
2386 		unhandled_af(af);
2387 	}
2388 
2389 	switch (op) {
2390 	case PF_PASS:
2391 		op_idx = PFR_OP_PASS;
2392 		break;
2393 	case PF_MATCH:
2394 		op_idx = PFR_OP_MATCH;
2395 		break;
2396 	case PF_DROP:
2397 		op_idx = PFR_OP_BLOCK;
2398 		break;
2399 	default:
2400 		panic("unhandled op");
2401 	}
2402 
2403 	if ((ke == NULL || (ke->pfrke_flags & PFRKE_FLAG_NOT)) != notrule) {
2404 		if (op_idx != PFR_OP_PASS)
2405 			DPFPRINTF(LOG_DEBUG,
2406 			    "pfr_update_stats: assertion failed.");
2407 		op_idx = PFR_OP_XPASS;
2408 	}
2409 	kt->pfrkt_packets[dir_idx][op_idx]++;
2410 	kt->pfrkt_bytes[dir_idx][op_idx] += len;
2411 	if (ke != NULL && op_idx != PFR_OP_XPASS &&
2412 	    (kt->pfrkt_flags & PFR_TFLAG_COUNTERS)) {
2413 		if (ke->pfrke_counters == NULL)
2414 			ke->pfrke_counters = pool_get(&pfr_kcounters_pl,
2415 			    PR_NOWAIT | PR_ZERO);
2416 		if (ke->pfrke_counters != NULL) {
2417 			ke->pfrke_counters->pfrkc_packets[dir_idx][op_idx]++;
2418 			ke->pfrke_counters->pfrkc_bytes[dir_idx][op_idx] += len;
2419 		}
2420 	}
2421 }
2422 
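/*
 * Look up (or create) the table a rule references and take a rule
 * reference on it.  Inside an anchor, a root table of the same name
 * is created in the main ruleset and linked through pfrkt_root.
 */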
2423 struct pfr_ktable *
2424 pfr_attach_table(struct pf_ruleset *rs, char *name, int intr)
2425 {
2426 	struct pfr_ktable	*kt, *rt;
2427 	struct pfr_table	 tbl;
2428 	struct pf_anchor	*ac = rs->anchor;
2429 
2430 	bzero(&tbl, sizeof(tbl));
2431 	strlcpy(tbl.pfrt_name, name, sizeof(tbl.pfrt_name));
2432 	if (ac != NULL)
2433 		strlcpy(tbl.pfrt_anchor, ac->path, sizeof(tbl.pfrt_anchor));
2434 	kt = pfr_lookup_table(&tbl);
2435 	if (kt == NULL) {
2436 		kt = pfr_create_ktable(&tbl, gettime(), 1, intr);
2437 		if (kt == NULL)
2438 			return (NULL);
2439 		if (ac != NULL) {
2440 			bzero(tbl.pfrt_anchor, sizeof(tbl.pfrt_anchor));
2441 			rt = pfr_lookup_table(&tbl);
2442 			if (rt == NULL) {
2443 				rt = pfr_create_ktable(&tbl, 0, 1, intr);
2444 				if (rt == NULL) {
2445 					pfr_destroy_ktable(kt, 0);
2446 					return (NULL);
2447 				}
2448 				pfr_insert_ktable(rt);
2449 			}
2450 			kt->pfrkt_root = rt;
2451 		}
2452 		pfr_insert_ktable(kt);
2453 	}
2454 	if (!kt->pfrkt_refcnt[PFR_REFCNT_RULE]++)
2455 		pfr_setflags_ktable(kt, kt->pfrkt_flags|PFR_TFLAG_REFERENCED);
2456 	return (kt);
2457 }
2458 
2459 void
2460 pfr_detach_table(struct pfr_ktable *kt)
2461 {
2462 	if (kt->pfrkt_refcnt[PFR_REFCNT_RULE] <= 0)
2463 		DPFPRINTF(LOG_NOTICE, "pfr_detach_table: refcount = %d.",
2464 		    kt->pfrkt_refcnt[PFR_REFCNT_RULE]);
2465 	else if (!--kt->pfrkt_refcnt[PFR_REFCNT_RULE])
2466 		pfr_setflags_ktable(kt, kt->pfrkt_flags&~PFR_TFLAG_REFERENCED);
2467 }
2468 
2469 int
2470 pfr_islinklocal(sa_family_t af, struct pf_addr *addr)
2471 {
2472 #ifdef	INET6
2473 	if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr->v6))
2474 		return (1);
2475 #endif	/* INET6 */
2476 	return (0);
2477 }
2478 
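/*
 * Select the next pool address from a table or dynamic interface
 * list.  rpool->tblidx and rpool->counter carry the round-robin
 * position across calls; nested networks are stepped over by
 * advancing the counter past the inner block.  Returns 0 on success,
 * 1 if the table yields no usable address, -1 on error.
 */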
2479 int
2480 pfr_pool_get(struct pf_pool *rpool, struct pf_addr **raddr,
2481     struct pf_addr **rmask, sa_family_t af)
2482 {
2483 	struct pfr_ktable	*kt;
2484 	struct pfr_kentry	*ke, *ke2;
2485 	struct pf_addr		*addr, *counter;
2486 	union sockaddr_union	 mask;
2487 	struct sockaddr_in	 tmp4;
2488 #ifdef INET6
2489 	struct sockaddr_in6	 tmp6;
2490 #endif
2491 	int			 startidx, idx = -1, loop = 0, use_counter = 0;
2492 
2493 	switch (af) {
2494 	case AF_INET:
2495 		bzero(&tmp4, sizeof(tmp4));
2496 		tmp4.sin_len = sizeof(tmp4);
2497 		tmp4.sin_family = AF_INET;
2498 		addr = (struct pf_addr *)&tmp4.sin_addr;
2499 		break;
2500 #ifdef	INET6
2501 	case AF_INET6:
2502 		bzero(&tmp6, sizeof(tmp6));
2503 		tmp6.sin6_len = sizeof(tmp6);
2504 		tmp6.sin6_family = AF_INET6;
2505 		addr = (struct pf_addr *)&tmp6.sin6_addr;
2506 		break;
2507 #endif	/* INET6 */
2508 	default:
2509 		unhandled_af(af);
2510 	}
2511 
2512 	if (rpool->addr.type == PF_ADDR_TABLE)
2513 		kt = rpool->addr.p.tbl;
2514 	else if (rpool->addr.type == PF_ADDR_DYNIFTL)
2515 		kt = rpool->addr.p.dyn->pfid_kt;
2516 	else
2517 		return (-1);
2518 	kt = pfr_ktable_select_active(kt);
2519 	if (kt == NULL)
2520 		return (-1);
2521 
2522 	counter = &rpool->counter;
2523 	idx = rpool->tblidx;
2524 	if (idx < 0 || idx >= kt->pfrkt_cnt)
2525 		idx = 0;
2526 	else
2527 		use_counter = 1;
2528 	startidx = idx;
2529 
2530  _next_block:
2531 	if (loop && startidx == idx) {
2532 		kt->pfrkt_nomatch++;
2533 		return (1);
2534 	}
2535 
2536 	ke = pfr_kentry_byidx(kt, idx, af);
2537 	if (ke == NULL) {
2538 		/* we don't have this idx, try looping */
2539 		if (loop || (ke = pfr_kentry_byidx(kt, 0, af)) == NULL) {
2540 			kt->pfrkt_nomatch++;
2541 			return (1);
2542 		}
2543 		idx = 0;
2544 		loop++;
2545 	}
2546 
2547 	/* Get current weight for weighted round-robin */
2548 	if (idx == 0 && use_counter == 1 && kt->pfrkt_refcntcost > 0) {
2549 		rpool->curweight = rpool->curweight - kt->pfrkt_gcdweight;
2550 
2551 		if (rpool->curweight < 1)
2552 			rpool->curweight = kt->pfrkt_maxweight;
2553 	}
2554 
2555 	pfr_prepare_network(&pfr_mask, af, ke->pfrke_net);
2556 	*raddr = SUNION2PF(&ke->pfrke_sa, af);
2557 	*rmask = SUNION2PF(&pfr_mask, af);
2558 
2559 	if (use_counter && !PF_AZERO(counter, af)) {
2560 		/* is supplied address within block? */
2561 		if (!pf_match_addr(0, *raddr, *rmask, counter, af)) {
2562 			/* no, go to next block in table */
2563 			idx++;
2564 			use_counter = 0;
2565 			goto _next_block;
2566 		}
2567 		pf_addrcpy(addr, counter, af);
2568 	} else {
2569 		/* use first address of block */
2570 		pf_addrcpy(addr, *raddr, af);
2571 	}
2572 
2573 	if (!KENTRY_NETWORK(ke)) {
2574 		/* this is a single IP address - no possible nested block */
2575 		if (rpool->addr.type == PF_ADDR_DYNIFTL &&
2576 		    pfr_islinklocal(af, addr)) {
2577 			idx++;
2578 			goto _next_block;
2579 		}
2580 		pf_addrcpy(counter, addr, af);
2581 		rpool->tblidx = idx;
2582 		kt->pfrkt_match++;
2583 		rpool->states = 0;
2584 		if (ke->pfrke_counters != NULL)
2585 			rpool->states = ke->pfrke_counters->states;
2586 		switch (ke->pfrke_type) {
2587 		case PFRKE_COST:
2588 			rpool->weight = ((struct pfr_kentry_cost *)ke)->weight;
2589 			/* FALLTHROUGH */
2590 		case PFRKE_ROUTE:
2591 			rpool->kif = ((struct pfr_kentry_route *)ke)->kif;
2592 			break;
2593 		default:
2594 			rpool->weight = 1;
2595 			break;
2596 		}
2597 		return (0);
2598 	}
2599 	for (;;) {
2600 		/* we don't want to use a nested block */
2601 		switch (af) {
2602 		case AF_INET:
2603 			ke2 = (struct pfr_kentry *)rn_match(&tmp4,
2604 			    kt->pfrkt_ip4);
2605 			break;
2606 #ifdef	INET6
2607 		case AF_INET6:
2608 			ke2 = (struct pfr_kentry *)rn_match(&tmp6,
2609 			    kt->pfrkt_ip6);
2610 			break;
2611 #endif	/* INET6 */
2612 		default:
2613 			unhandled_af(af);
2614 		}
2615 		if (ke2 == ke) {
2616 			/* lookup returned the same block - perfect */
2617 			if (rpool->addr.type == PF_ADDR_DYNIFTL &&
2618 			    pfr_islinklocal(af, addr))
2619 				goto _next_entry;
2620 			pf_addrcpy(counter, addr, af);
2621 			rpool->tblidx = idx;
2622 			kt->pfrkt_match++;
2623 			rpool->states = 0;
2624 			if (ke->pfrke_counters != NULL)
2625 				rpool->states = ke->pfrke_counters->states;
2626 			switch (ke->pfrke_type) {
2627 			case PFRKE_COST:
2628 				rpool->weight =
2629 				    ((struct pfr_kentry_cost *)ke)->weight;
2630 				/* FALLTHROUGH */
2631 			case PFRKE_ROUTE:
2632 				rpool->kif = ((struct pfr_kentry_route *)ke)->kif;
2633 				break;
2634 			default:
2635 				rpool->weight = 1;
2636 				break;
2637 			}
2638 			return (0);
2639 		}
2640 _next_entry:
2641 		/* we need to increase the counter past the nested block */
2642 		pfr_prepare_network(&mask, af, ke2->pfrke_net);
2643 		pf_poolmask(addr, addr, SUNION2PF(&mask, af), &pfr_ffaddr, af);
2644 		pf_addr_inc(addr, af);
2645 		if (!pf_match_addr(0, *raddr, *rmask, addr, af)) {
2646 			/* ok, we reached the end of our main block */
2647 			/* go to next block in table */
2648 			idx++;
2649 			use_counter = 0;
2650 			goto _next_block;
2651 		}
2652 	}
2653 }
2654 
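/* Return the idx-th entry of the given af, found by a tree walk. */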
2655 struct pfr_kentry *
2656 pfr_kentry_byidx(struct pfr_ktable *kt, int idx, int af)
2657 {
2658 	struct pfr_walktree	w;
2659 
2660 	bzero(&w, sizeof(w));
2661 	w.pfrw_op = PFRW_POOL_GET;
2662 	w.pfrw_cnt = idx;
2663 
2664 	switch (af) {
2665 	case AF_INET:
2666 		rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w);
2667 		return (w.pfrw_kentry);
2668 #ifdef INET6
2669 	case AF_INET6:
2670 		rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w);
2671 		return (w.pfrw_kentry);
2672 #endif /* INET6 */
2673 	default:
2674 		return (NULL);
2675 	}
2676 }
2677 
2678 /* Added for load balancing state counter use. */
2679 int
2680 pfr_states_increase(struct pfr_ktable *kt, struct pf_addr *addr, int af)
2681 {
2682 	struct pfr_kentry *ke;
2683 
2684 	ke = pfr_kentry_byaddr(kt, addr, af, 1);
2685 	if (ke == NULL)
2686 		return (-1);
2687 
2688 	if (ke->pfrke_counters == NULL)
2689 		ke->pfrke_counters = pool_get(&pfr_kcounters_pl,
2690 		    PR_NOWAIT | PR_ZERO);
2691 	if (ke->pfrke_counters == NULL)
2692 		return (-1);
2693 
2694 	ke->pfrke_counters->states++;
2695 	return (ke->pfrke_counters->states);
2696 }
2697 
2698 /* Added for load balancing state counter use. */
2699 int
2700 pfr_states_decrease(struct pfr_ktable *kt, struct pf_addr *addr, int af)
2701 {
2702 	struct pfr_kentry *ke;
2703 
2704 	ke = pfr_kentry_byaddr(kt, addr, af, 1);
2705 	if (ke == NULL)
2706 		return (-1);
2707 
2708 	if (ke->pfrke_counters == NULL)
2709 		ke->pfrke_counters = pool_get(&pfr_kcounters_pl,
2710 		    PR_NOWAIT | PR_ZERO);
2711 	if (ke->pfrke_counters == NULL)
2712 		return (-1);
2713 
2714 	if (ke->pfrke_counters->states > 0)
2715 		ke->pfrke_counters->states--;
2716 	else
2717 		DPFPRINTF(LOG_DEBUG,
2718 		    "pfr_states_decrease: states-- when states <= 0");
2719 
2720 	return (ke->pfrke_counters->states);
2721 }
2722 
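/*
 * Refresh a dynamic address from kt: the tree walk updates the
 * cached addresses and the pfid_acnt4/pfid_acnt6 address counts.
 */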
2723 void
2724 pfr_dynaddr_update(struct pfr_ktable *kt, struct pfi_dynaddr *dyn)
2725 {
2726 	struct pfr_walktree	w;
2727 
2728 	bzero(&w, sizeof(w));
2729 	w.pfrw_op = PFRW_DYNADDR_UPDATE;
2730 	w.pfrw_dyn = dyn;
2731 
2732 	dyn->pfid_acnt4 = 0;
2733 	dyn->pfid_acnt6 = 0;
2734 	switch (dyn->pfid_af) {
2735 	case AF_UNSPEC:	/* look up both IPv4 and IPv6 addresses */
2736 		rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w);
2737 		rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w);
2738 		break;
2739 	case AF_INET:
2740 		rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w);
2741 		break;
2742 #ifdef	INET6
2743 	case AF_INET6:
2744 		rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w);
2745 		break;
2746 #endif	/* INET6 */
2747 	default:
2748 		unhandled_af(dyn->pfid_af);
2749 	}
2750 }
2751 
2752 void
2753 pfr_ktable_winfo_update(struct pfr_ktable *kt, struct pfr_kentry *p)
{
2754 	/*
2755 	 * If any entry carries the cost attribute, maintain the gcd and
2756 	 * maximum of all entry weights for weighted round-robin.
2757 	 */
2758 	if (kt->pfrkt_refcntcost > 0) {
2759 		u_int16_t weight;
2760 
2761 		weight = (p->pfrke_type == PFRKE_COST) ?
2762 		    ((struct pfr_kentry_cost *)p)->weight : 1;
2763 
2764 		if (kt->pfrkt_gcdweight == 0)
2765 			kt->pfrkt_gcdweight = weight;
2766 
2767 		kt->pfrkt_gcdweight =
2768 			pfr_gcd(weight, kt->pfrkt_gcdweight);
2769 
2770 		if (kt->pfrkt_maxweight < weight)
2771 			kt->pfrkt_maxweight = weight;
2772 	}
2773 }
2774 
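/*
 * If kt itself is not active, fall back to its root table; returns
 * NULL if the chosen table is not active either.
 */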
2775 struct pfr_ktable *
2776 pfr_ktable_select_active(struct pfr_ktable *kt)
2777 {
2778 	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
2779 		kt = kt->pfrkt_root;
2780 	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
2781 		return (NULL);
2782 
2783 	return (kt);
2784 }
2785