xref: /openbsd-src/sys/net/pf_ioctl.c (revision 1a8dbaac879b9f3335ad7fb25429ce63ac1d6bac)
1 /*	$OpenBSD: pf_ioctl.c,v 1.358 2020/10/02 09:14:33 claudio Exp $ */
2 
3 /*
4  * Copyright (c) 2001 Daniel Hartmeier
5  * Copyright (c) 2002 - 2018 Henning Brauer <henning@openbsd.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  *    - Redistributions of source code must retain the above copyright
13  *      notice, this list of conditions and the following disclaimer.
14  *    - Redistributions in binary form must reproduce the above
15  *      copyright notice, this list of conditions and the following
16  *      disclaimer in the documentation and/or other materials provided
17  *      with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  *
32  * Effort sponsored in part by the Defense Advanced Research Projects
33  * Agency (DARPA) and Air Force Research Laboratory, Air Force
34  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35  *
36  */
37 
38 #include "pfsync.h"
39 #include "pflog.h"
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/sysctl.h>
44 #include <sys/mbuf.h>
45 #include <sys/filio.h>
46 #include <sys/fcntl.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/kernel.h>
50 #include <sys/time.h>
51 #include <sys/timeout.h>
52 #include <sys/pool.h>
53 #include <sys/malloc.h>
54 #include <sys/proc.h>
55 #include <sys/rwlock.h>
56 #include <sys/syslog.h>
57 #include <uvm/uvm_extern.h>
58 
59 #include <crypto/md5.h>
60 
61 #include <net/if.h>
62 #include <net/if_var.h>
63 #include <net/route.h>
64 #include <net/hfsc.h>
65 #include <net/fq_codel.h>
66 
67 #include <netinet/in.h>
68 #include <netinet/ip.h>
69 #include <netinet/in_pcb.h>
70 #include <netinet/ip_var.h>
71 #include <netinet/ip_icmp.h>
72 #include <netinet/tcp.h>
73 #include <netinet/udp.h>
74 
75 #ifdef INET6
76 #include <netinet/ip6.h>
77 #include <netinet/icmp6.h>
78 #endif /* INET6 */
79 
80 #include <net/pfvar.h>
81 #include <net/pfvar_priv.h>
82 
83 #if NPFSYNC > 0
84 #include <netinet/ip_ipsp.h>
85 #include <net/if_pfsync.h>
86 #endif /* NPFSYNC > 0 */
87 
/*
 * Pool backing struct pf_tagname allocations.  tagname2tag() and tag_unref()
 * use it for entries of both the pf_tags and the pf_qids list.
 */
struct pool		 pf_tag_pl;
89 
90 void			 pfattach(int);
91 void			 pf_thread_create(void *);
92 int			 pfopen(dev_t, int, int, struct proc *);
93 int			 pfclose(dev_t, int, int, struct proc *);
94 int			 pfioctl(dev_t, u_long, caddr_t, int, struct proc *);
95 int			 pf_begin_rules(u_int32_t *, const char *);
96 int			 pf_rollback_rules(u_int32_t, char *);
97 void			 pf_remove_queues(void);
98 int			 pf_commit_queues(void);
99 void			 pf_free_queues(struct pf_queuehead *);
100 void			 pf_calc_chksum(struct pf_ruleset *);
101 void			 pf_hash_rule(MD5_CTX *, struct pf_rule *);
102 void			 pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *);
103 int			 pf_commit_rules(u_int32_t, char *);
104 int			 pf_addr_setup(struct pf_ruleset *,
105 			    struct pf_addr_wrap *, sa_family_t);
106 int			 pf_kif_setup(char *, struct pfi_kif **);
107 void			 pf_addr_copyout(struct pf_addr_wrap *);
108 void			 pf_trans_set_commit(void);
109 void			 pf_pool_copyin(struct pf_pool *, struct pf_pool *);
110 int			 pf_rule_copyin(struct pf_rule *, struct pf_rule *,
111 			    struct pf_ruleset *);
112 u_int16_t		 pf_qname2qid(char *, int);
113 void			 pf_qid2qname(u_int16_t, char *);
114 void			 pf_qid_unref(u_int16_t);
115 
/*
 * pf_default_rule is filled in by pfattach() (pass action, default timeouts,
 * address types).
 * NOTE(review): pf_default_rule_new is not referenced in this part of the
 * file; presumably a staging copy of the default rule -- confirm.
 */
struct pf_rule		 pf_default_rule, pf_default_rule_new;

/*
 * Values for the global settings handled by pf_trans_set_commit().
 * NOTE(review): `mask' presumably records, using the PF_TSET_* bits below,
 * which of the fields have been set -- confirm in pf_trans_set_commit().
 */
struct {
	char		statusif[IFNAMSIZ];
	u_int32_t	debug;
	u_int32_t	hostid;
	u_int32_t	reass;
	u_int32_t	mask;
} pf_trans_set;

/* One bit per pf_trans_set field. */
#define	PF_TSET_STATUSIF	0x01
#define	PF_TSET_DEBUG		0x02
#define	PF_TSET_HOSTID		0x04
#define	PF_TSET_REASS		0x08
130 
/* Largest id tagname2tag() will assign; larger ids make it return 0. */
#define	TAGID_MAX	 50000
/*
 * Name <-> id lists: pf_tags is used by the pf_tag*() wrappers, pf_qids by
 * the pf_q*() wrappers.  tagname2tag() inserts new entries so that each list
 * stays ordered by ascending id.
 */
TAILQ_HEAD(pf_tags, pf_tagname)	pf_tags = TAILQ_HEAD_INITIALIZER(pf_tags),
				pf_qids = TAILQ_HEAD_INITIALIZER(pf_qids);
134 
#ifdef WITH_PF_LOCK
/*
 * pf_lock protects the consistency of those PF data structures that do not
 * have a dedicated lock yet. It currently protects:
 *	- rules,
 *	- radix tables,
 *	- source nodes
 * All callers must grab pf_lock exclusively.
 *
 * pf_state_lock protects the consistency of the state table. Packets that
 * only look up state grab the lock as readers. A packet that must create
 * state grabs the lock as a writer; whenever a packet creates state it grabs
 * pf_lock first and then takes pf_state_lock as the writer.
 */
struct rwlock		 pf_lock = RWLOCK_INITIALIZER("pf_lock");
struct rwlock		 pf_state_lock = RWLOCK_INITIALIZER("pf_state_lock");
#endif /* WITH_PF_LOCK */
152 
/*
 * Queue names go through the same tagname2tag()/tag2tagname() helpers as
 * tag names (see pf_qname2qid() and pf_qid2qname()), and tag2tagname()
 * always copies PF_TAG_NAME_SIZE bytes at most, so both buffer sizes have
 * to agree.
 */
#if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
#error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
#endif
/* Generic name <-> id helpers operating on either pf_tags or pf_qids. */
u_int16_t		 tagname2tag(struct pf_tags *, char *, int);
void			 tag2tagname(struct pf_tags *, u_int16_t, char *);
void			 tag_unref(struct pf_tags *, u_int16_t);
/* Route label helpers for PF_ADDR_RTLABEL address wrappers. */
int			 pf_rtlabel_add(struct pf_addr_wrap *);
void			 pf_rtlabel_remove(struct pf_addr_wrap *);
void			 pf_rtlabel_copyout(struct pf_addr_wrap *);
162 
163 
/*
 * pfattach: initialise pf's global state.
 *
 * Creates the memory pools, runs the initialisers of the helper subsystems,
 * sets up the main ruleset, the two queue lists and the state list, fills in
 * pf_default_rule (pass action plus default timeouts) and resets pf_status.
 * The `num' argument is not used.
 */
void
pfattach(int num)
{
	u_int32_t *timeout = pf_default_rule.timeout;

	/* one pool per object type, all at IPL_SOFTNET */
	pool_init(&pf_rule_pl, sizeof(struct pf_rule), 0,
	    IPL_SOFTNET, 0, "pfrule", NULL);
	pool_init(&pf_src_tree_pl, sizeof(struct pf_src_node), 0,
	    IPL_SOFTNET, 0, "pfsrctr", NULL);
	pool_init(&pf_sn_item_pl, sizeof(struct pf_sn_item), 0,
	    IPL_SOFTNET, 0, "pfsnitem", NULL);
	pool_init(&pf_state_pl, sizeof(struct pf_state), 0,
	    IPL_SOFTNET, 0, "pfstate", NULL);
	pool_init(&pf_state_key_pl, sizeof(struct pf_state_key), 0,
	    IPL_SOFTNET, 0, "pfstkey", NULL);
	pool_init(&pf_state_item_pl, sizeof(struct pf_state_item), 0,
	    IPL_SOFTNET, 0, "pfstitem", NULL);
	pool_init(&pf_rule_item_pl, sizeof(struct pf_rule_item), 0,
	    IPL_SOFTNET, 0, "pfruleitem", NULL);
	pool_init(&pf_queue_pl, sizeof(struct pf_queuespec), 0,
	    IPL_SOFTNET, 0, "pfqueue", NULL);
	pool_init(&pf_tag_pl, sizeof(struct pf_tagname), 0,
	    IPL_SOFTNET, 0, "pftag", NULL);
	pool_init(&pf_pktdelay_pl, sizeof(struct pf_pktdelay), 0,
	    IPL_SOFTNET, 0, "pfpktdelay", NULL);

	hfsc_initialize();
	pfr_initialize();
	pfi_initialize();
	pf_osfp_initialize();
	pf_syncookies_init();

	/* apply the configured state limit as the hard limit of its pool */
	pool_sethardlimit(pf_pool_limits[PF_LIMIT_STATES].pp,
	    pf_pool_limits[PF_LIMIT_STATES].limit, NULL, 0);

	/* with at most 100MB of physical memory use the small table limit */
	if (physmem <= atop(100*1024*1024))
		pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].limit =
		    PFR_KENTRY_HIWAT_SMALL;

	RB_INIT(&tree_src_tracking);
	RB_INIT(&pf_anchors);
	pf_init_ruleset(&pf_main_ruleset);
	TAILQ_INIT(&pf_queues[0]);
	TAILQ_INIT(&pf_queues[1]);
	pf_queues_active = &pf_queues[0];
	pf_queues_inactive = &pf_queues[1];
	TAILQ_INIT(&state_list);

	/*
	 * default rule should never be garbage collected: a non-NULL
	 * tqe_prev makes pf_rm_rule() treat it as still linked
	 */
	pf_default_rule.entries.tqe_prev = &pf_default_rule.entries.tqe_next;
	pf_default_rule.action = PF_PASS;
	pf_default_rule.nr = (u_int32_t)-1;
	pf_default_rule.rtableid = -1;

	/* initialize default timeouts */
	timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL;
	timeout[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL;
	timeout[PFTM_TCP_ESTABLISHED] = PFTM_TCP_ESTABLISHED_VAL;
	timeout[PFTM_TCP_CLOSING] = PFTM_TCP_CLOSING_VAL;
	timeout[PFTM_TCP_FIN_WAIT] = PFTM_TCP_FIN_WAIT_VAL;
	timeout[PFTM_TCP_CLOSED] = PFTM_TCP_CLOSED_VAL;
	timeout[PFTM_UDP_FIRST_PACKET] = PFTM_UDP_FIRST_PACKET_VAL;
	timeout[PFTM_UDP_SINGLE] = PFTM_UDP_SINGLE_VAL;
	timeout[PFTM_UDP_MULTIPLE] = PFTM_UDP_MULTIPLE_VAL;
	timeout[PFTM_ICMP_FIRST_PACKET] = PFTM_ICMP_FIRST_PACKET_VAL;
	timeout[PFTM_ICMP_ERROR_REPLY] = PFTM_ICMP_ERROR_REPLY_VAL;
	timeout[PFTM_OTHER_FIRST_PACKET] = PFTM_OTHER_FIRST_PACKET_VAL;
	timeout[PFTM_OTHER_SINGLE] = PFTM_OTHER_SINGLE_VAL;
	timeout[PFTM_OTHER_MULTIPLE] = PFTM_OTHER_MULTIPLE_VAL;
	timeout[PFTM_FRAG] = PFTM_FRAG_VAL;
	timeout[PFTM_INTERVAL] = PFTM_INTERVAL_VAL;
	timeout[PFTM_SRC_NODE] = PFTM_SRC_NODE_VAL;
	timeout[PFTM_TS_DIFF] = PFTM_TS_DIFF_VAL;
	timeout[PFTM_ADAPTIVE_START] = PFSTATE_ADAPT_START;
	timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END;

	pf_default_rule.src.addr.type =  PF_ADDR_ADDRMASK;
	pf_default_rule.dst.addr.type =  PF_ADDR_ADDRMASK;
	pf_default_rule.rdr.addr.type =  PF_ADDR_NONE;
	pf_default_rule.nat.addr.type =  PF_ADDR_NONE;
	pf_default_rule.route.addr.type =  PF_ADDR_NONE;

	pf_normalize_init();
	memset(&pf_status, 0, sizeof(pf_status));
	pf_status.debug = LOG_ERR;
	pf_status.reass = PF_REASS_ENABLED;

	/* XXX do our best to avoid a conflict */
	pf_status.hostid = arc4random();
}
254 
/*
 * Open entry point of the pf device.  Only minor number 0 exists; any
 * other minor yields ENXIO.  flags, fmt and p are not looked at.
 */
int
pfopen(dev_t dev, int flags, int fmt, struct proc *p)
{
	return (minor(dev) >= 1 ? ENXIO : 0);
}
262 
/*
 * Close entry point of the pf device.  Mirrors pfopen(): minor 0 succeeds,
 * every other minor yields ENXIO.  flags, fmt and p are not looked at.
 */
int
pfclose(dev_t dev, int flags, int fmt, struct proc *p)
{
	return (minor(dev) >= 1 ? ENXIO : 0);
}
270 
/*
 * Unlink a rule from `rulequeue' (if given) and free it once nothing
 * references it any more.
 *
 * With a non-NULL rulequeue the rule is removed from that queue and marked
 * as unlinked (tqe_prev = NULL, nr = -1).  With a NULL rulequeue nothing is
 * unlinked; the call only attempts the final release.
 *
 * The rule is released only when it has no states, no source nodes and is
 * not linked on a queue; otherwise the function returns early and leaves
 * the rule allocated.
 */
void
pf_rm_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule)
{
	if (rulequeue != NULL) {
		if (rule->states_cur == 0 && rule->src_nodes == 0) {
			/*
			 * XXX - we need to remove the table *before* detaching
			 * the rule to make sure the table code does not delete
			 * the anchor under our feet.
			 */
			pf_tbladdr_remove(&rule->src.addr);
			pf_tbladdr_remove(&rule->dst.addr);
			pf_tbladdr_remove(&rule->rdr.addr);
			pf_tbladdr_remove(&rule->nat.addr);
			pf_tbladdr_remove(&rule->route.addr);
			if (rule->overload_tbl)
				pfr_detach_table(rule->overload_tbl);
		}
		TAILQ_REMOVE(rulequeue, rule, entries);
		/* mark the rule as no longer linked on any queue */
		rule->entries.tqe_prev = NULL;
		rule->nr = (u_int32_t)-1;
	}

	/* still referenced by states/source nodes, or still linked: keep it */
	if (rule->states_cur > 0 || rule->src_nodes > 0 ||
	    rule->entries.tqe_prev != NULL)
		return;
	pf_tag_unref(rule->tag);
	pf_tag_unref(rule->match_tag);
	pf_rtlabel_remove(&rule->src.addr);
	pf_rtlabel_remove(&rule->dst.addr);
	pfi_dynaddr_remove(&rule->src.addr);
	pfi_dynaddr_remove(&rule->dst.addr);
	pfi_dynaddr_remove(&rule->rdr.addr);
	pfi_dynaddr_remove(&rule->nat.addr);
	pfi_dynaddr_remove(&rule->route.addr);
	/*
	 * With a queue given, the table references were already dropped in
	 * the branch above (this point is only reached with no states and no
	 * source nodes); drop them here only for the rulequeue == NULL case.
	 */
	if (rulequeue == NULL) {
		pf_tbladdr_remove(&rule->src.addr);
		pf_tbladdr_remove(&rule->dst.addr);
		pf_tbladdr_remove(&rule->rdr.addr);
		pf_tbladdr_remove(&rule->nat.addr);
		pf_tbladdr_remove(&rule->route.addr);
		if (rule->overload_tbl)
			pfr_detach_table(rule->overload_tbl);
	}
	pfi_kif_unref(rule->rcv_kif, PFI_KIF_REF_RULE);
	pfi_kif_unref(rule->kif, PFI_KIF_REF_RULE);
	pfi_kif_unref(rule->rdr.kif, PFI_KIF_REF_RULE);
	pfi_kif_unref(rule->nat.kif, PFI_KIF_REF_RULE);
	pfi_kif_unref(rule->route.kif, PFI_KIF_REF_RULE);
	pf_remove_anchor(rule);
	pool_put(&pf_rule_pl, rule);
}
323 
324 void
325 pf_purge_rule(struct pf_rule *rule)
326 {
327 	u_int32_t		 nr = 0;
328 	struct pf_ruleset	*ruleset;
329 
330 	KASSERT((rule != NULL) && (rule->ruleset != NULL));
331 	ruleset = rule->ruleset;
332 
333 	pf_rm_rule(ruleset->rules.active.ptr, rule);
334 	ruleset->rules.active.rcount--;
335 	TAILQ_FOREACH(rule, ruleset->rules.active.ptr, entries)
336 		rule->nr = nr++;
337 	ruleset->rules.active.ticket++;
338 	pf_calc_skip_steps(ruleset->rules.active.ptr);
339 	pf_remove_if_empty_ruleset(ruleset);
340 
341 	if (ruleset == &pf_main_ruleset)
342 		pf_calc_chksum(ruleset);
343 }
344 
345 u_int16_t
346 tagname2tag(struct pf_tags *head, char *tagname, int create)
347 {
348 	struct pf_tagname	*tag, *p = NULL;
349 	u_int16_t		 new_tagid = 1;
350 
351 	TAILQ_FOREACH(tag, head, entries)
352 		if (strcmp(tagname, tag->name) == 0) {
353 			tag->ref++;
354 			return (tag->tag);
355 		}
356 
357 	if (!create)
358 		return (0);
359 
360 	/*
361 	 * to avoid fragmentation, we do a linear search from the beginning
362 	 * and take the first free slot we find. if there is none or the list
363 	 * is empty, append a new entry at the end.
364 	 */
365 
366 	/* new entry */
367 	TAILQ_FOREACH(p, head, entries) {
368 		if (p->tag != new_tagid)
369 			break;
370 		new_tagid = p->tag + 1;
371 	}
372 
373 	if (new_tagid > TAGID_MAX)
374 		return (0);
375 
376 	/* allocate and fill new struct pf_tagname */
377 	tag = pool_get(&pf_tag_pl, PR_NOWAIT | PR_ZERO);
378 	if (tag == NULL)
379 		return (0);
380 	strlcpy(tag->name, tagname, sizeof(tag->name));
381 	tag->tag = new_tagid;
382 	tag->ref++;
383 
384 	if (p != NULL)	/* insert new entry before p */
385 		TAILQ_INSERT_BEFORE(p, tag, entries);
386 	else	/* either list empty or no free slot in between */
387 		TAILQ_INSERT_TAIL(head, tag, entries);
388 
389 	return (tag->tag);
390 }
391 
392 void
393 tag2tagname(struct pf_tags *head, u_int16_t tagid, char *p)
394 {
395 	struct pf_tagname	*tag;
396 
397 	TAILQ_FOREACH(tag, head, entries)
398 		if (tag->tag == tagid) {
399 			strlcpy(p, tag->name, PF_TAG_NAME_SIZE);
400 			return;
401 		}
402 }
403 
404 void
405 tag_unref(struct pf_tags *head, u_int16_t tag)
406 {
407 	struct pf_tagname	*p, *next;
408 
409 	if (tag == 0)
410 		return;
411 
412 	TAILQ_FOREACH_SAFE(p, head, entries, next) {
413 		if (tag == p->tag) {
414 			if (--p->ref == 0) {
415 				TAILQ_REMOVE(head, p, entries);
416 				pool_put(&pf_tag_pl, p);
417 			}
418 			break;
419 		}
420 	}
421 }
422 
/*
 * Look up (and with `create' set, allocate) the id of a tag name on the
 * pf_tags list.  Returns the id, or 0 on failure; see tagname2tag().
 */
u_int16_t
pf_tagname2tag(char *tagname, int create)
{
	return (tagname2tag(&pf_tags, tagname, create));
}
428 
/*
 * Copy the name of tag `tagid' from the pf_tags list into `p'; `p' is left
 * untouched when the id is unknown (see tag2tagname()).
 */
void
pf_tag2tagname(u_int16_t tagid, char *p)
{
	tag2tagname(&pf_tags, tagid, p);
}
434 
435 void
436 pf_tag_ref(u_int16_t tag)
437 {
438 	struct pf_tagname *t;
439 
440 	TAILQ_FOREACH(t, &pf_tags, entries)
441 		if (t->tag == tag)
442 			break;
443 	if (t != NULL)
444 		t->ref++;
445 }
446 
/* Drop one reference on tag id `tag' of the pf_tags list (see tag_unref()). */
void
pf_tag_unref(u_int16_t tag)
{
	tag_unref(&pf_tags, tag);
}
452 
453 int
454 pf_rtlabel_add(struct pf_addr_wrap *a)
455 {
456 	if (a->type == PF_ADDR_RTLABEL &&
457 	    (a->v.rtlabel = rtlabel_name2id(a->v.rtlabelname)) == 0)
458 		return (-1);
459 	return (0);
460 }
461 
462 void
463 pf_rtlabel_remove(struct pf_addr_wrap *a)
464 {
465 	if (a->type == PF_ADDR_RTLABEL)
466 		rtlabel_unref(a->v.rtlabel);
467 }
468 
469 void
470 pf_rtlabel_copyout(struct pf_addr_wrap *a)
471 {
472 	const char	*name;
473 
474 	if (a->type == PF_ADDR_RTLABEL && a->v.rtlabel) {
475 		if ((name = rtlabel_id2name(a->v.rtlabel)) == NULL)
476 			strlcpy(a->v.rtlabelname, "?",
477 			    sizeof(a->v.rtlabelname));
478 		else
479 			strlcpy(a->v.rtlabelname, name,
480 			    sizeof(a->v.rtlabelname));
481 	}
482 }
483 
/*
 * Look up (and with `create' set, allocate) the id of a queue name on the
 * pf_qids list.  Returns the id, or 0 on failure; see tagname2tag().
 */
u_int16_t
pf_qname2qid(char *qname, int create)
{
	return (tagname2tag(&pf_qids, qname, create));
}
489 
/*
 * Copy the name of queue id `qid' from the pf_qids list into `p'; `p' is
 * left untouched when the id is unknown (see tag2tagname()).
 */
void
pf_qid2qname(u_int16_t qid, char *p)
{
	tag2tagname(&pf_qids, qid, p);
}
495 
496 void
497 pf_qid_unref(u_int16_t qid)
498 {
499 	tag_unref(&pf_qids, (u_int16_t)qid);
500 }
501 
502 int
503 pf_begin_rules(u_int32_t *ticket, const char *anchor)
504 {
505 	struct pf_ruleset	*rs;
506 	struct pf_rule		*rule;
507 
508 	if ((rs = pf_find_or_create_ruleset(anchor)) == NULL)
509 		return (EINVAL);
510 	while ((rule = TAILQ_FIRST(rs->rules.inactive.ptr)) != NULL) {
511 		pf_rm_rule(rs->rules.inactive.ptr, rule);
512 		rs->rules.inactive.rcount--;
513 	}
514 	*ticket = ++rs->rules.inactive.ticket;
515 	rs->rules.inactive.open = 1;
516 	return (0);
517 }
518 
519 int
520 pf_rollback_rules(u_int32_t ticket, char *anchor)
521 {
522 	struct pf_ruleset	*rs;
523 	struct pf_rule		*rule;
524 
525 	rs = pf_find_ruleset(anchor);
526 	if (rs == NULL || !rs->rules.inactive.open ||
527 	    rs->rules.inactive.ticket != ticket)
528 		return (0);
529 	while ((rule = TAILQ_FIRST(rs->rules.inactive.ptr)) != NULL) {
530 		pf_rm_rule(rs->rules.inactive.ptr, rule);
531 		rs->rules.inactive.rcount--;
532 	}
533 	rs->rules.inactive.open = 0;
534 
535 	/* queue defs only in the main ruleset */
536 	if (anchor[0])
537 		return (0);
538 
539 	pf_free_queues(pf_queues_inactive);
540 
541 	return (0);
542 }
543 
544 void
545 pf_free_queues(struct pf_queuehead *where)
546 {
547 	struct pf_queuespec	*q, *qtmp;
548 
549 	TAILQ_FOREACH_SAFE(q, where, entries, qtmp) {
550 		TAILQ_REMOVE(where, q, entries);
551 		pfi_kif_unref(q->kif, PFI_KIF_REF_RULE);
552 		pool_put(&pf_queue_pl, q);
553 	}
554 }
555 
556 void
557 pf_remove_queues(void)
558 {
559 	struct pf_queuespec	*q;
560 	struct ifnet		*ifp;
561 
562 	/* put back interfaces in normal queueing mode */
563 	TAILQ_FOREACH(q, pf_queues_active, entries) {
564 		if (q->parent_qid != 0)
565 			continue;
566 
567 		ifp = q->kif->pfik_ifp;
568 		if (ifp == NULL)
569 			continue;
570 
571 		ifq_attach(&ifp->if_snd, ifq_priq_ops, NULL);
572 	}
573 }
574 
/*
 * Per-interface bookkeeping used by pf_create_queues() while it builds the
 * new queue configuration; entries form a singly linked list.
 */
struct pf_queue_if {
	struct ifnet		*ifp;		/* interface owning the root queue */
	const struct ifq_ops	*ifqops;	/* ops passed to ifq_attach() */
	const struct pfq_ops	*pfqops;	/* alloc/addqueue/free callbacks */
	void			*disc;		/* result of pfqops->pfq_alloc() */
	struct pf_queue_if	*next;		/* next list entry or NULL */
};
582 
583 static inline struct pf_queue_if *
584 pf_ifp2q(struct pf_queue_if *list, struct ifnet *ifp)
585 {
586 	struct pf_queue_if *qif = list;
587 
588 	while (qif != NULL) {
589 		if (qif->ifp == ifp)
590 			return (qif);
591 
592 		qif = qif->next;
593 	}
594 
595 	return (qif);
596 }
597 
598 int
599 pf_create_queues(void)
600 {
601 	struct pf_queuespec	*q;
602 	struct ifnet		*ifp;
603 	struct pf_queue_if		*list = NULL, *qif;
604 	int			 error;
605 
606 	/*
607 	 * Find root queues and allocate traffic conditioner
608 	 * private data for these interfaces
609 	 */
610 	TAILQ_FOREACH(q, pf_queues_active, entries) {
611 		if (q->parent_qid != 0)
612 			continue;
613 
614 		ifp = q->kif->pfik_ifp;
615 		if (ifp == NULL)
616 			continue;
617 
618 		qif = malloc(sizeof(*qif), M_TEMP, M_WAITOK);
619 		qif->ifp = ifp;
620 
621 		if (q->flags & PFQS_ROOTCLASS) {
622 			qif->ifqops = ifq_hfsc_ops;
623 			qif->pfqops = pfq_hfsc_ops;
624 		} else {
625 			qif->ifqops = ifq_fqcodel_ops;
626 			qif->pfqops = pfq_fqcodel_ops;
627 		}
628 
629 		qif->disc = qif->pfqops->pfq_alloc(ifp);
630 
631 		qif->next = list;
632 		list = qif;
633 	}
634 
635 	/* and now everything */
636 	TAILQ_FOREACH(q, pf_queues_active, entries) {
637 		ifp = q->kif->pfik_ifp;
638 		if (ifp == NULL)
639 			continue;
640 
641 		qif = pf_ifp2q(list, ifp);
642 		KASSERT(qif != NULL);
643 
644 		error = qif->pfqops->pfq_addqueue(qif->disc, q);
645 		if (error != 0)
646 			goto error;
647 	}
648 
649 	/* find root queues in old list to disable them if necessary */
650 	TAILQ_FOREACH(q, pf_queues_inactive, entries) {
651 		if (q->parent_qid != 0)
652 			continue;
653 
654 		ifp = q->kif->pfik_ifp;
655 		if (ifp == NULL)
656 			continue;
657 
658 		qif = pf_ifp2q(list, ifp);
659 		if (qif != NULL)
660 			continue;
661 
662 		ifq_attach(&ifp->if_snd, ifq_priq_ops, NULL);
663 	}
664 
665 	/* commit the new queues */
666 	while (list != NULL) {
667 		qif = list;
668 		list = qif->next;
669 
670 		ifp = qif->ifp;
671 
672 		ifq_attach(&ifp->if_snd, qif->ifqops, qif->disc);
673 		free(qif, M_TEMP, sizeof(*qif));
674 	}
675 
676 	return (0);
677 
678 error:
679 	while (list != NULL) {
680 		qif = list;
681 		list = qif->next;
682 
683 		qif->pfqops->pfq_free(qif->disc);
684 		free(qif, M_TEMP, sizeof(*qif));
685 	}
686 
687 	return (error);
688 }
689 
690 int
691 pf_commit_queues(void)
692 {
693 	struct pf_queuehead	*qswap;
694 	int error;
695 
696         /* swap */
697         qswap = pf_queues_active;
698         pf_queues_active = pf_queues_inactive;
699         pf_queues_inactive = qswap;
700 
701 	error = pf_create_queues();
702 	if (error != 0) {
703 		pf_queues_inactive = pf_queues_active;
704 		pf_queues_active = qswap;
705 		return (error);
706 	}
707 
708         pf_free_queues(pf_queues_inactive);
709 
710 	return (0);
711 }
712 
713 const struct pfq_ops *
714 pf_queue_manager(struct pf_queuespec *q)
715 {
716 	if (q->flags & PFQS_FLOWQUEUE)
717 		return pfq_fqcodel_ops;
718 	return (/* pfq_default_ops */ NULL);
719 }
720 
/*
 * Helpers feeding rule members into an MD5 context.  All of them expect a
 * variable named `ctx' (MD5_CTX *) to be in scope at the point of use.
 */

/* Hash the raw bytes of member `elm' of `*st'. */
#define PF_MD5_UPD(st, elm)						\
		MD5Update(ctx, (u_int8_t *) &(st)->elm, sizeof((st)->elm))

/* Hash string member `elm' up to, but not including, its terminating NUL. */
#define PF_MD5_UPD_STR(st, elm)						\
		MD5Update(ctx, (u_int8_t *) (st)->elm, strlen((st)->elm))

/*
 * Hash a 32-bit member after converting it with htonl(); `stor' is
 * caller-provided scratch storage for the converted value.
 */
#define PF_MD5_UPD_HTONL(st, elm, stor) do {				\
		(stor) = htonl((st)->elm);				\
		MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int32_t));\
} while (0)

/* 16-bit counterpart of PF_MD5_UPD_HTONL, converting with htons(). */
#define PF_MD5_UPD_HTONS(st, elm, stor) do {				\
		(stor) = htons((st)->elm);				\
		MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int16_t));\
} while (0)
736 
737 void
738 pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr)
739 {
740 	PF_MD5_UPD(pfr, addr.type);
741 	switch (pfr->addr.type) {
742 		case PF_ADDR_DYNIFTL:
743 			PF_MD5_UPD(pfr, addr.v.ifname);
744 			PF_MD5_UPD(pfr, addr.iflags);
745 			break;
746 		case PF_ADDR_TABLE:
747 			if (strncmp(pfr->addr.v.tblname, PF_OPTIMIZER_TABLE_PFX,
748 			    strlen(PF_OPTIMIZER_TABLE_PFX)))
749 				PF_MD5_UPD(pfr, addr.v.tblname);
750 			break;
751 		case PF_ADDR_ADDRMASK:
752 			/* XXX ignore af? */
753 			PF_MD5_UPD(pfr, addr.v.a.addr.addr32);
754 			PF_MD5_UPD(pfr, addr.v.a.mask.addr32);
755 			break;
756 		case PF_ADDR_RTLABEL:
757 			PF_MD5_UPD(pfr, addr.v.rtlabelname);
758 			break;
759 	}
760 
761 	PF_MD5_UPD(pfr, port[0]);
762 	PF_MD5_UPD(pfr, port[1]);
763 	PF_MD5_UPD(pfr, neg);
764 	PF_MD5_UPD(pfr, port_op);
765 }
766 
/*
 * Feed the fields of `rule' listed below into the MD5 context.
 *
 * The order of the updates determines the resulting digest and must not be
 * changed.  Members hashed through the HTONS/HTONL helpers are converted
 * with htons()/htonl() first; x and y are scratch storage for the converted
 * value.
 */
void
pf_hash_rule(MD5_CTX *ctx, struct pf_rule *rule)
{
	u_int16_t x;
	u_int32_t y;

	pf_hash_rule_addr(ctx, &rule->src);
	pf_hash_rule_addr(ctx, &rule->dst);
	/* string members are hashed without their terminating NUL */
	PF_MD5_UPD_STR(rule, label);
	PF_MD5_UPD_STR(rule, ifname);
	PF_MD5_UPD_STR(rule, rcv_ifname);
	PF_MD5_UPD_STR(rule, match_tagname);
	PF_MD5_UPD_HTONS(rule, match_tag, x); /* dup? */
	PF_MD5_UPD_HTONL(rule, os_fingerprint, y);
	PF_MD5_UPD_HTONL(rule, prob, y);
	PF_MD5_UPD_HTONL(rule, uid.uid[0], y);
	PF_MD5_UPD_HTONL(rule, uid.uid[1], y);
	PF_MD5_UPD(rule, uid.op);
	PF_MD5_UPD_HTONL(rule, gid.gid[0], y);
	PF_MD5_UPD_HTONL(rule, gid.gid[1], y);
	PF_MD5_UPD(rule, gid.op);
	PF_MD5_UPD_HTONL(rule, rule_flag, y);
	PF_MD5_UPD(rule, action);
	PF_MD5_UPD(rule, direction);
	PF_MD5_UPD(rule, af);
	PF_MD5_UPD(rule, quick);
	PF_MD5_UPD(rule, ifnot);
	PF_MD5_UPD(rule, rcvifnot);
	PF_MD5_UPD(rule, match_tag_not);
	PF_MD5_UPD(rule, keep_state);
	PF_MD5_UPD(rule, proto);
	PF_MD5_UPD(rule, type);
	PF_MD5_UPD(rule, code);
	PF_MD5_UPD(rule, flags);
	PF_MD5_UPD(rule, flagset);
	PF_MD5_UPD(rule, allow_opts);
	PF_MD5_UPD(rule, rt);
	PF_MD5_UPD(rule, tos);
}
806 
807 int
808 pf_commit_rules(u_int32_t ticket, char *anchor)
809 {
810 	struct pf_ruleset	*rs;
811 	struct pf_rule		*rule;
812 	struct pf_rulequeue	*old_rules;
813 	u_int32_t		 old_rcount;
814 
815 	/* Make sure any expired rules get removed from active rules first. */
816 	pf_purge_expired_rules();
817 
818 	rs = pf_find_ruleset(anchor);
819 	if (rs == NULL || !rs->rules.inactive.open ||
820 	    ticket != rs->rules.inactive.ticket)
821 		return (EBUSY);
822 
823 	if (rs == &pf_main_ruleset)
824 		pf_calc_chksum(rs);
825 
826 	/* Swap rules, keep the old. */
827 	old_rules = rs->rules.active.ptr;
828 	old_rcount = rs->rules.active.rcount;
829 
830 	rs->rules.active.ptr = rs->rules.inactive.ptr;
831 	rs->rules.active.rcount = rs->rules.inactive.rcount;
832 	rs->rules.inactive.ptr = old_rules;
833 	rs->rules.inactive.rcount = old_rcount;
834 
835 	rs->rules.active.ticket = rs->rules.inactive.ticket;
836 	pf_calc_skip_steps(rs->rules.active.ptr);
837 
838 
839 	/* Purge the old rule list. */
840 	while ((rule = TAILQ_FIRST(old_rules)) != NULL)
841 		pf_rm_rule(old_rules, rule);
842 	rs->rules.inactive.rcount = 0;
843 	rs->rules.inactive.open = 0;
844 	pf_remove_if_empty_ruleset(rs);
845 
846 	/* queue defs only in the main ruleset */
847 	if (anchor[0])
848 		return (0);
849 	return (pf_commit_queues());
850 }
851 
852 void
853 pf_calc_chksum(struct pf_ruleset *rs)
854 {
855 	MD5_CTX			 ctx;
856 	struct pf_rule		*rule;
857 	u_int8_t		 digest[PF_MD5_DIGEST_LENGTH];
858 
859 	MD5Init(&ctx);
860 
861 	if (rs->rules.inactive.rcount) {
862 		TAILQ_FOREACH(rule, rs->rules.inactive.ptr, entries) {
863 			pf_hash_rule(&ctx, rule);
864 		}
865 	}
866 
867 	MD5Final(digest, &ctx);
868 	memcpy(pf_status.pf_chksum, digest, sizeof(pf_status.pf_chksum));
869 }
870 
871 int
872 pf_addr_setup(struct pf_ruleset *ruleset, struct pf_addr_wrap *addr,
873     sa_family_t af)
874 {
875 	if (pfi_dynaddr_setup(addr, af) ||
876 	    pf_tbladdr_setup(ruleset, addr) ||
877 	    pf_rtlabel_add(addr))
878 		return (EINVAL);
879 
880 	return (0);
881 }
882 
883 int
884 pf_kif_setup(char *ifname, struct pfi_kif **kif)
885 {
886 	if (ifname[0]) {
887 		*kif = pfi_kif_get(ifname);
888 		if (*kif == NULL)
889 			return (EINVAL);
890 
891 		pfi_kif_ref(*kif, PFI_KIF_REF_RULE);
892 	} else
893 		*kif = NULL;
894 
895 	return (0);
896 }
897 
/*
 * Run the copyout helpers for an address wrapper: dynamic interface
 * address, table address, then route label (see pf_rtlabel_copyout()).
 */
void
pf_addr_copyout(struct pf_addr_wrap *addr)
{
	pfi_dynaddr_copyout(addr);
	pf_tbladdr_copyout(addr);
	pf_rtlabel_copyout(addr);
}
905 
906 int
907 pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
908 {
909 	int			 error = 0;
910 
911 	/* XXX keep in sync with switch() below */
912 	if (securelevel > 1)
913 		switch (cmd) {
914 		case DIOCGETRULES:
915 		case DIOCGETRULE:
916 		case DIOCGETSTATE:
917 		case DIOCSETSTATUSIF:
918 		case DIOCGETSTATUS:
919 		case DIOCCLRSTATUS:
920 		case DIOCNATLOOK:
921 		case DIOCSETDEBUG:
922 		case DIOCGETSTATES:
923 		case DIOCGETTIMEOUT:
924 		case DIOCGETLIMIT:
925 		case DIOCGETRULESETS:
926 		case DIOCGETRULESET:
927 		case DIOCGETQUEUES:
928 		case DIOCGETQUEUE:
929 		case DIOCGETQSTATS:
930 		case DIOCRGETTABLES:
931 		case DIOCRGETTSTATS:
932 		case DIOCRCLRTSTATS:
933 		case DIOCRCLRADDRS:
934 		case DIOCRADDADDRS:
935 		case DIOCRDELADDRS:
936 		case DIOCRSETADDRS:
937 		case DIOCRGETADDRS:
938 		case DIOCRGETASTATS:
939 		case DIOCRCLRASTATS:
940 		case DIOCRTSTADDRS:
941 		case DIOCOSFPGET:
942 		case DIOCGETSRCNODES:
943 		case DIOCCLRSRCNODES:
944 		case DIOCIGETIFACES:
945 		case DIOCSETIFFLAG:
946 		case DIOCCLRIFFLAG:
947 		case DIOCGETSYNFLWATS:
948 			break;
949 		case DIOCRCLRTABLES:
950 		case DIOCRADDTABLES:
951 		case DIOCRDELTABLES:
952 		case DIOCRSETTFLAGS:
953 			if (((struct pfioc_table *)addr)->pfrio_flags &
954 			    PFR_FLAG_DUMMY)
955 				break; /* dummy operation ok */
956 			return (EPERM);
957 		default:
958 			return (EPERM);
959 		}
960 
961 	if (!(flags & FWRITE))
962 		switch (cmd) {
963 		case DIOCGETRULES:
964 		case DIOCGETSTATE:
965 		case DIOCGETSTATUS:
966 		case DIOCGETSTATES:
967 		case DIOCGETTIMEOUT:
968 		case DIOCGETLIMIT:
969 		case DIOCGETRULESETS:
970 		case DIOCGETRULESET:
971 		case DIOCGETQUEUES:
972 		case DIOCGETQUEUE:
973 		case DIOCGETQSTATS:
974 		case DIOCNATLOOK:
975 		case DIOCRGETTABLES:
976 		case DIOCRGETTSTATS:
977 		case DIOCRGETADDRS:
978 		case DIOCRGETASTATS:
979 		case DIOCRTSTADDRS:
980 		case DIOCOSFPGET:
981 		case DIOCGETSRCNODES:
982 		case DIOCIGETIFACES:
983 		case DIOCGETSYNFLWATS:
984 			break;
985 		case DIOCRCLRTABLES:
986 		case DIOCRADDTABLES:
987 		case DIOCRDELTABLES:
988 		case DIOCRCLRTSTATS:
989 		case DIOCRCLRADDRS:
990 		case DIOCRADDADDRS:
991 		case DIOCRDELADDRS:
992 		case DIOCRSETADDRS:
993 		case DIOCRSETTFLAGS:
994 			if (((struct pfioc_table *)addr)->pfrio_flags &
995 			    PFR_FLAG_DUMMY) {
996 				flags |= FWRITE; /* need write lock for dummy */
997 				break; /* dummy operation ok */
998 			}
999 			return (EACCES);
1000 		case DIOCGETRULE:
1001 			if (((struct pfioc_rule *)addr)->action ==
1002 			    PF_GET_CLR_CNTR)
1003 				return (EACCES);
1004 			break;
1005 		default:
1006 			return (EACCES);
1007 		}
1008 
1009 	NET_LOCK();
1010 	switch (cmd) {
1011 
1012 	case DIOCSTART:
1013 		PF_LOCK();
1014 		if (pf_status.running)
1015 			error = EEXIST;
1016 		else {
1017 			pf_status.running = 1;
1018 			pf_status.since = getuptime();
1019 			if (pf_status.stateid == 0) {
1020 				pf_status.stateid = gettime();
1021 				pf_status.stateid = pf_status.stateid << 32;
1022 			}
1023 			timeout_add_sec(&pf_purge_to, 1);
1024 			pf_create_queues();
1025 			DPFPRINTF(LOG_NOTICE, "pf: started");
1026 		}
1027 		PF_UNLOCK();
1028 		break;
1029 
1030 	case DIOCSTOP:
1031 		PF_LOCK();
1032 		if (!pf_status.running)
1033 			error = ENOENT;
1034 		else {
1035 			pf_status.running = 0;
1036 			pf_status.since = getuptime();
1037 			pf_remove_queues();
1038 			DPFPRINTF(LOG_NOTICE, "pf: stopped");
1039 		}
1040 		PF_UNLOCK();
1041 		break;
1042 
1043 	case DIOCGETQUEUES: {
1044 		struct pfioc_queue	*pq = (struct pfioc_queue *)addr;
1045 		struct pf_queuespec	*qs;
1046 		u_int32_t		 nr = 0;
1047 
1048 		PF_LOCK();
1049 		pq->ticket = pf_main_ruleset.rules.active.ticket;
1050 
1051 		/* save state to not run over them all each time? */
1052 		qs = TAILQ_FIRST(pf_queues_active);
1053 		while (qs != NULL) {
1054 			qs = TAILQ_NEXT(qs, entries);
1055 			nr++;
1056 		}
1057 		pq->nr = nr;
1058 		PF_UNLOCK();
1059 		break;
1060 	}
1061 
1062 	case DIOCGETQUEUE: {
1063 		struct pfioc_queue	*pq = (struct pfioc_queue *)addr;
1064 		struct pf_queuespec	*qs;
1065 		u_int32_t		 nr = 0;
1066 
1067 		PF_LOCK();
1068 		if (pq->ticket != pf_main_ruleset.rules.active.ticket) {
1069 			error = EBUSY;
1070 			PF_UNLOCK();
1071 			break;
1072 		}
1073 
1074 		/* save state to not run over them all each time? */
1075 		qs = TAILQ_FIRST(pf_queues_active);
1076 		while ((qs != NULL) && (nr++ < pq->nr))
1077 			qs = TAILQ_NEXT(qs, entries);
1078 		if (qs == NULL) {
1079 			error = EBUSY;
1080 			PF_UNLOCK();
1081 			break;
1082 		}
1083 		memcpy(&pq->queue, qs, sizeof(pq->queue));
1084 		PF_UNLOCK();
1085 		break;
1086 	}
1087 
1088 	case DIOCGETQSTATS: {
1089 		struct pfioc_qstats	*pq = (struct pfioc_qstats *)addr;
1090 		struct pf_queuespec	*qs;
1091 		u_int32_t		 nr;
1092 		int			 nbytes;
1093 
1094 		PF_LOCK();
1095 		if (pq->ticket != pf_main_ruleset.rules.active.ticket) {
1096 			error = EBUSY;
1097 			PF_UNLOCK();
1098 			break;
1099 		}
1100 		nbytes = pq->nbytes;
1101 		nr = 0;
1102 
1103 		/* save state to not run over them all each time? */
1104 		qs = TAILQ_FIRST(pf_queues_active);
1105 		while ((qs != NULL) && (nr++ < pq->nr))
1106 			qs = TAILQ_NEXT(qs, entries);
1107 		if (qs == NULL) {
1108 			error = EBUSY;
1109 			PF_UNLOCK();
1110 			break;
1111 		}
1112 		memcpy(&pq->queue, qs, sizeof(pq->queue));
1113 		/* It's a root flow queue but is not an HFSC root class */
1114 		if ((qs->flags & PFQS_FLOWQUEUE) && qs->parent_qid == 0 &&
1115 		    !(qs->flags & PFQS_ROOTCLASS))
1116 			error = pfq_fqcodel_ops->pfq_qstats(qs, pq->buf,
1117 			    &nbytes);
1118 		else
1119 			error = pfq_hfsc_ops->pfq_qstats(qs, pq->buf,
1120 			    &nbytes);
1121 		if (error == 0)
1122 			pq->nbytes = nbytes;
1123 		PF_UNLOCK();
1124 		break;
1125 	}
1126 
1127 	case DIOCADDQUEUE: {
1128 		struct pfioc_queue	*q = (struct pfioc_queue *)addr;
1129 		struct pf_queuespec	*qs;
1130 
1131 		PF_LOCK();
1132 		if (q->ticket != pf_main_ruleset.rules.inactive.ticket) {
1133 			error = EBUSY;
1134 			PF_UNLOCK();
1135 			break;
1136 		}
1137 		qs = pool_get(&pf_queue_pl, PR_WAITOK|PR_LIMITFAIL|PR_ZERO);
1138 		if (qs == NULL) {
1139 			error = ENOMEM;
1140 			PF_UNLOCK();
1141 			break;
1142 		}
1143 		memcpy(qs, &q->queue, sizeof(*qs));
1144 		qs->qid = pf_qname2qid(qs->qname, 1);
1145 		if (qs->qid == 0) {
1146 			pool_put(&pf_queue_pl, qs);
1147 			error = EBUSY;
1148 			PF_UNLOCK();
1149 			break;
1150 		}
1151 		if (qs->parent[0] && (qs->parent_qid =
1152 		    pf_qname2qid(qs->parent, 0)) == 0) {
1153 			pool_put(&pf_queue_pl, qs);
1154 			error = ESRCH;
1155 			PF_UNLOCK();
1156 			break;
1157 		}
1158 		qs->kif = pfi_kif_get(qs->ifname);
1159 		if (qs->kif == NULL) {
1160 			pool_put(&pf_queue_pl, qs);
1161 			error = ESRCH;
1162 			PF_UNLOCK();
1163 			break;
1164 		}
1165 		/* XXX resolve bw percentage specs */
1166 		pfi_kif_ref(qs->kif, PFI_KIF_REF_RULE);
1167 
1168 		TAILQ_INSERT_TAIL(pf_queues_inactive, qs, entries);
1169 		PF_UNLOCK();
1170 
1171 		break;
1172 	}
1173 
1174 	case DIOCADDRULE: {
1175 		struct pfioc_rule	*pr = (struct pfioc_rule *)addr;
1176 		struct pf_ruleset	*ruleset;
1177 		struct pf_rule		*rule, *tail;
1178 
1179 		PF_LOCK();
1180 		pr->anchor[sizeof(pr->anchor) - 1] = '\0';
1181 		ruleset = pf_find_ruleset(pr->anchor);
1182 		if (ruleset == NULL) {
1183 			error = EINVAL;
1184 			PF_UNLOCK();
1185 			break;
1186 		}
1187 		if (pr->rule.return_icmp >> 8 > ICMP_MAXTYPE) {
1188 			error = EINVAL;
1189 			PF_UNLOCK();
1190 			break;
1191 		}
1192 		if (pr->ticket != ruleset->rules.inactive.ticket) {
1193 			error = EBUSY;
1194 			PF_UNLOCK();
1195 			break;
1196 		}
1197 		rule = pool_get(&pf_rule_pl, PR_WAITOK|PR_LIMITFAIL|PR_ZERO);
1198 		if (rule == NULL) {
1199 			error = ENOMEM;
1200 			PF_UNLOCK();
1201 			break;
1202 		}
1203 		if ((error = pf_rule_copyin(&pr->rule, rule, ruleset))) {
1204 			pf_rm_rule(NULL, rule);
1205 			rule = NULL;
1206 			PF_UNLOCK();
1207 			break;
1208 		}
1209 		rule->cuid = p->p_ucred->cr_ruid;
1210 		rule->cpid = p->p_p->ps_pid;
1211 
1212 		switch (rule->af) {
1213 		case 0:
1214 			break;
1215 		case AF_INET:
1216 			break;
1217 #ifdef INET6
1218 		case AF_INET6:
1219 			break;
1220 #endif /* INET6 */
1221 		default:
1222 			pf_rm_rule(NULL, rule);
1223 			rule = NULL;
1224 			error = EAFNOSUPPORT;
1225 			PF_UNLOCK();
1226 			goto fail;
1227 		}
1228 		tail = TAILQ_LAST(ruleset->rules.inactive.ptr,
1229 		    pf_rulequeue);
1230 		if (tail)
1231 			rule->nr = tail->nr + 1;
1232 		else
1233 			rule->nr = 0;
1234 
1235 		if (rule->src.addr.type == PF_ADDR_NONE ||
1236 		    rule->dst.addr.type == PF_ADDR_NONE)
1237 			error = EINVAL;
1238 
1239 		if (pf_addr_setup(ruleset, &rule->src.addr, rule->af))
1240 			error = EINVAL;
1241 		if (pf_addr_setup(ruleset, &rule->dst.addr, rule->af))
1242 			error = EINVAL;
1243 		if (pf_addr_setup(ruleset, &rule->rdr.addr, rule->af))
1244 			error = EINVAL;
1245 		if (pf_addr_setup(ruleset, &rule->nat.addr, rule->af))
1246 			error = EINVAL;
1247 		if (pf_addr_setup(ruleset, &rule->route.addr, rule->af))
1248 			error = EINVAL;
1249 		if (pf_anchor_setup(rule, ruleset, pr->anchor_call))
1250 			error = EINVAL;
1251 		if (rule->rt && !rule->direction)
1252 			error = EINVAL;
1253 		if (rule->scrub_flags & PFSTATE_SETPRIO &&
1254 		    (rule->set_prio[0] > IFQ_MAXPRIO ||
1255 		    rule->set_prio[1] > IFQ_MAXPRIO))
1256 			error = EINVAL;
1257 
1258 		if (error) {
1259 			pf_rm_rule(NULL, rule);
1260 			PF_UNLOCK();
1261 			break;
1262 		}
1263 		TAILQ_INSERT_TAIL(ruleset->rules.inactive.ptr,
1264 		    rule, entries);
1265 		rule->ruleset = ruleset;
1266 		ruleset->rules.inactive.rcount++;
1267 		PF_UNLOCK();
1268 		break;
1269 	}
1270 
1271 	case DIOCGETRULES: {
1272 		struct pfioc_rule	*pr = (struct pfioc_rule *)addr;
1273 		struct pf_ruleset	*ruleset;
1274 		struct pf_rule		*tail;
1275 
1276 		PF_LOCK();
1277 		pr->anchor[sizeof(pr->anchor) - 1] = '\0';
1278 		ruleset = pf_find_ruleset(pr->anchor);
1279 		if (ruleset == NULL) {
1280 			error = EINVAL;
1281 			PF_UNLOCK();
1282 			break;
1283 		}
1284 		tail = TAILQ_LAST(ruleset->rules.active.ptr, pf_rulequeue);
1285 		if (tail)
1286 			pr->nr = tail->nr + 1;
1287 		else
1288 			pr->nr = 0;
1289 		pr->ticket = ruleset->rules.active.ticket;
1290 		PF_UNLOCK();
1291 		break;
1292 	}
1293 
1294 	case DIOCGETRULE: {
1295 		struct pfioc_rule	*pr = (struct pfioc_rule *)addr;
1296 		struct pf_ruleset	*ruleset;
1297 		struct pf_rule		*rule;
1298 		int			 i;
1299 
1300 		PF_LOCK();
1301 		pr->anchor[sizeof(pr->anchor) - 1] = '\0';
1302 		ruleset = pf_find_ruleset(pr->anchor);
1303 		if (ruleset == NULL) {
1304 			error = EINVAL;
1305 			PF_UNLOCK();
1306 			break;
1307 		}
1308 		if (pr->ticket != ruleset->rules.active.ticket) {
1309 			error = EBUSY;
1310 			PF_UNLOCK();
1311 			break;
1312 		}
1313 		rule = TAILQ_FIRST(ruleset->rules.active.ptr);
1314 		while ((rule != NULL) && (rule->nr != pr->nr))
1315 			rule = TAILQ_NEXT(rule, entries);
1316 		if (rule == NULL) {
1317 			error = EBUSY;
1318 			PF_UNLOCK();
1319 			break;
1320 		}
1321 		memcpy(&pr->rule, rule, sizeof(struct pf_rule));
1322 		memset(&pr->rule.entries, 0, sizeof(pr->rule.entries));
1323 		pr->rule.kif = NULL;
1324 		pr->rule.nat.kif = NULL;
1325 		pr->rule.rdr.kif = NULL;
1326 		pr->rule.route.kif = NULL;
1327 		pr->rule.rcv_kif = NULL;
1328 		pr->rule.anchor = NULL;
1329 		pr->rule.overload_tbl = NULL;
1330 		pr->rule.pktrate.limit /= PF_THRESHOLD_MULT;
1331 		memset(&pr->rule.gcle, 0, sizeof(pr->rule.gcle));
1332 		pr->rule.ruleset = NULL;
1333 		if (pf_anchor_copyout(ruleset, rule, pr)) {
1334 			error = EBUSY;
1335 			PF_UNLOCK();
1336 			break;
1337 		}
1338 		pf_addr_copyout(&pr->rule.src.addr);
1339 		pf_addr_copyout(&pr->rule.dst.addr);
1340 		pf_addr_copyout(&pr->rule.rdr.addr);
1341 		pf_addr_copyout(&pr->rule.nat.addr);
1342 		pf_addr_copyout(&pr->rule.route.addr);
1343 		for (i = 0; i < PF_SKIP_COUNT; ++i)
1344 			if (rule->skip[i].ptr == NULL)
1345 				pr->rule.skip[i].nr = (u_int32_t)-1;
1346 			else
1347 				pr->rule.skip[i].nr =
1348 				    rule->skip[i].ptr->nr;
1349 
1350 		if (pr->action == PF_GET_CLR_CNTR) {
1351 			rule->evaluations = 0;
1352 			rule->packets[0] = rule->packets[1] = 0;
1353 			rule->bytes[0] = rule->bytes[1] = 0;
1354 			rule->states_tot = 0;
1355 		}
1356 		PF_UNLOCK();
1357 		break;
1358 	}
1359 
1360 	case DIOCCHANGERULE: {
1361 		struct pfioc_rule	*pcr = (struct pfioc_rule *)addr;
1362 		struct pf_ruleset	*ruleset;
1363 		struct pf_rule		*oldrule = NULL, *newrule = NULL;
1364 		u_int32_t		 nr = 0;
1365 
1366 		if (pcr->action < PF_CHANGE_ADD_HEAD ||
1367 		    pcr->action > PF_CHANGE_GET_TICKET) {
1368 			error = EINVAL;
1369 			break;
1370 		}
1371 		PF_LOCK();
1372 		ruleset = pf_find_ruleset(pcr->anchor);
1373 		if (ruleset == NULL) {
1374 			error = EINVAL;
1375 			PF_UNLOCK();
1376 			break;
1377 		}
1378 
1379 		if (pcr->action == PF_CHANGE_GET_TICKET) {
1380 			pcr->ticket = ++ruleset->rules.active.ticket;
1381 			PF_UNLOCK();
1382 			break;
1383 		} else {
1384 			if (pcr->ticket !=
1385 			    ruleset->rules.active.ticket) {
1386 				error = EINVAL;
1387 				PF_UNLOCK();
1388 				break;
1389 			}
1390 			if (pcr->rule.return_icmp >> 8 > ICMP_MAXTYPE) {
1391 				error = EINVAL;
1392 				PF_UNLOCK();
1393 				break;
1394 			}
1395 		}
1396 
1397 		if (pcr->action != PF_CHANGE_REMOVE) {
1398 			newrule = pool_get(&pf_rule_pl,
1399 			    PR_WAITOK|PR_LIMITFAIL|PR_ZERO);
1400 			if (newrule == NULL) {
1401 				error = ENOMEM;
1402 				PF_UNLOCK();
1403 				break;
1404 			}
1405 			pf_rule_copyin(&pcr->rule, newrule, ruleset);
1406 			newrule->cuid = p->p_ucred->cr_ruid;
1407 			newrule->cpid = p->p_p->ps_pid;
1408 
1409 			switch (newrule->af) {
1410 			case 0:
1411 				break;
1412 			case AF_INET:
1413 				break;
1414 #ifdef INET6
1415 			case AF_INET6:
1416 				break;
1417 #endif /* INET6 */
1418 			default:
1419 				pf_rm_rule(NULL, newrule);
1420 				error = EAFNOSUPPORT;
1421 				PF_UNLOCK();
1422 				goto fail;
1423 			}
1424 
1425 			if (newrule->rt && !newrule->direction)
1426 				error = EINVAL;
1427 			if (pf_addr_setup(ruleset, &newrule->src.addr, newrule->af))
1428 				error = EINVAL;
1429 			if (pf_addr_setup(ruleset, &newrule->dst.addr, newrule->af))
1430 				error = EINVAL;
1431 			if (pf_addr_setup(ruleset, &newrule->rdr.addr, newrule->af))
1432 				error = EINVAL;
1433 			if (pf_addr_setup(ruleset, &newrule->nat.addr, newrule->af))
1434 				error = EINVAL;
1435 			if (pf_addr_setup(ruleset, &newrule->route.addr, newrule->af))
1436 				error = EINVAL;
1437 			if (pf_anchor_setup(newrule, ruleset, pcr->anchor_call))
1438 				error = EINVAL;
1439 
1440 			if (error) {
1441 				pf_rm_rule(NULL, newrule);
1442 				PF_UNLOCK();
1443 				break;
1444 			}
1445 		}
1446 
1447 		if (pcr->action == PF_CHANGE_ADD_HEAD)
1448 			oldrule = TAILQ_FIRST(ruleset->rules.active.ptr);
1449 		else if (pcr->action == PF_CHANGE_ADD_TAIL)
1450 			oldrule = TAILQ_LAST(ruleset->rules.active.ptr,
1451 			    pf_rulequeue);
1452 		else {
1453 			oldrule = TAILQ_FIRST(ruleset->rules.active.ptr);
1454 			while ((oldrule != NULL) && (oldrule->nr != pcr->nr))
1455 				oldrule = TAILQ_NEXT(oldrule, entries);
1456 			if (oldrule == NULL) {
1457 				if (newrule != NULL)
1458 					pf_rm_rule(NULL, newrule);
1459 				error = EINVAL;
1460 				PF_UNLOCK();
1461 				break;
1462 			}
1463 		}
1464 
1465 		if (pcr->action == PF_CHANGE_REMOVE) {
1466 			pf_rm_rule(ruleset->rules.active.ptr, oldrule);
1467 			ruleset->rules.active.rcount--;
1468 		} else {
1469 			if (oldrule == NULL)
1470 				TAILQ_INSERT_TAIL(
1471 				    ruleset->rules.active.ptr,
1472 				    newrule, entries);
1473 			else if (pcr->action == PF_CHANGE_ADD_HEAD ||
1474 			    pcr->action == PF_CHANGE_ADD_BEFORE)
1475 				TAILQ_INSERT_BEFORE(oldrule, newrule, entries);
1476 			else
1477 				TAILQ_INSERT_AFTER(
1478 				    ruleset->rules.active.ptr,
1479 				    oldrule, newrule, entries);
1480 			ruleset->rules.active.rcount++;
1481 		}
1482 
1483 		nr = 0;
1484 		TAILQ_FOREACH(oldrule, ruleset->rules.active.ptr, entries)
1485 			oldrule->nr = nr++;
1486 
1487 		ruleset->rules.active.ticket++;
1488 
1489 		pf_calc_skip_steps(ruleset->rules.active.ptr);
1490 		pf_remove_if_empty_ruleset(ruleset);
1491 
1492 		PF_UNLOCK();
1493 		break;
1494 	}
1495 
1496 	case DIOCCLRSTATES: {
1497 		struct pf_state		*s, *nexts;
1498 		struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr;
1499 		u_int			 killed = 0;
1500 
1501 		PF_LOCK();
1502 		PF_STATE_ENTER_WRITE();
1503 		for (s = RB_MIN(pf_state_tree_id, &tree_id); s; s = nexts) {
1504 			nexts = RB_NEXT(pf_state_tree_id, &tree_id, s);
1505 
1506 			if (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname,
1507 			    s->kif->pfik_name)) {
1508 #if NPFSYNC > 0
1509 				/* don't send out individual delete messages */
1510 				SET(s->state_flags, PFSTATE_NOSYNC);
1511 #endif	/* NPFSYNC > 0 */
1512 				pf_remove_state(s);
1513 				killed++;
1514 			}
1515 		}
1516 		PF_STATE_EXIT_WRITE();
1517 		psk->psk_killed = killed;
1518 #if NPFSYNC > 0
1519 		pfsync_clear_states(pf_status.hostid, psk->psk_ifname);
1520 #endif	/* NPFSYNC > 0 */
1521 		PF_UNLOCK();
1522 		break;
1523 	}
1524 
1525 	case DIOCKILLSTATES: {
1526 		struct pf_state		*s, *nexts;
1527 		struct pf_state_item	*si, *sit;
1528 		struct pf_state_key	*sk, key;
1529 		struct pf_addr		*srcaddr, *dstaddr;
1530 		u_int16_t		 srcport, dstport;
1531 		struct pfioc_state_kill	*psk = (struct pfioc_state_kill *)addr;
1532 		u_int			 i, killed = 0;
1533 		const int 		 dirs[] = { PF_IN, PF_OUT };
1534 		int			 sidx, didx;
1535 
1536 		if (psk->psk_pfcmp.id) {
1537 			if (psk->psk_pfcmp.creatorid == 0)
1538 				psk->psk_pfcmp.creatorid = pf_status.hostid;
1539 			PF_LOCK();
1540 			PF_STATE_ENTER_WRITE();
1541 			if ((s = pf_find_state_byid(&psk->psk_pfcmp))) {
1542 				pf_remove_state(s);
1543 				psk->psk_killed = 1;
1544 			}
1545 			PF_STATE_EXIT_WRITE();
1546 			PF_UNLOCK();
1547 			break;
1548 		}
1549 
1550 		if (psk->psk_af && psk->psk_proto &&
1551 		    psk->psk_src.port_op == PF_OP_EQ &&
1552 		    psk->psk_dst.port_op == PF_OP_EQ) {
1553 
1554 			key.af = psk->psk_af;
1555 			key.proto = psk->psk_proto;
1556 			key.rdomain = psk->psk_rdomain;
1557 
1558 			PF_LOCK();
1559 			PF_STATE_ENTER_WRITE();
1560 			for (i = 0; i < nitems(dirs); i++) {
1561 				if (dirs[i] == PF_IN) {
1562 					sidx = 0;
1563 					didx = 1;
1564 				} else {
1565 					sidx = 1;
1566 					didx = 0;
1567 				}
1568 				pf_addrcpy(&key.addr[sidx],
1569 				    &psk->psk_src.addr.v.a.addr, key.af);
1570 				pf_addrcpy(&key.addr[didx],
1571 				    &psk->psk_dst.addr.v.a.addr, key.af);
1572 				key.port[sidx] = psk->psk_src.port[0];
1573 				key.port[didx] = psk->psk_dst.port[0];
1574 
1575 				sk = RB_FIND(pf_state_tree, &pf_statetbl, &key);
1576 				if (sk == NULL)
1577 					continue;
1578 
1579 				TAILQ_FOREACH_SAFE(si, &sk->states, entry, sit)
1580 					if (((si->s->key[PF_SK_WIRE]->af ==
1581 					    si->s->key[PF_SK_STACK]->af &&
1582 					    sk == (dirs[i] == PF_IN ?
1583 					    si->s->key[PF_SK_WIRE] :
1584 					    si->s->key[PF_SK_STACK])) ||
1585 					    (si->s->key[PF_SK_WIRE]->af !=
1586 					    si->s->key[PF_SK_STACK]->af &&
1587 					    dirs[i] == PF_IN &&
1588 					    (sk == si->s->key[PF_SK_STACK] ||
1589 					    sk == si->s->key[PF_SK_WIRE]))) &&
1590 					    (!psk->psk_ifname[0] ||
1591 					    (si->s->kif != pfi_all &&
1592 					    !strcmp(psk->psk_ifname,
1593 					    si->s->kif->pfik_name)))) {
1594 						pf_remove_state(si->s);
1595 						killed++;
1596 					}
1597 			}
1598 			if (killed)
1599 				psk->psk_killed = killed;
1600 			PF_STATE_EXIT_WRITE();
1601 			PF_UNLOCK();
1602 			break;
1603 		}
1604 
1605 		PF_LOCK();
1606 		PF_STATE_ENTER_WRITE();
1607 		for (s = RB_MIN(pf_state_tree_id, &tree_id); s;
1608 		    s = nexts) {
1609 			nexts = RB_NEXT(pf_state_tree_id, &tree_id, s);
1610 
1611 			if (s->direction == PF_OUT) {
1612 				sk = s->key[PF_SK_STACK];
1613 				srcaddr = &sk->addr[1];
1614 				dstaddr = &sk->addr[0];
1615 				srcport = sk->port[1];
1616 				dstport = sk->port[0];
1617 			} else {
1618 				sk = s->key[PF_SK_WIRE];
1619 				srcaddr = &sk->addr[0];
1620 				dstaddr = &sk->addr[1];
1621 				srcport = sk->port[0];
1622 				dstport = sk->port[1];
1623 			}
1624 			if ((!psk->psk_af || sk->af == psk->psk_af)
1625 			    && (!psk->psk_proto || psk->psk_proto ==
1626 			    sk->proto) && psk->psk_rdomain == sk->rdomain &&
1627 			    pf_match_addr(psk->psk_src.neg,
1628 			    &psk->psk_src.addr.v.a.addr,
1629 			    &psk->psk_src.addr.v.a.mask,
1630 			    srcaddr, sk->af) &&
1631 			    pf_match_addr(psk->psk_dst.neg,
1632 			    &psk->psk_dst.addr.v.a.addr,
1633 			    &psk->psk_dst.addr.v.a.mask,
1634 			    dstaddr, sk->af) &&
1635 			    (psk->psk_src.port_op == 0 ||
1636 			    pf_match_port(psk->psk_src.port_op,
1637 			    psk->psk_src.port[0], psk->psk_src.port[1],
1638 			    srcport)) &&
1639 			    (psk->psk_dst.port_op == 0 ||
1640 			    pf_match_port(psk->psk_dst.port_op,
1641 			    psk->psk_dst.port[0], psk->psk_dst.port[1],
1642 			    dstport)) &&
1643 			    (!psk->psk_label[0] || (s->rule.ptr->label[0] &&
1644 			    !strcmp(psk->psk_label, s->rule.ptr->label))) &&
1645 			    (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname,
1646 			    s->kif->pfik_name))) {
1647 				pf_remove_state(s);
1648 				killed++;
1649 			}
1650 		}
1651 		psk->psk_killed = killed;
1652 		PF_STATE_EXIT_WRITE();
1653 		PF_UNLOCK();
1654 		break;
1655 	}
1656 
#if NPFSYNC > 0
	/*
	 * DIOCADDSTATE: import the pfsync state in ps->state through
	 * pfsync_state_import().  EINVAL when its timeout index is not
	 * below PFTM_MAX.
	 */
	case DIOCADDSTATE: {
		struct pfioc_state	*ps = (struct pfioc_state *)addr;
		struct pfsync_state	*sp = &ps->state;

		if (sp->timeout < PFTM_MAX) {
			PF_LOCK();
			PF_STATE_ENTER_WRITE();
			error = pfsync_state_import(sp, PFSYNC_SI_IOCTL);
			PF_STATE_EXIT_WRITE();
			PF_UNLOCK();
		} else
			error = EINVAL;
		break;
	}
#endif	/* NPFSYNC > 0 */
1674 
1675 	case DIOCGETSTATE: {
1676 		struct pfioc_state	*ps = (struct pfioc_state *)addr;
1677 		struct pf_state		*s;
1678 		struct pf_state_cmp	 id_key;
1679 
1680 		memset(&id_key, 0, sizeof(id_key));
1681 		id_key.id = ps->state.id;
1682 		id_key.creatorid = ps->state.creatorid;
1683 
1684 		PF_STATE_ENTER_READ();
1685 		s = pf_find_state_byid(&id_key);
1686 		s = pf_state_ref(s);
1687 		PF_STATE_EXIT_READ();
1688 		if (s == NULL) {
1689 			error = ENOENT;
1690 			break;
1691 		}
1692 
1693 		pf_state_export(&ps->state, s);
1694 		pf_state_unref(s);
1695 		break;
1696 	}
1697 
1698 	case DIOCGETSTATES: {
1699 		struct pfioc_states	*ps = (struct pfioc_states *)addr;
1700 		struct pf_state		*state;
1701 		struct pfsync_state	*p, *pstore;
1702 		u_int32_t		 nr = 0;
1703 
1704 		if (ps->ps_len == 0) {
1705 			nr = pf_status.states;
1706 			ps->ps_len = sizeof(struct pfsync_state) * nr;
1707 			break;
1708 		}
1709 
1710 		pstore = malloc(sizeof(*pstore), M_TEMP, M_WAITOK);
1711 
1712 		p = ps->ps_states;
1713 
1714 		PF_STATE_ENTER_READ();
1715 		state = TAILQ_FIRST(&state_list);
1716 		while (state) {
1717 			if (state->timeout != PFTM_UNLINKED) {
1718 				if ((nr+1) * sizeof(*p) > ps->ps_len)
1719 					break;
1720 				pf_state_export(pstore, state);
1721 				error = copyout(pstore, p, sizeof(*p));
1722 				if (error) {
1723 					free(pstore, M_TEMP, sizeof(*pstore));
1724 					PF_STATE_EXIT_READ();
1725 					goto fail;
1726 				}
1727 				p++;
1728 				nr++;
1729 			}
1730 			state = TAILQ_NEXT(state, entry_list);
1731 		}
1732 		PF_STATE_EXIT_READ();
1733 
1734 		ps->ps_len = sizeof(struct pfsync_state) * nr;
1735 
1736 		free(pstore, M_TEMP, sizeof(*pstore));
1737 		break;
1738 	}
1739 
1740 	case DIOCGETSTATUS: {
1741 		struct pf_status *s = (struct pf_status *)addr;
1742 		PF_LOCK();
1743 		memcpy(s, &pf_status, sizeof(struct pf_status));
1744 		pfi_update_status(s->ifname, s);
1745 		PF_UNLOCK();
1746 		break;
1747 	}
1748 
1749 	case DIOCSETSTATUSIF: {
1750 		struct pfioc_iface	*pi = (struct pfioc_iface *)addr;
1751 
1752 		PF_LOCK();
1753 		if (pi->pfiio_name[0] == 0) {
1754 			memset(pf_status.ifname, 0, IFNAMSIZ);
1755 			PF_UNLOCK();
1756 			break;
1757 		}
1758 		strlcpy(pf_trans_set.statusif, pi->pfiio_name, IFNAMSIZ);
1759 		pf_trans_set.mask |= PF_TSET_STATUSIF;
1760 		PF_UNLOCK();
1761 		break;
1762 	}
1763 
1764 	case DIOCCLRSTATUS: {
1765 		struct pfioc_iface	*pi = (struct pfioc_iface *)addr;
1766 
1767 		PF_LOCK();
1768 		/* if ifname is specified, clear counters there only */
1769 		if (pi->pfiio_name[0]) {
1770 			pfi_update_status(pi->pfiio_name, NULL);
1771 			PF_UNLOCK();
1772 			break;
1773 		}
1774 
1775 		memset(pf_status.counters, 0, sizeof(pf_status.counters));
1776 		memset(pf_status.fcounters, 0, sizeof(pf_status.fcounters));
1777 		memset(pf_status.scounters, 0, sizeof(pf_status.scounters));
1778 		pf_status.since = getuptime();
1779 
1780 		PF_UNLOCK();
1781 		break;
1782 	}
1783 
1784 	case DIOCNATLOOK: {
1785 		struct pfioc_natlook	*pnl = (struct pfioc_natlook *)addr;
1786 		struct pf_state_key	*sk;
1787 		struct pf_state		*state;
1788 		struct pf_state_key_cmp	 key;
1789 		int			 m = 0, direction = pnl->direction;
1790 		int			 sidx, didx;
1791 
1792 		switch (pnl->af) {
1793 		case AF_INET:
1794 			break;
1795 #ifdef INET6
1796 		case AF_INET6:
1797 			break;
1798 #endif /* INET6 */
1799 		default:
1800 			error = EAFNOSUPPORT;
1801 			goto fail;
1802 		}
1803 
1804 		/* NATLOOK src and dst are reversed, so reverse sidx/didx */
1805 		sidx = (direction == PF_IN) ? 1 : 0;
1806 		didx = (direction == PF_IN) ? 0 : 1;
1807 
1808 		if (!pnl->proto ||
1809 		    PF_AZERO(&pnl->saddr, pnl->af) ||
1810 		    PF_AZERO(&pnl->daddr, pnl->af) ||
1811 		    ((pnl->proto == IPPROTO_TCP ||
1812 		    pnl->proto == IPPROTO_UDP) &&
1813 		    (!pnl->dport || !pnl->sport)) ||
1814 		    pnl->rdomain > RT_TABLEID_MAX)
1815 			error = EINVAL;
1816 		else {
1817 			key.af = pnl->af;
1818 			key.proto = pnl->proto;
1819 			key.rdomain = pnl->rdomain;
1820 			pf_addrcpy(&key.addr[sidx], &pnl->saddr, pnl->af);
1821 			key.port[sidx] = pnl->sport;
1822 			pf_addrcpy(&key.addr[didx], &pnl->daddr, pnl->af);
1823 			key.port[didx] = pnl->dport;
1824 
1825 			PF_STATE_ENTER_READ();
1826 			state = pf_find_state_all(&key, direction, &m);
1827 			state = pf_state_ref(state);
1828 			PF_STATE_EXIT_READ();
1829 
1830 			if (m > 1)
1831 				error = E2BIG;	/* more than one state */
1832 			else if (state != NULL) {
1833 				sk = state->key[sidx];
1834 				pf_addrcpy(&pnl->rsaddr, &sk->addr[sidx],
1835 				    sk->af);
1836 				pnl->rsport = sk->port[sidx];
1837 				pf_addrcpy(&pnl->rdaddr, &sk->addr[didx],
1838 				    sk->af);
1839 				pnl->rdport = sk->port[didx];
1840 				pnl->rrdomain = sk->rdomain;
1841 			} else
1842 				error = ENOENT;
1843 			pf_state_unref(state);
1844 		}
1845 		break;
1846 	}
1847 
1848 	case DIOCSETTIMEOUT: {
1849 		struct pfioc_tm	*pt = (struct pfioc_tm *)addr;
1850 
1851 		if (pt->timeout < 0 || pt->timeout >= PFTM_MAX ||
1852 		    pt->seconds < 0) {
1853 			error = EINVAL;
1854 			goto fail;
1855 		}
1856 		PF_LOCK();
1857 		if (pt->timeout == PFTM_INTERVAL && pt->seconds == 0)
1858 			pt->seconds = 1;
1859 		pf_default_rule_new.timeout[pt->timeout] = pt->seconds;
1860 		pt->seconds = pf_default_rule.timeout[pt->timeout];
1861 		PF_UNLOCK();
1862 		break;
1863 	}
1864 
1865 	case DIOCGETTIMEOUT: {
1866 		struct pfioc_tm	*pt = (struct pfioc_tm *)addr;
1867 
1868 		if (pt->timeout < 0 || pt->timeout >= PFTM_MAX) {
1869 			error = EINVAL;
1870 			goto fail;
1871 		}
1872 		PF_LOCK();
1873 		pt->seconds = pf_default_rule.timeout[pt->timeout];
1874 		PF_UNLOCK();
1875 		break;
1876 	}
1877 
1878 	case DIOCGETLIMIT: {
1879 		struct pfioc_limit	*pl = (struct pfioc_limit *)addr;
1880 
1881 		if (pl->index < 0 || pl->index >= PF_LIMIT_MAX) {
1882 			error = EINVAL;
1883 			goto fail;
1884 		}
1885 		PF_LOCK();
1886 		pl->limit = pf_pool_limits[pl->index].limit;
1887 		PF_UNLOCK();
1888 		break;
1889 	}
1890 
1891 	case DIOCSETLIMIT: {
1892 		struct pfioc_limit	*pl = (struct pfioc_limit *)addr;
1893 
1894 		PF_LOCK();
1895 		if (pl->index < 0 || pl->index >= PF_LIMIT_MAX ||
1896 		    pf_pool_limits[pl->index].pp == NULL) {
1897 			error = EINVAL;
1898 			PF_UNLOCK();
1899 			goto fail;
1900 		}
1901 		if (((struct pool *)pf_pool_limits[pl->index].pp)->pr_nout >
1902 		    pl->limit) {
1903 			error = EBUSY;
1904 			PF_UNLOCK();
1905 			goto fail;
1906 		}
1907 		/* Fragments reference mbuf clusters. */
1908 		if (pl->index == PF_LIMIT_FRAGS && pl->limit > nmbclust) {
1909 			error = EINVAL;
1910 			PF_UNLOCK();
1911 			goto fail;
1912 		}
1913 
1914 		pf_pool_limits[pl->index].limit_new = pl->limit;
1915 		pl->limit = pf_pool_limits[pl->index].limit;
1916 		PF_UNLOCK();
1917 		break;
1918 	}
1919 
1920 	case DIOCSETDEBUG: {
1921 		u_int32_t	*level = (u_int32_t *)addr;
1922 
1923 		PF_LOCK();
1924 		pf_trans_set.debug = *level;
1925 		pf_trans_set.mask |= PF_TSET_DEBUG;
1926 		PF_UNLOCK();
1927 		break;
1928 	}
1929 
1930 	case DIOCGETRULESETS: {
1931 		struct pfioc_ruleset	*pr = (struct pfioc_ruleset *)addr;
1932 		struct pf_ruleset	*ruleset;
1933 		struct pf_anchor	*anchor;
1934 
1935 		PF_LOCK();
1936 		pr->path[sizeof(pr->path) - 1] = '\0';
1937 		if ((ruleset = pf_find_ruleset(pr->path)) == NULL) {
1938 			error = EINVAL;
1939 			PF_UNLOCK();
1940 			break;
1941 		}
1942 		pr->nr = 0;
1943 		if (ruleset == &pf_main_ruleset) {
1944 			/* XXX kludge for pf_main_ruleset */
1945 			RB_FOREACH(anchor, pf_anchor_global, &pf_anchors)
1946 				if (anchor->parent == NULL)
1947 					pr->nr++;
1948 		} else {
1949 			RB_FOREACH(anchor, pf_anchor_node,
1950 			    &ruleset->anchor->children)
1951 				pr->nr++;
1952 		}
1953 		PF_UNLOCK();
1954 		break;
1955 	}
1956 
1957 	case DIOCGETRULESET: {
1958 		struct pfioc_ruleset	*pr = (struct pfioc_ruleset *)addr;
1959 		struct pf_ruleset	*ruleset;
1960 		struct pf_anchor	*anchor;
1961 		u_int32_t		 nr = 0;
1962 
1963 		PF_LOCK();
1964 		pr->path[sizeof(pr->path) - 1] = '\0';
1965 		if ((ruleset = pf_find_ruleset(pr->path)) == NULL) {
1966 			error = EINVAL;
1967 			PF_UNLOCK();
1968 			break;
1969 		}
1970 		pr->name[0] = '\0';
1971 		if (ruleset == &pf_main_ruleset) {
1972 			/* XXX kludge for pf_main_ruleset */
1973 			RB_FOREACH(anchor, pf_anchor_global, &pf_anchors)
1974 				if (anchor->parent == NULL && nr++ == pr->nr) {
1975 					strlcpy(pr->name, anchor->name,
1976 					    sizeof(pr->name));
1977 					break;
1978 				}
1979 		} else {
1980 			RB_FOREACH(anchor, pf_anchor_node,
1981 			    &ruleset->anchor->children)
1982 				if (nr++ == pr->nr) {
1983 					strlcpy(pr->name, anchor->name,
1984 					    sizeof(pr->name));
1985 					break;
1986 				}
1987 		}
1988 		PF_UNLOCK();
1989 		if (!pr->name[0])
1990 			error = EBUSY;
1991 		break;
1992 	}
1993 
1994 	case DIOCRCLRTABLES: {
1995 		struct pfioc_table *io = (struct pfioc_table *)addr;
1996 
1997 		if (io->pfrio_esize != 0) {
1998 			error = ENODEV;
1999 			break;
2000 		}
2001 		PF_LOCK();
2002 		error = pfr_clr_tables(&io->pfrio_table, &io->pfrio_ndel,
2003 		    io->pfrio_flags | PFR_FLAG_USERIOCTL);
2004 		PF_UNLOCK();
2005 		break;
2006 	}
2007 
2008 	case DIOCRADDTABLES: {
2009 		struct pfioc_table *io = (struct pfioc_table *)addr;
2010 
2011 		if (io->pfrio_esize != sizeof(struct pfr_table)) {
2012 			error = ENODEV;
2013 			break;
2014 		}
2015 		PF_LOCK();
2016 		error = pfr_add_tables(io->pfrio_buffer, io->pfrio_size,
2017 		    &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL);
2018 		PF_UNLOCK();
2019 		break;
2020 	}
2021 
2022 	case DIOCRDELTABLES: {
2023 		struct pfioc_table *io = (struct pfioc_table *)addr;
2024 
2025 		if (io->pfrio_esize != sizeof(struct pfr_table)) {
2026 			error = ENODEV;
2027 			break;
2028 		}
2029 		PF_LOCK();
2030 		error = pfr_del_tables(io->pfrio_buffer, io->pfrio_size,
2031 		    &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL);
2032 		PF_UNLOCK();
2033 		break;
2034 	}
2035 
2036 	case DIOCRGETTABLES: {
2037 		struct pfioc_table *io = (struct pfioc_table *)addr;
2038 
2039 		if (io->pfrio_esize != sizeof(struct pfr_table)) {
2040 			error = ENODEV;
2041 			break;
2042 		}
2043 		PF_LOCK();
2044 		error = pfr_get_tables(&io->pfrio_table, io->pfrio_buffer,
2045 		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
2046 		PF_UNLOCK();
2047 		break;
2048 	}
2049 
2050 	case DIOCRGETTSTATS: {
2051 		struct pfioc_table *io = (struct pfioc_table *)addr;
2052 
2053 		if (io->pfrio_esize != sizeof(struct pfr_tstats)) {
2054 			error = ENODEV;
2055 			break;
2056 		}
2057 		PF_LOCK();
2058 		error = pfr_get_tstats(&io->pfrio_table, io->pfrio_buffer,
2059 		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
2060 		PF_UNLOCK();
2061 		break;
2062 	}
2063 
2064 	case DIOCRCLRTSTATS: {
2065 		struct pfioc_table *io = (struct pfioc_table *)addr;
2066 
2067 		if (io->pfrio_esize != sizeof(struct pfr_table)) {
2068 			error = ENODEV;
2069 			break;
2070 		}
2071 		PF_LOCK();
2072 		error = pfr_clr_tstats(io->pfrio_buffer, io->pfrio_size,
2073 		    &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL);
2074 		PF_UNLOCK();
2075 		break;
2076 	}
2077 
2078 	case DIOCRSETTFLAGS: {
2079 		struct pfioc_table *io = (struct pfioc_table *)addr;
2080 
2081 		if (io->pfrio_esize != sizeof(struct pfr_table)) {
2082 			error = ENODEV;
2083 			break;
2084 		}
2085 		PF_LOCK();
2086 		error = pfr_set_tflags(io->pfrio_buffer, io->pfrio_size,
2087 		    io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange,
2088 		    &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL);
2089 		PF_UNLOCK();
2090 		break;
2091 	}
2092 
2093 	case DIOCRCLRADDRS: {
2094 		struct pfioc_table *io = (struct pfioc_table *)addr;
2095 
2096 		if (io->pfrio_esize != 0) {
2097 			error = ENODEV;
2098 			break;
2099 		}
2100 		PF_LOCK();
2101 		error = pfr_clr_addrs(&io->pfrio_table, &io->pfrio_ndel,
2102 		    io->pfrio_flags | PFR_FLAG_USERIOCTL);
2103 		PF_UNLOCK();
2104 		break;
2105 	}
2106 
2107 	case DIOCRADDADDRS: {
2108 		struct pfioc_table *io = (struct pfioc_table *)addr;
2109 
2110 		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
2111 			error = ENODEV;
2112 			break;
2113 		}
2114 		PF_LOCK();
2115 		error = pfr_add_addrs(&io->pfrio_table, io->pfrio_buffer,
2116 		    io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags |
2117 		    PFR_FLAG_USERIOCTL);
2118 		PF_UNLOCK();
2119 		break;
2120 	}
2121 
2122 	case DIOCRDELADDRS: {
2123 		struct pfioc_table *io = (struct pfioc_table *)addr;
2124 
2125 		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
2126 			error = ENODEV;
2127 			break;
2128 		}
2129 		PF_LOCK();
2130 		error = pfr_del_addrs(&io->pfrio_table, io->pfrio_buffer,
2131 		    io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags |
2132 		    PFR_FLAG_USERIOCTL);
2133 		PF_UNLOCK();
2134 		break;
2135 	}
2136 
2137 	case DIOCRSETADDRS: {
2138 		struct pfioc_table *io = (struct pfioc_table *)addr;
2139 
2140 		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
2141 			error = ENODEV;
2142 			break;
2143 		}
2144 		PF_LOCK();
2145 		error = pfr_set_addrs(&io->pfrio_table, io->pfrio_buffer,
2146 		    io->pfrio_size, &io->pfrio_size2, &io->pfrio_nadd,
2147 		    &io->pfrio_ndel, &io->pfrio_nchange, io->pfrio_flags |
2148 		    PFR_FLAG_USERIOCTL, 0);
2149 		PF_UNLOCK();
2150 		break;
2151 	}
2152 
2153 	case DIOCRGETADDRS: {
2154 		struct pfioc_table *io = (struct pfioc_table *)addr;
2155 
2156 		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
2157 			error = ENODEV;
2158 			break;
2159 		}
2160 		PF_LOCK();
2161 		error = pfr_get_addrs(&io->pfrio_table, io->pfrio_buffer,
2162 		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
2163 		PF_UNLOCK();
2164 		break;
2165 	}
2166 
2167 	case DIOCRGETASTATS: {
2168 		struct pfioc_table *io = (struct pfioc_table *)addr;
2169 
2170 		if (io->pfrio_esize != sizeof(struct pfr_astats)) {
2171 			error = ENODEV;
2172 			break;
2173 		}
2174 		PF_LOCK();
2175 		error = pfr_get_astats(&io->pfrio_table, io->pfrio_buffer,
2176 		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
2177 		PF_UNLOCK();
2178 		break;
2179 	}
2180 
2181 	case DIOCRCLRASTATS: {
2182 		struct pfioc_table *io = (struct pfioc_table *)addr;
2183 
2184 		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
2185 			error = ENODEV;
2186 			break;
2187 		}
2188 		PF_LOCK();
2189 		error = pfr_clr_astats(&io->pfrio_table, io->pfrio_buffer,
2190 		    io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags |
2191 		    PFR_FLAG_USERIOCTL);
2192 		PF_UNLOCK();
2193 		break;
2194 	}
2195 
2196 	case DIOCRTSTADDRS: {
2197 		struct pfioc_table *io = (struct pfioc_table *)addr;
2198 
2199 		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
2200 			error = ENODEV;
2201 			break;
2202 		}
2203 		PF_LOCK();
2204 		error = pfr_tst_addrs(&io->pfrio_table, io->pfrio_buffer,
2205 		    io->pfrio_size, &io->pfrio_nmatch, io->pfrio_flags |
2206 		    PFR_FLAG_USERIOCTL);
2207 		PF_UNLOCK();
2208 		break;
2209 	}
2210 
2211 	case DIOCRINADEFINE: {
2212 		struct pfioc_table *io = (struct pfioc_table *)addr;
2213 
2214 		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
2215 			error = ENODEV;
2216 			break;
2217 		}
2218 		PF_LOCK();
2219 		error = pfr_ina_define(&io->pfrio_table, io->pfrio_buffer,
2220 		    io->pfrio_size, &io->pfrio_nadd, &io->pfrio_naddr,
2221 		    io->pfrio_ticket, io->pfrio_flags | PFR_FLAG_USERIOCTL);
2222 		PF_UNLOCK();
2223 		break;
2224 	}
2225 
2226 	case DIOCOSFPADD: {
2227 		struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr;
2228 		PF_LOCK();
2229 		error = pf_osfp_add(io);
2230 		PF_UNLOCK();
2231 		break;
2232 	}
2233 
2234 	case DIOCOSFPGET: {
2235 		struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr;
2236 		PF_LOCK();
2237 		error = pf_osfp_get(io);
2238 		PF_UNLOCK();
2239 		break;
2240 	}
2241 
2242 	case DIOCXBEGIN: {
2243 		struct pfioc_trans	*io = (struct pfioc_trans *)addr;
2244 		struct pfioc_trans_e	*ioe;
2245 		struct pfr_table	*table;
2246 		int			 i;
2247 
2248 		if (io->esize != sizeof(*ioe)) {
2249 			error = ENODEV;
2250 			goto fail;
2251 		}
2252 		PF_LOCK();
2253 		ioe = malloc(sizeof(*ioe), M_TEMP, M_WAITOK);
2254 		table = malloc(sizeof(*table), M_TEMP, M_WAITOK);
2255 		pf_default_rule_new = pf_default_rule;
2256 		memset(&pf_trans_set, 0, sizeof(pf_trans_set));
2257 		for (i = 0; i < io->size; i++) {
2258 			if (copyin(io->array+i, ioe, sizeof(*ioe))) {
2259 				free(table, M_TEMP, sizeof(*table));
2260 				free(ioe, M_TEMP, sizeof(*ioe));
2261 				error = EFAULT;
2262 				PF_UNLOCK();
2263 				goto fail;
2264 			}
2265 			if (strnlen(ioe->anchor, sizeof(ioe->anchor)) ==
2266 			    sizeof(ioe->anchor)) {
2267 				free(table, M_TEMP, sizeof(*table));
2268 				free(ioe, M_TEMP, sizeof(*ioe));
2269 				error = ENAMETOOLONG;
2270 				PF_UNLOCK();
2271 				goto fail;
2272 			}
2273 			switch (ioe->type) {
2274 			case PF_TRANS_TABLE:
2275 				memset(table, 0, sizeof(*table));
2276 				strlcpy(table->pfrt_anchor, ioe->anchor,
2277 				    sizeof(table->pfrt_anchor));
2278 				if ((error = pfr_ina_begin(table,
2279 				    &ioe->ticket, NULL, 0))) {
2280 					free(table, M_TEMP, sizeof(*table));
2281 					free(ioe, M_TEMP, sizeof(*ioe));
2282 					PF_UNLOCK();
2283 					goto fail;
2284 				}
2285 				break;
2286 			case PF_TRANS_RULESET:
2287 				if ((error = pf_begin_rules(&ioe->ticket,
2288 				    ioe->anchor))) {
2289 					free(table, M_TEMP, sizeof(*table));
2290 					free(ioe, M_TEMP, sizeof(*ioe));
2291 					PF_UNLOCK();
2292 					goto fail;
2293 				}
2294 				break;
2295 			default:
2296 				free(table, M_TEMP, sizeof(*table));
2297 				free(ioe, M_TEMP, sizeof(*ioe));
2298 				error = EINVAL;
2299 				PF_UNLOCK();
2300 				goto fail;
2301 			}
2302 			if (copyout(ioe, io->array+i, sizeof(io->array[i]))) {
2303 				free(table, M_TEMP, sizeof(*table));
2304 				free(ioe, M_TEMP, sizeof(*ioe));
2305 				error = EFAULT;
2306 				PF_UNLOCK();
2307 				goto fail;
2308 			}
2309 		}
2310 		free(table, M_TEMP, sizeof(*table));
2311 		free(ioe, M_TEMP, sizeof(*ioe));
2312 		PF_UNLOCK();
2313 		break;
2314 	}
2315 
2316 	case DIOCXROLLBACK: {
2317 		struct pfioc_trans	*io = (struct pfioc_trans *)addr;
2318 		struct pfioc_trans_e	*ioe;
2319 		struct pfr_table	*table;
2320 		int			 i;
2321 
2322 		if (io->esize != sizeof(*ioe)) {
2323 			error = ENODEV;
2324 			goto fail;
2325 		}
2326 		PF_LOCK();
2327 		ioe = malloc(sizeof(*ioe), M_TEMP, M_WAITOK);
2328 		table = malloc(sizeof(*table), M_TEMP, M_WAITOK);
2329 		for (i = 0; i < io->size; i++) {
2330 			if (copyin(io->array+i, ioe, sizeof(*ioe))) {
2331 				free(table, M_TEMP, sizeof(*table));
2332 				free(ioe, M_TEMP, sizeof(*ioe));
2333 				error = EFAULT;
2334 				PF_UNLOCK();
2335 				goto fail;
2336 			}
2337 			if (strnlen(ioe->anchor, sizeof(ioe->anchor)) ==
2338 			    sizeof(ioe->anchor)) {
2339 				free(table, M_TEMP, sizeof(*table));
2340 				free(ioe, M_TEMP, sizeof(*ioe));
2341 				error = ENAMETOOLONG;
2342 				PF_UNLOCK();
2343 				goto fail;
2344 			}
2345 			switch (ioe->type) {
2346 			case PF_TRANS_TABLE:
2347 				memset(table, 0, sizeof(*table));
2348 				strlcpy(table->pfrt_anchor, ioe->anchor,
2349 				    sizeof(table->pfrt_anchor));
2350 				if ((error = pfr_ina_rollback(table,
2351 				    ioe->ticket, NULL, 0))) {
2352 					free(table, M_TEMP, sizeof(*table));
2353 					free(ioe, M_TEMP, sizeof(*ioe));
2354 					PF_UNLOCK();
2355 					goto fail; /* really bad */
2356 				}
2357 				break;
2358 			case PF_TRANS_RULESET:
2359 				if ((error = pf_rollback_rules(ioe->ticket,
2360 				    ioe->anchor))) {
2361 					free(table, M_TEMP, sizeof(*table));
2362 					free(ioe, M_TEMP, sizeof(*ioe));
2363 					PF_UNLOCK();
2364 					goto fail; /* really bad */
2365 				}
2366 				break;
2367 			default:
2368 				free(table, M_TEMP, sizeof(*table));
2369 				free(ioe, M_TEMP, sizeof(*ioe));
2370 				error = EINVAL;
2371 				PF_UNLOCK();
2372 				goto fail; /* really bad */
2373 			}
2374 		}
2375 		free(table, M_TEMP, sizeof(*table));
2376 		free(ioe, M_TEMP, sizeof(*ioe));
2377 		PF_UNLOCK();
2378 		break;
2379 	}
2380 
2381 	case DIOCXCOMMIT: {
2382 		struct pfioc_trans	*io = (struct pfioc_trans *)addr;
2383 		struct pfioc_trans_e	*ioe;
2384 		struct pfr_table	*table;
2385 		struct pf_ruleset	*rs;
2386 		int			 i;
2387 
2388 		if (io->esize != sizeof(*ioe)) {
2389 			error = ENODEV;
2390 			goto fail;
2391 		}
2392 		PF_LOCK();
2393 		ioe = malloc(sizeof(*ioe), M_TEMP, M_WAITOK);
2394 		table = malloc(sizeof(*table), M_TEMP, M_WAITOK);
2395 		/* first makes sure everything will succeed */
2396 		for (i = 0; i < io->size; i++) {
2397 			if (copyin(io->array+i, ioe, sizeof(*ioe))) {
2398 				free(table, M_TEMP, sizeof(*table));
2399 				free(ioe, M_TEMP, sizeof(*ioe));
2400 				error = EFAULT;
2401 				PF_UNLOCK();
2402 				goto fail;
2403 			}
2404 			if (strnlen(ioe->anchor, sizeof(ioe->anchor)) ==
2405 			    sizeof(ioe->anchor)) {
2406 				free(table, M_TEMP, sizeof(*table));
2407 				free(ioe, M_TEMP, sizeof(*ioe));
2408 				error = ENAMETOOLONG;
2409 				PF_UNLOCK();
2410 				goto fail;
2411 			}
2412 			switch (ioe->type) {
2413 			case PF_TRANS_TABLE:
2414 				rs = pf_find_ruleset(ioe->anchor);
2415 				if (rs == NULL || !rs->topen || ioe->ticket !=
2416 				     rs->tticket) {
2417 					free(table, M_TEMP, sizeof(*table));
2418 					free(ioe, M_TEMP, sizeof(*ioe));
2419 					error = EBUSY;
2420 					PF_UNLOCK();
2421 					goto fail;
2422 				}
2423 				break;
2424 			case PF_TRANS_RULESET:
2425 				rs = pf_find_ruleset(ioe->anchor);
2426 				if (rs == NULL ||
2427 				    !rs->rules.inactive.open ||
2428 				    rs->rules.inactive.ticket !=
2429 				    ioe->ticket) {
2430 					free(table, M_TEMP, sizeof(*table));
2431 					free(ioe, M_TEMP, sizeof(*ioe));
2432 					error = EBUSY;
2433 					PF_UNLOCK();
2434 					goto fail;
2435 				}
2436 				break;
2437 			default:
2438 				free(table, M_TEMP, sizeof(*table));
2439 				free(ioe, M_TEMP, sizeof(*ioe));
2440 				error = EINVAL;
2441 				PF_UNLOCK();
2442 				goto fail;
2443 			}
2444 		}
2445 
2446 		/*
2447 		 * Checked already in DIOCSETLIMIT, but check again as the
2448 		 * situation might have changed.
2449 		 */
2450 		for (i = 0; i < PF_LIMIT_MAX; i++) {
2451 			if (((struct pool *)pf_pool_limits[i].pp)->pr_nout >
2452 			    pf_pool_limits[i].limit_new) {
2453 				free(table, M_TEMP, sizeof(*table));
2454 				free(ioe, M_TEMP, sizeof(*ioe));
2455 				error = EBUSY;
2456 				PF_UNLOCK();
2457 				goto fail;
2458 			}
2459 		}
2460 		/* now do the commit - no errors should happen here */
2461 		for (i = 0; i < io->size; i++) {
2462 			if (copyin(io->array+i, ioe, sizeof(*ioe))) {
2463 				free(table, M_TEMP, sizeof(*table));
2464 				free(ioe, M_TEMP, sizeof(*ioe));
2465 				error = EFAULT;
2466 				PF_UNLOCK();
2467 				goto fail;
2468 			}
2469 			if (strnlen(ioe->anchor, sizeof(ioe->anchor)) ==
2470 			    sizeof(ioe->anchor)) {
2471 				free(table, M_TEMP, sizeof(*table));
2472 				free(ioe, M_TEMP, sizeof(*ioe));
2473 				error = ENAMETOOLONG;
2474 				PF_UNLOCK();
2475 				goto fail;
2476 			}
2477 			switch (ioe->type) {
2478 			case PF_TRANS_TABLE:
2479 				memset(table, 0, sizeof(*table));
2480 				strlcpy(table->pfrt_anchor, ioe->anchor,
2481 				    sizeof(table->pfrt_anchor));
2482 				if ((error = pfr_ina_commit(table, ioe->ticket,
2483 				    NULL, NULL, 0))) {
2484 					free(table, M_TEMP, sizeof(*table));
2485 					free(ioe, M_TEMP, sizeof(*ioe));
2486 					PF_UNLOCK();
2487 					goto fail; /* really bad */
2488 				}
2489 				break;
2490 			case PF_TRANS_RULESET:
2491 				if ((error = pf_commit_rules(ioe->ticket,
2492 				    ioe->anchor))) {
2493 					free(table, M_TEMP, sizeof(*table));
2494 					free(ioe, M_TEMP, sizeof(*ioe));
2495 					PF_UNLOCK();
2496 					goto fail; /* really bad */
2497 				}
2498 				break;
2499 			default:
2500 				free(table, M_TEMP, sizeof(*table));
2501 				free(ioe, M_TEMP, sizeof(*ioe));
2502 				error = EINVAL;
2503 				PF_UNLOCK();
2504 				goto fail; /* really bad */
2505 			}
2506 		}
2507 		for (i = 0; i < PF_LIMIT_MAX; i++) {
2508 			if (pf_pool_limits[i].limit_new !=
2509 			    pf_pool_limits[i].limit &&
2510 			    pool_sethardlimit(pf_pool_limits[i].pp,
2511 			    pf_pool_limits[i].limit_new, NULL, 0) != 0) {
2512 				free(table, M_TEMP, sizeof(*table));
2513 				free(ioe, M_TEMP, sizeof(*ioe));
2514 				error = EBUSY;
2515 				PF_UNLOCK();
2516 				goto fail; /* really bad */
2517 			}
2518 			pf_pool_limits[i].limit = pf_pool_limits[i].limit_new;
2519 		}
2520 		for (i = 0; i < PFTM_MAX; i++) {
2521 			int old = pf_default_rule.timeout[i];
2522 
2523 			pf_default_rule.timeout[i] =
2524 			    pf_default_rule_new.timeout[i];
2525 			if (pf_default_rule.timeout[i] == PFTM_INTERVAL &&
2526 			    pf_default_rule.timeout[i] < old)
2527 				task_add(net_tq(0), &pf_purge_task);
2528 		}
2529 		pfi_xcommit();
2530 		pf_trans_set_commit();
2531 		free(table, M_TEMP, sizeof(*table));
2532 		free(ioe, M_TEMP, sizeof(*ioe));
2533 		PF_UNLOCK();
2534 		break;
2535 	}
2536 
2537 	case DIOCGETSRCNODES: {
2538 		struct pfioc_src_nodes	*psn = (struct pfioc_src_nodes *)addr;
2539 		struct pf_src_node	*n, *p, *pstore;
2540 		u_int32_t		 nr = 0;
2541 		size_t			 space = psn->psn_len;
2542 
2543 		PF_LOCK();
2544 		if (space == 0) {
2545 			RB_FOREACH(n, pf_src_tree, &tree_src_tracking)
2546 				nr++;
2547 			psn->psn_len = sizeof(struct pf_src_node) * nr;
2548 			PF_UNLOCK();
2549 			break;
2550 		}
2551 
2552 		pstore = malloc(sizeof(*pstore), M_TEMP, M_WAITOK);
2553 
2554 		p = psn->psn_src_nodes;
2555 		RB_FOREACH(n, pf_src_tree, &tree_src_tracking) {
2556 			int	secs = getuptime(), diff;
2557 
2558 			if ((nr + 1) * sizeof(*p) > psn->psn_len)
2559 				break;
2560 
2561 			memcpy(pstore, n, sizeof(*pstore));
2562 			memset(&pstore->entry, 0, sizeof(pstore->entry));
2563 			pstore->rule.ptr = NULL;
2564 			pstore->kif = NULL;
2565 			pstore->rule.nr = n->rule.ptr->nr;
2566 			pstore->creation = secs - pstore->creation;
2567 			if (pstore->expire > secs)
2568 				pstore->expire -= secs;
2569 			else
2570 				pstore->expire = 0;
2571 
2572 			/* adjust the connection rate estimate */
2573 			diff = secs - n->conn_rate.last;
2574 			if (diff >= n->conn_rate.seconds)
2575 				pstore->conn_rate.count = 0;
2576 			else
2577 				pstore->conn_rate.count -=
2578 				    n->conn_rate.count * diff /
2579 				    n->conn_rate.seconds;
2580 
2581 			error = copyout(pstore, p, sizeof(*p));
2582 			if (error) {
2583 				free(pstore, M_TEMP, sizeof(*pstore));
2584 				PF_UNLOCK();
2585 				goto fail;
2586 			}
2587 			p++;
2588 			nr++;
2589 		}
2590 		psn->psn_len = sizeof(struct pf_src_node) * nr;
2591 
2592 		free(pstore, M_TEMP, sizeof(*pstore));
2593 		PF_UNLOCK();
2594 		break;
2595 	}
2596 
2597 	case DIOCCLRSRCNODES: {
2598 		struct pf_src_node	*n;
2599 		struct pf_state		*state;
2600 
2601 		PF_LOCK();
2602 		PF_STATE_ENTER_WRITE();
2603 		RB_FOREACH(state, pf_state_tree_id, &tree_id)
2604 			pf_src_tree_remove_state(state);
2605 		PF_STATE_EXIT_WRITE();
2606 		RB_FOREACH(n, pf_src_tree, &tree_src_tracking)
2607 			n->expire = 1;
2608 		pf_purge_expired_src_nodes();
2609 		PF_UNLOCK();
2610 		break;
2611 	}
2612 
2613 	case DIOCKILLSRCNODES: {
2614 		struct pf_src_node	*sn;
2615 		struct pf_state		*s;
2616 		struct pfioc_src_node_kill *psnk =
2617 		    (struct pfioc_src_node_kill *)addr;
2618 		u_int			killed = 0;
2619 
2620 		PF_LOCK();
2621 		RB_FOREACH(sn, pf_src_tree, &tree_src_tracking) {
2622 			if (pf_match_addr(psnk->psnk_src.neg,
2623 				&psnk->psnk_src.addr.v.a.addr,
2624 				&psnk->psnk_src.addr.v.a.mask,
2625 				&sn->addr, sn->af) &&
2626 			    pf_match_addr(psnk->psnk_dst.neg,
2627 				&psnk->psnk_dst.addr.v.a.addr,
2628 				&psnk->psnk_dst.addr.v.a.mask,
2629 				&sn->raddr, sn->af)) {
2630 				/* Handle state to src_node linkage */
2631 				if (sn->states != 0) {
2632 					PF_ASSERT_LOCKED();
2633 					PF_STATE_ENTER_WRITE();
2634 					RB_FOREACH(s, pf_state_tree_id,
2635 					   &tree_id)
2636 						pf_state_rm_src_node(s, sn);
2637 					PF_STATE_EXIT_WRITE();
2638 				}
2639 				sn->expire = 1;
2640 				killed++;
2641 			}
2642 		}
2643 
2644 		if (killed > 0)
2645 			pf_purge_expired_src_nodes();
2646 
2647 		psnk->psnk_killed = killed;
2648 		PF_UNLOCK();
2649 		break;
2650 	}
2651 
2652 	case DIOCSETHOSTID: {
2653 		u_int32_t	*hostid = (u_int32_t *)addr;
2654 
2655 		PF_LOCK();
2656 		if (*hostid == 0)
2657 			pf_trans_set.hostid = arc4random();
2658 		else
2659 			pf_trans_set.hostid = *hostid;
2660 		pf_trans_set.mask |= PF_TSET_HOSTID;
2661 		PF_UNLOCK();
2662 		break;
2663 	}
2664 
2665 	case DIOCOSFPFLUSH:
2666 		PF_LOCK();
2667 		pf_osfp_flush();
2668 		PF_UNLOCK();
2669 		break;
2670 
2671 	case DIOCIGETIFACES: {
2672 		struct pfioc_iface *io = (struct pfioc_iface *)addr;
2673 
2674 		if (io->pfiio_esize != sizeof(struct pfi_kif)) {
2675 			error = ENODEV;
2676 			break;
2677 		}
2678 		PF_LOCK();
2679 		error = pfi_get_ifaces(io->pfiio_name, io->pfiio_buffer,
2680 		    &io->pfiio_size);
2681 		PF_UNLOCK();
2682 		break;
2683 	}
2684 
2685 	case DIOCSETIFFLAG: {
2686 		struct pfioc_iface *io = (struct pfioc_iface *)addr;
2687 
2688 		PF_LOCK();
2689 		error = pfi_set_flags(io->pfiio_name, io->pfiio_flags);
2690 		PF_UNLOCK();
2691 		break;
2692 	}
2693 
2694 	case DIOCCLRIFFLAG: {
2695 		struct pfioc_iface *io = (struct pfioc_iface *)addr;
2696 
2697 		PF_LOCK();
2698 		error = pfi_clear_flags(io->pfiio_name, io->pfiio_flags);
2699 		PF_UNLOCK();
2700 		break;
2701 	}
2702 
2703 	case DIOCSETREASS: {
2704 		u_int32_t	*reass = (u_int32_t *)addr;
2705 
2706 		PF_LOCK();
2707 		pf_trans_set.reass = *reass;
2708 		pf_trans_set.mask |= PF_TSET_REASS;
2709 		PF_UNLOCK();
2710 		break;
2711 	}
2712 
2713 	case DIOCSETSYNFLWATS: {
2714 		struct pfioc_synflwats *io = (struct pfioc_synflwats *)addr;
2715 
2716 		PF_LOCK();
2717 		error = pf_syncookies_setwats(io->hiwat, io->lowat);
2718 		PF_UNLOCK();
2719 		break;
2720 	}
2721 
2722 	case DIOCGETSYNFLWATS: {
2723 		struct pfioc_synflwats *io = (struct pfioc_synflwats *)addr;
2724 
2725 		PF_LOCK();
2726 		error = pf_syncookies_getwats(io);
2727 		PF_UNLOCK();
2728 		break;
2729 	}
2730 
2731 	case DIOCSETSYNCOOKIES: {
2732 		u_int8_t	*mode = (u_int8_t *)addr;
2733 
2734 		PF_LOCK();
2735 		error = pf_syncookies_setmode(*mode);
2736 		PF_UNLOCK();
2737 		break;
2738 	}
2739 
2740 	default:
2741 		error = ENODEV;
2742 		break;
2743 	}
2744 fail:
2745 	NET_UNLOCK();
2746 	return (error);
2747 }
2748 
2749 void
2750 pf_trans_set_commit(void)
2751 {
2752 	if (pf_trans_set.mask & PF_TSET_STATUSIF)
2753 		strlcpy(pf_status.ifname, pf_trans_set.statusif, IFNAMSIZ);
2754 	if (pf_trans_set.mask & PF_TSET_DEBUG)
2755 		pf_status.debug = pf_trans_set.debug;
2756 	if (pf_trans_set.mask & PF_TSET_HOSTID)
2757 		pf_status.hostid = pf_trans_set.hostid;
2758 	if (pf_trans_set.mask & PF_TSET_REASS)
2759 		pf_status.reass = pf_trans_set.reass;
2760 }
2761 
2762 void
2763 pf_pool_copyin(struct pf_pool *from, struct pf_pool *to)
2764 {
2765 	memmove(to, from, sizeof(*to));
2766 	to->kif = NULL;
2767 	to->addr.p.tbl = NULL;
2768 }
2769 
2770 int
2771 pf_rule_copyin(struct pf_rule *from, struct pf_rule *to,
2772     struct pf_ruleset *ruleset)
2773 {
2774 	int i;
2775 
2776 	to->src = from->src;
2777 	to->src.addr.p.tbl = NULL;
2778 	to->dst = from->dst;
2779 	to->dst.addr.p.tbl = NULL;
2780 
2781 	/* XXX union skip[] */
2782 
2783 	strlcpy(to->label, from->label, sizeof(to->label));
2784 	strlcpy(to->ifname, from->ifname, sizeof(to->ifname));
2785 	strlcpy(to->rcv_ifname, from->rcv_ifname, sizeof(to->rcv_ifname));
2786 	strlcpy(to->qname, from->qname, sizeof(to->qname));
2787 	strlcpy(to->pqname, from->pqname, sizeof(to->pqname));
2788 	strlcpy(to->tagname, from->tagname, sizeof(to->tagname));
2789 	strlcpy(to->match_tagname, from->match_tagname,
2790 	    sizeof(to->match_tagname));
2791 	strlcpy(to->overload_tblname, from->overload_tblname,
2792 	    sizeof(to->overload_tblname));
2793 
2794 	pf_pool_copyin(&from->nat, &to->nat);
2795 	pf_pool_copyin(&from->rdr, &to->rdr);
2796 	pf_pool_copyin(&from->route, &to->route);
2797 
2798 	if (pf_kif_setup(to->ifname, &to->kif))
2799 		return (EINVAL);
2800 	if (pf_kif_setup(to->rcv_ifname, &to->rcv_kif))
2801 		return (EINVAL);
2802 	if (to->overload_tblname[0]) {
2803 		if ((to->overload_tbl = pfr_attach_table(ruleset,
2804 		    to->overload_tblname, 0)) == NULL)
2805 			return (EINVAL);
2806 		else
2807 			to->overload_tbl->pfrkt_flags |= PFR_TFLAG_ACTIVE;
2808 	}
2809 
2810 	if (pf_kif_setup(to->rdr.ifname, &to->rdr.kif))
2811 		return (EINVAL);
2812 	if (pf_kif_setup(to->nat.ifname, &to->nat.kif))
2813 		return (EINVAL);
2814 	if (pf_kif_setup(to->route.ifname, &to->route.kif))
2815 		return (EINVAL);
2816 
2817 	to->os_fingerprint = from->os_fingerprint;
2818 
2819 	to->rtableid = from->rtableid;
2820 	if (to->rtableid >= 0 && !rtable_exists(to->rtableid))
2821 		return (EBUSY);
2822 	to->onrdomain = from->onrdomain;
2823 	if (to->onrdomain != -1 && (to->onrdomain < 0 ||
2824 	    to->onrdomain > RT_TABLEID_MAX))
2825 		return (EINVAL);
2826 
2827 	for (i = 0; i < PFTM_MAX; i++)
2828 		to->timeout[i] = from->timeout[i];
2829 	to->states_tot = from->states_tot;
2830 	to->max_states = from->max_states;
2831 	to->max_src_nodes = from->max_src_nodes;
2832 	to->max_src_states = from->max_src_states;
2833 	to->max_src_conn = from->max_src_conn;
2834 	to->max_src_conn_rate.limit = from->max_src_conn_rate.limit;
2835 	to->max_src_conn_rate.seconds = from->max_src_conn_rate.seconds;
2836 	pf_init_threshold(&to->pktrate, from->pktrate.limit,
2837 	    from->pktrate.seconds);
2838 
2839 	if (to->qname[0] != 0) {
2840 		if ((to->qid = pf_qname2qid(to->qname, 0)) == 0)
2841 			return (EBUSY);
2842 		if (to->pqname[0] != 0) {
2843 			if ((to->pqid = pf_qname2qid(to->pqname, 0)) == 0)
2844 				return (EBUSY);
2845 		} else
2846 			to->pqid = to->qid;
2847 	}
2848 	to->rt_listid = from->rt_listid;
2849 	to->prob = from->prob;
2850 	to->return_icmp = from->return_icmp;
2851 	to->return_icmp6 = from->return_icmp6;
2852 	to->max_mss = from->max_mss;
2853 	if (to->tagname[0])
2854 		if ((to->tag = pf_tagname2tag(to->tagname, 1)) == 0)
2855 			return (EBUSY);
2856 	if (to->match_tagname[0])
2857 		if ((to->match_tag = pf_tagname2tag(to->match_tagname, 1)) == 0)
2858 			return (EBUSY);
2859 	to->scrub_flags = from->scrub_flags;
2860 	to->delay = from->delay;
2861 	to->uid = from->uid;
2862 	to->gid = from->gid;
2863 	to->rule_flag = from->rule_flag;
2864 	to->action = from->action;
2865 	to->direction = from->direction;
2866 	to->log = from->log;
2867 	to->logif = from->logif;
2868 #if NPFLOG > 0
2869 	if (!to->log)
2870 		to->logif = 0;
2871 #endif	/* NPFLOG > 0 */
2872 	to->quick = from->quick;
2873 	to->ifnot = from->ifnot;
2874 	to->rcvifnot = from->rcvifnot;
2875 	to->match_tag_not = from->match_tag_not;
2876 	to->keep_state = from->keep_state;
2877 	to->af = from->af;
2878 	to->naf = from->naf;
2879 	to->proto = from->proto;
2880 	to->type = from->type;
2881 	to->code = from->code;
2882 	to->flags = from->flags;
2883 	to->flagset = from->flagset;
2884 	to->min_ttl = from->min_ttl;
2885 	to->allow_opts = from->allow_opts;
2886 	to->rt = from->rt;
2887 	to->return_ttl = from->return_ttl;
2888 	to->tos = from->tos;
2889 	to->set_tos = from->set_tos;
2890 	to->anchor_relative = from->anchor_relative; /* XXX */
2891 	to->anchor_wildcard = from->anchor_wildcard; /* XXX */
2892 	to->flush = from->flush;
2893 	to->divert.addr = from->divert.addr;
2894 	to->divert.port = from->divert.port;
2895 	to->divert.type = from->divert.type;
2896 	to->prio = from->prio;
2897 	to->set_prio[0] = from->set_prio[0];
2898 	to->set_prio[1] = from->set_prio[1];
2899 
2900 	return (0);
2901 }
2902 
2903 int
2904 pf_sysctl(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
2905 {
2906 	struct pf_status	pfs;
2907 
2908 	NET_RLOCK_IN_IOCTL();
2909 	PF_LOCK();
2910 	memcpy(&pfs, &pf_status, sizeof(struct pf_status));
2911 	pfi_update_status(pfs.ifname, &pfs);
2912 	PF_UNLOCK();
2913 	NET_RUNLOCK_IN_IOCTL();
2914 
2915 	return sysctl_rdstruct(oldp, oldlenp, newp, &pfs, sizeof(pfs));
2916 }
2917