xref: /openbsd-src/sys/net/pf.c (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e)
1 /*	$OpenBSD: pf.c,v 1.123 2001/08/11 12:05:00 dhartmei Exp $ */
2 
3 /*
4  * Copyright (c) 2001, Daniel Hartmeier
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  *    - Redistributions of source code must retain the above copyright
12  *      notice, this list of conditions and the following disclaimer.
13  *    - Redistributions in binary form must reproduce the above
14  *      copyright notice, this list of conditions and the following
15  *      disclaimer in the documentation and/or other materials provided
16  *      with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  *
31  */
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/mbuf.h>
36 #include <sys/filio.h>
37 #include <sys/fcntl.h>
38 #include <sys/socket.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/time.h>
42 #include <sys/pool.h>
43 
44 #include <net/if.h>
45 #include <net/if_types.h>
46 #include <net/bpf.h>
47 #include <net/route.h>
48 #include <net/if_pflog.h>
49 
50 #include <netinet/in.h>
51 #include <netinet/in_var.h>
52 #include <netinet/in_systm.h>
53 #include <netinet/ip.h>
54 #include <netinet/ip_var.h>
55 #include <netinet/tcp.h>
56 #include <netinet/tcp_seq.h>
57 #include <netinet/udp.h>
58 #include <netinet/ip_icmp.h>
59 
60 #include <dev/rndvar.h>
61 #include <net/pfvar.h>
62 
63 #include "bpfilter.h"
64 #include "pflog.h"
65 
/*
 * Debug printf: emit the parenthesised printf argument list `x` when the
 * configured debug level (pf_status.debug) is at least `n`.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement;
 * a bare `if` here would capture the `else` of an enclosing if/else.
 * Call as: DPFPRINTF(level, ("fmt\n", args...));
 */
#define DPFPRINTF(n, x) \
	do { \
		if (pf_status.debug >= (n)) \
			printf x; \
	} while (0)
67 
68 /*
69  * Tree data structure
70  */
71 
/*
 * One node of a state lookup tree.  Nodes are allocated from pf_tree_pl
 * by pf_tree_insert() and released by pf_tree_remove().
 */
struct pf_tree_node {
	struct pf_tree_key	 key;		/* lookup key, copied into the node */
	struct pf_state		*state;		/* state this key maps to (not owned) */
	struct pf_tree_node	*parent;	/* NULL at the root */
	struct pf_tree_node	*left;		/* subtree with keys comparing lower */
	struct pf_tree_node	*right;		/* subtree with keys comparing higher */
	int			 balance;	/* adjusted by insert/remove/rotate */
};
80 
/*
 * Entry of a port list (see pf_tcp_ports / pf_udp_ports); allocated from
 * pf_sport_pl.
 */
struct pf_port_node {
	LIST_ENTRY(pf_port_node)	next;	/* list linkage */
	u_int16_t			port;	/* port number held by this entry */
};
/* List head type for pf_port_node entries. */
LIST_HEAD(pf_port_list, pf_port_node);
86 
87 /*
88  * Global variables
89  */
90 
/*
 * Rule, NAT and redirection lists each exist twice: pfattach() points the
 * *_active pointer at element 0 and the *_inactive pointer at element 1,
 * and the DIOCCOMMIT* ioctls swap the two pointers.
 */
TAILQ_HEAD(pf_natqueue, pf_nat)		pf_nats[2];
TAILQ_HEAD(pf_rdrqueue, pf_rdr)		pf_rdrs[2];
struct pf_rulequeue	 pf_rules[2];
struct pf_rulequeue	*pf_rules_active;
struct pf_rulequeue	*pf_rules_inactive;
struct pf_natqueue	*pf_nats_active;
struct pf_natqueue	*pf_nats_inactive;
struct pf_rdrqueue	*pf_rdrs_active;
struct pf_rdrqueue	*pf_rdrs_inactive;
/*
 * State trees.  pf_insert_state() keys tree_lan_ext by (proto, lan, ext)
 * and tree_ext_gwy by (proto, ext, gwy); every state is entered in both.
 */
struct pf_tree_node	*tree_lan_ext, *tree_ext_gwy;
/* Timestamp refreshed with microtime(); state expiry is compared to it. */
struct timeval		 pftv;
/* Status and counters, copied out by DIOCGETSTATUS. */
struct pf_status	 pf_status;
/* Interface selected through DIOCSETSTATUSIF. */
struct ifnet		*status_ifp;

u_int32_t		 pf_last_purge;
/*
 * Tickets: DIOCBEGIN* increments the inactive ticket and returns it,
 * DIOCADD* and DIOCCOMMIT* require it to match, and a commit copies it to
 * the corresponding active ticket.
 */
u_int32_t		 ticket_rules_active;
u_int32_t		 ticket_rules_inactive;
u_int32_t		 ticket_nats_active;
u_int32_t		 ticket_nats_inactive;
u_int32_t		 ticket_rdrs_active;
u_int32_t		 ticket_rdrs_inactive;
/* Port lists, initialised empty by pfattach(). */
struct pf_port_list	 pf_tcp_ports;
struct pf_port_list	 pf_udp_ports;

/* Allocation pools, set up by pfattach(). */
struct pool		 pf_tree_pl, pf_rule_pl, pf_nat_pl, pf_sport_pl;
struct pool		 pf_rdr_pl, pf_state_pl;
117 
/* State tree primitives and state table maintenance. */
int			 pf_tree_key_compare(struct pf_tree_key *,
			    struct pf_tree_key *);
void			 pf_tree_rotate_left(struct pf_tree_node **);
void			 pf_tree_rotate_right(struct pf_tree_node **);
struct pf_tree_node	*pf_tree_first(struct pf_tree_node *);
struct pf_tree_node	*pf_tree_next(struct pf_tree_node *);
struct pf_tree_node	*pf_tree_search(struct pf_tree_node *,
			    struct pf_tree_key *);
void			 pf_insert_state(struct pf_state *);
void			 pf_purge_expired_states(void);

/* Debug output helpers. */
void			 pf_print_host(u_int32_t, u_int16_t);
void			 pf_print_state(struct pf_state *);
void			 pf_print_flags(u_int8_t);

/* Device entry points. */
void			 pfattach(int);
int			 pfopen(dev_t, int, int, struct proc *);
int			 pfclose(dev_t, int, int, struct proc *);
int			 pfioctl(dev_t, u_long, caddr_t, int, struct proc *);

/* Packet handling routines; their definitions are not part of this excerpt. */
u_int16_t		 pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t);
void			 pf_change_ap(u_int32_t *, u_int16_t *, u_int16_t *,
			    u_int16_t *, u_int32_t, u_int16_t);
void			 pf_change_a(u_int32_t *, u_int16_t *, u_int32_t);
void			 pf_change_icmp(u_int32_t *, u_int16_t *, u_int32_t *,
			    u_int32_t, u_int16_t, u_int16_t *, u_int16_t *,
			    u_int16_t *, u_int16_t *);
void			 pf_send_reset(struct ip *, int, struct tcphdr *);
void			 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t);
u_int16_t		 pf_map_port_range(struct pf_rdr *, u_int16_t);
struct pf_nat		*pf_get_nat(struct ifnet *, u_int8_t, u_int32_t,
			    u_int32_t);
struct pf_rdr		*pf_get_rdr(struct ifnet *, u_int8_t, u_int32_t,
			    u_int32_t, u_int16_t);
int			 pf_test_tcp(int, struct ifnet *, struct mbuf *,
			    int, int, struct ip *, struct tcphdr *);
int			 pf_test_udp(int, struct ifnet *, struct mbuf *,
			    int, int, struct ip *, struct udphdr *);
int			 pf_test_icmp(int, struct ifnet *, struct mbuf *,
			    int, int, struct ip *, struct icmp *);
int			 pf_test_other(int, struct ifnet *, struct mbuf *,
			    struct ip *);
int			 pf_test_state_tcp(struct pf_state **, int,
			    struct ifnet *, struct mbuf *, int, int,
			    struct ip *, struct tcphdr *);
int			 pf_test_state_udp(struct pf_state **, int,
			    struct ifnet *, struct mbuf *, int, int,
			    struct ip *, struct udphdr *);
int			 pf_test_state_icmp(struct pf_state **, int,
			    struct ifnet *, struct mbuf *, int, int,
			    struct ip *, struct icmp *);
void			*pf_pull_hdr(struct mbuf *, int, void *, int,
			    u_short *, u_short *);
void			 pf_calc_skip_steps(struct pf_rulequeue *);

/* Port bookkeeping (pf_put_sport() is called when a translated state is purged). */
int			 pf_get_sport(u_int8_t, u_int16_t, u_int16_t,
			    u_int16_t *);
void			 pf_put_sport(u_int8_t, u_int16_t);
int			 pf_add_sport(struct pf_port_list *, u_int16_t);
int			 pf_chk_sport(struct pf_port_list *, u_int16_t);
int			 pf_normalize_tcp(int, struct ifnet *, struct mbuf *,
			     int, int, struct ip *, struct tcphdr *);
180 
/*
 * PFLOG_PACKET(x, a, b, c, d, e): log a packet through pflog_packet(a..e).
 * `x` is the IP header; its ip_len and ip_off fields are byte-swapped with
 * HTONS around the call and swapped back with NTOHS afterwards.
 *
 * When pflog is not configured the macro is a function-like no-op, so call
 * sites expand to a single empty expression statement and the arguments
 * are not evaluated.
 */
#if NPFLOG > 0
#define	PFLOG_PACKET(x,a,b,c,d,e) \
	do { \
		HTONS((x)->ip_len); \
		HTONS((x)->ip_off); \
		pflog_packet(a,b,c,d,e); \
		NTOHS((x)->ip_len); \
		NTOHS((x)->ip_off); \
	} while (0)
#else
#define	PFLOG_PACKET(x,a,b,c,d,e)	((void)0)
#endif
193 
/*
 * True when the state's lan and gwy endpoints differ in address or port,
 * i.e. the state carries an address/port translation.
 */
#define	STATE_TRANSLATE(s) \
	((s)->lan.addr != (s)->gwy.addr || (s)->lan.port != (s)->gwy.port)
196 
197 int
198 pf_tree_key_compare(struct pf_tree_key *a, struct pf_tree_key *b)
199 {
200 	/*
201 	 * could use memcmp(), but with the best manual order, we can
202 	 * minimize the number of average compares. what is faster?
203 	 */
204 	if (a->proto < b->proto)
205 		return (-1);
206 	if (a->proto > b->proto)
207 		return ( 1);
208 	if (a->addr[0].s_addr < b->addr[0].s_addr)
209 		return (-1);
210 	if (a->addr[0].s_addr > b->addr[0].s_addr)
211 		return ( 1);
212 	if (a->addr[1].s_addr < b->addr[1].s_addr)
213 		return (-1);
214 	if (a->addr[1].s_addr > b->addr[1].s_addr)
215 		return ( 1);
216 	if (a->port[0] < b->port[0])
217 		return (-1);
218 	if (a->port[0] > b->port[0])
219 		return ( 1);
220 	if (a->port[1] < b->port[1])
221 		return (-1);
222 	if (a->port[1] > b->port[1])
223 		return ( 1);
224 	return (0);
225 }
226 
227 void
228 pf_tree_rotate_left(struct pf_tree_node **n)
229 {
230 	struct pf_tree_node *q = *n, *p = (*n)->parent;
231 
232 	(*n)->parent = (*n)->right;
233 	*n = (*n)->right;
234 	(*n)->parent = p;
235 	q->right = (*n)->left;
236 	if (q->right)
237 		q->right->parent = q;
238 	(*n)->left = q;
239 	q->balance--;
240 	if ((*n)->balance > 0)
241 		q->balance -= (*n)->balance;
242 	(*n)->balance--;
243 	if (q->balance < 0)
244 		(*n)->balance += q->balance;
245 }
246 
247 void
248 pf_tree_rotate_right(struct pf_tree_node **n)
249 {
250 	struct pf_tree_node *q = *n, *p = (*n)->parent;
251 
252 	(*n)->parent = (*n)->left;
253 	*n = (*n)->left;
254 	(*n)->parent = p;
255 	q->left = (*n)->right;
256 	if (q->left)
257 		q->left->parent = q;
258 	(*n)->right = q;
259 	q->balance++;
260 	if ((*n)->balance < 0)
261 		q->balance -= (*n)->balance;
262 	(*n)->balance++;
263 	if (q->balance > 0)
264 		(*n)->balance += q->balance;
265 }
266 
267 int
268 pf_tree_insert(struct pf_tree_node **n, struct pf_tree_node *p,
269     struct pf_tree_key *key, struct pf_state *state)
270 {
271 	int deltaH = 0;
272 
273 	if (*n == NULL) {
274 		*n = pool_get(&pf_tree_pl, PR_NOWAIT);
275 		if (*n == NULL)
276 			return (0);
277 		bcopy(key, &(*n)->key, sizeof(struct pf_tree_key));
278 		(*n)->state = state;
279 		(*n)->balance = 0;
280 		(*n)->parent = p;
281 		(*n)->left = (*n)->right = NULL;
282 		deltaH = 1;
283 	} else if (pf_tree_key_compare(key, &(*n)->key) > 0) {
284 		if (pf_tree_insert(&(*n)->right, *n, key, state)) {
285 			(*n)->balance++;
286 			if ((*n)->balance == 1)
287 				deltaH = 1;
288 			else if ((*n)->balance == 2) {
289 				if ((*n)->right->balance == -1)
290 					pf_tree_rotate_right(&(*n)->right);
291 				pf_tree_rotate_left(n);
292 			}
293 		}
294 	} else {
295 		if (pf_tree_insert(&(*n)->left, *n, key, state)) {
296 			(*n)->balance--;
297 			if ((*n)->balance == -1)
298 				deltaH = 1;
299 			else if ((*n)->balance == -2) {
300 				if ((*n)->left->balance == 1)
301 					pf_tree_rotate_left(&(*n)->left);
302 				pf_tree_rotate_right(n);
303 			}
304 		}
305 	}
306 	return (deltaH);
307 }
308 
/*
 * Remove the node matching `key` from the subtree whose root pointer is *n.
 *
 *   n   - address of the slot holding the subtree root
 *   p   - node to record as parent of whatever replaces a removed node
 *   key - key of the node to remove
 *
 * The removed node is returned to pf_tree_pl; the state it pointed to is
 * not touched.  Returns nonzero when the caller has to adjust its own
 * balance (presumably: this subtree became shorter, AVL-style; confirm),
 * and 0 otherwise, including when the key is not found.
 */
int
pf_tree_remove(struct pf_tree_node **n, struct pf_tree_node *p,
    struct pf_tree_key *key)
{
	int deltaH = 0;
	int c;

	/* Ran off the tree: key is not present. */
	if (*n == NULL)
		return (0);
	c = pf_tree_key_compare(key, &(*n)->key);
	if (c < 0) {
		/* Key lies in the left subtree. */
		if (pf_tree_remove(&(*n)->left, *n, key)) {
			(*n)->balance++;
			if ((*n)->balance == 0)
				deltaH = 1;
			else if ((*n)->balance == 2) {
				if ((*n)->right->balance == -1)
					pf_tree_rotate_right(&(*n)->right);
				pf_tree_rotate_left(n);
				if ((*n)->balance == 0)
					deltaH = 1;
			}
		}
	} else if (c > 0) {
		/* Key lies in the right subtree (mirror image of the above). */
		if (pf_tree_remove(&(*n)->right, *n, key)) {
			(*n)->balance--;
			if ((*n)->balance == 0)
				deltaH = 1;
			else if ((*n)->balance == -2) {
				if ((*n)->left->balance == 1)
					pf_tree_rotate_left(&(*n)->left);
				pf_tree_rotate_right(n);
				if ((*n)->balance == 0)
					deltaH = 1;
			}
		}
	} else {
		/* *n is the node to remove. */
		if ((*n)->right == NULL) {
			/* No right child: the left child (or NULL) takes over. */
			struct pf_tree_node *n0 = *n;

			*n = (*n)->left;
			if (*n != NULL)
				(*n)->parent = p;
			pool_put(&pf_tree_pl, n0);
			deltaH = 1;
		} else if ((*n)->left == NULL) {
			/* No left child: the right child takes over. */
			struct pf_tree_node *n0 = *n;

			*n = (*n)->right;
			if (*n != NULL)
				(*n)->parent = p;
			pool_put(&pf_tree_pl, n0);
			deltaH = 1;
		} else {
			/*
			 * Two children: locate the rightmost node of the left
			 * subtree, move its key and state into *n, give that
			 * node the key being removed, and then remove it from
			 * the left subtree by recursion.
			 */
			struct pf_tree_node **qq = &(*n)->left;

			while ((*qq)->right != NULL)
				qq = &(*qq)->right;
			bcopy(&(*qq)->key, &(*n)->key,
			    sizeof(struct pf_tree_key));
			(*n)->state = (*qq)->state;
			/*
			 * NOTE: if the caller's `key` points at (*n)->key, it
			 * has just been overwritten, so this copy is a no-op;
			 * the recursion below then still reaches the same
			 * node because it searches for that node's own key.
			 */
			bcopy(key, &(*qq)->key, sizeof(struct pf_tree_key));
			if (pf_tree_remove(&(*n)->left, *n, key)) {
				(*n)->balance++;
				if ((*n)->balance == 0)
					deltaH = 1;
				else if ((*n)->balance == 2) {
					if ((*n)->right->balance == -1)
						pf_tree_rotate_right(
						    &(*n)->right);
					pf_tree_rotate_left(n);
					if ((*n)->balance == 0)
						deltaH = 1;
				}
			}
		}
	}
	return (deltaH);
}
388 
389 int
390 pflog_packet(struct mbuf *m, int af, u_short dir, u_short reason,
391     struct pf_rule *rm)
392 {
393 #if NBPFILTER > 0
394 	struct ifnet *ifn, *ifp = NULL;
395 	struct pfloghdr hdr;
396 	struct mbuf m1;
397 
398 	if (m == NULL)
399 		return(-1);
400 
401 	hdr.af = htonl(af);
402 	/* Set the right interface name */
403 	if (rm != NULL)
404 		ifp = rm->ifp;
405 	if (m->m_pkthdr.rcvif != NULL)
406 		ifp = m->m_pkthdr.rcvif;
407 	if (ifp != NULL)
408 		memcpy(hdr.ifname, ifp->if_xname, sizeof(hdr.ifname));
409 	else
410 		strcpy(hdr.ifname, "unkn");
411 
412 	hdr.rnr = htons(rm->nr);
413 	hdr.reason = htons(reason);
414 	hdr.dir = htons(dir);
415 	hdr.action = htons(rm->action);
416 
417 	m1.m_next = m;
418 	m1.m_len = PFLOG_HDRLEN;
419 	m1.m_data = (char *) &hdr;
420 
421 	ifn = &(pflogif[0].sc_if);
422 
423 	if (ifn->if_bpf)
424 		bpf_mtap(ifn->if_bpf, &m1);
425 #endif
426 
427 	return (0);
428 }
429 
430 struct pf_tree_node *
431 pf_tree_first(struct pf_tree_node *n)
432 {
433 	if (n == NULL)
434 		return (NULL);
435 	while (n->parent)
436 		n = n->parent;
437 	while (n->left)
438 		n = n->left;
439 	return (n);
440 }
441 
442 struct pf_tree_node *
443 pf_tree_next(struct pf_tree_node *n)
444 {
445 	if (n == NULL)
446 		return (NULL);
447 	if (n->right) {
448 		n = n->right;
449 		while (n->left)
450 			n = n->left;
451 	} else {
452 		if (n->parent && (n == n->parent->left))
453 			n = n->parent;
454 		else {
455 			while (n->parent && (n == n->parent->right))
456 				n = n->parent;
457 			n = n->parent;
458 		}
459 	}
460 	return (n);
461 }
462 
463 struct pf_tree_node *
464 pf_tree_search(struct pf_tree_node *n, struct pf_tree_key *key)
465 {
466 	int c;
467 
468 	while (n && (c = pf_tree_key_compare(&n->key, key)))
469 		if (c > 0)
470 			n = n->left;
471 		else
472 			n = n->right;
473 	pf_status.fcounters[FCNT_STATE_SEARCH]++;
474 	return (n);
475 }
476 
477 struct pf_state *
478 pf_find_state(struct pf_tree_node *n, struct pf_tree_key *key)
479 {
480 	n = pf_tree_search(n, key);
481 	if (n)
482 		return (n->state);
483 	else
484 		return (NULL);
485 }
486 
487 void
488 pf_insert_state(struct pf_state *state)
489 {
490 	struct pf_tree_key key;
491 	struct pf_state *s;
492 
493 	key.proto = state->proto;
494 	key.addr[0].s_addr = state->lan.addr;
495 	key.port[0] = state->lan.port;
496 	key.addr[1].s_addr = state->ext.addr;
497 	key.port[1] = state->ext.port;
498 	/* sanity checks can be removed later, should never occur */
499 	if ((s = pf_find_state(tree_lan_ext, &key)) != NULL) {
500 		if (pf_status.debug >= PF_DEBUG_URGENT) {
501 			printf("pf: ERROR! insert invalid\n");
502 			printf("    key already in tree_lan_ext\n");
503 			printf("    key: proto = %u, lan = ", state->proto);
504 			pf_print_host(key.addr[0].s_addr, key.port[0]);
505 			printf(", ext = ");
506 			pf_print_host(key.addr[1].s_addr, key.port[1]);
507 			printf("\n    state: ");
508 			pf_print_state(s);
509 			printf("\n");
510 		}
511 	} else {
512 		pf_tree_insert(&tree_lan_ext, NULL, &key, state);
513 		if (pf_find_state(tree_lan_ext, &key) != state)
514 			DPFPRINTF(PF_DEBUG_URGENT,
515 			    ("pf: ERROR! insert failed\n"));
516 	}
517 
518 	key.proto = state->proto;
519 	key.addr[0].s_addr = state->ext.addr;
520 	key.port[0] = state->ext.port;
521 	key.addr[1].s_addr = state->gwy.addr;
522 	key.port[1] = state->gwy.port;
523 	if ((s = pf_find_state(tree_ext_gwy, &key)) != NULL) {
524 		if (pf_status.debug >= PF_DEBUG_URGENT) {
525 			printf("pf: ERROR! insert invalid\n");
526 			printf("    key already in tree_ext_gwy\n");
527 			printf("    key: proto = %u, ext = ", state->proto);
528 			pf_print_host(key.addr[0].s_addr, key.port[0]);
529 			printf(", gwy = ");
530 			pf_print_host(key.addr[1].s_addr, key.port[1]);
531 			printf("\n    state: ");
532 			pf_print_state(s);
533 			printf("\n");
534 		}
535 	} else {
536 		pf_tree_insert(&tree_ext_gwy, NULL, &key, state);
537 		if (pf_find_state(tree_ext_gwy, &key) != state)
538 			DPFPRINTF(PF_DEBUG_URGENT,
539 			    ("pf: ERROR! insert failed\n"));
540 	}
541 	pf_status.fcounters[FCNT_STATE_INSERT]++;
542 	pf_status.states++;
543 }
544 
545 void
546 pf_purge_expired_states(void)
547 {
548 	struct pf_tree_node *cur, *next;
549 	struct pf_tree_key key;
550 
551 	cur = pf_tree_first(tree_ext_gwy);
552 	while (cur != NULL) {
553 		if (cur->state->expire <= pftv.tv_sec) {
554 			key.proto = cur->state->proto;
555 			key.addr[0].s_addr = cur->state->lan.addr;
556 			key.port[0] = cur->state->lan.port;
557 			key.addr[1].s_addr = cur->state->ext.addr;
558 			key.port[1] = cur->state->ext.port;
559 			/* remove state from second tree */
560 			if (pf_find_state(tree_lan_ext, &key) != cur->state)
561 				DPFPRINTF(PF_DEBUG_URGENT,
562 				    ("pf: ERROR: remove invalid!\n"));
563 			pf_tree_remove(&tree_lan_ext, NULL, &key);
564 			if (pf_find_state(tree_lan_ext, &key) != NULL)
565 				DPFPRINTF(PF_DEBUG_URGENT,
566 				    ("pf: ERROR: remove failed\n"));
567 			if (STATE_TRANSLATE(cur->state))
568 				pf_put_sport(cur->state->proto,
569 					htons(cur->state->gwy.port));
570 			/* free state */
571 			pool_put(&pf_state_pl, cur->state);
572 			/*
573 			 * remove state from tree being traversed, use next
574 			 * state's key to search after removal, since removal
575 			 * can invalidate pointers.
576 			 */
577 			next = pf_tree_next(cur);
578 			if (next) {
579 				key = next->key;
580 				pf_tree_remove(&tree_ext_gwy, NULL, &cur->key);
581 				cur = pf_tree_search(tree_ext_gwy, &key);
582 				if (cur == NULL)
583 					DPFPRINTF(PF_DEBUG_URGENT,
584 					    ("pf: ERROR: next not found\n"));
585 			} else {
586 				pf_tree_remove(&tree_ext_gwy, NULL, &cur->key);
587 				cur = NULL;
588 			}
589 			pf_status.fcounters[FCNT_STATE_REMOVALS]++;
590 			pf_status.states--;
591 		} else
592 			cur = pf_tree_next(cur);
593 	}
594 }
595 
/*
 * Print an IPv4 address and port as "a.b.c.d:port".  Both arguments are
 * taken in network byte order and converted before printing.
 */
void
pf_print_host(u_int32_t a, u_int16_t p)
{
	u_int32_t host = ntohl(a);
	u_int16_t port = ntohs(p);

	printf("%u.%u.%u.%u:%u",
	    (host >> 24) & 255,
	    (host >> 16) & 255,
	    (host >> 8) & 255,
	    host & 255,
	    port);
}
604 
605 void
606 pf_print_state(struct pf_state *s)
607 {
608 	switch (s->proto) {
609 	case IPPROTO_TCP:
610 		printf("TCP ");
611 		break;
612 	case IPPROTO_UDP:
613 		printf("UDP ");
614 		break;
615 	case IPPROTO_ICMP:
616 		printf("ICMP ");
617 		break;
618 	default:
619 		printf("%u ", s->proto);
620 		break;
621 	}
622 	pf_print_host(s->lan.addr, s->lan.port);
623 	printf(" ");
624 	pf_print_host(s->gwy.addr, s->gwy.port);
625 	printf(" ");
626 	pf_print_host(s->ext.addr, s->ext.port);
627 	printf(" [lo=%lu high=%lu win=%u]", s->src.seqlo, s->src.seqhi,
628 		 s->src.max_win);
629 	printf(" [lo=%lu high=%lu win=%u]", s->dst.seqlo, s->dst.seqhi,
630 		 s->dst.max_win);
631 	printf(" %u:%u", s->src.state, s->dst.state);
632 }
633 
634 void
635 pf_print_flags(u_int8_t f)
636 {
637 	if (f)
638 		printf(" ");
639 	if (f & TH_FIN)
640 		printf("F");
641 	if (f & TH_SYN)
642 		printf("S");
643 	if (f & TH_RST)
644 		printf("R");
645 	if (f & TH_PUSH)
646 		printf("P");
647 	if (f & TH_ACK)
648 		printf("A");
649 	if (f & TH_URG)
650 		printf("U");
651 }
652 
653 void
654 pfattach(int num)
655 {
656 	/* XXX - no M_* tags, but they are not used anyway */
657 	pool_init(&pf_tree_pl, sizeof(struct pf_tree_node), 0, 0, 0, "pftrpl",
658 	    0, NULL, NULL, 0);
659 	pool_init(&pf_rule_pl, sizeof(struct pf_rule), 0, 0, 0, "pfrulepl",
660 	    0, NULL, NULL, 0);
661 	pool_init(&pf_nat_pl, sizeof(struct pf_nat), 0, 0, 0, "pfnatpl",
662 	    0, NULL, NULL, 0);
663 	pool_init(&pf_rdr_pl, sizeof(struct pf_rdr), 0, 0, 0, "pfrdrpl",
664 	    0, NULL, NULL, 0);
665 	pool_init(&pf_state_pl, sizeof(struct pf_state), 0, 0, 0, "pfstatepl",
666 	    0, NULL, NULL, 0);
667 	pool_init(&pf_sport_pl, sizeof(struct pf_port_node), 0, 0, 0, "pfsport",
668 	    0, NULL, NULL, 0);
669 
670 	TAILQ_INIT(&pf_rules[0]);
671 	TAILQ_INIT(&pf_rules[1]);
672 	TAILQ_INIT(&pf_nats[0]);
673 	TAILQ_INIT(&pf_nats[1]);
674 	TAILQ_INIT(&pf_rdrs[0]);
675 	TAILQ_INIT(&pf_rdrs[1]);
676 	pf_rules_active = &pf_rules[0];
677 	pf_rules_inactive = &pf_rules[1];
678 	pf_nats_active = &pf_nats[0];
679 	pf_nats_inactive = &pf_nats[1];
680 	pf_rdrs_active = &pf_rdrs[0];
681 	pf_rdrs_inactive = &pf_rdrs[1];
682 
683 	LIST_INIT(&pf_tcp_ports);
684 	LIST_INIT(&pf_udp_ports);
685 
686 	pf_normalize_init();
687 }
688 
/*
 * Device open.  Only minor number 0 exists: any other minor yields ENXIO.
 * flags, fmt and p are not used.
 */
int
pfopen(dev_t dev, int flags, int fmt, struct proc *p)
{
	return (minor(dev) >= 1 ? ENXIO : 0);
}
696 
/*
 * Device close.  Only minor number 0 exists: any other minor yields ENXIO.
 * flags, fmt and p are not used.
 */
int
pfclose(dev_t dev, int flags, int fmt, struct proc *p)
{
	return (minor(dev) >= 1 ? ENXIO : 0);
}
704 
/*
 * ioctl entry point of the pf device.
 *
 *   dev   - device (not examined here)
 *   cmd   - DIOC* command
 *   addr  - command argument, cast to the structure each command expects
 *   flags - open flags; FWRITE is required for every command
 *   p     - calling process (not used)
 *
 * Returns 0 on success or an errno value: EACCES without FWRITE, EPERM
 * for state-changing commands when securelevel > 1, ENODEV for unknown
 * commands, and per-command errors as noted below.
 *
 * Rules, nats and rdrs are loaded transactionally: DIOCBEGIN* empties the
 * inactive list and hands out a new ticket, DIOCADD* appends to the
 * inactive list if the ticket matches, DIOCCOMMIT* swaps the inactive and
 * active lists and frees the previous active list.
 */
int
pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
{
	int error = 0;
	int s;		/* saved level from splsoftnet(), restored by splx() */

	if (!(flags & FWRITE))
		return (EACCES);

	/* At securelevel > 1 every command that changes the setup is refused. */
	if (securelevel > 1)
		switch (cmd) {
		case DIOCSTART:
		case DIOCSTOP:
		case DIOCBEGINRULES:
		case DIOCADDRULE:
		case DIOCCOMMITRULES:
		case DIOCBEGINNATS:
		case DIOCADDNAT:
		case DIOCCOMMITNATS:
		case DIOCBEGINRDRS:
		case DIOCADDRDR:
		case DIOCCOMMITRDRS:
		case DIOCCLRSTATES:
			return EPERM;
		}

	switch (cmd) {

	/* Start filtering; EEXIST if already running. */
	case DIOCSTART:
		if (pf_status.running)
			error = EEXIST;
		else {
			/* Reset the status block but keep the state count. */
			u_int32_t states = pf_status.states;
			bzero(&pf_status, sizeof(struct pf_status));
			pf_status.running = 1;
			pf_status.states = states;
			microtime(&pftv);
			pf_status.since = pftv.tv_sec;
			DPFPRINTF(PF_DEBUG_MISC, ("pf: started\n"));
		}
		break;

	/* Stop filtering; ENOENT if not running. */
	case DIOCSTOP:
		if (!pf_status.running)
			error = ENOENT;
		else {
			pf_status.running = 0;
			DPFPRINTF(PF_DEBUG_MISC, ("pf: stopped\n"));
		}
		break;

	/* Begin a rule transaction: empty the inactive list, issue a ticket. */
	case DIOCBEGINRULES: {
		u_int32_t *ticket = (u_int32_t *)addr;
		struct pf_rule *rule;

		while ((rule = TAILQ_FIRST(pf_rules_inactive)) != NULL) {
			TAILQ_REMOVE(pf_rules_inactive, rule, entries);
			pool_put(&pf_rule_pl, rule);
		}
		*ticket = ++ticket_rules_inactive;
		break;
	}

	/*
	 * Append one rule to the inactive list.  EBUSY on a stale ticket,
	 * ENOMEM if no rule can be allocated, EINVAL if the named interface
	 * does not exist.
	 */
	case DIOCADDRULE: {
		struct pfioc_rule *pr = (struct pfioc_rule *)addr;
		struct pf_rule *rule, *tail;

		if (pr->ticket != ticket_rules_inactive) {
			error = EBUSY;
			break;
		}
		rule = pool_get(&pf_rule_pl, PR_NOWAIT);
		if (rule == NULL) {
			error = ENOMEM;
			break;
		}
		bcopy(&pr->rule, rule, sizeof(struct pf_rule));
		/* Rules are numbered consecutively from 0 in list order. */
		tail = TAILQ_LAST(pf_rules_inactive, pf_rulequeue);
		if (tail)
			rule->nr = tail->nr + 1;
		else
			rule->nr = 0;
		/* Resolve the interface name, if any, to an ifnet pointer. */
		rule->ifp = NULL;
		if (rule->ifname[0]) {
			rule->ifp = ifunit(rule->ifname);
			if (rule->ifp == NULL) {
				pool_put(&pf_rule_pl, rule);
				error = EINVAL;
				break;
			}
		} else
			rule->ifp = NULL;
		TAILQ_INSERT_TAIL(pf_rules_inactive, rule, entries);
		break;
	}

	/* Make the inactive rule list active; EBUSY on a stale ticket. */
	case DIOCCOMMITRULES: {
		u_int32_t *ticket = (u_int32_t *)addr;
		struct pf_rulequeue *old_rules;
		struct pf_rule *rule;
		struct pf_tree_node *n;

		if (*ticket != ticket_rules_inactive) {
			error = EBUSY;
			break;
		}

		/* Swap rules, keep the old. */
		s = splsoftnet();
		/*
		 * Rules are about to get freed, clear rule pointers in states
		 */
		for (n = pf_tree_first(tree_ext_gwy); n != NULL;
		    n = pf_tree_next(n))
			n->state->rule = NULL;
		old_rules = pf_rules_active;
		pf_rules_active = pf_rules_inactive;
		pf_rules_inactive = old_rules;
		ticket_rules_active = ticket_rules_inactive;
		pf_calc_skip_steps(pf_rules_active);
		splx(s);

		/* Purge the old rule list. */
		while ((rule = TAILQ_FIRST(old_rules)) != NULL) {
			TAILQ_REMOVE(old_rules, rule, entries);
			pool_put(&pf_rule_pl, rule);
		}
		break;
	}

	/* Report the number of active rules and the active ticket. */
	case DIOCGETRULES: {
		struct pfioc_rule *pr = (struct pfioc_rule *)addr;
		struct pf_rule *tail;

		s = splsoftnet();
		tail = TAILQ_LAST(pf_rules_active, pf_rulequeue);
		if (tail)
			pr->nr = tail->nr + 1;
		else
			pr->nr = 0;
		pr->ticket = ticket_rules_active;
		splx(s);
		break;
	}

	/*
	 * Copy out the active rule whose number is pr->nr.  EBUSY on a
	 * stale ticket or when no rule has that number.
	 */
	case DIOCGETRULE: {
		struct pfioc_rule *pr = (struct pfioc_rule *)addr;
		struct pf_rule *rule;

		if (pr->ticket != ticket_rules_active) {
			error = EBUSY;
			break;
		}
		s = splsoftnet();
		rule = TAILQ_FIRST(pf_rules_active);
		while ((rule != NULL) && (rule->nr != pr->nr))
			rule = TAILQ_NEXT(rule, entries);
		if (rule == NULL) {
			error = EBUSY;
			splx(s);
			break;
		}
		bcopy(rule, &pr->rule, sizeof(struct pf_rule));
		splx(s);
		break;
	}

	/* Begin a nat transaction: empty the inactive list, issue a ticket. */
	case DIOCBEGINNATS: {
		u_int32_t *ticket = (u_int32_t *)addr;
		struct pf_nat *nat;

		while ((nat = TAILQ_FIRST(pf_nats_inactive)) != NULL) {
			TAILQ_REMOVE(pf_nats_inactive, nat, entries);
			pool_put(&pf_nat_pl, nat);
		}
		*ticket = ++ticket_nats_inactive;
		break;
	}

	/* Append one nat entry to the inactive list (errors as DIOCADDRULE). */
	case DIOCADDNAT: {
		struct pfioc_nat *pn = (struct pfioc_nat *)addr;
		struct pf_nat *nat;

		if (pn->ticket != ticket_nats_inactive) {
			error = EBUSY;
			break;
		}
		nat = pool_get(&pf_nat_pl, PR_NOWAIT);
		if (nat == NULL) {
			error = ENOMEM;
			break;
		}
		bcopy(&pn->nat, nat, sizeof(struct pf_nat));
		if (nat->ifname[0]) {
			nat->ifp = ifunit(nat->ifname);
			if (nat->ifp == NULL) {
				pool_put(&pf_nat_pl, nat);
				error = EINVAL;
				break;
			}
		} else
			nat->ifp = NULL;
		TAILQ_INSERT_TAIL(pf_nats_inactive, nat, entries);
		break;
	}

	/* Make the inactive nat list active; EBUSY on a stale ticket. */
	case DIOCCOMMITNATS: {
		u_int32_t *ticket = (u_int32_t *)addr;
		struct pf_natqueue *old_nats;
		struct pf_nat *nat;

		if (*ticket != ticket_nats_inactive) {
			error = EBUSY;
			break;
		}

		/* Swap nats, keep the old. */
		s = splsoftnet();
		old_nats = pf_nats_active;
		pf_nats_active = pf_nats_inactive;
		pf_nats_inactive = old_nats;
		ticket_nats_active = ticket_nats_inactive;
		splx(s);

		/* Purge the old nat list */
		while ((nat = TAILQ_FIRST(old_nats)) != NULL) {
			TAILQ_REMOVE(old_nats, nat, entries);
			pool_put(&pf_nat_pl, nat);
		}
		break;
	}

	/* Report the number of active nat entries and the active ticket. */
	case DIOCGETNATS: {
		struct pfioc_nat *pn = (struct pfioc_nat *)addr;
		struct pf_nat *nat;

		pn->nr = 0;
		s = splsoftnet();
		TAILQ_FOREACH(nat, pf_nats_active, entries)
			pn->nr++;
		pn->ticket = ticket_nats_active;
		splx(s);
		break;
	}

	/*
	 * Copy out the active nat entry at list position pn->nr (0-based).
	 * EBUSY on a stale ticket or when the position is past the end.
	 */
	case DIOCGETNAT: {
		struct pfioc_nat *pn = (struct pfioc_nat *)addr;
		struct pf_nat *nat;
		u_int32_t nr;

		if (pn->ticket != ticket_nats_active) {
			error = EBUSY;
			break;
		}
		nr = 0;
		s = splsoftnet();
		nat = TAILQ_FIRST(pf_nats_active);
		while ((nat != NULL) && (nr < pn->nr)) {
			nat = TAILQ_NEXT(nat, entries);
			nr++;
		}
		if (nat == NULL) {
			error = EBUSY;
			splx(s);
			break;
		}
		bcopy(nat, &pn->nat, sizeof(struct pf_nat));
		splx(s);
		break;
	}

	/* Begin an rdr transaction: empty the inactive list, issue a ticket. */
	case DIOCBEGINRDRS: {
		u_int32_t *ticket = (u_int32_t *)addr;
		struct pf_rdr *rdr;

		while ((rdr = TAILQ_FIRST(pf_rdrs_inactive)) != NULL) {
			TAILQ_REMOVE(pf_rdrs_inactive, rdr, entries);
			pool_put(&pf_rdr_pl, rdr);
		}
		*ticket = ++ticket_rdrs_inactive;
		break;
	}

	/* Append one rdr entry to the inactive list (errors as DIOCADDRULE). */
	case DIOCADDRDR: {
		struct pfioc_rdr *pr = (struct pfioc_rdr *)addr;
		struct pf_rdr *rdr;

		if (pr->ticket != ticket_rdrs_inactive) {
			error = EBUSY;
			break;
		}
		rdr = pool_get(&pf_rdr_pl, PR_NOWAIT);
		if (rdr == NULL) {
			error = ENOMEM;
			break;
		}
		bcopy(&pr->rdr, rdr, sizeof(struct pf_rdr));
		if (rdr->ifname[0]) {
			rdr->ifp = ifunit(rdr->ifname);
			if (rdr->ifp == NULL) {
				pool_put(&pf_rdr_pl, rdr);
				error = EINVAL;
				break;
			}
		} else
			rdr->ifp = NULL;
		TAILQ_INSERT_TAIL(pf_rdrs_inactive, rdr, entries);
		break;
	}

	/* Make the inactive rdr list active; EBUSY on a stale ticket. */
	case DIOCCOMMITRDRS: {
		u_int32_t *ticket = (u_int32_t *)addr;
		struct pf_rdrqueue *old_rdrs;
		struct pf_rdr *rdr;

		if (*ticket != ticket_rdrs_inactive) {
			error = EBUSY;
			break;
		}

		/* Swap rdrs, keep the old. */
		s = splsoftnet();
		old_rdrs = pf_rdrs_active;
		pf_rdrs_active = pf_rdrs_inactive;
		pf_rdrs_inactive = old_rdrs;
		ticket_rdrs_active = ticket_rdrs_inactive;
		splx(s);

		/* Purge the old rdr list */
		while ((rdr = TAILQ_FIRST(old_rdrs)) != NULL) {
			TAILQ_REMOVE(old_rdrs, rdr, entries);
			pool_put(&pf_rdr_pl, rdr);
		}
		break;
	}

	/* Report the number of active rdr entries and the active ticket. */
	case DIOCGETRDRS: {
		struct pfioc_rdr *pr = (struct pfioc_rdr *)addr;
		struct pf_rdr *rdr;

		pr->nr = 0;
		s = splsoftnet();
		TAILQ_FOREACH(rdr, pf_rdrs_active, entries)
			pr->nr++;
		pr->ticket = ticket_rdrs_active;
		splx(s);
		break;
	}

	/*
	 * Copy out the active rdr entry at list position pr->nr (0-based).
	 * EBUSY on a stale ticket or when the position is past the end.
	 */
	case DIOCGETRDR: {
		struct pfioc_rdr *pr = (struct pfioc_rdr *)addr;
		struct pf_rdr *rdr;
		u_int32_t nr;

		if (pr->ticket != ticket_rdrs_active) {
			error = EBUSY;
			break;
		}
		nr = 0;
		s = splsoftnet();
		rdr = TAILQ_FIRST(pf_rdrs_active);
		while ((rdr != NULL) && (nr < pr->nr)) {
			rdr = TAILQ_NEXT(rdr, entries);
			nr++;
		}
		if (rdr == NULL) {
			error = EBUSY;
			splx(s);
			break;
		}
		bcopy(rdr, &pr->rdr, sizeof(struct pf_rdr));
		splx(s);
		break;
	}

	/* Drop all states: mark every state expired, then purge. */
	case DIOCCLRSTATES: {
		struct pf_tree_node *n;

		s = splsoftnet();
		for (n = pf_tree_first(tree_ext_gwy); n != NULL;
		    n = pf_tree_next(n))
			n->state->expire = 0;
		pf_purge_expired_states();
		splx(s);
		break;
	}

	/*
	 * Copy out the state at position ps->nr (0-based) in tree_ext_gwy
	 * order; EBUSY when the position is past the end.  In the copy,
	 * `creation` is converted to the age in seconds and `expire` to the
	 * number of seconds remaining (0 if already due).
	 */
	case DIOCGETSTATE: {
		struct pfioc_state *ps = (struct pfioc_state *)addr;
		struct pf_tree_node *n;
		u_int32_t nr;

		nr = 0;
		s = splsoftnet();
		n = pf_tree_first(tree_ext_gwy);
		while ((n != NULL) && (nr < ps->nr)) {
			n = pf_tree_next(n);
			nr++;
		}
		if (n == NULL) {
			error = EBUSY;
			splx(s);
			break;
		}
		bcopy(n->state, &ps->state, sizeof(struct pf_state));
		splx(s);
		microtime(&pftv);
		ps->state.creation = pftv.tv_sec - ps->state.creation;
		if (ps->state.expire <= pftv.tv_sec)
			ps->state.expire = 0;
		else
			ps->state.expire -= pftv.tv_sec;
		break;
	}

	/* Select the status interface by name; EINVAL if it does not exist. */
	case DIOCSETSTATUSIF: {
		struct pfioc_if *pi = (struct pfioc_if *)addr;
		struct ifnet *ifp;

		if ((ifp = ifunit(pi->ifname)) == NULL)
			error = EINVAL;
		else
			status_ifp = ifp;
		break;
	}

	/* Copy out the status block. */
	case DIOCGETSTATUS: {
		/* NOTE: this `s` shadows the function-level `int s`. */
		struct pf_status *s = (struct pf_status *)addr;
		bcopy(&pf_status, s, sizeof(struct pf_status));
		break;
	}

	/* Zero the status block, keeping the running flag and state count. */
	case DIOCCLRSTATUS: {
		u_int8_t running = pf_status.running;
		u_int32_t states = pf_status.states;

		bzero(&pf_status, sizeof(struct pf_status));
		pf_status.running = running;
		pf_status.states = states;
		break;
	}

	/*
	 * Look up the translation of a connection in the state trees.
	 * EINVAL if any of proto, addresses or ports is zero, ENOENT if no
	 * matching state exists.
	 */
	case DIOCNATLOOK: {
		struct pf_natlook *pnl = (struct pf_natlook *)addr;
		struct pf_state *st;
		struct pf_tree_key key;
		int direction = pnl->direction;

		key.proto = pnl->proto;

		/*
		 * userland gives us source and dest of connection, reverse
		 * the lookup so we ask for what happens with the return
		 * traffic, enabling us to find it in the state tree.
		 */
		key.addr[1].s_addr = pnl->saddr;
		key.port[1] = pnl->sport;
		key.addr[0].s_addr = pnl->daddr;
		key.port[0] = pnl->dport;

		if (!pnl->proto || !pnl->saddr || !pnl->daddr ||
		    !pnl->dport || !pnl->sport)
			error = EINVAL;
		else {
			s = splsoftnet();
			if (direction == PF_IN)
				st = pf_find_state(tree_ext_gwy, &key);
			else
				st = pf_find_state(tree_lan_ext, &key);
			if (st != NULL) {
				if (direction  == PF_IN) {
					/* Source becomes the state's lan side. */
					pnl->rsaddr = st->lan.addr;
					pnl->rsport = st->lan.port;
					pnl->rdaddr = pnl->daddr;
					pnl->rdport = pnl->dport;
				} else {
					/* Destination becomes the state's gwy side. */
					pnl->rdaddr = st->gwy.addr;
					pnl->rdport = st->gwy.port;
					pnl->rsaddr = pnl->saddr;
					pnl->rsport = pnl->sport;
				}
			} else
				error = ENOENT;
			splx(s);
		}
		break;
	}

	/* Set the debug level consulted by DPFPRINTF. */
	case DIOCSETDEBUG: {
		u_int32_t *level = (u_int32_t *)addr;
		pf_status.debug = *level;
		break;
	}

	default:
		error = ENODEV;
		break;
	}

	return (error);
}
1206 
/*
 * Helper for pf_calc_skip_steps(); handles skip criterion number i.
 * Uses the caller's locals: a (bitmask of criteria still being extended),
 * r (rule whose skip pointers are being computed) and s (a later rule being
 * compared with r).  While bit i of a is set and condition c holds,
 * r->skip[i] is moved past s; the first time c fails the bit is cleared and
 * r->skip[i] stays where it is.
 */
#define		 PF_CALC_SKIP_STEP(i, c) \
		do { \
			if (a & (1 << (i))) { \
				if (!(c)) \
					a ^= (1 << (i)); \
				else \
					r->skip[(i)] = TAILQ_NEXT(s, entries); \
			} \
		} while (0)
1216 
1217 void
1218 pf_calc_skip_steps(struct pf_rulequeue *rules)
1219 {
1220 	struct pf_rule *r, *s;
1221 	int a, i;
1222 
1223 	r = TAILQ_FIRST(rules);
1224 	while (r != NULL) {
1225 		a = 0;
1226 		for (i = 0; i < 5; ++i) {
1227 			a |= 1 << i;
1228 			r->skip[i] = TAILQ_NEXT(r, entries);
1229 		}
1230 		s = TAILQ_NEXT(r, entries);
1231 		while (a && s != NULL) {
1232 			PF_CALC_SKIP_STEP(0, s->proto == r->proto);
1233 			PF_CALC_SKIP_STEP(1,
1234 			    s->src.addr == r->src.addr &&
1235 			    s->src.mask == r->src.mask &&
1236 			    s->src.not == r->src.not);
1237 			PF_CALC_SKIP_STEP(2,
1238 			    s->src.port[0] == r->src.port[0] &&
1239 			    s->src.port[1] == r->src.port[1] &&
1240 			    s->src.port_op == r->src.port_op);
1241 			PF_CALC_SKIP_STEP(3,
1242 			    s->dst.addr == r->dst.addr &&
1243 			    s->dst.mask == r->dst.mask &&
1244 			    s->dst.not == r->dst.not);
1245 			PF_CALC_SKIP_STEP(4,
1246 			    s->dst.port[0] == r->dst.port[0] &&
1247 			    s->dst.port[1] == r->dst.port[1] &&
1248 			    s->dst.port_op == r->dst.port_op);
1249 			s = TAILQ_NEXT(s, entries);
1250 		}
1251 		r = TAILQ_NEXT(r, entries);
1252 	}
1253 }
1254 
/*
 * Incrementally adjust a 16-bit checksum after one 16-bit word covered by
 * it changed from old to new.  The adjusted sum is folded back into 16 bits;
 * a result of zero is returned as 65535.
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new)
{
	u_int32_t sum = cksum + old - new;
	u_int32_t folded;

	/* add the upper half into the lower half and keep 16 bits */
	folded = ((sum >> 16) + (sum & 65535)) & 65535;

	return (folded != 0 ? folded : 65535);
}
1267 
/*
 * Replace an address/port pair and patch the two checksums covering them.
 *
 * *a is set to an and *p to pn.  *ic is corrected for the address change
 * only; *pc is corrected for both the address and the port change.
 */
void
pf_change_ap(u_int32_t *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
    u_int32_t an, u_int16_t pn)
{
	const u_int32_t ao = *a;
	const u_int16_t po = *p;
	/* the address is handled as two 16-bit words */
	const u_int16_t ao_hi = ao / 65536, ao_lo = ao % 65536;
	const u_int16_t an_hi = an / 65536, an_lo = an % 65536;
	u_int16_t sum;

	*a = an;
	sum = pf_cksum_fixup(*ic, ao_hi, an_hi);
	*ic = pf_cksum_fixup(sum, ao_lo, an_lo);

	*p = pn;
	sum = pf_cksum_fixup(*pc, ao_hi, an_hi);
	sum = pf_cksum_fixup(sum, ao_lo, an_lo);
	*pc = pf_cksum_fixup(sum, po, pn);
}
1283 
/*
 * Replace the address *a with an and patch the checksum *c that covers it.
 */
void
pf_change_a(u_int32_t *a, u_int16_t *c, u_int32_t an)
{
	const u_int32_t ao = *a;
	u_int16_t sum;

	*a = an;
	/* fix up the upper 16-bit word first, then the lower one */
	sum = pf_cksum_fixup(*c, ao / 65536, an / 65536);
	*c = pf_cksum_fixup(sum, ao % 65536, an % 65536);
}
1293 
1294 void
1295 pf_change_icmp(u_int32_t *ia, u_int16_t *ip, u_int32_t *oa, u_int32_t na,
1296     u_int16_t np, u_int16_t *pc, u_int16_t *h2c, u_int16_t *ic, u_int16_t *hc)
1297 {
1298 	u_int32_t oia = *ia, ooa = *oa, opc, oh2c = *h2c;
1299 	u_int16_t oip = *ip;
1300 
1301 	if (pc != NULL)
1302 		opc = *pc;
1303 	/* Change inner protocol port, fix inner protocol checksum. */
1304 	*ip = np;
1305 	if (pc != NULL)
1306 		*pc = pf_cksum_fixup(*pc, oip, *ip);
1307 	*ic = pf_cksum_fixup(*ic, oip, *ip);
1308 	if (pc != NULL)
1309 		*ic = pf_cksum_fixup(*ic, opc, *pc);
1310 	/* Change inner ip address, fix inner ip checksum and icmp checksum. */
1311 	*ia = na;
1312 	*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c, oia / 65536, *ia / 65536),
1313 	    oia % 65536, *ia % 65536);
1314 	*ic = pf_cksum_fixup(pf_cksum_fixup(*ic, oia / 65536, *ia / 65536),
1315 	    oia % 65536, *ia % 65536);
1316 	*ic = pf_cksum_fixup(*ic, oh2c, *h2c);
1317 	/* Change outer ip address, fix outer ip checksum. */
1318 	*oa = na;
1319 	*hc = pf_cksum_fixup(pf_cksum_fixup(*hc, ooa / 65536, *oa / 65536),
1320 	    ooa % 65536, *oa % 65536);
1321 }
1322 
1323 void
1324 pf_send_reset(struct ip *h, int off, struct tcphdr *th)
1325 {
1326 	struct mbuf *m;
1327 	struct m_tag *mtag;
1328 	int len = sizeof(struct ip) + sizeof(struct tcphdr);
1329 	struct ip *h2;
1330 	struct tcphdr *th2;
1331 
1332 	/* don't reply to RST packets */
1333 	if (th->th_flags & TH_RST)
1334 		return;
1335 
1336 	/* create outgoing mbuf */
1337 	mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT);
1338 	if (mtag == NULL)
1339 		return;
1340 	m = m_gethdr(M_DONTWAIT, MT_HEADER);
1341 	if (m == NULL) {
1342 		m_tag_free(mtag);
1343 		return;
1344 	}
1345 	m_tag_prepend(m, mtag);
1346 	m->m_data += max_linkhdr;
1347 	m->m_pkthdr.len = m->m_len = len;
1348 	m->m_pkthdr.rcvif = NULL;
1349 	bzero(m->m_data, len);
1350 	h2 = mtod(m, struct ip *);
1351 
1352 	/* IP header fields included in the TCP checksum */
1353 	h2->ip_p = IPPROTO_TCP;
1354 	h2->ip_len = htons(sizeof(*th2));
1355 	h2->ip_src.s_addr = h->ip_dst.s_addr;
1356 	h2->ip_dst.s_addr = h->ip_src.s_addr;
1357 
1358 	/* TCP header */
1359 	th2 = (struct tcphdr *)((caddr_t)h2 + sizeof(struct ip));
1360 	th2->th_sport = th->th_dport;
1361 	th2->th_dport = th->th_sport;
1362 	if (th->th_flags & TH_ACK) {
1363 		th2->th_seq = th->th_ack;
1364 		th2->th_flags = TH_RST;
1365 	} else {
1366 		int tlen = h->ip_len - off - (th->th_off << 2);
1367 		if (th->th_flags & TH_SYN)
1368 			tlen++;
1369 		if (th->th_flags & TH_FIN)
1370 			tlen++;
1371 		th2->th_ack = htonl(ntohl(th->th_seq) + tlen);
1372 		th2->th_flags = TH_RST | TH_ACK;
1373 	}
1374 	th2->th_off = sizeof(*th2) >> 2;
1375 
1376 	/* TCP checksum */
1377 	th2->th_sum = in_cksum(m, len);
1378 
1379 	/* Finish the IP header */
1380 	h2->ip_v = 4;
1381 	h2->ip_hl = sizeof(*h2) >> 2;
1382 	h2->ip_ttl = 128;
1383 	h2->ip_sum = 0;
1384 	h2->ip_len = len;
1385 	h2->ip_off = 0;
1386 	ip_output(m, NULL, NULL, 0, NULL, NULL);
1387 }
1388 
1389 void
1390 pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code)
1391 {
1392 	struct m_tag *mtag;
1393 	struct mbuf *m0;
1394 
1395 	mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT);
1396 	if (mtag == NULL)
1397 		return;
1398 	m0 = m_copy(m, 0, M_COPYALL);
1399 	if (m0 == NULL) {
1400 		m_tag_free(mtag);
1401 		return;
1402 	}
1403 	m_tag_prepend(m0, mtag);
1404 	icmp_error(m0, type, code, 0, 0);
1405 }
1406 
/*
 * Compare addresses a and b under mask m.
 * With n == 0 the result is 1 when the masked addresses are equal and 0
 * otherwise; any other n negates the result.
 */
int
pf_match_addr(u_int8_t n, u_int32_t a, u_int32_t m, u_int32_t b)
{
	const int same = ((a & m) == (b & m));

	return (n ? !same : same);
}
1427 
1428 int
1429 pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
1430 {
1431 	NTOHS(a1);
1432 	NTOHS(a2);
1433 	NTOHS(p);
1434 	switch (op) {
1435 	case PF_OP_IRG:
1436 		return (p > a1) && (p < a2);
1437 	case PF_OP_XRG:
1438 		return (p < a1) || (p > a2);
1439 	case PF_OP_EQ:
1440 		return (p == a1);
1441 	case PF_OP_NE:
1442 		return (p != a1);
1443 	case PF_OP_LT:
1444 		return (p < a1);
1445 	case PF_OP_LE:
1446 		return (p <= a1);
1447 	case PF_OP_GT:
1448 		return (p > a1);
1449 	case PF_OP_GE:
1450 		return (p >= a1);
1451 	}
1452 	return (0); /* never reached */
1453 }
1454 
1455 int
1456 pf_chk_sport(struct pf_port_list *plist, u_int16_t port)
1457 {
1458 	struct pf_port_node	*pnode;
1459 
1460 	LIST_FOREACH(pnode, plist, next) {
1461 		if (pnode->port == port)
1462 			return (1);
1463 	}
1464 
1465 	return (0);
1466 }
1467 
1468 int
1469 pf_add_sport(struct pf_port_list *plist, u_int16_t port)
1470 {
1471 	struct pf_port_node *pnode;
1472 
1473 	pnode = pool_get(&pf_sport_pl, M_NOWAIT);
1474 	if (pnode == NULL)
1475 		return (ENOMEM);
1476 
1477 	pnode->port = port;
1478 	LIST_INSERT_HEAD(plist, pnode, next);
1479 
1480 	return (0);
1481 }
1482 
1483 void
1484 pf_put_sport(u_int8_t proto, u_int16_t port)
1485 {
1486 	struct pf_port_list	*plist;
1487 	struct pf_port_node	*pnode;
1488 
1489 	if (proto == IPPROTO_TCP)
1490 		plist = &pf_tcp_ports;
1491 	else if (proto == IPPROTO_UDP)
1492 		plist = &pf_udp_ports;
1493 	else
1494 		return;
1495 
1496 	LIST_FOREACH(pnode, plist, next) {
1497 		if (pnode->port == port) {
1498 			LIST_REMOVE(pnode, next);
1499 			pool_put(&pf_sport_pl, pnode);
1500 			break;
1501 		}
1502 	}
1503 }
1504 
1505 int
1506 pf_get_sport(u_int8_t proto, u_int16_t low, u_int16_t high, u_int16_t *port)
1507 {
1508 	struct pf_port_list	*plist;
1509 	int			step;
1510 	u_int16_t		cut;
1511 
1512 	if (proto == IPPROTO_TCP)
1513 		plist = &pf_tcp_ports;
1514 	else if (proto == IPPROTO_UDP)
1515 		plist = &pf_udp_ports;
1516 	else
1517 		return (EINVAL);
1518 
1519 	/* port search; start random, step; similar 2 portloop in in_pcbbind */
1520 	if (low == high) {
1521 		*port = low;
1522 		if (!pf_chk_sport(plist, *port))
1523 			goto found;
1524 		return (1);
1525 	} else if (low < high) {
1526 		step = 1;
1527 		cut = arc4random() % (high - low) + low;
1528 	} else {
1529 		step = -1;
1530 		cut = arc4random() % (low - high) + high;
1531 	}
1532 
1533 	*port = cut - step;
1534 	do {
1535 		*port += step;
1536 		if (!pf_chk_sport(plist, *port))
1537 			goto found;
1538 	} while (*port != low && *port != high);
1539 
1540 	step = -step;
1541 	*port = cut;
1542 	do {
1543 		*port += step;
1544 		if (!pf_chk_sport(plist, *port))
1545 			goto found;
1546 	} while (*port != low && *port != high);
1547 
1548 	return (1);					/* none available */
1549 
1550 found:
1551 	return (pf_add_sport(plist, *port));
1552 }
1553 
1554 struct pf_nat *
1555 pf_get_nat(struct ifnet *ifp, u_int8_t proto, u_int32_t saddr, u_int32_t daddr)
1556 {
1557 	struct pf_nat *n, *nm = NULL;
1558 
1559 	n = TAILQ_FIRST(pf_nats_active);
1560 	while (n && nm == NULL) {
1561 		if (((n->ifp == NULL) || (n->ifp == ifp && !n->ifnot) ||
1562 		    (n->ifp != ifp && n->ifnot)) &&
1563 		    (!n->proto || n->proto == proto) &&
1564 		    pf_match_addr(n->snot, n->saddr, n->smask, saddr) &&
1565 		    pf_match_addr(n->dnot, n->daddr, n->dmask, daddr))
1566 			nm = n;
1567 		else
1568 			n = TAILQ_NEXT(n, entries);
1569 	}
1570 	return (nm);
1571 }
1572 
1573 struct pf_rdr *
1574 pf_get_rdr(struct ifnet *ifp, u_int8_t proto, u_int32_t saddr, u_int32_t daddr,
1575     u_int16_t dport)
1576 {
1577 	struct pf_rdr *r, *rm = NULL;
1578 
1579 	r = TAILQ_FIRST(pf_rdrs_active);
1580 	while (r && rm == NULL) {
1581 		if (((r->ifp == NULL) || (r->ifp == ifp && !r->ifnot) ||
1582 		    (r->ifp != ifp && r->ifnot)) &&
1583 		    (!r->proto || r->proto == proto) &&
1584 		    pf_match_addr(r->snot, r->saddr, r->smask, saddr) &&
1585 		    pf_match_addr(r->dnot, r->daddr, r->dmask, daddr) &&
1586 		    ((!r->dport2 && dport == r->dport) ||
1587 		    (r->dport2 && (ntohs(dport) >= ntohs(r->dport)) &&
1588 		    ntohs(dport) <= ntohs(r->dport2))))
1589 			rm = r;
1590 		else
1591 			r = TAILQ_NEXT(r, entries);
1592 	}
1593 	return (rm);
1594 }
1595 
1596 u_int16_t
1597 pf_map_port_range(struct pf_rdr *rdr, u_int16_t port)
1598 {
1599 	u_int32_t nport;
1600 
1601 	nport = ntohs(rdr->rport) - ntohs(rdr->dport) + ntohs(port);
1602 	/* wrap around if necessary */
1603 	if (nport > 65535)
1604 		nport -= 65535;
1605 	return htons((u_int16_t)nport);
1606 }
1607 
1608 int
1609 pf_test_tcp(int direction, struct ifnet *ifp, struct mbuf *m,
1610     int ipoff, int off, struct ip *h, struct tcphdr *th)
1611 {
1612 	struct pf_nat *nat = NULL;
1613 	struct pf_rdr *rdr = NULL;
1614 	u_int32_t baddr;
1615 	u_int16_t bport, nport = 0;
1616 	struct pf_rule *r, *rm = NULL;
1617 	u_short reason;
1618 	int rewrite = 0, error;
1619 
1620 	if (direction == PF_OUT) {
1621 		/* check outgoing packet for NAT */
1622 		if ((nat = pf_get_nat(ifp, IPPROTO_TCP,
1623 		    h->ip_src.s_addr, h->ip_dst.s_addr)) != NULL) {
1624 			baddr = h->ip_src.s_addr;
1625 			bport = th->th_sport;
1626 			error = pf_get_sport(IPPROTO_TCP, 50001,
1627 			    65535, &nport);
1628 			if (error)
1629 				return (PF_DROP);
1630 			pf_change_ap(&h->ip_src.s_addr, &th->th_sport,
1631 			    &h->ip_sum, &th->th_sum, nat->raddr, htons(nport));
1632 			rewrite++;
1633 		}
1634 	} else {
1635 		/* check incoming packet for RDR */
1636 		if ((rdr = pf_get_rdr(ifp, IPPROTO_TCP, h->ip_src.s_addr,
1637 		    h->ip_dst.s_addr, th->th_dport)) != NULL) {
1638 			baddr = h->ip_dst.s_addr;
1639 			bport = th->th_dport;
1640 			if (rdr->opts & PF_RPORT_RANGE)
1641 				nport = pf_map_port_range(rdr, th->th_dport);
1642 			else
1643 				nport = rdr->rport;
1644 
1645 			pf_change_ap(&h->ip_dst.s_addr, &th->th_dport,
1646 			    &h->ip_sum, &th->th_sum, rdr->raddr, nport);
1647 			rewrite++;
1648 		}
1649 	}
1650 
1651 	r = TAILQ_FIRST(pf_rules_active);
1652 	while (r != NULL) {
1653 		if (r->action == PF_SCRUB) {
1654 			r = TAILQ_NEXT(r, entries);
1655 			continue;
1656 		}
1657 		if (r->proto && r->proto != h->ip_p)
1658 			r = r->skip[0];
1659 		else if (r->src.mask && !pf_match_addr(r->src.not,
1660 		    r->src.addr, r->src.mask, h->ip_src.s_addr))
1661 			r = r->skip[1];
1662 		else if (r->src.port_op && !pf_match_port(r->src.port_op,
1663 		    r->src.port[0], r->src.port[1], th->th_sport))
1664 			r = r->skip[2];
1665 		else if (r->dst.mask && !pf_match_addr(r->dst.not,
1666 		    r->dst.addr, r->dst.mask, h->ip_dst.s_addr))
1667 			r = r->skip[3];
1668 		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
1669 		    r->dst.port[0], r->dst.port[1], th->th_dport))
1670 			r = r->skip[4];
1671 		else if (r->direction != direction)
1672 			r = TAILQ_NEXT(r, entries);
1673 		else if (r->ifp != NULL && r->ifp != ifp)
1674 			r = TAILQ_NEXT(r, entries);
1675 		else if ((r->flagset & th->th_flags) != r->flags)
1676 			r = TAILQ_NEXT(r, entries);
1677 		else {
1678 			rm = r;
1679 			if (rm->quick)
1680 				break;
1681 			r = TAILQ_NEXT(r, entries);
1682 		}
1683 	}
1684 
1685 	if (rm != NULL) {
1686 		REASON_SET(&reason, PFRES_MATCH);
1687 
1688 		/* XXX will log packet before rewrite */
1689 		if (rm->log)
1690 			PFLOG_PACKET(h, m, AF_INET, direction, reason, rm);
1691 
1692 		if ((rm->action == PF_DROP) &&
1693 		    ((rm->rule_flag & PFRULE_RETURNRST) || rm->return_icmp)) {
1694 			/* undo NAT/RST changes, if they have taken place */
1695 			if (nat != NULL) {
1696 				pf_change_ap(&h->ip_src.s_addr, &th->th_sport,
1697 				    &h->ip_sum, &th->th_sum, baddr, bport);
1698 				rewrite++;
1699 			} else if (rdr != NULL) {
1700 				pf_change_ap(&h->ip_dst.s_addr, &th->th_dport,
1701 				    &h->ip_sum, &th->th_sum, baddr, bport);
1702 				rewrite++;
1703 			}
1704 			if (rm->rule_flag & PFRULE_RETURNRST)
1705 				pf_send_reset(h, off, th);
1706 			else
1707 				pf_send_icmp(m, rm->return_icmp >> 8,
1708 				    rm->return_icmp & 255);
1709 		}
1710 
1711 		if (rm->action == PF_DROP) {
1712 			if (nport && nat != NULL)
1713 				pf_put_sport(IPPROTO_TCP, nport);
1714 			return (PF_DROP);
1715 		}
1716 	}
1717 
1718 	if (((rm != NULL) && rm->keep_state) || nat != NULL || rdr != NULL) {
1719 		/* create new state */
1720 		u_int16_t len;
1721 		struct pf_state *s;
1722 
1723 		len = h->ip_len - off - (th->th_off << 2);
1724 		s = pool_get(&pf_state_pl, PR_NOWAIT);
1725 		if (s == NULL) {
1726 			if (nport && nat != NULL)
1727 				pf_put_sport(IPPROTO_TCP, nport);
1728 			return (PF_DROP);
1729 		}
1730 
1731 		s->rule = rm;
1732 		s->log = rm && (rm->log & 2);
1733 		s->proto = IPPROTO_TCP;
1734 		s->direction = direction;
1735 		if (direction == PF_OUT) {
1736 			s->gwy.addr = h->ip_src.s_addr;
1737 			s->gwy.port = th->th_sport;		/* sport */
1738 			s->ext.addr = h->ip_dst.s_addr;
1739 			s->ext.port = th->th_dport;
1740 			if (nat != NULL) {
1741 				s->lan.addr = baddr;
1742 				s->lan.port = bport;
1743 			} else {
1744 				s->lan.addr = s->gwy.addr;
1745 				s->lan.port = s->gwy.port;
1746 			}
1747 		} else {
1748 			s->lan.addr = h->ip_dst.s_addr;
1749 			s->lan.port = th->th_dport;
1750 			s->ext.addr = h->ip_src.s_addr;
1751 			s->ext.port = th->th_sport;
1752 			if (rdr != NULL) {
1753 				s->gwy.addr = baddr;
1754 				s->gwy.port = bport;
1755 			} else {
1756 				s->gwy.addr = s->lan.addr;
1757 				s->gwy.port = s->lan.port;
1758 			}
1759 		}
1760 		s->src.seqlo = ntohl(th->th_seq) + len;
1761 		if (th->th_flags & TH_SYN)
1762 			s->src.seqlo++;
1763 		if (th->th_flags & TH_FIN)
1764 			s->src.seqlo++;
1765 		s->src.seqhi = s->src.seqlo + 1;
1766 		s->src.max_win = MAX(ntohs(th->th_win), 1);
1767 
1768 		s->dst.seqlo = 0;	/* Haven't seen these yet */
1769 		s->dst.seqhi = 1;
1770 		s->dst.max_win = 1;
1771 		s->src.state = 1;
1772 		s->dst.state = 0;
1773 		s->creation = pftv.tv_sec;
1774 		s->expire = pftv.tv_sec + 60;
1775 		s->packets = 1;
1776 		s->bytes = len;
1777 		pf_insert_state(s);
1778 	}
1779 
1780 	/* copy back packet headers if we performed NAT operations */
1781 	if (rewrite)
1782 		m_copyback(m, off, sizeof(*th), (caddr_t)th);
1783 
1784 	return (PF_PASS);
1785 }
1786 
1787 int
1788 pf_test_udp(int direction, struct ifnet *ifp, struct mbuf *m,
1789     int ipoff, int off, struct ip *h, struct udphdr *uh)
1790 {
1791 	struct pf_nat *nat = NULL;
1792 	struct pf_rdr *rdr = NULL;
1793 	u_int32_t baddr;
1794 	u_int16_t bport, nport = 0;
1795 	struct pf_rule *r, *rm = NULL;
1796 	u_short reason;
1797 	int rewrite = 0, error;
1798 
1799 	if (direction == PF_OUT) {
1800 		/* check outgoing packet for NAT */
1801 		if ((nat = pf_get_nat(ifp, IPPROTO_UDP,
1802 		    h->ip_src.s_addr, h->ip_dst.s_addr)) != NULL) {
1803 			baddr = h->ip_src.s_addr;
1804 			bport = uh->uh_sport;
1805 			error = pf_get_sport(IPPROTO_UDP, 50001,
1806 			    65535, &nport);
1807 			if (error)
1808 				return (PF_DROP);
1809 			pf_change_ap(&h->ip_src.s_addr, &uh->uh_sport,
1810 			    &h->ip_sum, &uh->uh_sum, nat->raddr, htons(nport));
1811 			rewrite++;
1812 		}
1813 	} else {
1814 		/* check incoming packet for RDR */
1815 		if ((rdr = pf_get_rdr(ifp, IPPROTO_UDP, h->ip_src.s_addr,
1816 		    h->ip_dst.s_addr, uh->uh_dport)) != NULL) {
1817 			baddr = h->ip_dst.s_addr;
1818 			bport = uh->uh_dport;
1819 			if (rdr->opts & PF_RPORT_RANGE)
1820 				nport = pf_map_port_range(rdr, uh->uh_dport);
1821 			else
1822 				nport = rdr->rport;
1823 
1824 			pf_change_ap(&h->ip_dst.s_addr, &uh->uh_dport,
1825 			    &h->ip_sum, &uh->uh_sum, rdr->raddr,
1826 			    nport);
1827 
1828 			rewrite++;
1829 		}
1830 	}
1831 
1832 	r = TAILQ_FIRST(pf_rules_active);
1833 	while (r != NULL) {
1834 		if (r->action == PF_SCRUB) {
1835 			r = TAILQ_NEXT(r, entries);
1836 			continue;
1837 		}
1838 		if (r->proto && r->proto != h->ip_p)
1839 			r = r->skip[0];
1840 		else if (r->src.mask && !pf_match_addr(r->src.not,
1841 		    r->src.addr, r->src.mask, h->ip_src.s_addr))
1842 			r = r->skip[1];
1843 		else if (r->src.port_op && !pf_match_port(r->src.port_op,
1844 		    r->src.port[0], r->src.port[1], uh->uh_sport))
1845 			r = r->skip[2];
1846 		else if (r->dst.mask && !pf_match_addr(r->dst.not,
1847 		    r->dst.addr, r->dst.mask, h->ip_dst.s_addr))
1848 			r = r->skip[3];
1849 		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
1850 		    r->dst.port[0], r->dst.port[1], uh->uh_dport))
1851 			r = r->skip[4];
1852 		else if (r->direction != direction)
1853 			r = TAILQ_NEXT(r, entries);
1854 		else if (r->ifp != NULL && r->ifp != ifp)
1855 			r = TAILQ_NEXT(r, entries);
1856 		else {
1857 			rm = r;
1858 			if (rm->quick)
1859 				break;
1860 			r = TAILQ_NEXT(r, entries);
1861 		}
1862 	}
1863 
1864 	if (rm != NULL) {
1865 		REASON_SET(&reason, PFRES_MATCH);
1866 
1867 		/* XXX will log packet before rewrite */
1868 		if (rm->log)
1869 			PFLOG_PACKET(h, m, AF_INET, direction, reason, rm);
1870 
1871 		if ((rm->action == PF_DROP) && rm->return_icmp) {
1872 			/* undo NAT/RST changes, if they have taken place */
1873 			if (nat != NULL) {
1874 				pf_change_ap(&h->ip_src.s_addr, &uh->uh_sport,
1875 				    &h->ip_sum, &uh->uh_sum, baddr, bport);
1876 				rewrite++;
1877 			} else if (rdr != NULL) {
1878 				pf_change_ap(&h->ip_dst.s_addr, &uh->uh_dport,
1879 				    &h->ip_sum, &uh->uh_sum, baddr, bport);
1880 				rewrite++;
1881 			}
1882 			pf_send_icmp(m, rm->return_icmp >> 8,
1883 			    rm->return_icmp & 255);
1884 		}
1885 
1886 		if (rm->action == PF_DROP) {
1887 			if (nport && nat != NULL)
1888 				pf_put_sport(IPPROTO_UDP, nport);
1889 			return (PF_DROP);
1890 		}
1891 	}
1892 
1893 	if ((rm != NULL && rm->keep_state) || nat != NULL || rdr != NULL) {
1894 		/* create new state */
1895 		u_int16_t len;
1896 		struct pf_state *s;
1897 
1898 		len = h->ip_len - off - sizeof(*uh);
1899 		s = pool_get(&pf_state_pl, PR_NOWAIT);
1900 		if (s == NULL) {
1901 			if (nport && nat != NULL)
1902 				pf_put_sport(IPPROTO_UDP, nport);
1903 			return (PF_DROP);
1904 		}
1905 
1906 		s->rule = rm;
1907 		s->log = rm && (rm->log & 2);
1908 		s->proto = IPPROTO_UDP;
1909 		s->direction = direction;
1910 		if (direction == PF_OUT) {
1911 			s->gwy.addr = h->ip_src.s_addr;
1912 			s->gwy.port = uh->uh_sport;
1913 			s->ext.addr = h->ip_dst.s_addr;
1914 			s->ext.port = uh->uh_dport;
1915 			if (nat != NULL) {
1916 				s->lan.addr = baddr;
1917 				s->lan.port = bport;
1918 			} else {
1919 				s->lan.addr = s->gwy.addr;
1920 				s->lan.port = s->gwy.port;
1921 			}
1922 		} else {
1923 			s->lan.addr = h->ip_dst.s_addr;
1924 			s->lan.port = uh->uh_dport;
1925 			s->ext.addr = h->ip_src.s_addr;
1926 			s->ext.port = uh->uh_sport;
1927 			if (rdr != NULL) {
1928 				s->gwy.addr = baddr;
1929 				s->gwy.port = bport;
1930 			} else {
1931 				s->gwy.addr = s->lan.addr;
1932 				s->gwy.port = s->lan.port;
1933 			}
1934 		}
1935 		s->src.seqlo  = 0;
1936 		s->src.seqhi = 0;
1937 		s->src.max_win = 0;
1938 		s->src.state = 1;
1939 		s->dst.seqlo = 0;
1940 		s->dst.seqhi = 0;
1941 		s->dst.max_win = 0;
1942 		s->dst.state = 0;
1943 		s->creation = pftv.tv_sec;
1944 		s->expire = pftv.tv_sec + 30;
1945 		s->packets = 1;
1946 		s->bytes = len;
1947 		pf_insert_state(s);
1948 	}
1949 
1950 	/* copy back packet headers if we performed NAT operations */
1951 	if (rewrite)
1952 		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
1953 
1954 	return (PF_PASS);
1955 }
1956 
1957 int
1958 pf_test_icmp(int direction, struct ifnet *ifp, struct mbuf *m,
1959     int ipoff, int off, struct ip *h, struct icmp *ih)
1960 {
1961 	struct pf_nat *nat = NULL;
1962 	u_int32_t baddr;
1963 	struct pf_rule *r, *rm = NULL;
1964 	u_short reason;
1965 
1966 	if (direction == PF_OUT) {
1967 		/* check outgoing packet for NAT */
1968 		if ((nat = pf_get_nat(ifp, IPPROTO_ICMP,
1969 		    h->ip_src.s_addr, h->ip_dst.s_addr)) != NULL) {
1970 			baddr = h->ip_src.s_addr;
1971 			pf_change_a(&h->ip_src.s_addr, &h->ip_sum, nat->raddr);
1972 		}
1973 	}
1974 
1975 	r = TAILQ_FIRST(pf_rules_active);
1976 	while (r != NULL) {
1977 		if (r->action == PF_SCRUB) {
1978 			r = TAILQ_NEXT(r, entries);
1979 			continue;
1980 		}
1981 		if (r->proto && r->proto != h->ip_p)
1982 			r = r->skip[0];
1983 		else if (r->src.mask && !pf_match_addr(r->src.not,
1984 		    r->src.addr, r->src.mask, h->ip_src.s_addr))
1985 			r = r->skip[1];
1986 		else if (r->dst.mask && !pf_match_addr(r->dst.not,
1987 		    r->dst.addr, r->dst.mask, h->ip_dst.s_addr))
1988 			r = r->skip[3];
1989 		else if (r->direction != direction)
1990 			r = TAILQ_NEXT(r, entries);
1991 		else if (r->ifp != NULL && r->ifp != ifp)
1992 			r = TAILQ_NEXT(r, entries);
1993 		else if (r->type && r->type != ih->icmp_type + 1)
1994 			r = TAILQ_NEXT(r, entries);
1995 		else if (r->code && r->code != ih->icmp_code + 1)
1996 			r = TAILQ_NEXT(r, entries);
1997 		else {
1998 			rm = r;
1999 			if (rm->quick)
2000 				break;
2001 			r = TAILQ_NEXT(r, entries);
2002 		}
2003 	}
2004 
2005 	if (rm != NULL) {
2006 		REASON_SET(&reason, PFRES_MATCH);
2007 
2008 		/* XXX will log packet before rewrite */
2009 		if (rm->log)
2010 			PFLOG_PACKET(h, m, AF_INET, direction, reason, rm);
2011 
2012 		if (rm->action != PF_PASS)
2013 			return (PF_DROP);
2014 	}
2015 
2016 	if ((rm != NULL && rm->keep_state) || nat != NULL) {
2017 		/* create new state */
2018 		u_int16_t len;
2019 		u_int16_t id;
2020 		struct pf_state *s;
2021 
2022 		len = h->ip_len - off - ICMP_MINLEN;
2023 		id = ih->icmp_id;
2024 		s = pool_get(&pf_state_pl, PR_NOWAIT);
2025 		if (s == NULL)
2026 			return (PF_DROP);
2027 
2028 		s->rule	 = rm;
2029 		s->log	 = rm && (rm->log & 2);
2030 		s->proto = IPPROTO_ICMP;
2031 		s->direction = direction;
2032 		if (direction == PF_OUT) {
2033 			s->gwy.addr = h->ip_src.s_addr;
2034 			s->gwy.port = id;
2035 			s->ext.addr = h->ip_dst.s_addr;
2036 			s->ext.port = id;
2037 			if (nat != NULL)
2038 				s->lan.addr = baddr;
2039 			else
2040 				s->lan.addr = s->gwy.addr;
2041 			s->lan.port = id;
2042 		} else {
2043 			s->lan.addr = h->ip_dst.s_addr;
2044 			s->lan.port = id;
2045 			s->ext.addr = h->ip_src.s_addr;
2046 			s->ext.port = id;
2047 			s->gwy.addr = s->lan.addr;
2048 			s->gwy.port = id;
2049 		}
2050 		s->src.seqlo = 0;
2051 		s->src.seqhi = 0;
2052 		s->src.max_win = 0;
2053 		s->src.state = 0;
2054 		s->dst.seqlo = 0;
2055 		s->dst.seqhi = 0;
2056 		s->dst.max_win = 0;
2057 		s->dst.state = 0;
2058 		s->creation = pftv.tv_sec;
2059 		s->expire = pftv.tv_sec + 20;
2060 		s->packets = 1;
2061 		s->bytes = len;
2062 		pf_insert_state(s);
2063 	}
2064 
2065 	return (PF_PASS);
2066 }
2067 
2068 int
2069 pf_test_other(int direction, struct ifnet *ifp, struct mbuf *m, struct ip *h)
2070 {
2071 	struct pf_rule *r, *rm = NULL;
2072 
2073 	r = TAILQ_FIRST(pf_rules_active);
2074 	while (r != NULL) {
2075 		if (r->action == PF_SCRUB) {
2076 			r = TAILQ_NEXT(r, entries);
2077 			continue;
2078 		}
2079 		if (r->proto && r->proto != h->ip_p)
2080 			r = r->skip[0];
2081 		else if (r->src.mask && !pf_match_addr(r->src.not,
2082 		    r->src.addr, r->src.mask, h->ip_src.s_addr))
2083 			r = r->skip[1];
2084 		else if (r->dst.mask && !pf_match_addr(r->dst.not,
2085 		    r->dst.addr, r->dst.mask, h->ip_dst.s_addr))
2086 			r = r->skip[3];
2087 		else if (r->direction != direction)
2088 			r = TAILQ_NEXT(r, entries);
2089 		else if (r->ifp != NULL && r->ifp != ifp)
2090 			r = TAILQ_NEXT(r, entries);
2091 		else {
2092 			rm = r;
2093 			if (rm->quick)
2094 				break;
2095 			r = TAILQ_NEXT(r, entries);
2096 		}
2097 	}
2098 
2099 	if (rm != NULL) {
2100 		u_short reason;
2101 
2102 		REASON_SET(&reason, PFRES_MATCH);
2103 		if (rm->log)
2104 			PFLOG_PACKET(h, m, AF_INET, direction, reason, rm);
2105 
2106 		if (rm->action != PF_PASS)
2107 			return (PF_DROP);
2108 	}
2109 	return (PF_PASS);
2110 }
2111 
2112 int
2113 pf_test_state_tcp(struct pf_state **state, int direction, struct ifnet *ifp,
2114     struct mbuf *m, int ipoff, int off, struct ip *h, struct tcphdr *th)
2115 {
2116 	struct pf_tree_key key;
2117 	u_int16_t len = h->ip_len - off - (th->th_off << 2);
2118 	u_int16_t win = ntohs(th->th_win);
2119 	u_int32_t seq = ntohl(th->th_seq), ack = ntohl(th->th_ack);
2120 	u_int32_t end;
2121 	int ackskew;
2122 	struct pf_state_peer *src, *dst;
2123 
2124 	end = seq + len;
2125 	if (th->th_flags & TH_SYN)
2126 		end++;
2127 	if (th->th_flags & TH_FIN)
2128 		end++;
2129 
2130 	key.proto   = IPPROTO_TCP;
2131 	key.addr[0] = h->ip_src;
2132 	key.port[0] = th->th_sport;
2133 	key.addr[1] = h->ip_dst;
2134 	key.port[1] = th->th_dport;
2135 
2136 	if (direction == PF_IN)
2137 		*state = pf_find_state(tree_ext_gwy, &key);
2138 	else
2139 		*state = pf_find_state(tree_lan_ext, &key);
2140 	if (*state == NULL)
2141 		return (PF_DROP);
2142 
2143 	if (direction == (*state)->direction) {
2144 		src = &(*state)->src;
2145 		dst = &(*state)->dst;
2146 	} else {
2147 		src = &(*state)->dst;
2148 		dst = &(*state)->src;
2149 	}
2150 
2151 	if (src->seqlo == 0) {
2152 		/* First packet from this end.  Set its state */
2153 		src->seqlo = end;
2154 		src->seqhi = end + 1;
2155 		src->max_win = 1;
2156 		if (src->state < 1)
2157 			src->state = 1;
2158 	}
2159 
2160 	if ((th->th_flags & TH_ACK) == 0) {
2161 		/* Let it pass through the ack skew check */
2162 		ack = dst->seqlo;
2163 	} else if (ack == 0 &&
2164 	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) {
2165 		/* broken tcp stacks do not set ack */
2166 		ack = dst->seqlo;
2167 	}
2168 
2169 	if (seq == end) {
2170 		/* Ease sequencing restrictions on no data packets */
2171 		seq = src->seqlo;
2172 		end = seq;
2173 	}
2174 
2175 	ackskew = dst->seqlo - ack;
2176 
2177 #define MAXACKWINDOW (0xffff + 1500)
2178 	if (SEQ_GEQ(src->seqhi, end) &&
2179 	    /* Last octet inside other's window space */
2180 	    SEQ_GEQ(seq, src->seqlo - dst->max_win) &&
2181 	    /* Retrans: not more than one window back */
2182 	    (ackskew >= -MAXACKWINDOW) &&
2183 	    /* Acking not more than one window back */
2184 	    (ackskew <= MAXACKWINDOW)) {
2185 	    /* Acking not more than one window forward */
2186 
2187 		if (ackskew < 0) {
2188 			/* The sequencing algorithm is exteremely lossy
2189 			 * when there is fragmentation since the full
2190 			 * packet length can not be determined.  So we
2191 			 * deduce how much data passed by what the
2192 			 * other endpoint ACKs.  Thanks Guido!
2193 			 * (Why MAXACKWINDOW is used)
2194 			 */
2195 			dst->seqlo = ack;
2196 		}
2197 
2198 		(*state)->packets++;
2199 		(*state)->bytes += len;
2200 
2201 		/* update max window */
2202 		if (src->max_win < win)
2203 			src->max_win = win;
2204 		/* syncronize sequencing */
2205 		if (SEQ_GT(end, src->seqlo))
2206 			src->seqlo = end;
2207 		/* slide the window of what the other end can send */
2208 		if (SEQ_GEQ(ack + win, dst->seqhi))
2209 			dst->seqhi = ack + MAX(win, 1);
2210 
2211 
2212 		/* update states */
2213 		if (th->th_flags & TH_SYN)
2214 			if (src->state < 1)
2215 				src->state = 1;
2216 		if (th->th_flags & TH_FIN)
2217 			if (src->state < 3)
2218 				src->state = 3;
2219 		if (th->th_flags & TH_ACK) {
2220 			if (dst->state == 1)
2221 				dst->state = 2;
2222 			else if (dst->state == 3)
2223 				dst->state = 4;
2224 		}
2225 		if (th->th_flags & TH_RST)
2226 			src->state = dst->state = 5;
2227 
2228 		/* update expire time */
2229 		if (src->state >= 4 && dst->state >= 4)
2230 			(*state)->expire = pftv.tv_sec + 5;
2231 		else if (src->state >= 3 && dst->state >= 3)
2232 			(*state)->expire = pftv.tv_sec + 300;
2233 		else if (src->state < 2 || dst->state < 2)
2234 			(*state)->expire = pftv.tv_sec + 30;
2235 		else
2236 			(*state)->expire = pftv.tv_sec + 24*60*60;
2237 
2238 		/* translate source/destination address, if needed */
2239 		if (STATE_TRANSLATE(*state)) {
2240 			if (direction == PF_OUT)
2241 				pf_change_ap(&h->ip_src.s_addr,
2242 				    &th->th_sport, &h->ip_sum,
2243 				    &th->th_sum, (*state)->gwy.addr,
2244 				    (*state)->gwy.port);
2245 			else
2246 				pf_change_ap(&h->ip_dst.s_addr,
2247 				    &th->th_dport, &h->ip_sum,
2248 				    &th->th_sum, (*state)->lan.addr,
2249 				    (*state)->lan.port);
2250 			m_copyback(m, off, sizeof(*th), (caddr_t)th);
2251 		}
2252 
2253 		return (PF_PASS);
2254 
2255 	} else {
2256 		if (pf_status.debug >= PF_DEBUG_MISC) {
2257 			printf("pf: BAD state: ");
2258 			pf_print_state(*state);
2259 			pf_print_flags(th->th_flags);
2260 			printf(" seq=%lu ack=%lu len=%u ", seq, ack, len);
2261 			printf("\n");
2262 			printf("State failure: %c %c %c %c\n",
2263 			    SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
2264 			    SEQ_GEQ(seq, src->seqlo - dst->max_win) ? ' ': '2',
2265 			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
2266 			    (ackskew <= MAXACKWINDOW) ? ' ' : '4');
2267 		}
2268 		return (PF_DROP);
2269 	}
2270 }
2271 
2272 int
2273 pf_test_state_udp(struct pf_state **state, int direction, struct ifnet *ifp,
2274     struct mbuf *m, int ipoff, int off, struct ip *h, struct udphdr *uh)
2275 {
2276 	u_int16_t len = h->ip_len - off - sizeof(*uh);
2277 	struct pf_state_peer *src, *dst;
2278 	struct pf_tree_key key;
2279 
2280 	key.proto   = IPPROTO_UDP;
2281 	key.addr[0] = h->ip_src;
2282 	key.port[0] = uh->uh_sport;
2283 	key.addr[1] = h->ip_dst;
2284 	key.port[1] = uh->uh_dport;
2285 
2286 	if (direction == PF_IN)
2287 		*state = pf_find_state(tree_ext_gwy, &key);
2288 	else
2289 		*state = pf_find_state(tree_lan_ext, &key);
2290 	if (*state == NULL)
2291 		return (PF_DROP);
2292 
2293 	if (direction == (*state)->direction) {
2294 		src = &(*state)->src;
2295 		dst = &(*state)->dst;
2296 	} else {
2297 		src = &(*state)->dst;
2298 		dst = &(*state)->src;
2299 	}
2300 
2301 	(*state)->packets++;
2302 	(*state)->bytes += len;
2303 
2304 	/* update states */
2305 	if (src->state < 1)
2306 		src->state = 1;
2307 	if (dst->state == 1)
2308 		dst->state = 2;
2309 
2310 	/* update expire time */
2311 	if (src->state == 2 && dst->state == 2)
2312 		(*state)->expire = pftv.tv_sec + 60;
2313 	else
2314 		(*state)->expire = pftv.tv_sec + 20;
2315 
2316 	/* translate source/destination address, if necessary */
2317 	if (STATE_TRANSLATE(*state)) {
2318 		if (direction == PF_OUT)
2319 			pf_change_ap(&h->ip_src.s_addr, &uh->uh_sport,
2320 			    &h->ip_sum, &uh->uh_sum,
2321 			    (*state)->gwy.addr, (*state)->gwy.port);
2322 		else
2323 			pf_change_ap(&h->ip_dst.s_addr, &uh->uh_dport,
2324 			    &h->ip_sum, &uh->uh_sum,
2325 			    (*state)->lan.addr, (*state)->lan.port);
2326 		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
2327 	}
2328 
2329 	return (PF_PASS);
2330 }
2331 
2332 int
2333 pf_test_state_icmp(struct pf_state **state, int direction, struct ifnet *ifp,
2334     struct mbuf *m, int ipoff, int off, struct ip *h, struct icmp *ih)
2335 {
2336 	u_int16_t len = h->ip_len - off - sizeof(*ih);
2337 
2338 	if (ih->icmp_type != ICMP_UNREACH &&
2339 	    ih->icmp_type != ICMP_SOURCEQUENCH &&
2340 	    ih->icmp_type != ICMP_REDIRECT &&
2341 	    ih->icmp_type != ICMP_TIMXCEED &&
2342 	    ih->icmp_type != ICMP_PARAMPROB) {
2343 
2344 		/*
2345 		 * ICMP query/reply message not related to a TCP/UDP packet.
2346 		 * Search for an ICMP state.
2347 		 */
2348 		struct pf_tree_key key;
2349 
2350 		key.proto   = IPPROTO_ICMP;
2351 		key.addr[0] = h->ip_src;
2352 		key.port[0] = ih->icmp_id;
2353 		key.addr[1] = h->ip_dst;
2354 		key.port[1] = ih->icmp_id;
2355 
2356 		if (direction == PF_IN)
2357 			*state = pf_find_state(tree_ext_gwy, &key);
2358 		else
2359 			*state = pf_find_state(tree_lan_ext, &key);
2360 		if (*state == NULL)
2361 			return (PF_DROP);
2362 
2363 		(*state)->packets++;
2364 		(*state)->bytes += len;
2365 		(*state)->expire = pftv.tv_sec + 10;
2366 
2367 		/* translate source/destination address, if needed */
2368 		if ((*state)->lan.addr != (*state)->gwy.addr) {
2369 			if (direction == PF_OUT)
2370 				pf_change_a(&h->ip_src.s_addr,
2371 				    &h->ip_sum, (*state)->gwy.addr);
2372 			else
2373 				pf_change_a(&h->ip_dst.s_addr,
2374 				    &h->ip_sum, (*state)->lan.addr);
2375 		}
2376 
2377 		return (PF_PASS);
2378 
2379 	} else {
2380 
2381 		/*
2382 		 * ICMP error message in response to a TCP/UDP packet.
2383 		 * Extract the inner TCP/UDP header and search for that state.
2384 		 */
2385 
2386 		struct ip h2;
2387 		int ipoff2;
2388 		int off2;
2389 
2390 		ipoff2 = off + ICMP_MINLEN;	/* offset of h2 in mbuf chain */
2391 		if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2), NULL, NULL)) {
2392 			DPFPRINTF(PF_DEBUG_MISC,
2393 			    ("pf: ICMP error message too short (ip)\n"));
2394 			return (PF_DROP);
2395 		}
2396 
2397 		/* ICMP error messages don't refer to non-first fragments */
2398 		if (h2.ip_off & IP_OFFMASK)
2399 			return (PF_DROP);
2400 
2401 		/* offset of protocol header that follows h2 */
2402 		off2 = ipoff2 + (h2.ip_hl << 2);
2403 
2404 		switch (h2.ip_p) {
2405 		case IPPROTO_TCP: {
2406 			struct tcphdr th;
2407 			u_int32_t seq;
2408 			struct pf_tree_key key;
2409 			struct pf_state_peer *src, *dst;
2410 
2411 			/*
2412 			 * Only the first 8 bytes of the TCP header can be
2413 			 * expected. Don't access any TCP header fields after
2414 			 * th_seq, an ackskew test is not possible.
2415 			 */
2416 			if (!pf_pull_hdr(m, off2, &th, 8, NULL, NULL)) {
2417 				DPFPRINTF(PF_DEBUG_MISC,
2418 				    ("pf: ICMP error message too short (tcp)\n"));
2419 				return (PF_DROP);
2420 			}
2421 			seq = ntohl(th.th_seq);
2422 
2423 			key.proto   = IPPROTO_TCP;
2424 			key.addr[0] = h2.ip_dst;
2425 			key.port[0] = th.th_dport;
2426 			key.addr[1] = h2.ip_src;
2427 			key.port[1] = th.th_sport;
2428 
2429 			if (direction == PF_IN)
2430 				*state = pf_find_state(tree_ext_gwy, &key);
2431 			else
2432 				*state = pf_find_state(tree_lan_ext, &key);
2433 			if (*state == NULL)
2434 				return (PF_DROP);
2435 
2436 			if (direction == (*state)->direction) {
2437 				src = &(*state)->dst;
2438 				dst = &(*state)->src;
2439 			} else {
2440 				src = &(*state)->src;
2441 				dst = &(*state)->dst;
2442 			}
2443 
2444 			if (!SEQ_GEQ(src->seqhi, seq) ||
2445 			    !SEQ_GEQ(seq, src->seqlo - dst->max_win)) {
2446 				if (pf_status.debug >= PF_DEBUG_MISC) {
2447 					printf("pf: BAD ICMP state: ");
2448 					pf_print_state(*state);
2449 					printf(" seq=%lu\n", seq);
2450 				}
2451 				return (PF_DROP);
2452 			}
2453 
2454 			if (STATE_TRANSLATE(*state)) {
2455 				if (direction == PF_IN) {
2456 					pf_change_icmp(&h2.ip_src.s_addr,
2457 					    &th.th_sport, &h->ip_dst.s_addr,
2458 					    (*state)->lan.addr,
2459 					    (*state)->lan.port, NULL,
2460 					    &h2.ip_sum, &ih->icmp_cksum,
2461 					    &h->ip_sum);
2462 				} else {
2463 					pf_change_icmp(&h2.ip_dst.s_addr,
2464 					    &th.th_dport, &h->ip_src.s_addr,
2465 					    (*state)->gwy.addr,
2466 					    (*state)->gwy.port, NULL,
2467 					    &h2.ip_sum, &ih->icmp_cksum,
2468 					    &h->ip_sum);
2469 				}
2470 				m_copyback(m, off, ICMP_MINLEN, (caddr_t)ih);
2471 				m_copyback(m, ipoff2, sizeof(h2),
2472 				    (caddr_t)&h2);
2473 				m_copyback(m, off2, 8,
2474 				    (caddr_t)&th);
2475 			}
2476 
2477 			return (PF_PASS);
2478 			break;
2479 		}
2480 		case IPPROTO_UDP: {
2481 			struct udphdr uh;
2482 			struct pf_tree_key key;
2483 
2484 			if (!pf_pull_hdr(m, off2, &uh, sizeof(uh),
2485 			    NULL, NULL)) {
2486 				DPFPRINTF(PF_DEBUG_MISC,
2487 				    ("pf: ICMP error message too short (udp)\n"));
2488 				return (PF_DROP);
2489 			}
2490 
2491 			key.proto   = IPPROTO_UDP;
2492 			key.addr[0] = h2.ip_dst;
2493 			key.port[0] = uh.uh_dport;
2494 			key.addr[1] = h2.ip_src;
2495 			key.port[1] = uh.uh_sport;
2496 
2497 			if (direction == PF_IN)
2498 				*state = pf_find_state(tree_ext_gwy, &key);
2499 			else
2500 				*state = pf_find_state(tree_lan_ext, &key);
2501 			if (*state == NULL)
2502 				return (PF_DROP);
2503 
2504 			if (STATE_TRANSLATE(*state)) {
2505 				if (direction == PF_IN) {
2506 					pf_change_icmp(&h2.ip_src.s_addr,
2507 					    &uh.uh_sport, &h->ip_dst.s_addr,
2508 					    (*state)->lan.addr,
2509 					    (*state)->lan.port, &uh.uh_sum,
2510 					    &h2.ip_sum, &ih->icmp_cksum,
2511 					    &h->ip_sum);
2512 				} else {
2513 					pf_change_icmp(&h2.ip_dst.s_addr,
2514 					    &uh.uh_dport, &h->ip_src.s_addr,
2515 					    (*state)->gwy.addr,
2516 					    (*state)->gwy.port, &uh.uh_sum,
2517 					    &h2.ip_sum, &ih->icmp_cksum,
2518 					    &h->ip_sum);
2519 				}
2520 				m_copyback(m, off, ICMP_MINLEN, (caddr_t)ih);
2521 				m_copyback(m, ipoff2, sizeof(h2),
2522 				    (caddr_t)&h2);
2523 				m_copyback(m, off2, sizeof(uh),
2524 				    (caddr_t)&uh);
2525 			}
2526 
2527 			return (PF_PASS);
2528 			break;
2529 		}
2530 		case IPPROTO_ICMP: {
2531 			struct icmp iih;
2532 			struct pf_tree_key key;
2533 
2534 			if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN,
2535 			    NULL, NULL)) {
2536 				DPFPRINTF(PF_DEBUG_MISC,
2537 				    ("pf: ICMP error message too short (icmp)\n"));
2538 				return (PF_DROP);
2539 			}
2540 
2541 			key.proto   = IPPROTO_ICMP;
2542 			key.addr[0] = h2.ip_dst;
2543 			key.port[0] = iih.icmp_id;
2544 			key.addr[1] = h2.ip_src;
2545 			key.port[1] = iih.icmp_id;
2546 
2547 			if (direction == PF_IN)
2548 				*state = pf_find_state(tree_ext_gwy, &key);
2549 			else
2550 				*state = pf_find_state(tree_lan_ext, &key);
2551 			if (*state == NULL)
2552 				return (PF_DROP);
2553 
2554 			if (STATE_TRANSLATE(*state)) {
2555 				if (direction == PF_IN) {
2556 					pf_change_icmp(&h2.ip_src.s_addr,
2557 					    &iih.icmp_id, &h->ip_dst.s_addr,
2558 					    (*state)->lan.addr,
2559 					    (*state)->lan.port, NULL,
2560 					    &h2.ip_sum, &ih->icmp_cksum,
2561 					    &h->ip_sum);
2562 				} else {
2563 					pf_change_icmp(&h2.ip_dst.s_addr,
2564 					    &iih.icmp_id, &h->ip_src.s_addr,
2565 					    (*state)->gwy.addr,
2566 					    (*state)->gwy.port, NULL,
2567 					    &h2.ip_sum, &ih->icmp_cksum,
2568 					    &h->ip_sum);
2569 				}
2570 				m_copyback(m, off, ICMP_MINLEN, (caddr_t)ih);
2571 				m_copyback(m, ipoff2, sizeof(h2),
2572 				    (caddr_t)&h2);
2573 				m_copyback(m, off2, ICMP_MINLEN,
2574 				    (caddr_t)&iih);
2575 			}
2576 
2577 			return (PF_PASS);
2578 			break;
2579 		}
2580 		default:
2581 			DPFPRINTF(PF_DEBUG_MISC,
2582 			    ("pf: ICMP error message for bad proto\n"));
2583 			return (PF_DROP);
2584 		}
2585 
2586 	}
2587 }
2588 
2589 /*
2590  * ipoff and off are measured from the start of the mbuf chain.
2591  * h must be at "ipoff" on the mbuf chain.
2592  */
2593 void *
2594 pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
2595     u_short *actionp, u_short *reasonp)
2596 {
2597 	struct ip *h = mtod(m, struct ip *);
2598 	u_int16_t fragoff = (h->ip_off & IP_OFFMASK) << 3;
2599 
2600 	if (fragoff) {
2601 		if (fragoff >= len)
2602 			ACTION_SET(actionp, PF_PASS);
2603 		else {
2604 			ACTION_SET(actionp, PF_DROP);
2605 			REASON_SET(reasonp, PFRES_FRAG);
2606 		}
2607 		return (NULL);
2608 	}
2609 	if (m->m_pkthdr.len < off + len || h->ip_len < off + len) {
2610 		ACTION_SET(actionp, PF_DROP);
2611 		REASON_SET(reasonp, PFRES_SHORT);
2612 		return (NULL);
2613 	}
2614 	m_copydata(m, off, len, p);
2615 	return (p);
2616 }
2617 
2618 int
2619 pf_test(int dir, struct ifnet *ifp, struct mbuf **m0)
2620 {
2621 	u_short action, reason = 0, log = 0;
2622 	struct mbuf *m = *m0;
2623 	struct ip *h;
2624 	struct pf_rule *r = NULL;
2625 	struct pf_state *s;
2626 	int off;
2627 
2628 	if (!pf_status.running ||
2629 	    (m_tag_find(m, PACKET_TAG_PF_GENERATED, NULL) != NULL))
2630 		return (PF_PASS);
2631 
2632 #ifdef DIAGNOSTIC
2633 	if ((m->m_flags & M_PKTHDR) == 0)
2634 		panic("non-M_PKTHDR is passed to pf_test");
2635 #endif
2636 
2637 	/* purge expire states, at most once every 10 seconds */
2638 	microtime(&pftv);
2639 	if (pftv.tv_sec - pf_last_purge >= 10) {
2640 		pf_purge_expired_states();
2641 		pf_purge_expired_fragments();
2642 		pf_last_purge = pftv.tv_sec;
2643 	}
2644 
2645 	if (m->m_pkthdr.len < sizeof(*h)) {
2646 		action = PF_DROP;
2647 		REASON_SET(&reason, PFRES_SHORT);
2648 		log = 1;
2649 		goto done;
2650 	}
2651 
2652 	/* We do IP header normalization and packet reassembly here */
2653 	if (pf_normalize_ip(m0, dir, ifp, &reason) != PF_PASS) {
2654 		ACTION_SET(&action, PF_DROP);
2655 		goto done;
2656 	}
2657 	m = *m0;
2658 	h = mtod(m, struct ip *);
2659 
2660 	off = h->ip_hl << 2;
2661 
2662 	switch (h->ip_p) {
2663 
2664 	case IPPROTO_TCP: {
2665 		struct tcphdr th;
2666 
2667 		if (!pf_pull_hdr(m, off, &th, sizeof(th), &action, &reason)) {
2668 			log = action != PF_PASS;
2669 			goto done;
2670 		}
2671 		action = pf_normalize_tcp(dir, ifp, m, 0, off, h, &th);
2672 		if (action == PF_DROP)
2673 			break;
2674 		action = pf_test_state_tcp(&s, dir, ifp, m, 0, off, h , &th);
2675 		if (action == PF_PASS) {
2676 			r = s->rule;
2677 			log = s->log;
2678 		} else if (s == NULL)
2679 			action = pf_test_tcp(dir, ifp, m, 0, off, h , &th);
2680 		break;
2681 	}
2682 
2683 	case IPPROTO_UDP: {
2684 		struct udphdr uh;
2685 
2686 		if (!pf_pull_hdr(m, off, &uh, sizeof(uh), &action, &reason)) {
2687 			log = action != PF_PASS;
2688 			goto done;
2689 		}
2690 		action = pf_test_state_udp(&s, dir, ifp, m, 0, off, h, &uh);
2691 		if (action == PF_PASS) {
2692 			r = s->rule;
2693 			log = s->log;
2694 		} else if (s == NULL)
2695 			action = pf_test_udp(dir, ifp, m, 0, off, h, &uh);
2696 		break;
2697 	}
2698 
2699 	case IPPROTO_ICMP: {
2700 		struct icmp ih;
2701 
2702 		if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN, &action, &reason)) {
2703 			log = action != PF_PASS;
2704 			goto done;
2705 		}
2706 		action = pf_test_state_icmp(&s, dir, ifp, m, 0, off, h, &ih);
2707 		if (action == PF_PASS) {
2708 			r = s->rule;
2709 			log = s->log;
2710 		} else if (s == NULL)
2711 			action = pf_test_icmp(dir, ifp, m, 0, off, h, &ih);
2712 		break;
2713 	}
2714 
2715 	default:
2716 		action = pf_test_other(dir, ifp, m, h);
2717 		break;
2718 	}
2719 
2720 	if (ifp == status_ifp) {
2721 		pf_status.bcounters[dir] += h->ip_len;
2722 		pf_status.pcounters[dir][action]++;
2723 	}
2724 
2725 done:
2726 	if (log) {
2727 		struct pf_rule r0;
2728 
2729 		if (r == NULL) {
2730 			r0.ifp = ifp;
2731 			r0.action = action;
2732 			r0.nr = -1;
2733 			r = &r0;
2734 		}
2735 		PFLOG_PACKET(h, m, AF_INET, dir, reason, r);
2736 	}
2737 	return (action);
2738 }
2739