xref: /openbsd-src/sys/net/pf_lb.c (revision 5054e3e78af0749a9bb00ba9a024b3ee2d90290f)
1 /*	$OpenBSD: pf_lb.c,v 1.8 2009/11/03 10:59:04 claudio Exp $ */
2 
3 /*
4  * Copyright (c) 2001 Daniel Hartmeier
5  * Copyright (c) 2002 - 2008 Henning Brauer
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  *    - Redistributions of source code must retain the above copyright
13  *      notice, this list of conditions and the following disclaimer.
14  *    - Redistributions in binary form must reproduce the above
15  *      copyright notice, this list of conditions and the following
16  *      disclaimer in the documentation and/or other materials provided
17  *      with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  *
32  * Effort sponsored in part by the Defense Advanced Research Projects
33  * Agency (DARPA) and Air Force Research Laboratory, Air Force
34  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35  *
36  */
37 
38 #include "bpfilter.h"
39 #include "pflog.h"
40 #include "pfsync.h"
41 #include "pflow.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/mbuf.h>
46 #include <sys/filio.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/kernel.h>
50 #include <sys/time.h>
51 #include <sys/pool.h>
52 #include <sys/proc.h>
53 #include <sys/rwlock.h>
54 
55 #include <crypto/md5.h>
56 
57 #include <net/if.h>
58 #include <net/if_types.h>
59 #include <net/bpf.h>
60 #include <net/route.h>
61 #include <net/radix_mpath.h>
62 
63 #include <netinet/in.h>
64 #include <netinet/in_var.h>
65 #include <netinet/in_systm.h>
66 #include <netinet/ip.h>
67 #include <netinet/ip_var.h>
68 #include <netinet/tcp.h>
69 #include <netinet/tcp_seq.h>
70 #include <netinet/udp.h>
71 #include <netinet/ip_icmp.h>
72 #include <netinet/in_pcb.h>
73 #include <netinet/tcp_timer.h>
74 #include <netinet/tcp_var.h>
75 #include <netinet/udp_var.h>
76 #include <netinet/icmp_var.h>
77 #include <netinet/if_ether.h>
78 
79 #include <dev/rndvar.h>
80 #include <net/pfvar.h>
81 #include <net/if_pflog.h>
82 #include <net/if_pflow.h>
83 
84 #if NPFSYNC > 0
85 #include <net/if_pfsync.h>
86 #endif /* NPFSYNC > 0 */
87 
88 #ifdef INET6
89 #include <netinet/ip6.h>
90 #include <netinet/in_pcb.h>
91 #include <netinet/icmp6.h>
92 #include <netinet6/nd6.h>
93 #endif /* INET6 */
94 
95 
/*
 * Print a debug message when pf_status.debug is at least level n.
 * x is a complete, parenthesized printf argument list, e.g.
 *	DPFPRINTF(PF_DEBUG_MISC, ("pf: value %d\n", v));
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement and cannot capture an "else" that follows the call site.
 */
#define DPFPRINTF(n, x)						\
	do {							\
		if (pf_status.debug >= (n))			\
			printf x;				\
	} while (0)
97 
/*
 * Function prototypes
 */

/*
 * Hash an address with a pool key; the result is written to the second
 * argument.
 */
void			 pf_hash(struct pf_addr *, struct pf_addr *,
			    struct pf_poolhashkey *, sa_family_t);
/*
 * Pick a translation address and source port for a rule's nat pool.
 * Returns 0 on success and 1 when nothing could be allocated.
 */
int			 pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *,
			    struct pf_addr *, struct pf_addr *, u_int16_t,
			    struct pf_addr *, u_int16_t *, u_int16_t, u_int16_t,
			    struct pf_src_node **, int);
108 
/*
 * Scramble the three accumulators a, b and c in place with nine
 * subtract / xor-shift steps.  pf_hash() uses it on u_int32_t values.
 * Each argument is evaluated many times and is assigned to, so callers
 * must pass plain lvalues without side effects.
 */
#define mix(a,b,c) \
	do {					\
		a -= b; a -= c; a ^= (c >> 13);	\
		b -= c; b -= a; b ^= (a << 8);	\
		c -= a; c -= b; c ^= (b >> 13);	\
		a -= b; a -= c; a ^= (c >> 12);	\
		b -= c; b -= a; b ^= (a << 16);	\
		c -= a; c -= b; c ^= (b >> 5);	\
		a -= b; a -= c; a ^= (c >> 3);	\
		b -= c; b -= a; b ^= (a << 10);	\
		c -= a; c -= b; c ^= (b >> 15);	\
	} while (0)
121 
122 /*
123  * hash function based on bridge_hash in if_bridge.c
124  */
125 void
126 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
127     struct pf_poolhashkey *key, sa_family_t af)
128 {
129 	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
130 
131 	switch (af) {
132 #ifdef INET
133 	case AF_INET:
134 		a += inaddr->addr32[0];
135 		b += key->key32[1];
136 		mix(a, b, c);
137 		hash->addr32[0] = c + key->key32[2];
138 		break;
139 #endif /* INET */
140 #ifdef INET6
141 	case AF_INET6:
142 		a += inaddr->addr32[0];
143 		b += inaddr->addr32[2];
144 		mix(a, b, c);
145 		hash->addr32[0] = c;
146 		a += inaddr->addr32[1];
147 		b += inaddr->addr32[3];
148 		c += key->key32[1];
149 		mix(a, b, c);
150 		hash->addr32[1] = c;
151 		a += inaddr->addr32[2];
152 		b += inaddr->addr32[1];
153 		c += key->key32[2];
154 		mix(a, b, c);
155 		hash->addr32[2] = c;
156 		a += inaddr->addr32[3];
157 		b += inaddr->addr32[0];
158 		c += key->key32[3];
159 		mix(a, b, c);
160 		hash->addr32[3] = c;
161 		break;
162 #endif /* INET6 */
163 	}
164 }
165 
166 int
167 pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
168     struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport,
169     struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
170     struct pf_src_node **sn, int rdomain)
171 {
172 	struct pf_state_key_cmp	key;
173 	struct pf_addr		init_addr;
174 	u_int16_t		cut;
175 
176 	bzero(&init_addr, sizeof(init_addr));
177 	if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn, &r->nat))
178 		return (1);
179 
180 	if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) {
181 		if (dport == htons(ICMP6_ECHO_REQUEST) ||
182 		    dport == htons(ICMP_ECHO)) {
183 			low = 1;
184 			high = 65535;
185 		} else
186 			return (0);	/* Don't try to modify non-echo ICMP */
187 	}
188 
189 	do {
190 		key.af = af;
191 		key.proto = proto;
192 		key.rdomain = rdomain;
193 		PF_ACPY(&key.addr[1], daddr, key.af);
194 		PF_ACPY(&key.addr[0], naddr, key.af);
195 		key.port[1] = dport;
196 
197 		/*
198 		 * port search; start random, step;
199 		 * similar 2 portloop in in_pcbbind
200 		 */
201 		if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
202 		    proto == IPPROTO_ICMP)) {
203 			/* XXX bug icmp states dont use the id on both sides */
204 			key.port[0] = dport;
205 			if (pf_find_state_all(&key, PF_IN, NULL) == NULL)
206 				return (0);
207 		} else if (low == 0 && high == 0) {
208 			key.port[0] = *nport;
209 			if (pf_find_state_all(&key, PF_IN, NULL) == NULL)
210 				return (0);
211 		} else if (low == high) {
212 			key.port[0] = htons(low);
213 			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
214 				*nport = htons(low);
215 				return (0);
216 			}
217 		} else {
218 			u_int16_t tmp;
219 
220 			if (low > high) {
221 				tmp = low;
222 				low = high;
223 				high = tmp;
224 			}
225 			/* low < high */
226 			cut = arc4random_uniform(1 + high - low) + low;
227 			/* low <= cut <= high */
228 			for (tmp = cut; tmp <= high; ++(tmp)) {
229 				key.port[0] = htons(tmp);
230 				if (pf_find_state_all(&key, PF_IN, NULL) ==
231 				    NULL && !in_baddynamic(tmp, proto)) {
232 					*nport = htons(tmp);
233 					return (0);
234 				}
235 			}
236 			for (tmp = cut - 1; tmp >= low; --(tmp)) {
237 				key.port[0] = htons(tmp);
238 				if (pf_find_state_all(&key, PF_IN, NULL) ==
239 				    NULL && !in_baddynamic(tmp, proto)) {
240 					*nport = htons(tmp);
241 					return (0);
242 				}
243 			}
244 		}
245 
246 		switch (r->nat.opts & PF_POOL_TYPEMASK) {
247 		case PF_POOL_RANDOM:
248 		case PF_POOL_ROUNDROBIN:
249 			if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn,
250 			    &r->nat))
251 				return (1);
252 			break;
253 		case PF_POOL_NONE:
254 		case PF_POOL_SRCHASH:
255 		case PF_POOL_BITMASK:
256 		default:
257 			return (1);
258 		}
259 	} while (! PF_AEQ(&init_addr, naddr, af) );
260 	return (1);					/* none available */
261 }
262 
263 int
264 pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
265     struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn,
266     struct pf_pool *rpool)
267 {
268 	unsigned char		 hash[16];
269 	struct pf_addr		*raddr = &rpool->cur->addr.v.a.addr;
270 	struct pf_addr		*rmask = &rpool->cur->addr.v.a.mask;
271 	struct pf_pooladdr	*acur = rpool->cur;
272 	struct pf_src_node	 k;
273 
274 	if (*sn == NULL && rpool->opts & PF_POOL_STICKYADDR &&
275 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
276 		k.af = af;
277 		PF_ACPY(&k.addr, saddr, af);
278 		if (r->rule_flag & PFRULE_RULESRCTRACK ||
279 		    rpool->opts & PF_POOL_STICKYADDR)
280 			k.rule.ptr = r;
281 		else
282 			k.rule.ptr = NULL;
283 		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
284 		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
285 		if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
286 			PF_ACPY(naddr, &(*sn)->raddr, af);
287 			if (pf_status.debug >= PF_DEBUG_MISC) {
288 				printf("pf_map_addr: src tracking maps ");
289 				pf_print_host(&k.addr, 0, af);
290 				printf(" to ");
291 				pf_print_host(naddr, 0, af);
292 				printf("\n");
293 			}
294 			return (0);
295 		}
296 	}
297 
298 	if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
299 		return (1);
300 	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
301 		switch (af) {
302 #ifdef INET
303 		case AF_INET:
304 			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
305 			    (rpool->opts & PF_POOL_TYPEMASK) !=
306 			    PF_POOL_ROUNDROBIN)
307 				return (1);
308 			 raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
309 			 rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
310 			break;
311 #endif /* INET */
312 #ifdef INET6
313 		case AF_INET6:
314 			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
315 			    (rpool->opts & PF_POOL_TYPEMASK) !=
316 			    PF_POOL_ROUNDROBIN)
317 				return (1);
318 			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
319 			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
320 			break;
321 #endif /* INET6 */
322 		}
323 	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
324 		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
325 			return (1); /* unsupported */
326 	} else {
327 		raddr = &rpool->cur->addr.v.a.addr;
328 		rmask = &rpool->cur->addr.v.a.mask;
329 	}
330 
331 	switch (rpool->opts & PF_POOL_TYPEMASK) {
332 	case PF_POOL_NONE:
333 		PF_ACPY(naddr, raddr, af);
334 		break;
335 	case PF_POOL_BITMASK:
336 		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
337 		break;
338 	case PF_POOL_RANDOM:
339 		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
340 			switch (af) {
341 #ifdef INET
342 			case AF_INET:
343 				rpool->counter.addr32[0] = htonl(arc4random());
344 				break;
345 #endif /* INET */
346 #ifdef INET6
347 			case AF_INET6:
348 				if (rmask->addr32[3] != 0xffffffff)
349 					rpool->counter.addr32[3] =
350 					    htonl(arc4random());
351 				else
352 					break;
353 				if (rmask->addr32[2] != 0xffffffff)
354 					rpool->counter.addr32[2] =
355 					    htonl(arc4random());
356 				else
357 					break;
358 				if (rmask->addr32[1] != 0xffffffff)
359 					rpool->counter.addr32[1] =
360 					    htonl(arc4random());
361 				else
362 					break;
363 				if (rmask->addr32[0] != 0xffffffff)
364 					rpool->counter.addr32[0] =
365 					    htonl(arc4random());
366 				break;
367 #endif /* INET6 */
368 			}
369 			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
370 			PF_ACPY(init_addr, naddr, af);
371 
372 		} else {
373 			PF_AINC(&rpool->counter, af);
374 			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
375 		}
376 		break;
377 	case PF_POOL_SRCHASH:
378 		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
379 		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
380 		break;
381 	case PF_POOL_ROUNDROBIN:
382 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
383 			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
384 			    &rpool->tblidx, &rpool->counter,
385 			    &raddr, &rmask, af))
386 				goto get_addr;
387 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
388 			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
389 			    &rpool->tblidx, &rpool->counter,
390 			    &raddr, &rmask, af))
391 				goto get_addr;
392 		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
393 			goto get_addr;
394 
395 	try_next:
396 		if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL)
397 			rpool->cur = TAILQ_FIRST(&rpool->list);
398 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
399 			rpool->tblidx = -1;
400 			if (pfr_pool_get(rpool->cur->addr.p.tbl,
401 			    &rpool->tblidx, &rpool->counter,
402 			    &raddr, &rmask, af)) {
403 				/* table contains no address of type 'af' */
404 				if (rpool->cur != acur)
405 					goto try_next;
406 				return (1);
407 			}
408 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
409 			rpool->tblidx = -1;
410 			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
411 			    &rpool->tblidx, &rpool->counter,
412 			    &raddr, &rmask, af)) {
413 				/* table contains no address of type 'af' */
414 				if (rpool->cur != acur)
415 					goto try_next;
416 				return (1);
417 			}
418 		} else {
419 			raddr = &rpool->cur->addr.v.a.addr;
420 			rmask = &rpool->cur->addr.v.a.mask;
421 			PF_ACPY(&rpool->counter, raddr, af);
422 		}
423 
424 	get_addr:
425 		PF_ACPY(naddr, &rpool->counter, af);
426 		if (init_addr != NULL && PF_AZERO(init_addr, af))
427 			PF_ACPY(init_addr, naddr, af);
428 		PF_AINC(&rpool->counter, af);
429 		break;
430 	}
431 	if (*sn != NULL)
432 		PF_ACPY(&(*sn)->raddr, naddr, af);
433 
434 	if (pf_status.debug >= PF_DEBUG_NOISY &&
435 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
436 		printf("pf_map_addr: selected address ");
437 		pf_print_host(naddr, 0, af);
438 		printf("\n");
439 	}
440 
441 	return (0);
442 }
443 
444 int
445 pf_get_transaddr(struct pf_rule *r, struct pf_pdesc *pd, struct pf_addr *saddr,
446     u_int16_t *sport, struct pf_addr *daddr, u_int16_t *dport)
447 {
448 	struct pf_addr	naddr;
449 	u_int16_t	nport = 0;
450 
451 	struct pf_src_node srcnode, *sn = &srcnode;
452 
453 	if (!TAILQ_EMPTY(&r->nat.list)) {
454 		/* XXX is this right? what if rtable is changed at the same
455 		 * XXX time? where do I need to figure out the sport? */
456 		if (pf_get_sport(pd->af, pd->proto, r, saddr,
457 		    daddr, *dport, &naddr, &nport, r->nat.proxy_port[0],
458 		    r->nat.proxy_port[1], &sn, pd->rdomain)) {
459 			DPFPRINTF(PF_DEBUG_MISC,
460 			    ("pf: NAT proxy port allocation "
461 			    "(%u-%u) failed\n",
462 			    r->nat.proxy_port[0],
463 			    r->nat.proxy_port[1]));
464 			return (-1);
465 		}
466 		PF_ACPY(saddr, &naddr, pd->af);
467 		if (nport)
468 			*sport = nport;
469 	}
470 	if (!TAILQ_EMPTY(&r->rdr.list)) {
471 		if (pf_map_addr(pd->af, r, saddr, &naddr, NULL, &sn, &r->rdr))
472 			return (-1);
473 		if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
474 			PF_POOLMASK(&naddr, &naddr,  &r->rdr.cur->addr.v.a.mask,
475 			    daddr, pd->af);
476 
477 			if (r->rdr.proxy_port[1]) {
478 				u_int32_t	tmp_nport;
479 
480 				tmp_nport = ((ntohs(*dport) -
481 				    ntohs(r->dst.port[0])) %
482 				    (r->rdr.proxy_port[1] -
483 				    r->rdr.proxy_port[0] + 1)) +
484 				    r->rdr.proxy_port[0];
485 
486 				/* wrap around if necessary */
487 				if (tmp_nport > 65535)
488 					tmp_nport -= 65535;
489 				nport = htons((u_int16_t)tmp_nport);
490 			} else if (r->rdr.proxy_port[0])
491 				nport = htons(r->rdr.proxy_port[0]);
492 
493 		PF_ACPY(daddr, &naddr, pd->af);
494 		if (nport)
495 			*dport = nport;
496 	}
497 
498 	return (0);
499 }
500 
501