xref: /netbsd-src/sys/netinet/in_selsrc.c (revision b1c86f5f087524e68db12794ee9c3e3da1ab17a0)
1 /*	$NetBSD: in_selsrc.c,v 1.8 2009/10/19 23:19:39 rmind Exp $	*/
2 
3 /*-
4  * Copyright (c) 2005 David Young.  All rights reserved.
5  *
6  * This code was written by David Young.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY DAVID YOUNG ``AS IS'' AND ANY
18  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
20  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
21  * FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 __KERNEL_RCSID(0, "$NetBSD: in_selsrc.c,v 1.8 2009/10/19 23:19:39 rmind Exp $");
33 
34 #include "opt_inet.h"
35 #include "opt_inet_conf.h"
36 
37 #include <lib/libkern/libkern.h>
38 
39 #include <sys/param.h>
40 #include <sys/ioctl.h>
41 #include <sys/errno.h>
42 #include <sys/malloc.h>
43 #include <sys/socket.h>
44 #include <sys/socketvar.h>
45 #include <sys/sysctl.h>
46 #include <sys/systm.h>
47 #include <sys/proc.h>
48 #include <sys/syslog.h>
49 
50 #include <net/if.h>
51 
52 #include <net/if_ether.h>
53 
54 #include <netinet/in_systm.h>
55 #include <netinet/in.h>
56 #include <netinet/in_var.h>
57 #include <netinet/ip.h>
58 #include <netinet/ip_var.h>
59 #include <netinet/in_ifattach.h>
60 #include <netinet/in_pcb.h>
61 #include <netinet/if_inarp.h>
62 #include <netinet/ip_mroute.h>
63 #include <netinet/igmp_var.h>
64 #include <netinet/in_selsrc.h>
65 
66 #ifdef INET
67 struct score_src_name {
68 	const char		*sn_name;
69 	const in_score_src_t	sn_score_src;
70 };
71 
72 static const struct sysctlnode *in_domifattach_sysctl(struct in_ifsysctl *);
73 static int in_preference(const struct in_addr *, int, int,
74     const struct in_addr *);
75 static int in_index(const struct in_addr *, int, int, const struct in_addr *);
76 static int in_matchlen(const struct in_addr *, int, int,
77     const struct in_addr *);
78 static int in_match_category(const struct in_addr *, int, int,
79     const struct in_addr *);
80 static size_t in_get_selectsrc(const struct in_ifselsrc *, char *,
81     const size_t);
82 static int in_set_selectsrc(struct in_ifselsrc *, char *buf);
83 static int in_sysctl_selectsrc(SYSCTLFN_PROTO);
84 static in_score_src_t name_to_score_src(const char *);
85 static const char *score_src_to_name(const in_score_src_t);
86 static void in_score(const in_score_src_t *, int *, int *,
87     const struct in_addr *, int, int, const struct in_addr *);
88 
89 static const struct score_src_name score_src_names[] = {
90 	  {"same-category", in_match_category}
91 	, {"common-prefix-len", in_matchlen}
92 	, {"index", in_index}
93 	, {"preference", in_preference}
94 	, {NULL, NULL}
95 };
96 
97 static const struct in_ifselsrc initial_iss = { 0, {NULL} };
98 
99 static struct in_ifselsrc default_iss = { 0, {in_index} };
100 
101 #ifdef GETIFA_DEBUG
102 int in_selsrc_debug = 0;
103 #endif /* GETIFA_DEBUG */
104 
105 SYSCTL_SETUP(sysctl_selectsrc_setup, "sysctl selectsrc subtree setup")
106 {
107 	int rc;
108 	const struct sysctlnode *rnode, *cnode;
109 
110 	if ((rc = sysctl_createv(clog, 0, NULL, &rnode,
111 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "net",
112 	    NULL, NULL, 0, NULL, 0, CTL_NET, CTL_EOL)) != 0) {
113 		printf("%s: could not create net, rc = %d\n", __func__, rc);
114 		return;
115 	}
116 	if ((rc = sysctl_createv(clog, 0, NULL, &rnode,
117 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "inet",
118 	    NULL, NULL, 0, NULL, 0, CTL_NET, PF_INET, CTL_EOL)) != 0) {
119 		printf("%s: could not create net.inet, rc = %d\n", __func__,
120 		    rc);
121 		return;
122 	}
123 	if ((rc = sysctl_createv(clog, 0, NULL, &rnode,
124 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "ip",
125 	    NULL, NULL, 0, NULL, 0,
126 	    CTL_NET, PF_INET, IPPROTO_IP, CTL_EOL)) != 0) {
127 		printf("%s: could not create net.inet.ip, rc = %d\n", __func__,
128 		    rc);
129 		return;
130 	}
131 	if ((rc = sysctl_createv(clog, 0, NULL, &rnode,
132 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "selectsrc",
133 	    NULL, NULL, 0, NULL, 0,
134 	    CTL_NET, PF_INET, IPPROTO_IP, CTL_CREATE, CTL_EOL)) != 0) {
135 		printf("%s: could not create net.inet.ip.selectsrc, "
136 		       "rc = %d\n", __func__, rc);
137 		return;
138 	}
139 #ifdef GETIFA_DEBUG
140 	if ((rc = sysctl_createv(clog, 0, &rnode, &cnode,
141 	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "debug",
142 	    SYSCTL_DESCR("enable source-selection debug messages"),
143 	    NULL, 0, &in_selsrc_debug, 0, CTL_CREATE, CTL_EOL)) != 0) {
144 		printf("%s: could not create net.inet.ip.selectsrc.debug, "
145 		       "rc = %d\n", __func__, rc);
146 		return;
147 	}
148 #endif /* GETIFA_DEBUG */
149 	if ((rc = sysctl_createv(clog, 0, &rnode, &cnode,
150 	    CTLFLAG_READWRITE, CTLTYPE_STRING, "default",
151 	    SYSCTL_DESCR("default source selection policy"),
152 	    in_sysctl_selectsrc, 0, &default_iss, IN_SELECTSRC_LEN,
153 	    CTL_CREATE, CTL_EOL)) != 0) {
154 		printf(
155 		    "%s: could not create net.inet.ip.selectsrc.default (%d)\n",
156 		    __func__, rc);
157 		return;
158 	}
159 }
160 
/*
 * Scorer "preference": the score is the address's preference number,
 * so a higher preference wins.  Preference numbers are assigned with
 * ioctl SIOCSIFADDRPREF.
 *
 * src, idx and dst are part of the common scorer signature and are
 * not consulted here.
 */
static int
in_preference(const struct in_addr *src, int preference,
    int idx, const struct in_addr *dst)
{
	(void)src;
	(void)idx;
	(void)dst;

	return preference;
}
171 
/*
 * Scorer "index": the score is the negated position of the address in
 * the ifaddr list, so an address nearer the head of the list wins.
 *
 * src, preference and dst are part of the common scorer signature and
 * are not consulted here.
 */
static int
in_index(const struct in_addr *src, int preference, int idx,
    const struct in_addr *dst)
{
	(void)src;
	(void)preference;
	(void)dst;

	return -idx;
}
182 
/*
 * Scorer "common-prefix-len": length, in bits (0..32), of the longest
 * common prefix of src and dst.
 *
 * The two addresses are compared octet by octet in memory order; at
 * the first octet that differs, the number of equal leading bits in
 * that octet is added to the count.  The differing octet is examined
 * even when it is the last one, so that e.g. 10.0.0.1 and 10.0.0.2
 * score 30, not 24.
 *
 * preference and idx are part of the common scorer signature and are
 * not consulted here.
 *
 * (Derived from in6_matchlen.)
 */
static int
in_matchlen(const struct in_addr *src, int preference,
    int idx, const struct in_addr *dst)
{
	const uint8_t *s = (const uint8_t *)src, *d = (const uint8_t *)dst;
	const uint8_t *lim = s + 4;	/* an IPv4 address is 4 octets */
	uint_fast8_t r;
	int match = 0;

	(void)preference;
	(void)idx;

	while (s < lim) {
		r = *d++ ^ *s++;
		if (r != 0) {
			/*
			 * r is non-zero, so a set bit reaches 0x80 after
			 * at most seven shifts.
			 */
			while ((r & 0x80) == 0) {
				match++;
				r <<= 1;
			}
			break;
		}
		match += 8;
	}
	return match;
}
209 
210 static enum in_category
211 in_categorize(const struct in_addr *s)
212 {
213 	if (IN_ANY_LOCAL(s->s_addr))
214 		return IN_CATEGORY_LINKLOCAL;
215 	else if (IN_PRIVATE(s->s_addr))
216 		return IN_CATEGORY_PRIVATE;
217 	else
218 		return IN_CATEGORY_OTHER;
219 }
220 
221 static int
222 in_match_category(const struct in_addr *src, int preference,
223     int idx, const struct in_addr *dst)
224 {
225 	enum in_category dst_c = in_categorize(dst),
226 	                 src_c = in_categorize(src);
227 #ifdef GETIFA_DEBUG
228 	if (in_selsrc_debug) {
229 		printf("%s: dst %#08" PRIx32 " categ %d, src %#08" PRIx32
230 		    " categ %d\n", __func__, ntohl(dst->s_addr), dst_c,
231 		    ntohl(src->s_addr), src_c);
232 	}
233 #endif /* GETIFA_DEBUG */
234 
235 	if (dst_c == src_c)
236 		return 2;
237 	else if (dst_c == IN_CATEGORY_LINKLOCAL && src_c == IN_CATEGORY_PRIVATE)
238 		return 1;
239 	else if (dst_c == IN_CATEGORY_PRIVATE && src_c == IN_CATEGORY_LINKLOCAL)
240 		return 1;
241 	else if (dst_c == IN_CATEGORY_OTHER && src_c == IN_CATEGORY_PRIVATE)
242 		return 1;
243 	else
244 		return 0;
245 }
246 
247 static void
248 in_score(const in_score_src_t *score_src, int *score, int *scorelenp,
249     const struct in_addr *src, int preference, int idx,
250     const struct in_addr *dst)
251 {
252 	int i;
253 
254 	for (i = 0; i < IN_SCORE_SRC_MAX && score_src[i] != NULL; i++)
255 		score[i] = (*score_src[i])(src, preference, idx, dst);
256 	if (scorelenp != NULL)
257 		*scorelenp = i;
258 }
259 
/*
 * Lexicographically compare the first scorelen entries of two score
 * vectors.
 *
 * Returns a positive value if score1 ranks higher than score2, a
 * negative value if it ranks lower, and 0 if the vectors are equal
 * (or scorelen is 0).
 *
 * The result is produced by comparison rather than by subtracting the
 * two entries: a difference such as INT_MAX - (-1) overflows int and
 * would report the wrong order.
 */
static int
in_score_cmp(int *score1, int *score2, int scorelen)
{
	int i;

	for (i = 0; i < scorelen; i++) {
		if (score1[i] != score2[i])
			return (score1[i] > score2[i]) ? 1 : -1;
	}
	return 0;
}
272 
#ifdef GETIFA_DEBUG
/*
 * Debug helper: print a score vector as "[a, b, c]" followed by a
 * newline.  An empty vector prints as "]".
 */
static void
in_score_println(int *score, int scorelen)
{
	int i;

	for (i = 0; i < scorelen; i++)
		printf("%s%d", (i == 0) ? "[" : ", ", score[i]);
	printf("]\n");
}
#endif /* GETIFA_DEBUG */
287 
288 /* Scan the interface addresses on the interface ifa->ifa_ifp for
289  * the source address that best matches the destination, dst0,
290  * according to the source address-selection policy for this
291  * interface.  If there is no better match than `ifa', return `ifa'.
292  * Otherwise, return the best address.
293  *
294  * Note that in_getifa is called after the kernel has decided which
295  * output interface to use (ifa->ifa_ifp), and in_getifa will not
296  * scan an address belonging to any other interface.
297  */
298 struct ifaddr *
299 in_getifa(struct ifaddr *ifa, const struct sockaddr *dst0)
300 {
301 	const in_score_src_t *score_src;
302 	int idx, scorelen;
303 	const struct sockaddr_in *dst, *src;
304 	struct ifaddr *alt_ifa, *best_ifa;
305 	struct ifnet *ifp;
306 	struct in_ifsysctl *isc;
307 	struct in_ifselsrc *iss;
308 	int best_score[IN_SCORE_SRC_MAX], score[IN_SCORE_SRC_MAX];
309 
310 	if (ifa->ifa_addr->sa_family != AF_INET ||
311 	    dst0 == NULL || dst0->sa_family != AF_INET) {	/* Possible. */
312 		ifa->ifa_seqno = NULL;
313 		return ifa;
314 	}
315 
316 	ifp = ifa->ifa_ifp;
317 	isc = (struct in_ifsysctl *)ifp->if_afdata[AF_INET];
318 	if (isc != NULL && isc->isc_selsrc != NULL &&
319 	    isc->isc_selsrc->iss_score_src[0] != NULL)
320 		iss = isc->isc_selsrc;
321 	else
322 		iss = &default_iss;
323 	score_src = &iss->iss_score_src[0];
324 
325 	dst = (const struct sockaddr_in *)dst0;
326 
327 	best_ifa = ifa;
328 
329 	/* Find out the index of this ifaddr. */
330 	idx = 0;
331 	IFADDR_FOREACH(alt_ifa, ifa->ifa_ifp) {
332 		if (alt_ifa == best_ifa)
333 			break;
334 		idx++;
335 	}
336 	in_score(score_src, best_score, &scorelen, &IA_SIN(best_ifa)->sin_addr,
337 	    best_ifa->ifa_preference, idx, &dst->sin_addr);
338 
339 #ifdef GETIFA_DEBUG
340 	if (in_selsrc_debug) {
341 		printf("%s: enter dst %#" PRIx32 " src %#" PRIx32 " score ",
342 		    __func__, ntohl(dst->sin_addr.s_addr),
343 		    ntohl(satosin(best_ifa->ifa_addr)->sin_addr.s_addr));
344 		in_score_println(best_score, scorelen);
345 	}
346 #endif /* GETIFA_DEBUG */
347 
348 	idx = -1;
349 	IFADDR_FOREACH(alt_ifa, ifa->ifa_ifp) {
350 		++idx;
351 		src = IA_SIN(alt_ifa);
352 
353 		if (alt_ifa == ifa || src->sin_family != AF_INET)
354 			continue;
355 
356 		in_score(score_src, score, NULL, &src->sin_addr,
357 		         alt_ifa->ifa_preference, idx, &dst->sin_addr);
358 
359 #ifdef GETIFA_DEBUG
360 		if (in_selsrc_debug) {
361 			printf("%s: src %#" PRIx32 " score ", __func__,
362 			    ntohl(src->sin_addr.s_addr));
363 			in_score_println(score, scorelen);
364 		}
365 #endif /* GETIFA_DEBUG */
366 
367 		if (in_score_cmp(score, best_score, scorelen) > 0) {
368 			(void)memcpy(best_score, score, sizeof(best_score));
369 			best_ifa = alt_ifa;
370 		}
371 	}
372 #ifdef GETIFA_DEBUG
373 	if (in_selsrc_debug) {
374 		printf("%s: choose src %#" PRIx32 " score ", __func__,
375 		    ntohl(IA_SIN(best_ifa)->sin_addr.s_addr));
376 		in_score_println(best_score, scorelen);
377 	}
378 #endif /* GETIFA_DEBUG */
379 
380 	best_ifa->ifa_seqno = &iss->iss_seqno;
381 	return best_ifa;
382 }
383 
384 static in_score_src_t
385 name_to_score_src(const char *name)
386 {
387 	int i;
388 
389 	for (i = 0; score_src_names[i].sn_name != NULL; i++) {
390 		if (strcmp(score_src_names[i].sn_name, name) == 0)
391 			return score_src_names[i].sn_score_src;
392 	}
393 	return NULL;
394 }
395 
396 static const char *
397 score_src_to_name(const in_score_src_t score_src)
398 {
399 	int i;
400 	for (i = 0; score_src_names[i].sn_name != NULL; i++) {
401 		if (score_src == score_src_names[i].sn_score_src)
402 			return score_src_names[i].sn_name;
403 	}
404 	return "<unknown>";
405 }
406 
407 static size_t
408 in_get_selectsrc(const struct in_ifselsrc *iss, char *buf0,
409     const size_t buflen0)
410 {
411 	int i, rc;
412 	char *buf = buf0;
413 	const char *delim;
414 	size_t buflen = buflen0;
415 
416 	KASSERT(buflen >= 1);
417 
418 	for (delim = "", i = 0;
419 	     i < IN_SCORE_SRC_MAX && iss->iss_score_src[i] != NULL;
420 	     delim = ",", i++) {
421 		rc = snprintf(buf, buflen, "%s%s",
422 		    delim, score_src_to_name(iss->iss_score_src[i]));
423 		if (rc == -1)
424 			return buflen0 - buflen;
425 		if (rc >= buflen)
426 			return buflen0 + rc - buflen;
427 		buf += rc;
428 		buflen -= rc;
429 	}
430 	if (buf == buf0)
431 		*buf++ = '\0';
432 	return buf - buf0;
433 }
434 
435 static int
436 in_set_selectsrc(struct in_ifselsrc *iss, char *buf)
437 {
438 	int i, s;
439 	char *next = buf;
440 	const char *name;
441 	in_score_src_t score_src;
442 	in_score_src_t scorers[IN_SCORE_SRC_MAX];
443 
444 	memset(&scorers, 0, sizeof(scorers));
445 	for (i = 0;
446 	     (name = strsep(&next, ",")) != NULL && i < IN_SCORE_SRC_MAX;
447 	     i++) {
448 		if (strcmp(name, "") == 0)
449 			break;
450 		if ((score_src = name_to_score_src(name)) == NULL)
451 			return EINVAL;
452 		scorers[i] = score_src;
453 	}
454 	if (i == IN_SCORE_SRC_MAX && name != NULL)
455 		return EFBIG;
456 	s = splnet();
457 	(void)memcpy(iss->iss_score_src, scorers, sizeof(iss->iss_score_src));
458         /* If iss affects a specific interface that used to use
459          * the default policy, increase the sequence number on the
460          * default policy, forcing routes that cache a source
461          * (rt_ifa) found by the default policy to refresh their
462          * cache.
463 	 */
464 	if (iss != &default_iss && iss->iss_score_src[0] == NULL &&
465 	    scorers[0] != NULL)
466 		default_iss.iss_seqno++;
467 	iss->iss_seqno++;
468 	splx(s);
469 	return 0;
470 }
471 
472 /*
473  * sysctl helper routine for net.inet.ip.interfaces.<interface>.selectsrc.
474  * Pulls the old value out as a human-readable string, interprets
475  * and records the new value.
476  */
477 static int
478 in_sysctl_selectsrc(SYSCTLFN_ARGS)
479 {
480 	char policy[IN_SELECTSRC_LEN];
481 	int error;
482 	struct sysctlnode node;
483 	struct in_ifselsrc *iss;
484 
485 	node = *rnode;
486 	iss = (struct in_ifselsrc *)node.sysctl_data;
487 	if (oldp != NULL &&
488 	    (error = in_get_selectsrc(iss, policy, sizeof(policy))) >= sizeof(policy))
489 		return error;
490 	node.sysctl_data = &policy[0];
491 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
492 	if (error || newp == NULL)
493 		return (error);
494 
495 	return in_set_selectsrc(iss, policy);
496 }
497 
498 static const struct sysctlnode *
499 in_domifattach_sysctl(struct in_ifsysctl *isc)
500 {
501 	int rc;
502 	const struct sysctlnode *rnode;
503 
504 	if ((rc = sysctl_createv(&isc->isc_log, 0, NULL, &rnode,
505 	                         CTLFLAG_READONLY, CTLTYPE_NODE,
506 				 "interfaces", NULL,
507 				 NULL, 0, NULL, 0,
508 				 CTL_NET, PF_INET, IPPROTO_IP, CTL_CREATE,
509 				 CTL_EOL)) != 0) {
510 		printf("%s: could not create net.inet.ip.interfaces, rc = %d\n",
511 		    __func__, rc);
512 		return NULL;
513 	}
514 	if ((rc = sysctl_createv(&isc->isc_log, 0, &rnode, &rnode,
515 	                         CTLFLAG_READONLY, CTLTYPE_NODE,
516 				 isc->isc_ifp->if_xname,
517 				 SYSCTL_DESCR("interface ip options"),
518 				 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL)) != 0) {
519 		printf("%s: could not create net.inet.ip.interfaces.%s, "
520 		       "rc = %d\n", __func__, isc->isc_ifp->if_xname, rc);
521 		goto err;
522 	}
523 	if ((rc = sysctl_createv(&isc->isc_log, 0, &rnode, &rnode,
524 	                         CTLFLAG_READWRITE, CTLTYPE_STRING,
525 				 "selectsrc",
526 				 SYSCTL_DESCR("source selection policy"),
527 				 in_sysctl_selectsrc, 0,
528 				 isc->isc_selsrc, IN_SELECTSRC_LEN,
529 				 CTL_CREATE, CTL_EOL)) != 0) {
530 		printf(
531 		    "%s: could not create net.inet.ip.%s.selectsrc, rc = %d\n",
532 		    __func__, isc->isc_ifp->if_xname, rc);
533 		goto err;
534 	}
535 	return rnode;
536 err:
537 	sysctl_teardown(&isc->isc_log);
538 	return NULL;
539 }
540 
541 void *
542 in_domifattach(struct ifnet *ifp)
543 {
544 	struct in_ifsysctl *isc;
545 	struct in_ifselsrc *iss;
546 
547 	isc = (struct in_ifsysctl *)malloc(sizeof(*isc), M_IFADDR,
548 	    M_WAITOK | M_ZERO);
549 
550 	iss = (struct in_ifselsrc *)malloc(sizeof(*iss), M_IFADDR,
551 	    M_WAITOK | M_ZERO);
552 
553 	memcpy(&iss->iss_score_src[0], &initial_iss.iss_score_src[0],
554 	    MIN(sizeof(iss->iss_score_src), sizeof(initial_iss.iss_score_src)));
555 
556 	isc->isc_ifp = ifp;
557 	isc->isc_selsrc = iss;
558 
559 	if (in_domifattach_sysctl(isc) == NULL)
560 		goto err;
561 
562 	return isc;
563 err:
564 	free(iss, M_IFADDR);
565 	free(isc, M_IFADDR);
566 	return NULL;
567 }
568 
569 void
570 in_domifdetach(struct ifnet *ifp, void *aux)
571 {
572 	struct in_ifsysctl *isc;
573 	struct in_ifselsrc *iss;
574 
575 	if (aux == NULL)
576 		return;
577 	isc = (struct in_ifsysctl *)aux;
578 	iss = isc->isc_selsrc;
579 	sysctl_teardown(&isc->isc_log);
580 	free(isc, M_IFADDR);
581 	free(iss, M_IFADDR);
582 }
583 #endif /* INET */
584