xref: /netbsd-src/sys/netinet/in_selsrc.c (revision c0179c282a5968435315a82f4128c61372c68fc3)
1 /*	$NetBSD: in_selsrc.c,v 1.3 2006/11/16 01:33:45 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 2005 David Young.  All rights reserved.
5  *
6  * This code was written by David Young.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by David Young.
19  * 4. The name of David Young may not be used to endorse or promote
20  *    products derived from this software without specific prior
21  *    written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY DAVID YOUNG ``AS IS'' AND ANY
24  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
25  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
26  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
27  * FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
32  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
33  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
34  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35  */
36 
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: in_selsrc.c,v 1.3 2006/11/16 01:33:45 christos Exp $");
39 
40 #include "opt_inet.h"
41 #include "opt_inet_conf.h"
42 
43 #include <lib/libkern/libkern.h>
44 
45 #include <sys/param.h>
46 #include <sys/ioctl.h>
47 #include <sys/errno.h>
48 #include <sys/malloc.h>
49 #include <sys/socket.h>
50 #include <sys/socketvar.h>
51 #include <sys/sysctl.h>
52 #include <sys/systm.h>
53 #include <sys/proc.h>
54 #include <sys/syslog.h>
55 
56 #include <net/if.h>
57 
58 #include <net/if_ether.h>
59 
60 #include <netinet/in_systm.h>
61 #include <netinet/in.h>
62 #include <netinet/in_var.h>
63 #include <netinet/ip.h>
64 #include <netinet/ip_var.h>
65 #include <netinet/in_ifattach.h>
66 #include <netinet/in_pcb.h>
67 #include <netinet/if_inarp.h>
68 #include <netinet/ip_mroute.h>
69 #include <netinet/igmp_var.h>
70 #include <netinet/in_selsrc.h>
71 
72 #ifdef INET
/* Pairs a policy keyword with the scoring function that keyword selects. */
73 struct score_src_name {
74 	const char		*sn_name;	/* keyword; NULL in the table's last entry */
75 	const in_score_src_t	sn_score_src;	/* scorer the keyword stands for */
76 };
77 
/* Forward declarations of the file-local helpers defined below. */
78 static const struct sysctlnode *in_domifattach_sysctl(struct in_ifsysctl *);
/* Scorers: (source, preference, list index, destination) -> score. */
79 static int in_preference(const struct in_addr *, int, int,
80     const struct in_addr *);
81 static int in_index(const struct in_addr *, int, int, const struct in_addr *);
82 static int in_matchlen(const struct in_addr *, int, int,
83     const struct in_addr *);
84 static int in_match_category(const struct in_addr *, int, int,
85     const struct in_addr *);
/* Conversion between a policy and its comma-separated string form. */
86 static size_t in_get_selectsrc(const struct in_ifselsrc *, char *,
87     const size_t);
88 static int in_set_selectsrc(struct in_ifselsrc *, char *buf);
89 static int in_sysctl_selectsrc(SYSCTLFN_PROTO);
90 static in_score_src_t name_to_score_src(const char *);
91 static const char *score_src_to_name(const in_score_src_t);
/* Runs every scorer of a policy over one candidate source address. */
92 static void in_score(const in_score_src_t *, int *, int *,
93     const struct in_addr *, int, int, const struct in_addr *);
94 
/*
 * Keyword <-> scorer table searched by name_to_score_src() and
 * score_src_to_name(); the {NULL, NULL} entry ends the search.
 */
95 static const struct score_src_name score_src_names[] = {
96 	  {"same-category", in_match_category}
97 	, {"common-prefix-len", in_matchlen}
98 	, {"index", in_index}
99 	, {"preference", in_preference}
100 	, {NULL, NULL}
101 };
102 
/* Scorer list copied into a per-interface policy by in_domifattach(): empty. */
103 static const struct in_ifselsrc initial_iss = { 0, {NULL} };
104 
/*
 * Policy in_getifa() falls back to when the interface's own policy has
 * no scorers: score by position in the address list only.
 */
105 static struct in_ifselsrc default_iss = { 0, {in_index} };
106 
107 #ifdef GETIFA_DEBUG
108 int in_selsrc_debug = 0;
109 
110 SYSCTL_SETUP(sysctl_selectsrc_setup, "sysctl selectsrc subtree setup")
111 {
112 	int rc;
113 	const struct sysctlnode *rnode, *cnode;
114 
115 	if ((rc = sysctl_createv(clog, 0, NULL, &rnode,
116 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "net",
117 	    NULL, NULL, 0, NULL, 0, CTL_NET, CTL_EOL)) != 0) {
118 		printf("%s: could not create net, rc = %d\n", __func__, rc);
119 		return;
120 	}
121 	if ((rc = sysctl_createv(clog, 0, NULL, &rnode,
122 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "inet",
123 	    NULL, NULL, 0, NULL, 0, CTL_NET, PF_INET, CTL_EOL)) != 0) {
124 		printf("%s: could not create net.inet, rc = %d\n", __func__,
125 		    rc);
126 		return;
127 	}
128 	if ((rc = sysctl_createv(clog, 0, NULL, &rnode,
129 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "ip",
130 	    NULL, NULL, 0, NULL, 0,
131 	    CTL_NET, PF_INET, IPPROTO_IP, CTL_EOL)) != 0) {
132 		printf("%s: could not create net.inet.ip, rc = %d\n", __func__,
133 		    rc);
134 		return;
135 	}
136 	if ((rc = sysctl_createv(clog, 0, NULL, &rnode,
137 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "selectsrc",
138 	    NULL, NULL, 0, NULL, 0,
139 	    CTL_NET, PF_INET, IPPROTO_IP, CTL_CREATE, CTL_EOL)) != 0) {
140 		printf("%s: could not create net.inet.ip.selectsrc, "
141 		       "rc = %d\n", __func__, rc);
142 		return;
143 	}
144 	if ((rc = sysctl_createv(clog, 0, &rnode, &cnode,
145 	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "debug",
146 	    SYSCTL_DESCR("enable source-selection debug messages"),
147 	    NULL, 0, &in_selsrc_debug, 0, CTL_CREATE, CTL_EOL)) != 0) {
148 		printf("%s: could not create net.inet.ip.selectsrc.debug, "
149 		       "rc = %d\n", __func__, rc);
150 		return;
151 	}
152 	if ((rc = sysctl_createv(clog, 0, &rnode, &cnode,
153 	    CTLFLAG_READWRITE, CTLTYPE_STRING, "default",
154 	    SYSCTL_DESCR("default source selection policy"),
155 	    in_sysctl_selectsrc, 0, &default_iss, IN_SELECTSRC_LEN,
156 	    CTL_CREATE, CTL_EOL)) != 0) {
157 		printf(
158 		    "%s: could not create net.inet.ip.selectsrc.default (%d)\n",
159 		    __func__, rc);
160 		return;
161 	}
162 }
163 #endif /* GETIFA_DEBUG */
164 
/*
 * Scorer "preference": rank a source address by its preference number
 * (assigned with ioctl SIOCSIFADDRPREF).  The number is negated so that
 * a lower preference number yields a higher score.  The source address,
 * list index and destination play no part in the result.
 */
static int
in_preference(const struct in_addr *src, int preference,
    int idx, const struct in_addr *dst)
{
	const int score = -preference;

	return score;
}
175 
/*
 * Scorer "index": rank a source address by its position in the ifaddr
 * list.  The index is negated so that addresses nearer the head of the
 * list score higher.  The source address, preference and destination
 * play no part in the result.
 */
static int
in_index(const struct in_addr *src, int preference, int idx,
    const struct in_addr *dst)
{
	const int score = -idx;

	return score;
}
186 
/*
 * Scorer "common-prefix-len": number of leading bits (0..32) that src
 * and dst have in common, comparing the addresses byte by byte in
 * memory order.  preference and idx are ignored.
 *
 * (Derived from in6_matchlen.)
 */
static int
in_matchlen(const struct in_addr *src, int preference,
    int idx, const struct in_addr *dst)
{
	const uint8_t *s = (const uint8_t *)src;
	const uint8_t *d = (const uint8_t *)dst;
	unsigned int diff = 0;
	int match = 0;
	int i;

	/* Whole bytes that agree contribute eight bits each. */
	for (i = 0; i < 4; i++) {
		diff = (unsigned int)(s[i] ^ d[i]);
		if (diff != 0)
			break;
		match += 8;
	}

	/*
	 * Decide on the XOR itself, not on how far the scan got: a
	 * difference found in the final byte also ends the scan at the
	 * last position, and its matching leading bits must still be
	 * counted.
	 */
	if (diff == 0)
		return match;

	/* diff is non-zero and below 0x100, so this loop terminates. */
	while ((diff & 0x80) == 0) {
		match++;
		diff <<= 1;
	}
	return match;
}
213 
214 static enum in_category
215 in_categorize(const struct in_addr *s)
216 {
217 	if (IN_ANY_LOCAL(s->s_addr))
218 		return IN_CATEGORY_LINKLOCAL;
219 	else if (IN_PRIVATE(s->s_addr))
220 		return IN_CATEGORY_PRIVATE;
221 	else
222 		return IN_CATEGORY_OTHER;
223 }
224 
225 static int
226 in_match_category(const struct in_addr *src, int preference,
227     int idx, const struct in_addr *dst)
228 {
229 	enum in_category dst_c = in_categorize(dst),
230 	                 src_c = in_categorize(src);
231 #ifdef GETIFA_DEBUG
232 	if (in_selsrc_debug) {
233 		printf("%s: dst %#08" PRIx32 " categ %d, src %#08" PRIx32
234 		    " categ %d\n", __func__, ntohl(dst->s_addr), dst_c,
235 		    ntohl(src->s_addr), src_c);
236 	}
237 #endif /* GETIFA_DEBUG */
238 
239 	if (dst_c == src_c)
240 		return 2;
241 	else if (dst_c == IN_CATEGORY_LINKLOCAL && src_c == IN_CATEGORY_PRIVATE)
242 		return 1;
243 	else if (dst_c == IN_CATEGORY_PRIVATE && src_c == IN_CATEGORY_LINKLOCAL)
244 		return 1;
245 	else if (dst_c == IN_CATEGORY_OTHER && src_c == IN_CATEGORY_PRIVATE)
246 		return 1;
247 	else
248 		return 0;
249 }
250 
251 static void
252 in_score(const in_score_src_t *score_src, int *score, int *scorelenp,
253     const struct in_addr *src, int preference, int idx,
254     const struct in_addr *dst)
255 {
256 	int i;
257 
258 	for (i = 0; i < IN_SCORE_SRC_MAX && score_src[i] != NULL; i++)
259 		score[i] = (*score_src[i])(src, preference, idx, dst);
260 	if (scorelenp != NULL)
261 		*scorelenp = i;
262 }
263 
/*
 * Compare two score vectors of scorelen entries lexicographically.
 *
 * Returns a positive value if score1 ranks above score2, a negative
 * value if it ranks below, and 0 if the vectors are equal (or scorelen
 * is not positive).  The first differing entry decides.
 *
 * The entries are compared rather than subtracted: scores are derived
 * from caller-supplied integers, and the difference of two ints can
 * overflow.
 */
static int
in_score_cmp(const int *score1, const int *score2, int scorelen)
{
	int i;

	for (i = 0; i < scorelen; i++) {
		if (score1[i] == score2[i])
			continue;
		return (score1[i] > score2[i]) ? 1 : -1;
	}
	return 0;
}
276 
#ifdef GETIFA_DEBUG
/*
 * Debug aid: print a score vector as "[a, b, c]" followed by a newline.
 * The opening bracket is emitted together with the first entry, so a
 * vector of length zero prints as "]" alone.
 */
static void
in_score_println(int *score, int scorelen)
{
	int i;

	for (i = 0; i < scorelen; i++)
		printf("%s%d", (i == 0) ? "[" : ", ", score[i]);
	printf("]\n");
}
#endif /* GETIFA_DEBUG */
291 
292 /* Scan the interface addresses on the interface ifa->ifa_ifp for
293  * the source address that best matches the destination, dst0,
294  * according to the source address-selection policy for this
295  * interface.  If there is no better match than `ifa', return `ifa'.
296  * Otherwise, return the best address.
297  *
298  * Note that in_getifa is called after the kernel has decided which
299  * output interface to use (ifa->ifa_ifp), and in_getifa will not
300  * scan an address belonging to any other interface.
301  */
302 struct ifaddr *
303 in_getifa(struct ifaddr *ifa, const struct sockaddr *dst0)
304 {
305 	const in_score_src_t *score_src;
306 	int idx, scorelen;
307 	const struct sockaddr_in *dst, *src;
308 	struct ifaddr *alt_ifa, *best_ifa;
309 	struct ifnet *ifp;
310 	struct in_ifsysctl *isc;
311 	struct in_ifselsrc *iss;
312 	int best_score[IN_SCORE_SRC_MAX], score[IN_SCORE_SRC_MAX];
313 
314 	if (ifa->ifa_addr->sa_family != AF_INET ||
315 	    dst0 == NULL || dst0->sa_family != AF_INET) {	/* Possible. */
316 		ifa->ifa_seqno = NULL;
317 		return ifa;
318 	}
319 
320 	ifp = ifa->ifa_ifp;
321 	isc = (struct in_ifsysctl *)ifp->if_afdata[AF_INET];
322 	if (isc != NULL && isc->isc_selsrc != NULL &&
323 	    isc->isc_selsrc->iss_score_src[0] != NULL)
324 		iss = isc->isc_selsrc;
325 	else
326 		iss = &default_iss;
327 	score_src = &iss->iss_score_src[0];
328 
329 	dst = (const struct sockaddr_in *)dst0;
330 
331 	best_ifa = ifa;
332 
333 	/* Find out the index of this ifaddr. */
334 	idx = 0;
335 	TAILQ_FOREACH(alt_ifa, &ifa->ifa_ifp->if_addrlist, ifa_list) {
336 		if (alt_ifa == best_ifa)
337 			break;
338 		idx++;
339 	}
340 	in_score(score_src, best_score, &scorelen, &IA_SIN(best_ifa)->sin_addr,
341 	    best_ifa->ifa_preference, idx, &dst->sin_addr);
342 
343 #ifdef GETIFA_DEBUG
344 	if (in_selsrc_debug) {
345 		printf("%s: enter dst %#" PRIx32 " src %#" PRIx32 " score ",
346 		    __func__, ntohl(dst->sin_addr.s_addr),
347 		    ntohl(satosin(best_ifa->ifa_addr)->sin_addr.s_addr));
348 		in_score_println(best_score, scorelen);
349 	}
350 #endif /* GETIFA_DEBUG */
351 
352 	idx = -1;
353 	TAILQ_FOREACH(alt_ifa, &ifa->ifa_ifp->if_addrlist, ifa_list) {
354 		++idx;
355 		src = IA_SIN(alt_ifa);
356 
357 		if (alt_ifa == ifa || src->sin_family != AF_INET)
358 			continue;
359 
360 		in_score(score_src, score, NULL, &src->sin_addr,
361 		         alt_ifa->ifa_preference, idx, &dst->sin_addr);
362 
363 #ifdef GETIFA_DEBUG
364 		if (in_selsrc_debug) {
365 			printf("%s: src %#" PRIx32 " score ", __func__,
366 			    ntohl(src->sin_addr.s_addr));
367 			in_score_println(score, scorelen);
368 		}
369 #endif /* GETIFA_DEBUG */
370 
371 		if (in_score_cmp(score, best_score, scorelen) > 0) {
372 			(void)memcpy(best_score, score, sizeof(best_score));
373 			best_ifa = alt_ifa;
374 		}
375 	}
376 #ifdef GETIFA_DEBUG
377 	if (in_selsrc_debug) {
378 		printf("%s: choose src %#" PRIx32 " score ", __func__,
379 		    ntohl(IA_SIN(best_ifa)->sin_addr.s_addr));
380 		in_score_println(best_score, scorelen);
381 	}
382 #endif /* GETIFA_DEBUG */
383 
384 	best_ifa->ifa_seqno = &iss->iss_seqno;
385 	return best_ifa;
386 }
387 
388 static in_score_src_t
389 name_to_score_src(const char *name)
390 {
391 	int i;
392 
393 	for (i = 0; score_src_names[i].sn_name != NULL; i++) {
394 		if (strcmp(score_src_names[i].sn_name, name) == 0)
395 			return score_src_names[i].sn_score_src;
396 	}
397 	return NULL;
398 }
399 
400 static const char *
401 score_src_to_name(const in_score_src_t score_src)
402 {
403 	int i;
404 	for (i = 0; score_src_names[i].sn_name != NULL; i++) {
405 		if (score_src == score_src_names[i].sn_score_src)
406 			return score_src_names[i].sn_name;
407 	}
408 	return "<unknown>";
409 }
410 
411 static size_t
412 in_get_selectsrc(const struct in_ifselsrc *iss, char *buf0,
413     const size_t buflen0)
414 {
415 	int i, rc;
416 	char *buf = buf0;
417 	const char *delim;
418 	size_t buflen = buflen0;
419 
420 	KASSERT(buflen >= 1);
421 
422 	for (delim = "", i = 0;
423 	     i < IN_SCORE_SRC_MAX && iss->iss_score_src[i] != NULL;
424 	     delim = ",", i++) {
425 		rc = snprintf(buf, buflen, "%s%s",
426 		    delim, score_src_to_name(iss->iss_score_src[i]));
427 		if (rc == -1)
428 			return buflen0 - buflen;
429 		if (rc >= buflen)
430 			return buflen0 + rc - buflen;
431 		buf += rc;
432 		buflen -= rc;
433 	}
434 	if (buf == buf0)
435 		*buf++ = '\0';
436 	return buf - buf0;
437 }
438 
439 static int
440 in_set_selectsrc(struct in_ifselsrc *iss, char *buf)
441 {
442 	int i, s;
443 	char *next = buf;
444 	const char *name;
445 	in_score_src_t score_src;
446 	in_score_src_t scorers[IN_SCORE_SRC_MAX];
447 
448 	memset(&scorers, 0, sizeof(scorers));
449 	for (i = 0;
450 	     (name = strsep(&next, ",")) != NULL && i < IN_SCORE_SRC_MAX;
451 	     i++) {
452 		if (strcmp(name, "") == 0)
453 			break;
454 		if ((score_src = name_to_score_src(name)) == NULL)
455 			return EINVAL;
456 		scorers[i] = score_src;
457 	}
458 	if (i == IN_SCORE_SRC_MAX && name != NULL)
459 		return EFBIG;
460 	s = splnet();
461 	(void)memcpy(iss->iss_score_src, scorers, sizeof(iss->iss_score_src));
462         /* If iss affects a specific interface that used to use
463          * the default policy, increase the sequence number on the
464          * default policy, forcing routes that cache a source
465          * (rt_ifa) found by the default policy to refresh their
466          * cache.
467 	 */
468 	if (iss != &default_iss && iss->iss_score_src[0] == NULL &&
469 	    scorers[0] != NULL)
470 		default_iss.iss_seqno++;
471 	iss->iss_seqno++;
472 	splx(s);
473 	return 0;
474 }
475 
476 /*
477  * sysctl helper routine for net.inet.ip.interfaces.<interface>.selectsrc.
478  * Pulls the old value out as a human-readable string, interprets
479  * and records the new value.
480  */
481 static int
482 in_sysctl_selectsrc(SYSCTLFN_ARGS)
483 {
484 	char policy[IN_SELECTSRC_LEN];
485 	int error;
486 	struct sysctlnode node;
487 	struct in_ifselsrc *iss;
488 
489 	node = *rnode;
490 	iss = (struct in_ifselsrc *)node.sysctl_data;
491 	if (oldp != NULL &&
492 	    (error = in_get_selectsrc(iss, policy, sizeof(policy))) >= sizeof(policy))
493 		return error;
494 	node.sysctl_data = &policy[0];
495 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
496 	if (error || newp == NULL)
497 		return (error);
498 
499 	return in_set_selectsrc(iss, policy);
500 }
501 
502 static const struct sysctlnode *
503 in_domifattach_sysctl(struct in_ifsysctl *isc)
504 {
505 	int rc;
506 	const struct sysctlnode *rnode;
507 
508 	if ((rc = sysctl_createv(&isc->isc_log, 0, NULL, &rnode,
509 	                         CTLFLAG_READWRITE, CTLTYPE_NODE,
510 				 "interfaces", NULL,
511 				 NULL, 0, NULL, 0,
512 				 CTL_NET, PF_INET, IPPROTO_IP, CTL_CREATE,
513 				 CTL_EOL)) != 0) {
514 		printf("%s: could not create net.inet.ip.interfaces, rc = %d\n",
515 		    __func__, rc);
516 		return NULL;
517 	}
518 	if ((rc = sysctl_createv(&isc->isc_log, 0, &rnode, &rnode,
519 	                         CTLFLAG_READWRITE, CTLTYPE_NODE,
520 				 isc->isc_ifp->if_xname,
521 				 SYSCTL_DESCR("interface ip options"),
522 				 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL)) != 0) {
523 		printf("%s: could not create net.inet.ip.interfaces.%s, "
524 		       "rc = %d\n", __func__, isc->isc_ifp->if_xname, rc);
525 		goto err;
526 	}
527 	if ((rc = sysctl_createv(&isc->isc_log, 0, &rnode, &rnode,
528 	                         CTLFLAG_READWRITE, CTLTYPE_STRING,
529 				 "selectsrc",
530 				 SYSCTL_DESCR("source selection policy"),
531 				 in_sysctl_selectsrc, 0,
532 				 isc->isc_selsrc, IN_SELECTSRC_LEN,
533 				 CTL_CREATE, CTL_EOL)) != 0) {
534 		printf(
535 		    "%s: could not create net.inet.ip.%s.selectsrc, rc = %d\n",
536 		    __func__, isc->isc_ifp->if_xname, rc);
537 		goto err;
538 	}
539 	return rnode;
540 err:
541 	sysctl_teardown(&isc->isc_log);
542 	return NULL;
543 }
544 
545 void *
546 in_domifattach(struct ifnet *ifp)
547 {
548 	struct in_ifsysctl *isc;
549 	struct in_ifselsrc *iss;
550 
551 	isc = (struct in_ifsysctl *)malloc(sizeof(*isc), M_IFADDR,
552 	    M_WAITOK | M_ZERO);
553 
554 	iss = (struct in_ifselsrc *)malloc(sizeof(*iss), M_IFADDR,
555 	    M_WAITOK | M_ZERO);
556 
557 	memcpy(&iss->iss_score_src[0], &initial_iss.iss_score_src[0],
558 	    MIN(sizeof(iss->iss_score_src), sizeof(initial_iss.iss_score_src)));
559 
560 	isc->isc_ifp = ifp;
561 	isc->isc_selsrc = iss;
562 
563 	if (in_domifattach_sysctl(isc) == NULL)
564 		goto err;
565 
566 	return isc;
567 err:
568 	free(iss, M_IFADDR);
569 	free(isc, M_IFADDR);
570 	return NULL;
571 }
572 
573 void
574 in_domifdetach(struct ifnet *ifp, void *aux)
575 {
576 	struct in_ifsysctl *isc;
577 	struct in_ifselsrc *iss;
578 
579 	if (aux == NULL)
580 		return;
581 	isc = (struct in_ifsysctl *)aux;
582 	iss = isc->isc_selsrc;
583 	sysctl_teardown(&isc->isc_log);
584 	free(isc, M_IFADDR);
585 	free(iss, M_IFADDR);
586 }
587 #endif /* INET */
588