xref: /netbsd-src/sys/netinet/in_selsrc.c (revision 6cf6fe02a981b55727c49c3d37b0d8191a98c0ee)
1 /*	$NetBSD: in_selsrc.c,v 1.11 2014/02/25 18:30:12 pooka Exp $	*/
2 
3 /*-
4  * Copyright (c) 2005 David Young.  All rights reserved.
5  *
6  * This code was written by David Young.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY DAVID YOUNG ``AS IS'' AND ANY
18  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
20  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
21  * FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 __KERNEL_RCSID(0, "$NetBSD: in_selsrc.c,v 1.11 2014/02/25 18:30:12 pooka Exp $");
33 
34 #include "opt_inet.h"
35 #include "opt_inet_conf.h"
36 
37 #include <lib/libkern/libkern.h>
38 
39 #include <sys/param.h>
40 #include <sys/ioctl.h>
41 #include <sys/errno.h>
42 #include <sys/malloc.h>
43 #include <sys/socket.h>
44 #include <sys/socketvar.h>
45 #include <sys/sysctl.h>
46 #include <sys/systm.h>
47 #include <sys/proc.h>
48 #include <sys/syslog.h>
49 
50 #include <net/if.h>
51 
52 #include <net/if_ether.h>
53 
54 #include <netinet/in_systm.h>
55 #include <netinet/in.h>
56 #include <netinet/in_var.h>
57 #include <netinet/ip.h>
58 #include <netinet/ip_var.h>
59 #include <netinet/in_ifattach.h>
60 #include <netinet/in_pcb.h>
61 #include <netinet/if_inarp.h>
62 #include <netinet/ip_mroute.h>
63 #include <netinet/igmp_var.h>
64 #include <netinet/in_selsrc.h>
65 
66 #ifdef INET
67 struct score_src_name {
68 	const char		*sn_name;
69 	const in_score_src_t	sn_score_src;
70 };
71 
72 static const struct sysctlnode *in_domifattach_sysctl(struct in_ifsysctl *);
73 static int in_preference(const struct in_addr *, int, int,
74     const struct in_addr *);
75 static int in_index(const struct in_addr *, int, int, const struct in_addr *);
76 static int in_matchlen(const struct in_addr *, int, int,
77     const struct in_addr *);
78 static int in_match_category(const struct in_addr *, int, int,
79     const struct in_addr *);
80 static size_t in_get_selectsrc(const struct in_ifselsrc *, char *,
81     const size_t);
82 static int in_set_selectsrc(struct in_ifselsrc *, char *buf);
83 static int in_sysctl_selectsrc(SYSCTLFN_PROTO);
84 static in_score_src_t name_to_score_src(const char *);
85 static const char *score_src_to_name(const in_score_src_t);
86 static void in_score(const in_score_src_t *, int *, int *,
87     const struct in_addr *, int, int, const struct in_addr *);
88 
89 static const struct score_src_name score_src_names[] = {
90 	  {"same-category", in_match_category}
91 	, {"common-prefix-len", in_matchlen}
92 	, {"index", in_index}
93 	, {"preference", in_preference}
94 	, {NULL, NULL}
95 };
96 
97 static const struct in_ifselsrc initial_iss = { 0, {NULL} };
98 
99 static struct in_ifselsrc default_iss = { 0, {in_index} };
100 
101 #ifdef GETIFA_DEBUG
102 int in_selsrc_debug = 0;
103 #endif /* GETIFA_DEBUG */
104 
105 SYSCTL_SETUP(sysctl_selectsrc_setup, "sysctl selectsrc subtree setup")
106 {
107 	int rc;
108 	const struct sysctlnode *rnode, *cnode;
109 
110 	if ((rc = sysctl_createv(clog, 0, NULL, &rnode,
111 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "inet",
112 	    NULL, NULL, 0, NULL, 0, CTL_NET, PF_INET, CTL_EOL)) != 0) {
113 		printf("%s: could not create net.inet, rc = %d\n", __func__,
114 		    rc);
115 		return;
116 	}
117 	if ((rc = sysctl_createv(clog, 0, NULL, &rnode,
118 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "ip",
119 	    NULL, NULL, 0, NULL, 0,
120 	    CTL_NET, PF_INET, IPPROTO_IP, CTL_EOL)) != 0) {
121 		printf("%s: could not create net.inet.ip, rc = %d\n", __func__,
122 		    rc);
123 		return;
124 	}
125 	if ((rc = sysctl_createv(clog, 0, NULL, &rnode,
126 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "selectsrc",
127 	    NULL, NULL, 0, NULL, 0,
128 	    CTL_NET, PF_INET, IPPROTO_IP, CTL_CREATE, CTL_EOL)) != 0) {
129 		printf("%s: could not create net.inet.ip.selectsrc, "
130 		       "rc = %d\n", __func__, rc);
131 		return;
132 	}
133 #ifdef GETIFA_DEBUG
134 	if ((rc = sysctl_createv(clog, 0, &rnode, &cnode,
135 	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "debug",
136 	    SYSCTL_DESCR("enable source-selection debug messages"),
137 	    NULL, 0, &in_selsrc_debug, 0, CTL_CREATE, CTL_EOL)) != 0) {
138 		printf("%s: could not create net.inet.ip.selectsrc.debug, "
139 		       "rc = %d\n", __func__, rc);
140 		return;
141 	}
142 #endif /* GETIFA_DEBUG */
143 	if ((rc = sysctl_createv(clog, 0, &rnode, &cnode,
144 	    CTLFLAG_READWRITE, CTLTYPE_STRUCT, "default",
145 	    SYSCTL_DESCR("default source selection policy"),
146 	    in_sysctl_selectsrc, 0, &default_iss, IN_SELECTSRC_LEN,
147 	    CTL_CREATE, CTL_EOL)) != 0) {
148 		printf(
149 		    "%s: could not create net.inet.ip.selectsrc.default (%d)\n",
150 		    __func__, rc);
151 		return;
152 	}
153 }
154 
/*
 * Scorer "preference": the score is the address's preference number
 * itself, so a larger preference wins.  Preference numbers are assigned
 * with ioctl SIOCSIFADDRPREF.  The addresses and the index are ignored.
 */
static int
in_preference(const struct in_addr *src, int preference,
    int idx, const struct in_addr *dst)
{
	const int score = preference;

	return score;
}
165 
/*
 * Scorer "index": the score is the negated position of the address in
 * the ifaddr list, so addresses closer to the head of the list score
 * higher.  The addresses and the preference are ignored.
 */
static int
in_index(const struct in_addr *src, int preference, int idx,
    const struct in_addr *dst)
{
	const int score = -idx;

	return score;
}
176 
/*
 * Scorer "common-prefix-len": number of leading bits (0..32) that src
 * and dst have in common, comparing the addresses byte by byte in
 * memory order.  The preference and the index are ignored.
 *
 * (Derived from in6_matchlen.)
 */
static int
in_matchlen(const struct in_addr *src, int preference,
    int idx, const struct in_addr *dst)
{
	const uint8_t *s = (const uint8_t *)src;
	const uint8_t *d = (const uint8_t *)dst;
	int match = 0;
	size_t i;

	for (i = 0; i < 4; i++) {
		uint8_t diff = s[i] ^ d[i];

		if (diff == 0) {
			/* Whole byte agrees. */
			match += 8;
			continue;
		}
		/*
		 * First differing byte: add its common leading bits.
		 * This must be decided from the XOR result and not from
		 * the byte position, otherwise a difference confined to
		 * the last byte is never counted.  diff is non-zero, so
		 * the loop stops once its top set bit reaches bit 7.
		 */
		while ((diff & 0x80) == 0) {
			match++;
			diff <<= 1;
		}
		break;
	}
	return match;
}
203 
204 static enum in_category
205 in_categorize(const struct in_addr *s)
206 {
207 	if (IN_ANY_LOCAL(s->s_addr))
208 		return IN_CATEGORY_LINKLOCAL;
209 	else if (IN_PRIVATE(s->s_addr))
210 		return IN_CATEGORY_PRIVATE;
211 	else
212 		return IN_CATEGORY_OTHER;
213 }
214 
215 static int
216 in_match_category(const struct in_addr *src, int preference,
217     int idx, const struct in_addr *dst)
218 {
219 	enum in_category dst_c = in_categorize(dst),
220 	                 src_c = in_categorize(src);
221 #ifdef GETIFA_DEBUG
222 	if (in_selsrc_debug) {
223 		printf("%s: dst %#08" PRIx32 " categ %d, src %#08" PRIx32
224 		    " categ %d\n", __func__, ntohl(dst->s_addr), dst_c,
225 		    ntohl(src->s_addr), src_c);
226 	}
227 #endif /* GETIFA_DEBUG */
228 
229 	if (dst_c == src_c)
230 		return 2;
231 	else if (dst_c == IN_CATEGORY_LINKLOCAL && src_c == IN_CATEGORY_PRIVATE)
232 		return 1;
233 	else if (dst_c == IN_CATEGORY_PRIVATE && src_c == IN_CATEGORY_LINKLOCAL)
234 		return 1;
235 	else if (dst_c == IN_CATEGORY_OTHER && src_c == IN_CATEGORY_PRIVATE)
236 		return 1;
237 	else
238 		return 0;
239 }
240 
241 static void
242 in_score(const in_score_src_t *score_src, int *score, int *scorelenp,
243     const struct in_addr *src, int preference, int idx,
244     const struct in_addr *dst)
245 {
246 	int i;
247 
248 	for (i = 0; i < IN_SCORE_SRC_MAX && score_src[i] != NULL; i++)
249 		score[i] = (*score_src[i])(src, preference, idx, dst);
250 	if (scorelenp != NULL)
251 		*scorelenp = i;
252 }
253 
/*
 * Compare two score vectors of scorelen entries lexicographically.
 *
 * Returns a positive value when score1 is better (larger at the first
 * position where the vectors differ), a negative value when score2 is
 * better, and 0 when all scorelen entries are equal.
 */
static int
in_score_cmp(int *score1, int *score2, int scorelen)
{
	int i;

	for (i = 0; i < scorelen; i++) {
		/*
		 * Compare rather than subtract: the difference of two
		 * arbitrary ints can overflow and report the wrong sign.
		 */
		if (score1[i] != score2[i])
			return (score1[i] > score2[i]) ? 1 : -1;
	}
	return 0;
}
266 
#ifdef GETIFA_DEBUG
/*
 * Debug helper: print a score vector as "[a, b, c]" followed by a
 * newline.  With scorelen 0 only "]" and the newline are printed.
 */
static void
in_score_println(int *score, int scorelen)
{
	int k;

	for (k = 0; k < scorelen; k++)
		printf("%s%d", (k == 0) ? "[" : ", ", score[k]);
	printf("]\n");
}
#endif /* GETIFA_DEBUG */
281 
282 /* Scan the interface addresses on the interface ifa->ifa_ifp for
283  * the source address that best matches the destination, dst0,
284  * according to the source address-selection policy for this
285  * interface.  If there is no better match than `ifa', return `ifa'.
286  * Otherwise, return the best address.
287  *
288  * Note that in_getifa is called after the kernel has decided which
289  * output interface to use (ifa->ifa_ifp), and in_getifa will not
290  * scan an address belonging to any other interface.
291  */
292 struct ifaddr *
293 in_getifa(struct ifaddr *ifa, const struct sockaddr *dst0)
294 {
295 	const in_score_src_t *score_src;
296 	int idx, scorelen;
297 	const struct sockaddr_in *dst, *src;
298 	struct ifaddr *alt_ifa, *best_ifa;
299 	struct ifnet *ifp;
300 	struct in_ifsysctl *isc;
301 	struct in_ifselsrc *iss;
302 	int best_score[IN_SCORE_SRC_MAX], score[IN_SCORE_SRC_MAX];
303 
304 	if (ifa->ifa_addr->sa_family != AF_INET ||
305 	    dst0 == NULL || dst0->sa_family != AF_INET) {	/* Possible. */
306 		ifa->ifa_seqno = NULL;
307 		return ifa;
308 	}
309 
310 	ifp = ifa->ifa_ifp;
311 	isc = (struct in_ifsysctl *)ifp->if_afdata[AF_INET];
312 	if (isc != NULL && isc->isc_selsrc != NULL &&
313 	    isc->isc_selsrc->iss_score_src[0] != NULL)
314 		iss = isc->isc_selsrc;
315 	else
316 		iss = &default_iss;
317 	score_src = &iss->iss_score_src[0];
318 
319 	dst = (const struct sockaddr_in *)dst0;
320 
321 	best_ifa = ifa;
322 
323 	/* Find out the index of this ifaddr. */
324 	idx = 0;
325 	IFADDR_FOREACH(alt_ifa, ifa->ifa_ifp) {
326 		if (alt_ifa == best_ifa)
327 			break;
328 		idx++;
329 	}
330 	in_score(score_src, best_score, &scorelen, &IA_SIN(best_ifa)->sin_addr,
331 	    best_ifa->ifa_preference, idx, &dst->sin_addr);
332 
333 #ifdef GETIFA_DEBUG
334 	if (in_selsrc_debug) {
335 		printf("%s: enter dst %#" PRIx32 " src %#" PRIx32 " score ",
336 		    __func__, ntohl(dst->sin_addr.s_addr),
337 		    ntohl(satosin(best_ifa->ifa_addr)->sin_addr.s_addr));
338 		in_score_println(best_score, scorelen);
339 	}
340 #endif /* GETIFA_DEBUG */
341 
342 	idx = -1;
343 	IFADDR_FOREACH(alt_ifa, ifa->ifa_ifp) {
344 		++idx;
345 		src = IA_SIN(alt_ifa);
346 
347 		if (alt_ifa == ifa || src->sin_family != AF_INET)
348 			continue;
349 
350 		in_score(score_src, score, NULL, &src->sin_addr,
351 		         alt_ifa->ifa_preference, idx, &dst->sin_addr);
352 
353 #ifdef GETIFA_DEBUG
354 		if (in_selsrc_debug) {
355 			printf("%s: src %#" PRIx32 " score ", __func__,
356 			    ntohl(src->sin_addr.s_addr));
357 			in_score_println(score, scorelen);
358 		}
359 #endif /* GETIFA_DEBUG */
360 
361 		if (in_score_cmp(score, best_score, scorelen) > 0) {
362 			(void)memcpy(best_score, score, sizeof(best_score));
363 			best_ifa = alt_ifa;
364 		}
365 	}
366 #ifdef GETIFA_DEBUG
367 	if (in_selsrc_debug) {
368 		printf("%s: choose src %#" PRIx32 " score ", __func__,
369 		    ntohl(IA_SIN(best_ifa)->sin_addr.s_addr));
370 		in_score_println(best_score, scorelen);
371 	}
372 #endif /* GETIFA_DEBUG */
373 
374 	best_ifa->ifa_seqno = &iss->iss_seqno;
375 	return best_ifa;
376 }
377 
378 static in_score_src_t
379 name_to_score_src(const char *name)
380 {
381 	int i;
382 
383 	for (i = 0; score_src_names[i].sn_name != NULL; i++) {
384 		if (strcmp(score_src_names[i].sn_name, name) == 0)
385 			return score_src_names[i].sn_score_src;
386 	}
387 	return NULL;
388 }
389 
390 static const char *
391 score_src_to_name(const in_score_src_t score_src)
392 {
393 	int i;
394 	for (i = 0; score_src_names[i].sn_name != NULL; i++) {
395 		if (score_src == score_src_names[i].sn_score_src)
396 			return score_src_names[i].sn_name;
397 	}
398 	return "<unknown>";
399 }
400 
401 static size_t
402 in_get_selectsrc(const struct in_ifselsrc *iss, char *buf0,
403     const size_t buflen0)
404 {
405 	int i, rc;
406 	char *buf = buf0;
407 	const char *delim;
408 	size_t buflen = buflen0;
409 
410 	KASSERT(buflen >= 1);
411 
412 	for (delim = "", i = 0;
413 	     i < IN_SCORE_SRC_MAX && iss->iss_score_src[i] != NULL;
414 	     delim = ",", i++) {
415 		rc = snprintf(buf, buflen, "%s%s",
416 		    delim, score_src_to_name(iss->iss_score_src[i]));
417 		if (rc == -1)
418 			return buflen0 - buflen;
419 		if (rc >= buflen)
420 			return buflen0 + rc - buflen;
421 		buf += rc;
422 		buflen -= rc;
423 	}
424 	if (buf == buf0)
425 		*buf++ = '\0';
426 	return buf - buf0;
427 }
428 
429 static int
430 in_set_selectsrc(struct in_ifselsrc *iss, char *buf)
431 {
432 	int i, s;
433 	char *next = buf;
434 	const char *name;
435 	in_score_src_t score_src;
436 	in_score_src_t scorers[IN_SCORE_SRC_MAX];
437 
438 	memset(&scorers, 0, sizeof(scorers));
439 	for (i = 0;
440 	     (name = strsep(&next, ",")) != NULL && i < IN_SCORE_SRC_MAX;
441 	     i++) {
442 		if (strcmp(name, "") == 0)
443 			break;
444 		if ((score_src = name_to_score_src(name)) == NULL)
445 			return EINVAL;
446 		scorers[i] = score_src;
447 	}
448 	if (i == IN_SCORE_SRC_MAX && name != NULL)
449 		return EFBIG;
450 	s = splnet();
451 	(void)memcpy(iss->iss_score_src, scorers, sizeof(iss->iss_score_src));
452         /* If iss affects a specific interface that used to use
453          * the default policy, increase the sequence number on the
454          * default policy, forcing routes that cache a source
455          * (rt_ifa) found by the default policy to refresh their
456          * cache.
457 	 */
458 	if (iss != &default_iss && iss->iss_score_src[0] == NULL &&
459 	    scorers[0] != NULL)
460 		default_iss.iss_seqno++;
461 	iss->iss_seqno++;
462 	splx(s);
463 	return 0;
464 }
465 
466 /*
467  * sysctl helper routine for net.inet.ip.interfaces.<interface>.selectsrc.
468  * Pulls the old value out as a human-readable string, interprets
469  * and records the new value.
470  */
471 static int
472 in_sysctl_selectsrc(SYSCTLFN_ARGS)
473 {
474 	char policy[IN_SELECTSRC_LEN];
475 	int error;
476 	struct sysctlnode node;
477 	struct in_ifselsrc *iss;
478 
479 	node = *rnode;
480 	iss = (struct in_ifselsrc *)node.sysctl_data;
481 	if (oldp != NULL &&
482 	    (error = in_get_selectsrc(iss, policy, sizeof(policy))) >= sizeof(policy))
483 		return error;
484 	node.sysctl_data = &policy[0];
485 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
486 	if (error || newp == NULL)
487 		return (error);
488 
489 	return in_set_selectsrc(iss, policy);
490 }
491 
492 static const struct sysctlnode *
493 in_domifattach_sysctl(struct in_ifsysctl *isc)
494 {
495 	int rc;
496 	const struct sysctlnode *rnode;
497 
498 	if ((rc = sysctl_createv(&isc->isc_log, 0, NULL, &rnode,
499 	                         CTLFLAG_READONLY, CTLTYPE_NODE,
500 				 "interfaces", NULL,
501 				 NULL, 0, NULL, 0,
502 				 CTL_NET, PF_INET, IPPROTO_IP, CTL_CREATE,
503 				 CTL_EOL)) != 0) {
504 		printf("%s: could not create net.inet.ip.interfaces, rc = %d\n",
505 		    __func__, rc);
506 		return NULL;
507 	}
508 	if ((rc = sysctl_createv(&isc->isc_log, 0, &rnode, &rnode,
509 	                         CTLFLAG_READONLY, CTLTYPE_NODE,
510 				 isc->isc_ifp->if_xname,
511 				 SYSCTL_DESCR("interface ip options"),
512 				 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL)) != 0) {
513 		printf("%s: could not create net.inet.ip.interfaces.%s, "
514 		       "rc = %d\n", __func__, isc->isc_ifp->if_xname, rc);
515 		goto err;
516 	}
517 	if ((rc = sysctl_createv(&isc->isc_log, 0, &rnode, &rnode,
518 	                         CTLFLAG_READWRITE, CTLTYPE_STRING,
519 				 "selectsrc",
520 				 SYSCTL_DESCR("source selection policy"),
521 				 in_sysctl_selectsrc, 0,
522 				 (void *)isc->isc_selsrc, IN_SELECTSRC_LEN,
523 				 CTL_CREATE, CTL_EOL)) != 0) {
524 		printf(
525 		    "%s: could not create net.inet.ip.%s.selectsrc, rc = %d\n",
526 		    __func__, isc->isc_ifp->if_xname, rc);
527 		goto err;
528 	}
529 	return rnode;
530 err:
531 	sysctl_teardown(&isc->isc_log);
532 	return NULL;
533 }
534 
535 void *
536 in_domifattach(struct ifnet *ifp)
537 {
538 	struct in_ifsysctl *isc;
539 	struct in_ifselsrc *iss;
540 
541 	isc = (struct in_ifsysctl *)malloc(sizeof(*isc), M_IFADDR,
542 	    M_WAITOK | M_ZERO);
543 
544 	iss = (struct in_ifselsrc *)malloc(sizeof(*iss), M_IFADDR,
545 	    M_WAITOK | M_ZERO);
546 
547 	memcpy(&iss->iss_score_src[0], &initial_iss.iss_score_src[0],
548 	    MIN(sizeof(iss->iss_score_src), sizeof(initial_iss.iss_score_src)));
549 
550 	isc->isc_ifp = ifp;
551 	isc->isc_selsrc = iss;
552 
553 	if (in_domifattach_sysctl(isc) == NULL)
554 		goto err;
555 
556 	return isc;
557 err:
558 	free(iss, M_IFADDR);
559 	free(isc, M_IFADDR);
560 	return NULL;
561 }
562 
563 void
564 in_domifdetach(struct ifnet *ifp, void *aux)
565 {
566 	struct in_ifsysctl *isc;
567 	struct in_ifselsrc *iss;
568 
569 	if (aux == NULL)
570 		return;
571 	isc = (struct in_ifsysctl *)aux;
572 	iss = isc->isc_selsrc;
573 	sysctl_teardown(&isc->isc_log);
574 	free(isc, M_IFADDR);
575 	free(iss, M_IFADDR);
576 }
577 #endif /* INET */
578