xref: /netbsd-src/sys/netinet/in_selsrc.c (revision 37afb7eb6895c833050f8bfb1d1bb2f99f332539)
1 /*	$NetBSD: in_selsrc.c,v 1.13 2015/06/08 07:59:54 roy Exp $	*/
2 
3 /*-
4  * Copyright (c) 2005 David Young.  All rights reserved.
5  *
6  * This code was written by David Young.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY DAVID YOUNG ``AS IS'' AND ANY
18  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
20  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
21  * FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 __KERNEL_RCSID(0, "$NetBSD: in_selsrc.c,v 1.13 2015/06/08 07:59:54 roy Exp $");
33 
34 #include "opt_inet.h"
35 #include "opt_inet_conf.h"
36 
37 #include <lib/libkern/libkern.h>
38 
39 #include <sys/param.h>
40 #include <sys/ioctl.h>
41 #include <sys/errno.h>
42 #include <sys/malloc.h>
43 #include <sys/socket.h>
44 #include <sys/socketvar.h>
45 #include <sys/sysctl.h>
46 #include <sys/systm.h>
47 #include <sys/proc.h>
48 #include <sys/syslog.h>
49 
50 #include <net/if.h>
51 
52 #include <net/if_ether.h>
53 
54 #include <netinet/in_systm.h>
55 #include <netinet/in.h>
56 #include <netinet/in_var.h>
57 #include <netinet/ip.h>
58 #include <netinet/ip_var.h>
59 #include <netinet/in_ifattach.h>
60 #include <netinet/in_pcb.h>
61 #include <netinet/if_inarp.h>
62 #include <netinet/ip_mroute.h>
63 #include <netinet/igmp_var.h>
64 #include <netinet/in_selsrc.h>
65 
66 #ifdef INET
67 struct score_src_name {
68 	const char		*sn_name;
69 	const in_score_src_t	sn_score_src;
70 };
71 
72 static const struct sysctlnode *in_domifattach_sysctl(struct in_ifsysctl *);
73 static int in_preference(const struct in_addr *, int, int,
74     const struct in_addr *);
75 static int in_index(const struct in_addr *, int, int, const struct in_addr *);
76 static int in_matchlen(const struct in_addr *, int, int,
77     const struct in_addr *);
78 static int in_match_category(const struct in_addr *, int, int,
79     const struct in_addr *);
80 static size_t in_get_selectsrc(const struct in_ifselsrc *, char *,
81     const size_t);
82 static int in_set_selectsrc(struct in_ifselsrc *, char *buf);
83 static int in_sysctl_selectsrc(SYSCTLFN_PROTO);
84 static in_score_src_t name_to_score_src(const char *);
85 static const char *score_src_to_name(const in_score_src_t);
86 static void in_score(const in_score_src_t *, int *, int *,
87     const struct in_addr *, int, int, const struct in_addr *);
88 
89 static const struct score_src_name score_src_names[] = {
90 	  {"same-category", in_match_category}
91 	, {"common-prefix-len", in_matchlen}
92 	, {"index", in_index}
93 	, {"preference", in_preference}
94 	, {NULL, NULL}
95 };
96 
97 static const struct in_ifselsrc initial_iss = { 0, {NULL} };
98 
99 static struct in_ifselsrc default_iss = { 0, {in_index} };
100 
101 #ifdef GETIFA_DEBUG
102 int in_selsrc_debug = 0;
103 #endif /* GETIFA_DEBUG */
104 
105 SYSCTL_SETUP(sysctl_selectsrc_setup, "sysctl selectsrc subtree setup")
106 {
107 	int rc;
108 	const struct sysctlnode *rnode, *cnode;
109 
110 	if ((rc = sysctl_createv(clog, 0, NULL, &rnode,
111 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "inet",
112 	    NULL, NULL, 0, NULL, 0, CTL_NET, PF_INET, CTL_EOL)) != 0) {
113 		printf("%s: could not create net.inet, rc = %d\n", __func__,
114 		    rc);
115 		return;
116 	}
117 	if ((rc = sysctl_createv(clog, 0, NULL, &rnode,
118 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "ip",
119 	    NULL, NULL, 0, NULL, 0,
120 	    CTL_NET, PF_INET, IPPROTO_IP, CTL_EOL)) != 0) {
121 		printf("%s: could not create net.inet.ip, rc = %d\n", __func__,
122 		    rc);
123 		return;
124 	}
125 	if ((rc = sysctl_createv(clog, 0, NULL, &rnode,
126 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "selectsrc",
127 	    NULL, NULL, 0, NULL, 0,
128 	    CTL_NET, PF_INET, IPPROTO_IP, CTL_CREATE, CTL_EOL)) != 0) {
129 		printf("%s: could not create net.inet.ip.selectsrc, "
130 		       "rc = %d\n", __func__, rc);
131 		return;
132 	}
133 #ifdef GETIFA_DEBUG
134 	if ((rc = sysctl_createv(clog, 0, &rnode, &cnode,
135 	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "debug",
136 	    SYSCTL_DESCR("enable source-selection debug messages"),
137 	    NULL, 0, &in_selsrc_debug, 0, CTL_CREATE, CTL_EOL)) != 0) {
138 		printf("%s: could not create net.inet.ip.selectsrc.debug, "
139 		       "rc = %d\n", __func__, rc);
140 		return;
141 	}
142 #endif /* GETIFA_DEBUG */
143 	if ((rc = sysctl_createv(clog, 0, &rnode, &cnode,
144 	    CTLFLAG_READWRITE, CTLTYPE_STRUCT, "default",
145 	    SYSCTL_DESCR("default source selection policy"),
146 	    in_sysctl_selectsrc, 0, &default_iss, IN_SELECTSRC_LEN,
147 	    CTL_CREATE, CTL_EOL)) != 0) {
148 		printf(
149 		    "%s: could not create net.inet.ip.selectsrc.default (%d)\n",
150 		    __func__, rc);
151 		return;
152 	}
153 }
154 
/*
 * Preference scorer.  The score is simply the address's preference
 * number (assigned with ioctl SIOCSIFADDRPREF), so an address with
 * a higher preference number scores higher.
 *
 * src, idx and dst are part of the common scorer signature and are
 * not consulted here.
 */
static int
in_preference(const struct in_addr *src, int preference,
    int idx, const struct in_addr *dst)
{
	(void)src;
	(void)idx;
	(void)dst;

	return preference;
}
165 
/*
 * Index scorer.  The score is the negated position of the address
 * in the interface's ifaddr list, so addresses nearer the head of
 * the list score higher.
 *
 * src, preference and dst are part of the common scorer signature
 * and are not consulted here.
 */
static int
in_index(const struct in_addr *src, int preference, int idx,
    const struct in_addr *dst)
{
	(void)src;
	(void)preference;
	(void)dst;

	return 0 - idx;
}
176 
/*
 * Common-prefix scorer: the number of leading bits (0..32) that src
 * and dst have in common, comparing the addresses byte by byte in
 * memory order.
 *
 * preference and idx are part of the common scorer signature and
 * are not consulted here.
 *
 * (Derived from in6_matchlen.)
 */
static int
in_matchlen(const struct in_addr *src, int preference,
    int idx, const struct in_addr *dst)
{
	const uint8_t *s = (const uint8_t *)src, *d = (const uint8_t *)dst;
	const uint8_t *lim = s + 4;
	uint_fast8_t r = 0;
	int match = 0;

	(void)preference;
	(void)idx;

	/* Count whole bytes in common; r keeps the first differing XOR. */
	while (s < lim && (r = (*d++ ^ *s++)) == 0)
		match += 8;

	/*
	 * r is zero only if every byte matched.  Testing `s == lim'
	 * here would be wrong: s has also reached lim when the first
	 * difference is in the last byte, and the common bits of that
	 * byte would then go uncounted.
	 */
	if (r == 0)
		return match;

	/* Count the common leading bits of the first differing byte. */
	while ((r & 0x80) == 0) {
		match++;
		r <<= 1;
	}
	return match;
}
203 
204 static enum in_category
205 in_categorize(const struct in_addr *s)
206 {
207 	if (IN_ANY_LOCAL(s->s_addr))
208 		return IN_CATEGORY_LINKLOCAL;
209 	else if (IN_PRIVATE(s->s_addr))
210 		return IN_CATEGORY_PRIVATE;
211 	else
212 		return IN_CATEGORY_OTHER;
213 }
214 
215 static int
216 in_match_category(const struct in_addr *src, int preference,
217     int idx, const struct in_addr *dst)
218 {
219 	enum in_category dst_c = in_categorize(dst),
220 	                 src_c = in_categorize(src);
221 #ifdef GETIFA_DEBUG
222 	if (in_selsrc_debug) {
223 		printf("%s: dst %#08" PRIx32 " categ %d, src %#08" PRIx32
224 		    " categ %d\n", __func__, ntohl(dst->s_addr), dst_c,
225 		    ntohl(src->s_addr), src_c);
226 	}
227 #endif /* GETIFA_DEBUG */
228 
229 	if (dst_c == src_c)
230 		return 2;
231 	else if (dst_c == IN_CATEGORY_LINKLOCAL && src_c == IN_CATEGORY_PRIVATE)
232 		return 1;
233 	else if (dst_c == IN_CATEGORY_PRIVATE && src_c == IN_CATEGORY_LINKLOCAL)
234 		return 1;
235 	else if (dst_c == IN_CATEGORY_OTHER && src_c == IN_CATEGORY_PRIVATE)
236 		return 1;
237 	else
238 		return 0;
239 }
240 
241 static void
242 in_score(const in_score_src_t *score_src, int *score, int *scorelenp,
243     const struct in_addr *src, int preference, int idx,
244     const struct in_addr *dst)
245 {
246 	int i;
247 
248 	for (i = 0; i < IN_SCORE_SRC_MAX && score_src[i] != NULL; i++)
249 		score[i] = (*score_src[i])(src, preference, idx, dst);
250 	if (scorelenp != NULL)
251 		*scorelenp = i;
252 }
253 
/*
 * Compare two score vectors of scorelen entries lexicographically:
 * the first position at which they differ decides the result.
 *
 * Returns a positive value if score1 ranks above score2, a negative
 * value if it ranks below, and 0 if the vectors are equal.  The
 * entries are compared rather than subtracted, because the
 * difference of two ints can overflow and report the wrong sign.
 */
static int
in_score_cmp(const int *score1, const int *score2, int scorelen)
{
	int i;

	for (i = 0; i < scorelen; i++) {
		if (score1[i] != score2[i])
			return (score1[i] > score2[i]) ? 1 : -1;
	}
	return 0;
}
266 
#ifdef GETIFA_DEBUG
/*
 * Debug helper: print the scorelen entries of score[] as a
 * bracketed, comma-separated list followed by a newline, e.g.
 * "[2, 24, -1]".
 */
static void
in_score_println(int *score, int scorelen)
{
	int i;

	/*
	 * The opening bracket is printed unconditionally so that an
	 * empty score vector comes out as "[]" rather than a lone "]".
	 */
	printf("[");
	for (i = 0; i < scorelen; i++)
		printf("%s%d", (i == 0) ? "" : ", ", score[i]);
	printf("]\n");
}
#endif /* GETIFA_DEBUG */
281 
282 /* Scan the interface addresses on the interface ifa->ifa_ifp for
283  * the source address that best matches the destination, dst0,
284  * according to the source address-selection policy for this
285  * interface.  If there is no better match than `ifa', return `ifa'.
286  * Otherwise, return the best address.
287  *
288  * Note that in_getifa is called after the kernel has decided which
289  * output interface to use (ifa->ifa_ifp), and in_getifa will not
290  * scan an address belonging to any other interface.
291  */
292 struct ifaddr *
293 in_getifa(struct ifaddr *ifa, const struct sockaddr *dst0)
294 {
295 	const in_score_src_t *score_src;
296 	int idx, scorelen;
297 	const struct sockaddr_in *dst, *src;
298 	struct ifaddr *alt_ifa, *best_ifa;
299 	struct ifnet *ifp;
300 	struct in_ifsysctl *isc;
301 	struct in_ifselsrc *iss;
302 	int best_score[IN_SCORE_SRC_MAX], score[IN_SCORE_SRC_MAX];
303 	struct in_ifaddr *ia;
304 
305 	if (ifa->ifa_addr->sa_family != AF_INET ||
306 	    dst0 == NULL || dst0->sa_family != AF_INET) {	/* Possible. */
307 		ifa->ifa_seqno = NULL;
308 		return ifa;
309 	}
310 
311 	ifp = ifa->ifa_ifp;
312 	isc = (struct in_ifsysctl *)ifp->if_afdata[AF_INET];
313 	if (isc != NULL && isc->isc_selsrc != NULL &&
314 	    isc->isc_selsrc->iss_score_src[0] != NULL)
315 		iss = isc->isc_selsrc;
316 	else
317 		iss = &default_iss;
318 	score_src = &iss->iss_score_src[0];
319 
320 	dst = (const struct sockaddr_in *)dst0;
321 
322 	best_ifa = ifa;
323 
324 	/* Find out the index of this ifaddr. */
325 	idx = 0;
326 	IFADDR_FOREACH(alt_ifa, ifa->ifa_ifp) {
327 		if (alt_ifa == best_ifa)
328 			break;
329 		idx++;
330 	}
331 	in_score(score_src, best_score, &scorelen, &IA_SIN(best_ifa)->sin_addr,
332 	    best_ifa->ifa_preference, idx, &dst->sin_addr);
333 
334 #ifdef GETIFA_DEBUG
335 	if (in_selsrc_debug) {
336 		printf("%s: enter dst %#" PRIx32 " src %#" PRIx32 " score ",
337 		    __func__, ntohl(dst->sin_addr.s_addr),
338 		    ntohl(satosin(best_ifa->ifa_addr)->sin_addr.s_addr));
339 		in_score_println(best_score, scorelen);
340 	}
341 #endif /* GETIFA_DEBUG */
342 
343 	idx = -1;
344 	IFADDR_FOREACH(alt_ifa, ifa->ifa_ifp) {
345 		++idx;
346 		src = IA_SIN(alt_ifa);
347 
348 		if (alt_ifa == ifa || src->sin_family != AF_INET)
349 			continue;
350 		ia = (struct in_ifaddr *)alt_ifa;
351 		if (ia->ia4_flags & IN_IFF_NOTREADY)
352 			continue;
353 
354 		in_score(score_src, score, NULL, &src->sin_addr,
355 		         alt_ifa->ifa_preference, idx, &dst->sin_addr);
356 
357 #ifdef GETIFA_DEBUG
358 		if (in_selsrc_debug) {
359 			printf("%s: src %#" PRIx32 " score ", __func__,
360 			    ntohl(src->sin_addr.s_addr));
361 			in_score_println(score, scorelen);
362 		}
363 #endif /* GETIFA_DEBUG */
364 
365 		if (in_score_cmp(score, best_score, scorelen) > 0) {
366 			(void)memcpy(best_score, score, sizeof(best_score));
367 			best_ifa = alt_ifa;
368 		}
369 	}
370 
371 	ia = (struct in_ifaddr *)best_ifa;
372 	if (ia->ia4_flags & IN_IFF_NOTREADY)
373 		return NULL;
374 
375 #ifdef GETIFA_DEBUG
376 	if (in_selsrc_debug) {
377 		printf("%s: choose src %#" PRIx32 " score ", __func__,
378 		    ntohl(IA_SIN(best_ifa)->sin_addr.s_addr));
379 		in_score_println(best_score, scorelen);
380 	}
381 #endif /* GETIFA_DEBUG */
382 
383 	best_ifa->ifa_seqno = &iss->iss_seqno;
384 	return best_ifa;
385 }
386 
387 static in_score_src_t
388 name_to_score_src(const char *name)
389 {
390 	int i;
391 
392 	for (i = 0; score_src_names[i].sn_name != NULL; i++) {
393 		if (strcmp(score_src_names[i].sn_name, name) == 0)
394 			return score_src_names[i].sn_score_src;
395 	}
396 	return NULL;
397 }
398 
399 static const char *
400 score_src_to_name(const in_score_src_t score_src)
401 {
402 	int i;
403 	for (i = 0; score_src_names[i].sn_name != NULL; i++) {
404 		if (score_src == score_src_names[i].sn_score_src)
405 			return score_src_names[i].sn_name;
406 	}
407 	return "<unknown>";
408 }
409 
410 static size_t
411 in_get_selectsrc(const struct in_ifselsrc *iss, char *buf0,
412     const size_t buflen0)
413 {
414 	int i, rc;
415 	char *buf = buf0;
416 	const char *delim;
417 	size_t buflen = buflen0;
418 
419 	KASSERT(buflen >= 1);
420 
421 	for (delim = "", i = 0;
422 	     i < IN_SCORE_SRC_MAX && iss->iss_score_src[i] != NULL;
423 	     delim = ",", i++) {
424 		rc = snprintf(buf, buflen, "%s%s",
425 		    delim, score_src_to_name(iss->iss_score_src[i]));
426 		if (rc == -1)
427 			return buflen0 - buflen;
428 		if (rc >= buflen)
429 			return buflen0 + rc - buflen;
430 		buf += rc;
431 		buflen -= rc;
432 	}
433 	if (buf == buf0)
434 		*buf++ = '\0';
435 	return buf - buf0;
436 }
437 
438 static int
439 in_set_selectsrc(struct in_ifselsrc *iss, char *buf)
440 {
441 	int i, s;
442 	char *next = buf;
443 	const char *name;
444 	in_score_src_t score_src;
445 	in_score_src_t scorers[IN_SCORE_SRC_MAX];
446 
447 	memset(&scorers, 0, sizeof(scorers));
448 	for (i = 0;
449 	     (name = strsep(&next, ",")) != NULL && i < IN_SCORE_SRC_MAX;
450 	     i++) {
451 		if (strcmp(name, "") == 0)
452 			break;
453 		if ((score_src = name_to_score_src(name)) == NULL)
454 			return EINVAL;
455 		scorers[i] = score_src;
456 	}
457 	if (i == IN_SCORE_SRC_MAX && name != NULL)
458 		return EFBIG;
459 	s = splnet();
460 	(void)memcpy(iss->iss_score_src, scorers, sizeof(iss->iss_score_src));
461         /* If iss affects a specific interface that used to use
462          * the default policy, increase the sequence number on the
463          * default policy, forcing routes that cache a source
464          * (rt_ifa) found by the default policy to refresh their
465          * cache.
466 	 */
467 	if (iss != &default_iss && iss->iss_score_src[0] == NULL &&
468 	    scorers[0] != NULL)
469 		default_iss.iss_seqno++;
470 	iss->iss_seqno++;
471 	splx(s);
472 	return 0;
473 }
474 
475 /*
476  * sysctl helper routine for net.inet.ip.interfaces.<interface>.selectsrc.
477  * Pulls the old value out as a human-readable string, interprets
478  * and records the new value.
479  */
480 static int
481 in_sysctl_selectsrc(SYSCTLFN_ARGS)
482 {
483 	char policy[IN_SELECTSRC_LEN];
484 	int error;
485 	struct sysctlnode node;
486 	struct in_ifselsrc *iss;
487 
488 	node = *rnode;
489 	iss = (struct in_ifselsrc *)node.sysctl_data;
490 	if (oldp != NULL &&
491 	    (error = in_get_selectsrc(iss, policy, sizeof(policy))) >= sizeof(policy))
492 		return error;
493 	node.sysctl_data = &policy[0];
494 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
495 	if (error || newp == NULL)
496 		return (error);
497 
498 	return in_set_selectsrc(iss, policy);
499 }
500 
501 static const struct sysctlnode *
502 in_domifattach_sysctl(struct in_ifsysctl *isc)
503 {
504 	int rc;
505 	const struct sysctlnode *rnode;
506 
507 	if ((rc = sysctl_createv(&isc->isc_log, 0, NULL, &rnode,
508 	                         CTLFLAG_READONLY, CTLTYPE_NODE,
509 				 "interfaces", NULL,
510 				 NULL, 0, NULL, 0,
511 				 CTL_NET, PF_INET, IPPROTO_IP, CTL_CREATE,
512 				 CTL_EOL)) != 0) {
513 		printf("%s: could not create net.inet.ip.interfaces, rc = %d\n",
514 		    __func__, rc);
515 		return NULL;
516 	}
517 	if ((rc = sysctl_createv(&isc->isc_log, 0, &rnode, &rnode,
518 	                         CTLFLAG_READONLY, CTLTYPE_NODE,
519 				 isc->isc_ifp->if_xname,
520 				 SYSCTL_DESCR("interface ip options"),
521 				 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL)) != 0) {
522 		printf("%s: could not create net.inet.ip.interfaces.%s, "
523 		       "rc = %d\n", __func__, isc->isc_ifp->if_xname, rc);
524 		goto err;
525 	}
526 	if ((rc = sysctl_createv(&isc->isc_log, 0, &rnode, &rnode,
527 	                         CTLFLAG_READWRITE, CTLTYPE_STRING,
528 				 "selectsrc",
529 				 SYSCTL_DESCR("source selection policy"),
530 				 in_sysctl_selectsrc, 0,
531 				 (void *)isc->isc_selsrc, IN_SELECTSRC_LEN,
532 				 CTL_CREATE, CTL_EOL)) != 0) {
533 		printf(
534 		    "%s: could not create net.inet.ip.%s.selectsrc, rc = %d\n",
535 		    __func__, isc->isc_ifp->if_xname, rc);
536 		goto err;
537 	}
538 	return rnode;
539 err:
540 	sysctl_teardown(&isc->isc_log);
541 	return NULL;
542 }
543 
544 void *
545 in_domifattach(struct ifnet *ifp)
546 {
547 	struct in_ifsysctl *isc;
548 	struct in_ifselsrc *iss;
549 
550 	isc = (struct in_ifsysctl *)malloc(sizeof(*isc), M_IFADDR,
551 	    M_WAITOK | M_ZERO);
552 
553 	iss = (struct in_ifselsrc *)malloc(sizeof(*iss), M_IFADDR,
554 	    M_WAITOK | M_ZERO);
555 
556 	memcpy(&iss->iss_score_src[0], &initial_iss.iss_score_src[0],
557 	    MIN(sizeof(iss->iss_score_src), sizeof(initial_iss.iss_score_src)));
558 
559 	isc->isc_ifp = ifp;
560 	isc->isc_selsrc = iss;
561 
562 	if (in_domifattach_sysctl(isc) == NULL)
563 		goto err;
564 
565 	return isc;
566 err:
567 	free(iss, M_IFADDR);
568 	free(isc, M_IFADDR);
569 	return NULL;
570 }
571 
572 void
573 in_domifdetach(struct ifnet *ifp, void *aux)
574 {
575 	struct in_ifsysctl *isc;
576 	struct in_ifselsrc *iss;
577 
578 	if (aux == NULL)
579 		return;
580 	isc = (struct in_ifsysctl *)aux;
581 	iss = isc->isc_selsrc;
582 	sysctl_teardown(&isc->isc_log);
583 	free(isc, M_IFADDR);
584 	free(iss, M_IFADDR);
585 }
586 #endif /* INET */
587