1 /* $NetBSD: in_selsrc.c,v 1.6 2007/12/04 10:33:11 dyoung Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 David Young. All rights reserved. 5 * 6 * This code was written by David Young. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by David Young. 19 * 4. The name of David Young may not be used to endorse or promote 20 * products derived from this software without specific prior 21 * written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY DAVID YOUNG ``AS IS'' AND ANY 24 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 25 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 26 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 27 * FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 32 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 33 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 34 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 35 */ 36 37 #include <sys/cdefs.h> 38 __KERNEL_RCSID(0, "$NetBSD: in_selsrc.c,v 1.6 2007/12/04 10:33:11 dyoung Exp $"); 39 40 #include "opt_inet.h" 41 #include "opt_inet_conf.h" 42 43 #include <lib/libkern/libkern.h> 44 45 #include <sys/param.h> 46 #include <sys/ioctl.h> 47 #include <sys/errno.h> 48 #include <sys/malloc.h> 49 #include <sys/socket.h> 50 #include <sys/socketvar.h> 51 #include <sys/sysctl.h> 52 #include <sys/systm.h> 53 #include <sys/proc.h> 54 #include <sys/syslog.h> 55 56 #include <net/if.h> 57 58 #include <net/if_ether.h> 59 60 #include <netinet/in_systm.h> 61 #include <netinet/in.h> 62 #include <netinet/in_var.h> 63 #include <netinet/ip.h> 64 #include <netinet/ip_var.h> 65 #include <netinet/in_ifattach.h> 66 #include <netinet/in_pcb.h> 67 #include <netinet/if_inarp.h> 68 #include <netinet/ip_mroute.h> 69 #include <netinet/igmp_var.h> 70 #include <netinet/in_selsrc.h> 71 72 #ifdef INET 73 struct score_src_name { 74 const char *sn_name; 75 const in_score_src_t sn_score_src; 76 }; 77 78 static const struct sysctlnode *in_domifattach_sysctl(struct in_ifsysctl *); 79 static int in_preference(const struct in_addr *, int, int, 80 const struct in_addr *); 81 static int in_index(const struct in_addr *, int, int, const struct in_addr *); 82 static int in_matchlen(const struct in_addr *, int, int, 83 const struct in_addr *); 84 static int in_match_category(const struct in_addr *, int, int, 85 const struct in_addr *); 86 static size_t in_get_selectsrc(const struct in_ifselsrc *, char *, 87 const size_t); 88 static int in_set_selectsrc(struct in_ifselsrc *, char *buf); 89 static int in_sysctl_selectsrc(SYSCTLFN_PROTO); 90 static in_score_src_t name_to_score_src(const char *); 91 static const char *score_src_to_name(const in_score_src_t); 92 static void in_score(const in_score_src_t *, int *, int *, 93 const struct in_addr *, int, int, const struct in_addr *); 94 95 static const struct score_src_name score_src_names[] = { 96 {"same-category", in_match_category} 97 , {"common-prefix-len", in_matchlen} 98 , {"index", in_index} 99 , {"preference", in_preference} 100 , {NULL, NULL} 101 }; 102 103 static const struct in_ifselsrc initial_iss = { 0, {NULL} }; 104 105 static struct in_ifselsrc default_iss = { 0, {in_index} }; 106 107 #ifdef GETIFA_DEBUG 108 int in_selsrc_debug = 0; 109 #endif /* GETIFA_DEBUG */ 110 111 SYSCTL_SETUP(sysctl_selectsrc_setup, "sysctl selectsrc subtree setup") 112 { 113 int rc; 114 const struct sysctlnode *rnode, *cnode; 115 116 if ((rc = sysctl_createv(clog, 0, NULL, &rnode, 117 CTLFLAG_PERMANENT, CTLTYPE_NODE, "net", 118 NULL, NULL, 0, NULL, 0, CTL_NET, CTL_EOL)) != 0) { 119 printf("%s: could not create net, rc = %d\n", __func__, rc); 120 return; 121 } 122 if ((rc = sysctl_createv(clog, 0, NULL, &rnode, 123 CTLFLAG_PERMANENT, CTLTYPE_NODE, "inet", 124 NULL, NULL, 0, NULL, 0, CTL_NET, PF_INET, CTL_EOL)) != 0) { 125 printf("%s: could not create net.inet, rc = %d\n", __func__, 126 rc); 127 return; 128 } 129 if ((rc = sysctl_createv(clog, 0, NULL, &rnode, 130 CTLFLAG_PERMANENT, CTLTYPE_NODE, "ip", 131 NULL, NULL, 0, NULL, 0, 132 CTL_NET, PF_INET, IPPROTO_IP, CTL_EOL)) != 0) { 133 printf("%s: could not create net.inet.ip, rc = %d\n", __func__, 134 rc); 135 return; 136 } 137 if ((rc = sysctl_createv(clog, 0, NULL, &rnode, 138 CTLFLAG_PERMANENT, CTLTYPE_NODE, "selectsrc", 139 NULL, NULL, 0, NULL, 0, 140 CTL_NET, PF_INET, IPPROTO_IP, CTL_CREATE, CTL_EOL)) != 0) { 141 printf("%s: could not create net.inet.ip.selectsrc, " 142 "rc = %d\n", __func__, rc); 143 return; 144 } 145 #ifdef GETIFA_DEBUG 146 if ((rc = sysctl_createv(clog, 0, &rnode, &cnode, 147 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "debug", 148 SYSCTL_DESCR("enable source-selection debug messages"), 149 NULL, 0, &in_selsrc_debug, 0, CTL_CREATE, CTL_EOL)) != 0) { 150 printf("%s: could not create net.inet.ip.selectsrc.debug, " 151 "rc = %d\n", __func__, rc); 152 return; 153 } 154 #endif /* GETIFA_DEBUG */ 155 if ((rc = sysctl_createv(clog, 0, &rnode, &cnode, 156 CTLFLAG_READWRITE, CTLTYPE_STRING, "default", 157 SYSCTL_DESCR("default source selection policy"), 158 in_sysctl_selectsrc, 0, &default_iss, IN_SELECTSRC_LEN, 159 CTL_CREATE, CTL_EOL)) != 0) { 160 printf( 161 "%s: could not create net.inet.ip.selectsrc.default (%d)\n", 162 __func__, rc); 163 return; 164 } 165 } 166 167 /* 168 * Score by address preference: prefer addresses with higher preference 169 * number. Preference numbers are assigned with ioctl SIOCSIFADDRPREF. 170 */ 171 static int 172 in_preference(const struct in_addr *src, int preference, 173 int idx, const struct in_addr *dst) 174 { 175 return preference; 176 } 177 178 /* 179 * Score by address "index": prefer addresses nearer the head of 180 * the ifaddr list. 181 */ 182 static int 183 in_index(const struct in_addr *src, int preference, int idx, 184 const struct in_addr *dst) 185 { 186 return -idx; 187 } 188 189 /* 190 * Length of longest common prefix of src and dst. 191 * 192 * (Derived from in6_matchlen.) 193 */ 194 static int 195 in_matchlen(const struct in_addr *src, int preference, 196 int idx, const struct in_addr *dst) 197 { 198 int match = 0; 199 const uint8_t *s = (const uint8_t *)src, *d = (const uint8_t *)dst; 200 const uint8_t *lim = s + 4; 201 uint_fast8_t r = 0; 202 203 while (s < lim && (r = (*d++ ^ *s++)) == 0) 204 match += 8; 205 206 if (s == lim) 207 return match; 208 209 while ((r & 0x80) == 0) { 210 match++; 211 r <<= 1; 212 } 213 return match; 214 } 215 216 static enum in_category 217 in_categorize(const struct in_addr *s) 218 { 219 if (IN_ANY_LOCAL(s->s_addr)) 220 return IN_CATEGORY_LINKLOCAL; 221 else if (IN_PRIVATE(s->s_addr)) 222 return IN_CATEGORY_PRIVATE; 223 else 224 return IN_CATEGORY_OTHER; 225 } 226 227 static int 228 in_match_category(const struct in_addr *src, int preference, 229 int idx, const struct in_addr *dst) 230 { 231 enum in_category dst_c = in_categorize(dst), 232 src_c = in_categorize(src); 233 #ifdef GETIFA_DEBUG 234 if (in_selsrc_debug) { 235 printf("%s: dst %#08" PRIx32 " categ %d, src %#08" PRIx32 236 " categ %d\n", __func__, ntohl(dst->s_addr), dst_c, 237 ntohl(src->s_addr), src_c); 238 } 239 #endif /* GETIFA_DEBUG */ 240 241 if (dst_c == src_c) 242 return 2; 243 else if (dst_c == IN_CATEGORY_LINKLOCAL && src_c == IN_CATEGORY_PRIVATE) 244 return 1; 245 else if (dst_c == IN_CATEGORY_PRIVATE && src_c == IN_CATEGORY_LINKLOCAL) 246 return 1; 247 else if (dst_c == IN_CATEGORY_OTHER && src_c == IN_CATEGORY_PRIVATE) 248 return 1; 249 else 250 return 0; 251 } 252 253 static void 254 in_score(const in_score_src_t *score_src, int *score, int *scorelenp, 255 const struct in_addr *src, int preference, int idx, 256 const struct in_addr *dst) 257 { 258 int i; 259 260 for (i = 0; i < IN_SCORE_SRC_MAX && score_src[i] != NULL; i++) 261 score[i] = (*score_src[i])(src, preference, idx, dst); 262 if (scorelenp != NULL) 263 *scorelenp = i; 264 } 265 266 static int 267 in_score_cmp(int *score1, int *score2, int scorelen) 268 { 269 int i; 270 271 for (i = 0; i < scorelen; i++) { 272 if (score1[i] == score2[i]) 273 continue; 274 return score1[i] - score2[i]; 275 } 276 return 0; 277 } 278 279 #ifdef GETIFA_DEBUG 280 static void 281 in_score_println(int *score, int scorelen) 282 { 283 int i; 284 const char *delim = "["; 285 286 for (i = 0; i < scorelen; i++) { 287 printf("%s%d", delim, score[i]); 288 delim = ", "; 289 } 290 printf("]\n"); 291 } 292 #endif /* GETIFA_DEBUG */ 293 294 /* Scan the interface addresses on the interface ifa->ifa_ifp for 295 * the source address that best matches the destination, dst0, 296 * according to the source address-selection policy for this 297 * interface. If there is no better match than `ifa', return `ifa'. 298 * Otherwise, return the best address. 299 * 300 * Note that in_getifa is called after the kernel has decided which 301 * output interface to use (ifa->ifa_ifp), and in_getifa will not 302 * scan an address belonging to any other interface. 303 */ 304 struct ifaddr * 305 in_getifa(struct ifaddr *ifa, const struct sockaddr *dst0) 306 { 307 const in_score_src_t *score_src; 308 int idx, scorelen; 309 const struct sockaddr_in *dst, *src; 310 struct ifaddr *alt_ifa, *best_ifa; 311 struct ifnet *ifp; 312 struct in_ifsysctl *isc; 313 struct in_ifselsrc *iss; 314 int best_score[IN_SCORE_SRC_MAX], score[IN_SCORE_SRC_MAX]; 315 316 if (ifa->ifa_addr->sa_family != AF_INET || 317 dst0 == NULL || dst0->sa_family != AF_INET) { /* Possible. */ 318 ifa->ifa_seqno = NULL; 319 return ifa; 320 } 321 322 ifp = ifa->ifa_ifp; 323 isc = (struct in_ifsysctl *)ifp->if_afdata[AF_INET]; 324 if (isc != NULL && isc->isc_selsrc != NULL && 325 isc->isc_selsrc->iss_score_src[0] != NULL) 326 iss = isc->isc_selsrc; 327 else 328 iss = &default_iss; 329 score_src = &iss->iss_score_src[0]; 330 331 dst = (const struct sockaddr_in *)dst0; 332 333 best_ifa = ifa; 334 335 /* Find out the index of this ifaddr. */ 336 idx = 0; 337 IFADDR_FOREACH(alt_ifa, ifa->ifa_ifp) { 338 if (alt_ifa == best_ifa) 339 break; 340 idx++; 341 } 342 in_score(score_src, best_score, &scorelen, &IA_SIN(best_ifa)->sin_addr, 343 best_ifa->ifa_preference, idx, &dst->sin_addr); 344 345 #ifdef GETIFA_DEBUG 346 if (in_selsrc_debug) { 347 printf("%s: enter dst %#" PRIx32 " src %#" PRIx32 " score ", 348 __func__, ntohl(dst->sin_addr.s_addr), 349 ntohl(satosin(best_ifa->ifa_addr)->sin_addr.s_addr)); 350 in_score_println(best_score, scorelen); 351 } 352 #endif /* GETIFA_DEBUG */ 353 354 idx = -1; 355 IFADDR_FOREACH(alt_ifa, ifa->ifa_ifp) { 356 ++idx; 357 src = IA_SIN(alt_ifa); 358 359 if (alt_ifa == ifa || src->sin_family != AF_INET) 360 continue; 361 362 in_score(score_src, score, NULL, &src->sin_addr, 363 alt_ifa->ifa_preference, idx, &dst->sin_addr); 364 365 #ifdef GETIFA_DEBUG 366 if (in_selsrc_debug) { 367 printf("%s: src %#" PRIx32 " score ", __func__, 368 ntohl(src->sin_addr.s_addr)); 369 in_score_println(score, scorelen); 370 } 371 #endif /* GETIFA_DEBUG */ 372 373 if (in_score_cmp(score, best_score, scorelen) > 0) { 374 (void)memcpy(best_score, score, sizeof(best_score)); 375 best_ifa = alt_ifa; 376 } 377 } 378 #ifdef GETIFA_DEBUG 379 if (in_selsrc_debug) { 380 printf("%s: choose src %#" PRIx32 " score ", __func__, 381 ntohl(IA_SIN(best_ifa)->sin_addr.s_addr)); 382 in_score_println(best_score, scorelen); 383 } 384 #endif /* GETIFA_DEBUG */ 385 386 best_ifa->ifa_seqno = &iss->iss_seqno; 387 return best_ifa; 388 } 389 390 static in_score_src_t 391 name_to_score_src(const char *name) 392 { 393 int i; 394 395 for (i = 0; score_src_names[i].sn_name != NULL; i++) { 396 if (strcmp(score_src_names[i].sn_name, name) == 0) 397 return score_src_names[i].sn_score_src; 398 } 399 return NULL; 400 } 401 402 static const char * 403 score_src_to_name(const in_score_src_t score_src) 404 { 405 int i; 406 for (i = 0; score_src_names[i].sn_name != NULL; i++) { 407 if (score_src == score_src_names[i].sn_score_src) 408 return score_src_names[i].sn_name; 409 } 410 return "<unknown>"; 411 } 412 413 static size_t 414 in_get_selectsrc(const struct in_ifselsrc *iss, char *buf0, 415 const size_t buflen0) 416 { 417 int i, rc; 418 char *buf = buf0; 419 const char *delim; 420 size_t buflen = buflen0; 421 422 KASSERT(buflen >= 1); 423 424 for (delim = "", i = 0; 425 i < IN_SCORE_SRC_MAX && iss->iss_score_src[i] != NULL; 426 delim = ",", i++) { 427 rc = snprintf(buf, buflen, "%s%s", 428 delim, score_src_to_name(iss->iss_score_src[i])); 429 if (rc == -1) 430 return buflen0 - buflen; 431 if (rc >= buflen) 432 return buflen0 + rc - buflen; 433 buf += rc; 434 buflen -= rc; 435 } 436 if (buf == buf0) 437 *buf++ = '\0'; 438 return buf - buf0; 439 } 440 441 static int 442 in_set_selectsrc(struct in_ifselsrc *iss, char *buf) 443 { 444 int i, s; 445 char *next = buf; 446 const char *name; 447 in_score_src_t score_src; 448 in_score_src_t scorers[IN_SCORE_SRC_MAX]; 449 450 memset(&scorers, 0, sizeof(scorers)); 451 for (i = 0; 452 (name = strsep(&next, ",")) != NULL && i < IN_SCORE_SRC_MAX; 453 i++) { 454 if (strcmp(name, "") == 0) 455 break; 456 if ((score_src = name_to_score_src(name)) == NULL) 457 return EINVAL; 458 scorers[i] = score_src; 459 } 460 if (i == IN_SCORE_SRC_MAX && name != NULL) 461 return EFBIG; 462 s = splnet(); 463 (void)memcpy(iss->iss_score_src, scorers, sizeof(iss->iss_score_src)); 464 /* If iss affects a specific interface that used to use 465 * the default policy, increase the sequence number on the 466 * default policy, forcing routes that cache a source 467 * (rt_ifa) found by the default policy to refresh their 468 * cache. 469 */ 470 if (iss != &default_iss && iss->iss_score_src[0] == NULL && 471 scorers[0] != NULL) 472 default_iss.iss_seqno++; 473 iss->iss_seqno++; 474 splx(s); 475 return 0; 476 } 477 478 /* 479 * sysctl helper routine for net.inet.ip.interfaces.<interface>.selectsrc. 480 * Pulls the old value out as a human-readable string, interprets 481 * and records the new value. 482 */ 483 static int 484 in_sysctl_selectsrc(SYSCTLFN_ARGS) 485 { 486 char policy[IN_SELECTSRC_LEN]; 487 int error; 488 struct sysctlnode node; 489 struct in_ifselsrc *iss; 490 491 node = *rnode; 492 iss = (struct in_ifselsrc *)node.sysctl_data; 493 if (oldp != NULL && 494 (error = in_get_selectsrc(iss, policy, sizeof(policy))) >= sizeof(policy)) 495 return error; 496 node.sysctl_data = &policy[0]; 497 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 498 if (error || newp == NULL) 499 return (error); 500 501 return in_set_selectsrc(iss, policy); 502 } 503 504 static const struct sysctlnode * 505 in_domifattach_sysctl(struct in_ifsysctl *isc) 506 { 507 int rc; 508 const struct sysctlnode *rnode; 509 510 if ((rc = sysctl_createv(&isc->isc_log, 0, NULL, &rnode, 511 CTLFLAG_READWRITE, CTLTYPE_NODE, 512 "interfaces", NULL, 513 NULL, 0, NULL, 0, 514 CTL_NET, PF_INET, IPPROTO_IP, CTL_CREATE, 515 CTL_EOL)) != 0) { 516 printf("%s: could not create net.inet.ip.interfaces, rc = %d\n", 517 __func__, rc); 518 return NULL; 519 } 520 if ((rc = sysctl_createv(&isc->isc_log, 0, &rnode, &rnode, 521 CTLFLAG_READWRITE, CTLTYPE_NODE, 522 isc->isc_ifp->if_xname, 523 SYSCTL_DESCR("interface ip options"), 524 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL)) != 0) { 525 printf("%s: could not create net.inet.ip.interfaces.%s, " 526 "rc = %d\n", __func__, isc->isc_ifp->if_xname, rc); 527 goto err; 528 } 529 if ((rc = sysctl_createv(&isc->isc_log, 0, &rnode, &rnode, 530 CTLFLAG_READWRITE, CTLTYPE_STRING, 531 "selectsrc", 532 SYSCTL_DESCR("source selection policy"), 533 in_sysctl_selectsrc, 0, 534 isc->isc_selsrc, IN_SELECTSRC_LEN, 535 CTL_CREATE, CTL_EOL)) != 0) { 536 printf( 537 "%s: could not create net.inet.ip.%s.selectsrc, rc = %d\n", 538 __func__, isc->isc_ifp->if_xname, rc); 539 goto err; 540 } 541 return rnode; 542 err: 543 sysctl_teardown(&isc->isc_log); 544 return NULL; 545 } 546 547 void * 548 in_domifattach(struct ifnet *ifp) 549 { 550 struct in_ifsysctl *isc; 551 struct in_ifselsrc *iss; 552 553 isc = (struct in_ifsysctl *)malloc(sizeof(*isc), M_IFADDR, 554 M_WAITOK | M_ZERO); 555 556 iss = (struct in_ifselsrc *)malloc(sizeof(*iss), M_IFADDR, 557 M_WAITOK | M_ZERO); 558 559 memcpy(&iss->iss_score_src[0], &initial_iss.iss_score_src[0], 560 MIN(sizeof(iss->iss_score_src), sizeof(initial_iss.iss_score_src))); 561 562 isc->isc_ifp = ifp; 563 isc->isc_selsrc = iss; 564 565 if (in_domifattach_sysctl(isc) == NULL) 566 goto err; 567 568 return isc; 569 err: 570 free(iss, M_IFADDR); 571 free(isc, M_IFADDR); 572 return NULL; 573 } 574 575 void 576 in_domifdetach(struct ifnet *ifp, void *aux) 577 { 578 struct in_ifsysctl *isc; 579 struct in_ifselsrc *iss; 580 581 if (aux == NULL) 582 return; 583 isc = (struct in_ifsysctl *)aux; 584 iss = isc->isc_selsrc; 585 sysctl_teardown(&isc->isc_log); 586 free(isc, M_IFADDR); 587 free(iss, M_IFADDR); 588 } 589 #endif /* INET */ 590