1 /* $NetBSD: in_selsrc.c,v 1.8 2009/10/19 23:19:39 rmind Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 David Young. All rights reserved. 5 * 6 * This code was written by David Young. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY DAVID YOUNG ``AS IS'' AND ANY 18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 19 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 20 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 21 * FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 26 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 27 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 __KERNEL_RCSID(0, "$NetBSD: in_selsrc.c,v 1.8 2009/10/19 23:19:39 rmind Exp $"); 33 34 #include "opt_inet.h" 35 #include "opt_inet_conf.h" 36 37 #include <lib/libkern/libkern.h> 38 39 #include <sys/param.h> 40 #include <sys/ioctl.h> 41 #include <sys/errno.h> 42 #include <sys/malloc.h> 43 #include <sys/socket.h> 44 #include <sys/socketvar.h> 45 #include <sys/sysctl.h> 46 #include <sys/systm.h> 47 #include <sys/proc.h> 48 #include <sys/syslog.h> 49 50 #include <net/if.h> 51 52 #include <net/if_ether.h> 53 54 #include <netinet/in_systm.h> 55 #include <netinet/in.h> 56 #include <netinet/in_var.h> 57 #include <netinet/ip.h> 58 #include <netinet/ip_var.h> 59 #include <netinet/in_ifattach.h> 60 #include <netinet/in_pcb.h> 61 #include <netinet/if_inarp.h> 62 #include <netinet/ip_mroute.h> 63 #include <netinet/igmp_var.h> 64 #include <netinet/in_selsrc.h> 65 66 #ifdef INET 67 struct score_src_name { 68 const char *sn_name; 69 const in_score_src_t sn_score_src; 70 }; 71 72 static const struct sysctlnode *in_domifattach_sysctl(struct in_ifsysctl *); 73 static int in_preference(const struct in_addr *, int, int, 74 const struct in_addr *); 75 static int in_index(const struct in_addr *, int, int, const struct in_addr *); 76 static int in_matchlen(const struct in_addr *, int, int, 77 const struct in_addr *); 78 static int in_match_category(const struct in_addr *, int, int, 79 const struct in_addr *); 80 static size_t in_get_selectsrc(const struct in_ifselsrc *, char *, 81 const size_t); 82 static int in_set_selectsrc(struct in_ifselsrc *, char *buf); 83 static int in_sysctl_selectsrc(SYSCTLFN_PROTO); 84 static in_score_src_t name_to_score_src(const char *); 85 static const char *score_src_to_name(const in_score_src_t); 86 static void in_score(const in_score_src_t *, int *, int *, 87 const struct in_addr *, int, int, const struct in_addr *); 88 89 static const struct score_src_name score_src_names[] = { 90 {"same-category", in_match_category} 91 , {"common-prefix-len", in_matchlen} 92 , {"index", in_index} 93 , {"preference", in_preference} 94 , {NULL, NULL} 95 }; 96 97 static const struct in_ifselsrc initial_iss = { 0, {NULL} }; 98 99 static struct in_ifselsrc default_iss = { 0, {in_index} }; 100 101 #ifdef GETIFA_DEBUG 102 int in_selsrc_debug = 0; 103 #endif /* GETIFA_DEBUG */ 104 105 SYSCTL_SETUP(sysctl_selectsrc_setup, "sysctl selectsrc subtree setup") 106 { 107 int rc; 108 const struct sysctlnode *rnode, *cnode; 109 110 if ((rc = sysctl_createv(clog, 0, NULL, &rnode, 111 CTLFLAG_PERMANENT, CTLTYPE_NODE, "net", 112 NULL, NULL, 0, NULL, 0, CTL_NET, CTL_EOL)) != 0) { 113 printf("%s: could not create net, rc = %d\n", __func__, rc); 114 return; 115 } 116 if ((rc = sysctl_createv(clog, 0, NULL, &rnode, 117 CTLFLAG_PERMANENT, CTLTYPE_NODE, "inet", 118 NULL, NULL, 0, NULL, 0, CTL_NET, PF_INET, CTL_EOL)) != 0) { 119 printf("%s: could not create net.inet, rc = %d\n", __func__, 120 rc); 121 return; 122 } 123 if ((rc = sysctl_createv(clog, 0, NULL, &rnode, 124 CTLFLAG_PERMANENT, CTLTYPE_NODE, "ip", 125 NULL, NULL, 0, NULL, 0, 126 CTL_NET, PF_INET, IPPROTO_IP, CTL_EOL)) != 0) { 127 printf("%s: could not create net.inet.ip, rc = %d\n", __func__, 128 rc); 129 return; 130 } 131 if ((rc = sysctl_createv(clog, 0, NULL, &rnode, 132 CTLFLAG_PERMANENT, CTLTYPE_NODE, "selectsrc", 133 NULL, NULL, 0, NULL, 0, 134 CTL_NET, PF_INET, IPPROTO_IP, CTL_CREATE, CTL_EOL)) != 0) { 135 printf("%s: could not create net.inet.ip.selectsrc, " 136 "rc = %d\n", __func__, rc); 137 return; 138 } 139 #ifdef GETIFA_DEBUG 140 if ((rc = sysctl_createv(clog, 0, &rnode, &cnode, 141 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "debug", 142 SYSCTL_DESCR("enable source-selection debug messages"), 143 NULL, 0, &in_selsrc_debug, 0, CTL_CREATE, CTL_EOL)) != 0) { 144 printf("%s: could not create net.inet.ip.selectsrc.debug, " 145 "rc = %d\n", __func__, rc); 146 return; 147 } 148 #endif /* GETIFA_DEBUG */ 149 if ((rc = sysctl_createv(clog, 0, &rnode, &cnode, 150 CTLFLAG_READWRITE, CTLTYPE_STRING, "default", 151 SYSCTL_DESCR("default source selection policy"), 152 in_sysctl_selectsrc, 0, &default_iss, IN_SELECTSRC_LEN, 153 CTL_CREATE, CTL_EOL)) != 0) { 154 printf( 155 "%s: could not create net.inet.ip.selectsrc.default (%d)\n", 156 __func__, rc); 157 return; 158 } 159 } 160 161 /* 162 * Score by address preference: prefer addresses with higher preference 163 * number. Preference numbers are assigned with ioctl SIOCSIFADDRPREF. 164 */ 165 static int 166 in_preference(const struct in_addr *src, int preference, 167 int idx, const struct in_addr *dst) 168 { 169 return preference; 170 } 171 172 /* 173 * Score by address "index": prefer addresses nearer the head of 174 * the ifaddr list. 175 */ 176 static int 177 in_index(const struct in_addr *src, int preference, int idx, 178 const struct in_addr *dst) 179 { 180 return -idx; 181 } 182 183 /* 184 * Length of longest common prefix of src and dst. 185 * 186 * (Derived from in6_matchlen.) 187 */ 188 static int 189 in_matchlen(const struct in_addr *src, int preference, 190 int idx, const struct in_addr *dst) 191 { 192 int match = 0; 193 const uint8_t *s = (const uint8_t *)src, *d = (const uint8_t *)dst; 194 const uint8_t *lim = s + 4; 195 uint_fast8_t r = 0; 196 197 while (s < lim && (r = (*d++ ^ *s++)) == 0) 198 match += 8; 199 200 if (s == lim) 201 return match; 202 203 while ((r & 0x80) == 0) { 204 match++; 205 r <<= 1; 206 } 207 return match; 208 } 209 210 static enum in_category 211 in_categorize(const struct in_addr *s) 212 { 213 if (IN_ANY_LOCAL(s->s_addr)) 214 return IN_CATEGORY_LINKLOCAL; 215 else if (IN_PRIVATE(s->s_addr)) 216 return IN_CATEGORY_PRIVATE; 217 else 218 return IN_CATEGORY_OTHER; 219 } 220 221 static int 222 in_match_category(const struct in_addr *src, int preference, 223 int idx, const struct in_addr *dst) 224 { 225 enum in_category dst_c = in_categorize(dst), 226 src_c = in_categorize(src); 227 #ifdef GETIFA_DEBUG 228 if (in_selsrc_debug) { 229 printf("%s: dst %#08" PRIx32 " categ %d, src %#08" PRIx32 230 " categ %d\n", __func__, ntohl(dst->s_addr), dst_c, 231 ntohl(src->s_addr), src_c); 232 } 233 #endif /* GETIFA_DEBUG */ 234 235 if (dst_c == src_c) 236 return 2; 237 else if (dst_c == IN_CATEGORY_LINKLOCAL && src_c == IN_CATEGORY_PRIVATE) 238 return 1; 239 else if (dst_c == IN_CATEGORY_PRIVATE && src_c == IN_CATEGORY_LINKLOCAL) 240 return 1; 241 else if (dst_c == IN_CATEGORY_OTHER && src_c == IN_CATEGORY_PRIVATE) 242 return 1; 243 else 244 return 0; 245 } 246 247 static void 248 in_score(const in_score_src_t *score_src, int *score, int *scorelenp, 249 const struct in_addr *src, int preference, int idx, 250 const struct in_addr *dst) 251 { 252 int i; 253 254 for (i = 0; i < IN_SCORE_SRC_MAX && score_src[i] != NULL; i++) 255 score[i] = (*score_src[i])(src, preference, idx, dst); 256 if (scorelenp != NULL) 257 *scorelenp = i; 258 } 259 260 static int 261 in_score_cmp(int *score1, int *score2, int scorelen) 262 { 263 int i; 264 265 for (i = 0; i < scorelen; i++) { 266 if (score1[i] == score2[i]) 267 continue; 268 return score1[i] - score2[i]; 269 } 270 return 0; 271 } 272 273 #ifdef GETIFA_DEBUG 274 static void 275 in_score_println(int *score, int scorelen) 276 { 277 int i; 278 const char *delim = "["; 279 280 for (i = 0; i < scorelen; i++) { 281 printf("%s%d", delim, score[i]); 282 delim = ", "; 283 } 284 printf("]\n"); 285 } 286 #endif /* GETIFA_DEBUG */ 287 288 /* Scan the interface addresses on the interface ifa->ifa_ifp for 289 * the source address that best matches the destination, dst0, 290 * according to the source address-selection policy for this 291 * interface. If there is no better match than `ifa', return `ifa'. 292 * Otherwise, return the best address. 293 * 294 * Note that in_getifa is called after the kernel has decided which 295 * output interface to use (ifa->ifa_ifp), and in_getifa will not 296 * scan an address belonging to any other interface. 297 */ 298 struct ifaddr * 299 in_getifa(struct ifaddr *ifa, const struct sockaddr *dst0) 300 { 301 const in_score_src_t *score_src; 302 int idx, scorelen; 303 const struct sockaddr_in *dst, *src; 304 struct ifaddr *alt_ifa, *best_ifa; 305 struct ifnet *ifp; 306 struct in_ifsysctl *isc; 307 struct in_ifselsrc *iss; 308 int best_score[IN_SCORE_SRC_MAX], score[IN_SCORE_SRC_MAX]; 309 310 if (ifa->ifa_addr->sa_family != AF_INET || 311 dst0 == NULL || dst0->sa_family != AF_INET) { /* Possible. */ 312 ifa->ifa_seqno = NULL; 313 return ifa; 314 } 315 316 ifp = ifa->ifa_ifp; 317 isc = (struct in_ifsysctl *)ifp->if_afdata[AF_INET]; 318 if (isc != NULL && isc->isc_selsrc != NULL && 319 isc->isc_selsrc->iss_score_src[0] != NULL) 320 iss = isc->isc_selsrc; 321 else 322 iss = &default_iss; 323 score_src = &iss->iss_score_src[0]; 324 325 dst = (const struct sockaddr_in *)dst0; 326 327 best_ifa = ifa; 328 329 /* Find out the index of this ifaddr. */ 330 idx = 0; 331 IFADDR_FOREACH(alt_ifa, ifa->ifa_ifp) { 332 if (alt_ifa == best_ifa) 333 break; 334 idx++; 335 } 336 in_score(score_src, best_score, &scorelen, &IA_SIN(best_ifa)->sin_addr, 337 best_ifa->ifa_preference, idx, &dst->sin_addr); 338 339 #ifdef GETIFA_DEBUG 340 if (in_selsrc_debug) { 341 printf("%s: enter dst %#" PRIx32 " src %#" PRIx32 " score ", 342 __func__, ntohl(dst->sin_addr.s_addr), 343 ntohl(satosin(best_ifa->ifa_addr)->sin_addr.s_addr)); 344 in_score_println(best_score, scorelen); 345 } 346 #endif /* GETIFA_DEBUG */ 347 348 idx = -1; 349 IFADDR_FOREACH(alt_ifa, ifa->ifa_ifp) { 350 ++idx; 351 src = IA_SIN(alt_ifa); 352 353 if (alt_ifa == ifa || src->sin_family != AF_INET) 354 continue; 355 356 in_score(score_src, score, NULL, &src->sin_addr, 357 alt_ifa->ifa_preference, idx, &dst->sin_addr); 358 359 #ifdef GETIFA_DEBUG 360 if (in_selsrc_debug) { 361 printf("%s: src %#" PRIx32 " score ", __func__, 362 ntohl(src->sin_addr.s_addr)); 363 in_score_println(score, scorelen); 364 } 365 #endif /* GETIFA_DEBUG */ 366 367 if (in_score_cmp(score, best_score, scorelen) > 0) { 368 (void)memcpy(best_score, score, sizeof(best_score)); 369 best_ifa = alt_ifa; 370 } 371 } 372 #ifdef GETIFA_DEBUG 373 if (in_selsrc_debug) { 374 printf("%s: choose src %#" PRIx32 " score ", __func__, 375 ntohl(IA_SIN(best_ifa)->sin_addr.s_addr)); 376 in_score_println(best_score, scorelen); 377 } 378 #endif /* GETIFA_DEBUG */ 379 380 best_ifa->ifa_seqno = &iss->iss_seqno; 381 return best_ifa; 382 } 383 384 static in_score_src_t 385 name_to_score_src(const char *name) 386 { 387 int i; 388 389 for (i = 0; score_src_names[i].sn_name != NULL; i++) { 390 if (strcmp(score_src_names[i].sn_name, name) == 0) 391 return score_src_names[i].sn_score_src; 392 } 393 return NULL; 394 } 395 396 static const char * 397 score_src_to_name(const in_score_src_t score_src) 398 { 399 int i; 400 for (i = 0; score_src_names[i].sn_name != NULL; i++) { 401 if (score_src == score_src_names[i].sn_score_src) 402 return score_src_names[i].sn_name; 403 } 404 return "<unknown>"; 405 } 406 407 static size_t 408 in_get_selectsrc(const struct in_ifselsrc *iss, char *buf0, 409 const size_t buflen0) 410 { 411 int i, rc; 412 char *buf = buf0; 413 const char *delim; 414 size_t buflen = buflen0; 415 416 KASSERT(buflen >= 1); 417 418 for (delim = "", i = 0; 419 i < IN_SCORE_SRC_MAX && iss->iss_score_src[i] != NULL; 420 delim = ",", i++) { 421 rc = snprintf(buf, buflen, "%s%s", 422 delim, score_src_to_name(iss->iss_score_src[i])); 423 if (rc == -1) 424 return buflen0 - buflen; 425 if (rc >= buflen) 426 return buflen0 + rc - buflen; 427 buf += rc; 428 buflen -= rc; 429 } 430 if (buf == buf0) 431 *buf++ = '\0'; 432 return buf - buf0; 433 } 434 435 static int 436 in_set_selectsrc(struct in_ifselsrc *iss, char *buf) 437 { 438 int i, s; 439 char *next = buf; 440 const char *name; 441 in_score_src_t score_src; 442 in_score_src_t scorers[IN_SCORE_SRC_MAX]; 443 444 memset(&scorers, 0, sizeof(scorers)); 445 for (i = 0; 446 (name = strsep(&next, ",")) != NULL && i < IN_SCORE_SRC_MAX; 447 i++) { 448 if (strcmp(name, "") == 0) 449 break; 450 if ((score_src = name_to_score_src(name)) == NULL) 451 return EINVAL; 452 scorers[i] = score_src; 453 } 454 if (i == IN_SCORE_SRC_MAX && name != NULL) 455 return EFBIG; 456 s = splnet(); 457 (void)memcpy(iss->iss_score_src, scorers, sizeof(iss->iss_score_src)); 458 /* If iss affects a specific interface that used to use 459 * the default policy, increase the sequence number on the 460 * default policy, forcing routes that cache a source 461 * (rt_ifa) found by the default policy to refresh their 462 * cache. 463 */ 464 if (iss != &default_iss && iss->iss_score_src[0] == NULL && 465 scorers[0] != NULL) 466 default_iss.iss_seqno++; 467 iss->iss_seqno++; 468 splx(s); 469 return 0; 470 } 471 472 /* 473 * sysctl helper routine for net.inet.ip.interfaces.<interface>.selectsrc. 474 * Pulls the old value out as a human-readable string, interprets 475 * and records the new value. 476 */ 477 static int 478 in_sysctl_selectsrc(SYSCTLFN_ARGS) 479 { 480 char policy[IN_SELECTSRC_LEN]; 481 int error; 482 struct sysctlnode node; 483 struct in_ifselsrc *iss; 484 485 node = *rnode; 486 iss = (struct in_ifselsrc *)node.sysctl_data; 487 if (oldp != NULL && 488 (error = in_get_selectsrc(iss, policy, sizeof(policy))) >= sizeof(policy)) 489 return error; 490 node.sysctl_data = &policy[0]; 491 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 492 if (error || newp == NULL) 493 return (error); 494 495 return in_set_selectsrc(iss, policy); 496 } 497 498 static const struct sysctlnode * 499 in_domifattach_sysctl(struct in_ifsysctl *isc) 500 { 501 int rc; 502 const struct sysctlnode *rnode; 503 504 if ((rc = sysctl_createv(&isc->isc_log, 0, NULL, &rnode, 505 CTLFLAG_READONLY, CTLTYPE_NODE, 506 "interfaces", NULL, 507 NULL, 0, NULL, 0, 508 CTL_NET, PF_INET, IPPROTO_IP, CTL_CREATE, 509 CTL_EOL)) != 0) { 510 printf("%s: could not create net.inet.ip.interfaces, rc = %d\n", 511 __func__, rc); 512 return NULL; 513 } 514 if ((rc = sysctl_createv(&isc->isc_log, 0, &rnode, &rnode, 515 CTLFLAG_READONLY, CTLTYPE_NODE, 516 isc->isc_ifp->if_xname, 517 SYSCTL_DESCR("interface ip options"), 518 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL)) != 0) { 519 printf("%s: could not create net.inet.ip.interfaces.%s, " 520 "rc = %d\n", __func__, isc->isc_ifp->if_xname, rc); 521 goto err; 522 } 523 if ((rc = sysctl_createv(&isc->isc_log, 0, &rnode, &rnode, 524 CTLFLAG_READWRITE, CTLTYPE_STRING, 525 "selectsrc", 526 SYSCTL_DESCR("source selection policy"), 527 in_sysctl_selectsrc, 0, 528 isc->isc_selsrc, IN_SELECTSRC_LEN, 529 CTL_CREATE, CTL_EOL)) != 0) { 530 printf( 531 "%s: could not create net.inet.ip.%s.selectsrc, rc = %d\n", 532 __func__, isc->isc_ifp->if_xname, rc); 533 goto err; 534 } 535 return rnode; 536 err: 537 sysctl_teardown(&isc->isc_log); 538 return NULL; 539 } 540 541 void * 542 in_domifattach(struct ifnet *ifp) 543 { 544 struct in_ifsysctl *isc; 545 struct in_ifselsrc *iss; 546 547 isc = (struct in_ifsysctl *)malloc(sizeof(*isc), M_IFADDR, 548 M_WAITOK | M_ZERO); 549 550 iss = (struct in_ifselsrc *)malloc(sizeof(*iss), M_IFADDR, 551 M_WAITOK | M_ZERO); 552 553 memcpy(&iss->iss_score_src[0], &initial_iss.iss_score_src[0], 554 MIN(sizeof(iss->iss_score_src), sizeof(initial_iss.iss_score_src))); 555 556 isc->isc_ifp = ifp; 557 isc->isc_selsrc = iss; 558 559 if (in_domifattach_sysctl(isc) == NULL) 560 goto err; 561 562 return isc; 563 err: 564 free(iss, M_IFADDR); 565 free(isc, M_IFADDR); 566 return NULL; 567 } 568 569 void 570 in_domifdetach(struct ifnet *ifp, void *aux) 571 { 572 struct in_ifsysctl *isc; 573 struct in_ifselsrc *iss; 574 575 if (aux == NULL) 576 return; 577 isc = (struct in_ifsysctl *)aux; 578 iss = isc->isc_selsrc; 579 sysctl_teardown(&isc->isc_log); 580 free(isc, M_IFADDR); 581 free(iss, M_IFADDR); 582 } 583 #endif /* INET */ 584