1 /* $NetBSD: in_selsrc.c,v 1.3 2006/11/16 01:33:45 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 David Young. All rights reserved. 5 * 6 * This code was written by David Young. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by David Young. 19 * 4. The name of David Young may not be used to endorse or promote 20 * products derived from this software without specific prior 21 * written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY DAVID YOUNG ``AS IS'' AND ANY 24 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 25 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 26 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 27 * FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 32 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 33 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 34 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 35 */ 36 37 #include <sys/cdefs.h> 38 __KERNEL_RCSID(0, "$NetBSD: in_selsrc.c,v 1.3 2006/11/16 01:33:45 christos Exp $"); 39 40 #include "opt_inet.h" 41 #include "opt_inet_conf.h" 42 43 #include <lib/libkern/libkern.h> 44 45 #include <sys/param.h> 46 #include <sys/ioctl.h> 47 #include <sys/errno.h> 48 #include <sys/malloc.h> 49 #include <sys/socket.h> 50 #include <sys/socketvar.h> 51 #include <sys/sysctl.h> 52 #include <sys/systm.h> 53 #include <sys/proc.h> 54 #include <sys/syslog.h> 55 56 #include <net/if.h> 57 58 #include <net/if_ether.h> 59 60 #include <netinet/in_systm.h> 61 #include <netinet/in.h> 62 #include <netinet/in_var.h> 63 #include <netinet/ip.h> 64 #include <netinet/ip_var.h> 65 #include <netinet/in_ifattach.h> 66 #include <netinet/in_pcb.h> 67 #include <netinet/if_inarp.h> 68 #include <netinet/ip_mroute.h> 69 #include <netinet/igmp_var.h> 70 #include <netinet/in_selsrc.h> 71 72 #ifdef INET 73 struct score_src_name { 74 const char *sn_name; 75 const in_score_src_t sn_score_src; 76 }; 77 78 static const struct sysctlnode *in_domifattach_sysctl(struct in_ifsysctl *); 79 static int in_preference(const struct in_addr *, int, int, 80 const struct in_addr *); 81 static int in_index(const struct in_addr *, int, int, const struct in_addr *); 82 static int in_matchlen(const struct in_addr *, int, int, 83 const struct in_addr *); 84 static int in_match_category(const struct in_addr *, int, int, 85 const struct in_addr *); 86 static size_t in_get_selectsrc(const struct in_ifselsrc *, char *, 87 const size_t); 88 static int in_set_selectsrc(struct in_ifselsrc *, char *buf); 89 static int in_sysctl_selectsrc(SYSCTLFN_PROTO); 90 static in_score_src_t name_to_score_src(const char *); 91 static const char *score_src_to_name(const in_score_src_t); 92 static void in_score(const in_score_src_t *, int *, int *, 93 const struct in_addr *, int, int, const struct in_addr *); 94 95 static const struct score_src_name score_src_names[] = { 96 {"same-category", in_match_category} 97 , {"common-prefix-len", in_matchlen} 98 , {"index", in_index} 99 , {"preference", in_preference} 100 , {NULL, NULL} 101 }; 102 103 static const struct in_ifselsrc initial_iss = { 0, {NULL} }; 104 105 static struct in_ifselsrc default_iss = { 0, {in_index} }; 106 107 #ifdef GETIFA_DEBUG 108 int in_selsrc_debug = 0; 109 110 SYSCTL_SETUP(sysctl_selectsrc_setup, "sysctl selectsrc subtree setup") 111 { 112 int rc; 113 const struct sysctlnode *rnode, *cnode; 114 115 if ((rc = sysctl_createv(clog, 0, NULL, &rnode, 116 CTLFLAG_PERMANENT, CTLTYPE_NODE, "net", 117 NULL, NULL, 0, NULL, 0, CTL_NET, CTL_EOL)) != 0) { 118 printf("%s: could not create net, rc = %d\n", __func__, rc); 119 return; 120 } 121 if ((rc = sysctl_createv(clog, 0, NULL, &rnode, 122 CTLFLAG_PERMANENT, CTLTYPE_NODE, "inet", 123 NULL, NULL, 0, NULL, 0, CTL_NET, PF_INET, CTL_EOL)) != 0) { 124 printf("%s: could not create net.inet, rc = %d\n", __func__, 125 rc); 126 return; 127 } 128 if ((rc = sysctl_createv(clog, 0, NULL, &rnode, 129 CTLFLAG_PERMANENT, CTLTYPE_NODE, "ip", 130 NULL, NULL, 0, NULL, 0, 131 CTL_NET, PF_INET, IPPROTO_IP, CTL_EOL)) != 0) { 132 printf("%s: could not create net.inet.ip, rc = %d\n", __func__, 133 rc); 134 return; 135 } 136 if ((rc = sysctl_createv(clog, 0, NULL, &rnode, 137 CTLFLAG_PERMANENT, CTLTYPE_NODE, "selectsrc", 138 NULL, NULL, 0, NULL, 0, 139 CTL_NET, PF_INET, IPPROTO_IP, CTL_CREATE, CTL_EOL)) != 0) { 140 printf("%s: could not create net.inet.ip.selectsrc, " 141 "rc = %d\n", __func__, rc); 142 return; 143 } 144 if ((rc = sysctl_createv(clog, 0, &rnode, &cnode, 145 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "debug", 146 SYSCTL_DESCR("enable source-selection debug messages"), 147 NULL, 0, &in_selsrc_debug, 0, CTL_CREATE, CTL_EOL)) != 0) { 148 printf("%s: could not create net.inet.ip.selectsrc.debug, " 149 "rc = %d\n", __func__, rc); 150 return; 151 } 152 if ((rc = sysctl_createv(clog, 0, &rnode, &cnode, 153 CTLFLAG_READWRITE, CTLTYPE_STRING, "default", 154 SYSCTL_DESCR("default source selection policy"), 155 in_sysctl_selectsrc, 0, &default_iss, IN_SELECTSRC_LEN, 156 CTL_CREATE, CTL_EOL)) != 0) { 157 printf( 158 "%s: could not create net.inet.ip.selectsrc.default (%d)\n", 159 __func__, rc); 160 return; 161 } 162 } 163 #endif /* GETIFA_DEBUG */ 164 165 /* 166 * Score by address preference: prefer addresses with lower preference 167 * number. Preference numbers are assigned with ioctl SIOCSIFADDRPREF. 168 */ 169 static int 170 in_preference(const struct in_addr *src, int preference, 171 int idx, const struct in_addr *dst) 172 { 173 return -preference; 174 } 175 176 /* 177 * Score by address "index": prefer addresses nearer the head of 178 * the ifaddr list. 179 */ 180 static int 181 in_index(const struct in_addr *src, int preference, int idx, 182 const struct in_addr *dst) 183 { 184 return -idx; 185 } 186 187 /* 188 * Length of longest common prefix of src and dst. 189 * 190 * (Derived from in6_matchlen.) 191 */ 192 static int 193 in_matchlen(const struct in_addr *src, int preference, 194 int idx, const struct in_addr *dst) 195 { 196 int match = 0; 197 const uint8_t *s = (const uint8_t *)src, *d = (const uint8_t *)dst; 198 const uint8_t *lim = s + 4; 199 uint_fast8_t r = 0; 200 201 while (s < lim && (r = (*d++ ^ *s++)) == 0) 202 match += 8; 203 204 if (s == lim) 205 return match; 206 207 while ((r & 0x80) == 0) { 208 match++; 209 r <<= 1; 210 } 211 return match; 212 } 213 214 static enum in_category 215 in_categorize(const struct in_addr *s) 216 { 217 if (IN_ANY_LOCAL(s->s_addr)) 218 return IN_CATEGORY_LINKLOCAL; 219 else if (IN_PRIVATE(s->s_addr)) 220 return IN_CATEGORY_PRIVATE; 221 else 222 return IN_CATEGORY_OTHER; 223 } 224 225 static int 226 in_match_category(const struct in_addr *src, int preference, 227 int idx, const struct in_addr *dst) 228 { 229 enum in_category dst_c = in_categorize(dst), 230 src_c = in_categorize(src); 231 #ifdef GETIFA_DEBUG 232 if (in_selsrc_debug) { 233 printf("%s: dst %#08" PRIx32 " categ %d, src %#08" PRIx32 234 " categ %d\n", __func__, ntohl(dst->s_addr), dst_c, 235 ntohl(src->s_addr), src_c); 236 } 237 #endif /* GETIFA_DEBUG */ 238 239 if (dst_c == src_c) 240 return 2; 241 else if (dst_c == IN_CATEGORY_LINKLOCAL && src_c == IN_CATEGORY_PRIVATE) 242 return 1; 243 else if (dst_c == IN_CATEGORY_PRIVATE && src_c == IN_CATEGORY_LINKLOCAL) 244 return 1; 245 else if (dst_c == IN_CATEGORY_OTHER && src_c == IN_CATEGORY_PRIVATE) 246 return 1; 247 else 248 return 0; 249 } 250 251 static void 252 in_score(const in_score_src_t *score_src, int *score, int *scorelenp, 253 const struct in_addr *src, int preference, int idx, 254 const struct in_addr *dst) 255 { 256 int i; 257 258 for (i = 0; i < IN_SCORE_SRC_MAX && score_src[i] != NULL; i++) 259 score[i] = (*score_src[i])(src, preference, idx, dst); 260 if (scorelenp != NULL) 261 *scorelenp = i; 262 } 263 264 static int 265 in_score_cmp(int *score1, int *score2, int scorelen) 266 { 267 int i; 268 269 for (i = 0; i < scorelen; i++) { 270 if (score1[i] == score2[i]) 271 continue; 272 return score1[i] - score2[i]; 273 } 274 return 0; 275 } 276 277 #ifdef GETIFA_DEBUG 278 static void 279 in_score_println(int *score, int scorelen) 280 { 281 int i; 282 const char *delim = "["; 283 284 for (i = 0; i < scorelen; i++) { 285 printf("%s%d", delim, score[i]); 286 delim = ", "; 287 } 288 printf("]\n"); 289 } 290 #endif /* GETIFA_DEBUG */ 291 292 /* Scan the interface addresses on the interface ifa->ifa_ifp for 293 * the source address that best matches the destination, dst0, 294 * according to the source address-selection policy for this 295 * interface. If there is no better match than `ifa', return `ifa'. 296 * Otherwise, return the best address. 297 * 298 * Note that in_getifa is called after the kernel has decided which 299 * output interface to use (ifa->ifa_ifp), and in_getifa will not 300 * scan an address belonging to any other interface. 301 */ 302 struct ifaddr * 303 in_getifa(struct ifaddr *ifa, const struct sockaddr *dst0) 304 { 305 const in_score_src_t *score_src; 306 int idx, scorelen; 307 const struct sockaddr_in *dst, *src; 308 struct ifaddr *alt_ifa, *best_ifa; 309 struct ifnet *ifp; 310 struct in_ifsysctl *isc; 311 struct in_ifselsrc *iss; 312 int best_score[IN_SCORE_SRC_MAX], score[IN_SCORE_SRC_MAX]; 313 314 if (ifa->ifa_addr->sa_family != AF_INET || 315 dst0 == NULL || dst0->sa_family != AF_INET) { /* Possible. */ 316 ifa->ifa_seqno = NULL; 317 return ifa; 318 } 319 320 ifp = ifa->ifa_ifp; 321 isc = (struct in_ifsysctl *)ifp->if_afdata[AF_INET]; 322 if (isc != NULL && isc->isc_selsrc != NULL && 323 isc->isc_selsrc->iss_score_src[0] != NULL) 324 iss = isc->isc_selsrc; 325 else 326 iss = &default_iss; 327 score_src = &iss->iss_score_src[0]; 328 329 dst = (const struct sockaddr_in *)dst0; 330 331 best_ifa = ifa; 332 333 /* Find out the index of this ifaddr. */ 334 idx = 0; 335 TAILQ_FOREACH(alt_ifa, &ifa->ifa_ifp->if_addrlist, ifa_list) { 336 if (alt_ifa == best_ifa) 337 break; 338 idx++; 339 } 340 in_score(score_src, best_score, &scorelen, &IA_SIN(best_ifa)->sin_addr, 341 best_ifa->ifa_preference, idx, &dst->sin_addr); 342 343 #ifdef GETIFA_DEBUG 344 if (in_selsrc_debug) { 345 printf("%s: enter dst %#" PRIx32 " src %#" PRIx32 " score ", 346 __func__, ntohl(dst->sin_addr.s_addr), 347 ntohl(satosin(best_ifa->ifa_addr)->sin_addr.s_addr)); 348 in_score_println(best_score, scorelen); 349 } 350 #endif /* GETIFA_DEBUG */ 351 352 idx = -1; 353 TAILQ_FOREACH(alt_ifa, &ifa->ifa_ifp->if_addrlist, ifa_list) { 354 ++idx; 355 src = IA_SIN(alt_ifa); 356 357 if (alt_ifa == ifa || src->sin_family != AF_INET) 358 continue; 359 360 in_score(score_src, score, NULL, &src->sin_addr, 361 alt_ifa->ifa_preference, idx, &dst->sin_addr); 362 363 #ifdef GETIFA_DEBUG 364 if (in_selsrc_debug) { 365 printf("%s: src %#" PRIx32 " score ", __func__, 366 ntohl(src->sin_addr.s_addr)); 367 in_score_println(score, scorelen); 368 } 369 #endif /* GETIFA_DEBUG */ 370 371 if (in_score_cmp(score, best_score, scorelen) > 0) { 372 (void)memcpy(best_score, score, sizeof(best_score)); 373 best_ifa = alt_ifa; 374 } 375 } 376 #ifdef GETIFA_DEBUG 377 if (in_selsrc_debug) { 378 printf("%s: choose src %#" PRIx32 " score ", __func__, 379 ntohl(IA_SIN(best_ifa)->sin_addr.s_addr)); 380 in_score_println(best_score, scorelen); 381 } 382 #endif /* GETIFA_DEBUG */ 383 384 best_ifa->ifa_seqno = &iss->iss_seqno; 385 return best_ifa; 386 } 387 388 static in_score_src_t 389 name_to_score_src(const char *name) 390 { 391 int i; 392 393 for (i = 0; score_src_names[i].sn_name != NULL; i++) { 394 if (strcmp(score_src_names[i].sn_name, name) == 0) 395 return score_src_names[i].sn_score_src; 396 } 397 return NULL; 398 } 399 400 static const char * 401 score_src_to_name(const in_score_src_t score_src) 402 { 403 int i; 404 for (i = 0; score_src_names[i].sn_name != NULL; i++) { 405 if (score_src == score_src_names[i].sn_score_src) 406 return score_src_names[i].sn_name; 407 } 408 return "<unknown>"; 409 } 410 411 static size_t 412 in_get_selectsrc(const struct in_ifselsrc *iss, char *buf0, 413 const size_t buflen0) 414 { 415 int i, rc; 416 char *buf = buf0; 417 const char *delim; 418 size_t buflen = buflen0; 419 420 KASSERT(buflen >= 1); 421 422 for (delim = "", i = 0; 423 i < IN_SCORE_SRC_MAX && iss->iss_score_src[i] != NULL; 424 delim = ",", i++) { 425 rc = snprintf(buf, buflen, "%s%s", 426 delim, score_src_to_name(iss->iss_score_src[i])); 427 if (rc == -1) 428 return buflen0 - buflen; 429 if (rc >= buflen) 430 return buflen0 + rc - buflen; 431 buf += rc; 432 buflen -= rc; 433 } 434 if (buf == buf0) 435 *buf++ = '\0'; 436 return buf - buf0; 437 } 438 439 static int 440 in_set_selectsrc(struct in_ifselsrc *iss, char *buf) 441 { 442 int i, s; 443 char *next = buf; 444 const char *name; 445 in_score_src_t score_src; 446 in_score_src_t scorers[IN_SCORE_SRC_MAX]; 447 448 memset(&scorers, 0, sizeof(scorers)); 449 for (i = 0; 450 (name = strsep(&next, ",")) != NULL && i < IN_SCORE_SRC_MAX; 451 i++) { 452 if (strcmp(name, "") == 0) 453 break; 454 if ((score_src = name_to_score_src(name)) == NULL) 455 return EINVAL; 456 scorers[i] = score_src; 457 } 458 if (i == IN_SCORE_SRC_MAX && name != NULL) 459 return EFBIG; 460 s = splnet(); 461 (void)memcpy(iss->iss_score_src, scorers, sizeof(iss->iss_score_src)); 462 /* If iss affects a specific interface that used to use 463 * the default policy, increase the sequence number on the 464 * default policy, forcing routes that cache a source 465 * (rt_ifa) found by the default policy to refresh their 466 * cache. 467 */ 468 if (iss != &default_iss && iss->iss_score_src[0] == NULL && 469 scorers[0] != NULL) 470 default_iss.iss_seqno++; 471 iss->iss_seqno++; 472 splx(s); 473 return 0; 474 } 475 476 /* 477 * sysctl helper routine for net.inet.ip.interfaces.<interface>.selectsrc. 478 * Pulls the old value out as a human-readable string, interprets 479 * and records the new value. 480 */ 481 static int 482 in_sysctl_selectsrc(SYSCTLFN_ARGS) 483 { 484 char policy[IN_SELECTSRC_LEN]; 485 int error; 486 struct sysctlnode node; 487 struct in_ifselsrc *iss; 488 489 node = *rnode; 490 iss = (struct in_ifselsrc *)node.sysctl_data; 491 if (oldp != NULL && 492 (error = in_get_selectsrc(iss, policy, sizeof(policy))) >= sizeof(policy)) 493 return error; 494 node.sysctl_data = &policy[0]; 495 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 496 if (error || newp == NULL) 497 return (error); 498 499 return in_set_selectsrc(iss, policy); 500 } 501 502 static const struct sysctlnode * 503 in_domifattach_sysctl(struct in_ifsysctl *isc) 504 { 505 int rc; 506 const struct sysctlnode *rnode; 507 508 if ((rc = sysctl_createv(&isc->isc_log, 0, NULL, &rnode, 509 CTLFLAG_READWRITE, CTLTYPE_NODE, 510 "interfaces", NULL, 511 NULL, 0, NULL, 0, 512 CTL_NET, PF_INET, IPPROTO_IP, CTL_CREATE, 513 CTL_EOL)) != 0) { 514 printf("%s: could not create net.inet.ip.interfaces, rc = %d\n", 515 __func__, rc); 516 return NULL; 517 } 518 if ((rc = sysctl_createv(&isc->isc_log, 0, &rnode, &rnode, 519 CTLFLAG_READWRITE, CTLTYPE_NODE, 520 isc->isc_ifp->if_xname, 521 SYSCTL_DESCR("interface ip options"), 522 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL)) != 0) { 523 printf("%s: could not create net.inet.ip.interfaces.%s, " 524 "rc = %d\n", __func__, isc->isc_ifp->if_xname, rc); 525 goto err; 526 } 527 if ((rc = sysctl_createv(&isc->isc_log, 0, &rnode, &rnode, 528 CTLFLAG_READWRITE, CTLTYPE_STRING, 529 "selectsrc", 530 SYSCTL_DESCR("source selection policy"), 531 in_sysctl_selectsrc, 0, 532 isc->isc_selsrc, IN_SELECTSRC_LEN, 533 CTL_CREATE, CTL_EOL)) != 0) { 534 printf( 535 "%s: could not create net.inet.ip.%s.selectsrc, rc = %d\n", 536 __func__, isc->isc_ifp->if_xname, rc); 537 goto err; 538 } 539 return rnode; 540 err: 541 sysctl_teardown(&isc->isc_log); 542 return NULL; 543 } 544 545 void * 546 in_domifattach(struct ifnet *ifp) 547 { 548 struct in_ifsysctl *isc; 549 struct in_ifselsrc *iss; 550 551 isc = (struct in_ifsysctl *)malloc(sizeof(*isc), M_IFADDR, 552 M_WAITOK | M_ZERO); 553 554 iss = (struct in_ifselsrc *)malloc(sizeof(*iss), M_IFADDR, 555 M_WAITOK | M_ZERO); 556 557 memcpy(&iss->iss_score_src[0], &initial_iss.iss_score_src[0], 558 MIN(sizeof(iss->iss_score_src), sizeof(initial_iss.iss_score_src))); 559 560 isc->isc_ifp = ifp; 561 isc->isc_selsrc = iss; 562 563 if (in_domifattach_sysctl(isc) == NULL) 564 goto err; 565 566 return isc; 567 err: 568 free(iss, M_IFADDR); 569 free(isc, M_IFADDR); 570 return NULL; 571 } 572 573 void 574 in_domifdetach(struct ifnet *ifp, void *aux) 575 { 576 struct in_ifsysctl *isc; 577 struct in_ifselsrc *iss; 578 579 if (aux == NULL) 580 return; 581 isc = (struct in_ifsysctl *)aux; 582 iss = isc->isc_selsrc; 583 sysctl_teardown(&isc->isc_log); 584 free(isc, M_IFADDR); 585 free(iss, M_IFADDR); 586 } 587 #endif /* INET */ 588