1 /* 2 * domain name resolvers, see rfcs 1035 and 1123 3 */ 4 #include <u.h> 5 #include <libc.h> 6 #include <ip.h> 7 #include <bio.h> 8 #include <ndb.h> 9 #include "dns.h" 10 11 #define NS2MS(ns) ((ns) / 1000000L) 12 #define S2MS(s) ((s) * 1000) 13 14 typedef struct Dest Dest; 15 typedef struct Ipaddr Ipaddr; 16 typedef struct Query Query; 17 18 enum 19 { 20 Udp, Tcp, 21 Maxdest= 24, /* maximum destinations for a request message */ 22 Maxtrans= 3, /* maximum transmissions to a server */ 23 Destmagic= 0xcafebabe, 24 Querymagic= 0xdeadbeef, 25 }; 26 enum { Hurry, Patient, }; 27 enum { Outns, Inns, }; 28 enum { Remntretry = 15, }; /* min. sec.s between remount attempts */ 29 30 struct Ipaddr { 31 Ipaddr *next; 32 uchar ip[IPaddrlen]; 33 }; 34 35 struct Dest 36 { 37 uchar a[IPaddrlen]; /* ip address */ 38 DN *s; /* name server */ 39 int nx; /* number of transmissions */ 40 int code; /* response code; used to clear dp->respcode */ 41 42 ulong magic; 43 }; 44 45 struct Query { 46 DN *dp; /* domain */ 47 int type; /* and type to look up */ 48 Request *req; 49 RR *nsrp; /* name servers to consult */ 50 51 /* dest must not be on the stack due to forking in slave() */ 52 Dest *dest; /* array of destinations */ 53 Dest *curdest; /* pointer to one of them */ 54 int ndest; 55 56 int udpfd; /* can be shared by all udp users */ 57 58 QLock tcplock; /* only one tcp call at a time per query */ 59 int tcpset; 60 int tcpfd; /* if Tcp, read replies from here */ 61 int tcpctlfd; 62 uchar tcpip[IPaddrlen]; 63 64 ulong magic; 65 }; 66 67 /* estimated % probability of such a record existing at all */ 68 int likely[] = { 69 [Ta] 95, 70 [Taaaa] 10, 71 [Tcname] 15, 72 [Tmx] 60, 73 [Tns] 90, 74 [Tnull] 5, 75 [Tptr] 35, 76 [Tsoa] 90, 77 [Tsrv] 60, 78 [Ttxt] 15, 79 [Tall] 95, 80 }; 81 82 static RR* dnresolve1(char*, int, int, Request*, int, int); 83 static int netquery(Query *, int); 84 85 /* 86 * reading /proc/pid/args yields either "name" or "name [display args]", 87 * so return only display args, if any. 88 */ 89 static char * 90 procgetname(void) 91 { 92 int fd, n; 93 char *lp, *rp; 94 char buf[256]; 95 96 snprint(buf, sizeof buf, "#p/%d/args", getpid()); 97 if((fd = open(buf, OREAD)) < 0) 98 return strdup(""); 99 *buf = '\0'; 100 n = read(fd, buf, sizeof buf-1); 101 close(fd); 102 if (n >= 0) 103 buf[n] = '\0'; 104 if ((lp = strchr(buf, '[')) == nil || 105 (rp = strrchr(buf, ']')) == nil) 106 return strdup(""); 107 *rp = '\0'; 108 return strdup(lp+1); 109 } 110 111 /* 112 * lookup 'type' info for domain name 'name'. If it doesn't exist, try 113 * looking it up as a canonical name. 114 */ 115 RR* 116 dnresolve(char *name, int class, int type, Request *req, RR **cn, int depth, 117 int recurse, int rooted, int *status) 118 { 119 RR *rp, *nrp, *drp; 120 DN *dp; 121 int loops; 122 char *procname; 123 char nname[Domlen]; 124 125 if(status) 126 *status = 0; 127 128 if(depth > 12) /* in a recursive loop? */ 129 return nil; 130 131 procname = procgetname(); 132 /* 133 * hack for systems that don't have resolve search 134 * lists. Just look up the simple name in the database. 135 */ 136 if(!rooted && strchr(name, '.') == 0){ 137 rp = nil; 138 drp = domainlist(class); 139 for(nrp = drp; rp == nil && nrp != nil; nrp = nrp->next){ 140 snprint(nname, sizeof nname, "%s.%s", name, 141 nrp->ptr->name); 142 rp = dnresolve(nname, class, type, req, cn, depth+1, 143 recurse, rooted, status); 144 rrfreelist(rrremneg(&rp)); 145 } 146 if(drp != nil) 147 rrfree(drp); 148 procsetname(procname); 149 free(procname); 150 return rp; 151 } 152 153 /* 154 * try the name directly 155 */ 156 rp = dnresolve1(name, class, type, req, depth, recurse); 157 if(rp) { 158 procsetname(procname); 159 free(procname); 160 return randomize(rp); 161 } 162 163 /* try it as a canonical name if we weren't told the name didn't exist */ 164 dp = dnlookup(name, class, 0); 165 if(type != Tptr && dp->respcode != Rname) 166 for(loops = 0; rp == nil && loops < 32; loops++){ 167 rp = dnresolve1(name, class, Tcname, req, depth, recurse); 168 if(rp == nil) 169 break; 170 171 if(rp->negative){ 172 rrfreelist(rp); 173 rp = nil; 174 break; 175 } 176 177 name = rp->host->name; 178 if(cn) 179 rrcat(cn, rp); 180 else 181 rrfreelist(rp); 182 183 rp = dnresolve1(name, class, type, req, depth, recurse); 184 } 185 186 /* distinction between not found and not good */ 187 if(rp == nil && status != nil && dp->respcode != 0) 188 *status = dp->respcode; 189 190 procsetname(procname); 191 free(procname); 192 return randomize(rp); 193 } 194 195 static void 196 queryinit(Query *qp, DN *dp, int type, Request *req) 197 { 198 memset(qp, 0, sizeof *qp); 199 qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1; 200 qp->dp = dp; 201 qp->type = type; 202 qp->req = req; 203 qp->nsrp = nil; 204 qp->dest = qp->curdest = nil; 205 qp->magic = Querymagic; 206 } 207 208 static void 209 queryck(Query *qp) 210 { 211 assert(qp); 212 assert(qp->magic == Querymagic); 213 } 214 215 static void 216 querydestroy(Query *qp) 217 { 218 queryck(qp); 219 /* leave udpfd alone */ 220 if (qp->tcpfd > 0) 221 close(qp->tcpfd); 222 if (qp->tcpctlfd > 0) { 223 hangup(qp->tcpctlfd); 224 close(qp->tcpctlfd); 225 } 226 free(qp->dest); 227 memset(qp, 0, sizeof *qp); /* prevent accidents */ 228 qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1; 229 } 230 231 static void 232 destinit(Dest *p) 233 { 234 memset(p, 0, sizeof *p); 235 p->magic = Destmagic; 236 } 237 238 static void 239 destck(Dest *p) 240 { 241 assert(p); 242 assert(p->magic == Destmagic); 243 } 244 245 static void 246 destdestroy(Dest *p) 247 { 248 USED(p); 249 } 250 251 /* 252 * if the response to a query hasn't arrived within 100 ms., 253 * it's unlikely to arrive at all. after 1 s., it's really unlikely. 254 * queries for missing RRs are likely to produce time-outs rather than 255 * negative responses, so cname and aaaa queries are likely to time out, 256 * thus we don't wait very long for them. 257 */ 258 static void 259 notestats(vlong start, int tmout, int type) 260 { 261 qlock(&stats); 262 if (tmout) { 263 stats.tmout++; 264 if (type == Taaaa) 265 stats.tmoutv6++; 266 else if (type == Tcname) 267 stats.tmoutcname++; 268 } else { 269 long wait10ths = NS2MS(nsec() - start) / 100; 270 271 if (wait10ths <= 0) 272 stats.under10ths[0]++; 273 else if (wait10ths >= nelem(stats.under10ths)) 274 stats.under10ths[nelem(stats.under10ths) - 1]++; 275 else 276 stats.under10ths[wait10ths]++; 277 } 278 qunlock(&stats); 279 } 280 281 static void 282 noteinmem(void) 283 { 284 qlock(&stats); 285 stats.answinmem++; 286 qunlock(&stats); 287 } 288 289 static RR* 290 dnresolve1(char *name, int class, int type, Request *req, int depth, 291 int recurse) 292 { 293 DN *dp, *nsdp; 294 RR *rp, *nsrp, *dbnsrp; 295 char *cp; 296 Query query; 297 298 if(debug) 299 dnslog("[%d] dnresolve1 %s %d %d", getpid(), name, type, class); 300 301 /* only class Cin implemented so far */ 302 if(class != Cin) 303 return nil; 304 305 dp = dnlookup(name, class, 1); 306 307 /* 308 * Try the cache first 309 */ 310 rp = rrlookup(dp, type, OKneg); 311 if(rp) 312 if(rp->db){ 313 /* unauthoritative db entries are hints */ 314 if(rp->auth) { 315 noteinmem(); 316 return rp; 317 } 318 } else 319 /* cached entry must still be valid */ 320 if(rp->ttl > now) 321 /* but Tall entries are special */ 322 if(type != Tall || rp->query == Tall) { 323 noteinmem(); 324 return rp; 325 } 326 327 rrfreelist(rp); 328 329 /* 330 * try the cache for a canonical name. if found punt 331 * since we'll find it during the canonical name search 332 * in dnresolve(). 333 */ 334 if(type != Tcname){ 335 rp = rrlookup(dp, Tcname, NOneg); 336 rrfreelist(rp); 337 if(rp) 338 return nil; 339 } 340 341 queryinit(&query, dp, type, req); 342 343 /* 344 * if we're running as just a resolver, query our 345 * designated name servers 346 */ 347 if(cfg.resolver){ 348 nsrp = randomize(getdnsservers(class)); 349 if(nsrp != nil) { 350 query.nsrp = nsrp; 351 if(netquery(&query, depth+1)){ 352 rrfreelist(nsrp); 353 querydestroy(&query); 354 return rrlookup(dp, type, OKneg); 355 } 356 rrfreelist(nsrp); 357 } 358 } 359 360 /* 361 * walk up the domain name looking for 362 * a name server for the domain. 363 */ 364 for(cp = name; cp; cp = walkup(cp)){ 365 /* 366 * if this is a local (served by us) domain, 367 * return answer 368 */ 369 dbnsrp = randomize(dblookup(cp, class, Tns, 0, 0)); 370 if(dbnsrp && dbnsrp->local){ 371 rp = dblookup(name, class, type, 1, dbnsrp->ttl); 372 rrfreelist(dbnsrp); 373 querydestroy(&query); 374 return rp; 375 } 376 377 /* 378 * if recursion isn't set, just accept local 379 * entries 380 */ 381 if(recurse == Dontrecurse){ 382 if(dbnsrp) 383 rrfreelist(dbnsrp); 384 continue; 385 } 386 387 /* look for ns in cache */ 388 nsdp = dnlookup(cp, class, 0); 389 nsrp = nil; 390 if(nsdp) 391 nsrp = randomize(rrlookup(nsdp, Tns, NOneg)); 392 393 /* if the entry timed out, ignore it */ 394 if(nsrp && nsrp->ttl < now){ 395 rrfreelist(nsrp); 396 nsrp = nil; 397 } 398 399 if(nsrp){ 400 rrfreelist(dbnsrp); 401 402 /* query the name servers found in cache */ 403 query.nsrp = nsrp; 404 if(netquery(&query, depth+1)){ 405 rrfreelist(nsrp); 406 querydestroy(&query); 407 return rrlookup(dp, type, OKneg); 408 } 409 rrfreelist(nsrp); 410 continue; 411 } 412 413 /* use ns from db */ 414 if(dbnsrp){ 415 /* try the name servers found in db */ 416 query.nsrp = dbnsrp; 417 if(netquery(&query, depth+1)){ 418 /* we got an answer */ 419 rrfreelist(dbnsrp); 420 querydestroy(&query); 421 return rrlookup(dp, type, NOneg); 422 } 423 rrfreelist(dbnsrp); 424 } 425 } 426 querydestroy(&query); 427 428 /* settle for a non-authoritative answer */ 429 rp = rrlookup(dp, type, OKneg); 430 if(rp) 431 return rp; 432 433 /* noone answered. try the database, we might have a chance. */ 434 return dblookup(name, class, type, 0, 0); 435 } 436 437 /* 438 * walk a domain name one element to the right. 439 * return a pointer to that element. 440 * in other words, return a pointer to the parent domain name. 441 */ 442 char* 443 walkup(char *name) 444 { 445 char *cp; 446 447 cp = strchr(name, '.'); 448 if(cp) 449 return cp+1; 450 else if(*name) 451 return ""; 452 else 453 return 0; 454 } 455 456 /* 457 * Get a udp port for sending requests and reading replies. Put the port 458 * into "headers" mode. 459 */ 460 static char *hmsg = "headers"; 461 462 int 463 udpport(char *mtpt) 464 { 465 int fd, ctl; 466 char ds[64], adir[64]; 467 468 /* get a udp port */ 469 snprint(ds, sizeof ds, "%s/udp!*!0", (mtpt? mtpt: "/net")); 470 ctl = announce(ds, adir); 471 if(ctl < 0){ 472 /* warning("can't get udp port"); */ 473 return -1; 474 } 475 476 /* turn on header style interface */ 477 if(write(ctl, hmsg, strlen(hmsg)) , 0){ 478 close(ctl); 479 warning(hmsg); 480 return -1; 481 } 482 483 /* grab the data file */ 484 snprint(ds, sizeof ds, "%s/data", adir); 485 fd = open(ds, ORDWR); 486 close(ctl); 487 if(fd < 0) 488 warning("can't open udp port %s: %r", ds); 489 return fd; 490 } 491 492 /* generate a DNS UDP query packet */ 493 int 494 mkreq(DN *dp, int type, uchar *buf, int flags, ushort reqno) 495 { 496 DNSmsg m; 497 int len; 498 Udphdr *uh = (Udphdr*)buf; 499 500 /* stuff port number into output buffer */ 501 memset(uh, 0, sizeof *uh); 502 hnputs(uh->rport, 53); 503 504 /* make request and convert it to output format */ 505 memset(&m, 0, sizeof m); 506 m.flags = flags; 507 m.id = reqno; 508 m.qd = rralloc(type); 509 m.qd->owner = dp; 510 m.qd->type = type; 511 len = convDNS2M(&m, &buf[Udphdrsize], Maxudp); 512 rrfree(m.qd); 513 return len; 514 } 515 516 /* for alarms in readreply */ 517 static void 518 ding(void *x, char *msg) 519 { 520 USED(x); 521 if(strcmp(msg, "alarm") == 0) 522 noted(NCONT); 523 else 524 noted(NDFLT); 525 } 526 527 void 528 freeanswers(DNSmsg *mp) 529 { 530 rrfreelist(mp->qd); 531 rrfreelist(mp->an); 532 rrfreelist(mp->ns); 533 rrfreelist(mp->ar); 534 mp->qd = mp->an = mp->ns = mp->ar = nil; 535 } 536 537 /* sets srcip */ 538 static int 539 readnet(Query *qp, int medium, uchar *ibuf, ulong endtime, uchar **replyp, 540 uchar *srcip) 541 { 542 int len, fd; 543 long ms; 544 vlong startns = nsec(); 545 uchar *reply; 546 uchar lenbuf[2]; 547 548 /* timed read of reply */ 549 ms = S2MS(endtime) - NS2MS(startns); 550 if (ms < 2000) 551 ms = 2000; /* give the remote ns a fighting chance */ 552 reply = ibuf; 553 len = -1; /* pessimism */ 554 memset(srcip, 0, IPaddrlen); 555 if (medium == Udp) 556 if (qp->udpfd <= 0) 557 dnslog("readnet: qp->udpfd closed"); 558 else { 559 alarm(ms); 560 len = read(qp->udpfd, ibuf, Udphdrsize+Maxudpin); 561 alarm(0); 562 notestats(startns, len < 0, qp->type); 563 if (len >= IPaddrlen) 564 memmove(srcip, ibuf, IPaddrlen); 565 if (len >= Udphdrsize) { 566 len -= Udphdrsize; 567 reply += Udphdrsize; 568 } 569 } 570 else { 571 if (!qp->tcpset) 572 dnslog("readnet: tcp params not set"); 573 alarm(ms); 574 fd = qp->tcpfd; 575 if (fd <= 0) 576 dnslog("readnet: %s: tcp fd unset for dest %I", 577 qp->dp->name, qp->tcpip); 578 else if (readn(fd, lenbuf, 2) != 2) { 579 dnslog("readnet: short read of tcp size from %I", 580 qp->tcpip); 581 /* probably a time-out */ 582 notestats(startns, 1, qp->type); 583 } else { 584 len = lenbuf[0]<<8 | lenbuf[1]; 585 if (readn(fd, ibuf, len) != len) { 586 dnslog("readnet: short read of tcp data from %I", 587 qp->tcpip); 588 /* probably a time-out */ 589 notestats(startns, 1, qp->type); 590 len = -1; 591 } 592 } 593 alarm(0); 594 memmove(srcip, qp->tcpip, IPaddrlen); 595 } 596 *replyp = reply; 597 return len; 598 } 599 600 /* 601 * read replies to a request and remember the rrs in the answer(s). 602 * ignore any of the wrong type. 603 * wait at most until endtime. 604 */ 605 static int 606 readreply(Query *qp, int medium, ushort req, uchar *ibuf, DNSmsg *mp, 607 ulong endtime) 608 { 609 int len, rv; 610 char *err; 611 char tbuf[32]; 612 uchar *reply; 613 uchar srcip[IPaddrlen]; 614 RR *rp; 615 616 notify(ding); 617 618 queryck(qp); 619 rv = 0; 620 memset(mp, 0, sizeof *mp); 621 if (time(nil) >= endtime) 622 return -1; /* timed out before we started */ 623 624 memset(srcip, 0, sizeof srcip); 625 if (0) 626 len = -1; 627 for (; time(nil) < endtime && 628 (len = readnet(qp, medium, ibuf, endtime, &reply, srcip)) >= 0; 629 freeanswers(mp)){ 630 /* convert into internal format */ 631 memset(mp, 0, sizeof *mp); 632 err = convM2DNS(reply, len, mp, nil); 633 if (mp->flags & Ftrunc) { 634 free(err); 635 freeanswers(mp); 636 /* notify our caller to retry the query via tcp. */ 637 return -1; 638 } else if(err){ 639 dnslog("readreply: %s: input err, len %d: %s: %I", 640 qp->dp->name, len, err, srcip); 641 free(err); 642 continue; 643 } 644 if(debug) 645 logreply(qp->req->id, srcip, mp); 646 647 /* answering the right question? */ 648 if(mp->id != req) 649 dnslog("%d: id %d instead of %d: %I", qp->req->id, 650 mp->id, req, srcip); 651 else if(mp->qd == 0) 652 dnslog("%d: no question RR: %I", qp->req->id, srcip); 653 else if(mp->qd->owner != qp->dp) 654 dnslog("%d: owner %s instead of %s: %I", qp->req->id, 655 mp->qd->owner->name, qp->dp->name, srcip); 656 else if(mp->qd->type != qp->type) 657 dnslog("%d: qp->type %d instead of %d: %I", 658 qp->req->id, mp->qd->type, qp->type, srcip); 659 else { 660 /* remember what request this is in answer to */ 661 for(rp = mp->an; rp; rp = rp->next) 662 rp->query = qp->type; 663 return rv; 664 } 665 } 666 if (time(nil) >= endtime) { 667 ; /* query expired */ 668 } else if (0) { 669 /* this happens routinely when a read times out */ 670 dnslog("readreply: %s type %s: ns %I read error or eof " 671 "(returned %d): %r", qp->dp->name, rrname(qp->type, 672 tbuf, sizeof tbuf), srcip, len); 673 if (medium == Udp) 674 for (rp = qp->nsrp; rp != nil; rp = rp->next) 675 if (rp->type == Tns) 676 dnslog("readreply: %s: query sent to " 677 "ns %s", qp->dp->name, 678 rp->host->name); 679 } 680 return -1; 681 } 682 683 /* 684 * return non-0 if first list includes second list 685 */ 686 int 687 contains(RR *rp1, RR *rp2) 688 { 689 RR *trp1, *trp2; 690 691 for(trp2 = rp2; trp2; trp2 = trp2->next){ 692 for(trp1 = rp1; trp1; trp1 = trp1->next) 693 if(trp1->type == trp2->type) 694 if(trp1->host == trp2->host) 695 if(trp1->owner == trp2->owner) 696 break; 697 if(trp1 == nil) 698 return 0; 699 } 700 return 1; 701 } 702 703 704 /* 705 * return multicast version if any 706 */ 707 int 708 ipisbm(uchar *ip) 709 { 710 if(isv4(ip)){ 711 if (ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0 || 712 ipcmp(ip, IPv4bcast) == 0) 713 return 4; 714 } else 715 if(ip[0] == 0xff) 716 return 6; 717 return 0; 718 } 719 720 /* 721 * Get next server address 722 */ 723 static int 724 serveraddrs(Query *qp, int nd, int depth) 725 { 726 RR *rp, *arp, *trp; 727 Dest *cur; 728 729 if(nd >= Maxdest) 730 return 0; 731 732 /* 733 * look for a server whose address we already know. 734 * if we find one, mark it so we ignore this on 735 * subsequent passes. 736 */ 737 arp = 0; 738 for(rp = qp->nsrp; rp; rp = rp->next){ 739 assert(rp->magic == RRmagic); 740 if(rp->marker) 741 continue; 742 arp = rrlookup(rp->host, Ta, NOneg); 743 if(arp){ 744 rp->marker = 1; 745 break; 746 } 747 arp = dblookup(rp->host->name, Cin, Ta, 0, 0); 748 if(arp){ 749 rp->marker = 1; 750 break; 751 } 752 } 753 754 /* 755 * if the cache and database lookup didn't find any new 756 * server addresses, try resolving one via the network. 757 * Mark any we try to resolve so we don't try a second time. 758 */ 759 if(arp == 0) 760 for(rp = qp->nsrp; rp; rp = rp->next){ 761 if(rp->marker) 762 continue; 763 rp->marker = 1; 764 765 /* 766 * avoid loops looking up a server under itself 767 */ 768 if(subsume(rp->owner->name, rp->host->name)) 769 continue; 770 771 arp = dnresolve(rp->host->name, Cin, Ta, qp->req, 0, 772 depth+1, Recurse, 1, 0); 773 rrfreelist(rrremneg(&arp)); 774 if(arp) 775 break; 776 } 777 778 /* use any addresses that we found */ 779 for(trp = arp; trp && nd < Maxdest; trp = trp->next){ 780 cur = &qp->dest[nd]; 781 parseip(cur->a, trp->ip->name); 782 /* 783 * straddling servers can reject all nameservers if they are all 784 * inside, so be sure to list at least one outside ns at 785 * the end of the ns list in /lib/ndb for `dom='. 786 */ 787 if (ipisbm(cur->a) || 788 cfg.straddle && !insideaddr(qp->dp->name) && insidens(cur->a)) 789 continue; 790 cur->nx = 0; 791 cur->s = trp->owner; 792 cur->code = Rtimeout; 793 nd++; 794 } 795 rrfreelist(arp); 796 return nd; 797 } 798 799 /* 800 * cache negative responses 801 */ 802 static void 803 cacheneg(DN *dp, int type, int rcode, RR *soarr) 804 { 805 RR *rp; 806 DN *soaowner; 807 ulong ttl; 808 809 /* no cache time specified, don't make anything up */ 810 if(soarr != nil){ 811 if(soarr->next != nil){ 812 rrfreelist(soarr->next); 813 soarr->next = nil; 814 } 815 soaowner = soarr->owner; 816 } else 817 soaowner = nil; 818 819 /* the attach can cause soarr to be freed so mine it now */ 820 if(soarr != nil && soarr->soa != nil) 821 ttl = soarr->soa->minttl+now; 822 else 823 ttl = 5*Min; 824 825 /* add soa and negative RR to the database */ 826 rrattach(soarr, 1); 827 828 rp = rralloc(type); 829 rp->owner = dp; 830 rp->negative = 1; 831 rp->negsoaowner = soaowner; 832 rp->negrcode = rcode; 833 rp->ttl = ttl; 834 rrattach(rp, 1); 835 } 836 837 static int 838 setdestoutns(Dest *p, int n) 839 { 840 uchar *outns = outsidens(n); 841 842 destck(p); 843 destinit(p); 844 if (outns == nil) { 845 if (n == 0) 846 dnslog("[%d] no outside-ns in ndb", getpid()); 847 return -1; 848 } 849 memmove(p->a, outns, sizeof p->a); 850 p->s = dnlookup("outside-ns-ips", Cin, 1); 851 return 0; 852 } 853 854 /* 855 * issue query via UDP or TCP as appropriate. 856 * for TCP, returns with qp->tcpip set from udppkt header. 857 */ 858 static int 859 mydnsquery(Query *qp, int medium, uchar *udppkt, int len) 860 { 861 int rv = -1, nfd; 862 char *domain; 863 char conndir[40]; 864 uchar belen[2]; 865 NetConnInfo *nci; 866 867 queryck(qp); 868 switch (medium) { 869 case Udp: 870 nfd = dup(qp->udpfd, -1); 871 if (nfd < 0) { 872 warning("mydnsquery: qp->udpfd %d: %r", qp->udpfd); 873 close(qp->udpfd); /* ensure it's closed */ 874 qp->udpfd = -1; /* poison it */ 875 return rv; 876 } 877 close(nfd); 878 879 if (qp->udpfd <= 0) 880 dnslog("mydnsquery: qp->udpfd %d closed", qp->udpfd); 881 else { 882 if (write(qp->udpfd, udppkt, len+Udphdrsize) != 883 len+Udphdrsize) 884 warning("sending udp msg: %r"); 885 else { 886 stats.qsent++; 887 rv = 0; 888 } 889 } 890 break; 891 case Tcp: 892 /* send via TCP & keep fd around for reply */ 893 domain = smprint("%I", udppkt); 894 alarm(10*1000); 895 qp->tcpfd = rv = dial(netmkaddr(domain, "tcp", "dns"), nil, 896 conndir, &qp->tcpctlfd); 897 alarm(0); 898 if (qp->tcpfd < 0) { 899 dnslog("can't dial tcp!%s!dns: %r", domain); 900 free(domain); 901 break; 902 } 903 free(domain); 904 nci = getnetconninfo(conndir, qp->tcpfd); 905 if (nci) { 906 parseip(qp->tcpip, nci->rsys); 907 freenetconninfo(nci); 908 } else 909 dnslog("mydnsquery: getnetconninfo failed"); 910 qp->tcpset = 1; 911 912 belen[0] = len >> 8; 913 belen[1] = len; 914 if (write(qp->tcpfd, belen, 2) != 2 || 915 write(qp->tcpfd, udppkt + Udphdrsize, len) != len) 916 warning("sending tcp msg: %r"); 917 break; 918 default: 919 sysfatal("mydnsquery: bad medium"); 920 } 921 return rv; 922 } 923 924 /* 925 * send query to all UDP destinations or one TCP destination, 926 * taken from obuf (udp packet) header 927 */ 928 static int 929 xmitquery(Query *qp, int medium, int depth, uchar *obuf, int inns, int len) 930 { 931 int j, n; 932 char buf[32]; 933 Dest *p; 934 935 queryck(qp); 936 if(time(nil) >= qp->req->aborttime) 937 return -1; 938 939 /* 940 * get a nameserver address if we need one. 941 * serveraddrs populates qp->dest. 942 */ 943 p = qp->dest; 944 destck(p); 945 if (qp->ndest < 0 || qp->ndest > Maxdest) 946 dnslog("qp->ndest %d out of range", qp->ndest); 947 if (qp->ndest > qp->curdest - p) 948 qp->curdest = &qp->dest[serveraddrs(qp, qp->curdest - p, depth)]; 949 destck(qp->curdest); 950 951 /* no servers, punt */ 952 if (qp->curdest == qp->dest) 953 if (cfg.straddle && cfg.inside) { 954 /* get ips of "outside-ns-ips" */ 955 p = qp->curdest = qp->dest; 956 for(n = 0; n < Maxdest; n++, qp->curdest++) 957 if (setdestoutns(qp->curdest, n) < 0) 958 break; 959 } else { 960 /* it's probably just a bogus domain, don't log it */ 961 // dnslog("xmitquery: %s: no nameservers", qp->dp->name); 962 return -1; 963 } 964 965 /* send to first 'qp->ndest' destinations */ 966 j = 0; 967 if (medium == Tcp) { 968 j++; 969 queryck(qp); 970 assert(qp->dp); 971 procsetname("tcp %sside query for %s %s", (inns? "in": "out"), 972 qp->dp->name, rrname(qp->type, buf, sizeof buf)); 973 mydnsquery(qp, medium, obuf, len); /* sets qp->tcpip from obuf */ 974 if(debug) 975 logsend(qp->req->id, depth, qp->tcpip, "", qp->dp->name, 976 qp->type); 977 } else 978 for(; p < &qp->dest[qp->ndest] && p < qp->curdest; p++){ 979 /* skip destinations we've finished with */ 980 if(p->nx >= Maxtrans) 981 continue; 982 983 j++; 984 985 /* exponential backoff of requests */ 986 if((1<<p->nx) > qp->ndest) 987 continue; 988 989 procsetname("udp %sside query to %I/%s %s %s", 990 (inns? "in": "out"), p->a, p->s->name, 991 qp->dp->name, rrname(qp->type, buf, sizeof buf)); 992 if(debug) 993 logsend(qp->req->id, depth, p->a, p->s->name, 994 qp->dp->name, qp->type); 995 996 /* fill in UDP destination addr & send it */ 997 memmove(obuf, p->a, sizeof p->a); 998 mydnsquery(qp, medium, obuf, len); 999 p->nx++; 1000 } 1001 if(j == 0) { 1002 // dnslog("xmitquery: %s: no destinations left", qp->dp->name); 1003 return -1; 1004 } 1005 return 0; 1006 } 1007 1008 static int 1009 procansw(Query *qp, DNSmsg *mp, uchar *srcip, int depth, Dest *p) 1010 { 1011 int rv; 1012 char buf[32]; 1013 DN *ndp; 1014 Query nquery; 1015 RR *tp, *soarr; 1016 1017 /* ignore any error replies */ 1018 if((mp->flags & Rmask) == Rserver){ 1019 freeanswers(mp); 1020 if(p != qp->curdest) 1021 p->code = Rserver; 1022 return -1; 1023 } 1024 1025 /* ignore any bad delegations */ 1026 if(mp->ns && baddelegation(mp->ns, qp->nsrp, srcip)){ 1027 if(mp->an == nil){ 1028 freeanswers(mp); 1029 if(p != qp->curdest) 1030 p->code = Rserver; 1031 return -1; 1032 } 1033 rrfreelist(mp->ns); 1034 mp->ns = nil; 1035 } 1036 1037 /* remove any soa's from the authority section */ 1038 soarr = rrremtype(&mp->ns, Tsoa); 1039 1040 /* incorporate answers */ 1041 if(mp->an) 1042 rrattach(mp->an, (mp->flags & Fauth) != 0); 1043 if(mp->ar) 1044 rrattach(mp->ar, 0); 1045 if(mp->ns && !cfg.justforw){ 1046 ndp = mp->ns->owner; 1047 rrattach(mp->ns, 0); 1048 } else 1049 ndp = nil; 1050 1051 /* free the question */ 1052 if(mp->qd) { 1053 rrfreelist(mp->qd); 1054 mp->qd = nil; 1055 } 1056 1057 /* 1058 * Any reply from an authoritative server, 1059 * or a positive reply terminates the search. 1060 * A negative response now also terminates the search. 1061 */ 1062 if(mp->an != nil || (mp->flags & Fauth)){ 1063 if(mp->an == nil && (mp->flags & Rmask) == Rname) 1064 qp->dp->respcode = Rname; 1065 else 1066 qp->dp->respcode = 0; 1067 1068 /* 1069 * cache any negative responses, free soarr. 1070 * negative responses need not be authoritative: 1071 * they can legitimately come from a cache. 1072 */ 1073 if( /* (mp->flags & Fauth) && */ mp->an == nil) 1074 cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr); 1075 else 1076 rrfreelist(soarr); 1077 return 1; 1078 } else if (mp->an == nil && (mp->flags & Rmask) == Rname) { 1079 qp->dp->respcode = Rname; 1080 /* 1081 * cache negative response. 1082 * negative responses need not be authoritative: 1083 * they can legitimately come from a cache. 1084 */ 1085 cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr); 1086 return 1; 1087 } 1088 rrfreelist(soarr); 1089 1090 /* 1091 * if we've been given better name servers, recurse. 1092 * we're called from udpquery, called from 1093 * netquery, which current holds qp->dp->querylck, 1094 * so release it now and acquire it upon return. 1095 * if we're a pure resolver, don't recurse, we have 1096 * to forward to a fixed set of named servers. 1097 */ 1098 if(!mp->ns || cfg.resolver && cfg.justforw) 1099 return 0; 1100 tp = rrlookup(ndp, Tns, NOneg); 1101 if(contains(qp->nsrp, tp)){ 1102 rrfreelist(tp); 1103 return 0; 1104 } 1105 procsetname("recursive query for %s %s", qp->dp->name, 1106 rrname(qp->type, buf, sizeof buf)); 1107 qunlock(&qp->dp->querylck); 1108 1109 queryinit(&nquery, qp->dp, qp->type, qp->req); 1110 nquery.nsrp = tp; 1111 rv = netquery(&nquery, depth+1); 1112 1113 qlock(&qp->dp->querylck); 1114 rrfreelist(tp); 1115 querydestroy(&nquery); 1116 return rv; 1117 } 1118 1119 /* 1120 * send a query via tcp to a single address (from ibuf's udp header) 1121 * and read the answer(s) into mp->an. 1122 */ 1123 static int 1124 tcpquery(Query *qp, DNSmsg *mp, int depth, uchar *ibuf, uchar *obuf, int len, 1125 int waitsecs, int inns, ushort req) 1126 { 1127 int rv = 0; 1128 ulong endtime; 1129 1130 endtime = time(nil) + waitsecs; 1131 if(endtime > qp->req->aborttime) 1132 endtime = qp->req->aborttime; 1133 1134 if (0) 1135 dnslog("%s: udp reply truncated; retrying query via tcp to %I", 1136 qp->dp->name, qp->tcpip); 1137 1138 qlock(&qp->tcplock); 1139 memmove(obuf, ibuf, IPaddrlen); /* send back to respondent */ 1140 /* sets qp->tcpip from obuf's udp header */ 1141 if (xmitquery(qp, Tcp, depth, obuf, inns, len) < 0 || 1142 readreply(qp, Tcp, req, ibuf, mp, endtime) < 0) 1143 rv = -1; 1144 if (qp->tcpfd > 0) { 1145 hangup(qp->tcpctlfd); 1146 close(qp->tcpctlfd); 1147 close(qp->tcpfd); 1148 } 1149 qp->tcpfd = qp->tcpctlfd = -1; 1150 qunlock(&qp->tcplock); 1151 return rv; 1152 } 1153 1154 /* 1155 * query name servers. If the name server returns a pointer to another 1156 * name server, recurse. 1157 */ 1158 static int 1159 netquery1(Query *qp, int depth, uchar *ibuf, uchar *obuf, int waitsecs, int inns) 1160 { 1161 int ndest, len, replywaits, rv; 1162 ushort req; 1163 ulong endtime; 1164 char buf[12]; 1165 uchar srcip[IPaddrlen]; 1166 Dest *p, *np, *dest; 1167 // Dest dest[Maxdest]; 1168 1169 /* pack request into a udp message */ 1170 req = rand(); 1171 len = mkreq(qp->dp, qp->type, obuf, Frecurse|Oquery, req); 1172 1173 /* no server addresses yet */ 1174 queryck(qp); 1175 dest = emalloc(Maxdest * sizeof *dest); /* dest can't be on stack */ 1176 for (p = dest; p < dest + Maxdest; p++) 1177 destinit(p); 1178 qp->curdest = qp->dest = dest; 1179 1180 /* 1181 * transmit udp requests and wait for answers. 1182 * at most Maxtrans attempts to each address. 1183 * each cycle send one more message than the previous. 1184 * retry a query via tcp if its response is truncated. 1185 */ 1186 for(ndest = 1; ndest < Maxdest; ndest++){ 1187 qp->ndest = ndest; 1188 qp->tcpset = 0; 1189 if (xmitquery(qp, Udp, depth, obuf, inns, len) < 0) 1190 break; 1191 1192 endtime = time(nil) + waitsecs; 1193 if(endtime > qp->req->aborttime) 1194 endtime = qp->req->aborttime; 1195 1196 for(replywaits = 0; replywaits < ndest; replywaits++){ 1197 DNSmsg m; 1198 1199 procsetname("reading %sside reply from %I for %s %s", 1200 (inns? "in": "out"), obuf, qp->dp->name, 1201 rrname(qp->type, buf, sizeof buf)); 1202 1203 /* read udp answer into m */ 1204 if (readreply(qp, Udp, req, ibuf, &m, endtime) >= 0) 1205 memmove(srcip, ibuf, IPaddrlen); 1206 else if (!(m.flags & Ftrunc)) { 1207 freeanswers(&m); 1208 break; /* timed out on this dest */ 1209 } else { 1210 /* whoops, it was truncated! ask again via tcp */ 1211 rv = tcpquery(qp, &m, depth, ibuf, obuf, len, 1212 waitsecs, inns, req); /* answer in m */ 1213 if (rv < 0) { 1214 freeanswers(&m); 1215 break; /* failed via tcp too */ 1216 } 1217 memmove(srcip, qp->tcpip, IPaddrlen); 1218 } 1219 1220 /* find responder */ 1221 // dnslog("netquery1 got reply from %I", srcip); 1222 for(p = qp->dest; p < qp->curdest; p++) 1223 if(memcmp(p->a, srcip, sizeof p->a) == 0) 1224 break; 1225 1226 /* remove all addrs of responding server from list */ 1227 for(np = qp->dest; np < qp->curdest; np++) 1228 if(np->s == p->s) 1229 p->nx = Maxtrans; 1230 1231 /* free or incorporate RRs in m */ 1232 rv = procansw(qp, &m, srcip, depth, p); 1233 if (rv > 0) 1234 return rv; 1235 } 1236 } 1237 1238 /* if all servers returned failure, propagate it */ 1239 qp->dp->respcode = Rserver; 1240 for(p = dest; p < qp->curdest; p++) { 1241 destck(p); 1242 if(p->code != Rserver) 1243 qp->dp->respcode = 0; 1244 p->magic = 0; /* prevent accidents */ 1245 } 1246 1247 // if (qp->dp->respcode) 1248 // dnslog("netquery1 setting Rserver for %s", qp->dp->name); 1249 1250 free(qp->dest); 1251 qp->dest = qp->curdest = nil; /* prevent accidents */ 1252 return 0; 1253 } 1254 1255 /* 1256 * run a command with a supplied fd as standard input 1257 */ 1258 char * 1259 system(int fd, char *cmd) 1260 { 1261 int pid, p, i; 1262 static Waitmsg msg; 1263 1264 if((pid = fork()) == -1) 1265 sysfatal("fork failed: %r"); 1266 else if(pid == 0){ 1267 dup(fd, 0); 1268 close(fd); 1269 for (i = 3; i < 200; i++) 1270 close(i); /* don't leak fds */ 1271 execl("/bin/rc", "rc", "-c", cmd, nil); 1272 sysfatal("exec rc: %r"); 1273 } 1274 for(p = waitpid(); p >= 0; p = waitpid()) 1275 if(p == pid) 1276 return msg.msg; 1277 return "lost child"; 1278 } 1279 1280 /* 1281 * in principle we could use a single descriptor for a udp port 1282 * to send all queries and receive all the answers to them, 1283 * but we'd have to sort out the answers by dns-query id. 1284 */ 1285 static int 1286 udpquery(Query *qp, char *mntpt, int depth, int patient, int inns) 1287 { 1288 int fd, rv, wait; 1289 long now; 1290 ulong pcntprob; 1291 char *msg; 1292 uchar *obuf, *ibuf; 1293 static QLock mntlck; 1294 static ulong lastmount; 1295 1296 /* use alloced buffers rather than ones from the stack */ 1297 // ibuf = emalloc(Maxudpin+Udphdrsize); 1298 ibuf = emalloc(64*1024); /* max. tcp reply size */ 1299 obuf = emalloc(Maxudp+Udphdrsize); 1300 1301 fd = udpport(mntpt); 1302 while (fd < 0 && cfg.straddle && strcmp(mntpt, "/net.alt") == 0) { 1303 /* HACK: remount /net.alt */ 1304 now = time(nil); 1305 if (now < lastmount + Remntretry) 1306 sleep((lastmount + Remntretry - now)*1000); 1307 qlock(&mntlck); 1308 fd = udpport(mntpt); /* try again under lock */ 1309 if (fd < 0) { 1310 dnslog("[%d] remounting /net.alt", getpid()); 1311 unmount(nil, "/net.alt"); 1312 1313 msg = system(open("/dev/null", ORDWR), "outside"); 1314 1315 lastmount = time(nil); 1316 if (msg && *msg) { 1317 dnslog("[%d] can't remount /net.alt: %s", 1318 getpid(), msg); 1319 sleep(10*1000); /* don't spin wildly */ 1320 } else 1321 fd = udpport(mntpt); 1322 } 1323 qunlock(&mntlck); 1324 } 1325 if (fd < 0) { 1326 dnslog("can't get udpport for %s query of name %s: %r", 1327 mntpt, qp->dp->name); 1328 sysfatal("out of udp conversations"); /* we're buggered */ 1329 } 1330 1331 if (qp->type < 0 || qp->type >= nelem(likely)) 1332 pcntprob = 35; 1333 else 1334 pcntprob = likely[qp->type]; 1335 if (!patient) 1336 pcntprob /= 2; 1337 /* 1338 * Our QIP servers are busted, don't answer AAAA 1339 * and take forever to answer CNAME if there isn't one. 1340 * make time-to-wait proportional to estimated probability of an 1341 * RR of that type existing. 1342 */ 1343 qp->req->aborttime = time(nil) + (Maxreqtm * pcntprob)/100; 1344 if (qp->req->aborttime < time(nil) + 2) 1345 qp->req->aborttime = time(nil) + 2; 1346 qp->udpfd = fd; 1347 wait = (15 * pcntprob) / 100; /* for this outgoing query */ 1348 if (wait < 2) 1349 wait = 2; 1350 1351 rv = netquery1(qp, depth, ibuf, obuf, wait, inns); 1352 close(fd); 1353 qp->udpfd = -1; 1354 1355 free(obuf); 1356 free(ibuf); 1357 return rv; 1358 } 1359 1360 /* look up (dp->name,type) via *nsrp with results in *reqp */ 1361 static int 1362 netquery(Query *qp, int depth) 1363 { 1364 int lock, rv, triedin, inname; 1365 RR *rp; 1366 1367 if(depth > 12) /* in a recursive loop? */ 1368 return 0; 1369 1370 slave(qp->req); 1371 /* 1372 * slave might have forked. if so, the parent process longjmped to 1373 * req->mret; we're usually the child slave, but if there are too 1374 * many children already, we're still the same process. 1375 */ 1376 1377 /* don't lock before call to slave so only children can block */ 1378 if(1) 1379 lock = qp->req->isslave != 0; 1380 if(1 && lock) { 1381 procsetname("query lock wait for %s", qp->dp->name); 1382 /* 1383 * don't make concurrent queries for this name. 1384 * dozens of processes blocking here probably indicates 1385 * an error in our dns data that causes us to not 1386 * recognise a zone (area) as one of our own, thus 1387 * causing us to query other nameservers. 1388 */ 1389 qlock(&qp->dp->querylck); 1390 } 1391 procsetname("netquery: %s", qp->dp->name); 1392 1393 /* prepare server RR's for incremental lookup */ 1394 for(rp = qp->nsrp; rp; rp = rp->next) 1395 rp->marker = 0; 1396 1397 rv = 0; /* pessimism */ 1398 triedin = 0; 1399 1400 /* 1401 * normal resolvers and servers will just use mntpt for all addresses, 1402 * even on the outside. straddling servers will use mntpt (/net) 1403 * for inside addresses and /net.alt for outside addresses, 1404 * thus bypassing other inside nameservers. 1405 */ 1406 inname = insideaddr(qp->dp->name); 1407 if (!cfg.straddle || inname) { 1408 rv = udpquery(qp, mntpt, depth, Hurry, (cfg.inside? Inns: Outns)); 1409 triedin = 1; 1410 } 1411 1412 /* 1413 * if we're still looking, are inside, and have an outside domain, 1414 * try it on our outside interface, if any. 1415 */ 1416 if (rv == 0 && cfg.inside && !inname) { 1417 if (triedin) 1418 dnslog( 1419 "[%d] netquery: internal nameservers failed for %s; trying external", 1420 getpid(), qp->dp->name); 1421 1422 /* prepare server RR's for incremental lookup */ 1423 for(rp = qp->nsrp; rp; rp = rp->next) 1424 rp->marker = 0; 1425 1426 rv = udpquery(qp, "/net.alt", depth, Patient, Outns); 1427 } 1428 // if (rv == 0) /* could ask /net.alt/dns directly */ 1429 // askoutdns(qp->dp, qp->type); 1430 1431 if(1 && lock) 1432 qunlock(&qp->dp->querylck); 1433 return rv; 1434 } 1435 1436 int 1437 seerootns(void) 1438 { 1439 int rv; 1440 char root[] = ""; 1441 Request req; 1442 Query query; 1443 1444 memset(&req, 0, sizeof req); 1445 req.isslave = 1; 1446 req.aborttime = now + Maxreqtm; 1447 queryinit(&query, dnlookup(root, Cin, 1), Tns, &req); 1448 query.nsrp = dblookup(root, Cin, Tns, 0, 0); 1449 rv = netquery(&query, 0); 1450 rrfreelist(query.nsrp); 1451 querydestroy(&query); 1452 return rv; 1453 } 1454