1 /* 2 * domain name resolvers, see rfcs 1035 and 1123 3 */ 4 #include <u.h> 5 #include <libc.h> 6 #include <ip.h> 7 #include <bio.h> 8 #include <ndb.h> 9 #include "dns.h" 10 11 #define NS2MS(ns) ((ns) / 1000000L) 12 #define S2MS(s) ((s) * 1000) 13 14 typedef struct Dest Dest; 15 typedef struct Ipaddr Ipaddr; 16 typedef struct Query Query; 17 18 enum 19 { 20 Udp, Tcp, 21 Maxdest= 24, /* maximum destinations for a request message */ 22 Maxtrans= 3, /* maximum transmissions to a server */ 23 Destmagic= 0xcafebabe, 24 Querymagic= 0xdeadbeef, 25 }; 26 enum { Hurry, Patient, }; 27 enum { Outns, Inns, }; 28 enum { Remntretry = 15, }; /* min. sec.s between remount attempts */ 29 30 struct Ipaddr { 31 Ipaddr *next; 32 uchar ip[IPaddrlen]; 33 }; 34 35 struct Dest 36 { 37 uchar a[IPaddrlen]; /* ip address */ 38 DN *s; /* name server */ 39 int nx; /* number of transmissions */ 40 int code; /* response code; used to clear dp->respcode */ 41 42 ulong magic; 43 }; 44 45 struct Query { 46 DN *dp; /* domain */ 47 int type; /* and type to look up */ 48 Request *req; 49 RR *nsrp; /* name servers to consult */ 50 51 /* dest must not be on the stack due to forking in slave() */ 52 Dest *dest; /* array of destinations */ 53 Dest *curdest; /* pointer to one of them */ 54 int ndest; 55 56 int udpfd; 57 58 QLock tcplock; /* only one tcp call at a time per query */ 59 int tcpset; 60 int tcpfd; /* if Tcp, read replies from here */ 61 int tcpctlfd; 62 uchar tcpip[IPaddrlen]; 63 64 ulong magic; 65 }; 66 67 /* estimated % probability of such a record existing at all */ 68 int likely[] = { 69 [Ta] 95, 70 [Taaaa] 10, 71 [Tcname] 15, 72 [Tmx] 60, 73 [Tns] 90, 74 [Tnull] 5, 75 [Tptr] 35, 76 [Tsoa] 90, 77 [Tsrv] 60, 78 [Ttxt] 15, 79 [Tall] 95, 80 }; 81 82 static RR* dnresolve1(char*, int, int, Request*, int, int); 83 static int netquery(Query *, int); 84 85 /* 86 * reading /proc/pid/args yields either "name" or "name [display args]", 87 * so return only display args, if any. 88 */ 89 static char * 90 procgetname(void) 91 { 92 int fd, n; 93 char *lp, *rp; 94 char buf[256]; 95 96 snprint(buf, sizeof buf, "#p/%d/args", getpid()); 97 if((fd = open(buf, OREAD)) < 0) 98 return strdup(""); 99 *buf = '\0'; 100 n = read(fd, buf, sizeof buf-1); 101 close(fd); 102 if (n >= 0) 103 buf[n] = '\0'; 104 if ((lp = strchr(buf, '[')) == nil || 105 (rp = strrchr(buf, ']')) == nil) 106 return strdup(""); 107 *rp = '\0'; 108 return strdup(lp+1); 109 } 110 111 /* 112 * lookup 'type' info for domain name 'name'. If it doesn't exist, try 113 * looking it up as a canonical name. 114 */ 115 RR* 116 dnresolve(char *name, int class, int type, Request *req, RR **cn, int depth, 117 int recurse, int rooted, int *status) 118 { 119 RR *rp, *nrp, *drp; 120 DN *dp; 121 int loops; 122 char *procname; 123 char nname[Domlen]; 124 125 if(status) 126 *status = 0; 127 128 if(depth > 12) /* in a recursive loop? */ 129 return nil; 130 131 procname = procgetname(); 132 /* 133 * hack for systems that don't have resolve search 134 * lists. Just look up the simple name in the database. 135 */ 136 if(!rooted && strchr(name, '.') == 0){ 137 rp = nil; 138 drp = domainlist(class); 139 for(nrp = drp; rp == nil && nrp != nil; nrp = nrp->next){ 140 snprint(nname, sizeof nname, "%s.%s", name, 141 nrp->ptr->name); 142 rp = dnresolve(nname, class, type, req, cn, depth+1, 143 recurse, rooted, status); 144 rrfreelist(rrremneg(&rp)); 145 } 146 if(drp != nil) 147 rrfree(drp); 148 procsetname(procname); 149 free(procname); 150 return rp; 151 } 152 153 /* 154 * try the name directly 155 */ 156 rp = dnresolve1(name, class, type, req, depth, recurse); 157 if(rp) { 158 procsetname(procname); 159 free(procname); 160 return randomize(rp); 161 } 162 163 /* try it as a canonical name if we weren't told the name didn't exist */ 164 dp = dnlookup(name, class, 0); 165 if(type != Tptr && dp->respcode != Rname) 166 for(loops = 0; rp == nil && loops < 32; loops++){ 167 rp = dnresolve1(name, class, Tcname, req, depth, recurse); 168 if(rp == nil) 169 break; 170 171 if(rp->negative){ 172 rrfreelist(rp); 173 rp = nil; 174 break; 175 } 176 177 name = rp->host->name; 178 if(cn) 179 rrcat(cn, rp); 180 else 181 rrfreelist(rp); 182 183 rp = dnresolve1(name, class, type, req, depth, recurse); 184 } 185 186 /* distinction between not found and not good */ 187 if(rp == nil && status != nil && dp->respcode != 0) 188 *status = dp->respcode; 189 190 procsetname(procname); 191 free(procname); 192 return randomize(rp); 193 } 194 195 static void 196 queryinit(Query *qp, DN *dp, int type, Request *req) 197 { 198 memset(qp, 0, sizeof *qp); 199 qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1; 200 qp->dp = dp; 201 qp->type = type; 202 qp->req = req; 203 qp->nsrp = nil; 204 qp->dest = qp->curdest = nil; 205 qp->magic = Querymagic; 206 } 207 208 static void 209 queryck(Query *qp) 210 { 211 assert(qp); 212 assert(qp->magic == Querymagic); 213 } 214 215 static void 216 querydestroy(Query *qp) 217 { 218 queryck(qp); 219 /* leave udpfd alone */ 220 if (qp->tcpfd > 0) 221 close(qp->tcpfd); 222 if (qp->tcpctlfd > 0) { 223 hangup(qp->tcpctlfd); 224 close(qp->tcpctlfd); 225 } 226 free(qp->dest); 227 memset(qp, 0, sizeof *qp); /* prevent accidents */ 228 qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1; 229 } 230 231 static void 232 destinit(Dest *p) 233 { 234 memset(p, 0, sizeof *p); 235 p->magic = Destmagic; 236 } 237 238 static void 239 destck(Dest *p) 240 { 241 assert(p); 242 assert(p->magic == Destmagic); 243 } 244 245 static void 246 destdestroy(Dest *p) 247 { 248 USED(p); 249 } 250 251 /* 252 * if the response to a query hasn't arrived within 100 ms., 253 * it's unlikely to arrive at all. after 1 s., it's really unlikely. 254 * queries for missing RRs are likely to produce time-outs rather than 255 * negative responses, so cname and aaaa queries are likely to time out, 256 * thus we don't wait very long for them. 257 */ 258 static void 259 notestats(vlong start, int tmout, int type) 260 { 261 qlock(&stats); 262 if (tmout) { 263 stats.tmout++; 264 if (type == Taaaa) 265 stats.tmoutv6++; 266 else if (type == Tcname) 267 stats.tmoutcname++; 268 } else { 269 long wait10ths = NS2MS(nsec() - start) / 100; 270 271 if (wait10ths <= 0) 272 stats.under10ths[0]++; 273 else if (wait10ths >= nelem(stats.under10ths)) 274 stats.under10ths[nelem(stats.under10ths) - 1]++; 275 else 276 stats.under10ths[wait10ths]++; 277 } 278 qunlock(&stats); 279 } 280 281 static void 282 noteinmem(void) 283 { 284 qlock(&stats); 285 stats.answinmem++; 286 qunlock(&stats); 287 } 288 289 static RR* 290 dnresolve1(char *name, int class, int type, Request *req, int depth, 291 int recurse) 292 { 293 char *cp; 294 Area *area; 295 DN *dp, *nsdp; 296 RR *rp, *nsrp, *dbnsrp; 297 Query query; 298 299 if(debug) 300 dnslog("[%d] dnresolve1 %s %d %d", getpid(), name, type, class); 301 302 /* only class Cin implemented so far */ 303 if(class != Cin) 304 return nil; 305 306 dp = dnlookup(name, class, 1); 307 308 /* 309 * Try the cache first 310 */ 311 rp = rrlookup(dp, type, OKneg); 312 if(rp) 313 if(rp->db){ 314 /* unauthoritative db entries are hints */ 315 if(rp->auth) { 316 noteinmem(); 317 return rp; 318 } 319 } else 320 /* cached entry must still be valid */ 321 if(rp->ttl > now) 322 /* but Tall entries are special */ 323 if(type != Tall || rp->query == Tall) { 324 noteinmem(); 325 return rp; 326 } 327 328 rrfreelist(rp); 329 330 /* 331 * try the cache for a canonical name. if found punt 332 * since we'll find it during the canonical name search 333 * in dnresolve(). 334 */ 335 if(type != Tcname){ 336 rp = rrlookup(dp, Tcname, NOneg); 337 rrfreelist(rp); 338 if(rp) 339 return nil; 340 } 341 342 /* 343 * if the domain name is within an area of ours, 344 * we should have found its data in memory by now. 345 */ 346 area = inmyarea(dp->name); 347 if (area) { 348 // char buf[32]; 349 350 // dnslog("%s %s: no data in area %s", dp->name, 351 // rrname(type, buf, sizeof buf), area->soarr->owner->name); 352 return nil; 353 } 354 355 queryinit(&query, dp, type, req); 356 357 /* 358 * if we're running as just a resolver, query our 359 * designated name servers 360 */ 361 if(cfg.resolver){ 362 nsrp = randomize(getdnsservers(class)); 363 if(nsrp != nil) { 364 query.nsrp = nsrp; 365 if(netquery(&query, depth+1)){ 366 rrfreelist(nsrp); 367 querydestroy(&query); 368 return rrlookup(dp, type, OKneg); 369 } 370 rrfreelist(nsrp); 371 } 372 } 373 374 /* 375 * walk up the domain name looking for 376 * a name server for the domain. 377 */ 378 for(cp = name; cp; cp = walkup(cp)){ 379 /* 380 * if this is a local (served by us) domain, 381 * return answer 382 */ 383 dbnsrp = randomize(dblookup(cp, class, Tns, 0, 0)); 384 if(dbnsrp && dbnsrp->local){ 385 rp = dblookup(name, class, type, 1, dbnsrp->ttl); 386 rrfreelist(dbnsrp); 387 querydestroy(&query); 388 return rp; 389 } 390 391 /* 392 * if recursion isn't set, just accept local 393 * entries 394 */ 395 if(recurse == Dontrecurse){ 396 if(dbnsrp) 397 rrfreelist(dbnsrp); 398 continue; 399 } 400 401 /* look for ns in cache */ 402 nsdp = dnlookup(cp, class, 0); 403 nsrp = nil; 404 if(nsdp) 405 nsrp = randomize(rrlookup(nsdp, Tns, NOneg)); 406 407 /* if the entry timed out, ignore it */ 408 if(nsrp && nsrp->ttl < now){ 409 rrfreelist(nsrp); 410 nsrp = nil; 411 } 412 413 if(nsrp){ 414 rrfreelist(dbnsrp); 415 416 /* query the name servers found in cache */ 417 query.nsrp = nsrp; 418 if(netquery(&query, depth+1)){ 419 rrfreelist(nsrp); 420 querydestroy(&query); 421 return rrlookup(dp, type, OKneg); 422 } 423 rrfreelist(nsrp); 424 continue; 425 } 426 427 /* use ns from db */ 428 if(dbnsrp){ 429 /* try the name servers found in db */ 430 query.nsrp = dbnsrp; 431 if(netquery(&query, depth+1)){ 432 /* we got an answer */ 433 rrfreelist(dbnsrp); 434 querydestroy(&query); 435 return rrlookup(dp, type, NOneg); 436 } 437 rrfreelist(dbnsrp); 438 } 439 } 440 querydestroy(&query); 441 442 /* settle for a non-authoritative answer */ 443 rp = rrlookup(dp, type, OKneg); 444 if(rp) 445 return rp; 446 447 /* noone answered. try the database, we might have a chance. */ 448 return dblookup(name, class, type, 0, 0); 449 } 450 451 /* 452 * walk a domain name one element to the right. 453 * return a pointer to that element. 454 * in other words, return a pointer to the parent domain name. 455 */ 456 char* 457 walkup(char *name) 458 { 459 char *cp; 460 461 cp = strchr(name, '.'); 462 if(cp) 463 return cp+1; 464 else if(*name) 465 return ""; 466 else 467 return 0; 468 } 469 470 /* 471 * Get a udp port for sending requests and reading replies. Put the port 472 * into "headers" mode. 473 */ 474 static char *hmsg = "headers"; 475 476 int 477 udpport(char *mtpt) 478 { 479 int fd, ctl; 480 char ds[64], adir[64]; 481 482 /* get a udp port */ 483 snprint(ds, sizeof ds, "%s/udp!*!0", (mtpt? mtpt: "/net")); 484 ctl = announce(ds, adir); 485 if(ctl < 0){ 486 /* warning("can't get udp port"); */ 487 return -1; 488 } 489 490 /* turn on header style interface */ 491 if(write(ctl, hmsg, strlen(hmsg)) , 0){ 492 close(ctl); 493 warning(hmsg); 494 return -1; 495 } 496 497 /* grab the data file */ 498 snprint(ds, sizeof ds, "%s/data", adir); 499 fd = open(ds, ORDWR); 500 close(ctl); 501 if(fd < 0) 502 warning("can't open udp port %s: %r", ds); 503 return fd; 504 } 505 506 /* generate a DNS UDP query packet */ 507 int 508 mkreq(DN *dp, int type, uchar *buf, int flags, ushort reqno) 509 { 510 DNSmsg m; 511 int len; 512 Udphdr *uh = (Udphdr*)buf; 513 514 /* stuff port number into output buffer */ 515 memset(uh, 0, sizeof *uh); 516 hnputs(uh->rport, 53); 517 518 /* make request and convert it to output format */ 519 memset(&m, 0, sizeof m); 520 m.flags = flags; 521 m.id = reqno; 522 m.qd = rralloc(type); 523 m.qd->owner = dp; 524 m.qd->type = type; 525 len = convDNS2M(&m, &buf[Udphdrsize], Maxudp); 526 rrfree(m.qd); 527 return len; 528 } 529 530 /* for alarms in readreply */ 531 static void 532 ding(void *x, char *msg) 533 { 534 USED(x); 535 if(strcmp(msg, "alarm") == 0) 536 noted(NCONT); 537 else 538 noted(NDFLT); 539 } 540 541 void 542 freeanswers(DNSmsg *mp) 543 { 544 rrfreelist(mp->qd); 545 rrfreelist(mp->an); 546 rrfreelist(mp->ns); 547 rrfreelist(mp->ar); 548 mp->qd = mp->an = mp->ns = mp->ar = nil; 549 } 550 551 /* sets srcip */ 552 static int 553 readnet(Query *qp, int medium, uchar *ibuf, ulong endtime, uchar **replyp, 554 uchar *srcip) 555 { 556 int len, fd; 557 long ms; 558 vlong startns = nsec(); 559 uchar *reply; 560 uchar lenbuf[2]; 561 562 /* timed read of reply */ 563 ms = S2MS(endtime) - NS2MS(startns); 564 if (ms < 2000) 565 ms = 2000; /* give the remote ns a fighting chance */ 566 reply = ibuf; 567 len = -1; /* pessimism */ 568 memset(srcip, 0, IPaddrlen); 569 if (medium == Udp) 570 if (qp->udpfd <= 0) 571 dnslog("readnet: qp->udpfd closed"); 572 else { 573 alarm(ms); 574 len = read(qp->udpfd, ibuf, Udphdrsize+Maxudpin); 575 alarm(0); 576 notestats(startns, len < 0, qp->type); 577 if (len >= IPaddrlen) 578 memmove(srcip, ibuf, IPaddrlen); 579 if (len >= Udphdrsize) { 580 len -= Udphdrsize; 581 reply += Udphdrsize; 582 } 583 } 584 else { 585 if (!qp->tcpset) 586 dnslog("readnet: tcp params not set"); 587 alarm(ms); 588 fd = qp->tcpfd; 589 if (fd <= 0) 590 dnslog("readnet: %s: tcp fd unset for dest %I", 591 qp->dp->name, qp->tcpip); 592 else if (readn(fd, lenbuf, 2) != 2) { 593 dnslog("readnet: short read of tcp size from %I", 594 qp->tcpip); 595 /* probably a time-out */ 596 notestats(startns, 1, qp->type); 597 } else { 598 len = lenbuf[0]<<8 | lenbuf[1]; 599 if (readn(fd, ibuf, len) != len) { 600 dnslog("readnet: short read of tcp data from %I", 601 qp->tcpip); 602 /* probably a time-out */ 603 notestats(startns, 1, qp->type); 604 len = -1; 605 } 606 } 607 alarm(0); 608 memmove(srcip, qp->tcpip, IPaddrlen); 609 } 610 *replyp = reply; 611 return len; 612 } 613 614 /* 615 * read replies to a request and remember the rrs in the answer(s). 616 * ignore any of the wrong type. 617 * wait at most until endtime. 618 */ 619 static int 620 readreply(Query *qp, int medium, ushort req, uchar *ibuf, DNSmsg *mp, 621 ulong endtime) 622 { 623 int len, rv; 624 char *err; 625 char tbuf[32]; 626 uchar *reply; 627 uchar srcip[IPaddrlen]; 628 RR *rp; 629 630 notify(ding); 631 632 queryck(qp); 633 rv = 0; 634 memset(mp, 0, sizeof *mp); 635 if (time(nil) >= endtime) 636 return -1; /* timed out before we started */ 637 638 memset(srcip, 0, sizeof srcip); 639 if (0) 640 len = -1; 641 for (; time(nil) < endtime && 642 (len = readnet(qp, medium, ibuf, endtime, &reply, srcip)) >= 0; 643 freeanswers(mp)){ 644 /* convert into internal format */ 645 memset(mp, 0, sizeof *mp); 646 err = convM2DNS(reply, len, mp, nil); 647 if (mp->flags & Ftrunc) { 648 free(err); 649 freeanswers(mp); 650 /* notify our caller to retry the query via tcp. */ 651 return -1; 652 } else if(err){ 653 dnslog("readreply: %s: input err, len %d: %s: %I", 654 qp->dp->name, len, err, srcip); 655 free(err); 656 continue; 657 } 658 if(debug) 659 logreply(qp->req->id, srcip, mp); 660 661 /* answering the right question? */ 662 if(mp->id != req) 663 dnslog("%d: id %d instead of %d: %I", qp->req->id, 664 mp->id, req, srcip); 665 else if(mp->qd == 0) 666 dnslog("%d: no question RR: %I", qp->req->id, srcip); 667 else if(mp->qd->owner != qp->dp) 668 dnslog("%d: owner %s instead of %s: %I", qp->req->id, 669 mp->qd->owner->name, qp->dp->name, srcip); 670 else if(mp->qd->type != qp->type) 671 dnslog("%d: qp->type %d instead of %d: %I", 672 qp->req->id, mp->qd->type, qp->type, srcip); 673 else { 674 /* remember what request this is in answer to */ 675 for(rp = mp->an; rp; rp = rp->next) 676 rp->query = qp->type; 677 return rv; 678 } 679 } 680 if (time(nil) >= endtime) { 681 ; /* query expired */ 682 } else if (0) { 683 /* this happens routinely when a read times out */ 684 dnslog("readreply: %s type %s: ns %I read error or eof " 685 "(returned %d): %r", qp->dp->name, rrname(qp->type, 686 tbuf, sizeof tbuf), srcip, len); 687 if (medium == Udp) 688 for (rp = qp->nsrp; rp != nil; rp = rp->next) 689 if (rp->type == Tns) 690 dnslog("readreply: %s: query sent to " 691 "ns %s", qp->dp->name, 692 rp->host->name); 693 } 694 return -1; 695 } 696 697 /* 698 * return non-0 if first list includes second list 699 */ 700 int 701 contains(RR *rp1, RR *rp2) 702 { 703 RR *trp1, *trp2; 704 705 for(trp2 = rp2; trp2; trp2 = trp2->next){ 706 for(trp1 = rp1; trp1; trp1 = trp1->next) 707 if(trp1->type == trp2->type) 708 if(trp1->host == trp2->host) 709 if(trp1->owner == trp2->owner) 710 break; 711 if(trp1 == nil) 712 return 0; 713 } 714 return 1; 715 } 716 717 718 /* 719 * return multicast version if any 720 */ 721 int 722 ipisbm(uchar *ip) 723 { 724 if(isv4(ip)){ 725 if (ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0 || 726 ipcmp(ip, IPv4bcast) == 0) 727 return 4; 728 } else 729 if(ip[0] == 0xff) 730 return 6; 731 return 0; 732 } 733 734 /* 735 * Get next server address 736 */ 737 static int 738 serveraddrs(Query *qp, int nd, int depth) 739 { 740 RR *rp, *arp, *trp; 741 Dest *cur; 742 743 if(nd >= Maxdest) 744 return 0; 745 746 /* 747 * look for a server whose address we already know. 748 * if we find one, mark it so we ignore this on 749 * subsequent passes. 750 */ 751 arp = 0; 752 for(rp = qp->nsrp; rp; rp = rp->next){ 753 assert(rp->magic == RRmagic); 754 if(rp->marker) 755 continue; 756 arp = rrlookup(rp->host, Ta, NOneg); 757 if(arp){ 758 rp->marker = 1; 759 break; 760 } 761 arp = dblookup(rp->host->name, Cin, Ta, 0, 0); 762 if(arp){ 763 rp->marker = 1; 764 break; 765 } 766 } 767 768 /* 769 * if the cache and database lookup didn't find any new 770 * server addresses, try resolving one via the network. 771 * Mark any we try to resolve so we don't try a second time. 772 */ 773 if(arp == 0) 774 for(rp = qp->nsrp; rp; rp = rp->next){ 775 if(rp->marker) 776 continue; 777 rp->marker = 1; 778 779 /* 780 * avoid loops looking up a server under itself 781 */ 782 if(subsume(rp->owner->name, rp->host->name)) 783 continue; 784 785 arp = dnresolve(rp->host->name, Cin, Ta, qp->req, 0, 786 depth+1, Recurse, 1, 0); 787 rrfreelist(rrremneg(&arp)); 788 if(arp) 789 break; 790 } 791 792 /* use any addresses that we found */ 793 for(trp = arp; trp && nd < Maxdest; trp = trp->next){ 794 cur = &qp->dest[nd]; 795 parseip(cur->a, trp->ip->name); 796 /* 797 * straddling servers can reject all nameservers if they are all 798 * inside, so be sure to list at least one outside ns at 799 * the end of the ns list in /lib/ndb for `dom='. 800 */ 801 if (ipisbm(cur->a) || 802 cfg.straddle && !insideaddr(qp->dp->name) && insidens(cur->a)) 803 continue; 804 cur->nx = 0; 805 cur->s = trp->owner; 806 cur->code = Rtimeout; 807 nd++; 808 } 809 rrfreelist(arp); 810 return nd; 811 } 812 813 /* 814 * cache negative responses 815 */ 816 static void 817 cacheneg(DN *dp, int type, int rcode, RR *soarr) 818 { 819 RR *rp; 820 DN *soaowner; 821 ulong ttl; 822 823 stats.negcached++; 824 825 /* no cache time specified, don't make anything up */ 826 if(soarr != nil){ 827 if(soarr->next != nil){ 828 rrfreelist(soarr->next); 829 soarr->next = nil; 830 } 831 soaowner = soarr->owner; 832 } else 833 soaowner = nil; 834 835 /* the attach can cause soarr to be freed so mine it now */ 836 if(soarr != nil && soarr->soa != nil) 837 ttl = soarr->soa->minttl+now; 838 else 839 ttl = 5*Min; 840 841 /* add soa and negative RR to the database */ 842 rrattach(soarr, 1); 843 844 rp = rralloc(type); 845 rp->owner = dp; 846 rp->negative = 1; 847 rp->negsoaowner = soaowner; 848 rp->negrcode = rcode; 849 rp->ttl = ttl; 850 rrattach(rp, 1); 851 } 852 853 static int 854 setdestoutns(Dest *p, int n) 855 { 856 uchar *outns = outsidens(n); 857 858 destck(p); 859 destinit(p); 860 if (outns == nil) { 861 if (n == 0) 862 dnslog("[%d] no outside-ns in ndb", getpid()); 863 return -1; 864 } 865 memmove(p->a, outns, sizeof p->a); 866 p->s = dnlookup("outside-ns-ips", Cin, 1); 867 return 0; 868 } 869 870 /* 871 * issue query via UDP or TCP as appropriate. 872 * for TCP, returns with qp->tcpip set from udppkt header. 873 */ 874 static int 875 mydnsquery(Query *qp, int medium, uchar *udppkt, int len) 876 { 877 int rv = -1, nfd; 878 char *domain; 879 char conndir[40]; 880 uchar belen[2]; 881 NetConnInfo *nci; 882 883 queryck(qp); 884 switch (medium) { 885 case Udp: 886 nfd = dup(qp->udpfd, -1); 887 if (nfd < 0) { 888 warning("mydnsquery: qp->udpfd %d: %r", qp->udpfd); 889 close(qp->udpfd); /* ensure it's closed */ 890 qp->udpfd = -1; /* poison it */ 891 return rv; 892 } 893 close(nfd); 894 895 if (qp->udpfd <= 0) 896 dnslog("mydnsquery: qp->udpfd %d closed", qp->udpfd); 897 else { 898 if (write(qp->udpfd, udppkt, len+Udphdrsize) != 899 len+Udphdrsize) 900 warning("sending udp msg: %r"); 901 else { 902 stats.qsent++; 903 rv = 0; 904 } 905 } 906 break; 907 case Tcp: 908 /* send via TCP & keep fd around for reply */ 909 domain = smprint("%I", udppkt); 910 alarm(10*1000); 911 qp->tcpfd = rv = dial(netmkaddr(domain, "tcp", "dns"), nil, 912 conndir, &qp->tcpctlfd); 913 alarm(0); 914 if (qp->tcpfd < 0) { 915 dnslog("can't dial tcp!%s!dns: %r", domain); 916 free(domain); 917 break; 918 } 919 free(domain); 920 nci = getnetconninfo(conndir, qp->tcpfd); 921 if (nci) { 922 parseip(qp->tcpip, nci->rsys); 923 freenetconninfo(nci); 924 } else 925 dnslog("mydnsquery: getnetconninfo failed"); 926 qp->tcpset = 1; 927 928 belen[0] = len >> 8; 929 belen[1] = len; 930 if (write(qp->tcpfd, belen, 2) != 2 || 931 write(qp->tcpfd, udppkt + Udphdrsize, len) != len) 932 warning("sending tcp msg: %r"); 933 break; 934 default: 935 sysfatal("mydnsquery: bad medium"); 936 } 937 return rv; 938 } 939 940 /* 941 * send query to all UDP destinations or one TCP destination, 942 * taken from obuf (udp packet) header 943 */ 944 static int 945 xmitquery(Query *qp, int medium, int depth, uchar *obuf, int inns, int len) 946 { 947 int j, n; 948 char buf[32]; 949 Dest *p; 950 951 queryck(qp); 952 if(time(nil) >= qp->req->aborttime) 953 return -1; 954 955 /* 956 * get a nameserver address if we need one. 957 * serveraddrs populates qp->dest. 958 */ 959 p = qp->dest; 960 destck(p); 961 if (qp->ndest < 0 || qp->ndest > Maxdest) 962 dnslog("qp->ndest %d out of range", qp->ndest); 963 if (qp->ndest > qp->curdest - p) 964 qp->curdest = &qp->dest[serveraddrs(qp, qp->curdest - p, depth)]; 965 destck(qp->curdest); 966 967 /* no servers, punt */ 968 if (qp->curdest == qp->dest) 969 if (cfg.straddle && cfg.inside) { 970 /* get ips of "outside-ns-ips" */ 971 p = qp->curdest = qp->dest; 972 for(n = 0; n < Maxdest; n++, qp->curdest++) 973 if (setdestoutns(qp->curdest, n) < 0) 974 break; 975 } else { 976 /* it's probably just a bogus domain, don't log it */ 977 // dnslog("xmitquery: %s: no nameservers", qp->dp->name); 978 return -1; 979 } 980 981 /* send to first 'qp->ndest' destinations */ 982 j = 0; 983 if (medium == Tcp) { 984 j++; 985 queryck(qp); 986 assert(qp->dp); 987 procsetname("tcp %sside query for %s %s", (inns? "in": "out"), 988 qp->dp->name, rrname(qp->type, buf, sizeof buf)); 989 mydnsquery(qp, medium, obuf, len); /* sets qp->tcpip from obuf */ 990 if(debug) 991 logsend(qp->req->id, depth, qp->tcpip, "", qp->dp->name, 992 qp->type); 993 } else 994 for(; p < &qp->dest[qp->ndest] && p < qp->curdest; p++){ 995 /* skip destinations we've finished with */ 996 if(p->nx >= Maxtrans) 997 continue; 998 999 j++; 1000 1001 /* exponential backoff of requests */ 1002 if((1<<p->nx) > qp->ndest) 1003 continue; 1004 1005 procsetname("udp %sside query to %I/%s %s %s", 1006 (inns? "in": "out"), p->a, p->s->name, 1007 qp->dp->name, rrname(qp->type, buf, sizeof buf)); 1008 if(debug) 1009 logsend(qp->req->id, depth, p->a, p->s->name, 1010 qp->dp->name, qp->type); 1011 1012 /* fill in UDP destination addr & send it */ 1013 memmove(obuf, p->a, sizeof p->a); 1014 mydnsquery(qp, medium, obuf, len); 1015 p->nx++; 1016 } 1017 if(j == 0) { 1018 // dnslog("xmitquery: %s: no destinations left", qp->dp->name); 1019 return -1; 1020 } 1021 return 0; 1022 } 1023 1024 static int lckindex[Maxlcks] = { 1025 0, /* all others map here */ 1026 Ta, 1027 Tns, 1028 Tcname, 1029 Tsoa, 1030 Tptr, 1031 Tmx, 1032 Ttxt, 1033 Taaaa, 1034 }; 1035 1036 static int 1037 qtype2lck(int qtype) /* map query type to querylck index */ 1038 { 1039 int i; 1040 1041 for (i = 1; i < nelem(lckindex); i++) 1042 if (lckindex[i] == qtype) 1043 return i; 1044 return 0; 1045 } 1046 1047 static int 1048 procansw(Query *qp, DNSmsg *mp, uchar *srcip, int depth, Dest *p) 1049 { 1050 int rv; 1051 char buf[32]; 1052 DN *ndp; 1053 Query nquery; 1054 RR *tp, *soarr; 1055 1056 if (mp->an == nil) 1057 stats.negans++; 1058 1059 /* ignore any error replies */ 1060 if((mp->flags & Rmask) == Rserver){ 1061 freeanswers(mp); 1062 if(p != qp->curdest) 1063 p->code = Rserver; 1064 return -1; 1065 } 1066 1067 /* ignore any bad delegations */ 1068 if(mp->ns && baddelegation(mp->ns, qp->nsrp, srcip)){ 1069 if(mp->an == nil){ 1070 freeanswers(mp); 1071 if(p != qp->curdest) 1072 p->code = Rserver; 1073 return -1; 1074 } 1075 rrfreelist(mp->ns); 1076 mp->ns = nil; 1077 } 1078 1079 /* remove any soa's from the authority section */ 1080 soarr = rrremtype(&mp->ns, Tsoa); 1081 1082 /* incorporate answers */ 1083 if(mp->an) 1084 rrattach(mp->an, (mp->flags & Fauth) != 0); 1085 if(mp->ar) 1086 rrattach(mp->ar, 0); 1087 if(mp->ns && !cfg.justforw){ 1088 ndp = mp->ns->owner; 1089 rrattach(mp->ns, 0); 1090 } else 1091 ndp = nil; 1092 1093 /* free the question */ 1094 if(mp->qd) { 1095 rrfreelist(mp->qd); 1096 mp->qd = nil; 1097 } 1098 1099 /* 1100 * Any reply from an authoritative server, 1101 * or a positive reply terminates the search. 1102 * A negative response now also terminates the search. 1103 */ 1104 if(mp->an != nil || (mp->flags & Fauth)){ 1105 if(mp->an == nil && (mp->flags & Rmask) == Rname) 1106 qp->dp->respcode = Rname; 1107 else 1108 qp->dp->respcode = 0; 1109 1110 /* 1111 * cache any negative responses, free soarr. 1112 * negative responses need not be authoritative: 1113 * they can legitimately come from a cache. 1114 */ 1115 if( /* (mp->flags & Fauth) && */ mp->an == nil) 1116 cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr); 1117 else 1118 rrfreelist(soarr); 1119 return 1; 1120 } else if (mp->an == nil && (mp->flags & Rmask) == Rname) { 1121 qp->dp->respcode = Rname; 1122 /* 1123 * cache negative response. 1124 * negative responses need not be authoritative: 1125 * they can legitimately come from a cache. 1126 */ 1127 cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr); 1128 return 1; 1129 } 1130 rrfreelist(soarr); 1131 1132 /* 1133 * if we've been given better name servers, recurse. 1134 * we're called from udpquery, called from 1135 * netquery, which current holds qp->dp->querylck, 1136 * so release it now and acquire it upon return. 1137 * if we're a pure resolver, don't recurse, we have 1138 * to forward to a fixed set of named servers. 1139 */ 1140 if(!mp->ns || cfg.resolver && cfg.justforw) 1141 return 0; 1142 tp = rrlookup(ndp, Tns, NOneg); 1143 if(contains(qp->nsrp, tp)){ 1144 rrfreelist(tp); 1145 return 0; 1146 } 1147 procsetname("recursive query for %s %s", qp->dp->name, 1148 rrname(qp->type, buf, sizeof buf)); 1149 qunlock(&qp->dp->querylck[qtype2lck(qp->type)]); 1150 1151 queryinit(&nquery, qp->dp, qp->type, qp->req); 1152 nquery.nsrp = tp; 1153 rv = netquery(&nquery, depth+1); 1154 1155 qlock(&qp->dp->querylck[qtype2lck(qp->type)]); 1156 rrfreelist(tp); 1157 querydestroy(&nquery); 1158 return rv; 1159 } 1160 1161 /* 1162 * send a query via tcp to a single address (from ibuf's udp header) 1163 * and read the answer(s) into mp->an. 1164 */ 1165 static int 1166 tcpquery(Query *qp, DNSmsg *mp, int depth, uchar *ibuf, uchar *obuf, int len, 1167 int waitsecs, int inns, ushort req) 1168 { 1169 int rv = 0; 1170 ulong endtime; 1171 1172 endtime = time(nil) + waitsecs; 1173 if(endtime > qp->req->aborttime) 1174 endtime = qp->req->aborttime; 1175 1176 if (0) 1177 dnslog("%s: udp reply truncated; retrying query via tcp to %I", 1178 qp->dp->name, qp->tcpip); 1179 1180 qlock(&qp->tcplock); 1181 memmove(obuf, ibuf, IPaddrlen); /* send back to respondent */ 1182 /* sets qp->tcpip from obuf's udp header */ 1183 if (xmitquery(qp, Tcp, depth, obuf, inns, len) < 0 || 1184 readreply(qp, Tcp, req, ibuf, mp, endtime) < 0) 1185 rv = -1; 1186 if (qp->tcpfd > 0) { 1187 hangup(qp->tcpctlfd); 1188 close(qp->tcpctlfd); 1189 close(qp->tcpfd); 1190 } 1191 qp->tcpfd = qp->tcpctlfd = -1; 1192 qunlock(&qp->tcplock); 1193 return rv; 1194 } 1195 1196 /* 1197 * query name servers. If the name server returns a pointer to another 1198 * name server, recurse. 1199 */ 1200 static int 1201 netquery1(Query *qp, int depth, uchar *ibuf, uchar *obuf, int waitsecs, int inns) 1202 { 1203 int ndest, len, replywaits, rv; 1204 ushort req; 1205 ulong endtime; 1206 char buf[12]; 1207 uchar srcip[IPaddrlen]; 1208 Dest *p, *np, *dest; 1209 // Dest dest[Maxdest]; 1210 1211 /* pack request into a udp message */ 1212 req = rand(); 1213 len = mkreq(qp->dp, qp->type, obuf, Frecurse|Oquery, req); 1214 1215 /* no server addresses yet */ 1216 queryck(qp); 1217 dest = emalloc(Maxdest * sizeof *dest); /* dest can't be on stack */ 1218 for (p = dest; p < dest + Maxdest; p++) 1219 destinit(p); 1220 qp->curdest = qp->dest = dest; 1221 1222 /* 1223 * transmit udp requests and wait for answers. 1224 * at most Maxtrans attempts to each address. 1225 * each cycle send one more message than the previous. 1226 * retry a query via tcp if its response is truncated. 1227 */ 1228 for(ndest = 1; ndest < Maxdest; ndest++){ 1229 qp->ndest = ndest; 1230 qp->tcpset = 0; 1231 if (xmitquery(qp, Udp, depth, obuf, inns, len) < 0) 1232 break; 1233 1234 endtime = time(nil) + waitsecs; 1235 if(endtime > qp->req->aborttime) 1236 endtime = qp->req->aborttime; 1237 1238 for(replywaits = 0; replywaits < ndest; replywaits++){ 1239 DNSmsg m; 1240 1241 procsetname("reading %sside reply from %I: %s %s from %s", 1242 (inns? "in": "out"), obuf, qp->dp->name, 1243 rrname(qp->type, buf, sizeof buf), qp->req->from); 1244 1245 /* read udp answer into m */ 1246 if (readreply(qp, Udp, req, ibuf, &m, endtime) >= 0) 1247 memmove(srcip, ibuf, IPaddrlen); 1248 else if (!(m.flags & Ftrunc)) { 1249 freeanswers(&m); 1250 break; /* timed out on this dest */ 1251 } else { 1252 /* whoops, it was truncated! ask again via tcp */ 1253 rv = tcpquery(qp, &m, depth, ibuf, obuf, len, 1254 waitsecs, inns, req); /* answer in m */ 1255 if (rv < 0) { 1256 freeanswers(&m); 1257 break; /* failed via tcp too */ 1258 } 1259 memmove(srcip, qp->tcpip, IPaddrlen); 1260 } 1261 1262 /* find responder */ 1263 // dnslog("netquery1 got reply from %I", srcip); 1264 for(p = qp->dest; p < qp->curdest; p++) 1265 if(memcmp(p->a, srcip, sizeof p->a) == 0) 1266 break; 1267 1268 /* remove all addrs of responding server from list */ 1269 for(np = qp->dest; np < qp->curdest; np++) 1270 if(np->s == p->s) 1271 p->nx = Maxtrans; 1272 1273 /* free or incorporate RRs in m */ 1274 rv = procansw(qp, &m, srcip, depth, p); 1275 if (rv > 0) 1276 return rv; 1277 } 1278 } 1279 1280 /* if all servers returned failure, propagate it */ 1281 qp->dp->respcode = Rserver; 1282 for(p = dest; p < qp->curdest; p++) { 1283 destck(p); 1284 if(p->code != Rserver) 1285 qp->dp->respcode = 0; 1286 p->magic = 0; /* prevent accidents */ 1287 } 1288 1289 // if (qp->dp->respcode) 1290 // dnslog("netquery1 setting Rserver for %s", qp->dp->name); 1291 1292 free(qp->dest); 1293 qp->dest = qp->curdest = nil; /* prevent accidents */ 1294 return 0; 1295 } 1296 1297 /* 1298 * run a command with a supplied fd as standard input 1299 */ 1300 char * 1301 system(int fd, char *cmd) 1302 { 1303 int pid, p, i; 1304 static Waitmsg msg; 1305 1306 if((pid = fork()) == -1) 1307 sysfatal("fork failed: %r"); 1308 else if(pid == 0){ 1309 dup(fd, 0); 1310 close(fd); 1311 for (i = 3; i < 200; i++) 1312 close(i); /* don't leak fds */ 1313 execl("/bin/rc", "rc", "-c", cmd, nil); 1314 sysfatal("exec rc: %r"); 1315 } 1316 for(p = waitpid(); p >= 0; p = waitpid()) 1317 if(p == pid) 1318 return msg.msg; 1319 return "lost child"; 1320 } 1321 1322 /* 1323 * in principle we could use a single descriptor for a udp port 1324 * to send all queries and receive all the answers to them, 1325 * but we'd have to sort out the answers by dns-query id. 1326 */ 1327 static int 1328 udpquery(Query *qp, char *mntpt, int depth, int patient, int inns) 1329 { 1330 int fd, rv, wait; 1331 long now; 1332 ulong pcntprob; 1333 char *msg; 1334 uchar *obuf, *ibuf; 1335 static QLock mntlck; 1336 static ulong lastmount; 1337 1338 /* use alloced buffers rather than ones from the stack */ 1339 // ibuf = emalloc(Maxudpin+Udphdrsize); 1340 ibuf = emalloc(64*1024); /* max. tcp reply size */ 1341 obuf = emalloc(Maxudp+Udphdrsize); 1342 1343 fd = udpport(mntpt); 1344 while (fd < 0 && cfg.straddle && strcmp(mntpt, "/net.alt") == 0) { 1345 /* HACK: remount /net.alt */ 1346 now = time(nil); 1347 if (now < lastmount + Remntretry) 1348 sleep((lastmount + Remntretry - now)*1000); 1349 qlock(&mntlck); 1350 fd = udpport(mntpt); /* try again under lock */ 1351 if (fd < 0) { 1352 dnslog("[%d] remounting /net.alt", getpid()); 1353 unmount(nil, "/net.alt"); 1354 1355 msg = system(open("/dev/null", ORDWR), "outside"); 1356 1357 lastmount = time(nil); 1358 if (msg && *msg) { 1359 dnslog("[%d] can't remount /net.alt: %s", 1360 getpid(), msg); 1361 sleep(10*1000); /* don't spin wildly */ 1362 } else 1363 fd = udpport(mntpt); 1364 } 1365 qunlock(&mntlck); 1366 } 1367 if (fd < 0) { 1368 dnslog("can't get udpport for %s query of name %s: %r", 1369 mntpt, qp->dp->name); 1370 sysfatal("out of udp conversations"); /* we're buggered */ 1371 } 1372 1373 if (qp->type < 0 || qp->type >= nelem(likely)) 1374 pcntprob = 35; 1375 else 1376 pcntprob = likely[qp->type]; 1377 if (!patient) 1378 pcntprob /= 2; 1379 /* 1380 * Our QIP servers are busted, don't answer AAAA 1381 * and take forever to answer CNAME if there isn't one. 1382 * make time-to-wait proportional to estimated probability of an 1383 * RR of that type existing. 1384 */ 1385 qp->req->aborttime = time(nil) + (Maxreqtm * pcntprob)/100; 1386 if (qp->req->aborttime < time(nil) + 2) 1387 qp->req->aborttime = time(nil) + 2; 1388 qp->udpfd = fd; 1389 wait = (15 * pcntprob) / 100; /* for this outgoing query */ 1390 if (wait < 2) 1391 wait = 2; 1392 1393 rv = netquery1(qp, depth, ibuf, obuf, wait, inns); 1394 close(fd); 1395 qp->udpfd = -1; 1396 1397 free(obuf); 1398 free(ibuf); 1399 return rv; 1400 } 1401 1402 /* look up (dp->name,type) via *nsrp with results in *reqp */ 1403 static int 1404 netquery(Query *qp, int depth) 1405 { 1406 int lock, rv, triedin, inname; 1407 char buf[32]; 1408 RR *rp; 1409 1410 if(depth > 12) /* in a recursive loop? */ 1411 return 0; 1412 1413 slave(qp->req); 1414 /* 1415 * slave might have forked. if so, the parent process longjmped to 1416 * req->mret; we're usually the child slave, but if there are too 1417 * many children already, we're still the same process. 1418 */ 1419 1420 /* don't lock before call to slave so only children can block */ 1421 if(1) 1422 lock = qp->req->isslave != 0; 1423 if(1 && lock) { 1424 procsetname("query lock wait: %s %s from %s", qp->dp->name, 1425 rrname(qp->type, buf, sizeof buf), qp->req->from); 1426 /* 1427 * don't make concurrent queries for this name. 1428 * dozens of processes blocking here probably indicates 1429 * an error in our dns data that causes us to not 1430 * recognise a zone (area) as one of our own, thus 1431 * causing us to query other nameservers. 1432 */ 1433 qlock(&qp->dp->querylck[qtype2lck(qp->type)]); 1434 } 1435 procsetname("netquery: %s", qp->dp->name); 1436 1437 /* prepare server RR's for incremental lookup */ 1438 for(rp = qp->nsrp; rp; rp = rp->next) 1439 rp->marker = 0; 1440 1441 rv = 0; /* pessimism */ 1442 triedin = 0; 1443 1444 /* 1445 * normal resolvers and servers will just use mntpt for all addresses, 1446 * even on the outside. straddling servers will use mntpt (/net) 1447 * for inside addresses and /net.alt for outside addresses, 1448 * thus bypassing other inside nameservers. 1449 */ 1450 inname = insideaddr(qp->dp->name); 1451 if (!cfg.straddle || inname) { 1452 rv = udpquery(qp, mntpt, depth, Hurry, (cfg.inside? Inns: Outns)); 1453 triedin = 1; 1454 } 1455 1456 /* 1457 * if we're still looking, are inside, and have an outside domain, 1458 * try it on our outside interface, if any. 1459 */ 1460 if (rv == 0 && cfg.inside && !inname) { 1461 if (triedin) 1462 dnslog( 1463 "[%d] netquery: internal nameservers failed for %s; trying external", 1464 getpid(), qp->dp->name); 1465 1466 /* prepare server RR's for incremental lookup */ 1467 for(rp = qp->nsrp; rp; rp = rp->next) 1468 rp->marker = 0; 1469 1470 rv = udpquery(qp, "/net.alt", depth, Patient, Outns); 1471 } 1472 // if (rv == 0) /* could ask /net.alt/dns directly */ 1473 // askoutdns(qp->dp, qp->type); 1474 1475 if(1 && lock) 1476 qunlock(&qp->dp->querylck[qtype2lck(qp->type)]); 1477 return rv; 1478 } 1479 1480 int 1481 seerootns(void) 1482 { 1483 int rv; 1484 char root[] = ""; 1485 Request req; 1486 Query query; 1487 1488 memset(&req, 0, sizeof req); 1489 req.isslave = 1; 1490 req.aborttime = now + Maxreqtm; 1491 req.from = "internal"; 1492 queryinit(&query, dnlookup(root, Cin, 1), Tns, &req); 1493 query.nsrp = dblookup(root, Cin, Tns, 0, 0); 1494 rv = netquery(&query, 0); 1495 rrfreelist(query.nsrp); 1496 querydestroy(&query); 1497 return rv; 1498 } 1499