1 /* 2 * domain name resolvers, see rfcs 1035 and 1123 3 */ 4 #include <u.h> 5 #include <libc.h> 6 #include <ip.h> 7 #include <bio.h> 8 #include <ndb.h> 9 #include "dns.h" 10 11 #define NS2MS(ns) ((ns) / 1000000L) 12 #define S2MS(s) ((s) * 1000) 13 #define MS2S(ms) ((ms) / 1000) 14 15 typedef struct Dest Dest; 16 typedef struct Ipaddr Ipaddr; 17 typedef struct Query Query; 18 19 enum 20 { 21 Udp, Tcp, 22 Maxdest= 24, /* maximum destinations for a request message */ 23 Maxtrans= 3, /* maximum transmissions to a server */ 24 Destmagic= 0xcafebabe, 25 Querymagic= 0xdeadbeef, 26 }; 27 enum { Hurry, Patient, }; 28 enum { Outns, Inns, }; 29 enum { Remntretry = 15, }; /* min. sec.s between remount attempts */ 30 31 struct Ipaddr { 32 Ipaddr *next; 33 uchar ip[IPaddrlen]; 34 }; 35 36 struct Dest 37 { 38 uchar a[IPaddrlen]; /* ip address */ 39 DN *s; /* name server */ 40 int nx; /* number of transmissions */ 41 int code; /* response code; used to clear dp->respcode */ 42 43 ulong magic; 44 }; 45 46 struct Query { 47 DN *dp; /* domain */ 48 int type; /* and type to look up */ 49 Request *req; 50 RR *nsrp; /* name servers to consult */ 51 52 /* dest must not be on the stack due to forking in slave() */ 53 Dest *dest; /* array of destinations */ 54 Dest *curdest; /* pointer to one of them */ 55 int ndest; 56 57 int udpfd; 58 59 QLock tcplock; /* only one tcp call at a time per query */ 60 int tcpset; 61 int tcpfd; /* if Tcp, read replies from here */ 62 int tcpctlfd; 63 uchar tcpip[IPaddrlen]; 64 65 ulong magic; 66 }; 67 68 /* estimated % probability of such a record existing at all */ 69 int likely[] = { 70 [Ta] 95, 71 [Taaaa] 10, 72 [Tcname] 15, 73 [Tmx] 60, 74 [Tns] 90, 75 [Tnull] 5, 76 [Tptr] 35, 77 [Tsoa] 90, 78 [Tsrv] 60, 79 [Ttxt] 15, 80 [Tall] 95, 81 }; 82 83 static RR* dnresolve1(char*, int, int, Request*, int, int); 84 static int netquery(Query *, int); 85 86 /* 87 * reading /proc/pid/args yields either "name" or "name [display args]", 88 * so return only display args, if any. 89 */ 90 static char * 91 procgetname(void) 92 { 93 int fd, n; 94 char *lp, *rp; 95 char buf[256]; 96 97 snprint(buf, sizeof buf, "#p/%d/args", getpid()); 98 if((fd = open(buf, OREAD)) < 0) 99 return strdup(""); 100 *buf = '\0'; 101 n = read(fd, buf, sizeof buf-1); 102 close(fd); 103 if (n >= 0) 104 buf[n] = '\0'; 105 if ((lp = strchr(buf, '[')) == nil || 106 (rp = strrchr(buf, ']')) == nil) 107 return strdup(""); 108 *rp = '\0'; 109 return strdup(lp+1); 110 } 111 112 /* 113 * lookup 'type' info for domain name 'name'. If it doesn't exist, try 114 * looking it up as a canonical name. 115 */ 116 RR* 117 dnresolve(char *name, int class, int type, Request *req, RR **cn, int depth, 118 int recurse, int rooted, int *status) 119 { 120 RR *rp, *nrp, *drp; 121 DN *dp; 122 int loops; 123 char *procname; 124 char nname[Domlen]; 125 126 if(status) 127 *status = 0; 128 129 if(depth > 12) /* in a recursive loop? */ 130 return nil; 131 132 procname = procgetname(); 133 /* 134 * hack for systems that don't have resolve search 135 * lists. Just look up the simple name in the database. 136 */ 137 if(!rooted && strchr(name, '.') == nil){ 138 rp = nil; 139 drp = domainlist(class); 140 for(nrp = drp; rp == nil && nrp != nil; nrp = nrp->next){ 141 snprint(nname, sizeof nname, "%s.%s", name, 142 nrp->ptr->name); 143 rp = dnresolve(nname, class, type, req, cn, depth+1, 144 recurse, rooted, status); 145 rrfreelist(rrremneg(&rp)); 146 } 147 if(drp != nil) 148 rrfreelist(drp); /* was rrfree */ 149 procsetname(procname); 150 free(procname); 151 return rp; 152 } 153 154 /* 155 * try the name directly 156 */ 157 rp = dnresolve1(name, class, type, req, depth, recurse); 158 if(rp) { 159 procsetname(procname); 160 free(procname); 161 return randomize(rp); 162 } 163 164 /* try it as a canonical name if we weren't told the name didn't exist */ 165 dp = dnlookup(name, class, 0); 166 if(type != Tptr && dp->respcode != Rname) 167 for(loops = 0; rp == nil && loops < 32; loops++){ 168 rp = dnresolve1(name, class, Tcname, req, depth, recurse); 169 if(rp == nil) 170 break; 171 172 if(rp->negative){ 173 rrfreelist(rp); 174 rp = nil; 175 break; 176 } 177 178 name = rp->host->name; 179 if(cn) 180 rrcat(cn, rp); 181 else 182 rrfreelist(rp); 183 184 rp = dnresolve1(name, class, type, req, depth, recurse); 185 } 186 187 /* distinction between not found and not good */ 188 if(rp == nil && status != nil && dp->respcode != 0) 189 *status = dp->respcode; 190 191 procsetname(procname); 192 free(procname); 193 return randomize(rp); 194 } 195 196 static void 197 queryinit(Query *qp, DN *dp, int type, Request *req) 198 { 199 memset(qp, 0, sizeof *qp); 200 qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1; 201 qp->dp = dp; 202 qp->type = type; 203 qp->req = req; 204 qp->nsrp = nil; 205 qp->dest = qp->curdest = nil; 206 qp->magic = Querymagic; 207 } 208 209 static void 210 queryck(Query *qp) 211 { 212 assert(qp); 213 assert(qp->magic == Querymagic); 214 } 215 216 static void 217 querydestroy(Query *qp) 218 { 219 queryck(qp); 220 /* leave udpfd alone */ 221 if (qp->tcpfd > 0) 222 close(qp->tcpfd); 223 if (qp->tcpctlfd > 0) { 224 hangup(qp->tcpctlfd); 225 close(qp->tcpctlfd); 226 } 227 free(qp->dest); 228 memset(qp, 0, sizeof *qp); /* prevent accidents */ 229 qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1; 230 } 231 232 static void 233 destinit(Dest *p) 234 { 235 memset(p, 0, sizeof *p); 236 p->magic = Destmagic; 237 } 238 239 static void 240 destck(Dest *p) 241 { 242 assert(p); 243 assert(p->magic == Destmagic); 244 } 245 246 static void 247 destdestroy(Dest *p) 248 { 249 USED(p); 250 } 251 252 /* 253 * if the response to a query hasn't arrived within 100 ms., 254 * it's unlikely to arrive at all. after 1 s., it's really unlikely. 255 * queries for missing RRs are likely to produce time-outs rather than 256 * negative responses, so cname and aaaa queries are likely to time out, 257 * thus we don't wait very long for them. 258 */ 259 static void 260 notestats(vlong start, int tmout, int type) 261 { 262 qlock(&stats); 263 if (tmout) { 264 stats.tmout++; 265 if (type == Taaaa) 266 stats.tmoutv6++; 267 else if (type == Tcname) 268 stats.tmoutcname++; 269 } else { 270 long wait10ths = NS2MS(nsec() - start) / 100; 271 272 if (wait10ths <= 0) 273 stats.under10ths[0]++; 274 else if (wait10ths >= nelem(stats.under10ths)) 275 stats.under10ths[nelem(stats.under10ths) - 1]++; 276 else 277 stats.under10ths[wait10ths]++; 278 } 279 qunlock(&stats); 280 } 281 282 static void 283 noteinmem(void) 284 { 285 qlock(&stats); 286 stats.answinmem++; 287 qunlock(&stats); 288 } 289 290 static RR* 291 dnresolve1(char *name, int class, int type, Request *req, int depth, 292 int recurse) 293 { 294 char *cp; 295 Area *area; 296 DN *dp, *nsdp; 297 RR *rp, *nsrp, *dbnsrp; 298 Query query; 299 300 if(debug) 301 dnslog("[%d] dnresolve1 %s %d %d", getpid(), name, type, class); 302 303 /* only class Cin implemented so far */ 304 if(class != Cin) 305 return nil; 306 307 dp = dnlookup(name, class, 1); 308 309 /* 310 * Try the cache first 311 */ 312 rp = rrlookup(dp, type, OKneg); 313 if(rp) 314 if(rp->db){ 315 /* unauthoritative db entries are hints */ 316 if(rp->auth) { 317 noteinmem(); 318 return rp; 319 } 320 } else 321 /* cached entry must still be valid */ 322 if(rp->ttl > now) 323 /* but Tall entries are special */ 324 if(type != Tall || rp->query == Tall) { 325 noteinmem(); 326 return rp; 327 } 328 329 rrfreelist(rp); 330 331 /* 332 * try the cache for a canonical name. if found punt 333 * since we'll find it during the canonical name search 334 * in dnresolve(). 335 */ 336 if(type != Tcname){ 337 rp = rrlookup(dp, Tcname, NOneg); 338 rrfreelist(rp); 339 if(rp) 340 return nil; 341 } 342 343 /* 344 * if the domain name is within an area of ours, 345 * we should have found its data in memory by now. 346 */ 347 area = inmyarea(dp->name); 348 if (area || strncmp(dp->name, "local#", 6) == 0) { 349 // char buf[32]; 350 351 // dnslog("%s %s: no data in area %s", dp->name, 352 // rrname(type, buf, sizeof buf), area->soarr->owner->name); 353 return nil; 354 } 355 356 queryinit(&query, dp, type, req); 357 358 /* 359 * if we're running as just a resolver, query our 360 * designated name servers 361 */ 362 if(cfg.resolver){ 363 nsrp = randomize(getdnsservers(class)); 364 if(nsrp != nil) { 365 query.nsrp = nsrp; 366 if(netquery(&query, depth+1)){ 367 rrfreelist(nsrp); 368 querydestroy(&query); 369 return rrlookup(dp, type, OKneg); 370 } 371 rrfreelist(nsrp); 372 } 373 } 374 375 /* 376 * walk up the domain name looking for 377 * a name server for the domain. 378 */ 379 for(cp = name; cp; cp = walkup(cp)){ 380 /* 381 * if this is a local (served by us) domain, 382 * return answer 383 */ 384 dbnsrp = randomize(dblookup(cp, class, Tns, 0, 0)); 385 if(dbnsrp && dbnsrp->local){ 386 rp = dblookup(name, class, type, 1, dbnsrp->ttl); 387 rrfreelist(dbnsrp); 388 querydestroy(&query); 389 return rp; 390 } 391 392 /* 393 * if recursion isn't set, just accept local 394 * entries 395 */ 396 if(recurse == Dontrecurse){ 397 if(dbnsrp) 398 rrfreelist(dbnsrp); 399 continue; 400 } 401 402 /* look for ns in cache */ 403 nsdp = dnlookup(cp, class, 0); 404 nsrp = nil; 405 if(nsdp) 406 nsrp = randomize(rrlookup(nsdp, Tns, NOneg)); 407 408 /* if the entry timed out, ignore it */ 409 if(nsrp && nsrp->ttl < now){ 410 rrfreelist(nsrp); 411 nsrp = nil; 412 } 413 414 if(nsrp){ 415 rrfreelist(dbnsrp); 416 417 /* query the name servers found in cache */ 418 query.nsrp = nsrp; 419 if(netquery(&query, depth+1)){ 420 rrfreelist(nsrp); 421 querydestroy(&query); 422 return rrlookup(dp, type, OKneg); 423 } 424 rrfreelist(nsrp); 425 continue; 426 } 427 428 /* use ns from db */ 429 if(dbnsrp){ 430 /* try the name servers found in db */ 431 query.nsrp = dbnsrp; 432 if(netquery(&query, depth+1)){ 433 /* we got an answer */ 434 rrfreelist(dbnsrp); 435 querydestroy(&query); 436 return rrlookup(dp, type, NOneg); 437 } 438 rrfreelist(dbnsrp); 439 } 440 } 441 querydestroy(&query); 442 443 /* settle for a non-authoritative answer */ 444 rp = rrlookup(dp, type, OKneg); 445 if(rp) 446 return rp; 447 448 /* noone answered. try the database, we might have a chance. */ 449 return dblookup(name, class, type, 0, 0); 450 } 451 452 /* 453 * walk a domain name one element to the right. 454 * return a pointer to that element. 455 * in other words, return a pointer to the parent domain name. 456 */ 457 char* 458 walkup(char *name) 459 { 460 char *cp; 461 462 cp = strchr(name, '.'); 463 if(cp) 464 return cp+1; 465 else if(*name) 466 return ""; 467 else 468 return 0; 469 } 470 471 /* 472 * Get a udp port for sending requests and reading replies. Put the port 473 * into "headers" mode. 474 */ 475 static char *hmsg = "headers"; 476 477 int 478 udpport(char *mtpt) 479 { 480 int fd, ctl; 481 char ds[64], adir[64]; 482 483 /* get a udp port */ 484 snprint(ds, sizeof ds, "%s/udp!*!0", (mtpt? mtpt: "/net")); 485 ctl = announce(ds, adir); 486 if(ctl < 0){ 487 /* warning("can't get udp port"); */ 488 return -1; 489 } 490 491 /* turn on header style interface */ 492 if(write(ctl, hmsg, strlen(hmsg)) , 0){ 493 close(ctl); 494 warning(hmsg); 495 return -1; 496 } 497 498 /* grab the data file */ 499 snprint(ds, sizeof ds, "%s/data", adir); 500 fd = open(ds, ORDWR); 501 close(ctl); 502 if(fd < 0) 503 warning("can't open udp port %s: %r", ds); 504 return fd; 505 } 506 507 /* generate a DNS UDP query packet */ 508 int 509 mkreq(DN *dp, int type, uchar *buf, int flags, ushort reqno) 510 { 511 DNSmsg m; 512 int len; 513 Udphdr *uh = (Udphdr*)buf; 514 515 /* stuff port number into output buffer */ 516 memset(uh, 0, sizeof *uh); 517 hnputs(uh->rport, 53); 518 519 /* make request and convert it to output format */ 520 memset(&m, 0, sizeof m); 521 m.flags = flags; 522 m.id = reqno; 523 m.qd = rralloc(type); 524 m.qd->owner = dp; 525 m.qd->type = type; 526 len = convDNS2M(&m, &buf[Udphdrsize], Maxudp); 527 rrfree(m.qd); 528 return len; 529 } 530 531 /* for alarms in readreply */ 532 static void 533 ding(void *x, char *msg) 534 { 535 USED(x); 536 if(strcmp(msg, "alarm") == 0) 537 noted(NCONT); 538 else 539 noted(NDFLT); 540 } 541 542 void 543 freeanswers(DNSmsg *mp) 544 { 545 rrfreelist(mp->qd); 546 rrfreelist(mp->an); 547 rrfreelist(mp->ns); 548 rrfreelist(mp->ar); 549 mp->qd = mp->an = mp->ns = mp->ar = nil; 550 } 551 552 /* sets srcip */ 553 static int 554 readnet(Query *qp, int medium, uchar *ibuf, ulong endtime, uchar **replyp, 555 uchar *srcip) 556 { 557 int len, fd; 558 long ms; 559 vlong startns = nsec(); 560 uchar *reply; 561 uchar lenbuf[2]; 562 563 /* timed read of reply */ 564 ms = S2MS(endtime) - NS2MS(startns); 565 if (ms < 2000) 566 ms = 2000; /* give the remote ns a fighting chance */ 567 reply = ibuf; 568 len = -1; /* pessimism */ 569 memset(srcip, 0, IPaddrlen); 570 if (medium == Udp) 571 if (qp->udpfd <= 0) 572 dnslog("readnet: qp->udpfd closed"); 573 else { 574 alarm(ms); 575 len = read(qp->udpfd, ibuf, Udphdrsize+Maxudpin); 576 alarm(0); 577 notestats(startns, len < 0, qp->type); 578 if (len >= IPaddrlen) 579 memmove(srcip, ibuf, IPaddrlen); 580 if (len >= Udphdrsize) { 581 len -= Udphdrsize; 582 reply += Udphdrsize; 583 } 584 } 585 else { 586 if (!qp->tcpset) 587 dnslog("readnet: tcp params not set"); 588 alarm(ms); 589 fd = qp->tcpfd; 590 if (fd <= 0) 591 dnslog("readnet: %s: tcp fd unset for dest %I", 592 qp->dp->name, qp->tcpip); 593 else if (readn(fd, lenbuf, 2) != 2) { 594 dnslog("readnet: short read of tcp size from %I", 595 qp->tcpip); 596 /* probably a time-out */ 597 notestats(startns, 1, qp->type); 598 } else { 599 len = lenbuf[0]<<8 | lenbuf[1]; 600 if (readn(fd, ibuf, len) != len) { 601 dnslog("readnet: short read of tcp data from %I", 602 qp->tcpip); 603 /* probably a time-out */ 604 notestats(startns, 1, qp->type); 605 len = -1; 606 } 607 } 608 alarm(0); 609 memmove(srcip, qp->tcpip, IPaddrlen); 610 } 611 *replyp = reply; 612 return len; 613 } 614 615 /* 616 * read replies to a request and remember the rrs in the answer(s). 617 * ignore any of the wrong type. 618 * wait at most until endtime. 619 */ 620 static int 621 readreply(Query *qp, int medium, ushort req, uchar *ibuf, DNSmsg *mp, 622 ulong endtime) 623 { 624 int len, rv; 625 char *err; 626 char tbuf[32]; 627 uchar *reply; 628 uchar srcip[IPaddrlen]; 629 RR *rp; 630 631 notify(ding); 632 633 queryck(qp); 634 rv = 0; 635 memset(mp, 0, sizeof *mp); 636 if (time(nil) >= endtime) 637 return -1; /* timed out before we started */ 638 639 memset(srcip, 0, sizeof srcip); 640 if (0) 641 len = -1; 642 for (; time(nil) < endtime && 643 (len = readnet(qp, medium, ibuf, endtime, &reply, srcip)) >= 0; 644 freeanswers(mp)){ 645 /* convert into internal format */ 646 memset(mp, 0, sizeof *mp); 647 err = convM2DNS(reply, len, mp, nil); 648 if (mp->flags & Ftrunc) { 649 free(err); 650 freeanswers(mp); 651 /* notify our caller to retry the query via tcp. */ 652 return -1; 653 } else if(err){ 654 dnslog("readreply: %s: input err, len %d: %s: %I", 655 qp->dp->name, len, err, srcip); 656 free(err); 657 continue; 658 } 659 if(debug) 660 logreply(qp->req->id, srcip, mp); 661 662 /* answering the right question? */ 663 if(mp->id != req) 664 dnslog("%d: id %d instead of %d: %I", qp->req->id, 665 mp->id, req, srcip); 666 else if(mp->qd == 0) 667 dnslog("%d: no question RR: %I", qp->req->id, srcip); 668 else if(mp->qd->owner != qp->dp) 669 dnslog("%d: owner %s instead of %s: %I", qp->req->id, 670 mp->qd->owner->name, qp->dp->name, srcip); 671 else if(mp->qd->type != qp->type) 672 dnslog("%d: qp->type %d instead of %d: %I", 673 qp->req->id, mp->qd->type, qp->type, srcip); 674 else { 675 /* remember what request this is in answer to */ 676 for(rp = mp->an; rp; rp = rp->next) 677 rp->query = qp->type; 678 return rv; 679 } 680 } 681 if (time(nil) >= endtime) { 682 ; /* query expired */ 683 } else if (0) { 684 /* this happens routinely when a read times out */ 685 dnslog("readreply: %s type %s: ns %I read error or eof " 686 "(returned %d): %r", qp->dp->name, rrname(qp->type, 687 tbuf, sizeof tbuf), srcip, len); 688 if (medium == Udp) 689 for (rp = qp->nsrp; rp != nil; rp = rp->next) 690 if (rp->type == Tns) 691 dnslog("readreply: %s: query sent to " 692 "ns %s", qp->dp->name, 693 rp->host->name); 694 } 695 return -1; 696 } 697 698 /* 699 * return non-0 if first list includes second list 700 */ 701 int 702 contains(RR *rp1, RR *rp2) 703 { 704 RR *trp1, *trp2; 705 706 for(trp2 = rp2; trp2; trp2 = trp2->next){ 707 for(trp1 = rp1; trp1; trp1 = trp1->next) 708 if(trp1->type == trp2->type) 709 if(trp1->host == trp2->host) 710 if(trp1->owner == trp2->owner) 711 break; 712 if(trp1 == nil) 713 return 0; 714 } 715 return 1; 716 } 717 718 719 /* 720 * return multicast version if any 721 */ 722 int 723 ipisbm(uchar *ip) 724 { 725 if(isv4(ip)){ 726 if (ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0 || 727 ipcmp(ip, IPv4bcast) == 0) 728 return 4; 729 } else 730 if(ip[0] == 0xff) 731 return 6; 732 return 0; 733 } 734 735 /* 736 * Get next server address 737 */ 738 static int 739 serveraddrs(Query *qp, int nd, int depth) 740 { 741 RR *rp, *arp, *trp; 742 Dest *cur; 743 744 if(nd >= Maxdest) 745 return 0; 746 747 /* 748 * look for a server whose address we already know. 749 * if we find one, mark it so we ignore this on 750 * subsequent passes. 751 */ 752 arp = 0; 753 for(rp = qp->nsrp; rp; rp = rp->next){ 754 assert(rp->magic == RRmagic); 755 if(rp->marker) 756 continue; 757 arp = rrlookup(rp->host, Ta, NOneg); 758 if(arp){ 759 rp->marker = 1; 760 break; 761 } 762 arp = dblookup(rp->host->name, Cin, Ta, 0, 0); 763 if(arp){ 764 rp->marker = 1; 765 break; 766 } 767 } 768 769 /* 770 * if the cache and database lookup didn't find any new 771 * server addresses, try resolving one via the network. 772 * Mark any we try to resolve so we don't try a second time. 773 */ 774 if(arp == 0) 775 for(rp = qp->nsrp; rp; rp = rp->next){ 776 if(rp->marker) 777 continue; 778 rp->marker = 1; 779 780 /* 781 * avoid loops looking up a server under itself 782 */ 783 if(subsume(rp->owner->name, rp->host->name)) 784 continue; 785 786 arp = dnresolve(rp->host->name, Cin, Ta, qp->req, 0, 787 depth+1, Recurse, 1, 0); 788 rrfreelist(rrremneg(&arp)); 789 if(arp) 790 break; 791 } 792 793 /* use any addresses that we found */ 794 for(trp = arp; trp && nd < Maxdest; trp = trp->next){ 795 cur = &qp->dest[nd]; 796 parseip(cur->a, trp->ip->name); 797 /* 798 * straddling servers can reject all nameservers if they are all 799 * inside, so be sure to list at least one outside ns at 800 * the end of the ns list in /lib/ndb for `dom='. 801 */ 802 if (ipisbm(cur->a) || 803 cfg.straddle && !insideaddr(qp->dp->name) && insidens(cur->a)) 804 continue; 805 cur->nx = 0; 806 cur->s = trp->owner; 807 cur->code = Rtimeout; 808 nd++; 809 } 810 rrfreelist(arp); 811 return nd; 812 } 813 814 /* 815 * cache negative responses 816 */ 817 static void 818 cacheneg(DN *dp, int type, int rcode, RR *soarr) 819 { 820 RR *rp; 821 DN *soaowner; 822 ulong ttl; 823 824 stats.negcached++; 825 826 /* no cache time specified, don't make anything up */ 827 if(soarr != nil){ 828 if(soarr->next != nil){ 829 rrfreelist(soarr->next); 830 soarr->next = nil; 831 } 832 soaowner = soarr->owner; 833 } else 834 soaowner = nil; 835 836 /* the attach can cause soarr to be freed so mine it now */ 837 if(soarr != nil && soarr->soa != nil) 838 ttl = soarr->soa->minttl+now; 839 else 840 ttl = 5*Min; 841 842 /* add soa and negative RR to the database */ 843 rrattach(soarr, Authoritative); 844 845 rp = rralloc(type); 846 rp->owner = dp; 847 rp->negative = 1; 848 rp->negsoaowner = soaowner; 849 rp->negrcode = rcode; 850 rp->ttl = ttl; 851 rrattach(rp, Authoritative); 852 } 853 854 static int 855 setdestoutns(Dest *p, int n) 856 { 857 uchar *outns = outsidens(n); 858 859 destck(p); 860 destinit(p); 861 if (outns == nil) { 862 if (n == 0) 863 dnslog("[%d] no outside-ns in ndb", getpid()); 864 return -1; 865 } 866 memmove(p->a, outns, sizeof p->a); 867 p->s = dnlookup("outside-ns-ips", Cin, 1); 868 return 0; 869 } 870 871 /* 872 * issue query via UDP or TCP as appropriate. 873 * for TCP, returns with qp->tcpip set from udppkt header. 874 */ 875 static int 876 mydnsquery(Query *qp, int medium, uchar *udppkt, int len) 877 { 878 int rv = -1, nfd; 879 char *domain; 880 char conndir[40]; 881 uchar belen[2]; 882 NetConnInfo *nci; 883 884 queryck(qp); 885 domain = smprint("%I", udppkt); 886 if (myaddr(domain)) { 887 dnslog("mydnsquery: trying to send to myself (%s); bzzzt", 888 domain); 889 free(domain); 890 return rv; 891 } 892 893 switch (medium) { 894 case Udp: 895 free(domain); 896 nfd = dup(qp->udpfd, -1); 897 if (nfd < 0) { 898 warning("mydnsquery: qp->udpfd %d: %r", qp->udpfd); 899 close(qp->udpfd); /* ensure it's closed */ 900 qp->udpfd = -1; /* poison it */ 901 return rv; 902 } 903 close(nfd); 904 905 if (qp->udpfd <= 0) 906 dnslog("mydnsquery: qp->udpfd %d closed", qp->udpfd); 907 else { 908 if (write(qp->udpfd, udppkt, len+Udphdrsize) != 909 len+Udphdrsize) 910 warning("sending udp msg: %r"); 911 else { 912 stats.qsent++; 913 rv = 0; 914 } 915 } 916 break; 917 case Tcp: 918 /* send via TCP & keep fd around for reply */ 919 alarm(10*1000); 920 qp->tcpfd = rv = dial(netmkaddr(domain, "tcp", "dns"), nil, 921 conndir, &qp->tcpctlfd); 922 alarm(0); 923 if (qp->tcpfd < 0) { 924 dnslog("can't dial tcp!%s!dns: %r", domain); 925 free(domain); 926 break; 927 } 928 free(domain); 929 nci = getnetconninfo(conndir, qp->tcpfd); 930 if (nci) { 931 parseip(qp->tcpip, nci->rsys); 932 freenetconninfo(nci); 933 } else 934 dnslog("mydnsquery: getnetconninfo failed"); 935 qp->tcpset = 1; 936 937 belen[0] = len >> 8; 938 belen[1] = len; 939 if (write(qp->tcpfd, belen, 2) != 2 || 940 write(qp->tcpfd, udppkt + Udphdrsize, len) != len) 941 warning("sending tcp msg: %r"); 942 break; 943 default: 944 sysfatal("mydnsquery: bad medium"); 945 } 946 return rv; 947 } 948 949 /* 950 * send query to all UDP destinations or one TCP destination, 951 * taken from obuf (udp packet) header 952 */ 953 static int 954 xmitquery(Query *qp, int medium, int depth, uchar *obuf, int inns, int len) 955 { 956 int j, n; 957 char buf[32]; 958 Dest *p; 959 960 queryck(qp); 961 if(time(nil) >= qp->req->aborttime) 962 return -1; 963 964 /* 965 * get a nameserver address if we need one. 966 * serveraddrs populates qp->dest. 967 */ 968 p = qp->dest; 969 destck(p); 970 if (qp->ndest < 0 || qp->ndest > Maxdest) 971 dnslog("qp->ndest %d out of range", qp->ndest); 972 if (qp->ndest > qp->curdest - p) 973 qp->curdest = &qp->dest[serveraddrs(qp, qp->curdest - p, depth)]; 974 destck(qp->curdest); 975 976 /* no servers, punt */ 977 if (qp->curdest == qp->dest) 978 if (cfg.straddle && cfg.inside) { 979 /* get ips of "outside-ns-ips" */ 980 p = qp->curdest = qp->dest; 981 for(n = 0; n < Maxdest; n++, qp->curdest++) 982 if (setdestoutns(qp->curdest, n) < 0) 983 break; 984 } else { 985 /* it's probably just a bogus domain, don't log it */ 986 // dnslog("xmitquery: %s: no nameservers", qp->dp->name); 987 return -1; 988 } 989 990 /* send to first 'qp->ndest' destinations */ 991 j = 0; 992 if (medium == Tcp) { 993 j++; 994 queryck(qp); 995 assert(qp->dp); 996 procsetname("tcp %sside query for %s %s", (inns? "in": "out"), 997 qp->dp->name, rrname(qp->type, buf, sizeof buf)); 998 mydnsquery(qp, medium, obuf, len); /* sets qp->tcpip from obuf */ 999 if(debug) 1000 logsend(qp->req->id, depth, qp->tcpip, "", qp->dp->name, 1001 qp->type); 1002 } else 1003 for(; p < &qp->dest[qp->ndest] && p < qp->curdest; p++){ 1004 /* skip destinations we've finished with */ 1005 if(p->nx >= Maxtrans) 1006 continue; 1007 1008 j++; 1009 1010 /* exponential backoff of requests */ 1011 if((1<<p->nx) > qp->ndest) 1012 continue; 1013 1014 procsetname("udp %sside query to %I/%s %s %s", 1015 (inns? "in": "out"), p->a, p->s->name, 1016 qp->dp->name, rrname(qp->type, buf, sizeof buf)); 1017 if(debug) 1018 logsend(qp->req->id, depth, p->a, p->s->name, 1019 qp->dp->name, qp->type); 1020 1021 /* fill in UDP destination addr & send it */ 1022 memmove(obuf, p->a, sizeof p->a); 1023 mydnsquery(qp, medium, obuf, len); 1024 p->nx++; 1025 } 1026 if(j == 0) { 1027 // dnslog("xmitquery: %s: no destinations left", qp->dp->name); 1028 return -1; 1029 } 1030 return 0; 1031 } 1032 1033 static int lckindex[Maxlcks] = { 1034 0, /* all others map here */ 1035 Ta, 1036 Tns, 1037 Tcname, 1038 Tsoa, 1039 Tptr, 1040 Tmx, 1041 Ttxt, 1042 Taaaa, 1043 }; 1044 1045 static int 1046 qtype2lck(int qtype) /* map query type to querylck index */ 1047 { 1048 int i; 1049 1050 for (i = 1; i < nelem(lckindex); i++) 1051 if (lckindex[i] == qtype) 1052 return i; 1053 return 0; 1054 } 1055 1056 /* is mp a cachable negative response (with Rname set)? */ 1057 static int 1058 isnegrname(DNSmsg *mp) 1059 { 1060 /* TODO: could add || cfg.justforw to RHS of && */ 1061 return mp->an == nil && (mp->flags & Rmask) == Rname; 1062 } 1063 1064 static int 1065 procansw(Query *qp, DNSmsg *mp, uchar *srcip, int depth, Dest *p) 1066 { 1067 int rv; 1068 // int lcktype; 1069 char buf[32]; 1070 DN *ndp; 1071 Query nquery; 1072 RR *tp, *soarr; 1073 1074 if (mp->an == nil) 1075 stats.negans++; 1076 1077 /* ignore any error replies */ 1078 if((mp->flags & Rmask) == Rserver){ 1079 stats.negserver++; 1080 freeanswers(mp); 1081 if(p != qp->curdest) 1082 p->code = Rserver; 1083 return -1; 1084 } 1085 1086 /* ignore any bad delegations */ 1087 if(mp->ns && baddelegation(mp->ns, qp->nsrp, srcip)){ 1088 stats.negbaddeleg++; 1089 if(mp->an == nil){ 1090 stats.negbdnoans++; 1091 freeanswers(mp); 1092 if(p != qp->curdest) 1093 p->code = Rserver; 1094 return -1; 1095 } 1096 rrfreelist(mp->ns); 1097 mp->ns = nil; 1098 } 1099 1100 /* remove any soa's from the authority section */ 1101 soarr = rrremtype(&mp->ns, Tsoa); 1102 1103 /* incorporate answers */ 1104 unique(mp->an); 1105 unique(mp->ns); 1106 unique(mp->ar); 1107 if(mp->an) 1108 rrattach(mp->an, (mp->flags & Fauth) != 0); 1109 if(mp->ar) 1110 rrattach(mp->ar, Notauthoritative); 1111 if(mp->ns && !cfg.justforw){ 1112 ndp = mp->ns->owner; 1113 rrattach(mp->ns, Notauthoritative); 1114 } else { 1115 ndp = nil; 1116 rrfreelist(mp->ns); 1117 mp->ns = nil; 1118 } 1119 1120 /* free the question */ 1121 if(mp->qd) { 1122 rrfreelist(mp->qd); 1123 mp->qd = nil; 1124 } 1125 1126 /* 1127 * Any reply from an authoritative server, 1128 * or a positive reply terminates the search. 1129 * A negative response now also terminates the search. 1130 */ 1131 if(mp->an != nil || (mp->flags & Fauth)){ 1132 if(isnegrname(mp)) 1133 qp->dp->respcode = Rname; 1134 else 1135 qp->dp->respcode = 0; 1136 1137 /* 1138 * cache any negative responses, free soarr. 1139 * negative responses need not be authoritative: 1140 * they can legitimately come from a cache. 1141 */ 1142 if( /* (mp->flags & Fauth) && */ mp->an == nil) 1143 cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr); 1144 else 1145 rrfreelist(soarr); 1146 return 1; 1147 } else if (isnegrname(mp)) { 1148 qp->dp->respcode = Rname; 1149 /* 1150 * cache negative response. 1151 * negative responses need not be authoritative: 1152 * they can legitimately come from a cache. 1153 */ 1154 cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr); 1155 return 1; 1156 } 1157 stats.negnorname++; 1158 rrfreelist(soarr); 1159 1160 /* 1161 * if we've been given better name servers, recurse. 1162 * if we're a pure resolver, don't recurse, we have 1163 * to forward to a fixed set of named servers. 1164 */ 1165 if(!mp->ns || cfg.resolver && cfg.justforw) 1166 return 0; 1167 tp = rrlookup(ndp, Tns, NOneg); 1168 if(contains(qp->nsrp, tp)){ 1169 rrfreelist(tp); 1170 return 0; 1171 } 1172 procsetname("recursive query for %s %s", qp->dp->name, 1173 rrname(qp->type, buf, sizeof buf)); 1174 /* 1175 * we're called from udpquery, called from 1176 * netquery, which current holds qp->dp->querylck, 1177 * so release it now and acquire it upon return. 1178 */ 1179 // lcktype = qtype2lck(qp->type); 1180 // qunlock(&qp->dp->querylck[lcktype]); 1181 1182 queryinit(&nquery, qp->dp, qp->type, qp->req); 1183 nquery.nsrp = tp; 1184 rv = netquery(&nquery, depth+1); 1185 1186 // qlock(&qp->dp->querylck[lcktype]); 1187 rrfreelist(tp); 1188 querydestroy(&nquery); 1189 return rv; 1190 } 1191 1192 /* 1193 * send a query via tcp to a single address (from ibuf's udp header) 1194 * and read the answer(s) into mp->an. 1195 */ 1196 static int 1197 tcpquery(Query *qp, DNSmsg *mp, int depth, uchar *ibuf, uchar *obuf, int len, 1198 int waitsecs, int inns, ushort req) 1199 { 1200 int rv = 0; 1201 ulong endtime; 1202 1203 endtime = time(nil) + waitsecs; 1204 if(endtime > qp->req->aborttime) 1205 endtime = qp->req->aborttime; 1206 1207 if (0) 1208 dnslog("%s: udp reply truncated; retrying query via tcp to %I", 1209 qp->dp->name, qp->tcpip); 1210 1211 qlock(&qp->tcplock); 1212 memmove(obuf, ibuf, IPaddrlen); /* send back to respondent */ 1213 /* sets qp->tcpip from obuf's udp header */ 1214 if (xmitquery(qp, Tcp, depth, obuf, inns, len) < 0 || 1215 readreply(qp, Tcp, req, ibuf, mp, endtime) < 0) 1216 rv = -1; 1217 if (qp->tcpfd > 0) { 1218 hangup(qp->tcpctlfd); 1219 close(qp->tcpctlfd); 1220 close(qp->tcpfd); 1221 } 1222 qp->tcpfd = qp->tcpctlfd = -1; 1223 qunlock(&qp->tcplock); 1224 return rv; 1225 } 1226 1227 /* 1228 * query name servers. If the name server returns a pointer to another 1229 * name server, recurse. 1230 */ 1231 static int 1232 queryns(Query *qp, int depth, uchar *ibuf, uchar *obuf, int waitsecs, int inns) 1233 { 1234 int ndest, len, replywaits, rv; 1235 ushort req; 1236 ulong endtime; 1237 char buf[12]; 1238 uchar srcip[IPaddrlen]; 1239 Dest *p, *np, *dest; 1240 // Dest dest[Maxdest]; 1241 1242 /* pack request into a udp message */ 1243 req = rand(); 1244 len = mkreq(qp->dp, qp->type, obuf, Frecurse|Oquery, req); 1245 1246 /* no server addresses yet */ 1247 queryck(qp); 1248 dest = emalloc(Maxdest * sizeof *dest); /* dest can't be on stack */ 1249 for (p = dest; p < dest + Maxdest; p++) 1250 destinit(p); 1251 qp->curdest = qp->dest = dest; 1252 1253 /* 1254 * transmit udp requests and wait for answers. 1255 * at most Maxtrans attempts to each address. 1256 * each cycle send one more message than the previous. 1257 * retry a query via tcp if its response is truncated. 1258 */ 1259 for(ndest = 1; ndest < Maxdest; ndest++){ 1260 qp->ndest = ndest; 1261 qp->tcpset = 0; 1262 if (xmitquery(qp, Udp, depth, obuf, inns, len) < 0) 1263 break; 1264 1265 endtime = time(nil) + waitsecs; 1266 if(endtime > qp->req->aborttime) 1267 endtime = qp->req->aborttime; 1268 1269 for(replywaits = 0; replywaits < ndest; replywaits++){ 1270 DNSmsg m; 1271 1272 procsetname("reading %sside reply from %I: %s %s from %s", 1273 (inns? "in": "out"), obuf, qp->dp->name, 1274 rrname(qp->type, buf, sizeof buf), qp->req->from); 1275 1276 /* read udp answer into m */ 1277 if (readreply(qp, Udp, req, ibuf, &m, endtime) >= 0) 1278 memmove(srcip, ibuf, IPaddrlen); 1279 else if (!(m.flags & Ftrunc)) { 1280 freeanswers(&m); 1281 break; /* timed out on this dest */ 1282 } else { 1283 /* whoops, it was truncated! ask again via tcp */ 1284 rv = tcpquery(qp, &m, depth, ibuf, obuf, len, 1285 waitsecs, inns, req); /* answer in m */ 1286 if (rv < 0) { 1287 freeanswers(&m); 1288 break; /* failed via tcp too */ 1289 } 1290 memmove(srcip, qp->tcpip, IPaddrlen); 1291 } 1292 1293 /* find responder */ 1294 // dnslog("queryns got reply from %I", srcip); 1295 for(p = qp->dest; p < qp->curdest; p++) 1296 if(memcmp(p->a, srcip, sizeof p->a) == 0) 1297 break; 1298 1299 /* remove all addrs of responding server from list */ 1300 for(np = qp->dest; np < qp->curdest; np++) 1301 if(np->s == p->s) 1302 p->nx = Maxtrans; 1303 1304 /* free or incorporate RRs in m */ 1305 rv = procansw(qp, &m, srcip, depth, p); 1306 if (rv > 0) 1307 return rv; 1308 } 1309 } 1310 1311 /* if all servers returned failure, propagate it */ 1312 qp->dp->respcode = Rserver; 1313 for(p = dest; p < qp->curdest; p++) { 1314 destck(p); 1315 if(p->code != Rserver) 1316 qp->dp->respcode = 0; 1317 p->magic = 0; /* prevent accidents */ 1318 } 1319 1320 // if (qp->dp->respcode) 1321 // dnslog("queryns setting Rserver for %s", qp->dp->name); 1322 1323 free(qp->dest); 1324 qp->dest = qp->curdest = nil; /* prevent accidents */ 1325 return 0; 1326 } 1327 1328 /* 1329 * run a command with a supplied fd as standard input 1330 */ 1331 char * 1332 system(int fd, char *cmd) 1333 { 1334 int pid, p, i; 1335 static Waitmsg msg; 1336 1337 if((pid = fork()) == -1) 1338 sysfatal("fork failed: %r"); 1339 else if(pid == 0){ 1340 dup(fd, 0); 1341 close(fd); 1342 for (i = 3; i < 200; i++) 1343 close(i); /* don't leak fds */ 1344 execl("/bin/rc", "rc", "-c", cmd, nil); 1345 sysfatal("exec rc: %r"); 1346 } 1347 for(p = waitpid(); p >= 0; p = waitpid()) 1348 if(p == pid) 1349 return msg.msg; 1350 return "lost child"; 1351 } 1352 1353 /* compute wait, weighted by probability of success, with minimum */ 1354 static ulong 1355 weight(ulong ms, unsigned pcntprob) 1356 { 1357 ulong wait; 1358 1359 wait = (ms * pcntprob) / 100; 1360 if (wait < 1500) 1361 wait = 1500; 1362 return wait; 1363 } 1364 1365 /* 1366 * in principle we could use a single descriptor for a udp port 1367 * to send all queries and receive all the answers to them, 1368 * but we'd have to sort out the answers by dns-query id. 1369 */ 1370 static int 1371 udpquery(Query *qp, char *mntpt, int depth, int patient, int inns) 1372 { 1373 int fd, rv; 1374 long now; 1375 ulong pcntprob, wait, reqtm; 1376 char *msg; 1377 uchar *obuf, *ibuf; 1378 static QLock mntlck; 1379 static ulong lastmount; 1380 1381 /* use alloced buffers rather than ones from the stack */ 1382 // ibuf = emalloc(Maxudpin+Udphdrsize); 1383 ibuf = emalloc(64*1024); /* max. tcp reply size */ 1384 obuf = emalloc(Maxudp+Udphdrsize); 1385 1386 fd = udpport(mntpt); 1387 while (fd < 0 && cfg.straddle && strcmp(mntpt, "/net.alt") == 0) { 1388 /* HACK: remount /net.alt */ 1389 now = time(nil); 1390 if (now < lastmount + Remntretry) 1391 sleep((lastmount + Remntretry - now)*1000); 1392 qlock(&mntlck); 1393 fd = udpport(mntpt); /* try again under lock */ 1394 if (fd < 0) { 1395 dnslog("[%d] remounting /net.alt", getpid()); 1396 unmount(nil, "/net.alt"); 1397 1398 msg = system(open("/dev/null", ORDWR), "outside"); 1399 1400 lastmount = time(nil); 1401 if (msg && *msg) { 1402 dnslog("[%d] can't remount /net.alt: %s", 1403 getpid(), msg); 1404 sleep(10*1000); /* don't spin wildly */ 1405 } else 1406 fd = udpport(mntpt); 1407 } 1408 qunlock(&mntlck); 1409 } 1410 if (fd < 0) { 1411 dnslog("can't get udpport for %s query of name %s: %r", 1412 mntpt, qp->dp->name); 1413 sysfatal("out of udp conversations"); /* we're buggered */ 1414 } 1415 1416 /* 1417 * Our QIP servers are busted, don't answer AAAA and 1418 * take forever to answer CNAME if there isn't one. 1419 * They rarely set Rname. 1420 * make time-to-wait proportional to estimated probability of an 1421 * RR of that type existing. 1422 */ 1423 if (qp->type < 0 || qp->type >= nelem(likely)) 1424 pcntprob = 35; /* unpopular query type */ 1425 else 1426 pcntprob = likely[qp->type]; 1427 reqtm = (patient? 2*Maxreqtm: Maxreqtm); 1428 /* time for a single outgoing udp query */ 1429 wait = weight(S2MS(reqtm)/3, pcntprob); 1430 qp->req->aborttime = time(nil) + MS2S(3*wait); /* for all udp queries */ 1431 1432 qp->udpfd = fd; 1433 rv = queryns(qp, depth, ibuf, obuf, MS2S(wait), inns); 1434 close(fd); 1435 qp->udpfd = -1; 1436 1437 free(obuf); 1438 free(ibuf); 1439 return rv; 1440 } 1441 1442 /* look up (qp->dp->name,qp->type) rr in dns, via *nsrp with results in *reqp */ 1443 static int 1444 netquery(Query *qp, int depth) 1445 { 1446 int lock, rv, triedin, inname, lcktype; 1447 char buf[32]; 1448 RR *rp; 1449 DN *dp; 1450 1451 if(depth > 12) /* in a recursive loop? */ 1452 return 0; 1453 1454 slave(qp->req); 1455 /* 1456 * slave might have forked. if so, the parent process longjmped to 1457 * req->mret; we're usually the child slave, but if there are too 1458 * many children already, we're still the same process. 1459 */ 1460 1461 /* 1462 * don't lock before call to slave so only children can block. 1463 * just lock at top-level invocation. 1464 */ 1465 lock = depth <= 1 && qp->req->isslave != 0; 1466 dp = qp->dp; /* ensure that it doesn't change underfoot */ 1467 if(lock) { 1468 procsetname("query lock wait: %s %s from %s", dp->name, 1469 rrname(qp->type, buf, sizeof buf), qp->req->from); 1470 /* 1471 * don't make concurrent queries for this name. 1472 * dozens of processes blocking here probably indicates 1473 * an error in our dns data that causes us to not 1474 * recognise a zone (area) as one of our own, thus 1475 * causing us to query other nameservers. 1476 */ 1477 lcktype = qtype2lck(qp->type); 1478 qlock(&dp->querylck[lcktype]); 1479 } else 1480 lcktype = 0; 1481 procsetname("netquery: %s", dp->name); 1482 1483 /* prepare server RR's for incremental lookup */ 1484 for(rp = qp->nsrp; rp; rp = rp->next) 1485 rp->marker = 0; 1486 1487 rv = 0; /* pessimism */ 1488 triedin = 0; 1489 1490 /* 1491 * normal resolvers and servers will just use mntpt for all addresses, 1492 * even on the outside. straddling servers will use mntpt (/net) 1493 * for inside addresses and /net.alt for outside addresses, 1494 * thus bypassing other inside nameservers. 1495 */ 1496 inname = insideaddr(dp->name); 1497 if (!cfg.straddle || inname) { 1498 rv = udpquery(qp, mntpt, depth, Hurry, (cfg.inside? Inns: Outns)); 1499 triedin = 1; 1500 } 1501 1502 /* 1503 * if we're still looking, are inside, and have an outside domain, 1504 * try it on our outside interface, if any. 1505 */ 1506 if (rv == 0 && cfg.inside && !inname) { 1507 if (triedin) 1508 dnslog( 1509 "[%d] netquery: internal nameservers failed for %s; trying external", 1510 getpid(), dp->name); 1511 1512 /* prepare server RR's for incremental lookup */ 1513 for(rp = qp->nsrp; rp; rp = rp->next) 1514 rp->marker = 0; 1515 1516 rv = udpquery(qp, "/net.alt", depth, Patient, Outns); 1517 } 1518 // if (rv == 0) /* could ask /net.alt/dns directly */ 1519 // askoutdns(dp, qp->type); 1520 1521 if(lock) 1522 qunlock(&dp->querylck[lcktype]); 1523 return rv; 1524 } 1525 1526 int 1527 seerootns(void) 1528 { 1529 int rv; 1530 char root[] = ""; 1531 Request req; 1532 Query query; 1533 1534 memset(&req, 0, sizeof req); 1535 req.isslave = 1; 1536 req.aborttime = now + Maxreqtm; 1537 req.from = "internal"; 1538 queryinit(&query, dnlookup(root, Cin, 1), Tns, &req); 1539 query.nsrp = dblookup(root, Cin, Tns, 0, 0); 1540 rv = netquery(&query, 0); 1541 rrfreelist(query.nsrp); 1542 querydestroy(&query); 1543 return rv; 1544 } 1545