1 /*
2 * domain name resolvers, see rfcs 1035 and 1123
3 */
4 #include <u.h>
5 #include <libc.h>
6 #include <ip.h>
7 #include <bio.h>
8 #include <ndb.h>
9 #include "dns.h"
10
11 typedef struct Dest Dest;
12 typedef struct Ipaddr Ipaddr;
13 typedef struct Query Query;
14
enum
{
	/* transport used for a query and its reply */
	Udp		= 0,
	Tcp		= 1,

	/* answer-processing results; positive values count answers */
	Answerr		= -1,
	Answnone	= 0,

	Maxdest		= 24,	/* most destinations for one request message */
	Maxoutstanding	= 15,	/* most outstanding queries per domain name */
	Remntretry	= 15,	/* least seconds between /net.alt remount tries */

	/*
	 * retry and time-out limits.  these are longer than they used
	 * to be (Maxtrans 3, Maxretries 3, Maxwaitms 1000, Minwaitms 100),
	 * primarily for the benefit of remote nameservers querying us
	 * during times of bad connectivity.
	 */
	Maxtrans	= 5,	/* most transmissions to a single server */
	Maxretries	= 5,	/* cname+actual resends: was 32; have pity on user */
	Maxwaitms	= 5000,	/* longest wait for a remote dns query */
	Minwaitms	= 500,	/* shortest wait for a remote dns query */

	/* sanity stamps checked by destck() and queryck() */
	Destmagic	= 0xcafebabe,
	Querymagic	= 0xdeadbeef,
};
enum { Hurry, Patient, };
enum { Outns, Inns, };
46
47 struct Ipaddr {
48 Ipaddr *next;
49 uchar ip[IPaddrlen];
50 };
51
52 struct Dest
53 {
54 uchar a[IPaddrlen]; /* ip address */
55 DN *s; /* name server */
56 int nx; /* number of transmissions */
57 int code; /* response code; used to clear dp->respcode */
58
59 ulong magic;
60 };
61
62 /*
63 * Query has a QLock in it, thus it can't be an automatic
64 * variable, since each process would see a separate copy
65 * of the lock on its stack.
66 */
67 struct Query {
68 DN *dp; /* domain */
69 ushort type; /* and type to look up */
70 Request *req;
71 RR *nsrp; /* name servers to consult */
72
73 /* dest must not be on the stack due to forking in slave() */
74 Dest *dest; /* array of destinations */
75 Dest *curdest; /* pointer to next to fill */
76 int ndest; /* transmit to this many on this round */
77
78 int udpfd;
79
80 QLock tcplock; /* only one tcp call at a time per query */
81 int tcpset;
82 int tcpfd; /* if Tcp, read replies from here */
83 int tcpctlfd;
84 uchar tcpip[IPaddrlen];
85
86 ulong magic;
87 };
88
89 /* estimated % probability of such a record existing at all */
90 int likely[] = {
91 [Ta] 95,
92 [Taaaa] 10,
93 [Tcname] 15,
94 [Tmx] 60,
95 [Tns] 90,
96 [Tnull] 5,
97 [Tptr] 35,
98 [Tsoa] 90,
99 [Tsrv] 60,
100 [Ttxt] 15,
101 [Tall] 95,
102 };
103
104 static RR* dnresolve1(char*, int, int, Request*, int, int);
105 static int netquery(Query *, int);
106
107 /*
108 * reading /proc/pid/args yields either "name args" or "name [display args]",
109 * so return only display args, if any.
110 */
111 static char *
procgetname(void)112 procgetname(void)
113 {
114 int fd, n;
115 char *lp, *rp;
116 char buf[256];
117
118 snprint(buf, sizeof buf, "#p/%d/args", getpid());
119 if((fd = open(buf, OREAD)) < 0)
120 return strdup("");
121 *buf = '\0';
122 n = read(fd, buf, sizeof buf-1);
123 close(fd);
124 if (n >= 0)
125 buf[n] = '\0';
126 if ((lp = strchr(buf, '[')) == nil ||
127 (rp = strrchr(buf, ']')) == nil)
128 return strdup("");
129 *rp = '\0';
130 return strdup(lp+1);
131 }
132
133 void
rrfreelistptr(RR ** rpp)134 rrfreelistptr(RR **rpp)
135 {
136 RR *rp;
137
138 if (rpp == nil || *rpp == nil)
139 return;
140 rp = *rpp;
141 *rpp = nil; /* update pointer in memory before freeing list */
142 rrfreelist(rp);
143 }
144
145 /*
146 * lookup 'type' info for domain name 'name'. If it doesn't exist, try
147 * looking it up as a canonical name.
148 *
149 * this process can be quite slow if time-outs are set too high when querying
150 * nameservers that just don't respond to certain query types. in that case,
151 * there will be multiple udp retries, multiple nameservers will be queried,
152 * and this will be repeated for a cname query. the whole thing will be
153 * retried several times until we get an answer or a time-out.
154 */
155 RR*
dnresolve(char * name,int class,int type,Request * req,RR ** cn,int depth,int recurse,int rooted,int * status)156 dnresolve(char *name, int class, int type, Request *req, RR **cn, int depth,
157 int recurse, int rooted, int *status)
158 {
159 RR *rp, *nrp, *drp;
160 DN *dp;
161 int loops;
162 char *procname;
163 char nname[Domlen];
164
165 if(status)
166 *status = 0;
167
168 if(depth > 12) /* in a recursive loop? */
169 return nil;
170
171 procname = procgetname();
172 /*
173 * hack for systems that don't have resolve search
174 * lists. Just look up the simple name in the database.
175 */
176 if(!rooted && strchr(name, '.') == nil){
177 rp = nil;
178 drp = domainlist(class);
179 for(nrp = drp; rp == nil && nrp != nil; nrp = nrp->next){
180 snprint(nname, sizeof nname, "%s.%s", name,
181 nrp->ptr->name);
182 rp = dnresolve(nname, class, type, req, cn, depth+1,
183 recurse, rooted, status);
184 lock(&dnlock);
185 rrfreelist(rrremneg(&rp));
186 unlock(&dnlock);
187 }
188 if(drp != nil)
189 rrfreelist(drp);
190 procsetname(procname);
191 free(procname);
192 return rp;
193 }
194
195 /*
196 * try the name directly
197 */
198 rp = dnresolve1(name, class, type, req, depth, recurse);
199 if(rp == nil) {
200 /*
201 * try it as a canonical name if we weren't told
202 * that the name didn't exist
203 */
204 dp = dnlookup(name, class, 0);
205 if(type != Tptr && dp->respcode != Rname)
206 for(loops = 0; rp == nil && loops < Maxretries; loops++){
207 /* retry cname, then the actual type */
208 rp = dnresolve1(name, class, Tcname, req,
209 depth, recurse);
210 if(rp == nil)
211 break;
212
213 /* rp->host == nil shouldn't happen, but does */
214 if(rp->negative || rp->host == nil){
215 rrfreelist(rp);
216 rp = nil;
217 break;
218 }
219
220 name = rp->host->name;
221 lock(&dnlock);
222 if(cn)
223 rrcat(cn, rp);
224 else
225 rrfreelist(rp);
226 unlock(&dnlock);
227
228 rp = dnresolve1(name, class, type, req,
229 depth, recurse);
230 }
231
232 /* distinction between not found and not good */
233 if(rp == nil && status != nil && dp->respcode != Rok)
234 *status = dp->respcode;
235 }
236 procsetname(procname);
237 free(procname);
238 return randomize(rp);
239 }
240
241 static void
queryinit(Query * qp,DN * dp,int type,Request * req)242 queryinit(Query *qp, DN *dp, int type, Request *req)
243 {
244 memset(qp, 0, sizeof *qp);
245 qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
246 qp->dp = dp;
247 qp->type = type;
248 if (qp->type != type)
249 dnslog("queryinit: bogus type %d", type);
250 qp->req = req;
251 qp->nsrp = nil;
252 qp->dest = qp->curdest = nil;
253 qp->magic = Querymagic;
254 }
255
256 static void
queryck(Query * qp)257 queryck(Query *qp)
258 {
259 assert(qp);
260 assert(qp->magic == Querymagic);
261 }
262
263 static void
querydestroy(Query * qp)264 querydestroy(Query *qp)
265 {
266 queryck(qp);
267 /* leave udpfd open */
268 if (qp->tcpfd > 0)
269 close(qp->tcpfd);
270 if (qp->tcpctlfd > 0) {
271 hangup(qp->tcpctlfd);
272 close(qp->tcpctlfd);
273 }
274 free(qp->dest);
275 memset(qp, 0, sizeof *qp); /* prevent accidents */
276 qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
277 }
278
279 static void
destinit(Dest * p)280 destinit(Dest *p)
281 {
282 memset(p, 0, sizeof *p);
283 p->magic = Destmagic;
284 }
285
286 static void
destck(Dest * p)287 destck(Dest *p)
288 {
289 assert(p);
290 assert(p->magic == Destmagic);
291 }
292
293 /*
294 * if the response to a query hasn't arrived within 100 ms.,
295 * it's unlikely to arrive at all. after 1 s., it's really unlikely.
296 * queries for missing RRs are likely to produce time-outs rather than
297 * negative responses, so cname and aaaa queries are likely to time out,
298 * thus we don't wait very long for them.
299 */
300 static void
notestats(vlong start,int tmout,int type)301 notestats(vlong start, int tmout, int type)
302 {
303 qlock(&stats);
304 if (tmout) {
305 stats.tmout++;
306 if (type == Taaaa)
307 stats.tmoutv6++;
308 else if (type == Tcname)
309 stats.tmoutcname++;
310 } else {
311 long wait10ths = NS2MS(nsec() - start) / 100;
312
313 if (wait10ths <= 0)
314 stats.under10ths[0]++;
315 else if (wait10ths >= nelem(stats.under10ths))
316 stats.under10ths[nelem(stats.under10ths) - 1]++;
317 else
318 stats.under10ths[wait10ths]++;
319 }
320 qunlock(&stats);
321 }
322
323 static void
noteinmem(void)324 noteinmem(void)
325 {
326 qlock(&stats);
327 stats.answinmem++;
328 qunlock(&stats);
329 }
330
331 /* netquery with given name servers, free ns rrs when done */
332 static int
netqueryns(Query * qp,int depth,RR * nsrp)333 netqueryns(Query *qp, int depth, RR *nsrp)
334 {
335 int rv;
336
337 qp->nsrp = nsrp;
338 rv = netquery(qp, depth);
339 lock(&dnlock);
340 rrfreelist(nsrp);
341 unlock(&dnlock);
342 return rv;
343 }
344
345 static RR*
issuequery(Query * qp,char * name,int class,int depth,int recurse)346 issuequery(Query *qp, char *name, int class, int depth, int recurse)
347 {
348 char *cp;
349 DN *nsdp;
350 RR *rp, *nsrp, *dbnsrp;
351
352 /*
353 * if we're running as just a resolver, query our
354 * designated name servers
355 */
356 if(cfg.resolver){
357 nsrp = randomize(getdnsservers(class));
358 if(nsrp != nil)
359 if(netqueryns(qp, depth+1, nsrp) > Answnone)
360 return rrlookup(qp->dp, qp->type, OKneg);
361 }
362
363 /*
364 * walk up the domain name looking for
365 * a name server for the domain.
366 */
367 for(cp = name; cp; cp = walkup(cp)){
368 /*
369 * if this is a local (served by us) domain,
370 * return answer
371 */
372 dbnsrp = randomize(dblookup(cp, class, Tns, 0, 0));
373 if(dbnsrp && dbnsrp->local){
374 rp = dblookup(name, class, qp->type, 1, dbnsrp->ttl);
375 lock(&dnlock);
376 rrfreelist(dbnsrp);
377 unlock(&dnlock);
378 return rp;
379 }
380
381 /*
382 * if recursion isn't set, just accept local
383 * entries
384 */
385 if(recurse == Dontrecurse){
386 if(dbnsrp) {
387 lock(&dnlock);
388 rrfreelist(dbnsrp);
389 unlock(&dnlock);
390 }
391 continue;
392 }
393
394 /* look for ns in cache */
395 nsdp = dnlookup(cp, class, 0);
396 nsrp = nil;
397 if(nsdp)
398 nsrp = randomize(rrlookup(nsdp, Tns, NOneg));
399
400 /* if the entry timed out, ignore it */
401 if(nsrp && nsrp->ttl < now){
402 lock(&dnlock);
403 rrfreelistptr(&nsrp);
404 unlock(&dnlock);
405 }
406
407 if(nsrp){
408 lock(&dnlock);
409 rrfreelistptr(&dbnsrp);
410 unlock(&dnlock);
411
412 /* query the name servers found in cache */
413 if(netqueryns(qp, depth+1, nsrp) > Answnone)
414 return rrlookup(qp->dp, qp->type, OKneg);
415 } else if(dbnsrp)
416 /* try the name servers found in db */
417 if(netqueryns(qp, depth+1, dbnsrp) > Answnone)
418 return rrlookup(qp->dp, qp->type, NOneg);
419 }
420 return nil;
421 }
422
423 static RR*
dnresolve1(char * name,int class,int type,Request * req,int depth,int recurse)424 dnresolve1(char *name, int class, int type, Request *req, int depth,
425 int recurse)
426 {
427 Area *area;
428 DN *dp;
429 RR *rp;
430 Query *qp;
431
432 if(debug)
433 dnslog("[%d] dnresolve1 %s %d %d", getpid(), name, type, class);
434
435 /* only class Cin implemented so far */
436 if(class != Cin)
437 return nil;
438
439 dp = dnlookup(name, class, 1);
440
441 /*
442 * Try the cache first
443 */
444 rp = rrlookup(dp, type, OKneg);
445 if(rp)
446 if(rp->db){
447 /* unauthoritative db entries are hints */
448 if(rp->auth) {
449 noteinmem();
450 if(debug)
451 dnslog("[%d] dnresolve1 %s %d %d: auth rr in db",
452 getpid(), name, type, class);
453 return rp;
454 }
455 } else
456 /* cached entry must still be valid */
457 if(rp->ttl > now)
458 /* but Tall entries are special */
459 if(type != Tall || rp->query == Tall) {
460 noteinmem();
461 if(debug)
462 dnslog("[%d] dnresolve1 %s %d %d: rr not in db",
463 getpid(), name, type, class);
464 return rp;
465 }
466 lock(&dnlock);
467 rrfreelist(rp);
468 unlock(&dnlock);
469 rp = nil; /* accident prevention */
470 USED(rp);
471
472 /*
473 * try the cache for a canonical name. if found punt
474 * since we'll find it during the canonical name search
475 * in dnresolve().
476 */
477 if(type != Tcname){
478 rp = rrlookup(dp, Tcname, NOneg);
479 lock(&dnlock);
480 rrfreelist(rp);
481 unlock(&dnlock);
482 if(rp){
483 if(debug)
484 dnslog("[%d] dnresolve1 %s %d %d: rr from rrlookup for non-cname",
485 getpid(), name, type, class);
486 return nil;
487 }
488 }
489
490 /*
491 * if the domain name is within an area of ours,
492 * we should have found its data in memory by now.
493 */
494 area = inmyarea(dp->name);
495 if (area || strncmp(dp->name, "local#", 6) == 0) {
496 // char buf[32];
497
498 // dnslog("%s %s: no data in area %s", dp->name,
499 // rrname(type, buf, sizeof buf), area->soarr->owner->name);
500 return nil;
501 }
502
503 qp = emalloc(sizeof *qp);
504 queryinit(qp, dp, type, req);
505 rp = issuequery(qp, name, class, depth, recurse);
506 querydestroy(qp);
507 free(qp);
508 if(rp){
509 if(debug)
510 dnslog("[%d] dnresolve1 %s %d %d: rr from query",
511 getpid(), name, type, class);
512 return rp;
513 }
514
515 /* settle for a non-authoritative answer */
516 rp = rrlookup(dp, type, OKneg);
517 if(rp){
518 if(debug)
519 dnslog("[%d] dnresolve1 %s %d %d: rr from rrlookup",
520 getpid(), name, type, class);
521 return rp;
522 }
523
524 /* noone answered. try the database, we might have a chance. */
525 rp = dblookup(name, class, type, 0, 0);
526 if (rp) {
527 if(debug)
528 dnslog("[%d] dnresolve1 %s %d %d: rr from dblookup",
529 getpid(), name, type, class);
530 }else{
531 if(debug)
532 dnslog("[%d] dnresolve1 %s %d %d: no rr from dblookup; crapped out",
533 getpid(), name, type, class);
534 }
535 return rp;
536 }
537
/*
 * step from a domain name to its parent: return a pointer just past
 * the first '.' in name.  a non-empty name without any '.' has the
 * root, "", as its parent; the empty name has no parent, so 0 is
 * returned.  the result points into name (or at a constant "").
 */
char*
walkup(char *name)
{
	char *dot;

	dot = strchr(name, '.');
	if(dot != 0)
		return dot + 1;
	if(*name == '\0')
		return 0;
	return "";
}
556
557 /*
558 * Get a udp port for sending requests and reading replies. Put the port
559 * into "headers" mode.
560 */
561 static char *hmsg = "headers";
562
563 int
udpport(char * mtpt)564 udpport(char *mtpt)
565 {
566 int fd, ctl;
567 char ds[64], adir[64];
568
569 /* get a udp port */
570 snprint(ds, sizeof ds, "%s/udp!*!0", (mtpt? mtpt: "/net"));
571 ctl = announce(ds, adir);
572 if(ctl < 0){
573 /* warning("can't get udp port"); */
574 return -1;
575 }
576
577 /* turn on header style interface */
578 if(write(ctl, hmsg, strlen(hmsg)) != strlen(hmsg)){
579 close(ctl);
580 warning(hmsg);
581 return -1;
582 }
583
584 /* grab the data file */
585 snprint(ds, sizeof ds, "%s/data", adir);
586 fd = open(ds, ORDWR);
587 close(ctl);
588 if(fd < 0)
589 warning("can't open udp port %s: %r", ds);
590 return fd;
591 }
592
593 void
initdnsmsg(DNSmsg * mp,RR * rp,int flags,ushort reqno)594 initdnsmsg(DNSmsg *mp, RR *rp, int flags, ushort reqno)
595 {
596 mp->flags = flags;
597 mp->id = reqno;
598 mp->qd = rp;
599 if(rp != nil)
600 mp->qdcount = 1;
601 }
602
603 DNSmsg *
newdnsmsg(RR * rp,int flags,ushort reqno)604 newdnsmsg(RR *rp, int flags, ushort reqno)
605 {
606 DNSmsg *mp;
607
608 mp = emalloc(sizeof *mp);
609 initdnsmsg(mp, rp, flags, reqno);
610 return mp;
611 }
612
613 /* generate a DNS UDP query packet */
614 int
mkreq(DN * dp,int type,uchar * buf,int flags,ushort reqno)615 mkreq(DN *dp, int type, uchar *buf, int flags, ushort reqno)
616 {
617 DNSmsg m;
618 int len;
619 Udphdr *uh = (Udphdr*)buf;
620 RR *rp;
621
622 /* stuff port number into output buffer */
623 memset(uh, 0, sizeof *uh);
624 hnputs(uh->rport, Dnsport);
625
626 /* make request and convert it to output format */
627 memset(&m, 0, sizeof m);
628 rp = rralloc(type);
629 rp->owner = dp;
630 initdnsmsg(&m, rp, flags, reqno);
631 len = convDNS2M(&m, &buf[Udphdrsize], Maxdnspayload);
632 rrfreelistptr(&m.qd);
633 memset(&m, 0, sizeof m); /* cause trouble */
634 return len;
635 }
636
637 void
freeanswers(DNSmsg * mp)638 freeanswers(DNSmsg *mp)
639 {
640 lock(&dnlock);
641 rrfreelistptr(&mp->qd);
642 rrfreelistptr(&mp->an);
643 rrfreelistptr(&mp->ns);
644 rrfreelistptr(&mp->ar);
645 unlock(&dnlock);
646 mp->qdcount = mp->ancount = mp->nscount = mp->arcount = 0;
647 }
648
649 /* timed read of reply. sets srcip. ibuf must be 64K to handle tcp answers. */
650 static int
readnet(Query * qp,int medium,uchar * ibuf,uvlong endms,uchar ** replyp,uchar * srcip)651 readnet(Query *qp, int medium, uchar *ibuf, uvlong endms, uchar **replyp,
652 uchar *srcip)
653 {
654 int len, fd;
655 long ms;
656 vlong startns = nsec();
657 uchar *reply;
658 uchar lenbuf[2];
659
660 len = -1; /* pessimism */
661 ms = endms - NS2MS(startns);
662 if (ms <= 0)
663 return -1; /* taking too long */
664
665 reply = ibuf;
666 memset(srcip, 0, IPaddrlen);
667 alarm(ms);
668 if (medium == Udp)
669 if (qp->udpfd <= 0)
670 dnslog("readnet: qp->udpfd closed");
671 else {
672 len = read(qp->udpfd, ibuf, Udphdrsize+Maxpayload);
673 alarm(0);
674 notestats(startns, len < 0, qp->type);
675 if (len >= IPaddrlen)
676 memmove(srcip, ibuf, IPaddrlen);
677 if (len >= Udphdrsize) {
678 len -= Udphdrsize;
679 reply += Udphdrsize;
680 }
681 }
682 else {
683 if (!qp->tcpset)
684 dnslog("readnet: tcp params not set");
685 fd = qp->tcpfd;
686 if (fd <= 0)
687 dnslog("readnet: %s: tcp fd unset for dest %I",
688 qp->dp->name, qp->tcpip);
689 else if (readn(fd, lenbuf, 2) != 2) {
690 dnslog("readnet: short read of 2-byte tcp msg size from %I",
691 qp->tcpip);
692 /* probably a time-out */
693 notestats(startns, 1, qp->type);
694 } else {
695 len = lenbuf[0]<<8 | lenbuf[1];
696 if (readn(fd, ibuf, len) != len) {
697 dnslog("readnet: short read of tcp data from %I",
698 qp->tcpip);
699 /* probably a time-out */
700 notestats(startns, 1, qp->type);
701 len = -1;
702 }
703 }
704 memmove(srcip, qp->tcpip, IPaddrlen);
705 }
706 alarm(0);
707 *replyp = reply;
708 return len;
709 }
710
711 /*
712 * read replies to a request and remember the rrs in the answer(s).
713 * ignore any of the wrong type.
714 * wait at most until endms.
715 */
716 static int
readreply(Query * qp,int medium,ushort req,uchar * ibuf,DNSmsg * mp,uvlong endms)717 readreply(Query *qp, int medium, ushort req, uchar *ibuf, DNSmsg *mp,
718 uvlong endms)
719 {
720 int len;
721 char *err;
722 char tbuf[32];
723 uchar *reply;
724 uchar srcip[IPaddrlen];
725 RR *rp;
726
727 queryck(qp);
728 memset(mp, 0, sizeof *mp);
729 memset(srcip, 0, sizeof srcip);
730 if (0)
731 len = -1;
732 for (; timems() < endms &&
733 (len = readnet(qp, medium, ibuf, endms, &reply, srcip)) >= 0;
734 freeanswers(mp)){
735 /* convert into internal format */
736 memset(mp, 0, sizeof *mp);
737 err = convM2DNS(reply, len, mp, nil);
738 if (mp->flags & Ftrunc) {
739 free(err);
740 freeanswers(mp);
741 /* notify our caller to retry the query via tcp. */
742 return -1;
743 } else if(err){
744 dnslog("readreply: %s: input err, len %d: %s: %I",
745 qp->dp->name, len, err, srcip);
746 free(err);
747 continue;
748 }
749 if(debug)
750 logreply(qp->req->id, srcip, mp);
751
752 /* answering the right question? */
753 if(mp->id != req)
754 dnslog("%d: id %d instead of %d: %I", qp->req->id,
755 mp->id, req, srcip);
756 else if(mp->qd == 0)
757 dnslog("%d: no question RR: %I", qp->req->id, srcip);
758 else if(mp->qd->owner != qp->dp)
759 dnslog("%d: owner %s instead of %s: %I", qp->req->id,
760 mp->qd->owner->name, qp->dp->name, srcip);
761 else if(mp->qd->type != qp->type)
762 dnslog("%d: qp->type %d instead of %d: %I",
763 qp->req->id, mp->qd->type, qp->type, srcip);
764 else {
765 /* remember what request this is in answer to */
766 for(rp = mp->an; rp; rp = rp->next)
767 rp->query = qp->type;
768 return 0;
769 }
770 }
771 if (timems() >= endms) {
772 ; /* query expired */
773 } else if (0) {
774 /* this happens routinely when a read times out */
775 dnslog("readreply: %s type %s: ns %I read error or eof "
776 "(returned %d): %r", qp->dp->name, rrname(qp->type,
777 tbuf, sizeof tbuf), srcip, len);
778 if (medium == Udp)
779 for (rp = qp->nsrp; rp != nil; rp = rp->next)
780 if (rp->type == Tns)
781 dnslog("readreply: %s: query sent to "
782 "ns %s", qp->dp->name,
783 rp->host->name);
784 }
785 return -1;
786 }
787
788 /*
789 * return non-0 if first list includes second list
790 */
791 int
contains(RR * rp1,RR * rp2)792 contains(RR *rp1, RR *rp2)
793 {
794 RR *trp1, *trp2;
795
796 for(trp2 = rp2; trp2; trp2 = trp2->next){
797 for(trp1 = rp1; trp1; trp1 = trp1->next)
798 if(trp1->type == trp2->type)
799 if(trp1->host == trp2->host)
800 if(trp1->owner == trp2->owner)
801 break;
802 if(trp1 == nil)
803 return 0;
804 }
805 return 1;
806 }
807
808
809 /*
810 * return multicast version if any
811 */
812 int
ipisbm(uchar * ip)813 ipisbm(uchar *ip)
814 {
815 if(isv4(ip)){
816 if (ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0 ||
817 ipcmp(ip, IPv4bcast) == 0)
818 return 4;
819 } else
820 if(ip[0] == 0xff)
821 return 6;
822 return 0;
823 }
824
825 /*
826 * Get next server address(es) into qp->dest[nd] and beyond
827 */
828 static int
serveraddrs(Query * qp,int nd,int depth)829 serveraddrs(Query *qp, int nd, int depth)
830 {
831 RR *rp, *arp, *trp;
832 Dest *cur;
833
834 if(nd >= Maxdest) /* dest array is full? */
835 return Maxdest - 1;
836
837 /*
838 * look for a server whose address we already know.
839 * if we find one, mark it so we ignore this on
840 * subsequent passes.
841 */
842 arp = 0;
843 for(rp = qp->nsrp; rp; rp = rp->next){
844 assert(rp->magic == RRmagic);
845 if(rp->marker)
846 continue;
847 arp = rrlookup(rp->host, Ta, NOneg);
848 if(arp == nil)
849 arp = rrlookup(rp->host, Taaaa, NOneg);
850 if(arp){
851 rp->marker = 1;
852 break;
853 }
854 arp = dblookup(rp->host->name, Cin, Ta, 0, 0);
855 if(arp == nil)
856 arp = dblookup(rp->host->name, Cin, Taaaa, 0, 0);
857 if(arp){
858 rp->marker = 1;
859 break;
860 }
861 }
862
863 /*
864 * if the cache and database lookup didn't find any new
865 * server addresses, try resolving one via the network.
866 * Mark any we try to resolve so we don't try a second time.
867 */
868 if(arp == 0)
869 for(rp = qp->nsrp; rp; rp = rp->next){
870 if(rp->marker)
871 continue;
872 rp->marker = 1;
873
874 /*
875 * avoid loops looking up a server under itself
876 */
877 if(subsume(rp->owner->name, rp->host->name))
878 continue;
879
880 arp = dnresolve(rp->host->name, Cin, Ta, qp->req, 0,
881 depth+1, Recurse, 1, 0);
882 if(arp == nil)
883 arp = dnresolve(rp->host->name, Cin, Taaaa,
884 qp->req, 0, depth+1, Recurse, 1, 0);
885 lock(&dnlock);
886 rrfreelist(rrremneg(&arp));
887 unlock(&dnlock);
888 if(arp)
889 break;
890 }
891
892 /* use any addresses that we found */
893 for(trp = arp; trp && nd < Maxdest; trp = trp->next){
894 cur = &qp->dest[nd];
895 parseip(cur->a, trp->ip->name);
896 /*
897 * straddling servers can reject all nameservers if they are all
898 * inside, so be sure to list at least one outside ns at
899 * the end of the ns list in /lib/ndb for `dom='.
900 */
901 if (ipisbm(cur->a) ||
902 cfg.straddle && !insideaddr(qp->dp->name) && insidens(cur->a))
903 continue;
904 cur->nx = 0;
905 cur->s = trp->owner;
906 cur->code = Rtimeout;
907 nd++;
908 }
909 lock(&dnlock);
910 rrfreelist(arp);
911 unlock(&dnlock);
912 if(nd >= Maxdest) /* dest array is full? */
913 return Maxdest - 1;
914 return nd;
915 }
916
917 /*
918 * cache negative responses
919 */
920 static void
cacheneg(DN * dp,int type,int rcode,RR * soarr)921 cacheneg(DN *dp, int type, int rcode, RR *soarr)
922 {
923 RR *rp;
924 DN *soaowner;
925 ulong ttl;
926
927 stats.negcached++;
928
929 /* no cache time specified, don't make anything up */
930 if(soarr != nil){
931 lock(&dnlock);
932 if(soarr->next != nil)
933 rrfreelistptr(&soarr->next);
934 unlock(&dnlock);
935 soaowner = soarr->owner;
936 } else
937 soaowner = nil;
938
939 /* the attach can cause soarr to be freed so mine it now */
940 if(soarr != nil && soarr->soa != nil)
941 ttl = soarr->soa->minttl+now;
942 else
943 ttl = 5*Min;
944
945 /* add soa and negative RR to the database */
946 rrattach(soarr, Authoritative);
947
948 rp = rralloc(type);
949 rp->owner = dp;
950 rp->negative = 1;
951 rp->negsoaowner = soaowner;
952 rp->negrcode = rcode;
953 rp->ttl = ttl;
954 rrattach(rp, Authoritative);
955 }
956
957 static int
setdestoutns(Dest * p,int n)958 setdestoutns(Dest *p, int n)
959 {
960 uchar *outns = outsidens(n);
961
962 destck(p);
963 destinit(p);
964 if (outns == nil) {
965 if (n == 0)
966 dnslog("[%d] no outside-ns in ndb", getpid());
967 return -1;
968 }
969 memmove(p->a, outns, sizeof p->a);
970 p->s = dnlookup("outside-ns-ips", Cin, 1);
971 return 0;
972 }
973
974 /*
975 * issue query via UDP or TCP as appropriate.
976 * for TCP, returns with qp->tcpip set from udppkt header.
977 */
978 static int
mydnsquery(Query * qp,int medium,uchar * udppkt,int len)979 mydnsquery(Query *qp, int medium, uchar *udppkt, int len)
980 {
981 int rv = -1, nfd;
982 char *domain;
983 char conndir[NETPATHLEN], net[NETPATHLEN];
984 uchar belen[2];
985 NetConnInfo *nci;
986
987 queryck(qp);
988 domain = smprint("%I", udppkt);
989 if (myaddr(domain)) {
990 dnslog("mydnsquery: trying to send to myself (%s); bzzzt",
991 domain);
992 free(domain);
993 return rv;
994 }
995
996 switch (medium) {
997 case Udp:
998 free(domain);
999 nfd = dup(qp->udpfd, -1);
1000 if (nfd < 0) {
1001 warning("mydnsquery: qp->udpfd %d: %r", qp->udpfd);
1002 close(qp->udpfd); /* ensure it's closed */
1003 qp->udpfd = -1; /* poison it */
1004 return rv;
1005 }
1006 close(nfd);
1007
1008 if (qp->udpfd <= 0)
1009 dnslog("mydnsquery: qp->udpfd %d closed", qp->udpfd);
1010 else {
1011 if (write(qp->udpfd, udppkt, len+Udphdrsize) !=
1012 len+Udphdrsize)
1013 warning("sending udp msg: %r");
1014 else {
1015 stats.qsent++;
1016 rv = 0;
1017 }
1018 }
1019 break;
1020 case Tcp:
1021 /* send via TCP & keep fd around for reply */
1022 snprint(net, sizeof net, "%s/tcp",
1023 (mntpt[0] != '\0'? mntpt: "/net"));
1024 alarm(10*1000);
1025 qp->tcpfd = rv = dial(netmkaddr(domain, net, "dns"), nil,
1026 conndir, &qp->tcpctlfd);
1027 alarm(0);
1028 if (qp->tcpfd < 0) {
1029 dnslog("can't dial tcp!%s!dns: %r", domain);
1030 free(domain);
1031 break;
1032 }
1033 free(domain);
1034 nci = getnetconninfo(conndir, qp->tcpfd);
1035 if (nci) {
1036 parseip(qp->tcpip, nci->rsys);
1037 freenetconninfo(nci);
1038 } else
1039 dnslog("mydnsquery: getnetconninfo failed");
1040 qp->tcpset = 1;
1041
1042 belen[0] = len >> 8;
1043 belen[1] = len;
1044 if (write(qp->tcpfd, belen, 2) != 2 ||
1045 write(qp->tcpfd, udppkt + Udphdrsize, len) != len)
1046 warning("sending tcp msg: %r");
1047 break;
1048 default:
1049 sysfatal("mydnsquery: bad medium");
1050 }
1051 return rv;
1052 }
1053
1054 /*
1055 * send query to all UDP destinations or one TCP destination,
1056 * taken from obuf (udp packet) header
1057 */
1058 static int
xmitquery(Query * qp,int medium,int depth,uchar * obuf,int inns,int len)1059 xmitquery(Query *qp, int medium, int depth, uchar *obuf, int inns, int len)
1060 {
1061 int j, n;
1062 char buf[32];
1063 Dest *p;
1064
1065 queryck(qp);
1066 if(timems() >= qp->req->aborttime)
1067 return -1;
1068
1069 /*
1070 * get a nameserver address if we need one.
1071 * serveraddrs populates qp->dest.
1072 */
1073 p = qp->dest;
1074 destck(p);
1075 if (qp->ndest < 0 || qp->ndest > Maxdest) {
1076 dnslog("qp->ndest %d out of range", qp->ndest);
1077 abort();
1078 }
1079 /*
1080 * we're to transmit to more destinations than we currently have,
1081 * so get another.
1082 */
1083 if (qp->ndest > qp->curdest - p) {
1084 j = serveraddrs(qp, qp->curdest - p, depth);
1085 if (j < 0 || j >= Maxdest) {
1086 dnslog("serveraddrs() result %d out of range", j);
1087 abort();
1088 }
1089 qp->curdest = &qp->dest[j];
1090 }
1091 destck(qp->curdest);
1092
1093 /* no servers, punt */
1094 if (qp->ndest == 0)
1095 if (cfg.straddle && cfg.inside) {
1096 /* get ips of "outside-ns-ips" */
1097 qp->curdest = qp->dest;
1098 for(n = 0; n < Maxdest; n++, qp->curdest++)
1099 if (setdestoutns(qp->curdest, n) < 0)
1100 break;
1101 if(n == 0)
1102 dnslog("xmitquery: %s: no outside-ns nameservers",
1103 qp->dp->name);
1104 } else
1105 /* it's probably just a bogus domain, don't log it */
1106 return -1;
1107
1108 /* send to first 'qp->ndest' destinations */
1109 j = 0;
1110 if (medium == Tcp) {
1111 j++;
1112 queryck(qp);
1113 assert(qp->dp);
1114 procsetname("tcp %sside query for %s %s", (inns? "in": "out"),
1115 qp->dp->name, rrname(qp->type, buf, sizeof buf));
1116 mydnsquery(qp, medium, obuf, len); /* sets qp->tcpip from obuf */
1117 if(debug)
1118 logsend(qp->req->id, depth, qp->tcpip, "", qp->dp->name,
1119 qp->type);
1120 } else
1121 for(; p < &qp->dest[qp->ndest] && p < qp->curdest; p++){
1122 /* skip destinations we've finished with */
1123 if(p->nx >= Maxtrans)
1124 continue;
1125
1126 j++;
1127
1128 /* exponential backoff of requests */
1129 if((1<<p->nx) > qp->ndest)
1130 continue;
1131
1132 if(memcmp(p->a, IPnoaddr, sizeof IPnoaddr) == 0)
1133 continue; /* mistake */
1134
1135 procsetname("udp %sside query to %I/%s %s %s",
1136 (inns? "in": "out"), p->a, p->s->name,
1137 qp->dp->name, rrname(qp->type, buf, sizeof buf));
1138 if(debug)
1139 logsend(qp->req->id, depth, p->a, p->s->name,
1140 qp->dp->name, qp->type);
1141
1142 /* fill in UDP destination addr & send it */
1143 memmove(obuf, p->a, sizeof p->a);
1144 mydnsquery(qp, medium, obuf, len);
1145 p->nx++;
1146 }
1147 if(j == 0) {
1148 return -1;
1149 }
1150 return 0;
1151 }
1152
1153 static int lckindex[Maxlcks] = {
1154 0, /* all others map here */
1155 Ta,
1156 Tns,
1157 Tcname,
1158 Tsoa,
1159 Tptr,
1160 Tmx,
1161 Ttxt,
1162 Taaaa,
1163 };
1164
1165 static int
qtype2lck(int qtype)1166 qtype2lck(int qtype) /* map query type to querylck index */
1167 {
1168 int i;
1169
1170 for (i = 1; i < nelem(lckindex); i++)
1171 if (lckindex[i] == qtype)
1172 return i;
1173 return 0;
1174 }
1175
1176 /* is mp a cachable negative response (with Rname set)? */
1177 static int
isnegrname(DNSmsg * mp)1178 isnegrname(DNSmsg *mp)
1179 {
1180 /* TODO: could add || cfg.justforw to RHS of && */
1181 return mp->an == nil && (mp->flags & Rmask) == Rname;
1182 }
1183
1184 /* returns Answerr (-1) on errors, else number of answers, which can be zero. */
1185 static int
procansw(Query * qp,DNSmsg * mp,uchar * srcip,int depth,Dest * p)1186 procansw(Query *qp, DNSmsg *mp, uchar *srcip, int depth, Dest *p)
1187 {
1188 int rv;
1189 // int lcktype;
1190 char buf[32];
1191 DN *ndp;
1192 Query *nqp;
1193 RR *tp, *soarr;
1194
1195 if (mp->an == nil)
1196 stats.negans++;
1197
1198 /* ignore any error replies */
1199 if((mp->flags & Rmask) == Rserver){
1200 stats.negserver++;
1201 freeanswers(mp);
1202 if(p != qp->curdest)
1203 p->code = Rserver;
1204 return Answerr;
1205 }
1206
1207 /* ignore any bad delegations */
1208 if(mp->ns && baddelegation(mp->ns, qp->nsrp, srcip)){
1209 stats.negbaddeleg++;
1210 if(mp->an == nil){
1211 stats.negbdnoans++;
1212 freeanswers(mp);
1213 if(p != qp->curdest)
1214 p->code = Rserver;
1215 dnslog(" and no answers");
1216 return Answerr;
1217 }
1218 dnslog(" but has answers; ignoring ns");
1219 lock(&dnlock);
1220 rrfreelistptr(&mp->ns);
1221 unlock(&dnlock);
1222 mp->nscount = 0;
1223 }
1224
1225 /* remove any soa's from the authority section */
1226 lock(&dnlock);
1227 soarr = rrremtype(&mp->ns, Tsoa);
1228
1229 /* incorporate answers */
1230 unique(mp->an);
1231 unique(mp->ns);
1232 unique(mp->ar);
1233 unlock(&dnlock);
1234
1235 if(mp->an)
1236 rrattach(mp->an, (mp->flags & Fauth) != 0);
1237 if(mp->ar)
1238 rrattach(mp->ar, Notauthoritative);
1239 if(mp->ns && !cfg.justforw){
1240 ndp = mp->ns->owner;
1241 rrattach(mp->ns, Notauthoritative);
1242 } else {
1243 ndp = nil;
1244 lock(&dnlock);
1245 rrfreelistptr(&mp->ns);
1246 unlock(&dnlock);
1247 mp->nscount = 0;
1248 }
1249
1250 /* free the question */
1251 if(mp->qd) {
1252 lock(&dnlock);
1253 rrfreelistptr(&mp->qd);
1254 unlock(&dnlock);
1255 mp->qdcount = 0;
1256 }
1257
1258 /*
1259 * Any reply from an authoritative server,
1260 * or a positive reply terminates the search.
1261 * A negative response now also terminates the search.
1262 */
1263 if(mp->an != nil || (mp->flags & Fauth)){
1264 if(isnegrname(mp))
1265 qp->dp->respcode = Rname;
1266 else
1267 qp->dp->respcode = Rok;
1268
1269 /*
1270 * cache any negative responses, free soarr.
1271 * negative responses need not be authoritative:
1272 * they can legitimately come from a cache.
1273 */
1274 if( /* (mp->flags & Fauth) && */ mp->an == nil)
1275 cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
1276 else {
1277 lock(&dnlock);
1278 rrfreelist(soarr);
1279 unlock(&dnlock);
1280 }
1281 return 1;
1282 } else if (isnegrname(mp)) {
1283 qp->dp->respcode = Rname;
1284 /*
1285 * cache negative response.
1286 * negative responses need not be authoritative:
1287 * they can legitimately come from a cache.
1288 */
1289 cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
1290 return 1;
1291 }
1292 stats.negnorname++;
1293 lock(&dnlock);
1294 rrfreelist(soarr);
1295 unlock(&dnlock);
1296
1297 /*
1298 * if we've been given better name servers, recurse.
1299 * if we're a pure resolver, don't recurse, we have
1300 * to forward to a fixed set of named servers.
1301 */
1302 if(!mp->ns || cfg.resolver && cfg.justforw)
1303 return Answnone;
1304 tp = rrlookup(ndp, Tns, NOneg);
1305 if(contains(qp->nsrp, tp)){
1306 lock(&dnlock);
1307 rrfreelist(tp);
1308 unlock(&dnlock);
1309 return Answnone;
1310 }
1311 procsetname("recursive query for %s %s", qp->dp->name,
1312 rrname(qp->type, buf, sizeof buf));
1313 /*
1314 * we're called from udpquery, called from
1315 * netquery, which current holds qp->dp->querylck,
1316 * so release it now and acquire it upon return.
1317 */
1318 // lcktype = qtype2lck(qp->type); /* someday try this again */
1319 // qunlock(&qp->dp->querylck[lcktype]);
1320
1321 nqp = emalloc(sizeof *nqp);
1322 queryinit(nqp, qp->dp, qp->type, qp->req);
1323 nqp->nsrp = tp;
1324 rv = netquery(nqp, depth+1);
1325
1326 // qlock(&qp->dp->querylck[lcktype]);
1327 rrfreelist(nqp->nsrp);
1328 querydestroy(nqp);
1329 free(nqp);
1330 return rv;
1331 }
1332
1333 /*
1334 * send a query via tcp to a single address (from ibuf's udp header)
1335 * and read the answer(s) into mp->an.
1336 */
1337 static int
tcpquery(Query * qp,DNSmsg * mp,int depth,uchar * ibuf,uchar * obuf,int len,ulong waitms,int inns,ushort req)1338 tcpquery(Query *qp, DNSmsg *mp, int depth, uchar *ibuf, uchar *obuf, int len,
1339 ulong waitms, int inns, ushort req)
1340 {
1341 int rv = 0;
1342 uvlong endms;
1343
1344 endms = timems() + waitms;
1345 if(endms > qp->req->aborttime)
1346 endms = qp->req->aborttime;
1347
1348 if (0)
1349 dnslog("%s: udp reply truncated; retrying query via tcp to %I",
1350 qp->dp->name, qp->tcpip);
1351
1352 qlock(&qp->tcplock);
1353 memmove(obuf, ibuf, IPaddrlen); /* send back to respondent */
1354 /* sets qp->tcpip from obuf's udp header */
1355 if (xmitquery(qp, Tcp, depth, obuf, inns, len) < 0 ||
1356 readreply(qp, Tcp, req, ibuf, mp, endms) < 0)
1357 rv = -1;
1358 if (qp->tcpfd > 0) {
1359 hangup(qp->tcpctlfd);
1360 close(qp->tcpctlfd);
1361 close(qp->tcpfd);
1362 }
1363 qp->tcpfd = qp->tcpctlfd = -1;
1364 qunlock(&qp->tcplock);
1365 return rv;
1366 }
1367
1368 /*
1369 * query name servers. fill in obuf with on-the-wire representation of a
1370 * DNSmsg derived from qp. if the name server returns a pointer to another
1371 * name server, recurse.
1372 */
1373 static int
queryns(Query * qp,int depth,uchar * ibuf,uchar * obuf,ulong waitms,int inns)1374 queryns(Query *qp, int depth, uchar *ibuf, uchar *obuf, ulong waitms, int inns)
1375 {
1376 int ndest, len, replywaits, rv;
1377 ushort req;
1378 uvlong endms;
1379 char buf[12];
1380 uchar srcip[IPaddrlen];
1381 Dest *p, *np, *dest;
1382
1383 /* pack request into a udp message */
1384 req = rand();
1385 len = mkreq(qp->dp, qp->type, obuf, Frecurse|Oquery, req);
1386
1387 /* no server addresses yet */
1388 queryck(qp);
1389 dest = emalloc(Maxdest * sizeof *dest); /* dest can't be on stack */
1390 for (p = dest; p < dest + Maxdest; p++)
1391 destinit(p);
1392 /* this dest array is local to this call of queryns() */
1393 free(qp->dest);
1394 qp->curdest = qp->dest = dest;
1395
1396 /*
1397 * transmit udp requests and wait for answers.
1398 * at most Maxtrans attempts to each address.
1399 * each cycle send one more message than the previous.
1400 * retry a query via tcp if its response is truncated.
1401 */
1402 for(ndest = 1; ndest < Maxdest; ndest++){
1403 qp->ndest = ndest;
1404 qp->tcpset = 0;
1405 if (xmitquery(qp, Udp, depth, obuf, inns, len) < 0)
1406 break;
1407
1408 endms = timems() + waitms;
1409 if(endms > qp->req->aborttime)
1410 endms = qp->req->aborttime;
1411
1412 for(replywaits = 0; replywaits < ndest; replywaits++){
1413 DNSmsg m;
1414
1415 procsetname("reading %sside reply from %I: %s %s from %s",
1416 (inns? "in": "out"), obuf, qp->dp->name,
1417 rrname(qp->type, buf, sizeof buf), qp->req->from);
1418
1419 /* read udp answer into m */
1420 if (readreply(qp, Udp, req, ibuf, &m, endms) >= 0)
1421 memmove(srcip, ibuf, IPaddrlen);
1422 else if (!(m.flags & Ftrunc)) {
1423 freeanswers(&m);
1424 break; /* timed out on this dest */
1425 } else {
1426 /* whoops, it was truncated! ask again via tcp */
1427 freeanswers(&m);
1428 rv = tcpquery(qp, &m, depth, ibuf, obuf, len,
1429 waitms, inns, req); /* answer in m */
1430 if (rv < 0) {
1431 freeanswers(&m);
1432 break; /* failed via tcp too */
1433 }
1434 memmove(srcip, qp->tcpip, IPaddrlen);
1435 }
1436
1437 /* find responder */
1438 // dnslog("queryns got reply from %I", srcip);
1439 for(p = qp->dest; p < qp->curdest; p++)
1440 if(memcmp(p->a, srcip, sizeof p->a) == 0)
1441 break;
1442
1443 /* remove all addrs of responding server from list */
1444 for(np = qp->dest; np < qp->curdest; np++)
1445 if(np->s == p->s)
1446 np->nx = Maxtrans;
1447
1448 /* free or incorporate RRs in m */
1449 rv = procansw(qp, &m, srcip, depth, p);
1450 if (rv > Answnone) {
1451 free(qp->dest);
1452 qp->dest = qp->curdest = nil; /* prevent accidents */
1453 return rv;
1454 }
1455 }
1456 }
1457
1458 /* if all servers returned failure, propagate it */
1459 qp->dp->respcode = Rserver;
1460 for(p = dest; p < qp->curdest; p++) {
1461 destck(p);
1462 if(p->code != Rserver)
1463 qp->dp->respcode = Rok;
1464 p->magic = 0; /* prevent accidents */
1465 }
1466
1467 // if (qp->dp->respcode)
1468 // dnslog("queryns setting Rserver for %s", qp->dp->name);
1469
1470 free(qp->dest);
1471 qp->dest = qp->curdest = nil; /* prevent accidents */
1472 return Answnone;
1473 }
1474
1475 /*
1476 * run a command with a supplied fd as standard input
1477 */
1478 char *
system(int fd,char * cmd)1479 system(int fd, char *cmd)
1480 {
1481 int pid, p, i;
1482 static Waitmsg msg;
1483
1484 if((pid = fork()) == -1)
1485 sysfatal("fork failed: %r");
1486 else if(pid == 0){
1487 dup(fd, 0);
1488 close(fd);
1489 for (i = 3; i < 200; i++)
1490 close(i); /* don't leak fds */
1491 execl("/bin/rc", "rc", "-c", cmd, nil);
1492 sysfatal("exec rc: %r");
1493 }
1494 for(p = waitpid(); p >= 0; p = waitpid())
1495 if(p == pid)
1496 return msg.msg;
1497 return "lost child";
1498 }
1499
1500 /* compute wait, weighted by probability of success, with bounds */
1501 static ulong
weight(ulong ms,unsigned pcntprob)1502 weight(ulong ms, unsigned pcntprob)
1503 {
1504 ulong wait;
1505
1506 wait = (ms * pcntprob) / 100;
1507 if (wait < Minwaitms)
1508 wait = Minwaitms;
1509 if (wait > Maxwaitms)
1510 wait = Maxwaitms;
1511 return wait;
1512 }
1513
1514 /*
1515 * in principle we could use a single descriptor for a udp port
1516 * to send all queries and receive all the answers to them,
1517 * but we'd have to sort out the answers by dns-query id.
1518 */
1519 static int
udpquery(Query * qp,char * mntpt,int depth,int patient,int inns)1520 udpquery(Query *qp, char *mntpt, int depth, int patient, int inns)
1521 {
1522 int fd, rv;
1523 ulong now, pcntprob;
1524 uvlong wait, reqtm;
1525 char *msg;
1526 uchar *obuf, *ibuf;
1527 static QLock mntlck;
1528 static ulong lastmount;
1529
1530 /* use alloced buffers rather than ones from the stack */
1531 ibuf = emalloc(64*1024); /* max. tcp reply size */
1532 obuf = emalloc(Maxpayload+Udphdrsize);
1533
1534 fd = udpport(mntpt);
1535 while (fd < 0 && cfg.straddle && strcmp(mntpt, "/net.alt") == 0) {
1536 /* HACK: remount /net.alt */
1537 now = time(nil);
1538 if (now < lastmount + Remntretry)
1539 sleep(S2MS(lastmount + Remntretry - now));
1540 qlock(&mntlck);
1541 fd = udpport(mntpt); /* try again under lock */
1542 if (fd < 0) {
1543 dnslog("[%d] remounting /net.alt", getpid());
1544 unmount(nil, "/net.alt");
1545
1546 msg = system(open("/dev/null", ORDWR), "outside");
1547
1548 lastmount = time(nil);
1549 if (msg && *msg) {
1550 dnslog("[%d] can't remount /net.alt: %s",
1551 getpid(), msg);
1552 sleep(10*1000); /* don't spin remounting */
1553 } else
1554 fd = udpport(mntpt);
1555 }
1556 qunlock(&mntlck);
1557 }
1558 if (fd < 0) {
1559 dnslog("can't get udpport for %s query of name %s: %r",
1560 mntpt, qp->dp->name);
1561 sysfatal("out of udp conversations"); /* we're buggered */
1562 }
1563
1564 /*
1565 * Our QIP servers are busted and respond to AAAA and CNAME queries
1566 * with (sometimes malformed [too short] packets and) no answers and
1567 * just NS RRs but not Rname errors. so make time-to-wait
1568 * proportional to estimated probability of an RR of that type existing.
1569 */
1570 if (qp->type >= nelem(likely))
1571 pcntprob = 35; /* unpopular query type */
1572 else
1573 pcntprob = likely[qp->type];
1574 reqtm = (patient? 2 * Maxreqtm: Maxreqtm);
1575 wait = weight(reqtm / 3, pcntprob); /* time for one udp query */
1576 qp->req->aborttime = timems() + 3*wait; /* for all udp queries */
1577
1578 qp->udpfd = fd;
1579 rv = queryns(qp, depth, ibuf, obuf, wait, inns);
1580 close(fd);
1581 qp->udpfd = -1;
1582
1583 free(obuf);
1584 free(ibuf);
1585 return rv;
1586 }
1587
1588 /*
1589 * look up (qp->dp->name, qp->type) rr in dns,
1590 * using nameservers in qp->nsrp.
1591 */
1592 static int
netquery(Query * qp,int depth)1593 netquery(Query *qp, int depth)
1594 {
1595 int lock, rv, triedin, inname;
1596 char buf[32];
1597 RR *rp;
1598 DN *dp;
1599 Querylck *qlp;
1600 static int whined;
1601
1602 rv = Answnone; /* pessimism */
1603 if(depth > 12) /* in a recursive loop? */
1604 return Answnone;
1605
1606 slave(qp->req);
1607 /*
1608 * slave might have forked. if so, the parent process longjmped to
1609 * req->mret; we're usually the child slave, but if there are too
1610 * many children already, we're still the same process.
1611 */
1612
1613 /*
1614 * don't lock before call to slave so only children can block.
1615 * just lock at top-level invocation.
1616 */
1617 lock = depth <= 1 && qp->req->isslave;
1618 dp = qp->dp; /* ensure that it doesn't change underfoot */
1619 qlp = nil;
1620 if(lock) {
1621 procsetname("query lock wait: %s %s from %s", dp->name,
1622 rrname(qp->type, buf, sizeof buf), qp->req->from);
1623 /*
1624 * don't make concurrent queries for this name.
1625 * dozens of processes blocking here probably indicates
1626 * an error in our dns data that causes us to not
1627 * recognise a zone (area) as one of our own, thus
1628 * causing us to query other nameservers.
1629 */
1630 qlp = &dp->querylck[qtype2lck(qp->type)];
1631 qlock(qlp);
1632 if (qlp->Ref.ref > Maxoutstanding) {
1633 qunlock(qlp);
1634 if (!whined) {
1635 whined = 1;
1636 dnslog("too many outstanding queries for %s;"
1637 " dropping this one; no further logging"
1638 " of drops", dp->name);
1639 }
1640 return 0;
1641 }
1642 ++qlp->Ref.ref;
1643 qunlock(qlp);
1644 }
1645 procsetname("netquery: %s", dp->name);
1646
1647 /* prepare server RR's for incremental lookup */
1648 for(rp = qp->nsrp; rp; rp = rp->next)
1649 rp->marker = 0;
1650
1651 triedin = 0;
1652
1653 /*
1654 * normal resolvers and servers will just use mntpt for all addresses,
1655 * even on the outside. straddling servers will use mntpt (/net)
1656 * for inside addresses and /net.alt for outside addresses,
1657 * thus bypassing other inside nameservers.
1658 */
1659 inname = insideaddr(dp->name);
1660 if (!cfg.straddle || inname) {
1661 rv = udpquery(qp, mntpt, depth, Hurry, (cfg.inside? Inns: Outns));
1662 triedin = 1;
1663 }
1664
1665 /*
1666 * if we're still looking, are inside, and have an outside domain,
1667 * try it on our outside interface, if any.
1668 */
1669 if (rv == Answnone && cfg.inside && !inname) {
1670 if (triedin)
1671 dnslog(
1672 "[%d] netquery: internal nameservers failed for %s; trying external",
1673 getpid(), dp->name);
1674
1675 /* prepare server RR's for incremental lookup */
1676 for(rp = qp->nsrp; rp; rp = rp->next)
1677 rp->marker = 0;
1678
1679 rv = udpquery(qp, "/net.alt", depth, Patient, Outns);
1680 }
1681 // if (rv == Answnone) /* could ask /net.alt/dns directly */
1682 // askoutdns(dp, qp->type);
1683
1684 if(lock && qlp) {
1685 qlock(qlp);
1686 assert(qlp->Ref.ref > 0);
1687 qunlock(qlp);
1688 decref(qlp);
1689 }
1690 return rv;
1691 }
1692
1693 int
seerootns(void)1694 seerootns(void)
1695 {
1696 int rv;
1697 char root[] = "";
1698 Request req;
1699 RR *rr;
1700 Query *qp;
1701
1702 memset(&req, 0, sizeof req);
1703 req.isslave = 1;
1704 req.aborttime = timems() + Maxreqtm;
1705 req.from = "internal";
1706
1707 qp = emalloc(sizeof *qp);
1708 queryinit(qp, dnlookup(root, Cin, 1), Tns, &req);
1709 qp->nsrp = dblookup(root, Cin, Tns, 0, 0);
1710 for (rr = qp->nsrp; rr != nil; rr = rr->next) /* DEBUG */
1711 dnslog("seerootns query nsrp: %R", rr);
1712
1713 rv = netquery(qp, 0); /* lookup ". ns" using qp->nsrp */
1714
1715 rrfreelist(qp->nsrp);
1716 querydestroy(qp);
1717 free(qp);
1718 return rv;
1719 }
1720