xref: /plan9/sys/src/cmd/ndb/dnresolve.c (revision 98813beef1db23409911a4b339e6bb9c03d0a5c0)
1 /*
2  * domain name resolvers, see rfcs 1035 and 1123
3  */
4 #include <u.h>
5 #include <libc.h>
6 #include <ip.h>
7 #include <bio.h>
8 #include <ndb.h>
9 #include "dns.h"
10 
11 typedef struct Dest Dest;
12 typedef struct Ipaddr Ipaddr;
13 typedef struct Query Query;
14 
enum
{
	/* transport used to carry a query */
	Udp=		0,
	Tcp=		1,

	/* non-positive results of answer processing */
	Answerr=	-1,
	Answnone=	0,

	Maxdest=	24,	/* most destinations one request message may have */
	Maxoutstanding=	15,	/* most queries outstanding per domain name */
	Remntretry=	15,	/* least seconds between /net.alt remount attempts */

	/*
	 * retry and time-out limits.  these are longer than the values
	 * used previously (Maxtrans 3, Maxretries 3, Maxwaitms 1000,
	 * Minwaitms 100), primarily for the benefit of remote
	 * nameservers querying us during times of bad connectivity.
	 */
	Maxtrans=	5,	/* most transmissions to any one server */
	Maxretries=	5,	/* cname+actual resends: was 32; have pity on user */
	Maxwaitms=	5000,	/* longest wait for a remote dns query */
	Minwaitms=	500,	/* shortest wait for a remote dns query */

	/* sanity-check stamps stored in Dest and Query */
	Destmagic=	0xcafebabe,
	Querymagic=	0xdeadbeef,
};
enum { Hurry= 0, Patient= 1, };
enum { Outns= 0, Inns= 1, };
46 
47 struct Ipaddr {
48 	Ipaddr *next;
49 	uchar	ip[IPaddrlen];
50 };
51 
52 struct Dest
53 {
54 	uchar	a[IPaddrlen];	/* ip address */
55 	DN	*s;		/* name server */
56 	int	nx;		/* number of transmissions */
57 	int	code;		/* response code; used to clear dp->respcode */
58 
59 	ulong	magic;
60 };
61 
62 /*
63  * Query has a QLock in it, thus it can't be an automatic
64  * variable, since each process would see a separate copy
65  * of the lock on its stack.
66  */
67 struct Query {
68 	DN	*dp;		/* domain */
69 	ushort	type;		/* and type to look up */
70 	Request *req;
71 	RR	*nsrp;		/* name servers to consult */
72 
73 	/* dest must not be on the stack due to forking in slave() */
74 	Dest	*dest;		/* array of destinations */
75 	Dest	*curdest;	/* pointer to next to fill */
76 	int	ndest;		/* transmit to this many on this round */
77 
78 	int	udpfd;
79 
80 	QLock	tcplock;	/* only one tcp call at a time per query */
81 	int	tcpset;
82 	int	tcpfd;		/* if Tcp, read replies from here */
83 	int	tcpctlfd;
84 	uchar	tcpip[IPaddrlen];
85 
86 	ulong	magic;
87 };
88 
89 /* estimated % probability of such a record existing at all */
90 int likely[] = {
91 	[Ta]		95,
92 	[Taaaa]		10,
93 	[Tcname]	15,
94 	[Tmx]		60,
95 	[Tns]		90,
96 	[Tnull]		5,
97 	[Tptr]		35,
98 	[Tsoa]		90,
99 	[Tsrv]		60,
100 	[Ttxt]		15,
101 	[Tall]		95,
102 };
103 
104 static RR*	dnresolve1(char*, int, int, Request*, int, int);
105 static int	netquery(Query *, int);
106 
107 /*
108  * reading /proc/pid/args yields either "name args" or "name [display args]",
109  * so return only display args, if any.
110  */
111 static char *
procgetname(void)112 procgetname(void)
113 {
114 	int fd, n;
115 	char *lp, *rp;
116 	char buf[256];
117 
118 	snprint(buf, sizeof buf, "#p/%d/args", getpid());
119 	if((fd = open(buf, OREAD)) < 0)
120 		return strdup("");
121 	*buf = '\0';
122 	n = read(fd, buf, sizeof buf-1);
123 	close(fd);
124 	if (n >= 0)
125 		buf[n] = '\0';
126 	if ((lp = strchr(buf, '[')) == nil ||
127 	    (rp = strrchr(buf, ']')) == nil)
128 		return strdup("");
129 	*rp = '\0';
130 	return strdup(lp+1);
131 }
132 
133 void
rrfreelistptr(RR ** rpp)134 rrfreelistptr(RR **rpp)
135 {
136 	RR *rp;
137 
138 	if (rpp == nil || *rpp == nil)
139 		return;
140 	rp = *rpp;
141 	*rpp = nil;	/* update pointer in memory before freeing list */
142 	rrfreelist(rp);
143 }
144 
145 /*
146  *  lookup 'type' info for domain name 'name'.  If it doesn't exist, try
147  *  looking it up as a canonical name.
148  *
149  *  this process can be quite slow if time-outs are set too high when querying
150  *  nameservers that just don't respond to certain query types.  in that case,
151  *  there will be multiple udp retries, multiple nameservers will be queried,
152  *  and this will be repeated for a cname query.  the whole thing will be
153  *  retried several times until we get an answer or a time-out.
154  */
155 RR*
dnresolve(char * name,int class,int type,Request * req,RR ** cn,int depth,int recurse,int rooted,int * status)156 dnresolve(char *name, int class, int type, Request *req, RR **cn, int depth,
157 	int recurse, int rooted, int *status)
158 {
159 	RR *rp, *nrp, *drp;
160 	DN *dp;
161 	int loops;
162 	char *procname;
163 	char nname[Domlen];
164 
165 	if(status)
166 		*status = 0;
167 
168 	if(depth > 12)			/* in a recursive loop? */
169 		return nil;
170 
171 	procname = procgetname();
172 	/*
173 	 *  hack for systems that don't have resolve search
174 	 *  lists.  Just look up the simple name in the database.
175 	 */
176 	if(!rooted && strchr(name, '.') == nil){
177 		rp = nil;
178 		drp = domainlist(class);
179 		for(nrp = drp; rp == nil && nrp != nil; nrp = nrp->next){
180 			snprint(nname, sizeof nname, "%s.%s", name,
181 				nrp->ptr->name);
182 			rp = dnresolve(nname, class, type, req, cn, depth+1,
183 				recurse, rooted, status);
184 			lock(&dnlock);
185 			rrfreelist(rrremneg(&rp));
186 			unlock(&dnlock);
187 		}
188 		if(drp != nil)
189 			rrfreelist(drp);
190 		procsetname(procname);
191 		free(procname);
192 		return rp;
193 	}
194 
195 	/*
196 	 *  try the name directly
197 	 */
198 	rp = dnresolve1(name, class, type, req, depth, recurse);
199 	if(rp == nil) {
200 		/*
201 		 * try it as a canonical name if we weren't told
202 		 * that the name didn't exist
203 		 */
204 		dp = dnlookup(name, class, 0);
205 		if(type != Tptr && dp->respcode != Rname)
206 			for(loops = 0; rp == nil && loops < Maxretries; loops++){
207 				/* retry cname, then the actual type */
208 				rp = dnresolve1(name, class, Tcname, req,
209 					depth, recurse);
210 				if(rp == nil)
211 					break;
212 
213 				/* rp->host == nil shouldn't happen, but does */
214 				if(rp->negative || rp->host == nil){
215 					rrfreelist(rp);
216 					rp = nil;
217 					break;
218 				}
219 
220 				name = rp->host->name;
221 				lock(&dnlock);
222 				if(cn)
223 					rrcat(cn, rp);
224 				else
225 					rrfreelist(rp);
226 				unlock(&dnlock);
227 
228 				rp = dnresolve1(name, class, type, req,
229 					depth, recurse);
230 			}
231 
232 		/* distinction between not found and not good */
233 		if(rp == nil && status != nil && dp->respcode != Rok)
234 			*status = dp->respcode;
235 	}
236 	procsetname(procname);
237 	free(procname);
238 	return randomize(rp);
239 }
240 
241 static void
queryinit(Query * qp,DN * dp,int type,Request * req)242 queryinit(Query *qp, DN *dp, int type, Request *req)
243 {
244 	memset(qp, 0, sizeof *qp);
245 	qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
246 	qp->dp = dp;
247 	qp->type = type;
248 	if (qp->type != type)
249 		dnslog("queryinit: bogus type %d", type);
250 	qp->req = req;
251 	qp->nsrp = nil;
252 	qp->dest = qp->curdest = nil;
253 	qp->magic = Querymagic;
254 }
255 
256 static void
queryck(Query * qp)257 queryck(Query *qp)
258 {
259 	assert(qp);
260 	assert(qp->magic == Querymagic);
261 }
262 
263 static void
querydestroy(Query * qp)264 querydestroy(Query *qp)
265 {
266 	queryck(qp);
267 	/* leave udpfd open */
268 	if (qp->tcpfd > 0)
269 		close(qp->tcpfd);
270 	if (qp->tcpctlfd > 0) {
271 		hangup(qp->tcpctlfd);
272 		close(qp->tcpctlfd);
273 	}
274 	free(qp->dest);
275 	memset(qp, 0, sizeof *qp);	/* prevent accidents */
276 	qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
277 }
278 
279 static void
destinit(Dest * p)280 destinit(Dest *p)
281 {
282 	memset(p, 0, sizeof *p);
283 	p->magic = Destmagic;
284 }
285 
286 static void
destck(Dest * p)287 destck(Dest *p)
288 {
289 	assert(p);
290 	assert(p->magic == Destmagic);
291 }
292 
293 /*
294  * if the response to a query hasn't arrived within 100 ms.,
295  * it's unlikely to arrive at all.  after 1 s., it's really unlikely.
296  * queries for missing RRs are likely to produce time-outs rather than
297  * negative responses, so cname and aaaa queries are likely to time out,
298  * thus we don't wait very long for them.
299  */
300 static void
notestats(vlong start,int tmout,int type)301 notestats(vlong start, int tmout, int type)
302 {
303 	qlock(&stats);
304 	if (tmout) {
305 		stats.tmout++;
306 		if (type == Taaaa)
307 			stats.tmoutv6++;
308 		else if (type == Tcname)
309 			stats.tmoutcname++;
310 	} else {
311 		long wait10ths = NS2MS(nsec() - start) / 100;
312 
313 		if (wait10ths <= 0)
314 			stats.under10ths[0]++;
315 		else if (wait10ths >= nelem(stats.under10ths))
316 			stats.under10ths[nelem(stats.under10ths) - 1]++;
317 		else
318 			stats.under10ths[wait10ths]++;
319 	}
320 	qunlock(&stats);
321 }
322 
323 static void
noteinmem(void)324 noteinmem(void)
325 {
326 	qlock(&stats);
327 	stats.answinmem++;
328 	qunlock(&stats);
329 }
330 
331 /* netquery with given name servers, free ns rrs when done */
332 static int
netqueryns(Query * qp,int depth,RR * nsrp)333 netqueryns(Query *qp, int depth, RR *nsrp)
334 {
335 	int rv;
336 
337 	qp->nsrp = nsrp;
338 	rv = netquery(qp, depth);
339 	lock(&dnlock);
340 	rrfreelist(nsrp);
341 	unlock(&dnlock);
342 	return rv;
343 }
344 
345 static RR*
issuequery(Query * qp,char * name,int class,int depth,int recurse)346 issuequery(Query *qp, char *name, int class, int depth, int recurse)
347 {
348 	char *cp;
349 	DN *nsdp;
350 	RR *rp, *nsrp, *dbnsrp;
351 
352 	/*
353 	 *  if we're running as just a resolver, query our
354 	 *  designated name servers
355 	 */
356 	if(cfg.resolver){
357 		nsrp = randomize(getdnsservers(class));
358 		if(nsrp != nil)
359 			if(netqueryns(qp, depth+1, nsrp) > Answnone)
360 				return rrlookup(qp->dp, qp->type, OKneg);
361 	}
362 
363 	/*
364  	 *  walk up the domain name looking for
365 	 *  a name server for the domain.
366 	 */
367 	for(cp = name; cp; cp = walkup(cp)){
368 		/*
369 		 *  if this is a local (served by us) domain,
370 		 *  return answer
371 		 */
372 		dbnsrp = randomize(dblookup(cp, class, Tns, 0, 0));
373 		if(dbnsrp && dbnsrp->local){
374 			rp = dblookup(name, class, qp->type, 1, dbnsrp->ttl);
375 			lock(&dnlock);
376 			rrfreelist(dbnsrp);
377 			unlock(&dnlock);
378 			return rp;
379 		}
380 
381 		/*
382 		 *  if recursion isn't set, just accept local
383 		 *  entries
384 		 */
385 		if(recurse == Dontrecurse){
386 			if(dbnsrp) {
387 				lock(&dnlock);
388 				rrfreelist(dbnsrp);
389 				unlock(&dnlock);
390 			}
391 			continue;
392 		}
393 
394 		/* look for ns in cache */
395 		nsdp = dnlookup(cp, class, 0);
396 		nsrp = nil;
397 		if(nsdp)
398 			nsrp = randomize(rrlookup(nsdp, Tns, NOneg));
399 
400 		/* if the entry timed out, ignore it */
401 		if(nsrp && nsrp->ttl < now){
402 			lock(&dnlock);
403 			rrfreelistptr(&nsrp);
404 			unlock(&dnlock);
405 		}
406 
407 		if(nsrp){
408 			lock(&dnlock);
409 			rrfreelistptr(&dbnsrp);
410 			unlock(&dnlock);
411 
412 			/* query the name servers found in cache */
413 			if(netqueryns(qp, depth+1, nsrp) > Answnone)
414 				return rrlookup(qp->dp, qp->type, OKneg);
415 		} else if(dbnsrp)
416 			/* try the name servers found in db */
417 			if(netqueryns(qp, depth+1, dbnsrp) > Answnone)
418 				return rrlookup(qp->dp, qp->type, NOneg);
419 	}
420 	return nil;
421 }
422 
423 static RR*
dnresolve1(char * name,int class,int type,Request * req,int depth,int recurse)424 dnresolve1(char *name, int class, int type, Request *req, int depth,
425 	int recurse)
426 {
427 	Area *area;
428 	DN *dp;
429 	RR *rp;
430 	Query *qp;
431 
432 	if(debug)
433 		dnslog("[%d] dnresolve1 %s %d %d", getpid(), name, type, class);
434 
435 	/* only class Cin implemented so far */
436 	if(class != Cin)
437 		return nil;
438 
439 	dp = dnlookup(name, class, 1);
440 
441 	/*
442 	 *  Try the cache first
443 	 */
444 	rp = rrlookup(dp, type, OKneg);
445 	if(rp)
446 		if(rp->db){
447 			/* unauthoritative db entries are hints */
448 			if(rp->auth) {
449 				noteinmem();
450 				if(debug)
451 					dnslog("[%d] dnresolve1 %s %d %d: auth rr in db",
452 						getpid(), name, type, class);
453 				return rp;
454 			}
455 		} else
456 			/* cached entry must still be valid */
457 			if(rp->ttl > now)
458 				/* but Tall entries are special */
459 				if(type != Tall || rp->query == Tall) {
460 					noteinmem();
461 					if(debug)
462 						dnslog("[%d] dnresolve1 %s %d %d: rr not in db",
463 							getpid(), name, type, class);
464 					return rp;
465 				}
466 	lock(&dnlock);
467 	rrfreelist(rp);
468 	unlock(&dnlock);
469 	rp = nil;		/* accident prevention */
470 	USED(rp);
471 
472 	/*
473 	 * try the cache for a canonical name. if found punt
474 	 * since we'll find it during the canonical name search
475 	 * in dnresolve().
476 	 */
477 	if(type != Tcname){
478 		rp = rrlookup(dp, Tcname, NOneg);
479 		lock(&dnlock);
480 		rrfreelist(rp);
481 		unlock(&dnlock);
482 		if(rp){
483 			if(debug)
484 				dnslog("[%d] dnresolve1 %s %d %d: rr from rrlookup for non-cname",
485 					getpid(), name, type, class);
486 			return nil;
487 		}
488 	}
489 
490 	/*
491 	 * if the domain name is within an area of ours,
492 	 * we should have found its data in memory by now.
493 	 */
494 	area = inmyarea(dp->name);
495 	if (area || strncmp(dp->name, "local#", 6) == 0) {
496 //		char buf[32];
497 
498 //		dnslog("%s %s: no data in area %s", dp->name,
499 //			rrname(type, buf, sizeof buf), area->soarr->owner->name);
500 		return nil;
501 	}
502 
503 	qp = emalloc(sizeof *qp);
504 	queryinit(qp, dp, type, req);
505 	rp = issuequery(qp, name, class, depth, recurse);
506 	querydestroy(qp);
507 	free(qp);
508 	if(rp){
509 		if(debug)
510 			dnslog("[%d] dnresolve1 %s %d %d: rr from query",
511 				getpid(), name, type, class);
512 		return rp;
513 	}
514 
515 	/* settle for a non-authoritative answer */
516 	rp = rrlookup(dp, type, OKneg);
517 	if(rp){
518 		if(debug)
519 			dnslog("[%d] dnresolve1 %s %d %d: rr from rrlookup",
520 				getpid(), name, type, class);
521 		return rp;
522 	}
523 
524 	/* noone answered.  try the database, we might have a chance. */
525 	rp = dblookup(name, class, type, 0, 0);
526 	if (rp) {
527 		if(debug)
528 			dnslog("[%d] dnresolve1 %s %d %d: rr from dblookup",
529 				getpid(), name, type, class);
530 	}else{
531 		if(debug)
532 			dnslog("[%d] dnresolve1 %s %d %d: no rr from dblookup; crapped out",
533 				getpid(), name, type, class);
534 	}
535 	return rp;
536 }
537 
/*
 *  step from a domain name to its parent by dropping the leftmost
 *  label.  the result points just past the first '.' of name; a
 *  non-empty name without a '.' yields "" and the empty name yields 0,
 *  which ends the walk.
 */
char*
walkup(char *name)
{
	char *dot;

	dot = strchr(name, '.');
	if(dot != 0)
		return dot + 1;
	if(*name == '\0')
		return 0;
	return "";
}
556 
557 /*
558  *  Get a udp port for sending requests and reading replies.  Put the port
559  *  into "headers" mode.
560  */
561 static char *hmsg = "headers";
562 
563 int
udpport(char * mtpt)564 udpport(char *mtpt)
565 {
566 	int fd, ctl;
567 	char ds[64], adir[64];
568 
569 	/* get a udp port */
570 	snprint(ds, sizeof ds, "%s/udp!*!0", (mtpt? mtpt: "/net"));
571 	ctl = announce(ds, adir);
572 	if(ctl < 0){
573 		/* warning("can't get udp port"); */
574 		return -1;
575 	}
576 
577 	/* turn on header style interface */
578 	if(write(ctl, hmsg, strlen(hmsg)) != strlen(hmsg)){
579 		close(ctl);
580 		warning(hmsg);
581 		return -1;
582 	}
583 
584 	/* grab the data file */
585 	snprint(ds, sizeof ds, "%s/data", adir);
586 	fd = open(ds, ORDWR);
587 	close(ctl);
588 	if(fd < 0)
589 		warning("can't open udp port %s: %r", ds);
590 	return fd;
591 }
592 
593 void
initdnsmsg(DNSmsg * mp,RR * rp,int flags,ushort reqno)594 initdnsmsg(DNSmsg *mp, RR *rp, int flags, ushort reqno)
595 {
596 	mp->flags = flags;
597 	mp->id = reqno;
598 	mp->qd = rp;
599 	if(rp != nil)
600 		mp->qdcount = 1;
601 }
602 
603 DNSmsg *
newdnsmsg(RR * rp,int flags,ushort reqno)604 newdnsmsg(RR *rp, int flags, ushort reqno)
605 {
606 	DNSmsg *mp;
607 
608 	mp = emalloc(sizeof *mp);
609 	initdnsmsg(mp, rp, flags, reqno);
610 	return mp;
611 }
612 
613 /* generate a DNS UDP query packet */
614 int
mkreq(DN * dp,int type,uchar * buf,int flags,ushort reqno)615 mkreq(DN *dp, int type, uchar *buf, int flags, ushort reqno)
616 {
617 	DNSmsg m;
618 	int len;
619 	Udphdr *uh = (Udphdr*)buf;
620 	RR *rp;
621 
622 	/* stuff port number into output buffer */
623 	memset(uh, 0, sizeof *uh);
624 	hnputs(uh->rport, Dnsport);
625 
626 	/* make request and convert it to output format */
627 	memset(&m, 0, sizeof m);
628 	rp = rralloc(type);
629 	rp->owner = dp;
630 	initdnsmsg(&m, rp, flags, reqno);
631 	len = convDNS2M(&m, &buf[Udphdrsize], Maxdnspayload);
632 	rrfreelistptr(&m.qd);
633 	memset(&m, 0, sizeof m);		/* cause trouble */
634 	return len;
635 }
636 
637 void
freeanswers(DNSmsg * mp)638 freeanswers(DNSmsg *mp)
639 {
640 	lock(&dnlock);
641 	rrfreelistptr(&mp->qd);
642 	rrfreelistptr(&mp->an);
643 	rrfreelistptr(&mp->ns);
644 	rrfreelistptr(&mp->ar);
645 	unlock(&dnlock);
646 	mp->qdcount = mp->ancount = mp->nscount = mp->arcount = 0;
647 }
648 
649 /* timed read of reply.  sets srcip.  ibuf must be 64K to handle tcp answers. */
650 static int
readnet(Query * qp,int medium,uchar * ibuf,uvlong endms,uchar ** replyp,uchar * srcip)651 readnet(Query *qp, int medium, uchar *ibuf, uvlong endms, uchar **replyp,
652 	uchar *srcip)
653 {
654 	int len, fd;
655 	long ms;
656 	vlong startns = nsec();
657 	uchar *reply;
658 	uchar lenbuf[2];
659 
660 	len = -1;			/* pessimism */
661 	ms = endms - NS2MS(startns);
662 	if (ms <= 0)
663 		return -1;		/* taking too long */
664 
665 	reply = ibuf;
666 	memset(srcip, 0, IPaddrlen);
667 	alarm(ms);
668 	if (medium == Udp)
669 		if (qp->udpfd <= 0)
670 			dnslog("readnet: qp->udpfd closed");
671 		else {
672 			len = read(qp->udpfd, ibuf, Udphdrsize+Maxpayload);
673 			alarm(0);
674 			notestats(startns, len < 0, qp->type);
675 			if (len >= IPaddrlen)
676 				memmove(srcip, ibuf, IPaddrlen);
677 			if (len >= Udphdrsize) {
678 				len   -= Udphdrsize;
679 				reply += Udphdrsize;
680 			}
681 		}
682 	else {
683 		if (!qp->tcpset)
684 			dnslog("readnet: tcp params not set");
685 		fd = qp->tcpfd;
686 		if (fd <= 0)
687 			dnslog("readnet: %s: tcp fd unset for dest %I",
688 				qp->dp->name, qp->tcpip);
689 		else if (readn(fd, lenbuf, 2) != 2) {
690 			dnslog("readnet: short read of 2-byte tcp msg size from %I",
691 				qp->tcpip);
692 			/* probably a time-out */
693 			notestats(startns, 1, qp->type);
694 		} else {
695 			len = lenbuf[0]<<8 | lenbuf[1];
696 			if (readn(fd, ibuf, len) != len) {
697 				dnslog("readnet: short read of tcp data from %I",
698 					qp->tcpip);
699 				/* probably a time-out */
700 				notestats(startns, 1, qp->type);
701 				len = -1;
702 			}
703 		}
704 		memmove(srcip, qp->tcpip, IPaddrlen);
705 	}
706 	alarm(0);
707 	*replyp = reply;
708 	return len;
709 }
710 
711 /*
712  *  read replies to a request and remember the rrs in the answer(s).
713  *  ignore any of the wrong type.
714  *  wait at most until endms.
715  */
716 static int
readreply(Query * qp,int medium,ushort req,uchar * ibuf,DNSmsg * mp,uvlong endms)717 readreply(Query *qp, int medium, ushort req, uchar *ibuf, DNSmsg *mp,
718 	uvlong endms)
719 {
720 	int len;
721 	char *err;
722 	char tbuf[32];
723 	uchar *reply;
724 	uchar srcip[IPaddrlen];
725 	RR *rp;
726 
727 	queryck(qp);
728 	memset(mp, 0, sizeof *mp);
729 	memset(srcip, 0, sizeof srcip);
730 	if (0)
731 		len = -1;
732 	for (; timems() < endms &&
733 	    (len = readnet(qp, medium, ibuf, endms, &reply, srcip)) >= 0;
734 	    freeanswers(mp)){
735 		/* convert into internal format  */
736 		memset(mp, 0, sizeof *mp);
737 		err = convM2DNS(reply, len, mp, nil);
738 		if (mp->flags & Ftrunc) {
739 			free(err);
740 			freeanswers(mp);
741 			/* notify our caller to retry the query via tcp. */
742 			return -1;
743 		} else if(err){
744 			dnslog("readreply: %s: input err, len %d: %s: %I",
745 				qp->dp->name, len, err, srcip);
746 			free(err);
747 			continue;
748 		}
749 		if(debug)
750 			logreply(qp->req->id, srcip, mp);
751 
752 		/* answering the right question? */
753 		if(mp->id != req)
754 			dnslog("%d: id %d instead of %d: %I", qp->req->id,
755 				mp->id, req, srcip);
756 		else if(mp->qd == 0)
757 			dnslog("%d: no question RR: %I", qp->req->id, srcip);
758 		else if(mp->qd->owner != qp->dp)
759 			dnslog("%d: owner %s instead of %s: %I", qp->req->id,
760 				mp->qd->owner->name, qp->dp->name, srcip);
761 		else if(mp->qd->type != qp->type)
762 			dnslog("%d: qp->type %d instead of %d: %I",
763 				qp->req->id, mp->qd->type, qp->type, srcip);
764 		else {
765 			/* remember what request this is in answer to */
766 			for(rp = mp->an; rp; rp = rp->next)
767 				rp->query = qp->type;
768 			return 0;
769 		}
770 	}
771 	if (timems() >= endms) {
772 		;				/* query expired */
773 	} else if (0) {
774 		/* this happens routinely when a read times out */
775 		dnslog("readreply: %s type %s: ns %I read error or eof "
776 			"(returned %d): %r", qp->dp->name, rrname(qp->type,
777 			tbuf, sizeof tbuf), srcip, len);
778 		if (medium == Udp)
779 			for (rp = qp->nsrp; rp != nil; rp = rp->next)
780 				if (rp->type == Tns)
781 					dnslog("readreply: %s: query sent to "
782 						"ns %s", qp->dp->name,
783 						rp->host->name);
784 	}
785 	return -1;
786 }
787 
788 /*
789  *	return non-0 if first list includes second list
790  */
791 int
contains(RR * rp1,RR * rp2)792 contains(RR *rp1, RR *rp2)
793 {
794 	RR *trp1, *trp2;
795 
796 	for(trp2 = rp2; trp2; trp2 = trp2->next){
797 		for(trp1 = rp1; trp1; trp1 = trp1->next)
798 			if(trp1->type == trp2->type)
799 			if(trp1->host == trp2->host)
800 			if(trp1->owner == trp2->owner)
801 				break;
802 		if(trp1 == nil)
803 			return 0;
804 	}
805 	return 1;
806 }
807 
808 
809 /*
810  *  return multicast version if any
811  */
812 int
ipisbm(uchar * ip)813 ipisbm(uchar *ip)
814 {
815 	if(isv4(ip)){
816 		if (ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0 ||
817 		    ipcmp(ip, IPv4bcast) == 0)
818 			return 4;
819 	} else
820 		if(ip[0] == 0xff)
821 			return 6;
822 	return 0;
823 }
824 
825 /*
826  *  Get next server address(es) into qp->dest[nd] and beyond
827  */
828 static int
serveraddrs(Query * qp,int nd,int depth)829 serveraddrs(Query *qp, int nd, int depth)
830 {
831 	RR *rp, *arp, *trp;
832 	Dest *cur;
833 
834 	if(nd >= Maxdest)		/* dest array is full? */
835 		return Maxdest - 1;
836 
837 	/*
838 	 *  look for a server whose address we already know.
839 	 *  if we find one, mark it so we ignore this on
840 	 *  subsequent passes.
841 	 */
842 	arp = 0;
843 	for(rp = qp->nsrp; rp; rp = rp->next){
844 		assert(rp->magic == RRmagic);
845 		if(rp->marker)
846 			continue;
847 		arp = rrlookup(rp->host, Ta, NOneg);
848 		if(arp == nil)
849 			arp = rrlookup(rp->host, Taaaa, NOneg);
850 		if(arp){
851 			rp->marker = 1;
852 			break;
853 		}
854 		arp = dblookup(rp->host->name, Cin, Ta, 0, 0);
855 		if(arp == nil)
856 			arp = dblookup(rp->host->name, Cin, Taaaa, 0, 0);
857 		if(arp){
858 			rp->marker = 1;
859 			break;
860 		}
861 	}
862 
863 	/*
864 	 *  if the cache and database lookup didn't find any new
865 	 *  server addresses, try resolving one via the network.
866 	 *  Mark any we try to resolve so we don't try a second time.
867 	 */
868 	if(arp == 0)
869 		for(rp = qp->nsrp; rp; rp = rp->next){
870 			if(rp->marker)
871 				continue;
872 			rp->marker = 1;
873 
874 			/*
875 			 *  avoid loops looking up a server under itself
876 			 */
877 			if(subsume(rp->owner->name, rp->host->name))
878 				continue;
879 
880 			arp = dnresolve(rp->host->name, Cin, Ta, qp->req, 0,
881 				depth+1, Recurse, 1, 0);
882 			if(arp == nil)
883 				arp = dnresolve(rp->host->name, Cin, Taaaa,
884 					qp->req, 0, depth+1, Recurse, 1, 0);
885 			lock(&dnlock);
886 			rrfreelist(rrremneg(&arp));
887 			unlock(&dnlock);
888 			if(arp)
889 				break;
890 		}
891 
892 	/* use any addresses that we found */
893 	for(trp = arp; trp && nd < Maxdest; trp = trp->next){
894 		cur = &qp->dest[nd];
895 		parseip(cur->a, trp->ip->name);
896 		/*
897 		 * straddling servers can reject all nameservers if they are all
898 		 * inside, so be sure to list at least one outside ns at
899 		 * the end of the ns list in /lib/ndb for `dom='.
900 		 */
901 		if (ipisbm(cur->a) ||
902 		    cfg.straddle && !insideaddr(qp->dp->name) && insidens(cur->a))
903 			continue;
904 		cur->nx = 0;
905 		cur->s = trp->owner;
906 		cur->code = Rtimeout;
907 		nd++;
908 	}
909 	lock(&dnlock);
910 	rrfreelist(arp);
911 	unlock(&dnlock);
912 	if(nd >= Maxdest)		/* dest array is full? */
913 		return Maxdest - 1;
914 	return nd;
915 }
916 
917 /*
918  *  cache negative responses
919  */
920 static void
cacheneg(DN * dp,int type,int rcode,RR * soarr)921 cacheneg(DN *dp, int type, int rcode, RR *soarr)
922 {
923 	RR *rp;
924 	DN *soaowner;
925 	ulong ttl;
926 
927 	stats.negcached++;
928 
929 	/* no cache time specified, don't make anything up */
930 	if(soarr != nil){
931 		lock(&dnlock);
932 		if(soarr->next != nil)
933 			rrfreelistptr(&soarr->next);
934 		unlock(&dnlock);
935 		soaowner = soarr->owner;
936 	} else
937 		soaowner = nil;
938 
939 	/* the attach can cause soarr to be freed so mine it now */
940 	if(soarr != nil && soarr->soa != nil)
941 		ttl = soarr->soa->minttl+now;
942 	else
943 		ttl = 5*Min;
944 
945 	/* add soa and negative RR to the database */
946 	rrattach(soarr, Authoritative);
947 
948 	rp = rralloc(type);
949 	rp->owner = dp;
950 	rp->negative = 1;
951 	rp->negsoaowner = soaowner;
952 	rp->negrcode = rcode;
953 	rp->ttl = ttl;
954 	rrattach(rp, Authoritative);
955 }
956 
957 static int
setdestoutns(Dest * p,int n)958 setdestoutns(Dest *p, int n)
959 {
960 	uchar *outns = outsidens(n);
961 
962 	destck(p);
963 	destinit(p);
964 	if (outns == nil) {
965 		if (n == 0)
966 			dnslog("[%d] no outside-ns in ndb", getpid());
967 		return -1;
968 	}
969 	memmove(p->a, outns, sizeof p->a);
970 	p->s = dnlookup("outside-ns-ips", Cin, 1);
971 	return 0;
972 }
973 
974 /*
975  * issue query via UDP or TCP as appropriate.
976  * for TCP, returns with qp->tcpip set from udppkt header.
977  */
978 static int
mydnsquery(Query * qp,int medium,uchar * udppkt,int len)979 mydnsquery(Query *qp, int medium, uchar *udppkt, int len)
980 {
981 	int rv = -1, nfd;
982 	char *domain;
983 	char conndir[NETPATHLEN], net[NETPATHLEN];
984 	uchar belen[2];
985 	NetConnInfo *nci;
986 
987 	queryck(qp);
988 	domain = smprint("%I", udppkt);
989 	if (myaddr(domain)) {
990 		dnslog("mydnsquery: trying to send to myself (%s); bzzzt",
991 			domain);
992 		free(domain);
993 		return rv;
994 	}
995 
996 	switch (medium) {
997 	case Udp:
998 		free(domain);
999 		nfd = dup(qp->udpfd, -1);
1000 		if (nfd < 0) {
1001 			warning("mydnsquery: qp->udpfd %d: %r", qp->udpfd);
1002 			close(qp->udpfd);	/* ensure it's closed */
1003 			qp->udpfd = -1;		/* poison it */
1004 			return rv;
1005 		}
1006 		close(nfd);
1007 
1008 		if (qp->udpfd <= 0)
1009 			dnslog("mydnsquery: qp->udpfd %d closed", qp->udpfd);
1010 		else {
1011 			if (write(qp->udpfd, udppkt, len+Udphdrsize) !=
1012 			    len+Udphdrsize)
1013 				warning("sending udp msg: %r");
1014 			else {
1015 				stats.qsent++;
1016 				rv = 0;
1017 			}
1018 		}
1019 		break;
1020 	case Tcp:
1021 		/* send via TCP & keep fd around for reply */
1022 		snprint(net, sizeof net, "%s/tcp",
1023 			(mntpt[0] != '\0'? mntpt: "/net"));
1024 		alarm(10*1000);
1025 		qp->tcpfd = rv = dial(netmkaddr(domain, net, "dns"), nil,
1026 			conndir, &qp->tcpctlfd);
1027 		alarm(0);
1028 		if (qp->tcpfd < 0) {
1029 			dnslog("can't dial tcp!%s!dns: %r", domain);
1030 			free(domain);
1031 			break;
1032 		}
1033 		free(domain);
1034 		nci = getnetconninfo(conndir, qp->tcpfd);
1035 		if (nci) {
1036 			parseip(qp->tcpip, nci->rsys);
1037 			freenetconninfo(nci);
1038 		} else
1039 			dnslog("mydnsquery: getnetconninfo failed");
1040 		qp->tcpset = 1;
1041 
1042 		belen[0] = len >> 8;
1043 		belen[1] = len;
1044 		if (write(qp->tcpfd, belen, 2) != 2 ||
1045 		    write(qp->tcpfd, udppkt + Udphdrsize, len) != len)
1046 			warning("sending tcp msg: %r");
1047 		break;
1048 	default:
1049 		sysfatal("mydnsquery: bad medium");
1050 	}
1051 	return rv;
1052 }
1053 
1054 /*
1055  * send query to all UDP destinations or one TCP destination,
1056  * taken from obuf (udp packet) header
1057  */
1058 static int
xmitquery(Query * qp,int medium,int depth,uchar * obuf,int inns,int len)1059 xmitquery(Query *qp, int medium, int depth, uchar *obuf, int inns, int len)
1060 {
1061 	int j, n;
1062 	char buf[32];
1063 	Dest *p;
1064 
1065 	queryck(qp);
1066 	if(timems() >= qp->req->aborttime)
1067 		return -1;
1068 
1069 	/*
1070 	 * get a nameserver address if we need one.
1071 	 * serveraddrs populates qp->dest.
1072 	 */
1073 	p = qp->dest;
1074 	destck(p);
1075 	if (qp->ndest < 0 || qp->ndest > Maxdest) {
1076 		dnslog("qp->ndest %d out of range", qp->ndest);
1077 		abort();
1078 	}
1079 	/*
1080 	 * we're to transmit to more destinations than we currently have,
1081 	 * so get another.
1082 	 */
1083 	if (qp->ndest > qp->curdest - p) {
1084 		j = serveraddrs(qp, qp->curdest - p, depth);
1085 		if (j < 0 || j >= Maxdest) {
1086 			dnslog("serveraddrs() result %d out of range", j);
1087 			abort();
1088 		}
1089 		qp->curdest = &qp->dest[j];
1090 	}
1091 	destck(qp->curdest);
1092 
1093 	/* no servers, punt */
1094 	if (qp->ndest == 0)
1095 		if (cfg.straddle && cfg.inside) {
1096 			/* get ips of "outside-ns-ips" */
1097 			qp->curdest = qp->dest;
1098 			for(n = 0; n < Maxdest; n++, qp->curdest++)
1099 				if (setdestoutns(qp->curdest, n) < 0)
1100 					break;
1101 			if(n == 0)
1102 				dnslog("xmitquery: %s: no outside-ns nameservers",
1103 					qp->dp->name);
1104 		} else
1105 			/* it's probably just a bogus domain, don't log it */
1106 			return -1;
1107 
1108 	/* send to first 'qp->ndest' destinations */
1109 	j = 0;
1110 	if (medium == Tcp) {
1111 		j++;
1112 		queryck(qp);
1113 		assert(qp->dp);
1114 		procsetname("tcp %sside query for %s %s", (inns? "in": "out"),
1115 			qp->dp->name, rrname(qp->type, buf, sizeof buf));
1116 		mydnsquery(qp, medium, obuf, len); /* sets qp->tcpip from obuf */
1117 		if(debug)
1118 			logsend(qp->req->id, depth, qp->tcpip, "", qp->dp->name,
1119 				qp->type);
1120 	} else
1121 		for(; p < &qp->dest[qp->ndest] && p < qp->curdest; p++){
1122 			/* skip destinations we've finished with */
1123 			if(p->nx >= Maxtrans)
1124 				continue;
1125 
1126 			j++;
1127 
1128 			/* exponential backoff of requests */
1129 			if((1<<p->nx) > qp->ndest)
1130 				continue;
1131 
1132 			if(memcmp(p->a, IPnoaddr, sizeof IPnoaddr) == 0)
1133 				continue;		/* mistake */
1134 
1135 			procsetname("udp %sside query to %I/%s %s %s",
1136 				(inns? "in": "out"), p->a, p->s->name,
1137 				qp->dp->name, rrname(qp->type, buf, sizeof buf));
1138 			if(debug)
1139 				logsend(qp->req->id, depth, p->a, p->s->name,
1140 					qp->dp->name, qp->type);
1141 
1142 			/* fill in UDP destination addr & send it */
1143 			memmove(obuf, p->a, sizeof p->a);
1144 			mydnsquery(qp, medium, obuf, len);
1145 			p->nx++;
1146 		}
1147 	if(j == 0) {
1148 		return -1;
1149 	}
1150 	return 0;
1151 }
1152 
1153 static int lckindex[Maxlcks] = {
1154 	0,			/* all others map here */
1155 	Ta,
1156 	Tns,
1157 	Tcname,
1158 	Tsoa,
1159 	Tptr,
1160 	Tmx,
1161 	Ttxt,
1162 	Taaaa,
1163 };
1164 
1165 static int
qtype2lck(int qtype)1166 qtype2lck(int qtype)		/* map query type to querylck index */
1167 {
1168 	int i;
1169 
1170 	for (i = 1; i < nelem(lckindex); i++)
1171 		if (lckindex[i] == qtype)
1172 			return i;
1173 	return 0;
1174 }
1175 
1176 /* is mp a cachable negative response (with Rname set)? */
1177 static int
isnegrname(DNSmsg * mp)1178 isnegrname(DNSmsg *mp)
1179 {
1180 	/* TODO: could add || cfg.justforw to RHS of && */
1181 	return mp->an == nil && (mp->flags & Rmask) == Rname;
1182 }
1183 
1184 /* returns Answerr (-1) on errors, else number of answers, which can be zero. */
1185 static int
procansw(Query * qp,DNSmsg * mp,uchar * srcip,int depth,Dest * p)1186 procansw(Query *qp, DNSmsg *mp, uchar *srcip, int depth, Dest *p)
1187 {
1188 	int rv;
1189 //	int lcktype;
1190 	char buf[32];
1191 	DN *ndp;
1192 	Query *nqp;
1193 	RR *tp, *soarr;
1194 
1195 	if (mp->an == nil)
1196 		stats.negans++;
1197 
1198 	/* ignore any error replies */
1199 	if((mp->flags & Rmask) == Rserver){
1200 		stats.negserver++;
1201 		freeanswers(mp);
1202 		if(p != qp->curdest)
1203 			p->code = Rserver;
1204 		return Answerr;
1205 	}
1206 
1207 	/* ignore any bad delegations */
1208 	if(mp->ns && baddelegation(mp->ns, qp->nsrp, srcip)){
1209 		stats.negbaddeleg++;
1210 		if(mp->an == nil){
1211 			stats.negbdnoans++;
1212 			freeanswers(mp);
1213 			if(p != qp->curdest)
1214 				p->code = Rserver;
1215 			dnslog(" and no answers");
1216 			return Answerr;
1217 		}
1218 		dnslog(" but has answers; ignoring ns");
1219 		lock(&dnlock);
1220 		rrfreelistptr(&mp->ns);
1221 		unlock(&dnlock);
1222 		mp->nscount = 0;
1223 	}
1224 
1225 	/* remove any soa's from the authority section */
1226 	lock(&dnlock);
1227 	soarr = rrremtype(&mp->ns, Tsoa);
1228 
1229 	/* incorporate answers */
1230 	unique(mp->an);
1231 	unique(mp->ns);
1232 	unique(mp->ar);
1233 	unlock(&dnlock);
1234 
1235 	if(mp->an)
1236 		rrattach(mp->an, (mp->flags & Fauth) != 0);
1237 	if(mp->ar)
1238 		rrattach(mp->ar, Notauthoritative);
1239 	if(mp->ns && !cfg.justforw){
1240 		ndp = mp->ns->owner;
1241 		rrattach(mp->ns, Notauthoritative);
1242 	} else {
1243 		ndp = nil;
1244 		lock(&dnlock);
1245 		rrfreelistptr(&mp->ns);
1246 		unlock(&dnlock);
1247 		mp->nscount = 0;
1248 	}
1249 
1250 	/* free the question */
1251 	if(mp->qd) {
1252 		lock(&dnlock);
1253 		rrfreelistptr(&mp->qd);
1254 		unlock(&dnlock);
1255 		mp->qdcount = 0;
1256 	}
1257 
1258 	/*
1259 	 *  Any reply from an authoritative server,
1260 	 *  or a positive reply terminates the search.
1261 	 *  A negative response now also terminates the search.
1262 	 */
1263 	if(mp->an != nil || (mp->flags & Fauth)){
1264 		if(isnegrname(mp))
1265 			qp->dp->respcode = Rname;
1266 		else
1267 			qp->dp->respcode = Rok;
1268 
1269 		/*
1270 		 *  cache any negative responses, free soarr.
1271 		 *  negative responses need not be authoritative:
1272 		 *  they can legitimately come from a cache.
1273 		 */
1274 		if( /* (mp->flags & Fauth) && */ mp->an == nil)
1275 			cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
1276 		else {
1277 			lock(&dnlock);
1278 			rrfreelist(soarr);
1279 			unlock(&dnlock);
1280 		}
1281 		return 1;
1282 	} else if (isnegrname(mp)) {
1283 		qp->dp->respcode = Rname;
1284 		/*
1285 		 *  cache negative response.
1286 		 *  negative responses need not be authoritative:
1287 		 *  they can legitimately come from a cache.
1288 		 */
1289 		cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
1290 		return 1;
1291 	}
1292 	stats.negnorname++;
1293 	lock(&dnlock);
1294 	rrfreelist(soarr);
1295 	unlock(&dnlock);
1296 
1297 	/*
1298 	 *  if we've been given better name servers, recurse.
1299 	 *  if we're a pure resolver, don't recurse, we have
1300 	 *  to forward to a fixed set of named servers.
1301 	 */
1302 	if(!mp->ns || cfg.resolver && cfg.justforw)
1303 		return Answnone;
1304 	tp = rrlookup(ndp, Tns, NOneg);
1305 	if(contains(qp->nsrp, tp)){
1306 		lock(&dnlock);
1307 		rrfreelist(tp);
1308 		unlock(&dnlock);
1309 		return Answnone;
1310 	}
1311 	procsetname("recursive query for %s %s", qp->dp->name,
1312 		rrname(qp->type, buf, sizeof buf));
1313 	/*
1314 	 *  we're called from udpquery, called from
1315 	 *  netquery, which current holds qp->dp->querylck,
1316 	 *  so release it now and acquire it upon return.
1317 	 */
1318 //	lcktype = qtype2lck(qp->type);		/* someday try this again */
1319 //	qunlock(&qp->dp->querylck[lcktype]);
1320 
1321 	nqp = emalloc(sizeof *nqp);
1322 	queryinit(nqp, qp->dp, qp->type, qp->req);
1323 	nqp->nsrp = tp;
1324 	rv = netquery(nqp, depth+1);
1325 
1326 //	qlock(&qp->dp->querylck[lcktype]);
1327 	rrfreelist(nqp->nsrp);
1328 	querydestroy(nqp);
1329 	free(nqp);
1330 	return rv;
1331 }
1332 
1333 /*
1334  * send a query via tcp to a single address (from ibuf's udp header)
1335  * and read the answer(s) into mp->an.
1336  */
1337 static int
tcpquery(Query * qp,DNSmsg * mp,int depth,uchar * ibuf,uchar * obuf,int len,ulong waitms,int inns,ushort req)1338 tcpquery(Query *qp, DNSmsg *mp, int depth, uchar *ibuf, uchar *obuf, int len,
1339 	ulong waitms, int inns, ushort req)
1340 {
1341 	int rv = 0;
1342 	uvlong endms;
1343 
1344 	endms = timems() + waitms;
1345 	if(endms > qp->req->aborttime)
1346 		endms = qp->req->aborttime;
1347 
1348 	if (0)
1349 		dnslog("%s: udp reply truncated; retrying query via tcp to %I",
1350 			qp->dp->name, qp->tcpip);
1351 
1352 	qlock(&qp->tcplock);
1353 	memmove(obuf, ibuf, IPaddrlen);		/* send back to respondent */
1354 	/* sets qp->tcpip from obuf's udp header */
1355 	if (xmitquery(qp, Tcp, depth, obuf, inns, len) < 0 ||
1356 	    readreply(qp, Tcp, req, ibuf, mp, endms) < 0)
1357 		rv = -1;
1358 	if (qp->tcpfd > 0) {
1359 		hangup(qp->tcpctlfd);
1360 		close(qp->tcpctlfd);
1361 		close(qp->tcpfd);
1362 	}
1363 	qp->tcpfd = qp->tcpctlfd = -1;
1364 	qunlock(&qp->tcplock);
1365 	return rv;
1366 }
1367 
1368 /*
1369  *  query name servers.  fill in obuf with on-the-wire representation of a
1370  *  DNSmsg derived from qp.  if the name server returns a pointer to another
1371  *  name server, recurse.
1372  */
1373 static int
queryns(Query * qp,int depth,uchar * ibuf,uchar * obuf,ulong waitms,int inns)1374 queryns(Query *qp, int depth, uchar *ibuf, uchar *obuf, ulong waitms, int inns)
1375 {
1376 	int ndest, len, replywaits, rv;
1377 	ushort req;
1378 	uvlong endms;
1379 	char buf[12];
1380 	uchar srcip[IPaddrlen];
1381 	Dest *p, *np, *dest;
1382 
1383 	/* pack request into a udp message */
1384 	req = rand();
1385 	len = mkreq(qp->dp, qp->type, obuf, Frecurse|Oquery, req);
1386 
1387 	/* no server addresses yet */
1388 	queryck(qp);
1389 	dest = emalloc(Maxdest * sizeof *dest);	/* dest can't be on stack */
1390 	for (p = dest; p < dest + Maxdest; p++)
1391 		destinit(p);
1392 	/* this dest array is local to this call of queryns() */
1393 	free(qp->dest);
1394 	qp->curdest = qp->dest = dest;
1395 
1396 	/*
1397 	 *  transmit udp requests and wait for answers.
1398 	 *  at most Maxtrans attempts to each address.
1399 	 *  each cycle send one more message than the previous.
1400 	 *  retry a query via tcp if its response is truncated.
1401 	 */
1402 	for(ndest = 1; ndest < Maxdest; ndest++){
1403 		qp->ndest = ndest;
1404 		qp->tcpset = 0;
1405 		if (xmitquery(qp, Udp, depth, obuf, inns, len) < 0)
1406 			break;
1407 
1408 		endms = timems() + waitms;
1409 		if(endms > qp->req->aborttime)
1410 			endms = qp->req->aborttime;
1411 
1412 		for(replywaits = 0; replywaits < ndest; replywaits++){
1413 			DNSmsg m;
1414 
1415 			procsetname("reading %sside reply from %I: %s %s from %s",
1416 				(inns? "in": "out"), obuf, qp->dp->name,
1417 				rrname(qp->type, buf, sizeof buf), qp->req->from);
1418 
1419 			/* read udp answer into m */
1420 			if (readreply(qp, Udp, req, ibuf, &m, endms) >= 0)
1421 				memmove(srcip, ibuf, IPaddrlen);
1422 			else if (!(m.flags & Ftrunc)) {
1423 				freeanswers(&m);
1424 				break;		/* timed out on this dest */
1425 			} else {
1426 				/* whoops, it was truncated! ask again via tcp */
1427 				freeanswers(&m);
1428 				rv = tcpquery(qp, &m, depth, ibuf, obuf, len,
1429 					waitms, inns, req);  /* answer in m */
1430 				if (rv < 0) {
1431 					freeanswers(&m);
1432 					break;		/* failed via tcp too */
1433 				}
1434 				memmove(srcip, qp->tcpip, IPaddrlen);
1435 			}
1436 
1437 			/* find responder */
1438 			// dnslog("queryns got reply from %I", srcip);
1439 			for(p = qp->dest; p < qp->curdest; p++)
1440 				if(memcmp(p->a, srcip, sizeof p->a) == 0)
1441 					break;
1442 
1443 			/* remove all addrs of responding server from list */
1444 			for(np = qp->dest; np < qp->curdest; np++)
1445 				if(np->s == p->s)
1446 					np->nx = Maxtrans;
1447 
1448 			/* free or incorporate RRs in m */
1449 			rv = procansw(qp, &m, srcip, depth, p);
1450 			if (rv > Answnone) {
1451 				free(qp->dest);
1452 				qp->dest = qp->curdest = nil; /* prevent accidents */
1453 				return rv;
1454 			}
1455 		}
1456 	}
1457 
1458 	/* if all servers returned failure, propagate it */
1459 	qp->dp->respcode = Rserver;
1460 	for(p = dest; p < qp->curdest; p++) {
1461 		destck(p);
1462 		if(p->code != Rserver)
1463 			qp->dp->respcode = Rok;
1464 		p->magic = 0;			/* prevent accidents */
1465 	}
1466 
1467 //	if (qp->dp->respcode)
1468 //		dnslog("queryns setting Rserver for %s", qp->dp->name);
1469 
1470 	free(qp->dest);
1471 	qp->dest = qp->curdest = nil;		/* prevent accidents */
1472 	return Answnone;
1473 }
1474 
1475 /*
1476  *  run a command with a supplied fd as standard input
1477  */
1478 char *
system(int fd,char * cmd)1479 system(int fd, char *cmd)
1480 {
1481 	int pid, p, i;
1482 	static Waitmsg msg;
1483 
1484 	if((pid = fork()) == -1)
1485 		sysfatal("fork failed: %r");
1486 	else if(pid == 0){
1487 		dup(fd, 0);
1488 		close(fd);
1489 		for (i = 3; i < 200; i++)
1490 			close(i);		/* don't leak fds */
1491 		execl("/bin/rc", "rc", "-c", cmd, nil);
1492 		sysfatal("exec rc: %r");
1493 	}
1494 	for(p = waitpid(); p >= 0; p = waitpid())
1495 		if(p == pid)
1496 			return msg.msg;
1497 	return "lost child";
1498 }
1499 
1500 /* compute wait, weighted by probability of success, with bounds */
1501 static ulong
weight(ulong ms,unsigned pcntprob)1502 weight(ulong ms, unsigned pcntprob)
1503 {
1504 	ulong wait;
1505 
1506 	wait = (ms * pcntprob) / 100;
1507 	if (wait < Minwaitms)
1508 		wait = Minwaitms;
1509 	if (wait > Maxwaitms)
1510 		wait = Maxwaitms;
1511 	return wait;
1512 }
1513 
1514 /*
1515  * in principle we could use a single descriptor for a udp port
1516  * to send all queries and receive all the answers to them,
1517  * but we'd have to sort out the answers by dns-query id.
1518  */
1519 static int
udpquery(Query * qp,char * mntpt,int depth,int patient,int inns)1520 udpquery(Query *qp, char *mntpt, int depth, int patient, int inns)
1521 {
1522 	int fd, rv;
1523 	ulong now, pcntprob;
1524 	uvlong wait, reqtm;
1525 	char *msg;
1526 	uchar *obuf, *ibuf;
1527 	static QLock mntlck;
1528 	static ulong lastmount;
1529 
1530 	/* use alloced buffers rather than ones from the stack */
1531 	ibuf = emalloc(64*1024);		/* max. tcp reply size */
1532 	obuf = emalloc(Maxpayload+Udphdrsize);
1533 
1534 	fd = udpport(mntpt);
1535 	while (fd < 0 && cfg.straddle && strcmp(mntpt, "/net.alt") == 0) {
1536 		/* HACK: remount /net.alt */
1537 		now = time(nil);
1538 		if (now < lastmount + Remntretry)
1539 			sleep(S2MS(lastmount + Remntretry - now));
1540 		qlock(&mntlck);
1541 		fd = udpport(mntpt);	/* try again under lock */
1542 		if (fd < 0) {
1543 			dnslog("[%d] remounting /net.alt", getpid());
1544 			unmount(nil, "/net.alt");
1545 
1546 			msg = system(open("/dev/null", ORDWR), "outside");
1547 
1548 			lastmount = time(nil);
1549 			if (msg && *msg) {
1550 				dnslog("[%d] can't remount /net.alt: %s",
1551 					getpid(), msg);
1552 				sleep(10*1000);	/* don't spin remounting */
1553 			} else
1554 				fd = udpport(mntpt);
1555 		}
1556 		qunlock(&mntlck);
1557 	}
1558 	if (fd < 0) {
1559 		dnslog("can't get udpport for %s query of name %s: %r",
1560 			mntpt, qp->dp->name);
1561 		sysfatal("out of udp conversations");	/* we're buggered */
1562 	}
1563 
1564 	/*
1565 	 * Our QIP servers are busted and respond to AAAA and CNAME queries
1566 	 * with (sometimes malformed [too short] packets and) no answers and
1567 	 * just NS RRs but not Rname errors.  so make time-to-wait
1568 	 * proportional to estimated probability of an RR of that type existing.
1569 	 */
1570 	if (qp->type >= nelem(likely))
1571 		pcntprob = 35;			/* unpopular query type */
1572 	else
1573 		pcntprob = likely[qp->type];
1574 	reqtm = (patient? 2 * Maxreqtm: Maxreqtm);
1575 	wait = weight(reqtm / 3, pcntprob);	/* time for one udp query */
1576 	qp->req->aborttime = timems() + 3*wait; /* for all udp queries */
1577 
1578 	qp->udpfd = fd;
1579 	rv = queryns(qp, depth, ibuf, obuf, wait, inns);
1580 	close(fd);
1581 	qp->udpfd = -1;
1582 
1583 	free(obuf);
1584 	free(ibuf);
1585 	return rv;
1586 }
1587 
1588 /*
1589  * look up (qp->dp->name, qp->type) rr in dns,
1590  * using nameservers in qp->nsrp.
1591  */
1592 static int
netquery(Query * qp,int depth)1593 netquery(Query *qp, int depth)
1594 {
1595 	int lock, rv, triedin, inname;
1596 	char buf[32];
1597 	RR *rp;
1598 	DN *dp;
1599 	Querylck *qlp;
1600 	static int whined;
1601 
1602 	rv = Answnone;			/* pessimism */
1603 	if(depth > 12)			/* in a recursive loop? */
1604 		return Answnone;
1605 
1606 	slave(qp->req);
1607 	/*
1608 	 * slave might have forked.  if so, the parent process longjmped to
1609 	 * req->mret; we're usually the child slave, but if there are too
1610 	 * many children already, we're still the same process.
1611 	 */
1612 
1613 	/*
1614 	 * don't lock before call to slave so only children can block.
1615 	 * just lock at top-level invocation.
1616 	 */
1617 	lock = depth <= 1 && qp->req->isslave;
1618 	dp = qp->dp;		/* ensure that it doesn't change underfoot */
1619 	qlp = nil;
1620 	if(lock) {
1621 		procsetname("query lock wait: %s %s from %s", dp->name,
1622 			rrname(qp->type, buf, sizeof buf), qp->req->from);
1623 		/*
1624 		 * don't make concurrent queries for this name.
1625 		 * dozens of processes blocking here probably indicates
1626 		 * an error in our dns data that causes us to not
1627 		 * recognise a zone (area) as one of our own, thus
1628 		 * causing us to query other nameservers.
1629 		 */
1630 		qlp = &dp->querylck[qtype2lck(qp->type)];
1631 		qlock(qlp);
1632 		if (qlp->Ref.ref > Maxoutstanding) {
1633 			qunlock(qlp);
1634 			if (!whined) {
1635 				whined = 1;
1636 				dnslog("too many outstanding queries for %s;"
1637 					" dropping this one; no further logging"
1638 					" of drops", dp->name);
1639 			}
1640 			return 0;
1641 		}
1642 		++qlp->Ref.ref;
1643 		qunlock(qlp);
1644 	}
1645 	procsetname("netquery: %s", dp->name);
1646 
1647 	/* prepare server RR's for incremental lookup */
1648 	for(rp = qp->nsrp; rp; rp = rp->next)
1649 		rp->marker = 0;
1650 
1651 	triedin = 0;
1652 
1653 	/*
1654 	 * normal resolvers and servers will just use mntpt for all addresses,
1655 	 * even on the outside.  straddling servers will use mntpt (/net)
1656 	 * for inside addresses and /net.alt for outside addresses,
1657 	 * thus bypassing other inside nameservers.
1658 	 */
1659 	inname = insideaddr(dp->name);
1660 	if (!cfg.straddle || inname) {
1661 		rv = udpquery(qp, mntpt, depth, Hurry, (cfg.inside? Inns: Outns));
1662 		triedin = 1;
1663 	}
1664 
1665 	/*
1666 	 * if we're still looking, are inside, and have an outside domain,
1667 	 * try it on our outside interface, if any.
1668 	 */
1669 	if (rv == Answnone && cfg.inside && !inname) {
1670 		if (triedin)
1671 			dnslog(
1672 	   "[%d] netquery: internal nameservers failed for %s; trying external",
1673 				getpid(), dp->name);
1674 
1675 		/* prepare server RR's for incremental lookup */
1676 		for(rp = qp->nsrp; rp; rp = rp->next)
1677 			rp->marker = 0;
1678 
1679 		rv = udpquery(qp, "/net.alt", depth, Patient, Outns);
1680 	}
1681 //	if (rv == Answnone)		/* could ask /net.alt/dns directly */
1682 //		askoutdns(dp, qp->type);
1683 
1684 	if(lock && qlp) {
1685 		qlock(qlp);
1686 		assert(qlp->Ref.ref > 0);
1687 		qunlock(qlp);
1688 		decref(qlp);
1689 	}
1690 	return rv;
1691 }
1692 
1693 int
seerootns(void)1694 seerootns(void)
1695 {
1696 	int rv;
1697 	char root[] = "";
1698 	Request req;
1699 	RR *rr;
1700 	Query *qp;
1701 
1702 	memset(&req, 0, sizeof req);
1703 	req.isslave = 1;
1704 	req.aborttime = timems() + Maxreqtm;
1705 	req.from = "internal";
1706 
1707 	qp = emalloc(sizeof *qp);
1708 	queryinit(qp, dnlookup(root, Cin, 1), Tns, &req);
1709 	qp->nsrp = dblookup(root, Cin, Tns, 0, 0);
1710 	for (rr = qp->nsrp; rr != nil; rr = rr->next)	/* DEBUG */
1711 		dnslog("seerootns query nsrp: %R", rr);
1712 
1713 	rv = netquery(qp, 0);		/* lookup ". ns" using qp->nsrp */
1714 
1715 	rrfreelist(qp->nsrp);
1716 	querydestroy(qp);
1717 	free(qp);
1718 	return rv;
1719 }
1720