xref: /plan9-contrib/sys/src/cmd/ndb/dnresolve.c (revision 82f6abee43a55f41b2fe672a88aaa23f21f003ab)
1 /*
2  * domain name resolvers, see rfcs 1035 and 1123
3  */
4 #include <u.h>
5 #include <libc.h>
6 #include <ip.h>
7 #include <bio.h>
8 #include <ndb.h>
9 #include "dns.h"
10 
/* short names for the structures private to this file */
typedef struct Query Query;
typedef struct Dest Dest;
typedef struct Ipaddr Ipaddr;
14 
enum
{
	/* transport (medium) used to carry a query */
	Udp		= 0,
	Tcp		= 1,

	/* answer counts: negative is an error, zero means no answers */
	Answerr		= -1,
	Answnone	= 0,

	Maxdest		= 24,	/* maximum destinations for a request message */
	Maxoutstanding	= 15,	/* max. outstanding queries per domain name */
	Remntretry	= 15,	/* min. sec.s between /net.alt remount tries */

	/*
	 * time-outs and retry limits.  these are longer than the values
	 * used previously (Maxtrans 3, Maxretries 3, Maxwaitms 1000,
	 * Minwaitms 100), primarily for the benefit of remote nameservers
	 * querying us during times of bad connectivity.
	 */
	Maxtrans	= 5,	/* maximum transmissions to a server */
	Maxretries	= 5,	/* cname+actual resends: was 32; have pity on user */
	Maxwaitms	= 5000,	/* wait no longer for a remote dns query */
	Minwaitms	= 500,	/* willing to wait for a remote dns query */

	/* sanity markers stored in Dest and Query; see destck, queryck */
	Destmagic	= 0xcafebabe,
	Querymagic	= 0xdeadbeef,
};

enum
{
	Hurry	= 0,
	Patient	= 1,
};

enum
{
	Outns	= 0,
	Inns	= 1,
};
46 
47 struct Ipaddr {
48 	Ipaddr *next;
49 	uchar	ip[IPaddrlen];
50 };
51 
52 struct Dest
53 {
54 	uchar	a[IPaddrlen];	/* ip address */
55 	DN	*s;		/* name server */
56 	int	nx;		/* number of transmissions */
57 	int	code;		/* response code; used to clear dp->respcode */
58 
59 	ulong	magic;
60 };
61 
62 /*
63  * Query has a QLock in it, thus it can't be an automatic
64  * variable, since each process would see a separate copy
65  * of the lock on its stack.
66  */
67 struct Query {
68 	DN	*dp;		/* domain */
69 	ushort	type;		/* and type to look up */
70 	Request *req;
71 	RR	*nsrp;		/* name servers to consult */
72 
73 	/* dest must not be on the stack due to forking in slave() */
74 	Dest	*dest;		/* array of destinations */
75 	Dest	*curdest;	/* pointer to next to fill */
76 	int	ndest;		/* transmit to this many on this round */
77 
78 	int	udpfd;
79 
80 	QLock	tcplock;	/* only one tcp call at a time per query */
81 	int	tcpset;
82 	int	tcpfd;		/* if Tcp, read replies from here */
83 	int	tcpctlfd;
84 	uchar	tcpip[IPaddrlen];
85 
86 	ulong	magic;
87 };
88 
89 /* estimated % probability of such a record existing at all */
90 int likely[] = {
91 	[Ta]		95,
92 	[Taaaa]		10,
93 	[Tcname]	15,
94 	[Tmx]		60,
95 	[Tns]		90,
96 	[Tnull]		5,
97 	[Tptr]		35,
98 	[Tsoa]		90,
99 	[Tsrv]		60,
100 	[Ttxt]		15,
101 	[Tall]		95,
102 };
103 
104 static RR*	dnresolve1(char*, int, int, Request*, int, int);
105 static int	netquery(Query *, int);
106 
107 /*
108  * reading /proc/pid/args yields either "name args" or "name [display args]",
109  * so return only display args, if any.
110  */
111 static char *
112 procgetname(void)
113 {
114 	int fd, n;
115 	char *lp, *rp;
116 	char buf[256];
117 
118 	snprint(buf, sizeof buf, "#p/%d/args", getpid());
119 	if((fd = open(buf, OREAD)) < 0)
120 		return strdup("");
121 	*buf = '\0';
122 	n = read(fd, buf, sizeof buf-1);
123 	close(fd);
124 	if (n >= 0)
125 		buf[n] = '\0';
126 	if ((lp = strchr(buf, '[')) == nil ||
127 	    (rp = strrchr(buf, ']')) == nil)
128 		return strdup("");
129 	*rp = '\0';
130 	return strdup(lp+1);
131 }
132 
133 void
134 rrfreelistptr(RR **rpp)
135 {
136 	RR *rp;
137 
138 	if (rpp == nil || *rpp == nil)
139 		return;
140 	rp = *rpp;
141 	*rpp = nil;	/* update pointer in memory before freeing list */
142 	rrfreelist(rp);
143 }
144 
145 /*
146  *  lookup 'type' info for domain name 'name'.  If it doesn't exist, try
147  *  looking it up as a canonical name.
148  *
149  *  this process can be quite slow if time-outs are set too high when querying
150  *  nameservers that just don't respond to certain query types.  in that case,
151  *  there will be multiple udp retries, multiple nameservers will be queried,
152  *  and this will be repeated for a cname query.  the whole thing will be
153  *  retried several times until we get an answer or a time-out.
154  */
155 RR*
156 dnresolve(char *name, int class, int type, Request *req, RR **cn, int depth,
157 	int recurse, int rooted, int *status)
158 {
159 	RR *rp, *nrp, *drp;
160 	DN *dp;
161 	int loops;
162 	char *procname;
163 	char nname[Domlen];
164 
165 	if(status)
166 		*status = 0;
167 
168 	if(depth > 12)			/* in a recursive loop? */
169 		return nil;
170 
171 	procname = procgetname();
172 	/*
173 	 *  hack for systems that don't have resolve search
174 	 *  lists.  Just look up the simple name in the database.
175 	 */
176 	if(!rooted && strchr(name, '.') == nil){
177 		rp = nil;
178 		drp = domainlist(class);
179 		for(nrp = drp; rp == nil && nrp != nil; nrp = nrp->next){
180 			snprint(nname, sizeof nname, "%s.%s", name,
181 				nrp->ptr->name);
182 			rp = dnresolve(nname, class, type, req, cn, depth+1,
183 				recurse, rooted, status);
184 			lock(&dnlock);
185 			rrfreelist(rrremneg(&rp));
186 			unlock(&dnlock);
187 		}
188 		if(drp != nil)
189 			rrfreelist(drp);
190 		procsetname(procname);
191 		free(procname);
192 		return rp;
193 	}
194 
195 	/*
196 	 *  try the name directly
197 	 */
198 	rp = dnresolve1(name, class, type, req, depth, recurse);
199 	if(rp == nil) {
200 		/*
201 		 * try it as a canonical name if we weren't told
202 		 * that the name didn't exist
203 		 */
204 		dp = dnlookup(name, class, 0);
205 		if(type != Tptr && dp->respcode != Rname)
206 			for(loops = 0; rp == nil && loops < Maxretries; loops++){
207 				/* retry cname, then the actual type */
208 				rp = dnresolve1(name, class, Tcname, req,
209 					depth, recurse);
210 				if(rp == nil)
211 					break;
212 
213 				/* rp->host == nil shouldn't happen, but does */
214 				if(rp->negative || rp->host == nil){
215 					rrfreelist(rp);
216 					rp = nil;
217 					break;
218 				}
219 
220 				name = rp->host->name;
221 				lock(&dnlock);
222 				if(cn)
223 					rrcat(cn, rp);
224 				else
225 					rrfreelist(rp);
226 				unlock(&dnlock);
227 
228 				rp = dnresolve1(name, class, type, req,
229 					depth, recurse);
230 			}
231 
232 		/* distinction between not found and not good */
233 		if(rp == nil && status != nil && dp->respcode != Rok)
234 			*status = dp->respcode;
235 	}
236 	procsetname(procname);
237 	free(procname);
238 	return randomize(rp);
239 }
240 
241 static void
242 queryinit(Query *qp, DN *dp, int type, Request *req)
243 {
244 	memset(qp, 0, sizeof *qp);
245 	qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
246 	qp->dp = dp;
247 	qp->type = type;
248 	if (qp->type != type)
249 		dnslog("queryinit: bogus type %d", type);
250 	qp->req = req;
251 	qp->nsrp = nil;
252 	qp->dest = qp->curdest = nil;
253 	qp->magic = Querymagic;
254 }
255 
256 static void
257 queryck(Query *qp)
258 {
259 	assert(qp);
260 	assert(qp->magic == Querymagic);
261 }
262 
263 static void
264 querydestroy(Query *qp)
265 {
266 	queryck(qp);
267 	/* leave udpfd open */
268 	if (qp->tcpfd > 0)
269 		close(qp->tcpfd);
270 	if (qp->tcpctlfd > 0) {
271 		hangup(qp->tcpctlfd);
272 		close(qp->tcpctlfd);
273 	}
274 	free(qp->dest);
275 	memset(qp, 0, sizeof *qp);	/* prevent accidents */
276 	qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
277 }
278 
279 static void
280 destinit(Dest *p)
281 {
282 	memset(p, 0, sizeof *p);
283 	p->magic = Destmagic;
284 }
285 
286 static void
287 destck(Dest *p)
288 {
289 	assert(p);
290 	assert(p->magic == Destmagic);
291 }
292 
293 /*
294  * if the response to a query hasn't arrived within 100 ms.,
295  * it's unlikely to arrive at all.  after 1 s., it's really unlikely.
296  * queries for missing RRs are likely to produce time-outs rather than
297  * negative responses, so cname and aaaa queries are likely to time out,
298  * thus we don't wait very long for them.
299  */
300 static void
301 notestats(vlong start, int tmout, int type)
302 {
303 	qlock(&stats);
304 	if (tmout) {
305 		stats.tmout++;
306 		if (type == Taaaa)
307 			stats.tmoutv6++;
308 		else if (type == Tcname)
309 			stats.tmoutcname++;
310 	} else {
311 		long wait10ths = NS2MS(nsec() - start) / 100;
312 
313 		if (wait10ths <= 0)
314 			stats.under10ths[0]++;
315 		else if (wait10ths >= nelem(stats.under10ths))
316 			stats.under10ths[nelem(stats.under10ths) - 1]++;
317 		else
318 			stats.under10ths[wait10ths]++;
319 	}
320 	qunlock(&stats);
321 }
322 
323 static void
324 noteinmem(void)
325 {
326 	qlock(&stats);
327 	stats.answinmem++;
328 	qunlock(&stats);
329 }
330 
331 /* netquery with given name servers, free ns rrs when done */
332 static int
333 netqueryns(Query *qp, int depth, RR *nsrp)
334 {
335 	int rv;
336 
337 	qp->nsrp = nsrp;
338 	rv = netquery(qp, depth);
339 	lock(&dnlock);
340 	rrfreelist(nsrp);
341 	unlock(&dnlock);
342 	return rv;
343 }
344 
345 static RR*
346 issuequery(Query *qp, char *name, int class, int depth, int recurse)
347 {
348 	char *cp;
349 	DN *nsdp;
350 	RR *rp, *nsrp, *dbnsrp;
351 
352 	/*
353 	 *  if we're running as just a resolver, query our
354 	 *  designated name servers
355 	 */
356 	if(cfg.resolver){
357 		nsrp = randomize(getdnsservers(class));
358 		if(nsrp != nil)
359 			if(netqueryns(qp, depth+1, nsrp) > Answnone)
360 				return rrlookup(qp->dp, qp->type, OKneg);
361 	}
362 
363 	/*
364  	 *  walk up the domain name looking for
365 	 *  a name server for the domain.
366 	 */
367 	for(cp = name; cp; cp = walkup(cp)){
368 		/*
369 		 *  if this is a local (served by us) domain,
370 		 *  return answer
371 		 */
372 		dbnsrp = randomize(dblookup(cp, class, Tns, 0, 0));
373 		if(dbnsrp && dbnsrp->local){
374 			rp = dblookup(name, class, qp->type, 1, dbnsrp->ttl);
375 			lock(&dnlock);
376 			rrfreelist(dbnsrp);
377 			unlock(&dnlock);
378 			return rp;
379 		}
380 
381 		/*
382 		 *  if recursion isn't set, just accept local
383 		 *  entries
384 		 */
385 		if(recurse == Dontrecurse){
386 			if(dbnsrp) {
387 				lock(&dnlock);
388 				rrfreelist(dbnsrp);
389 				unlock(&dnlock);
390 			}
391 			continue;
392 		}
393 
394 		/* look for ns in cache */
395 		nsdp = dnlookup(cp, class, 0);
396 		nsrp = nil;
397 		if(nsdp)
398 			nsrp = randomize(rrlookup(nsdp, Tns, NOneg));
399 
400 		/* if the entry timed out, ignore it */
401 		if(nsrp && nsrp->ttl < now){
402 			lock(&dnlock);
403 			rrfreelistptr(&nsrp);
404 			unlock(&dnlock);
405 		}
406 
407 		if(nsrp){
408 			lock(&dnlock);
409 			rrfreelistptr(&dbnsrp);
410 			unlock(&dnlock);
411 
412 			/* query the name servers found in cache */
413 			if(netqueryns(qp, depth+1, nsrp) > Answnone)
414 				return rrlookup(qp->dp, qp->type, OKneg);
415 		} else if(dbnsrp)
416 			/* try the name servers found in db */
417 			if(netqueryns(qp, depth+1, dbnsrp) > Answnone)
418 				return rrlookup(qp->dp, qp->type, NOneg);
419 	}
420 	return nil;
421 }
422 
423 static RR*
424 dnresolve1(char *name, int class, int type, Request *req, int depth,
425 	int recurse)
426 {
427 	Area *area;
428 	DN *dp;
429 	RR *rp;
430 	Query *qp;
431 
432 	if(debug)
433 		dnslog("[%d] dnresolve1 %s %d %d", getpid(), name, type, class);
434 
435 	/* only class Cin implemented so far */
436 	if(class != Cin)
437 		return nil;
438 
439 	dp = dnlookup(name, class, 1);
440 
441 	/*
442 	 *  Try the cache first
443 	 */
444 	rp = rrlookup(dp, type, OKneg);
445 	if(rp)
446 		if(rp->db){
447 			/* unauthoritative db entries are hints */
448 			if(rp->auth) {
449 				noteinmem();
450 				if(debug)
451 					dnslog("[%d] dnresolve1 %s %d %d: auth rr in db",
452 						getpid(), name, type, class);
453 				return rp;
454 			}
455 		} else
456 			/* cached entry must still be valid */
457 			if(rp->ttl > now)
458 				/* but Tall entries are special */
459 				if(type != Tall || rp->query == Tall) {
460 					noteinmem();
461 					if(debug)
462 						dnslog("[%d] dnresolve1 %s %d %d: rr not in db",
463 							getpid(), name, type, class);
464 					return rp;
465 				}
466 	lock(&dnlock);
467 	rrfreelist(rp);
468 	unlock(&dnlock);
469 	rp = nil;		/* accident prevention */
470 	USED(rp);
471 
472 	/*
473 	 * try the cache for a canonical name. if found punt
474 	 * since we'll find it during the canonical name search
475 	 * in dnresolve().
476 	 */
477 	if(type != Tcname){
478 		rp = rrlookup(dp, Tcname, NOneg);
479 		lock(&dnlock);
480 		rrfreelist(rp);
481 		unlock(&dnlock);
482 		if(rp){
483 			if(debug)
484 				dnslog("[%d] dnresolve1 %s %d %d: rr from rrlookup for non-cname",
485 					getpid(), name, type, class);
486 			return nil;
487 		}
488 	}
489 
490 	/*
491 	 * if the domain name is within an area of ours,
492 	 * we should have found its data in memory by now.
493 	 */
494 	area = inmyarea(dp->name);
495 	if (area || strncmp(dp->name, "local#", 6) == 0) {
496 //		char buf[32];
497 
498 //		dnslog("%s %s: no data in area %s", dp->name,
499 //			rrname(type, buf, sizeof buf), area->soarr->owner->name);
500 		return nil;
501 	}
502 
503 	qp = emalloc(sizeof *qp);
504 	queryinit(qp, dp, type, req);
505 	rp = issuequery(qp, name, class, depth, recurse);
506 	querydestroy(qp);
507 	free(qp);
508 	if(rp){
509 		if(debug)
510 			dnslog("[%d] dnresolve1 %s %d %d: rr from query",
511 				getpid(), name, type, class);
512 		return rp;
513 	}
514 
515 	/* settle for a non-authoritative answer */
516 	rp = rrlookup(dp, type, OKneg);
517 	if(rp){
518 		if(debug)
519 			dnslog("[%d] dnresolve1 %s %d %d: rr from rrlookup",
520 				getpid(), name, type, class);
521 		return rp;
522 	}
523 
524 	/* noone answered.  try the database, we might have a chance. */
525 	rp = dblookup(name, class, type, 0, 0);
526 	if (rp) {
527 		if(debug)
528 			dnslog("[%d] dnresolve1 %s %d %d: rr from dblookup",
529 				getpid(), name, type, class);
530 	}else{
531 		if(debug)
532 			dnslog("[%d] dnresolve1 %s %d %d: no rr from dblookup; crapped out",
533 				getpid(), name, type, class);
534 	}
535 	return rp;
536 }
537 
/*
 *  step from a domain name to its parent: return a pointer to the
 *  text just past the first '.' in name.  a name without a dot has
 *  the root, "", as its parent; the empty name has none, so 0 is
 *  returned for it.
 */
char*
walkup(char *name)
{
	char *dot;

	dot = strchr(name, '.');
	if(dot != 0)
		return dot + 1;
	if(*name == '\0')
		return 0;
	return "";
}
556 
557 /*
558  *  Get a udp port for sending requests and reading replies.  Put the port
559  *  into "headers" mode.
560  */
561 static char *hmsg = "headers";
562 
563 int
564 udpport(char *mtpt)
565 {
566 	int fd, ctl;
567 	char ds[64], adir[64];
568 
569 	/* get a udp port */
570 	snprint(ds, sizeof ds, "%s/udp!*!0", (mtpt? mtpt: "/net"));
571 	ctl = announce(ds, adir);
572 	if(ctl < 0){
573 		/* warning("can't get udp port"); */
574 		return -1;
575 	}
576 
577 	/* turn on header style interface */
578 	if(write(ctl, hmsg, strlen(hmsg)) != strlen(hmsg)){
579 		close(ctl);
580 		warning(hmsg);
581 		return -1;
582 	}
583 
584 	/* grab the data file */
585 	snprint(ds, sizeof ds, "%s/data", adir);
586 	fd = open(ds, ORDWR);
587 	close(ctl);
588 	if(fd < 0)
589 		warning("can't open udp port %s: %r", ds);
590 	return fd;
591 }
592 
593 void
594 initdnsmsg(DNSmsg *mp, RR *rp, int flags, ushort reqno)
595 {
596 	mp->flags = flags;
597 	mp->id = reqno;
598 	mp->qd = rp;
599 	if(rp != nil)
600 		mp->qdcount = 1;
601 }
602 
603 DNSmsg *
604 newdnsmsg(RR *rp, int flags, ushort reqno)
605 {
606 	DNSmsg *mp;
607 
608 	mp = emalloc(sizeof *mp);
609 	initdnsmsg(mp, rp, flags, reqno);
610 	return mp;
611 }
612 
613 /* generate a DNS UDP query packet */
614 int
615 mkreq(DN *dp, int type, uchar *buf, int flags, ushort reqno)
616 {
617 	DNSmsg m;
618 	int len;
619 	Udphdr *uh = (Udphdr*)buf;
620 	RR *rp;
621 
622 	/* stuff port number into output buffer */
623 	memset(uh, 0, sizeof *uh);
624 	hnputs(uh->rport, 53);
625 
626 	/* make request and convert it to output format */
627 	memset(&m, 0, sizeof m);
628 	rp = rralloc(type);
629 	rp->owner = dp;
630 	initdnsmsg(&m, rp, flags, reqno);
631 	len = convDNS2M(&m, &buf[Udphdrsize], Maxudp);
632 	rrfreelistptr(&m.qd);
633 	memset(&m, 0, sizeof m);		/* cause trouble */
634 	return len;
635 }
636 
637 void
638 freeanswers(DNSmsg *mp)
639 {
640 	lock(&dnlock);
641 	rrfreelistptr(&mp->qd);
642 	rrfreelistptr(&mp->an);
643 	rrfreelistptr(&mp->ns);
644 	rrfreelistptr(&mp->ar);
645 	unlock(&dnlock);
646 	mp->qdcount = mp->ancount = mp->nscount = mp->arcount = 0;
647 }
648 
649 /* timed read of reply.  sets srcip.  ibuf must be 64K to handle tcp answers. */
650 static int
651 readnet(Query *qp, int medium, uchar *ibuf, uvlong endms, uchar **replyp,
652 	uchar *srcip)
653 {
654 	int len, fd;
655 	long ms;
656 	vlong startns = nsec();
657 	uchar *reply;
658 	uchar lenbuf[2];
659 
660 	len = -1;			/* pessimism */
661 	ms = endms - NS2MS(startns);
662 	if (ms <= 0)
663 		return -1;		/* taking too long */
664 
665 	reply = ibuf;
666 	memset(srcip, 0, IPaddrlen);
667 	alarm(ms);
668 	if (medium == Udp)
669 		if (qp->udpfd <= 0)
670 			dnslog("readnet: qp->udpfd closed");
671 		else {
672 			len = read(qp->udpfd, ibuf, Udphdrsize+Maxudpin);
673 			alarm(0);
674 			notestats(startns, len < 0, qp->type);
675 			if (len >= IPaddrlen)
676 				memmove(srcip, ibuf, IPaddrlen);
677 			if (len >= Udphdrsize) {
678 				len   -= Udphdrsize;
679 				reply += Udphdrsize;
680 			}
681 		}
682 	else {
683 		if (!qp->tcpset)
684 			dnslog("readnet: tcp params not set");
685 		fd = qp->tcpfd;
686 		if (fd <= 0)
687 			dnslog("readnet: %s: tcp fd unset for dest %I",
688 				qp->dp->name, qp->tcpip);
689 		else if (readn(fd, lenbuf, 2) != 2) {
690 			dnslog("readnet: short read of 2-byte tcp msg size from %I",
691 				qp->tcpip);
692 			/* probably a time-out */
693 			notestats(startns, 1, qp->type);
694 		} else {
695 			len = lenbuf[0]<<8 | lenbuf[1];
696 			if (readn(fd, ibuf, len) != len) {
697 				dnslog("readnet: short read of tcp data from %I",
698 					qp->tcpip);
699 				/* probably a time-out */
700 				notestats(startns, 1, qp->type);
701 				len = -1;
702 			}
703 		}
704 		memmove(srcip, qp->tcpip, IPaddrlen);
705 	}
706 	alarm(0);
707 	*replyp = reply;
708 	return len;
709 }
710 
711 /*
712  *  read replies to a request and remember the rrs in the answer(s).
713  *  ignore any of the wrong type.
714  *  wait at most until endms.
715  */
716 static int
717 readreply(Query *qp, int medium, ushort req, uchar *ibuf, DNSmsg *mp,
718 	uvlong endms)
719 {
720 	int len;
721 	char *err;
722 	char tbuf[32];
723 	uchar *reply;
724 	uchar srcip[IPaddrlen];
725 	RR *rp;
726 
727 	queryck(qp);
728 	memset(mp, 0, sizeof *mp);
729 	memset(srcip, 0, sizeof srcip);
730 	if (0)
731 		len = -1;
732 	for (; timems() < endms &&
733 	    (len = readnet(qp, medium, ibuf, endms, &reply, srcip)) >= 0;
734 	    freeanswers(mp)){
735 		/* convert into internal format  */
736 		memset(mp, 0, sizeof *mp);
737 		err = convM2DNS(reply, len, mp, nil);
738 		if (mp->flags & Ftrunc) {
739 			free(err);
740 			freeanswers(mp);
741 			/* notify our caller to retry the query via tcp. */
742 			return -1;
743 		} else if(err){
744 			dnslog("readreply: %s: input err, len %d: %s: %I",
745 				qp->dp->name, len, err, srcip);
746 			free(err);
747 			continue;
748 		}
749 		if(debug)
750 			logreply(qp->req->id, srcip, mp);
751 
752 		/* answering the right question? */
753 		if(mp->id != req)
754 			dnslog("%d: id %d instead of %d: %I", qp->req->id,
755 				mp->id, req, srcip);
756 		else if(mp->qd == 0)
757 			dnslog("%d: no question RR: %I", qp->req->id, srcip);
758 		else if(mp->qd->owner != qp->dp)
759 			dnslog("%d: owner %s instead of %s: %I", qp->req->id,
760 				mp->qd->owner->name, qp->dp->name, srcip);
761 		else if(mp->qd->type != qp->type)
762 			dnslog("%d: qp->type %d instead of %d: %I",
763 				qp->req->id, mp->qd->type, qp->type, srcip);
764 		else {
765 			/* remember what request this is in answer to */
766 			for(rp = mp->an; rp; rp = rp->next)
767 				rp->query = qp->type;
768 			return 0;
769 		}
770 	}
771 	if (timems() >= endms) {
772 		;				/* query expired */
773 	} else if (0) {
774 		/* this happens routinely when a read times out */
775 		dnslog("readreply: %s type %s: ns %I read error or eof "
776 			"(returned %d): %r", qp->dp->name, rrname(qp->type,
777 			tbuf, sizeof tbuf), srcip, len);
778 		if (medium == Udp)
779 			for (rp = qp->nsrp; rp != nil; rp = rp->next)
780 				if (rp->type == Tns)
781 					dnslog("readreply: %s: query sent to "
782 						"ns %s", qp->dp->name,
783 						rp->host->name);
784 	}
785 	return -1;
786 }
787 
788 /*
789  *	return non-0 if first list includes second list
790  */
791 int
792 contains(RR *rp1, RR *rp2)
793 {
794 	RR *trp1, *trp2;
795 
796 	for(trp2 = rp2; trp2; trp2 = trp2->next){
797 		for(trp1 = rp1; trp1; trp1 = trp1->next)
798 			if(trp1->type == trp2->type)
799 			if(trp1->host == trp2->host)
800 			if(trp1->owner == trp2->owner)
801 				break;
802 		if(trp1 == nil)
803 			return 0;
804 	}
805 	return 1;
806 }
807 
808 
809 /*
810  *  return multicast version if any
811  */
812 int
813 ipisbm(uchar *ip)
814 {
815 	if(isv4(ip)){
816 		if (ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0 ||
817 		    ipcmp(ip, IPv4bcast) == 0)
818 			return 4;
819 	} else
820 		if(ip[0] == 0xff)
821 			return 6;
822 	return 0;
823 }
824 
825 /*
826  *  Get next server address(es) into qp->dest[nd] and beyond
827  */
828 static int
829 serveraddrs(Query *qp, int nd, int depth)
830 {
831 	RR *rp, *arp, *trp;
832 	Dest *cur;
833 
834 	if(nd >= Maxdest)		/* dest array is full? */
835 		return Maxdest - 1;
836 
837 	/*
838 	 *  look for a server whose address we already know.
839 	 *  if we find one, mark it so we ignore this on
840 	 *  subsequent passes.
841 	 */
842 	arp = 0;
843 	for(rp = qp->nsrp; rp; rp = rp->next){
844 		assert(rp->magic == RRmagic);
845 		if(rp->marker)
846 			continue;
847 		arp = rrlookup(rp->host, Ta, NOneg);
848 		if(arp == nil)
849 			arp = rrlookup(rp->host, Taaaa, NOneg);
850 		if(arp){
851 			rp->marker = 1;
852 			break;
853 		}
854 		arp = dblookup(rp->host->name, Cin, Ta, 0, 0);
855 		if(arp == nil)
856 			arp = dblookup(rp->host->name, Cin, Taaaa, 0, 0);
857 		if(arp){
858 			rp->marker = 1;
859 			break;
860 		}
861 	}
862 
863 	/*
864 	 *  if the cache and database lookup didn't find any new
865 	 *  server addresses, try resolving one via the network.
866 	 *  Mark any we try to resolve so we don't try a second time.
867 	 */
868 	if(arp == 0)
869 		for(rp = qp->nsrp; rp; rp = rp->next){
870 			if(rp->marker)
871 				continue;
872 			rp->marker = 1;
873 
874 			/*
875 			 *  avoid loops looking up a server under itself
876 			 */
877 			if(subsume(rp->owner->name, rp->host->name))
878 				continue;
879 
880 			arp = dnresolve(rp->host->name, Cin, Ta, qp->req, 0,
881 				depth+1, Recurse, 1, 0);
882 			if(arp == nil)
883 				arp = dnresolve(rp->host->name, Cin, Taaaa,
884 					qp->req, 0, depth+1, Recurse, 1, 0);
885 			lock(&dnlock);
886 			rrfreelist(rrremneg(&arp));
887 			unlock(&dnlock);
888 			if(arp)
889 				break;
890 		}
891 
892 	/* use any addresses that we found */
893 	for(trp = arp; trp && nd < Maxdest; trp = trp->next){
894 		cur = &qp->dest[nd];
895 		parseip(cur->a, trp->ip->name);
896 		/*
897 		 * straddling servers can reject all nameservers if they are all
898 		 * inside, so be sure to list at least one outside ns at
899 		 * the end of the ns list in /lib/ndb for `dom='.
900 		 */
901 		if (ipisbm(cur->a) ||
902 		    cfg.straddle && !insideaddr(qp->dp->name) && insidens(cur->a))
903 			continue;
904 		cur->nx = 0;
905 		cur->s = trp->owner;
906 		cur->code = Rtimeout;
907 		nd++;
908 	}
909 	lock(&dnlock);
910 	rrfreelist(arp);
911 	unlock(&dnlock);
912 	return nd;
913 }
914 
915 /*
916  *  cache negative responses
917  */
918 static void
919 cacheneg(DN *dp, int type, int rcode, RR *soarr)
920 {
921 	RR *rp;
922 	DN *soaowner;
923 	ulong ttl;
924 
925 	stats.negcached++;
926 
927 	/* no cache time specified, don't make anything up */
928 	if(soarr != nil){
929 		lock(&dnlock);
930 		if(soarr->next != nil)
931 			rrfreelistptr(&soarr->next);
932 		unlock(&dnlock);
933 		soaowner = soarr->owner;
934 	} else
935 		soaowner = nil;
936 
937 	/* the attach can cause soarr to be freed so mine it now */
938 	if(soarr != nil && soarr->soa != nil)
939 		ttl = soarr->soa->minttl+now;
940 	else
941 		ttl = 5*Min;
942 
943 	/* add soa and negative RR to the database */
944 	rrattach(soarr, Authoritative);
945 
946 	rp = rralloc(type);
947 	rp->owner = dp;
948 	rp->negative = 1;
949 	rp->negsoaowner = soaowner;
950 	rp->negrcode = rcode;
951 	rp->ttl = ttl;
952 	rrattach(rp, Authoritative);
953 }
954 
955 static int
956 setdestoutns(Dest *p, int n)
957 {
958 	uchar *outns = outsidens(n);
959 
960 	destck(p);
961 	destinit(p);
962 	if (outns == nil) {
963 		if (n == 0)
964 			dnslog("[%d] no outside-ns in ndb", getpid());
965 		return -1;
966 	}
967 	memmove(p->a, outns, sizeof p->a);
968 	p->s = dnlookup("outside-ns-ips", Cin, 1);
969 	return 0;
970 }
971 
972 /*
973  * issue query via UDP or TCP as appropriate.
974  * for TCP, returns with qp->tcpip set from udppkt header.
975  */
976 static int
977 mydnsquery(Query *qp, int medium, uchar *udppkt, int len)
978 {
979 	int rv = -1, nfd;
980 	char *domain;
981 	char conndir[40], net[40];
982 	uchar belen[2];
983 	NetConnInfo *nci;
984 
985 	queryck(qp);
986 	domain = smprint("%I", udppkt);
987 	if (myaddr(domain)) {
988 		dnslog("mydnsquery: trying to send to myself (%s); bzzzt",
989 			domain);
990 		free(domain);
991 		return rv;
992 	}
993 
994 	switch (medium) {
995 	case Udp:
996 		free(domain);
997 		nfd = dup(qp->udpfd, -1);
998 		if (nfd < 0) {
999 			warning("mydnsquery: qp->udpfd %d: %r", qp->udpfd);
1000 			close(qp->udpfd);	/* ensure it's closed */
1001 			qp->udpfd = -1;		/* poison it */
1002 			return rv;
1003 		}
1004 		close(nfd);
1005 
1006 		if (qp->udpfd <= 0)
1007 			dnslog("mydnsquery: qp->udpfd %d closed", qp->udpfd);
1008 		else {
1009 			if (write(qp->udpfd, udppkt, len+Udphdrsize) !=
1010 			    len+Udphdrsize)
1011 				warning("sending udp msg: %r");
1012 			else {
1013 				stats.qsent++;
1014 				rv = 0;
1015 			}
1016 		}
1017 		break;
1018 	case Tcp:
1019 		/* send via TCP & keep fd around for reply */
1020 		snprint(net, sizeof net, "%s/tcp",
1021 			(mntpt[0] != '\0'? mntpt: "/net"));
1022 		alarm(10*1000);
1023 		qp->tcpfd = rv = dial(netmkaddr(domain, net, "dns"), nil,
1024 			conndir, &qp->tcpctlfd);
1025 		alarm(0);
1026 		if (qp->tcpfd < 0) {
1027 			dnslog("can't dial tcp!%s!dns: %r", domain);
1028 			free(domain);
1029 			break;
1030 		}
1031 		free(domain);
1032 		nci = getnetconninfo(conndir, qp->tcpfd);
1033 		if (nci) {
1034 			parseip(qp->tcpip, nci->rsys);
1035 			freenetconninfo(nci);
1036 		} else
1037 			dnslog("mydnsquery: getnetconninfo failed");
1038 		qp->tcpset = 1;
1039 
1040 		belen[0] = len >> 8;
1041 		belen[1] = len;
1042 		if (write(qp->tcpfd, belen, 2) != 2 ||
1043 		    write(qp->tcpfd, udppkt + Udphdrsize, len) != len)
1044 			warning("sending tcp msg: %r");
1045 		break;
1046 	default:
1047 		sysfatal("mydnsquery: bad medium");
1048 	}
1049 	return rv;
1050 }
1051 
1052 /*
1053  * send query to all UDP destinations or one TCP destination,
1054  * taken from obuf (udp packet) header
1055  */
1056 static int
1057 xmitquery(Query *qp, int medium, int depth, uchar *obuf, int inns, int len)
1058 {
1059 	int j, n;
1060 	char buf[32];
1061 	Dest *p;
1062 
1063 	queryck(qp);
1064 	if(timems() >= qp->req->aborttime)
1065 		return -1;
1066 
1067 	/*
1068 	 * get a nameserver address if we need one.
1069 	 * serveraddrs populates qp->dest.
1070 	 */
1071 	p = qp->dest;
1072 	destck(p);
1073 	if (qp->ndest < 0 || qp->ndest > Maxdest) {
1074 		dnslog("qp->ndest %d out of range", qp->ndest);
1075 		abort();
1076 	}
1077 	/*
1078 	 * we're to transmit to more destinations than we currently have,
1079 	 * so get another.
1080 	 */
1081 	if (qp->ndest > qp->curdest - p) {
1082 		j = serveraddrs(qp, qp->curdest - p, depth);
1083 		if (j < 0 || j >= Maxdest) {
1084 			dnslog("serveraddrs() result %d out of range", j);
1085 			abort();
1086 		}
1087 		qp->curdest = &qp->dest[j];
1088 	}
1089 	destck(qp->curdest);
1090 
1091 	/* no servers, punt */
1092 	if (qp->ndest == 0)
1093 		if (cfg.straddle && cfg.inside) {
1094 			/* get ips of "outside-ns-ips" */
1095 			qp->curdest = qp->dest;
1096 			for(n = 0; n < Maxdest; n++, qp->curdest++)
1097 				if (setdestoutns(qp->curdest, n) < 0)
1098 					break;
1099 			if(n == 0)
1100 				dnslog("xmitquery: %s: no outside-ns nameservers",
1101 					qp->dp->name);
1102 		} else
1103 			/* it's probably just a bogus domain, don't log it */
1104 			return -1;
1105 
1106 	/* send to first 'qp->ndest' destinations */
1107 	j = 0;
1108 	if (medium == Tcp) {
1109 		j++;
1110 		queryck(qp);
1111 		assert(qp->dp);
1112 		procsetname("tcp %sside query for %s %s", (inns? "in": "out"),
1113 			qp->dp->name, rrname(qp->type, buf, sizeof buf));
1114 		mydnsquery(qp, medium, obuf, len); /* sets qp->tcpip from obuf */
1115 		if(debug)
1116 			logsend(qp->req->id, depth, qp->tcpip, "", qp->dp->name,
1117 				qp->type);
1118 	} else
1119 		for(; p < &qp->dest[qp->ndest] && p < qp->curdest; p++){
1120 			/* skip destinations we've finished with */
1121 			if(p->nx >= Maxtrans)
1122 				continue;
1123 
1124 			j++;
1125 
1126 			/* exponential backoff of requests */
1127 			if((1<<p->nx) > qp->ndest)
1128 				continue;
1129 
1130 			if(memcmp(p->a, IPnoaddr, sizeof IPnoaddr) == 0)
1131 				continue;		/* mistake */
1132 
1133 			procsetname("udp %sside query to %I/%s %s %s",
1134 				(inns? "in": "out"), p->a, p->s->name,
1135 				qp->dp->name, rrname(qp->type, buf, sizeof buf));
1136 			if(debug)
1137 				logsend(qp->req->id, depth, p->a, p->s->name,
1138 					qp->dp->name, qp->type);
1139 
1140 			/* fill in UDP destination addr & send it */
1141 			memmove(obuf, p->a, sizeof p->a);
1142 			mydnsquery(qp, medium, obuf, len);
1143 			p->nx++;
1144 		}
1145 	if(j == 0) {
1146 		return -1;
1147 	}
1148 	return 0;
1149 }
1150 
1151 static int lckindex[Maxlcks] = {
1152 	0,			/* all others map here */
1153 	Ta,
1154 	Tns,
1155 	Tcname,
1156 	Tsoa,
1157 	Tptr,
1158 	Tmx,
1159 	Ttxt,
1160 	Taaaa,
1161 };
1162 
1163 static int
1164 qtype2lck(int qtype)		/* map query type to querylck index */
1165 {
1166 	int i;
1167 
1168 	for (i = 1; i < nelem(lckindex); i++)
1169 		if (lckindex[i] == qtype)
1170 			return i;
1171 	return 0;
1172 }
1173 
1174 /* is mp a cachable negative response (with Rname set)? */
1175 static int
1176 isnegrname(DNSmsg *mp)
1177 {
1178 	/* TODO: could add || cfg.justforw to RHS of && */
1179 	return mp->an == nil && (mp->flags & Rmask) == Rname;
1180 }
1181 
1182 /* returns Answerr (-1) on errors, else number of answers, which can be zero. */
1183 static int
1184 procansw(Query *qp, DNSmsg *mp, uchar *srcip, int depth, Dest *p)
1185 {
1186 	int rv;
1187 //	int lcktype;
1188 	char buf[32];
1189 	DN *ndp;
1190 	Query *nqp;
1191 	RR *tp, *soarr;
1192 
1193 	if (mp->an == nil)
1194 		stats.negans++;
1195 
1196 	/* ignore any error replies */
1197 	if((mp->flags & Rmask) == Rserver){
1198 		stats.negserver++;
1199 		freeanswers(mp);
1200 		if(p != qp->curdest)
1201 			p->code = Rserver;
1202 		return Answerr;
1203 	}
1204 
1205 	/* ignore any bad delegations */
1206 	if(mp->ns && baddelegation(mp->ns, qp->nsrp, srcip)){
1207 		stats.negbaddeleg++;
1208 		if(mp->an == nil){
1209 			stats.negbdnoans++;
1210 			freeanswers(mp);
1211 			if(p != qp->curdest)
1212 				p->code = Rserver;
1213 			dnslog(" and no answers");
1214 			return Answerr;
1215 		}
1216 		dnslog(" but has answers; ignoring ns");
1217 		lock(&dnlock);
1218 		rrfreelistptr(&mp->ns);
1219 		unlock(&dnlock);
1220 		mp->nscount = 0;
1221 	}
1222 
1223 	/* remove any soa's from the authority section */
1224 	lock(&dnlock);
1225 	soarr = rrremtype(&mp->ns, Tsoa);
1226 
1227 	/* incorporate answers */
1228 	unique(mp->an);
1229 	unique(mp->ns);
1230 	unique(mp->ar);
1231 	unlock(&dnlock);
1232 
1233 	if(mp->an)
1234 		rrattach(mp->an, (mp->flags & Fauth) != 0);
1235 	if(mp->ar)
1236 		rrattach(mp->ar, Notauthoritative);
1237 	if(mp->ns && !cfg.justforw){
1238 		ndp = mp->ns->owner;
1239 		rrattach(mp->ns, Notauthoritative);
1240 	} else {
1241 		ndp = nil;
1242 		lock(&dnlock);
1243 		rrfreelistptr(&mp->ns);
1244 		unlock(&dnlock);
1245 		mp->nscount = 0;
1246 	}
1247 
1248 	/* free the question */
1249 	if(mp->qd) {
1250 		lock(&dnlock);
1251 		rrfreelistptr(&mp->qd);
1252 		unlock(&dnlock);
1253 		mp->qdcount = 0;
1254 	}
1255 
1256 	/*
1257 	 *  Any reply from an authoritative server,
1258 	 *  or a positive reply terminates the search.
1259 	 *  A negative response now also terminates the search.
1260 	 */
1261 	if(mp->an != nil || (mp->flags & Fauth)){
1262 		if(isnegrname(mp))
1263 			qp->dp->respcode = Rname;
1264 		else
1265 			qp->dp->respcode = Rok;
1266 
1267 		/*
1268 		 *  cache any negative responses, free soarr.
1269 		 *  negative responses need not be authoritative:
1270 		 *  they can legitimately come from a cache.
1271 		 */
1272 		if( /* (mp->flags & Fauth) && */ mp->an == nil)
1273 			cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
1274 		else {
1275 			lock(&dnlock);
1276 			rrfreelist(soarr);
1277 			unlock(&dnlock);
1278 		}
1279 		return 1;
1280 	} else if (isnegrname(mp)) {
1281 		qp->dp->respcode = Rname;
1282 		/*
1283 		 *  cache negative response.
1284 		 *  negative responses need not be authoritative:
1285 		 *  they can legitimately come from a cache.
1286 		 */
1287 		cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
1288 		return 1;
1289 	}
1290 	stats.negnorname++;
1291 	lock(&dnlock);
1292 	rrfreelist(soarr);
1293 	unlock(&dnlock);
1294 
1295 	/*
1296 	 *  if we've been given better name servers, recurse.
1297 	 *  if we're a pure resolver, don't recurse, we have
1298 	 *  to forward to a fixed set of named servers.
1299 	 */
1300 	if(!mp->ns || cfg.resolver && cfg.justforw)
1301 		return Answnone;
1302 	tp = rrlookup(ndp, Tns, NOneg);
1303 	if(contains(qp->nsrp, tp)){
1304 		lock(&dnlock);
1305 		rrfreelist(tp);
1306 		unlock(&dnlock);
1307 		return Answnone;
1308 	}
1309 	procsetname("recursive query for %s %s", qp->dp->name,
1310 		rrname(qp->type, buf, sizeof buf));
1311 	/*
1312 	 *  we're called from udpquery, called from
1313 	 *  netquery, which current holds qp->dp->querylck,
1314 	 *  so release it now and acquire it upon return.
1315 	 */
1316 //	lcktype = qtype2lck(qp->type);		/* someday try this again */
1317 //	qunlock(&qp->dp->querylck[lcktype]);
1318 
1319 	nqp = emalloc(sizeof *nqp);
1320 	queryinit(nqp, qp->dp, qp->type, qp->req);
1321 	nqp->nsrp = tp;
1322 	rv = netquery(nqp, depth+1);
1323 
1324 //	qlock(&qp->dp->querylck[lcktype]);
1325 	rrfreelist(nqp->nsrp);
1326 	querydestroy(nqp);
1327 	free(nqp);
1328 	return rv;
1329 }
1330 
1331 /*
1332  * send a query via tcp to a single address (from ibuf's udp header)
1333  * and read the answer(s) into mp->an.
1334  */
1335 static int
1336 tcpquery(Query *qp, DNSmsg *mp, int depth, uchar *ibuf, uchar *obuf, int len,
1337 	ulong waitms, int inns, ushort req)
1338 {
1339 	int rv = 0;
1340 	uvlong endms;
1341 
1342 	endms = timems() + waitms;
1343 	if(endms > qp->req->aborttime)
1344 		endms = qp->req->aborttime;
1345 
1346 	if (0)
1347 		dnslog("%s: udp reply truncated; retrying query via tcp to %I",
1348 			qp->dp->name, qp->tcpip);
1349 
1350 	qlock(&qp->tcplock);
1351 	memmove(obuf, ibuf, IPaddrlen);		/* send back to respondent */
1352 	/* sets qp->tcpip from obuf's udp header */
1353 	if (xmitquery(qp, Tcp, depth, obuf, inns, len) < 0 ||
1354 	    readreply(qp, Tcp, req, ibuf, mp, endms) < 0)
1355 		rv = -1;
1356 	if (qp->tcpfd > 0) {
1357 		hangup(qp->tcpctlfd);
1358 		close(qp->tcpctlfd);
1359 		close(qp->tcpfd);
1360 	}
1361 	qp->tcpfd = qp->tcpctlfd = -1;
1362 	qunlock(&qp->tcplock);
1363 	return rv;
1364 }
1365 
1366 /*
1367  *  query name servers.  fill in obuf with on-the-wire representation of a
1368  *  DNSmsg derived from qp.  if the name server returns a pointer to another
1369  *  name server, recurse.
1370  */
1371 static int
1372 queryns(Query *qp, int depth, uchar *ibuf, uchar *obuf, ulong waitms, int inns)
1373 {
1374 	int ndest, len, replywaits, rv;
1375 	ushort req;
1376 	uvlong endms;
1377 	char buf[12];
1378 	uchar srcip[IPaddrlen];
1379 	Dest *p, *np, *dest;
1380 
1381 	/* pack request into a udp message */
1382 	req = rand();
1383 	len = mkreq(qp->dp, qp->type, obuf, Frecurse|Oquery, req);
1384 
1385 	/* no server addresses yet */
1386 	queryck(qp);
1387 	dest = emalloc(Maxdest * sizeof *dest);	/* dest can't be on stack */
1388 	for (p = dest; p < dest + Maxdest; p++)
1389 		destinit(p);
1390 	/* this dest array is local to this call of queryns() */
1391 	free(qp->dest);
1392 	qp->curdest = qp->dest = dest;
1393 
1394 	/*
1395 	 *  transmit udp requests and wait for answers.
1396 	 *  at most Maxtrans attempts to each address.
1397 	 *  each cycle send one more message than the previous.
1398 	 *  retry a query via tcp if its response is truncated.
1399 	 */
1400 	for(ndest = 1; ndest < Maxdest; ndest++){
1401 		qp->ndest = ndest;
1402 		qp->tcpset = 0;
1403 		if (xmitquery(qp, Udp, depth, obuf, inns, len) < 0)
1404 			break;
1405 
1406 		endms = timems() + waitms;
1407 		if(endms > qp->req->aborttime)
1408 			endms = qp->req->aborttime;
1409 
1410 		for(replywaits = 0; replywaits < ndest; replywaits++){
1411 			DNSmsg m;
1412 
1413 			procsetname("reading %sside reply from %I: %s %s from %s",
1414 				(inns? "in": "out"), obuf, qp->dp->name,
1415 				rrname(qp->type, buf, sizeof buf), qp->req->from);
1416 
1417 			/* read udp answer into m */
1418 			if (readreply(qp, Udp, req, ibuf, &m, endms) >= 0)
1419 				memmove(srcip, ibuf, IPaddrlen);
1420 			else if (!(m.flags & Ftrunc)) {
1421 				freeanswers(&m);
1422 				break;		/* timed out on this dest */
1423 			} else {
1424 				/* whoops, it was truncated! ask again via tcp */
1425 				freeanswers(&m);
1426 				rv = tcpquery(qp, &m, depth, ibuf, obuf, len,
1427 					waitms, inns, req);  /* answer in m */
1428 				if (rv < 0) {
1429 					freeanswers(&m);
1430 					break;		/* failed via tcp too */
1431 				}
1432 				memmove(srcip, qp->tcpip, IPaddrlen);
1433 			}
1434 
1435 			/* find responder */
1436 			// dnslog("queryns got reply from %I", srcip);
1437 			for(p = qp->dest; p < qp->curdest; p++)
1438 				if(memcmp(p->a, srcip, sizeof p->a) == 0)
1439 					break;
1440 
1441 			/* remove all addrs of responding server from list */
1442 			for(np = qp->dest; np < qp->curdest; np++)
1443 				if(np->s == p->s)
1444 					p->nx = Maxtrans;
1445 
1446 			/* free or incorporate RRs in m */
1447 			rv = procansw(qp, &m, srcip, depth, p);
1448 			if (rv > Answnone) {
1449 				free(qp->dest);
1450 				qp->dest = qp->curdest = nil; /* prevent accidents */
1451 				return rv;
1452 			}
1453 		}
1454 	}
1455 
1456 	/* if all servers returned failure, propagate it */
1457 	qp->dp->respcode = Rserver;
1458 	for(p = dest; p < qp->curdest; p++) {
1459 		destck(p);
1460 		if(p->code != Rserver)
1461 			qp->dp->respcode = Rok;
1462 		p->magic = 0;			/* prevent accidents */
1463 	}
1464 
1465 //	if (qp->dp->respcode)
1466 //		dnslog("queryns setting Rserver for %s", qp->dp->name);
1467 
1468 	free(qp->dest);
1469 	qp->dest = qp->curdest = nil;		/* prevent accidents */
1470 	return Answnone;
1471 }
1472 
1473 /*
1474  *  run a command with a supplied fd as standard input
1475  */
1476 char *
1477 system(int fd, char *cmd)
1478 {
1479 	int pid, p, i;
1480 	static Waitmsg msg;
1481 
1482 	if((pid = fork()) == -1)
1483 		sysfatal("fork failed: %r");
1484 	else if(pid == 0){
1485 		dup(fd, 0);
1486 		close(fd);
1487 		for (i = 3; i < 200; i++)
1488 			close(i);		/* don't leak fds */
1489 		execl("/bin/rc", "rc", "-c", cmd, nil);
1490 		sysfatal("exec rc: %r");
1491 	}
1492 	for(p = waitpid(); p >= 0; p = waitpid())
1493 		if(p == pid)
1494 			return msg.msg;
1495 	return "lost child";
1496 }
1497 
1498 /* compute wait, weighted by probability of success, with bounds */
1499 static ulong
1500 weight(ulong ms, unsigned pcntprob)
1501 {
1502 	ulong wait;
1503 
1504 	wait = (ms * pcntprob) / 100;
1505 	if (wait < Minwaitms)
1506 		wait = Minwaitms;
1507 	if (wait > Maxwaitms)
1508 		wait = Maxwaitms;
1509 	return wait;
1510 }
1511 
1512 /*
1513  * in principle we could use a single descriptor for a udp port
1514  * to send all queries and receive all the answers to them,
1515  * but we'd have to sort out the answers by dns-query id.
1516  */
1517 static int
1518 udpquery(Query *qp, char *mntpt, int depth, int patient, int inns)
1519 {
1520 	int fd, rv;
1521 	long now;
1522 	ulong pcntprob;
1523 	uvlong wait, reqtm;
1524 	char *msg;
1525 	uchar *obuf, *ibuf;
1526 	static QLock mntlck;
1527 	static ulong lastmount;
1528 
1529 	/* use alloced buffers rather than ones from the stack */
1530 	ibuf = emalloc(64*1024);		/* max. tcp reply size */
1531 	obuf = emalloc(Maxudp+Udphdrsize);
1532 
1533 	fd = udpport(mntpt);
1534 	while (fd < 0 && cfg.straddle && strcmp(mntpt, "/net.alt") == 0) {
1535 		/* HACK: remount /net.alt */
1536 		now = time(nil);
1537 		if (now < lastmount + Remntretry)
1538 			sleep(S2MS(lastmount + Remntretry - now));
1539 		qlock(&mntlck);
1540 		fd = udpport(mntpt);	/* try again under lock */
1541 		if (fd < 0) {
1542 			dnslog("[%d] remounting /net.alt", getpid());
1543 			unmount(nil, "/net.alt");
1544 
1545 			msg = system(open("/dev/null", ORDWR), "outside");
1546 
1547 			lastmount = time(nil);
1548 			if (msg && *msg) {
1549 				dnslog("[%d] can't remount /net.alt: %s",
1550 					getpid(), msg);
1551 				sleep(10*1000);	/* don't spin remounting */
1552 			} else
1553 				fd = udpport(mntpt);
1554 		}
1555 		qunlock(&mntlck);
1556 	}
1557 	if (fd < 0) {
1558 		dnslog("can't get udpport for %s query of name %s: %r",
1559 			mntpt, qp->dp->name);
1560 		sysfatal("out of udp conversations");	/* we're buggered */
1561 	}
1562 
1563 	/*
1564 	 * Our QIP servers are busted and respond to AAAA and CNAME queries
1565 	 * with (sometimes malformed [too short] packets and) no answers and
1566 	 * just NS RRs but not Rname errors.  so make time-to-wait
1567 	 * proportional to estimated probability of an RR of that type existing.
1568 	 */
1569 	if (qp->type >= nelem(likely))
1570 		pcntprob = 35;			/* unpopular query type */
1571 	else
1572 		pcntprob = likely[qp->type];
1573 	reqtm = (patient? 2 * Maxreqtm: Maxreqtm);
1574 	wait = weight(reqtm / 3, pcntprob);	/* time for one udp query */
1575 	qp->req->aborttime = timems() + 3*wait; /* for all udp queries */
1576 
1577 	qp->udpfd = fd;
1578 	rv = queryns(qp, depth, ibuf, obuf, wait, inns);
1579 	close(fd);
1580 	qp->udpfd = -1;
1581 
1582 	free(obuf);
1583 	free(ibuf);
1584 	return rv;
1585 }
1586 
1587 /*
1588  * look up (qp->dp->name, qp->type) rr in dns,
1589  * using nameservers in qp->nsrp.
1590  */
1591 static int
1592 netquery(Query *qp, int depth)
1593 {
1594 	int lock, rv, triedin, inname;
1595 	char buf[32];
1596 	RR *rp;
1597 	DN *dp;
1598 	Querylck *qlp;
1599 	static int whined;
1600 
1601 	rv = Answnone;			/* pessimism */
1602 	if(depth > 12)			/* in a recursive loop? */
1603 		return Answnone;
1604 
1605 	slave(qp->req);
1606 	/*
1607 	 * slave might have forked.  if so, the parent process longjmped to
1608 	 * req->mret; we're usually the child slave, but if there are too
1609 	 * many children already, we're still the same process.
1610 	 */
1611 
1612 	/*
1613 	 * don't lock before call to slave so only children can block.
1614 	 * just lock at top-level invocation.
1615 	 */
1616 	lock = depth <= 1 && qp->req->isslave;
1617 	dp = qp->dp;		/* ensure that it doesn't change underfoot */
1618 	qlp = nil;
1619 	if(lock) {
1620 		procsetname("query lock wait: %s %s from %s", dp->name,
1621 			rrname(qp->type, buf, sizeof buf), qp->req->from);
1622 		/*
1623 		 * don't make concurrent queries for this name.
1624 		 * dozens of processes blocking here probably indicates
1625 		 * an error in our dns data that causes us to not
1626 		 * recognise a zone (area) as one of our own, thus
1627 		 * causing us to query other nameservers.
1628 		 */
1629 		qlp = &dp->querylck[qtype2lck(qp->type)];
1630 		qlock(qlp);
1631 		if (qlp->Ref.ref > Maxoutstanding) {
1632 			qunlock(qlp);
1633 			if (!whined) {
1634 				whined = 1;
1635 				dnslog("too many outstanding queries for %s;"
1636 					" dropping this one; no further logging"
1637 					" of drops", dp->name);
1638 			}
1639 			return 0;
1640 		}
1641 		++qlp->Ref.ref;
1642 		qunlock(qlp);
1643 	}
1644 	procsetname("netquery: %s", dp->name);
1645 
1646 	/* prepare server RR's for incremental lookup */
1647 	for(rp = qp->nsrp; rp; rp = rp->next)
1648 		rp->marker = 0;
1649 
1650 	triedin = 0;
1651 
1652 	/*
1653 	 * normal resolvers and servers will just use mntpt for all addresses,
1654 	 * even on the outside.  straddling servers will use mntpt (/net)
1655 	 * for inside addresses and /net.alt for outside addresses,
1656 	 * thus bypassing other inside nameservers.
1657 	 */
1658 	inname = insideaddr(dp->name);
1659 	if (!cfg.straddle || inname) {
1660 		rv = udpquery(qp, mntpt, depth, Hurry, (cfg.inside? Inns: Outns));
1661 		triedin = 1;
1662 	}
1663 
1664 	/*
1665 	 * if we're still looking, are inside, and have an outside domain,
1666 	 * try it on our outside interface, if any.
1667 	 */
1668 	if (rv == Answnone && cfg.inside && !inname) {
1669 		if (triedin)
1670 			dnslog(
1671 	   "[%d] netquery: internal nameservers failed for %s; trying external",
1672 				getpid(), dp->name);
1673 
1674 		/* prepare server RR's for incremental lookup */
1675 		for(rp = qp->nsrp; rp; rp = rp->next)
1676 			rp->marker = 0;
1677 
1678 		rv = udpquery(qp, "/net.alt", depth, Patient, Outns);
1679 	}
1680 //	if (rv == Answnone)		/* could ask /net.alt/dns directly */
1681 //		askoutdns(dp, qp->type);
1682 
1683 	if(lock && qlp) {
1684 		qlock(qlp);
1685 		assert(qlp->Ref.ref > 0);
1686 		qunlock(qlp);
1687 		decref(qlp);
1688 	}
1689 	return rv;
1690 }
1691 
1692 int
1693 seerootns(void)
1694 {
1695 	int rv;
1696 	char root[] = "";
1697 	Request req;
1698 	RR *rr;
1699 	Query *qp;
1700 
1701 	memset(&req, 0, sizeof req);
1702 	req.isslave = 1;
1703 	req.aborttime = timems() + Maxreqtm;
1704 	req.from = "internal";
1705 
1706 	qp = emalloc(sizeof *qp);
1707 	queryinit(qp, dnlookup(root, Cin, 1), Tns, &req);
1708 	qp->nsrp = dblookup(root, Cin, Tns, 0, 0);
1709 	for (rr = qp->nsrp; rr != nil; rr = rr->next)	/* DEBUG */
1710 		dnslog("seerootns query nsrp: %R", rr);
1711 
1712 	rv = netquery(qp, 0);		/* lookup ". ns" using qp->nsrp */
1713 
1714 	rrfreelist(qp->nsrp);
1715 	querydestroy(qp);
1716 	free(qp);
1717 	return rv;
1718 }
1719