xref: /plan9-contrib/sys/src/cmd/ndb/dnresolve.c (revision 3468a4915d661daa200976acc4f80f51aae144b2)
1 /*
2  * domain name resolvers, see rfcs 1035 and 1123
3  */
4 #include <u.h>
5 #include <libc.h>
6 #include <ip.h>
7 #include <bio.h>
8 #include <ndb.h>
9 #include "dns.h"
10 
/*
 * time-unit conversions.  each is evaluated in the type of its
 * argument, so a narrow argument can overflow in S2MS.
 */
#define NS2MS(ns)	((ns) / (1000L * 1000L))	/* nanoseconds to milliseconds */
#define S2MS(s)		((s) * 1000)			/* seconds to milliseconds */
#define MS2S(ms)	((ms) / 1000)			/* milliseconds to seconds */
14 
typedef struct Dest Dest;
typedef struct Ipaddr Ipaddr;
typedef struct Query Query;

enum
{
	/* transport used for a query */
	Udp,
	Tcp,

	Maxdest		= 24,		/* maximum destinations for a request message */
	Maxtrans	= 3,		/* maximum transmissions to a server */

	/* sanity markers stored in Dest.magic and Query.magic */
	Destmagic	= 0xcafebabe,
	Querymagic	= 0xdeadbeef,
};

enum
{
	Hurry,
	Patient,
};

enum
{
	Outns,
	Inns,
};

enum
{
	Remntretry	= 15,		/* min. sec.s between remount attempts */
};
30 
31 struct Ipaddr {
32 	Ipaddr *next;
33 	uchar	ip[IPaddrlen];
34 };
35 
36 struct Dest
37 {
38 	uchar	a[IPaddrlen];	/* ip address */
39 	DN	*s;		/* name server */
40 	int	nx;		/* number of transmissions */
41 	int	code;		/* response code; used to clear dp->respcode */
42 
43 	ulong	magic;
44 };
45 
46 /*
47  * Query has a QLock in it, thus it can't be an automatic
48  * variable, since each process would see a separate copy
49  * of the lock on its stack.
50  */
51 struct Query {
52 	DN	*dp;		/* domain */
53 	ushort	type;		/* and type to look up */
54 	Request *req;
55 	RR	*nsrp;		/* name servers to consult */
56 
57 	/* dest must not be on the stack due to forking in slave() */
58 	Dest	*dest;		/* array of destinations */
59 	Dest	*curdest;	/* pointer to one of them */
60 	int	ndest;
61 
62 	int	udpfd;
63 
64 	QLock	tcplock;	/* only one tcp call at a time per query */
65 	int	tcpset;
66 	int	tcpfd;		/* if Tcp, read replies from here */
67 	int	tcpctlfd;
68 	uchar	tcpip[IPaddrlen];
69 
70 	ulong	magic;
71 };
72 
73 /* estimated % probability of such a record existing at all */
74 int likely[] = {
75 	[Ta]		95,
76 	[Taaaa]		10,
77 	[Tcname]	15,
78 	[Tmx]		60,
79 	[Tns]		90,
80 	[Tnull]		5,
81 	[Tptr]		35,
82 	[Tsoa]		90,
83 	[Tsrv]		60,
84 	[Ttxt]		15,
85 	[Tall]		95,
86 };
87 
88 static RR*	dnresolve1(char*, int, int, Request*, int, int);
89 static int	netquery(Query *, int);
90 
91 /*
92  * reading /proc/pid/args yields either "name args" or "name [display args]",
93  * so return only display args, if any.
94  */
95 static char *
96 procgetname(void)
97 {
98 	int fd, n;
99 	char *lp, *rp;
100 	char buf[256];
101 
102 	snprint(buf, sizeof buf, "#p/%d/args", getpid());
103 	if((fd = open(buf, OREAD)) < 0)
104 		return strdup("");
105 	*buf = '\0';
106 	n = read(fd, buf, sizeof buf-1);
107 	close(fd);
108 	if (n >= 0)
109 		buf[n] = '\0';
110 	if ((lp = strchr(buf, '[')) == nil ||
111 	    (rp = strrchr(buf, ']')) == nil)
112 		return strdup("");
113 	*rp = '\0';
114 	return strdup(lp+1);
115 }
116 
117 /*
118  *  lookup 'type' info for domain name 'name'.  If it doesn't exist, try
119  *  looking it up as a canonical name.
120  */
121 RR*
122 dnresolve(char *name, int class, int type, Request *req, RR **cn, int depth,
123 	int recurse, int rooted, int *status)
124 {
125 	RR *rp, *nrp, *drp;
126 	DN *dp;
127 	int loops;
128 	char *procname;
129 	char nname[Domlen];
130 
131 	if(status)
132 		*status = 0;
133 
134 	if(depth > 12)			/* in a recursive loop? */
135 		return nil;
136 
137 	procname = procgetname();
138 	/*
139 	 *  hack for systems that don't have resolve search
140 	 *  lists.  Just look up the simple name in the database.
141 	 */
142 	if(!rooted && strchr(name, '.') == nil){
143 		rp = nil;
144 		drp = domainlist(class);
145 		for(nrp = drp; rp == nil && nrp != nil; nrp = nrp->next){
146 			snprint(nname, sizeof nname, "%s.%s", name,
147 				nrp->ptr->name);
148 			rp = dnresolve(nname, class, type, req, cn, depth+1,
149 				recurse, rooted, status);
150 			lock(&dnlock);
151 			rrfreelist(rrremneg(&rp));
152 			unlock(&dnlock);
153 		}
154 		if(drp != nil)
155 			rrfreelist(drp);
156 		procsetname(procname);
157 		free(procname);
158 		return rp;
159 	}
160 
161 	/*
162 	 *  try the name directly
163 	 */
164 	rp = dnresolve1(name, class, type, req, depth, recurse);
165 	if(rp == nil) {
166 		/*
167 		 * try it as a canonical name if we weren't told
168 		 * that the name didn't exist
169 		 */
170 		dp = dnlookup(name, class, 0);
171 		if(type != Tptr && dp->respcode != Rname)
172 			for(loops = 0; rp == nil && loops < 32; loops++){
173 				rp = dnresolve1(name, class, Tcname, req,
174 					depth, recurse);
175 				if(rp == nil)
176 					break;
177 
178 				/* rp->host == nil shouldn't happen, but does */
179 				if(rp->negative || rp->host == nil){
180 					rrfreelist(rp);
181 					rp = nil;
182 					break;
183 				}
184 
185 				name = rp->host->name;
186 				lock(&dnlock);
187 				if(cn)
188 					rrcat(cn, rp);
189 				else
190 					rrfreelist(rp);
191 				unlock(&dnlock);
192 
193 				rp = dnresolve1(name, class, type, req,
194 					depth, recurse);
195 			}
196 
197 		/* distinction between not found and not good */
198 		if(rp == nil && status != nil && dp->respcode != 0)
199 			*status = dp->respcode;
200 	}
201 	procsetname(procname);
202 	free(procname);
203 	return randomize(rp);
204 }
205 
206 static void
207 queryinit(Query *qp, DN *dp, int type, Request *req)
208 {
209 	memset(qp, 0, sizeof *qp);
210 	qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
211 	qp->dp = dp;
212 	qp->type = type;
213 	if (qp->type != type)
214 		dnslog("queryinit: bogus type %d", type);
215 	qp->req = req;
216 	qp->nsrp = nil;
217 	qp->dest = qp->curdest = nil;
218 	qp->magic = Querymagic;
219 }
220 
221 static void
222 queryck(Query *qp)
223 {
224 	assert(qp);
225 	assert(qp->magic == Querymagic);
226 }
227 
228 static void
229 querydestroy(Query *qp)
230 {
231 	queryck(qp);
232 	/* leave udpfd open */
233 	if (qp->tcpfd > 0)
234 		close(qp->tcpfd);
235 	if (qp->tcpctlfd > 0) {
236 		hangup(qp->tcpctlfd);
237 		close(qp->tcpctlfd);
238 	}
239 	free(qp->dest);
240 	memset(qp, 0, sizeof *qp);	/* prevent accidents */
241 	qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
242 }
243 
244 static void
245 destinit(Dest *p)
246 {
247 	memset(p, 0, sizeof *p);
248 	p->magic = Destmagic;
249 }
250 
251 static void
252 destck(Dest *p)
253 {
254 	assert(p);
255 	assert(p->magic == Destmagic);
256 }
257 
258 static void
259 destdestroy(Dest *p)
260 {
261 	USED(p);
262 }
263 
264 /*
265  * if the response to a query hasn't arrived within 100 ms.,
266  * it's unlikely to arrive at all.  after 1 s., it's really unlikely.
267  * queries for missing RRs are likely to produce time-outs rather than
268  * negative responses, so cname and aaaa queries are likely to time out,
269  * thus we don't wait very long for them.
270  */
271 static void
272 notestats(vlong start, int tmout, int type)
273 {
274 	qlock(&stats);
275 	if (tmout) {
276 		stats.tmout++;
277 		if (type == Taaaa)
278 			stats.tmoutv6++;
279 		else if (type == Tcname)
280 			stats.tmoutcname++;
281 	} else {
282 		long wait10ths = NS2MS(nsec() - start) / 100;
283 
284 		if (wait10ths <= 0)
285 			stats.under10ths[0]++;
286 		else if (wait10ths >= nelem(stats.under10ths))
287 			stats.under10ths[nelem(stats.under10ths) - 1]++;
288 		else
289 			stats.under10ths[wait10ths]++;
290 	}
291 	qunlock(&stats);
292 }
293 
294 static void
295 noteinmem(void)
296 {
297 	qlock(&stats);
298 	stats.answinmem++;
299 	qunlock(&stats);
300 }
301 
302 static RR*
303 issuequery(Query *qp, char *name, int class, int depth, int recurse)
304 {
305 	char *cp;
306 	DN *nsdp;
307 	RR *rp, *nsrp, *dbnsrp;
308 
309 	/*
310 	 *  if we're running as just a resolver, query our
311 	 *  designated name servers
312 	 */
313 	if(cfg.resolver){
314 		nsrp = randomize(getdnsservers(class));
315 		if(nsrp != nil) {
316 			qp->nsrp = nsrp;
317 			if(netquery(qp, depth+1)){
318 				rrfreelist(nsrp);
319 				return rrlookup(qp->dp, qp->type, OKneg);
320 			}
321 			rrfreelist(nsrp);
322 		}
323 	}
324 
325 	/*
326  	 *  walk up the domain name looking for
327 	 *  a name server for the domain.
328 	 */
329 	for(cp = name; cp; cp = walkup(cp)){
330 		/*
331 		 *  if this is a local (served by us) domain,
332 		 *  return answer
333 		 */
334 		dbnsrp = randomize(dblookup(cp, class, Tns, 0, 0));
335 		if(dbnsrp && dbnsrp->local){
336 			rp = dblookup(name, class, qp->type, 1, dbnsrp->ttl);
337 			rrfreelist(dbnsrp);
338 			return rp;
339 		}
340 
341 		/*
342 		 *  if recursion isn't set, just accept local
343 		 *  entries
344 		 */
345 		if(recurse == Dontrecurse){
346 			if(dbnsrp)
347 				rrfreelist(dbnsrp);
348 			continue;
349 		}
350 
351 		/* look for ns in cache */
352 		nsdp = dnlookup(cp, class, 0);
353 		nsrp = nil;
354 		if(nsdp)
355 			nsrp = randomize(rrlookup(nsdp, Tns, NOneg));
356 
357 		/* if the entry timed out, ignore it */
358 		if(nsrp && nsrp->ttl < now){
359 			rrfreelist(nsrp);
360 			nsrp = nil;
361 		}
362 
363 		if(nsrp){
364 			rrfreelist(dbnsrp);
365 
366 			/* query the name servers found in cache */
367 			qp->nsrp = nsrp;
368 			if(netquery(qp, depth+1)){
369 				rrfreelist(nsrp);
370 				return rrlookup(qp->dp, qp->type, OKneg);
371 			}
372 			rrfreelist(nsrp);
373 			continue;
374 		}
375 
376 		/* use ns from db */
377 		if(dbnsrp){
378 			/* try the name servers found in db */
379 			qp->nsrp = dbnsrp;
380 			if(netquery(qp, depth+1)){
381 				/* we got an answer */
382 				rrfreelist(dbnsrp);
383 				return rrlookup(qp->dp, qp->type, NOneg);
384 			}
385 			rrfreelist(dbnsrp);
386 		}
387 	}
388 	return nil;
389 }
390 
391 static RR*
392 dnresolve1(char *name, int class, int type, Request *req, int depth,
393 	int recurse)
394 {
395 	Area *area;
396 	DN *dp;
397 	RR *rp;
398 	Query *qp;
399 
400 	if(debug)
401 		dnslog("[%d] dnresolve1 %s %d %d", getpid(), name, type, class);
402 
403 	/* only class Cin implemented so far */
404 	if(class != Cin)
405 		return nil;
406 
407 	dp = dnlookup(name, class, 1);
408 
409 	/*
410 	 *  Try the cache first
411 	 */
412 	rp = rrlookup(dp, type, OKneg);
413 	if(rp)
414 		if(rp->db){
415 			/* unauthoritative db entries are hints */
416 			if(rp->auth) {
417 				noteinmem();
418 				return rp;
419 			}
420 		} else
421 			/* cached entry must still be valid */
422 			if(rp->ttl > now)
423 				/* but Tall entries are special */
424 				if(type != Tall || rp->query == Tall) {
425 					noteinmem();
426 					return rp;
427 				}
428 	rrfreelist(rp);
429 	rp = nil;		/* accident prevention */
430 	USED(rp);
431 
432 	/*
433 	 * try the cache for a canonical name. if found punt
434 	 * since we'll find it during the canonical name search
435 	 * in dnresolve().
436 	 */
437 	if(type != Tcname){
438 		rp = rrlookup(dp, Tcname, NOneg);
439 		rrfreelist(rp);
440 		if(rp)
441 			return nil;
442 	}
443 
444 	/*
445 	 * if the domain name is within an area of ours,
446 	 * we should have found its data in memory by now.
447 	 */
448 	area = inmyarea(dp->name);
449 	if (area || strncmp(dp->name, "local#", 6) == 0) {
450 //		char buf[32];
451 
452 //		dnslog("%s %s: no data in area %s", dp->name,
453 //			rrname(type, buf, sizeof buf), area->soarr->owner->name);
454 		return nil;
455 	}
456 
457 	qp = emalloc(sizeof *qp);
458 	queryinit(qp, dp, type, req);
459 	rp = issuequery(qp, name, class, depth, recurse);
460 	querydestroy(qp);
461 	free(qp);
462 	if(rp)
463 		return rp;
464 
465 	/* settle for a non-authoritative answer */
466 	rp = rrlookup(dp, type, OKneg);
467 	if(rp)
468 		return rp;
469 
470 	/* noone answered.  try the database, we might have a chance. */
471 	return dblookup(name, class, type, 0, 0);
472 }
473 
/*
 *  walk a domain name one element to the right, i.e. return a
 *  pointer to the parent domain name: the text after the first dot.
 *  a non-empty name without a dot has the root ("") as its parent;
 *  the empty name has no parent, so 0 is returned.
 */
char*
walkup(char *name)
{
	char *dot;

	dot = strchr(name, '.');
	if(dot != 0)
		return dot + 1;
	if(*name == '\0')
		return 0;
	return "";
}
492 
493 /*
494  *  Get a udp port for sending requests and reading replies.  Put the port
495  *  into "headers" mode.
496  */
497 static char *hmsg = "headers";
498 
499 int
500 udpport(char *mtpt)
501 {
502 	int fd, ctl;
503 	char ds[64], adir[64];
504 
505 	/* get a udp port */
506 	snprint(ds, sizeof ds, "%s/udp!*!0", (mtpt? mtpt: "/net"));
507 	ctl = announce(ds, adir);
508 	if(ctl < 0){
509 		/* warning("can't get udp port"); */
510 		return -1;
511 	}
512 
513 	/* turn on header style interface */
514 	if(write(ctl, hmsg, strlen(hmsg)) != strlen(hmsg)){
515 		close(ctl);
516 		warning(hmsg);
517 		return -1;
518 	}
519 
520 	/* grab the data file */
521 	snprint(ds, sizeof ds, "%s/data", adir);
522 	fd = open(ds, ORDWR);
523 	close(ctl);
524 	if(fd < 0)
525 		warning("can't open udp port %s: %r", ds);
526 	return fd;
527 }
528 
529 /* generate a DNS UDP query packet */
530 int
531 mkreq(DN *dp, int type, uchar *buf, int flags, ushort reqno)
532 {
533 	DNSmsg m;
534 	int len;
535 	Udphdr *uh = (Udphdr*)buf;
536 
537 	/* stuff port number into output buffer */
538 	memset(uh, 0, sizeof *uh);
539 	hnputs(uh->rport, 53);
540 
541 	/* make request and convert it to output format */
542 	memset(&m, 0, sizeof m);
543 	m.flags = flags;
544 	m.id = reqno;
545 	m.qd = rralloc(type);
546 	m.qd->owner = dp;
547 	m.qd->type = type;
548 	if (m.qd->type != type)
549 		dnslog("mkreq: bogus type %d", type);
550 	len = convDNS2M(&m, &buf[Udphdrsize], Maxudp);
551 	rrfree(m.qd);
552 	memset(&m, 0, sizeof m);		/* cause trouble */
553 	return len;
554 }
555 
556 void
557 freeanswers(DNSmsg *mp)
558 {
559 	rrfreelist(mp->qd);
560 	rrfreelist(mp->an);
561 	rrfreelist(mp->ns);
562 	rrfreelist(mp->ar);
563 	mp->qd = mp->an = mp->ns = mp->ar = nil;
564 }
565 
566 /* sets srcip */
567 static int
568 readnet(Query *qp, int medium, uchar *ibuf, ulong endtime, uchar **replyp,
569 	uchar *srcip)
570 {
571 	int len, fd;
572 	long ms;
573 	vlong startns = nsec();
574 	uchar *reply;
575 	uchar lenbuf[2];
576 
577 	/* timed read of reply */
578 	ms = S2MS(endtime) - NS2MS(startns);
579 	if (ms < 2000)
580 		ms = 2000;	/* give the remote ns a fighting chance */
581 	reply = ibuf;
582 	len = -1;			/* pessimism */
583 	memset(srcip, 0, IPaddrlen);
584 	if (medium == Udp)
585 		if (qp->udpfd <= 0)
586 			dnslog("readnet: qp->udpfd closed");
587 		else {
588 			alarm(ms);
589 			len = read(qp->udpfd, ibuf, Udphdrsize+Maxudpin);
590 			alarm(0);
591 			notestats(startns, len < 0, qp->type);
592 			if (len >= IPaddrlen)
593 				memmove(srcip, ibuf, IPaddrlen);
594 			if (len >= Udphdrsize) {
595 				len   -= Udphdrsize;
596 				reply += Udphdrsize;
597 			}
598 		}
599 	else {
600 		if (!qp->tcpset)
601 			dnslog("readnet: tcp params not set");
602 		alarm(ms);
603 		fd = qp->tcpfd;
604 		if (fd <= 0)
605 			dnslog("readnet: %s: tcp fd unset for dest %I",
606 				qp->dp->name, qp->tcpip);
607 		else if (readn(fd, lenbuf, 2) != 2) {
608 			dnslog("readnet: short read of tcp size from %I",
609 				qp->tcpip);
610 			/* probably a time-out */
611 			notestats(startns, 1, qp->type);
612 		} else {
613 			len = lenbuf[0]<<8 | lenbuf[1];
614 			if (readn(fd, ibuf, len) != len) {
615 				dnslog("readnet: short read of tcp data from %I",
616 					qp->tcpip);
617 				/* probably a time-out */
618 				notestats(startns, 1, qp->type);
619 				len = -1;
620 			}
621 		}
622 		alarm(0);
623 		memmove(srcip, qp->tcpip, IPaddrlen);
624 	}
625 	*replyp = reply;
626 	return len;
627 }
628 
629 /*
630  *  read replies to a request and remember the rrs in the answer(s).
631  *  ignore any of the wrong type.
632  *  wait at most until endtime.
633  */
634 static int
635 readreply(Query *qp, int medium, ushort req, uchar *ibuf, DNSmsg *mp,
636 	ulong endtime)
637 {
638 	int len, rv;
639 	char *err;
640 	char tbuf[32];
641 	uchar *reply;
642 	uchar srcip[IPaddrlen];
643 	RR *rp;
644 
645 	queryck(qp);
646 	rv = 0;
647 	memset(mp, 0, sizeof *mp);
648 	if (time(nil) >= endtime)
649 		return -1;		/* timed out before we started */
650 
651 	memset(srcip, 0, sizeof srcip);
652 	if (0)
653 		len = -1;
654 	for (; time(nil) < endtime &&
655 	    (len = readnet(qp, medium, ibuf, endtime, &reply, srcip)) >= 0;
656 	    freeanswers(mp)){
657 		/* convert into internal format  */
658 		memset(mp, 0, sizeof *mp);
659 		err = convM2DNS(reply, len, mp, nil);
660 		if (mp->flags & Ftrunc) {
661 			free(err);
662 			freeanswers(mp);
663 			/* notify our caller to retry the query via tcp. */
664 			return -1;
665 		} else if(err){
666 			dnslog("readreply: %s: input err, len %d: %s: %I",
667 				qp->dp->name, len, err, srcip);
668 			free(err);
669 			continue;
670 		}
671 		if(debug)
672 			logreply(qp->req->id, srcip, mp);
673 
674 		/* answering the right question? */
675 		if(mp->id != req)
676 			dnslog("%d: id %d instead of %d: %I", qp->req->id,
677 				mp->id, req, srcip);
678 		else if(mp->qd == 0)
679 			dnslog("%d: no question RR: %I", qp->req->id, srcip);
680 		else if(mp->qd->owner != qp->dp)
681 			dnslog("%d: owner %s instead of %s: %I", qp->req->id,
682 				mp->qd->owner->name, qp->dp->name, srcip);
683 		else if(mp->qd->type != qp->type)
684 			dnslog("%d: qp->type %d instead of %d: %I",
685 				qp->req->id, mp->qd->type, qp->type, srcip);
686 		else {
687 			/* remember what request this is in answer to */
688 			for(rp = mp->an; rp; rp = rp->next)
689 				rp->query = qp->type;
690 			return rv;
691 		}
692 	}
693 	if (time(nil) >= endtime) {
694 		;				/* query expired */
695 	} else if (0) {
696 		/* this happens routinely when a read times out */
697 		dnslog("readreply: %s type %s: ns %I read error or eof "
698 			"(returned %d): %r", qp->dp->name, rrname(qp->type,
699 			tbuf, sizeof tbuf), srcip, len);
700 		if (medium == Udp)
701 			for (rp = qp->nsrp; rp != nil; rp = rp->next)
702 				if (rp->type == Tns)
703 					dnslog("readreply: %s: query sent to "
704 						"ns %s", qp->dp->name,
705 						rp->host->name);
706 	}
707 	return -1;
708 }
709 
710 /*
711  *	return non-0 if first list includes second list
712  */
713 int
714 contains(RR *rp1, RR *rp2)
715 {
716 	RR *trp1, *trp2;
717 
718 	for(trp2 = rp2; trp2; trp2 = trp2->next){
719 		for(trp1 = rp1; trp1; trp1 = trp1->next)
720 			if(trp1->type == trp2->type)
721 			if(trp1->host == trp2->host)
722 			if(trp1->owner == trp2->owner)
723 				break;
724 		if(trp1 == nil)
725 			return 0;
726 	}
727 	return 1;
728 }
729 
730 
731 /*
732  *  return multicast version if any
733  */
734 int
735 ipisbm(uchar *ip)
736 {
737 	if(isv4(ip)){
738 		if (ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0 ||
739 		    ipcmp(ip, IPv4bcast) == 0)
740 			return 4;
741 	} else
742 		if(ip[0] == 0xff)
743 			return 6;
744 	return 0;
745 }
746 
747 /*
748  *  Get next server address
749  */
750 static int
751 serveraddrs(Query *qp, int nd, int depth)
752 {
753 	RR *rp, *arp, *trp;
754 	Dest *cur;
755 
756 	if(nd >= Maxdest)
757 		return 0;
758 
759 	/*
760 	 *  look for a server whose address we already know.
761 	 *  if we find one, mark it so we ignore this on
762 	 *  subsequent passes.
763 	 */
764 	arp = 0;
765 	for(rp = qp->nsrp; rp; rp = rp->next){
766 		assert(rp->magic == RRmagic);
767 		if(rp->marker)
768 			continue;
769 		arp = rrlookup(rp->host, Ta, NOneg);
770 		if(arp){
771 			rp->marker = 1;
772 			break;
773 		}
774 		arp = dblookup(rp->host->name, Cin, Ta, 0, 0);
775 		if(arp){
776 			rp->marker = 1;
777 			break;
778 		}
779 	}
780 
781 	/*
782 	 *  if the cache and database lookup didn't find any new
783 	 *  server addresses, try resolving one via the network.
784 	 *  Mark any we try to resolve so we don't try a second time.
785 	 */
786 	if(arp == 0)
787 		for(rp = qp->nsrp; rp; rp = rp->next){
788 			if(rp->marker)
789 				continue;
790 			rp->marker = 1;
791 
792 			/*
793 			 *  avoid loops looking up a server under itself
794 			 */
795 			if(subsume(rp->owner->name, rp->host->name))
796 				continue;
797 
798 			arp = dnresolve(rp->host->name, Cin, Ta, qp->req, 0,
799 				depth+1, Recurse, 1, 0);
800 			lock(&dnlock);
801 			rrfreelist(rrremneg(&arp));
802 			unlock(&dnlock);
803 			if(arp)
804 				break;
805 		}
806 
807 	/* use any addresses that we found */
808 	for(trp = arp; trp && nd < Maxdest; trp = trp->next){
809 		cur = &qp->dest[nd];
810 		parseip(cur->a, trp->ip->name);
811 		/*
812 		 * straddling servers can reject all nameservers if they are all
813 		 * inside, so be sure to list at least one outside ns at
814 		 * the end of the ns list in /lib/ndb for `dom='.
815 		 */
816 		if (ipisbm(cur->a) ||
817 		    cfg.straddle && !insideaddr(qp->dp->name) && insidens(cur->a))
818 			continue;
819 		cur->nx = 0;
820 		cur->s = trp->owner;
821 		cur->code = Rtimeout;
822 		nd++;
823 	}
824 	rrfreelist(arp);
825 	return nd;
826 }
827 
828 /*
829  *  cache negative responses
830  */
831 static void
832 cacheneg(DN *dp, int type, int rcode, RR *soarr)
833 {
834 	RR *rp;
835 	DN *soaowner;
836 	ulong ttl;
837 
838 	stats.negcached++;
839 
840 	/* no cache time specified, don't make anything up */
841 	if(soarr != nil){
842 		if(soarr->next != nil){
843 			rrfreelist(soarr->next);
844 			soarr->next = nil;
845 		}
846 		soaowner = soarr->owner;
847 	} else
848 		soaowner = nil;
849 
850 	/* the attach can cause soarr to be freed so mine it now */
851 	if(soarr != nil && soarr->soa != nil)
852 		ttl = soarr->soa->minttl+now;
853 	else
854 		ttl = 5*Min;
855 
856 	/* add soa and negative RR to the database */
857 	rrattach(soarr, Authoritative);
858 
859 	rp = rralloc(type);
860 	rp->owner = dp;
861 	rp->negative = 1;
862 	rp->negsoaowner = soaowner;
863 	rp->negrcode = rcode;
864 	rp->ttl = ttl;
865 	rrattach(rp, Authoritative);
866 }
867 
868 static int
869 setdestoutns(Dest *p, int n)
870 {
871 	uchar *outns = outsidens(n);
872 
873 	destck(p);
874 	destinit(p);
875 	if (outns == nil) {
876 		if (n == 0)
877 			dnslog("[%d] no outside-ns in ndb", getpid());
878 		return -1;
879 	}
880 	memmove(p->a, outns, sizeof p->a);
881 	p->s = dnlookup("outside-ns-ips", Cin, 1);
882 	return 0;
883 }
884 
885 /*
886  * issue query via UDP or TCP as appropriate.
887  * for TCP, returns with qp->tcpip set from udppkt header.
888  */
889 static int
890 mydnsquery(Query *qp, int medium, uchar *udppkt, int len)
891 {
892 	int rv = -1, nfd;
893 	char *domain;
894 	char conndir[40];
895 	uchar belen[2];
896 	NetConnInfo *nci;
897 
898 	queryck(qp);
899 	domain = smprint("%I", udppkt);
900 	if (myaddr(domain)) {
901 		dnslog("mydnsquery: trying to send to myself (%s); bzzzt",
902 			domain);
903 		free(domain);
904 		return rv;
905 	}
906 
907 	switch (medium) {
908 	case Udp:
909 		free(domain);
910 		nfd = dup(qp->udpfd, -1);
911 		if (nfd < 0) {
912 			warning("mydnsquery: qp->udpfd %d: %r", qp->udpfd);
913 			close(qp->udpfd);	/* ensure it's closed */
914 			qp->udpfd = -1;		/* poison it */
915 			return rv;
916 		}
917 		close(nfd);
918 
919 		if (qp->udpfd <= 0)
920 			dnslog("mydnsquery: qp->udpfd %d closed", qp->udpfd);
921 		else {
922 			if (write(qp->udpfd, udppkt, len+Udphdrsize) !=
923 			    len+Udphdrsize)
924 				warning("sending udp msg: %r");
925 			else {
926 				stats.qsent++;
927 				rv = 0;
928 			}
929 		}
930 		break;
931 	case Tcp:
932 		/* send via TCP & keep fd around for reply */
933 		alarm(10*1000);
934 		qp->tcpfd = rv = dial(netmkaddr(domain, "tcp", "dns"), nil,
935 			conndir, &qp->tcpctlfd);
936 		alarm(0);
937 		if (qp->tcpfd < 0) {
938 			dnslog("can't dial tcp!%s!dns: %r", domain);
939 			free(domain);
940 			break;
941 		}
942 		free(domain);
943 		nci = getnetconninfo(conndir, qp->tcpfd);
944 		if (nci) {
945 			parseip(qp->tcpip, nci->rsys);
946 			freenetconninfo(nci);
947 		} else
948 			dnslog("mydnsquery: getnetconninfo failed");
949 		qp->tcpset = 1;
950 
951 		belen[0] = len >> 8;
952 		belen[1] = len;
953 		if (write(qp->tcpfd, belen, 2) != 2 ||
954 		    write(qp->tcpfd, udppkt + Udphdrsize, len) != len)
955 			warning("sending tcp msg: %r");
956 		break;
957 	default:
958 		sysfatal("mydnsquery: bad medium");
959 	}
960 	return rv;
961 }
962 
963 /*
964  * send query to all UDP destinations or one TCP destination,
965  * taken from obuf (udp packet) header
966  */
967 static int
968 xmitquery(Query *qp, int medium, int depth, uchar *obuf, int inns, int len)
969 {
970 	int j, n;
971 	char buf[32];
972 	Dest *p;
973 
974 	queryck(qp);
975 	if(time(nil) >= qp->req->aborttime)
976 		return -1;
977 
978 	/*
979 	 * get a nameserver address if we need one.
980 	 * serveraddrs populates qp->dest.
981 	 */
982 	p = qp->dest;
983 	destck(p);
984 	if (qp->ndest < 0 || qp->ndest > Maxdest)
985 		dnslog("qp->ndest %d out of range", qp->ndest);
986 	if (qp->ndest > qp->curdest - p) {
987 		j = serveraddrs(qp, qp->curdest - p, depth);
988 		if (j < 0 || j >= Maxdest) {
989 			dnslog("serveraddrs() result %d out of range", j);
990 			abort();
991 		}
992 		qp->curdest = &qp->dest[j];
993 	}
994 	destck(qp->curdest);
995 
996 	/* no servers, punt */
997 	if (qp->ndest == 0)
998 		if (cfg.straddle && cfg.inside) {
999 			/* get ips of "outside-ns-ips" */
1000 			p = qp->curdest = qp->dest;
1001 			for(n = 0; n < Maxdest; n++, qp->curdest++)
1002 				if (setdestoutns(qp->curdest, n) < 0)
1003 					break;
1004 		} else {
1005 			/* it's probably just a bogus domain, don't log it */
1006 			// dnslog("xmitquery: %s: no nameservers", qp->dp->name);
1007 			return -1;
1008 		}
1009 
1010 	/* send to first 'qp->ndest' destinations */
1011 	j = 0;
1012 	if (medium == Tcp) {
1013 		j++;
1014 		queryck(qp);
1015 		assert(qp->dp);
1016 		procsetname("tcp %sside query for %s %s", (inns? "in": "out"),
1017 			qp->dp->name, rrname(qp->type, buf, sizeof buf));
1018 		mydnsquery(qp, medium, obuf, len); /* sets qp->tcpip from obuf */
1019 		if(debug)
1020 			logsend(qp->req->id, depth, qp->tcpip, "", qp->dp->name,
1021 				qp->type);
1022 	} else
1023 		for(; p < &qp->dest[qp->ndest] && p < qp->curdest; p++){
1024 			/* skip destinations we've finished with */
1025 			if(p->nx >= Maxtrans)
1026 				continue;
1027 
1028 			j++;
1029 
1030 			/* exponential backoff of requests */
1031 			if((1<<p->nx) > qp->ndest)
1032 				continue;
1033 
1034 			procsetname("udp %sside query to %I/%s %s %s",
1035 				(inns? "in": "out"), p->a, p->s->name,
1036 				qp->dp->name, rrname(qp->type, buf, sizeof buf));
1037 			if(debug)
1038 				logsend(qp->req->id, depth, p->a, p->s->name,
1039 					qp->dp->name, qp->type);
1040 
1041 			/* fill in UDP destination addr & send it */
1042 			memmove(obuf, p->a, sizeof p->a);
1043 			mydnsquery(qp, medium, obuf, len);
1044 			p->nx++;
1045 		}
1046 	if(j == 0) {
1047 		// dnslog("xmitquery: %s: no destinations left", qp->dp->name);
1048 		return -1;
1049 	}
1050 	return 0;
1051 }
1052 
1053 static int lckindex[Maxlcks] = {
1054 	0,			/* all others map here */
1055 	Ta,
1056 	Tns,
1057 	Tcname,
1058 	Tsoa,
1059 	Tptr,
1060 	Tmx,
1061 	Ttxt,
1062 	Taaaa,
1063 };
1064 
1065 static int
1066 qtype2lck(int qtype)		/* map query type to querylck index */
1067 {
1068 	int i;
1069 
1070 	for (i = 1; i < nelem(lckindex); i++)
1071 		if (lckindex[i] == qtype)
1072 			return i;
1073 	return 0;
1074 }
1075 
1076 /* is mp a cachable negative response (with Rname set)? */
1077 static int
1078 isnegrname(DNSmsg *mp)
1079 {
1080 	/* TODO: could add || cfg.justforw to RHS of && */
1081 	return mp->an == nil && (mp->flags & Rmask) == Rname;
1082 }
1083 
1084 static int
1085 procansw(Query *qp, DNSmsg *mp, uchar *srcip, int depth, Dest *p)
1086 {
1087 	int rv;
1088 //	int lcktype;
1089 	char buf[32];
1090 	DN *ndp;
1091 	Query *nqp;
1092 	RR *tp, *soarr;
1093 
1094 	if (mp->an == nil)
1095 		stats.negans++;
1096 
1097 	/* ignore any error replies */
1098 	if((mp->flags & Rmask) == Rserver){
1099 		stats.negserver++;
1100 		freeanswers(mp);
1101 		if(p != qp->curdest)
1102 			p->code = Rserver;
1103 		return -1;
1104 	}
1105 
1106 	/* ignore any bad delegations */
1107 	if(mp->ns && baddelegation(mp->ns, qp->nsrp, srcip)){
1108 		stats.negbaddeleg++;
1109 		if(mp->an == nil){
1110 			stats.negbdnoans++;
1111 			freeanswers(mp);
1112 			if(p != qp->curdest)
1113 				p->code = Rserver;
1114 			return -1;
1115 		}
1116 		rrfreelist(mp->ns);
1117 		mp->ns = nil;
1118 	}
1119 
1120 	/* remove any soa's from the authority section */
1121 	lock(&dnlock);
1122 	soarr = rrremtype(&mp->ns, Tsoa);
1123 
1124 	/* incorporate answers */
1125 	unique(mp->an);
1126 	unique(mp->ns);
1127 	unique(mp->ar);
1128 	unlock(&dnlock);
1129 	if(mp->an)
1130 		rrattach(mp->an, (mp->flags & Fauth) != 0);
1131 	if(mp->ar)
1132 		rrattach(mp->ar, Notauthoritative);
1133 	if(mp->ns && !cfg.justforw){
1134 		ndp = mp->ns->owner;
1135 		rrattach(mp->ns, Notauthoritative);
1136 	} else {
1137 		ndp = nil;
1138 		rrfreelist(mp->ns);
1139 		mp->ns = nil;
1140 	}
1141 
1142 	/* free the question */
1143 	if(mp->qd) {
1144 		rrfreelist(mp->qd);
1145 		mp->qd = nil;
1146 	}
1147 
1148 	/*
1149 	 *  Any reply from an authoritative server,
1150 	 *  or a positive reply terminates the search.
1151 	 *  A negative response now also terminates the search.
1152 	 */
1153 	if(mp->an != nil || (mp->flags & Fauth)){
1154 		if(isnegrname(mp))
1155 			qp->dp->respcode = Rname;
1156 		else
1157 			qp->dp->respcode = 0;
1158 
1159 		/*
1160 		 *  cache any negative responses, free soarr.
1161 		 *  negative responses need not be authoritative:
1162 		 *  they can legitimately come from a cache.
1163 		 */
1164 		if( /* (mp->flags & Fauth) && */ mp->an == nil)
1165 			cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
1166 		else
1167 			rrfreelist(soarr);
1168 		return 1;
1169 	} else if (isnegrname(mp)) {
1170 		qp->dp->respcode = Rname;
1171 		/*
1172 		 *  cache negative response.
1173 		 *  negative responses need not be authoritative:
1174 		 *  they can legitimately come from a cache.
1175 		 */
1176 		cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
1177 		return 1;
1178 	}
1179 	stats.negnorname++;
1180 	rrfreelist(soarr);
1181 
1182 	/*
1183 	 *  if we've been given better name servers, recurse.
1184 	 *  if we're a pure resolver, don't recurse, we have
1185 	 *  to forward to a fixed set of named servers.
1186 	 */
1187 	if(!mp->ns || cfg.resolver && cfg.justforw)
1188 		return 0;
1189 	tp = rrlookup(ndp, Tns, NOneg);
1190 	if(contains(qp->nsrp, tp)){
1191 		rrfreelist(tp);
1192 		return 0;
1193 	}
1194 	procsetname("recursive query for %s %s", qp->dp->name,
1195 		rrname(qp->type, buf, sizeof buf));
1196 	/*
1197 	 *  we're called from udpquery, called from
1198 	 *  netquery, which current holds qp->dp->querylck,
1199 	 *  so release it now and acquire it upon return.
1200 	 */
1201 //	lcktype = qtype2lck(qp->type);
1202 //	qunlock(&qp->dp->querylck[lcktype]);
1203 
1204 	nqp = emalloc(sizeof *nqp);
1205 	queryinit(nqp, qp->dp, qp->type, qp->req);
1206 	nqp->nsrp = tp;
1207 	rv = netquery(nqp, depth+1);
1208 
1209 //	qlock(&qp->dp->querylck[lcktype]);
1210 	rrfreelist(nqp->nsrp);
1211 	querydestroy(nqp);
1212 	free(nqp);
1213 	return rv;
1214 }
1215 
1216 /*
1217  * send a query via tcp to a single address (from ibuf's udp header)
1218  * and read the answer(s) into mp->an.
1219  */
1220 static int
1221 tcpquery(Query *qp, DNSmsg *mp, int depth, uchar *ibuf, uchar *obuf, int len,
1222 	int waitsecs, int inns, ushort req)
1223 {
1224 	int rv = 0;
1225 	ulong endtime;
1226 
1227 	endtime = time(nil) + waitsecs;
1228 	if(endtime > qp->req->aborttime)
1229 		endtime = qp->req->aborttime;
1230 
1231 	if (0)
1232 		dnslog("%s: udp reply truncated; retrying query via tcp to %I",
1233 			qp->dp->name, qp->tcpip);
1234 
1235 	qlock(&qp->tcplock);
1236 	memmove(obuf, ibuf, IPaddrlen);		/* send back to respondent */
1237 	/* sets qp->tcpip from obuf's udp header */
1238 	if (xmitquery(qp, Tcp, depth, obuf, inns, len) < 0 ||
1239 	    readreply(qp, Tcp, req, ibuf, mp, endtime) < 0)
1240 		rv = -1;
1241 	if (qp->tcpfd > 0) {
1242 		hangup(qp->tcpctlfd);
1243 		close(qp->tcpctlfd);
1244 		close(qp->tcpfd);
1245 	}
1246 	qp->tcpfd = qp->tcpctlfd = -1;
1247 	qunlock(&qp->tcplock);
1248 	return rv;
1249 }
1250 
1251 /*
1252  *  query name servers.  If the name server returns a pointer to another
1253  *  name server, recurse.
1254  */
1255 static int
1256 queryns(Query *qp, int depth, uchar *ibuf, uchar *obuf, int waitsecs, int inns)
1257 {
1258 	int ndest, len, replywaits, rv;
1259 	ushort req;
1260 	ulong endtime;
1261 	char buf[12];
1262 	uchar srcip[IPaddrlen];
1263 	Dest *p, *np, *dest;
1264 
1265 	/* pack request into a udp message */
1266 	req = rand();
1267 	len = mkreq(qp->dp, qp->type, obuf, Frecurse|Oquery, req);
1268 
1269 	/* no server addresses yet */
1270 	queryck(qp);
1271 	dest = emalloc(Maxdest * sizeof *dest);	/* dest can't be on stack */
1272 	for (p = dest; p < dest + Maxdest; p++)
1273 		destinit(p);
1274 	/* this dest array is local to this call of queryns() */
1275 	free(qp->dest);
1276 	qp->curdest = qp->dest = dest;
1277 
1278 	/*
1279 	 *  transmit udp requests and wait for answers.
1280 	 *  at most Maxtrans attempts to each address.
1281 	 *  each cycle send one more message than the previous.
1282 	 *  retry a query via tcp if its response is truncated.
1283 	 */
1284 	for(ndest = 1; ndest < Maxdest; ndest++){
1285 		qp->ndest = ndest;
1286 		qp->tcpset = 0;
1287 		if (xmitquery(qp, Udp, depth, obuf, inns, len) < 0)
1288 			break;
1289 
1290 		endtime = time(nil) + waitsecs;
1291 		if(endtime > qp->req->aborttime)
1292 			endtime = qp->req->aborttime;
1293 
1294 		for(replywaits = 0; replywaits < ndest; replywaits++){
1295 			DNSmsg m;
1296 
1297 			procsetname("reading %sside reply from %I: %s %s from %s",
1298 				(inns? "in": "out"), obuf, qp->dp->name,
1299 				rrname(qp->type, buf, sizeof buf), qp->req->from);
1300 
1301 			/* read udp answer into m */
1302 			if (readreply(qp, Udp, req, ibuf, &m, endtime) >= 0)
1303 				memmove(srcip, ibuf, IPaddrlen);
1304 			else if (!(m.flags & Ftrunc)) {
1305 				freeanswers(&m);
1306 				break;		/* timed out on this dest */
1307 			} else {
1308 				/* whoops, it was truncated! ask again via tcp */
1309 				freeanswers(&m);
1310 				rv = tcpquery(qp, &m, depth, ibuf, obuf, len,
1311 					waitsecs, inns, req);  /* answer in m */
1312 				if (rv < 0) {
1313 					freeanswers(&m);
1314 					break;		/* failed via tcp too */
1315 				}
1316 				memmove(srcip, qp->tcpip, IPaddrlen);
1317 			}
1318 
1319 			/* find responder */
1320 			// dnslog("queryns got reply from %I", srcip);
1321 			for(p = qp->dest; p < qp->curdest; p++)
1322 				if(memcmp(p->a, srcip, sizeof p->a) == 0)
1323 					break;
1324 
1325 			/* remove all addrs of responding server from list */
1326 			for(np = qp->dest; np < qp->curdest; np++)
1327 				if(np->s == p->s)
1328 					p->nx = Maxtrans;
1329 
1330 			/* free or incorporate RRs in m */
1331 			rv = procansw(qp, &m, srcip, depth, p);
1332 			if (rv > 0) {
1333 				free(qp->dest);
1334 				qp->dest = qp->curdest = nil; /* prevent accidents */
1335 				return rv;
1336 			}
1337 		}
1338 	}
1339 
1340 	/* if all servers returned failure, propagate it */
1341 	qp->dp->respcode = Rserver;
1342 	for(p = dest; p < qp->curdest; p++) {
1343 		destck(p);
1344 		if(p->code != Rserver)
1345 			qp->dp->respcode = 0;
1346 		p->magic = 0;			/* prevent accidents */
1347 	}
1348 
1349 //	if (qp->dp->respcode)
1350 //		dnslog("queryns setting Rserver for %s", qp->dp->name);
1351 
1352 	free(qp->dest);
1353 	qp->dest = qp->curdest = nil;		/* prevent accidents */
1354 	return 0;
1355 }
1356 
1357 /*
1358  *  run a command with a supplied fd as standard input
1359  */
1360 char *
1361 system(int fd, char *cmd)
1362 {
1363 	int pid, p, i;
1364 	static Waitmsg msg;
1365 
1366 	if((pid = fork()) == -1)
1367 		sysfatal("fork failed: %r");
1368 	else if(pid == 0){
1369 		dup(fd, 0);
1370 		close(fd);
1371 		for (i = 3; i < 200; i++)
1372 			close(i);		/* don't leak fds */
1373 		execl("/bin/rc", "rc", "-c", cmd, nil);
1374 		sysfatal("exec rc: %r");
1375 	}
1376 	for(p = waitpid(); p >= 0; p = waitpid())
1377 		if(p == pid)
1378 			return msg.msg;
1379 	return "lost child";
1380 }
1381 
/*
 * scale a wait of ms by pcntprob percent, but never
 * return less than the 1500 floor.
 */
static ulong
weight(ulong ms, unsigned pcntprob)
{
	ulong scaled;

	scaled = ms * pcntprob / 100;
	return scaled < 1500? 1500: scaled;
}
1393 
1394 /*
1395  * in principle we could use a single descriptor for a udp port
1396  * to send all queries and receive all the answers to them,
1397  * but we'd have to sort out the answers by dns-query id.
1398  */
1399 static int
1400 udpquery(Query *qp, char *mntpt, int depth, int patient, int inns)
1401 {
1402 	int fd, rv;
1403 	long now;
1404 	ulong pcntprob, wait, reqtm;
1405 	char *msg;
1406 	uchar *obuf, *ibuf;
1407 	static QLock mntlck;
1408 	static ulong lastmount;
1409 
1410 	/* use alloced buffers rather than ones from the stack */
1411 	// ibuf = emalloc(Maxudpin+Udphdrsize);
1412 	ibuf = emalloc(64*1024);		/* max. tcp reply size */
1413 	obuf = emalloc(Maxudp+Udphdrsize);
1414 
1415 	fd = udpport(mntpt);
1416 	while (fd < 0 && cfg.straddle && strcmp(mntpt, "/net.alt") == 0) {
1417 		/* HACK: remount /net.alt */
1418 		now = time(nil);
1419 		if (now < lastmount + Remntretry)
1420 			sleep((lastmount + Remntretry - now)*1000);
1421 		qlock(&mntlck);
1422 		fd = udpport(mntpt);	/* try again under lock */
1423 		if (fd < 0) {
1424 			dnslog("[%d] remounting /net.alt", getpid());
1425 			unmount(nil, "/net.alt");
1426 
1427 			msg = system(open("/dev/null", ORDWR), "outside");
1428 
1429 			lastmount = time(nil);
1430 			if (msg && *msg) {
1431 				dnslog("[%d] can't remount /net.alt: %s",
1432 					getpid(), msg);
1433 				sleep(10*1000);		/* don't spin wildly */
1434 			} else
1435 				fd = udpport(mntpt);
1436 		}
1437 		qunlock(&mntlck);
1438 	}
1439 	if (fd < 0) {
1440 		dnslog("can't get udpport for %s query of name %s: %r",
1441 			mntpt, qp->dp->name);
1442 		sysfatal("out of udp conversations");	/* we're buggered */
1443 	}
1444 
1445 	/*
1446 	 * Our QIP servers are busted, don't answer AAAA and
1447 	 * take forever to answer CNAME if there isn't one.
1448 	 * They rarely set Rname.
1449 	 * make time-to-wait proportional to estimated probability of an
1450 	 * RR of that type existing.
1451 	 */
1452 	if (qp->type >= nelem(likely))
1453 		pcntprob = 35;			/* unpopular query type */
1454 	else
1455 		pcntprob = likely[qp->type];
1456 	reqtm = (patient? 2*Maxreqtm: Maxreqtm);
1457 	/* time for a single outgoing udp query */
1458 	wait = weight(S2MS(reqtm)/3, pcntprob);
1459 	qp->req->aborttime = time(nil) + MS2S(3*wait); /* for all udp queries */
1460 
1461 	qp->udpfd = fd;
1462 	rv = queryns(qp, depth, ibuf, obuf, MS2S(wait), inns);
1463 	close(fd);
1464 	qp->udpfd = -1;
1465 
1466 	free(obuf);
1467 	free(ibuf);
1468 	return rv;
1469 }
1470 
1471 /* look up (qp->dp->name,qp->type) rr in dns, via *nsrp with results in *reqp */
1472 static int
1473 netquery(Query *qp, int depth)
1474 {
1475 	int lock, rv, triedin, inname;
1476 //	char buf[32];
1477 	RR *rp;
1478 	DN *dp;
1479 	Querylck *qlp;
1480 	static int whined;
1481 
1482 	rv = 0;				/* pessimism */
1483 	if(depth > 12)			/* in a recursive loop? */
1484 		return 0;
1485 
1486 	slave(qp->req);
1487 	/*
1488 	 * slave might have forked.  if so, the parent process longjmped to
1489 	 * req->mret; we're usually the child slave, but if there are too
1490 	 * many children already, we're still the same process.
1491 	 */
1492 
1493 	/*
1494 	 * don't lock before call to slave so only children can block.
1495 	 * just lock at top-level invocation.
1496 	 */
1497 	lock = depth <= 1 && qp->req->isslave;
1498 	dp = qp->dp;		/* ensure that it doesn't change underfoot */
1499 	qlp = nil;
1500 	if(lock) {
1501 //		procsetname("query lock wait: %s %s from %s", dp->name,
1502 //			rrname(qp->type, buf, sizeof buf), qp->req->from);
1503 		/*
1504 		 * don't make concurrent queries for this name.
1505 		 * dozens of processes blocking here probably indicates
1506 		 * an error in our dns data that causes us to not
1507 		 * recognise a zone (area) as one of our own, thus
1508 		 * causing us to query other nameservers.
1509 		 */
1510 		qlp = &dp->querylck[qtype2lck(qp->type)];
1511 		qlock(qlp);
1512 		if (qlp->Ref.ref > 10) {
1513 			qunlock(qlp);
1514 			if (!whined) {
1515 				whined = 1;
1516 				dnslog("too many outstanding queries for %s;"
1517 					" dropping this one; no further logging"
1518 					" of drops", dp->name);
1519 			}
1520 			return 0;
1521 		}
1522 		++qlp->Ref.ref;
1523 		qunlock(qlp);
1524 	}
1525 	procsetname("netquery: %s", dp->name);
1526 
1527 	/* prepare server RR's for incremental lookup */
1528 	for(rp = qp->nsrp; rp; rp = rp->next)
1529 		rp->marker = 0;
1530 
1531 	triedin = 0;
1532 
1533 	/*
1534 	 * normal resolvers and servers will just use mntpt for all addresses,
1535 	 * even on the outside.  straddling servers will use mntpt (/net)
1536 	 * for inside addresses and /net.alt for outside addresses,
1537 	 * thus bypassing other inside nameservers.
1538 	 */
1539 	inname = insideaddr(dp->name);
1540 	if (!cfg.straddle || inname) {
1541 		rv = udpquery(qp, mntpt, depth, Hurry, (cfg.inside? Inns: Outns));
1542 		triedin = 1;
1543 	}
1544 
1545 	/*
1546 	 * if we're still looking, are inside, and have an outside domain,
1547 	 * try it on our outside interface, if any.
1548 	 */
1549 	if (rv == 0 && cfg.inside && !inname) {
1550 		if (triedin)
1551 			dnslog(
1552 	   "[%d] netquery: internal nameservers failed for %s; trying external",
1553 				getpid(), dp->name);
1554 
1555 		/* prepare server RR's for incremental lookup */
1556 		for(rp = qp->nsrp; rp; rp = rp->next)
1557 			rp->marker = 0;
1558 
1559 		rv = udpquery(qp, "/net.alt", depth, Patient, Outns);
1560 	}
1561 //	if (rv == 0)		/* could ask /net.alt/dns directly */
1562 //		askoutdns(dp, qp->type);
1563 
1564 	if(lock && qlp) {
1565 		qlock(qlp);
1566 		assert(qlp->Ref.ref > 0);
1567 		qunlock(qlp);
1568 		decref(qlp);
1569 	}
1570 	return rv;
1571 }
1572 
1573 int
1574 seerootns(void)
1575 {
1576 	int rv;
1577 	char root[] = "";
1578 	Request req;
1579 	Query *qp;
1580 
1581 	memset(&req, 0, sizeof req);
1582 	req.isslave = 1;
1583 	req.aborttime = now + Maxreqtm;
1584 	req.from = "internal";
1585 	qp = emalloc(sizeof *qp);
1586 	queryinit(qp, dnlookup(root, Cin, 1), Tns, &req);
1587 
1588 	qp->nsrp = dblookup(root, Cin, Tns, 0, 0);
1589 	rv = netquery(qp, 0);
1590 
1591 	rrfreelist(qp->nsrp);
1592 	querydestroy(qp);
1593 	free(qp);
1594 	return rv;
1595 }
1596