xref: /plan9/sys/src/cmd/ndb/dnresolve.c (revision 530fef6600a0fb31e4c0a6ecda1320e97bcd937c)
1 /*
2  * domain name resolvers, see rfcs 1035 and 1123
3  */
4 #include <u.h>
5 #include <libc.h>
6 #include <ip.h>
7 #include <bio.h>
8 #include <ndb.h>
9 #include "dns.h"
10 
/*
 * time-unit conversions.  with integer arguments the divisions truncate.
 */
#define NS2MS(ns) ((ns) / 1000000L)	/* nanoseconds to milliseconds */
#define S2MS(s)   ((s)  * 1000)		/* seconds to milliseconds */
#define MS2S(ms)  ((ms) / 1000)		/* milliseconds to seconds */
14 
typedef struct Dest Dest;
typedef struct Ipaddr Ipaddr;
typedef struct Query Query;

enum
{
	Udp, Tcp,		/* transport (`medium') a query is sent over */
	Maxdest=	24,	/* maximum destinations for a request message */
	Maxtrans=	3,	/* maximum transmissions to a server */
	Destmagic=	0xcafebabe,	/* Dest.magic once destinit has run */
	Querymagic=	0xdeadbeef,	/* Query.magic once queryinit has run */
};
enum { Hurry, Patient, };	/* NOTE(review): not used in the code visible here */
enum { Outns, Inns, };		/* presumably values for the `inns' arguments -- confirm */
enum { Remntretry = 15, };	/* min. sec.s between remount attempts */
30 
/*
 * element of a singly-linked list of ip addresses.
 */
struct Ipaddr {
	Ipaddr *next;		/* following element (presumably nil at the end) */
	uchar	ip[IPaddrlen];	/* the address itself */
};
35 
/*
 * one destination (name server address) that a query may be sent to.
 * queryns allocates an array of Maxdest of these for a Query.
 */
struct Dest
{
	uchar	a[IPaddrlen];	/* ip address */
	DN	*s;		/* name server */
	int	nx;		/* number of transmissions */
	int	code;		/* response code; used to clear dp->respcode */

	ulong	magic;		/* Destmagic; verified by destck */
};
45 
/*
 * state of one lookup of (dp, type) made on behalf of a Request.
 *
 * Query has a QLock in it, thus it can't be an automatic
 * variable, since each process would see a separate copy
 * of the lock on its stack.
 */
struct Query {
	DN	*dp;		/* domain */
	ushort	type;		/* and type to look up */
	Request *req;		/* request this lookup belongs to; supplies aborttime */
	RR	*nsrp;		/* name servers to consult */

	/* dest must not be on the stack due to forking in slave() */
	Dest	*dest;		/* array of destinations */
	Dest	*curdest;	/* pointer to one of them: just past the last filled-in entry */
	int	ndest;		/* how many destinations xmitquery may send to */

	int	udpfd;		/* udp data file; querydestroy leaves it open */

	QLock	tcplock;	/* only one tcp call at a time per query */
	int	tcpset;		/* set once mydnsquery has dialed a tcp connection */
	int	tcpfd;		/* if Tcp, read replies from here */
	int	tcpctlfd;	/* control file of the tcp connection */
	uchar	tcpip[IPaddrlen];	/* remote address of the tcp connection */

	ulong	magic;		/* Querymagic; verified by queryck */
};
72 
/*
 * estimated % probability of such a record existing at all,
 * indexed by RR type.  types without an entry here are zero.
 */
int likely[] = {
	[Ta]		95,
	[Taaaa]		10,
	[Tcname]	15,
	[Tmx]		60,
	[Tns]		90,
	[Tnull]		5,
	[Tptr]		35,
	[Tsoa]		90,
	[Tsrv]		60,
	[Ttxt]		15,
	[Tall]		95,
};
87 
88 static RR*	dnresolve1(char*, int, int, Request*, int, int);
89 static int	netquery(Query *, int);
90 
91 /*
92  * reading /proc/pid/args yields either "name args" or "name [display args]",
93  * so return only display args, if any.
94  */
95 static char *
96 procgetname(void)
97 {
98 	int fd, n;
99 	char *lp, *rp;
100 	char buf[256];
101 
102 	snprint(buf, sizeof buf, "#p/%d/args", getpid());
103 	if((fd = open(buf, OREAD)) < 0)
104 		return strdup("");
105 	*buf = '\0';
106 	n = read(fd, buf, sizeof buf-1);
107 	close(fd);
108 	if (n >= 0)
109 		buf[n] = '\0';
110 	if ((lp = strchr(buf, '[')) == nil ||
111 	    (rp = strrchr(buf, ']')) == nil)
112 		return strdup("");
113 	*rp = '\0';
114 	return strdup(lp+1);
115 }
116 
117 /*
118  *  lookup 'type' info for domain name 'name'.  If it doesn't exist, try
119  *  looking it up as a canonical name.
120  */
121 RR*
122 dnresolve(char *name, int class, int type, Request *req, RR **cn, int depth,
123 	int recurse, int rooted, int *status)
124 {
125 	RR *rp, *nrp, *drp;
126 	DN *dp;
127 	int loops;
128 	char *procname;
129 	char nname[Domlen];
130 
131 	if(status)
132 		*status = 0;
133 
134 	if(depth > 12)			/* in a recursive loop? */
135 		return nil;
136 
137 	procname = procgetname();
138 	/*
139 	 *  hack for systems that don't have resolve search
140 	 *  lists.  Just look up the simple name in the database.
141 	 */
142 	if(!rooted && strchr(name, '.') == nil){
143 		rp = nil;
144 		drp = domainlist(class);
145 		for(nrp = drp; rp == nil && nrp != nil; nrp = nrp->next){
146 			snprint(nname, sizeof nname, "%s.%s", name,
147 				nrp->ptr->name);
148 			rp = dnresolve(nname, class, type, req, cn, depth+1,
149 				recurse, rooted, status);
150 			lock(&dnlock);
151 			rrfreelist(rrremneg(&rp));
152 			unlock(&dnlock);
153 		}
154 		if(drp != nil)
155 			rrfreelist(drp);
156 		procsetname(procname);
157 		free(procname);
158 		return rp;
159 	}
160 
161 	/*
162 	 *  try the name directly
163 	 */
164 	rp = dnresolve1(name, class, type, req, depth, recurse);
165 	if(rp == nil) {
166 		/*
167 		 * try it as a canonical name if we weren't told
168 		 * that the name didn't exist
169 		 */
170 		dp = dnlookup(name, class, 0);
171 		if(type != Tptr && dp->respcode != Rname)
172 			for(loops = 0; rp == nil && loops < 32; loops++){
173 				rp = dnresolve1(name, class, Tcname, req,
174 					depth, recurse);
175 				if(rp == nil)
176 					break;
177 
178 				/* rp->host == nil shouldn't happen, but does */
179 				if(rp->negative || rp->host == nil){
180 					rrfreelist(rp);
181 					rp = nil;
182 					break;
183 				}
184 
185 				name = rp->host->name;
186 				lock(&dnlock);
187 				if(cn)
188 					rrcat(cn, rp);
189 				else
190 					rrfreelist(rp);
191 				unlock(&dnlock);
192 
193 				rp = dnresolve1(name, class, type, req,
194 					depth, recurse);
195 			}
196 
197 		/* distinction between not found and not good */
198 		if(rp == nil && status != nil && dp->respcode != 0)
199 			*status = dp->respcode;
200 	}
201 	procsetname(procname);
202 	free(procname);
203 	return randomize(rp);
204 }
205 
206 static void
207 queryinit(Query *qp, DN *dp, int type, Request *req)
208 {
209 	memset(qp, 0, sizeof *qp);
210 	qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
211 	qp->dp = dp;
212 	qp->type = type;
213 	if (qp->type != type)
214 		dnslog("queryinit: bogus type %d", type);
215 	qp->req = req;
216 	qp->nsrp = nil;
217 	qp->dest = qp->curdest = nil;
218 	qp->magic = Querymagic;
219 }
220 
/*
 * sanity check: assert that qp is non-nil and carries Querymagic,
 * i.e. that it has been through queryinit and not yet querydestroy.
 */
static void
queryck(Query *qp)
{
	assert(qp);
	assert(qp->magic == Querymagic);
}
227 
228 static void
229 querydestroy(Query *qp)
230 {
231 	queryck(qp);
232 	/* leave udpfd open */
233 	if (qp->tcpfd > 0)
234 		close(qp->tcpfd);
235 	if (qp->tcpctlfd > 0) {
236 		hangup(qp->tcpctlfd);
237 		close(qp->tcpctlfd);
238 	}
239 	free(qp->dest);
240 	memset(qp, 0, sizeof *qp);	/* prevent accidents */
241 	qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
242 }
243 
244 static void
245 destinit(Dest *p)
246 {
247 	memset(p, 0, sizeof *p);
248 	p->magic = Destmagic;
249 }
250 
/*
 * sanity check: assert that p is non-nil and carries Destmagic,
 * i.e. that destinit has been applied to it.
 */
static void
destck(Dest *p)
{
	assert(p);
	assert(p->magic == Destmagic);
}
257 
/* counterpart of destinit; there is currently nothing to release */
static void
destdestroy(Dest *p)
{
	USED(p);	/* only marks the parameter as used */
}
263 
264 /*
265  * if the response to a query hasn't arrived within 100 ms.,
266  * it's unlikely to arrive at all.  after 1 s., it's really unlikely.
267  * queries for missing RRs are likely to produce time-outs rather than
268  * negative responses, so cname and aaaa queries are likely to time out,
269  * thus we don't wait very long for them.
270  */
271 static void
272 notestats(vlong start, int tmout, int type)
273 {
274 	qlock(&stats);
275 	if (tmout) {
276 		stats.tmout++;
277 		if (type == Taaaa)
278 			stats.tmoutv6++;
279 		else if (type == Tcname)
280 			stats.tmoutcname++;
281 	} else {
282 		long wait10ths = NS2MS(nsec() - start) / 100;
283 
284 		if (wait10ths <= 0)
285 			stats.under10ths[0]++;
286 		else if (wait10ths >= nelem(stats.under10ths))
287 			stats.under10ths[nelem(stats.under10ths) - 1]++;
288 		else
289 			stats.under10ths[wait10ths]++;
290 	}
291 	qunlock(&stats);
292 }
293 
/* count an answer that was satisfied from memory (called by dnresolve1) */
static void
noteinmem(void)
{
	qlock(&stats);		/* stats doubles as the lock for its counters */
	stats.answinmem++;
	qunlock(&stats);
}
301 
302 static RR*
303 issuequery(Query *qp, char *name, int class, int depth, int recurse)
304 {
305 	char *cp;
306 	DN *nsdp;
307 	RR *rp, *nsrp, *dbnsrp;
308 
309 	/*
310 	 *  if we're running as just a resolver, query our
311 	 *  designated name servers
312 	 */
313 	if(cfg.resolver){
314 		nsrp = randomize(getdnsservers(class));
315 		if(nsrp != nil) {
316 			qp->nsrp = nsrp;
317 			if(netquery(qp, depth+1)){
318 				rrfreelist(nsrp);
319 				return rrlookup(qp->dp, qp->type, OKneg);
320 			}
321 			rrfreelist(nsrp);
322 		}
323 	}
324 
325 	/*
326  	 *  walk up the domain name looking for
327 	 *  a name server for the domain.
328 	 */
329 	for(cp = name; cp; cp = walkup(cp)){
330 		/*
331 		 *  if this is a local (served by us) domain,
332 		 *  return answer
333 		 */
334 		dbnsrp = randomize(dblookup(cp, class, Tns, 0, 0));
335 		if(dbnsrp && dbnsrp->local){
336 			rp = dblookup(name, class, qp->type, 1, dbnsrp->ttl);
337 			rrfreelist(dbnsrp);
338 			return rp;
339 		}
340 
341 		/*
342 		 *  if recursion isn't set, just accept local
343 		 *  entries
344 		 */
345 		if(recurse == Dontrecurse){
346 			if(dbnsrp)
347 				rrfreelist(dbnsrp);
348 			continue;
349 		}
350 
351 		/* look for ns in cache */
352 		nsdp = dnlookup(cp, class, 0);
353 		nsrp = nil;
354 		if(nsdp)
355 			nsrp = randomize(rrlookup(nsdp, Tns, NOneg));
356 
357 		/* if the entry timed out, ignore it */
358 		if(nsrp && nsrp->ttl < now){
359 			rrfreelist(nsrp);
360 			nsrp = nil;
361 		}
362 
363 		if(nsrp){
364 			rrfreelist(dbnsrp);
365 
366 			/* query the name servers found in cache */
367 			qp->nsrp = nsrp;
368 			if(netquery(qp, depth+1)){
369 				rrfreelist(nsrp);
370 				return rrlookup(qp->dp, qp->type, OKneg);
371 			}
372 			rrfreelist(nsrp);
373 			continue;
374 		}
375 
376 		/* use ns from db */
377 		if(dbnsrp){
378 			/* try the name servers found in db */
379 			qp->nsrp = dbnsrp;
380 			if(netquery(qp, depth+1)){
381 				/* we got an answer */
382 				rrfreelist(dbnsrp);
383 				return rrlookup(qp->dp, qp->type, NOneg);
384 			}
385 			rrfreelist(dbnsrp);
386 		}
387 	}
388 	return nil;
389 }
390 
391 static RR*
392 dnresolve1(char *name, int class, int type, Request *req, int depth,
393 	int recurse)
394 {
395 	Area *area;
396 	DN *dp;
397 	RR *rp;
398 	Query *qp;
399 
400 	if(debug)
401 		dnslog("[%d] dnresolve1 %s %d %d", getpid(), name, type, class);
402 
403 	/* only class Cin implemented so far */
404 	if(class != Cin)
405 		return nil;
406 
407 	dp = dnlookup(name, class, 1);
408 
409 	/*
410 	 *  Try the cache first
411 	 */
412 	rp = rrlookup(dp, type, OKneg);
413 	if(rp)
414 		if(rp->db){
415 			/* unauthoritative db entries are hints */
416 			if(rp->auth) {
417 				noteinmem();
418 				return rp;
419 			}
420 		} else
421 			/* cached entry must still be valid */
422 			if(rp->ttl > now)
423 				/* but Tall entries are special */
424 				if(type != Tall || rp->query == Tall) {
425 					noteinmem();
426 					return rp;
427 				}
428 	rrfreelist(rp);
429 	rp = nil;		/* accident prevention */
430 	USED(rp);
431 
432 	/*
433 	 * try the cache for a canonical name. if found punt
434 	 * since we'll find it during the canonical name search
435 	 * in dnresolve().
436 	 */
437 	if(type != Tcname){
438 		rp = rrlookup(dp, Tcname, NOneg);
439 		rrfreelist(rp);
440 		if(rp)
441 			return nil;
442 	}
443 
444 	/*
445 	 * if the domain name is within an area of ours,
446 	 * we should have found its data in memory by now.
447 	 */
448 	area = inmyarea(dp->name);
449 	if (area || strncmp(dp->name, "local#", 6) == 0) {
450 //		char buf[32];
451 
452 //		dnslog("%s %s: no data in area %s", dp->name,
453 //			rrname(type, buf, sizeof buf), area->soarr->owner->name);
454 		return nil;
455 	}
456 
457 	qp = emalloc(sizeof *qp);
458 	queryinit(qp, dp, type, req);
459 	rp = issuequery(qp, name, class, depth, recurse);
460 	querydestroy(qp);
461 	free(qp);
462 	if(rp)
463 		return rp;
464 
465 	/* settle for a non-authoritative answer */
466 	rp = rrlookup(dp, type, OKneg);
467 	if(rp)
468 		return rp;
469 
470 	/* noone answered.  try the database, we might have a chance. */
471 	return dblookup(name, class, type, 0, 0);
472 }
473 
/*
 *  step from a domain name to its parent domain.
 *  returns a pointer into `name' just past its first dot; the empty
 *  string if `name' is a single non-empty label; and 0 if `name' is
 *  itself empty (there is nothing left to walk up to).
 */
char*
walkup(char *name)
{
	char *dot;

	dot = strchr(name, '.');
	if(dot != 0)
		return dot + 1;
	return *name != '\0'? "": 0;
}
492 
493 /*
494  *  Get a udp port for sending requests and reading replies.  Put the port
495  *  into "headers" mode.
496  */
497 static char *hmsg = "headers";
498 
499 int
500 udpport(char *mtpt)
501 {
502 	int fd, ctl;
503 	char ds[64], adir[64];
504 
505 	/* get a udp port */
506 	snprint(ds, sizeof ds, "%s/udp!*!0", (mtpt? mtpt: "/net"));
507 	ctl = announce(ds, adir);
508 	if(ctl < 0){
509 		/* warning("can't get udp port"); */
510 		return -1;
511 	}
512 
513 	/* turn on header style interface */
514 	if(write(ctl, hmsg, strlen(hmsg)) != strlen(hmsg)){
515 		close(ctl);
516 		warning(hmsg);
517 		return -1;
518 	}
519 
520 	/* grab the data file */
521 	snprint(ds, sizeof ds, "%s/data", adir);
522 	fd = open(ds, ORDWR);
523 	close(ctl);
524 	if(fd < 0)
525 		warning("can't open udp port %s: %r", ds);
526 	return fd;
527 }
528 
529 /* generate a DNS UDP query packet */
530 int
531 mkreq(DN *dp, int type, uchar *buf, int flags, ushort reqno)
532 {
533 	DNSmsg m;
534 	int len;
535 	Udphdr *uh = (Udphdr*)buf;
536 
537 	/* stuff port number into output buffer */
538 	memset(uh, 0, sizeof *uh);
539 	hnputs(uh->rport, 53);
540 
541 	/* make request and convert it to output format */
542 	memset(&m, 0, sizeof m);
543 	m.flags = flags;
544 	m.id = reqno;
545 	m.qd = rralloc(type);
546 	m.qd->owner = dp;
547 	m.qd->type = type;
548 	if (m.qd->type != type)
549 		dnslog("mkreq: bogus type %d", type);
550 	len = convDNS2M(&m, &buf[Udphdrsize], Maxudp);
551 	rrfree(m.qd);
552 	memset(&m, 0, sizeof m);		/* cause trouble */
553 	return len;
554 }
555 
556 void
557 freeanswers(DNSmsg *mp)
558 {
559 	rrfreelist(mp->qd);
560 	rrfreelist(mp->an);
561 	rrfreelist(mp->ns);
562 	rrfreelist(mp->ar);
563 	mp->qd = mp->an = mp->ns = mp->ar = nil;
564 }
565 
566 /* sets srcip */
567 static int
568 readnet(Query *qp, int medium, uchar *ibuf, ulong endtime, uchar **replyp,
569 	uchar *srcip)
570 {
571 	int len, fd;
572 	long ms;
573 	vlong startns = nsec();
574 	uchar *reply;
575 	uchar lenbuf[2];
576 
577 	/* timed read of reply */
578 	ms = S2MS(endtime) - NS2MS(startns);
579 	if (ms < 2000)
580 		ms = 2000;	/* give the remote ns a fighting chance */
581 	reply = ibuf;
582 	len = -1;			/* pessimism */
583 	memset(srcip, 0, IPaddrlen);
584 	if (medium == Udp)
585 		if (qp->udpfd <= 0)
586 			dnslog("readnet: qp->udpfd closed");
587 		else {
588 			alarm(ms);
589 			len = read(qp->udpfd, ibuf, Udphdrsize+Maxudpin);
590 			alarm(0);
591 			notestats(startns, len < 0, qp->type);
592 			if (len >= IPaddrlen)
593 				memmove(srcip, ibuf, IPaddrlen);
594 			if (len >= Udphdrsize) {
595 				len   -= Udphdrsize;
596 				reply += Udphdrsize;
597 			}
598 		}
599 	else {
600 		if (!qp->tcpset)
601 			dnslog("readnet: tcp params not set");
602 		alarm(ms);
603 		fd = qp->tcpfd;
604 		if (fd <= 0)
605 			dnslog("readnet: %s: tcp fd unset for dest %I",
606 				qp->dp->name, qp->tcpip);
607 		else if (readn(fd, lenbuf, 2) != 2) {
608 			dnslog("readnet: short read of tcp size from %I",
609 				qp->tcpip);
610 			/* probably a time-out */
611 			notestats(startns, 1, qp->type);
612 		} else {
613 			len = lenbuf[0]<<8 | lenbuf[1];
614 			if (readn(fd, ibuf, len) != len) {
615 				dnslog("readnet: short read of tcp data from %I",
616 					qp->tcpip);
617 				/* probably a time-out */
618 				notestats(startns, 1, qp->type);
619 				len = -1;
620 			}
621 		}
622 		alarm(0);
623 		memmove(srcip, qp->tcpip, IPaddrlen);
624 	}
625 	*replyp = reply;
626 	return len;
627 }
628 
/*
 *  read replies to a request and remember the rrs in the answer(s).
 *  ignore any of the wrong type.
 *  wait at most until endtime.
 *
 *  returns 0 with *mp holding the first reply that matches the request
 *  (same id, owner and type); each answer RR is tagged with qp->type.
 *  returns -1 on time-out or read failure, and also when a reply has
 *  Ftrunc set: its RR lists are freed but mp->flags is left intact, so
 *  the caller can tell truncation from a time-out.
 */
static int
readreply(Query *qp, int medium, ushort req, uchar *ibuf, DNSmsg *mp,
	ulong endtime)
{
	int len, rv;
	char *err;
	char tbuf[32];
	uchar *reply;
	uchar srcip[IPaddrlen];
	RR *rp;

	queryck(qp);
	rv = 0;
	memset(mp, 0, sizeof *mp);
	if (time(nil) >= endtime)
		return -1;		/* timed out before we started */

	memset(srcip, 0, sizeof srcip);
	/* never executed; presumably placates "used before set" diagnostics */
	if (0)
		len = -1;
	/*
	 * each pass reads one message; rejected messages are freed by the
	 * loop's increment expression (also reached via `continue').
	 */
	for (; time(nil) < endtime &&
	    (len = readnet(qp, medium, ibuf, endtime, &reply, srcip)) >= 0;
	    freeanswers(mp)){
		/* convert into internal format  */
		memset(mp, 0, sizeof *mp);
		err = convM2DNS(reply, len, mp, nil);
		if (mp->flags & Ftrunc) {
			free(err);
			freeanswers(mp);
			/* notify our caller to retry the query via tcp. */
			return -1;
		} else if(err){
			dnslog("readreply: %s: input err, len %d: %s: %I",
				qp->dp->name, len, err, srcip);
			free(err);
			continue;
		}
		if(debug)
			logreply(qp->req->id, srcip, mp);

		/* answering the right question? */
		if(mp->id != req)
			dnslog("%d: id %d instead of %d: %I", qp->req->id,
				mp->id, req, srcip);
		else if(mp->qd == 0)
			dnslog("%d: no question RR: %I", qp->req->id, srcip);
		else if(mp->qd->owner != qp->dp)
			dnslog("%d: owner %s instead of %s: %I", qp->req->id,
				mp->qd->owner->name, qp->dp->name, srcip);
		else if(mp->qd->type != qp->type)
			dnslog("%d: qp->type %d instead of %d: %I",
				qp->req->id, mp->qd->type, qp->type, srcip);
		else {
			/* remember what request this is in answer to */
			for(rp = mp->an; rp; rp = rp->next)
				rp->query = qp->type;
			return rv;
		}
	}
	if (time(nil) >= endtime) {
		;				/* query expired */
	} else if (0) {
		/* disabled diagnostics, kept for debugging */
		/* this happens routinely when a read times out */
		dnslog("readreply: %s type %s: ns %I read error or eof "
			"(returned %d): %r", qp->dp->name, rrname(qp->type,
			tbuf, sizeof tbuf), srcip, len);
		if (medium == Udp)
			for (rp = qp->nsrp; rp != nil; rp = rp->next)
				if (rp->type == Tns)
					dnslog("readreply: %s: query sent to "
						"ns %s", qp->dp->name,
						rp->host->name);
	}
	return -1;
}
709 
710 /*
711  *	return non-0 if first list includes second list
712  */
713 int
714 contains(RR *rp1, RR *rp2)
715 {
716 	RR *trp1, *trp2;
717 
718 	for(trp2 = rp2; trp2; trp2 = trp2->next){
719 		for(trp1 = rp1; trp1; trp1 = trp1->next)
720 			if(trp1->type == trp2->type)
721 			if(trp1->host == trp2->host)
722 			if(trp1->owner == trp2->owner)
723 				break;
724 		if(trp1 == nil)
725 			return 0;
726 	}
727 	return 1;
728 }
729 
730 
731 /*
732  *  return multicast version if any
733  */
734 int
735 ipisbm(uchar *ip)
736 {
737 	if(isv4(ip)){
738 		if (ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0 ||
739 		    ipcmp(ip, IPv4bcast) == 0)
740 			return 4;
741 	} else
742 		if(ip[0] == 0xff)
743 			return 6;
744 	return 0;
745 }
746 
747 /*
748  *  Get next server address
749  */
750 static int
751 serveraddrs(Query *qp, int nd, int depth)
752 {
753 	RR *rp, *arp, *trp;
754 	Dest *cur;
755 
756 	if(nd >= Maxdest)
757 		return 0;
758 
759 	/*
760 	 *  look for a server whose address we already know.
761 	 *  if we find one, mark it so we ignore this on
762 	 *  subsequent passes.
763 	 */
764 	arp = 0;
765 	for(rp = qp->nsrp; rp; rp = rp->next){
766 		assert(rp->magic == RRmagic);
767 		if(rp->marker)
768 			continue;
769 		arp = rrlookup(rp->host, Ta, NOneg);
770 		if(arp){
771 			rp->marker = 1;
772 			break;
773 		}
774 		arp = dblookup(rp->host->name, Cin, Ta, 0, 0);
775 		if(arp){
776 			rp->marker = 1;
777 			break;
778 		}
779 	}
780 
781 	/*
782 	 *  if the cache and database lookup didn't find any new
783 	 *  server addresses, try resolving one via the network.
784 	 *  Mark any we try to resolve so we don't try a second time.
785 	 */
786 	if(arp == 0)
787 		for(rp = qp->nsrp; rp; rp = rp->next){
788 			if(rp->marker)
789 				continue;
790 			rp->marker = 1;
791 
792 			/*
793 			 *  avoid loops looking up a server under itself
794 			 */
795 			if(subsume(rp->owner->name, rp->host->name))
796 				continue;
797 
798 			arp = dnresolve(rp->host->name, Cin, Ta, qp->req, 0,
799 				depth+1, Recurse, 1, 0);
800 			lock(&dnlock);
801 			rrfreelist(rrremneg(&arp));
802 			unlock(&dnlock);
803 			if(arp)
804 				break;
805 		}
806 
807 	/* use any addresses that we found */
808 	for(trp = arp; trp && nd < Maxdest; trp = trp->next){
809 		cur = &qp->dest[nd];
810 		parseip(cur->a, trp->ip->name);
811 		/*
812 		 * straddling servers can reject all nameservers if they are all
813 		 * inside, so be sure to list at least one outside ns at
814 		 * the end of the ns list in /lib/ndb for `dom='.
815 		 */
816 		if (ipisbm(cur->a) ||
817 		    cfg.straddle && !insideaddr(qp->dp->name) && insidens(cur->a))
818 			continue;
819 		cur->nx = 0;
820 		cur->s = trp->owner;
821 		cur->code = Rtimeout;
822 		nd++;
823 	}
824 	rrfreelist(arp);
825 	return nd;
826 }
827 
828 /*
829  *  cache negative responses
830  */
831 static void
832 cacheneg(DN *dp, int type, int rcode, RR *soarr)
833 {
834 	RR *rp;
835 	DN *soaowner;
836 	ulong ttl;
837 
838 	stats.negcached++;
839 
840 	/* no cache time specified, don't make anything up */
841 	if(soarr != nil){
842 		if(soarr->next != nil){
843 			rrfreelist(soarr->next);
844 			soarr->next = nil;
845 		}
846 		soaowner = soarr->owner;
847 	} else
848 		soaowner = nil;
849 
850 	/* the attach can cause soarr to be freed so mine it now */
851 	if(soarr != nil && soarr->soa != nil)
852 		ttl = soarr->soa->minttl+now;
853 	else
854 		ttl = 5*Min;
855 
856 	/* add soa and negative RR to the database */
857 	rrattach(soarr, Authoritative);
858 
859 	rp = rralloc(type);
860 	rp->owner = dp;
861 	rp->negative = 1;
862 	rp->negsoaowner = soaowner;
863 	rp->negrcode = rcode;
864 	rp->ttl = ttl;
865 	rrattach(rp, Authoritative);
866 }
867 
868 static int
869 setdestoutns(Dest *p, int n)
870 {
871 	uchar *outns = outsidens(n);
872 
873 	destck(p);
874 	destinit(p);
875 	if (outns == nil) {
876 		if (n == 0)
877 			dnslog("[%d] no outside-ns in ndb", getpid());
878 		return -1;
879 	}
880 	memmove(p->a, outns, sizeof p->a);
881 	p->s = dnlookup("outside-ns-ips", Cin, 1);
882 	return 0;
883 }
884 
/*
 * issue query via UDP or TCP as appropriate.
 * udppkt is a udp packet: a Udphdr, whose leading bytes are the
 * destination address, followed by len bytes of DNS message.
 *
 * for UDP, the whole packet is written to qp->udpfd; returns 0 on
 * success and -1 on failure.
 * for TCP, the destination is dialed, qp->tcpfd and qp->tcpctlfd are
 * set, qp->tcpip is set from the connection's remote system (if
 * getnetconninfo succeeds) and the message is written preceded by its
 * 2-byte length; returns dial's result (negative on failure).  a failed
 * write over tcp is only warned about.
 * refuses (returns -1) to send to one of our own addresses.
 */
static int
mydnsquery(Query *qp, int medium, uchar *udppkt, int len)
{
	int rv = -1, nfd;
	char *domain;
	char conndir[40];
	uchar belen[2];
	NetConnInfo *nci;

	queryck(qp);
	/* printable form of the destination address at the head of udppkt */
	domain = smprint("%I", udppkt);
	if (myaddr(domain)) {
		dnslog("mydnsquery: trying to send to myself (%s); bzzzt",
			domain);
		free(domain);
		return rv;
	}

	switch (medium) {
	case Udp:
		free(domain);
		/* dup serves only to test that udpfd is a valid descriptor */
		nfd = dup(qp->udpfd, -1);
		if (nfd < 0) {
			warning("mydnsquery: qp->udpfd %d: %r", qp->udpfd);
			close(qp->udpfd);	/* ensure it's closed */
			qp->udpfd = -1;		/* poison it */
			return rv;
		}
		close(nfd);

		if (qp->udpfd <= 0)
			dnslog("mydnsquery: qp->udpfd %d closed", qp->udpfd);
		else {
			if (write(qp->udpfd, udppkt, len+Udphdrsize) !=
			    len+Udphdrsize)
				warning("sending udp msg: %r");
			else {
				stats.qsent++;
				rv = 0;
			}
		}
		break;
	case Tcp:
		/* send via TCP & keep fd around for reply */
		alarm(10*1000);		/* bound the time spent dialing */
		qp->tcpfd = rv = dial(netmkaddr(domain, "tcp", "dns"), nil,
			conndir, &qp->tcpctlfd);
		alarm(0);
		if (qp->tcpfd < 0) {
			dnslog("can't dial tcp!%s!dns: %r", domain);
			free(domain);
			break;
		}
		free(domain);
		nci = getnetconninfo(conndir, qp->tcpfd);
		if (nci) {
			parseip(qp->tcpip, nci->rsys);
			freenetconninfo(nci);
		} else
			dnslog("mydnsquery: getnetconninfo failed");
		qp->tcpset = 1;

		/* over tcp a 2-byte big-endian length precedes the message */
		belen[0] = len >> 8;
		belen[1] = len;
		if (write(qp->tcpfd, belen, 2) != 2 ||
		    write(qp->tcpfd, udppkt + Udphdrsize, len) != len)
			warning("sending tcp msg: %r");
		break;
	default:
		sysfatal("mydnsquery: bad medium");
	}
	return rv;
}
962 
963 /*
964  * send query to all UDP destinations or one TCP destination,
965  * taken from obuf (udp packet) header
966  */
967 static int
968 xmitquery(Query *qp, int medium, int depth, uchar *obuf, int inns, int len)
969 {
970 	int j, n;
971 	char buf[32];
972 	Dest *p;
973 
974 	queryck(qp);
975 	if(time(nil) >= qp->req->aborttime)
976 		return -1;
977 
978 	/*
979 	 * get a nameserver address if we need one.
980 	 * serveraddrs populates qp->dest.
981 	 */
982 	p = qp->dest;
983 	destck(p);
984 	if (qp->ndest < 0 || qp->ndest > Maxdest)
985 		dnslog("qp->ndest %d out of range", qp->ndest);
986 	if (qp->ndest > qp->curdest - p)
987 		qp->curdest = &qp->dest[serveraddrs(qp, qp->curdest - p, depth)];
988 	destck(qp->curdest);
989 
990 	/* no servers, punt */
991 	if (qp->curdest == qp->dest)
992 		if (cfg.straddle && cfg.inside) {
993 			/* get ips of "outside-ns-ips" */
994 			p = qp->curdest = qp->dest;
995 			for(n = 0; n < Maxdest; n++, qp->curdest++)
996 				if (setdestoutns(qp->curdest, n) < 0)
997 					break;
998 		} else {
999 			/* it's probably just a bogus domain, don't log it */
1000 			// dnslog("xmitquery: %s: no nameservers", qp->dp->name);
1001 			return -1;
1002 		}
1003 
1004 	/* send to first 'qp->ndest' destinations */
1005 	j = 0;
1006 	if (medium == Tcp) {
1007 		j++;
1008 		queryck(qp);
1009 		assert(qp->dp);
1010 		procsetname("tcp %sside query for %s %s", (inns? "in": "out"),
1011 			qp->dp->name, rrname(qp->type, buf, sizeof buf));
1012 		mydnsquery(qp, medium, obuf, len); /* sets qp->tcpip from obuf */
1013 		if(debug)
1014 			logsend(qp->req->id, depth, qp->tcpip, "", qp->dp->name,
1015 				qp->type);
1016 	} else
1017 		for(; p < &qp->dest[qp->ndest] && p < qp->curdest; p++){
1018 			/* skip destinations we've finished with */
1019 			if(p->nx >= Maxtrans)
1020 				continue;
1021 
1022 			j++;
1023 
1024 			/* exponential backoff of requests */
1025 			if((1<<p->nx) > qp->ndest)
1026 				continue;
1027 
1028 			procsetname("udp %sside query to %I/%s %s %s",
1029 				(inns? "in": "out"), p->a, p->s->name,
1030 				qp->dp->name, rrname(qp->type, buf, sizeof buf));
1031 			if(debug)
1032 				logsend(qp->req->id, depth, p->a, p->s->name,
1033 					qp->dp->name, qp->type);
1034 
1035 			/* fill in UDP destination addr & send it */
1036 			memmove(obuf, p->a, sizeof p->a);
1037 			mydnsquery(qp, medium, obuf, len);
1038 			p->nx++;
1039 		}
1040 	if(j == 0) {
1041 		// dnslog("xmitquery: %s: no destinations left", qp->dp->name);
1042 		return -1;
1043 	}
1044 	return 0;
1045 }
1046 
/*
 * query types that have a lock index of their own.  qtype2lck maps a
 * type to its position in this table; every type that is not listed
 * maps to index 0.
 */
static int lckindex[Maxlcks] = {
	0,			/* all others map here */
	Ta,
	Tns,
	Tcname,
	Tsoa,
	Tptr,
	Tmx,
	Ttxt,
	Taaaa,
};
1058 
1059 static int
1060 qtype2lck(int qtype)		/* map query type to querylck index */
1061 {
1062 	int i;
1063 
1064 	for (i = 1; i < nelem(lckindex); i++)
1065 		if (lckindex[i] == qtype)
1066 			return i;
1067 	return 0;
1068 }
1069 
1070 /* is mp a cachable negative response (with Rname set)? */
1071 static int
1072 isnegrname(DNSmsg *mp)
1073 {
1074 	/* TODO: could add || cfg.justforw to RHS of && */
1075 	return mp->an == nil && (mp->flags & Rmask) == Rname;
1076 }
1077 
/*
 * process the reply *mp that arrived from srcip in response to qp;
 * p is the destination the reply is attributed to.
 *
 * the reply's RR lists are consumed: attached to the cache or freed.
 * returns -1 if the reply is to be ignored (server failure, or a bad
 * delegation without answers), 1 if it ends the search (answers, an
 * authoritative reply, or a negative Rname reply, which gets cached),
 * 0 if it neither answers nor names new servers to try, and otherwise
 * the result of a recursive netquery to the servers it referred us to.
 */
static int
procansw(Query *qp, DNSmsg *mp, uchar *srcip, int depth, Dest *p)
{
	int rv;
//	int lcktype;
	char buf[32];
	DN *ndp;
	Query *nqp;
	RR *tp, *soarr;

	if (mp->an == nil)
		stats.negans++;

	/* ignore any error replies */
	if((mp->flags & Rmask) == Rserver){
		stats.negserver++;
		freeanswers(mp);
		if(p != qp->curdest)
			p->code = Rserver;
		return -1;
	}

	/* ignore any bad delegations */
	if(mp->ns && baddelegation(mp->ns, qp->nsrp, srcip)){
		stats.negbaddeleg++;
		if(mp->an == nil){
			stats.negbdnoans++;
			freeanswers(mp);
			if(p != qp->curdest)
				p->code = Rserver;
			return -1;
		}
		/* there are answers: drop just the authority section */
		rrfreelist(mp->ns);
		mp->ns = nil;
	}

	/* remove any soa's from the authority section */
	lock(&dnlock);
	soarr = rrremtype(&mp->ns, Tsoa);

	/* incorporate answers */
	unique(mp->an);
	unique(mp->ns);
	unique(mp->ar);
	unlock(&dnlock);
	/*
	 * after rrattach the lists belong to the cache; below, mp->an and
	 * mp->ns are only tested for being nil, never followed.
	 */
	if(mp->an)
		rrattach(mp->an, (mp->flags & Fauth) != 0);
	if(mp->ar)
		rrattach(mp->ar, Notauthoritative);
	if(mp->ns && !cfg.justforw){
		/* note the owner before the attach, for the lookup below */
		ndp = mp->ns->owner;
		rrattach(mp->ns, Notauthoritative);
	} else {
		ndp = nil;
		rrfreelist(mp->ns);
		mp->ns = nil;
	}

	/* free the question */
	if(mp->qd) {
		rrfreelist(mp->qd);
		mp->qd = nil;
	}

	/*
	 *  Any reply from an authoritative server,
	 *  or a positive reply terminates the search.
	 *  A negative response now also terminates the search.
	 */
	if(mp->an != nil || (mp->flags & Fauth)){
		if(isnegrname(mp))
			qp->dp->respcode = Rname;
		else
			qp->dp->respcode = 0;

		/*
		 *  cache any negative responses, free soarr.
		 *  negative responses need not be authoritative:
		 *  they can legitimately come from a cache.
		 */
		if( /* (mp->flags & Fauth) && */ mp->an == nil)
			cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
		else
			rrfreelist(soarr);
		return 1;
	} else if (isnegrname(mp)) {
		qp->dp->respcode = Rname;
		/*
		 *  cache negative response.
		 *  negative responses need not be authoritative:
		 *  they can legitimately come from a cache.
		 */
		cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
		return 1;
	}
	stats.negnorname++;
	rrfreelist(soarr);

	/*
	 *  if we've been given better name servers, recurse.
	 *  if we're a pure resolver, don't recurse, we have
	 *  to forward to a fixed set of named servers.
	 */
	if(!mp->ns || cfg.resolver && cfg.justforw)
		return 0;
	/* a referral to servers we are already asking gets us nowhere */
	tp = rrlookup(ndp, Tns, NOneg);
	if(contains(qp->nsrp, tp)){
		rrfreelist(tp);
		return 0;
	}
	procsetname("recursive query for %s %s", qp->dp->name,
		rrname(qp->type, buf, sizeof buf));
	/*
	 *  we're called from udpquery, called from
	 *  netquery, which current holds qp->dp->querylck,
	 *  so release it now and acquire it upon return.
	 *  (the unlock/lock calls themselves are commented out.)
	 */
//	lcktype = qtype2lck(qp->type);
//	qunlock(&qp->dp->querylck[lcktype]);

	/* same question, new set of servers; Query must be on the heap */
	nqp = emalloc(sizeof *nqp);
	queryinit(nqp, qp->dp, qp->type, qp->req);
	nqp->nsrp = tp;
	rv = netquery(nqp, depth+1);

//	qlock(&qp->dp->querylck[lcktype]);
	rrfreelist(nqp->nsrp);
	querydestroy(nqp);
	free(nqp);
	return rv;
}
1209 
1210 /*
1211  * send a query via tcp to a single address (from ibuf's udp header)
1212  * and read the answer(s) into mp->an.
1213  */
1214 static int
1215 tcpquery(Query *qp, DNSmsg *mp, int depth, uchar *ibuf, uchar *obuf, int len,
1216 	int waitsecs, int inns, ushort req)
1217 {
1218 	int rv = 0;
1219 	ulong endtime;
1220 
1221 	endtime = time(nil) + waitsecs;
1222 	if(endtime > qp->req->aborttime)
1223 		endtime = qp->req->aborttime;
1224 
1225 	if (0)
1226 		dnslog("%s: udp reply truncated; retrying query via tcp to %I",
1227 			qp->dp->name, qp->tcpip);
1228 
1229 	qlock(&qp->tcplock);
1230 	memmove(obuf, ibuf, IPaddrlen);		/* send back to respondent */
1231 	/* sets qp->tcpip from obuf's udp header */
1232 	if (xmitquery(qp, Tcp, depth, obuf, inns, len) < 0 ||
1233 	    readreply(qp, Tcp, req, ibuf, mp, endtime) < 0)
1234 		rv = -1;
1235 	if (qp->tcpfd > 0) {
1236 		hangup(qp->tcpctlfd);
1237 		close(qp->tcpctlfd);
1238 		close(qp->tcpfd);
1239 	}
1240 	qp->tcpfd = qp->tcpctlfd = -1;
1241 	qunlock(&qp->tcplock);
1242 	return rv;
1243 }
1244 
1245 /*
1246  *  query name servers.  If the name server returns a pointer to another
1247  *  name server, recurse.
1248  */
1249 static int
1250 queryns(Query *qp, int depth, uchar *ibuf, uchar *obuf, int waitsecs, int inns)
1251 {
1252 	int ndest, len, replywaits, rv;
1253 	ushort req;
1254 	ulong endtime;
1255 	char buf[12];
1256 	uchar srcip[IPaddrlen];
1257 	Dest *p, *np, *dest;
1258 //	Dest dest[Maxdest];
1259 
1260 	/* pack request into a udp message */
1261 	req = rand();
1262 	len = mkreq(qp->dp, qp->type, obuf, Frecurse|Oquery, req);
1263 
1264 	/* no server addresses yet */
1265 	queryck(qp);
1266 	dest = emalloc(Maxdest * sizeof *dest);	/* dest can't be on stack */
1267 	for (p = dest; p < dest + Maxdest; p++)
1268 		destinit(p);
1269 	/* this dest array is local to this call of queryns() */
1270 	free(qp->dest);
1271 	qp->curdest = qp->dest = dest;
1272 
1273 	/*
1274 	 *  transmit udp requests and wait for answers.
1275 	 *  at most Maxtrans attempts to each address.
1276 	 *  each cycle send one more message than the previous.
1277 	 *  retry a query via tcp if its response is truncated.
1278 	 */
1279 	for(ndest = 1; ndest < Maxdest; ndest++){
1280 		qp->ndest = ndest;
1281 		qp->tcpset = 0;
1282 		if (xmitquery(qp, Udp, depth, obuf, inns, len) < 0)
1283 			break;
1284 
1285 		endtime = time(nil) + waitsecs;
1286 		if(endtime > qp->req->aborttime)
1287 			endtime = qp->req->aborttime;
1288 
1289 		for(replywaits = 0; replywaits < ndest; replywaits++){
1290 			DNSmsg m;
1291 
1292 			procsetname("reading %sside reply from %I: %s %s from %s",
1293 				(inns? "in": "out"), obuf, qp->dp->name,
1294 				rrname(qp->type, buf, sizeof buf), qp->req->from);
1295 
1296 			/* read udp answer into m */
1297 			if (readreply(qp, Udp, req, ibuf, &m, endtime) >= 0)
1298 				memmove(srcip, ibuf, IPaddrlen);
1299 			else if (!(m.flags & Ftrunc)) {
1300 				freeanswers(&m);
1301 				break;		/* timed out on this dest */
1302 			} else {
1303 				/* whoops, it was truncated! ask again via tcp */
1304 				freeanswers(&m);
1305 				rv = tcpquery(qp, &m, depth, ibuf, obuf, len,
1306 					waitsecs, inns, req);  /* answer in m */
1307 				if (rv < 0) {
1308 					freeanswers(&m);
1309 					break;		/* failed via tcp too */
1310 				}
1311 				memmove(srcip, qp->tcpip, IPaddrlen);
1312 			}
1313 
1314 			/* find responder */
1315 			// dnslog("queryns got reply from %I", srcip);
1316 			for(p = qp->dest; p < qp->curdest; p++)
1317 				if(memcmp(p->a, srcip, sizeof p->a) == 0)
1318 					break;
1319 
1320 			/* remove all addrs of responding server from list */
1321 			for(np = qp->dest; np < qp->curdest; np++)
1322 				if(np->s == p->s)
1323 					p->nx = Maxtrans;
1324 
1325 			/* free or incorporate RRs in m */
1326 			rv = procansw(qp, &m, srcip, depth, p);
1327 			if (rv > 0) {
1328 				free(qp->dest);
1329 				qp->dest = qp->curdest = nil; /* prevent accidents */
1330 				return rv;
1331 			}
1332 		}
1333 	}
1334 
1335 	/* if all servers returned failure, propagate it */
1336 	qp->dp->respcode = Rserver;
1337 	for(p = dest; p < qp->curdest; p++) {
1338 		destck(p);
1339 		if(p->code != Rserver)
1340 			qp->dp->respcode = 0;
1341 		p->magic = 0;			/* prevent accidents */
1342 	}
1343 
1344 //	if (qp->dp->respcode)
1345 //		dnslog("queryns setting Rserver for %s", qp->dp->name);
1346 
1347 	free(qp->dest);
1348 	qp->dest = qp->curdest = nil;		/* prevent accidents */
1349 	return 0;
1350 }
1351 
1352 /*
1353  *  run a command with a supplied fd as standard input
1354  */
1355 char *
1356 system(int fd, char *cmd)
1357 {
1358 	int pid, p, i;
1359 	static Waitmsg msg;
1360 
1361 	if((pid = fork()) == -1)
1362 		sysfatal("fork failed: %r");
1363 	else if(pid == 0){
1364 		dup(fd, 0);
1365 		close(fd);
1366 		for (i = 3; i < 200; i++)
1367 			close(i);		/* don't leak fds */
1368 		execl("/bin/rc", "rc", "-c", cmd, nil);
1369 		sysfatal("exec rc: %r");
1370 	}
1371 	for(p = waitpid(); p >= 0; p = waitpid())
1372 		if(p == pid)
1373 			return msg.msg;
1374 	return "lost child";
1375 }
1376 
/*
 * scale a wait of ms milliseconds by pcntprob percent,
 * but never return less than Minwait.
 */
static ulong
weight(ulong ms, unsigned pcntprob)
{
	enum { Minwait = 1500 };
	ulong scaled;

	scaled = ms * pcntprob / 100;
	return scaled < Minwait? Minwait: scaled;
}
1388 
1389 /*
1390  * in principle we could use a single descriptor for a udp port
1391  * to send all queries and receive all the answers to them,
1392  * but we'd have to sort out the answers by dns-query id.
1393  */
1394 static int
1395 udpquery(Query *qp, char *mntpt, int depth, int patient, int inns)
1396 {
1397 	int fd, rv;
1398 	long now;
1399 	ulong pcntprob, wait, reqtm;
1400 	char *msg;
1401 	uchar *obuf, *ibuf;
1402 	static QLock mntlck;
1403 	static ulong lastmount;
1404 
1405 	/* use alloced buffers rather than ones from the stack */
1406 	// ibuf = emalloc(Maxudpin+Udphdrsize);
1407 	ibuf = emalloc(64*1024);		/* max. tcp reply size */
1408 	obuf = emalloc(Maxudp+Udphdrsize);
1409 
1410 	fd = udpport(mntpt);
1411 	while (fd < 0 && cfg.straddle && strcmp(mntpt, "/net.alt") == 0) {
1412 		/* HACK: remount /net.alt */
1413 		now = time(nil);
1414 		if (now < lastmount + Remntretry)
1415 			sleep((lastmount + Remntretry - now)*1000);
1416 		qlock(&mntlck);
1417 		fd = udpport(mntpt);	/* try again under lock */
1418 		if (fd < 0) {
1419 			dnslog("[%d] remounting /net.alt", getpid());
1420 			unmount(nil, "/net.alt");
1421 
1422 			msg = system(open("/dev/null", ORDWR), "outside");
1423 
1424 			lastmount = time(nil);
1425 			if (msg && *msg) {
1426 				dnslog("[%d] can't remount /net.alt: %s",
1427 					getpid(), msg);
1428 				sleep(10*1000);		/* don't spin wildly */
1429 			} else
1430 				fd = udpport(mntpt);
1431 		}
1432 		qunlock(&mntlck);
1433 	}
1434 	if (fd < 0) {
1435 		dnslog("can't get udpport for %s query of name %s: %r",
1436 			mntpt, qp->dp->name);
1437 		sysfatal("out of udp conversations");	/* we're buggered */
1438 	}
1439 
1440 	/*
1441 	 * Our QIP servers are busted, don't answer AAAA and
1442 	 * take forever to answer CNAME if there isn't one.
1443 	 * They rarely set Rname.
1444 	 * make time-to-wait proportional to estimated probability of an
1445 	 * RR of that type existing.
1446 	 */
1447 	if (qp->type >= nelem(likely))
1448 		pcntprob = 35;			/* unpopular query type */
1449 	else
1450 		pcntprob = likely[qp->type];
1451 	reqtm = (patient? 2*Maxreqtm: Maxreqtm);
1452 	/* time for a single outgoing udp query */
1453 	wait = weight(S2MS(reqtm)/3, pcntprob);
1454 	qp->req->aborttime = time(nil) + MS2S(3*wait); /* for all udp queries */
1455 
1456 	qp->udpfd = fd;
1457 	rv = queryns(qp, depth, ibuf, obuf, MS2S(wait), inns);
1458 	close(fd);
1459 	qp->udpfd = -1;
1460 
1461 	free(obuf);
1462 	free(ibuf);
1463 	return rv;
1464 }
1465 
1466 /* look up (qp->dp->name,qp->type) rr in dns, via *nsrp with results in *reqp */
1467 static int
1468 netquery(Query *qp, int depth)
1469 {
1470 	int lock, rv, triedin, inname;
1471 //	char buf[32];
1472 	RR *rp;
1473 	DN *dp;
1474 	Querylck *qlp;
1475 	static int whined;
1476 
1477 	rv = 0;				/* pessimism */
1478 	if(depth > 12)			/* in a recursive loop? */
1479 		return 0;
1480 
1481 	slave(qp->req);
1482 	/*
1483 	 * slave might have forked.  if so, the parent process longjmped to
1484 	 * req->mret; we're usually the child slave, but if there are too
1485 	 * many children already, we're still the same process.
1486 	 */
1487 
1488 	/*
1489 	 * don't lock before call to slave so only children can block.
1490 	 * just lock at top-level invocation.
1491 	 */
1492 	lock = depth <= 1 && qp->req->isslave;
1493 	dp = qp->dp;		/* ensure that it doesn't change underfoot */
1494 	qlp = nil;
1495 	if(lock) {
1496 //		procsetname("query lock wait: %s %s from %s", dp->name,
1497 //			rrname(qp->type, buf, sizeof buf), qp->req->from);
1498 		/*
1499 		 * don't make concurrent queries for this name.
1500 		 * dozens of processes blocking here probably indicates
1501 		 * an error in our dns data that causes us to not
1502 		 * recognise a zone (area) as one of our own, thus
1503 		 * causing us to query other nameservers.
1504 		 */
1505 		qlp = &dp->querylck[qtype2lck(qp->type)];
1506 		qlock(qlp);
1507 		if (qlp->Ref.ref > 10) {
1508 			qunlock(qlp);
1509 			if (!whined) {
1510 				whined = 1;
1511 				dnslog("too many outstanding queries for %s;"
1512 					" dropping this one; no further logging"
1513 					" of drops", dp->name);
1514 			}
1515 			return 0;
1516 		}
1517 		++qlp->Ref.ref;
1518 		qunlock(qlp);
1519 	}
1520 	procsetname("netquery: %s", dp->name);
1521 
1522 	/* prepare server RR's for incremental lookup */
1523 	for(rp = qp->nsrp; rp; rp = rp->next)
1524 		rp->marker = 0;
1525 
1526 	triedin = 0;
1527 
1528 	/*
1529 	 * normal resolvers and servers will just use mntpt for all addresses,
1530 	 * even on the outside.  straddling servers will use mntpt (/net)
1531 	 * for inside addresses and /net.alt for outside addresses,
1532 	 * thus bypassing other inside nameservers.
1533 	 */
1534 	inname = insideaddr(dp->name);
1535 	if (!cfg.straddle || inname) {
1536 		rv = udpquery(qp, mntpt, depth, Hurry, (cfg.inside? Inns: Outns));
1537 		triedin = 1;
1538 	}
1539 
1540 	/*
1541 	 * if we're still looking, are inside, and have an outside domain,
1542 	 * try it on our outside interface, if any.
1543 	 */
1544 	if (rv == 0 && cfg.inside && !inname) {
1545 		if (triedin)
1546 			dnslog(
1547 	   "[%d] netquery: internal nameservers failed for %s; trying external",
1548 				getpid(), dp->name);
1549 
1550 		/* prepare server RR's for incremental lookup */
1551 		for(rp = qp->nsrp; rp; rp = rp->next)
1552 			rp->marker = 0;
1553 
1554 		rv = udpquery(qp, "/net.alt", depth, Patient, Outns);
1555 	}
1556 //	if (rv == 0)		/* could ask /net.alt/dns directly */
1557 //		askoutdns(dp, qp->type);
1558 
1559 	if(lock && qlp) {
1560 		qlock(qlp);
1561 		assert(qlp->Ref.ref > 0);
1562 		qunlock(qlp);
1563 		decref(qlp);
1564 	}
1565 	return rv;
1566 }
1567 
1568 int
1569 seerootns(void)
1570 {
1571 	int rv;
1572 	char root[] = "";
1573 	Request req;
1574 	Query *qp;
1575 
1576 	memset(&req, 0, sizeof req);
1577 	req.isslave = 1;
1578 	req.aborttime = now + Maxreqtm;
1579 	req.from = "internal";
1580 	qp = emalloc(sizeof *qp);
1581 	queryinit(qp, dnlookup(root, Cin, 1), Tns, &req);
1582 
1583 	qp->nsrp = dblookup(root, Cin, Tns, 0, 0);
1584 	rv = netquery(qp, 0);
1585 
1586 	rrfreelist(qp->nsrp);
1587 	querydestroy(qp);
1588 	free(qp);
1589 	return rv;
1590 }
1591