xref: /plan9-contrib/sys/src/cmd/ndb/dnresolve.c (revision cc499a30a6f1302a973149384e954d8486f8a058)
1 /*
2  * domain name resolvers, see rfcs 1035 and 1123
3  */
4 #include <u.h>
5 #include <libc.h>
6 #include <ip.h>
7 #include <bio.h>
8 #include <ndb.h>
9 #include "dns.h"
10 
11 typedef struct Dest Dest;
12 typedef struct Ipaddr Ipaddr;
13 typedef struct Query Query;
14 
/*
 * resolver constants.  Destmagic and Querymagic are stored in the magic
 * field of Dest and Query (by destinit and queryinit) and verified by
 * destck and queryck.
 */
15 enum
16 {
17 	Udp, Tcp,		/* transport ("medium") used to send and read a query */
18 
19 	Maxdest=	24,	/* maximum destinations for a request message */
20 	Maxtrans=	3,	/* maximum transmissions to a server */
21 	Maxretries=	3, /* cname+actual resends: was 32; have pity on user */
22 	Maxwaitms=	1000,	/* wait no longer for a remote dns query */
23 	Minwaitms=	100,	/* willing to wait for a remote dns query */
24 	Remntretry=	15,	/* min. sec.s between /net.alt remount tries */
25 	Maxoutstanding=	15,	/* max. outstanding queries per domain name */
26 
27 	Destmagic=	0xcafebabe,
28 	Querymagic=	0xdeadbeef,
29 };
/* NOTE(review): not referenced in this part of the file; presumably a patience level for queries - confirm */
30 enum { Hurry, Patient, };
/* value of the `inns' argument (see xmitquery): zero is reported as "outside", non-zero as "inside" */
31 enum { Outns, Inns, };
32 
/* node of a singly-linked list, each holding one IP address */
33 struct Ipaddr {
34 	Ipaddr *next;		/* next node, if any */
35 	uchar	ip[IPaddrlen];
36 };
37 
/*
 * one destination (name server address) that a query may be sent to.
 * Query.dest points at an array of these; serveraddrs() and
 * setdestoutns() fill entries in.
 */
38 struct Dest
39 {
40 	uchar	a[IPaddrlen];	/* ip address */
41 	DN	*s;		/* name server */
42 	int	nx;		/* number of transmissions */
43 	int	code;		/* response code; used to clear dp->respcode */
44 
45 	ulong	magic;		/* Destmagic after destinit(); checked by destck() */
46 };
47 
48 /*
49  * Query has a QLock in it, thus it can't be an automatic
50  * variable, since each process would see a separate copy
51  * of the lock on its stack.
52  */
/* state of one lookup in progress; set up by queryinit(), torn down by querydestroy() */
53 struct Query {
54 	DN	*dp;		/* domain */
55 	ushort	type;		/* and type to look up */
56 	Request *req;
57 	RR	*nsrp;		/* name servers to consult */
58 
59 	/* dest must not be on the stack due to forking in slave() */
60 	Dest	*dest;		/* array of destinations */
61 	Dest	*curdest;	/* pointer to one of them */
62 	int	ndest;
63 
64 	int	udpfd;		/* -1 when unset; querydestroy() does not close it */
65 
66 	QLock	tcplock;	/* only one tcp call at a time per query */
67 	int	tcpset;		/* set to 1 by mydnsquery() once a tcp call has been dialed */
68 	int	tcpfd;		/* if Tcp, read replies from here */
69 	int	tcpctlfd;
70 	uchar	tcpip[IPaddrlen];	/* remote address of the tcp connection */
71 
72 	ulong	magic;		/* Querymagic after queryinit(); checked by queryck() */
73 };
74 
75 /* estimated % probability of such a record existing at all */
/*
 * table indexed by RR type (Ta, Taaaa, ...); types not listed get 0.
 * NOTE(review): no use of this table is visible in this part of the file.
 */
76 int likely[] = {
77 	[Ta]		95,
78 	[Taaaa]		10,
79 	[Tcname]	15,
80 	[Tmx]		60,
81 	[Tns]		90,
82 	[Tnull]		5,
83 	[Tptr]		35,
84 	[Tsoa]		90,
85 	[Tsrv]		60,
86 	[Ttxt]		15,
87 	[Tall]		95,
88 };
89 
90 static RR*	dnresolve1(char*, int, int, Request*, int, int);
91 static int	netquery(Query *, int);
92 
93 /*
94  * reading /proc/pid/args yields either "name args" or "name [display args]",
95  * so return only display args, if any.
96  */
97 static char *
98 procgetname(void)
99 {
100 	int fd, n;
101 	char *lp, *rp;
102 	char buf[256];
103 
104 	snprint(buf, sizeof buf, "#p/%d/args", getpid());
105 	if((fd = open(buf, OREAD)) < 0)
106 		return strdup("");
107 	*buf = '\0';
108 	n = read(fd, buf, sizeof buf-1);
109 	close(fd);
110 	if (n >= 0)
111 		buf[n] = '\0';
112 	if ((lp = strchr(buf, '[')) == nil ||
113 	    (rp = strrchr(buf, ']')) == nil)
114 		return strdup("");
115 	*rp = '\0';
116 	return strdup(lp+1);
117 }
118 
119 /*
120  *  lookup 'type' info for domain name 'name'.  If it doesn't exist, try
121  *  looking it up as a canonical name.
122  *
123  *  this process can be quite slow if time-outs are set too high when querying
124  *  nameservers that just don't respond to certain query types.  in that case,
125  *  there will be multiple udp retries, multiple nameservers will be queried,
126  *  and this will be repeated for a cname query.  the whole thing will be
127  *  retried several times until we get an answer or a time-out.
128  */
129 RR*
130 dnresolve(char *name, int class, int type, Request *req, RR **cn, int depth,
131 	int recurse, int rooted, int *status)
132 {
133 	RR *rp, *nrp, *drp;
134 	DN *dp;
135 	int loops;
136 	char *procname;
137 	char nname[Domlen];
138 
139 	if(status)
140 		*status = 0;
141 
142 	if(depth > 12)			/* in a recursive loop? */
143 		return nil;
144 
145 	procname = procgetname();
146 	/*
147 	 *  hack for systems that don't have resolve search
148 	 *  lists.  Just look up the simple name in the database.
149 	 */
150 	if(!rooted && strchr(name, '.') == nil){
151 		rp = nil;
152 		drp = domainlist(class);
153 		for(nrp = drp; rp == nil && nrp != nil; nrp = nrp->next){
154 			snprint(nname, sizeof nname, "%s.%s", name,
155 				nrp->ptr->name);
156 			rp = dnresolve(nname, class, type, req, cn, depth+1,
157 				recurse, rooted, status);
158 			lock(&dnlock);
159 			rrfreelist(rrremneg(&rp));
160 			unlock(&dnlock);
161 		}
162 		if(drp != nil)
163 			rrfreelist(drp);
164 		procsetname(procname);
165 		free(procname);
166 		return rp;
167 	}
168 
169 	/*
170 	 *  try the name directly
171 	 */
172 	rp = dnresolve1(name, class, type, req, depth, recurse);
173 	if(rp == nil) {
174 		/*
175 		 * try it as a canonical name if we weren't told
176 		 * that the name didn't exist
177 		 */
178 		dp = dnlookup(name, class, 0);
179 		if(type != Tptr && dp->respcode != Rname)
180 			for(loops = 0; rp == nil && loops < Maxretries; loops++){
181 				/* retry cname, then the actual type */
182 				rp = dnresolve1(name, class, Tcname, req,
183 					depth, recurse);
184 				if(rp == nil)
185 					break;
186 
187 				/* rp->host == nil shouldn't happen, but does */
188 				if(rp->negative || rp->host == nil){
189 					rrfreelist(rp);
190 					rp = nil;
191 					break;
192 				}
193 
194 				name = rp->host->name;
195 				lock(&dnlock);
196 				if(cn)
197 					rrcat(cn, rp);
198 				else
199 					rrfreelist(rp);
200 				unlock(&dnlock);
201 
202 				rp = dnresolve1(name, class, type, req,
203 					depth, recurse);
204 			}
205 
206 		/* distinction between not found and not good */
207 		if(rp == nil && status != nil && dp->respcode != Rok)
208 			*status = dp->respcode;
209 	}
210 	procsetname(procname);
211 	free(procname);
212 	return randomize(rp);
213 }
214 
215 static void
216 queryinit(Query *qp, DN *dp, int type, Request *req)
217 {
218 	memset(qp, 0, sizeof *qp);
219 	qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
220 	qp->dp = dp;
221 	qp->type = type;
222 	if (qp->type != type)
223 		dnslog("queryinit: bogus type %d", type);
224 	qp->req = req;
225 	qp->nsrp = nil;
226 	qp->dest = qp->curdest = nil;
227 	qp->magic = Querymagic;
228 }
229 
/*
 * sanity check: qp must be non-nil and carry the magic number that
 * queryinit() stored (querydestroy() zeroes it again).
 */
230 static void
231 queryck(Query *qp)
232 {
233 	assert(qp);
234 	assert(qp->magic == Querymagic);
235 }
236 
237 static void
238 querydestroy(Query *qp)
239 {
240 	queryck(qp);
241 	/* leave udpfd open */
242 	if (qp->tcpfd > 0)
243 		close(qp->tcpfd);
244 	if (qp->tcpctlfd > 0) {
245 		hangup(qp->tcpctlfd);
246 		close(qp->tcpctlfd);
247 	}
248 	free(qp->dest);
249 	memset(qp, 0, sizeof *qp);	/* prevent accidents */
250 	qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
251 }
252 
253 static void
254 destinit(Dest *p)
255 {
256 	memset(p, 0, sizeof *p);
257 	p->magic = Destmagic;
258 }
259 
/* sanity check: p must be non-nil and carry the magic number set by destinit() */
260 static void
261 destck(Dest *p)
262 {
263 	assert(p);
264 	assert(p->magic == Destmagic);
265 }
266 
/* counterpart of destinit(); currently does nothing with p */
267 static void
268 destdestroy(Dest *p)
269 {
270 	USED(p);
271 }
272 
273 /*
274  * if the response to a query hasn't arrived within 100 ms.,
275  * it's unlikely to arrive at all.  after 1 s., it's really unlikely.
276  * queries for missing RRs are likely to produce time-outs rather than
277  * negative responses, so cname and aaaa queries are likely to time out,
278  * thus we don't wait very long for them.
279  */
280 static void
281 notestats(vlong start, int tmout, int type)
282 {
283 	qlock(&stats);
284 	if (tmout) {
285 		stats.tmout++;
286 		if (type == Taaaa)
287 			stats.tmoutv6++;
288 		else if (type == Tcname)
289 			stats.tmoutcname++;
290 	} else {
291 		long wait10ths = NS2MS(nsec() - start) / 100;
292 
293 		if (wait10ths <= 0)
294 			stats.under10ths[0]++;
295 		else if (wait10ths >= nelem(stats.under10ths))
296 			stats.under10ths[nelem(stats.under10ths) - 1]++;
297 		else
298 			stats.under10ths[wait10ths]++;
299 	}
300 	qunlock(&stats);
301 }
302 
303 static void
304 noteinmem(void)
305 {
306 	qlock(&stats);
307 	stats.answinmem++;
308 	qunlock(&stats);
309 }
310 
/*
 * find name servers for `name' and ask them about qp->dp/qp->type.
 *
 * as a resolver (cfg.resolver), the servers from getdnsservers() are
 * asked first.  otherwise, or if that fails, walk from `name' towards
 * the root; at each level:
 *   - if the database says the level is local, answer from the database;
 *   - if recurse is Dontrecurse, go on to the next level;
 *   - else query the unexpired Tns entries in the cache, if any, and
 *     go on to the next level if that fails;
 *   - else query the ns entries from the database, if any.
 * every ns list obtained here is freed here; qp->nsrp merely borrows
 * it for the duration of netquery().
 *
 * returns a list from rrlookup()/dblookup(), or nil if no level
 * produced an answer.
 */
311 static RR*
312 issuequery(Query *qp, char *name, int class, int depth, int recurse)
313 {
314 	char *cp;
315 	DN *nsdp;
316 	RR *rp, *nsrp, *dbnsrp;
317 
318 	/*
319 	 *  if we're running as just a resolver, query our
320 	 *  designated name servers
321 	 */
322 	if(cfg.resolver){
323 		nsrp = randomize(getdnsservers(class));
324 		if(nsrp != nil) {
325 			qp->nsrp = nsrp;
326 			if(netquery(qp, depth+1)){
327 				rrfreelist(nsrp);
328 				return rrlookup(qp->dp, qp->type, OKneg);
329 			}
330 			rrfreelist(nsrp);
331 		}
332 	}
333 
334 	/*
335  	 *  walk up the domain name looking for
336 	 *  a name server for the domain.
337 	 */
338 	for(cp = name; cp; cp = walkup(cp)){
339 		/*
340 		 *  if this is a local (served by us) domain,
341 		 *  return answer
342 		 */
343 		dbnsrp = randomize(dblookup(cp, class, Tns, 0, 0));
344 		if(dbnsrp && dbnsrp->local){
345 			rp = dblookup(name, class, qp->type, 1, dbnsrp->ttl);
346 			rrfreelist(dbnsrp);
347 			return rp;
348 		}
349 
350 		/*
351 		 *  if recursion isn't set, just accept local
352 		 *  entries
353 		 */
354 		if(recurse == Dontrecurse){
355 			if(dbnsrp)
356 				rrfreelist(dbnsrp);
357 			continue;
358 		}
359 
360 		/* look for ns in cache */
361 		nsdp = dnlookup(cp, class, 0);	/* may be nil: name is not entered */
362 		nsrp = nil;
363 		if(nsdp)
364 			nsrp = randomize(rrlookup(nsdp, Tns, NOneg));
365 
366 		/* if the entry timed out, ignore it */
367 		if(nsrp && nsrp->ttl < now){
368 			rrfreelist(nsrp);
369 			nsrp = nil;
370 		}
371 
372 		if(nsrp){
			/* cached servers win; the db list (possibly nil) is dropped */
373 			rrfreelist(dbnsrp);
374 
375 			/* query the name servers found in cache */
376 			qp->nsrp = nsrp;
377 			if(netquery(qp, depth+1)){
378 				rrfreelist(nsrp);
379 				return rrlookup(qp->dp, qp->type, OKneg);
380 			}
381 			rrfreelist(nsrp);
382 			continue;
383 		}
384 
385 		/* use ns from db */
386 		if(dbnsrp){
387 			/* try the name servers found in db */
388 			qp->nsrp = dbnsrp;
389 			if(netquery(qp, depth+1)){
390 				/* we got an answer */
391 				rrfreelist(dbnsrp);
				/* NOTE(review): NOneg here, OKneg on the other two paths - confirm intended */
392 				return rrlookup(qp->dp, qp->type, NOneg);
393 			}
394 			rrfreelist(dbnsrp);
395 		}
396 	}
397 	return nil;
398 }
399 
400 static RR*
401 dnresolve1(char *name, int class, int type, Request *req, int depth,
402 	int recurse)
403 {
404 	Area *area;
405 	DN *dp;
406 	RR *rp;
407 	Query *qp;
408 
409 	if(debug)
410 		dnslog("[%d] dnresolve1 %s %d %d", getpid(), name, type, class);
411 
412 	/* only class Cin implemented so far */
413 	if(class != Cin)
414 		return nil;
415 
416 	dp = dnlookup(name, class, 1);
417 
418 	/*
419 	 *  Try the cache first
420 	 */
421 	rp = rrlookup(dp, type, OKneg);
422 	if(rp)
423 		if(rp->db){
424 			/* unauthoritative db entries are hints */
425 			if(rp->auth) {
426 				noteinmem();
427 				if(debug)
428 					dnslog("[%d] dnresolve1 %s %d %d: auth rr in db",
429 						getpid(), name, type, class);
430 				return rp;
431 			}
432 		} else
433 			/* cached entry must still be valid */
434 			if(rp->ttl > now)
435 				/* but Tall entries are special */
436 				if(type != Tall || rp->query == Tall) {
437 					noteinmem();
438 					if(debug)
439 						dnslog("[%d] dnresolve1 %s %d %d: rr not in db",
440 							getpid(), name, type, class);
441 					return rp;
442 				}
443 	rrfreelist(rp);
444 	rp = nil;		/* accident prevention */
445 	USED(rp);
446 
447 	/*
448 	 * try the cache for a canonical name. if found punt
449 	 * since we'll find it during the canonical name search
450 	 * in dnresolve().
451 	 */
452 	if(type != Tcname){
453 		rp = rrlookup(dp, Tcname, NOneg);
454 		rrfreelist(rp);
455 		if(rp){
456 			if(debug)
457 				dnslog("[%d] dnresolve1 %s %d %d: rr from rrlookup for non-cname",
458 					getpid(), name, type, class);
459 			return nil;
460 		}
461 	}
462 
463 	/*
464 	 * if the domain name is within an area of ours,
465 	 * we should have found its data in memory by now.
466 	 */
467 	area = inmyarea(dp->name);
468 	if (area || strncmp(dp->name, "local#", 6) == 0) {
469 //		char buf[32];
470 
471 //		dnslog("%s %s: no data in area %s", dp->name,
472 //			rrname(type, buf, sizeof buf), area->soarr->owner->name);
473 		return nil;
474 	}
475 
476 	qp = emalloc(sizeof *qp);
477 	queryinit(qp, dp, type, req);
478 	rp = issuequery(qp, name, class, depth, recurse);
479 	querydestroy(qp);
480 	free(qp);
481 	if(rp){
482 		if(debug)
483 			dnslog("[%d] dnresolve1 %s %d %d: rr from query",
484 				getpid(), name, type, class);
485 		return rp;
486 	}
487 
488 	/* settle for a non-authoritative answer */
489 	rp = rrlookup(dp, type, OKneg);
490 	if(rp){
491 		if(debug)
492 			dnslog("[%d] dnresolve1 %s %d %d: rr from rrlookup",
493 				getpid(), name, type, class);
494 		return rp;
495 	}
496 
497 	/* noone answered.  try the database, we might have a chance. */
498 	rp = dblookup(name, class, type, 0, 0);
499 	if (rp) {
500 		if(debug)
501 			dnslog("[%d] dnresolve1 %s %d %d: rr from dblookup",
502 				getpid(), name, type, class);
503 	}else{
504 		if(debug)
505 			dnslog("[%d] dnresolve1 %s %d %d: no rr from dblookup; crapped out",
506 				getpid(), name, type, class);
507 	}
508 	return rp;
509 }
510 
/*
 *  step from a domain name to its parent: return a pointer just past
 *  the first '.' in name (so "a.b.c" yields "b.c").  a non-empty name
 *  without a dot yields "" and the empty name yields 0, which ends
 *  the walk.
 */
char*
walkup(char *name)
{
	char *dot;

	dot = strchr(name, '.');
	if(dot != 0)
		return dot + 1;
	if(*name == '\0')
		return 0;
	return "";
}
529 
530 /*
531  *  Get a udp port for sending requests and reading replies.  Put the port
532  *  into "headers" mode.
533  */
534 static char *hmsg = "headers";
535 
536 int
537 udpport(char *mtpt)
538 {
539 	int fd, ctl;
540 	char ds[64], adir[64];
541 
542 	/* get a udp port */
543 	snprint(ds, sizeof ds, "%s/udp!*!0", (mtpt? mtpt: "/net"));
544 	ctl = announce(ds, adir);
545 	if(ctl < 0){
546 		/* warning("can't get udp port"); */
547 		return -1;
548 	}
549 
550 	/* turn on header style interface */
551 	if(write(ctl, hmsg, strlen(hmsg)) != strlen(hmsg)){
552 		close(ctl);
553 		warning(hmsg);
554 		return -1;
555 	}
556 
557 	/* grab the data file */
558 	snprint(ds, sizeof ds, "%s/data", adir);
559 	fd = open(ds, ORDWR);
560 	close(ctl);
561 	if(fd < 0)
562 		warning("can't open udp port %s: %r", ds);
563 	return fd;
564 }
565 
566 void
567 initdnsmsg(DNSmsg *mp, RR *rp, int flags, ushort reqno)
568 {
569 	mp->flags = flags;
570 	mp->id = reqno;
571 	mp->qd = rp;
572 }
573 
574 DNSmsg *
575 newdnsmsg(RR *rp, int flags, ushort reqno)
576 {
577 	DNSmsg *mp;
578 
579 	mp = emalloc(sizeof *mp);
580 	initdnsmsg(mp, rp, flags, reqno);
581 	return mp;
582 }
583 
584 /* generate a DNS UDP query packet */
585 int
586 mkreq(DN *dp, int type, uchar *buf, int flags, ushort reqno)
587 {
588 	DNSmsg m;
589 	int len;
590 	Udphdr *uh = (Udphdr*)buf;
591 	RR *rp;
592 
593 	/* stuff port number into output buffer */
594 	memset(uh, 0, sizeof *uh);
595 	hnputs(uh->rport, 53);
596 
597 	/* make request and convert it to output format */
598 	memset(&m, 0, sizeof m);
599 	rp = rralloc(type);
600 	rp->owner = dp;
601 	initdnsmsg(&m, rp, flags, reqno);
602 	len = convDNS2M(&m, &buf[Udphdrsize], Maxudp);
603 	rrfree(m.qd);
604 	memset(&m, 0, sizeof m);		/* cause trouble */
605 	return len;
606 }
607 
608 void
609 freeanswers(DNSmsg *mp)
610 {
611 	rrfreelist(mp->qd);
612 	rrfreelist(mp->an);
613 	rrfreelist(mp->ns);
614 	rrfreelist(mp->ar);
615 	mp->qd = mp->an = mp->ns = mp->ar = nil;
616 }
617 
618 /* timed read of reply.  sets srcip */
/*
 * read one reply into ibuf, giving up (via alarm) once endms,
 * a time in ms., is reached.
 *
 * Udp: reads a datagram from qp->udpfd; the first IPaddrlen bytes of
 * the udp header are copied to srcip, and the header is skipped when
 * at least Udphdrsize bytes arrived.
 * Tcp: reads a 2-byte big-endian length and then that many bytes from
 * qp->tcpfd; srcip is copied from qp->tcpip.
 *
 * *replyp is set to the start of the message within ibuf.
 * returns the message length, or -1 on failure.  if endms has already
 * passed, returns -1 at once and leaves *replyp and srcip untouched.
 */
619 static int
620 readnet(Query *qp, int medium, uchar *ibuf, uvlong endms, uchar **replyp,
621 	uchar *srcip)
622 {
623 	int len, fd;
624 	long ms;
625 	vlong startns = nsec();
626 	uchar *reply;
627 	uchar lenbuf[2];
628 
629 	len = -1;			/* pessimism */
630 	ms = endms - NS2MS(startns);
631 	if (ms <= 0)
632 		return -1;		/* taking too long */
633 
634 	reply = ibuf;
635 	memset(srcip, 0, IPaddrlen);
636 	alarm(ms);			/* bounds the reads below */
637 	if (medium == Udp)
638 		if (qp->udpfd <= 0)
639 			dnslog("readnet: qp->udpfd closed");
640 		else {
641 			len = read(qp->udpfd, ibuf, Udphdrsize+Maxudpin);
642 			alarm(0);
			/* a failed read is counted as a time-out */
643 			notestats(startns, len < 0, qp->type);
644 			if (len >= IPaddrlen)
645 				memmove(srcip, ibuf, IPaddrlen);
646 			if (len >= Udphdrsize) {
647 				len   -= Udphdrsize;
648 				reply += Udphdrsize;
649 			}
650 		}
651 	else {
652 		if (!qp->tcpset)
653 			dnslog("readnet: tcp params not set");
654 		fd = qp->tcpfd;
655 		if (fd <= 0)
656 			dnslog("readnet: %s: tcp fd unset for dest %I",
657 				qp->dp->name, qp->tcpip);
658 		else if (readn(fd, lenbuf, 2) != 2) {
659 			dnslog("readnet: short read of tcp size from %I",
660 				qp->tcpip);
661 			/* probably a time-out */
662 			notestats(startns, 1, qp->type);
663 		} else {
			/* NOTE(review): len can be up to 65535; assumes ibuf is that large - confirm against callers */
664 			len = lenbuf[0]<<8 | lenbuf[1];
665 			if (readn(fd, ibuf, len) != len) {
666 				dnslog("readnet: short read of tcp data from %I",
667 					qp->tcpip);
668 				/* probably a time-out */
669 				notestats(startns, 1, qp->type);
670 				len = -1;
671 			}
672 		}
673 		memmove(srcip, qp->tcpip, IPaddrlen);
674 	}
675 	alarm(0);
676 	*replyp = reply;
677 	return len;
678 }
679 
680 /*
681  *  read replies to a request and remember the rrs in the answer(s).
682  *  ignore any of the wrong type.
683  *  wait at most until endms.
684  */
/*
 * keep reading replies (via readnet) until one answers request id
 * `req' for qp->dp and qp->type, or until endms.  replies that fail to
 * parse or answer some other question are logged, freed and skipped.
 *
 * returns 0 with the parsed reply left in *mp (each answer RR tagged
 * with qp->type in its query field); the RR lists in *mp then belong
 * to the caller.  returns -1 with nothing left in *mp on time-out or
 * read failure, and also when a reply has Ftrunc set, in which case
 * the caller is expected to retry via tcp.
 */
685 static int
686 readreply(Query *qp, int medium, ushort req, uchar *ibuf, DNSmsg *mp,
687 	uvlong endms)
688 {
689 	int len;
690 	char *err;
691 	char tbuf[32];
692 	uchar *reply;
693 	uchar srcip[IPaddrlen];
694 	RR *rp;
695 
696 	queryck(qp);
697 	memset(mp, 0, sizeof *mp);
698 	memset(srcip, 0, sizeof srcip);
	/* never executed; presumably here to quiet a "used and not set" complaint about len - TODO confirm */
699 	if (0)
700 		len = -1;
	/* freeanswers(mp) runs after every rejected reply, including via continue */
701 	for (; timems() < endms &&
702 	    (len = readnet(qp, medium, ibuf, endms, &reply, srcip)) >= 0;
703 	    freeanswers(mp)){
704 		/* convert into internal format  */
705 		memset(mp, 0, sizeof *mp);
706 		err = convM2DNS(reply, len, mp, nil);
707 		if (mp->flags & Ftrunc) {
708 			free(err);
709 			freeanswers(mp);
710 			/* notify our caller to retry the query via tcp. */
711 			return -1;
712 		} else if(err){
713 			dnslog("readreply: %s: input err, len %d: %s: %I",
714 				qp->dp->name, len, err, srcip);
715 			free(err);
716 			continue;
717 		}
718 		if(debug)
719 			logreply(qp->req->id, srcip, mp);
720 
721 		/* answering the right question? */
722 		if(mp->id != req)
723 			dnslog("%d: id %d instead of %d: %I", qp->req->id,
724 				mp->id, req, srcip);
725 		else if(mp->qd == 0)
726 			dnslog("%d: no question RR: %I", qp->req->id, srcip);
727 		else if(mp->qd->owner != qp->dp)
728 			dnslog("%d: owner %s instead of %s: %I", qp->req->id,
729 				mp->qd->owner->name, qp->dp->name, srcip);
730 		else if(mp->qd->type != qp->type)
731 			dnslog("%d: qp->type %d instead of %d: %I",
732 				qp->req->id, mp->qd->type, qp->type, srcip);
733 		else {
734 			/* remember what request this is in answer to */
735 			for(rp = mp->an; rp; rp = rp->next)
736 				rp->query = qp->type;
737 			return 0;
738 		}
739 	}
740 	if (timems() >= endms) {
741 		;				/* query expired */
742 	} else if (0) {
		/* disabled diagnostics: this branch is never taken */
743 		/* this happens routinely when a read times out */
744 		dnslog("readreply: %s type %s: ns %I read error or eof "
745 			"(returned %d): %r", qp->dp->name, rrname(qp->type,
746 			tbuf, sizeof tbuf), srcip, len);
747 		if (medium == Udp)
748 			for (rp = qp->nsrp; rp != nil; rp = rp->next)
749 				if (rp->type == Tns)
750 					dnslog("readreply: %s: query sent to "
751 						"ns %s", qp->dp->name,
752 						rp->host->name);
753 	}
754 	return -1;
755 }
756 
757 /*
758  *	return non-0 if first list includes second list
759  */
760 int
761 contains(RR *rp1, RR *rp2)
762 {
763 	RR *trp1, *trp2;
764 
765 	for(trp2 = rp2; trp2; trp2 = trp2->next){
766 		for(trp1 = rp1; trp1; trp1 = trp1->next)
767 			if(trp1->type == trp2->type)
768 			if(trp1->host == trp2->host)
769 			if(trp1->owner == trp2->owner)
770 				break;
771 		if(trp1 == nil)
772 			return 0;
773 	}
774 	return 1;
775 }
776 
777 
778 /*
779  *  return multicast version if any
780  */
781 int
782 ipisbm(uchar *ip)
783 {
784 	if(isv4(ip)){
785 		if (ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0 ||
786 		    ipcmp(ip, IPv4bcast) == 0)
787 			return 4;
788 	} else
789 		if(ip[0] == 0xff)
790 			return 6;
791 	return 0;
792 }
793 
794 /*
795  *  Get next server address
796  */
/*
 * add the addresses of one more name server from qp->nsrp to
 * qp->dest, starting at slot nd.
 *
 * servers already handled carry rp->marker and are skipped.  a server
 * whose address is in the cache or database is preferred; failing
 * that, unmarked servers are resolved over the network (Ta, then
 * Taaaa) until one yields an address.  broadcast/multicast addresses
 * are not used, nor, when straddling, inside servers for a name that
 * insideaddr() rejects.
 *
 * returns the new number of filled slots (nd if nothing was added),
 * or 0 if nd was already Maxdest or more.
 */
797 static int
798 serveraddrs(Query *qp, int nd, int depth)
799 {
800 	RR *rp, *arp, *trp;
801 	Dest *cur;
802 
803 	if(nd >= Maxdest)
804 		return 0;
805 
806 	/*
807 	 *  look for a server whose address we already know.
808 	 *  if we find one, mark it so we ignore this on
809 	 *  subsequent passes.
810 	 */
811 	arp = 0;
812 	for(rp = qp->nsrp; rp; rp = rp->next){
813 		assert(rp->magic == RRmagic);
814 		if(rp->marker)
815 			continue;
816 		arp = rrlookup(rp->host, Ta, NOneg);
817 		if(arp == nil)
818 			arp = rrlookup(rp->host, Taaaa, NOneg);
819 		if(arp){
820 			rp->marker = 1;
821 			break;
822 		}
823 		arp = dblookup(rp->host->name, Cin, Ta, 0, 0);
824 		if(arp == nil)
825 			arp = dblookup(rp->host->name, Cin, Taaaa, 0, 0);
826 		if(arp){
827 			rp->marker = 1;
828 			break;
829 		}
830 	}
831 
832 	/*
833 	 *  if the cache and database lookup didn't find any new
834 	 *  server addresses, try resolving one via the network.
835 	 *  Mark any we try to resolve so we don't try a second time.
836 	 */
837 	if(arp == 0)
838 		for(rp = qp->nsrp; rp; rp = rp->next){
839 			if(rp->marker)
840 				continue;
841 			rp->marker = 1;
842 
843 			/*
844 			 *  avoid loops looking up a server under itself
845 			 */
846 			if(subsume(rp->owner->name, rp->host->name))
847 				continue;
848 
849 			arp = dnresolve(rp->host->name, Cin, Ta, qp->req, 0,
850 				depth+1, Recurse, 1, 0);
851 			if(arp == nil)
852 				arp = dnresolve(rp->host->name, Cin, Taaaa,
853 					qp->req, 0, depth+1, Recurse, 1, 0);
			/* negative entries carry no address; drop them */
854 			lock(&dnlock);
855 			rrfreelist(rrremneg(&arp));
856 			unlock(&dnlock);
857 			if(arp)
858 				break;
859 		}
860 
861 	/* use any addresses that we found */
862 	for(trp = arp; trp && nd < Maxdest; trp = trp->next){
		/* the slot is written first and only kept (nd++) if acceptable */
863 		cur = &qp->dest[nd];
864 		parseip(cur->a, trp->ip->name);
865 		/*
866 		 * straddling servers can reject all nameservers if they are all
867 		 * inside, so be sure to list at least one outside ns at
868 		 * the end of the ns list in /lib/ndb for `dom='.
869 		 */
870 		if (ipisbm(cur->a) ||
871 		    cfg.straddle && !insideaddr(qp->dp->name) && insidens(cur->a))
872 			continue;
873 		cur->nx = 0;
874 		cur->s = trp->owner;
875 		cur->code = Rtimeout;
876 		nd++;
877 	}
878 	rrfreelist(arp);
879 	return nd;
880 }
881 
882 /*
883  *  cache negative responses
884  */
/*
 * record that dp has no records of `type': attach a negative RR
 * carrying rcode and the owner of the reply's soa (if any).
 *
 * soarr is the soa list taken from the reply, or nil; only its first
 * RR is kept and it is handed to rrattach(), so the caller must not
 * use soarr afterwards.
 */
885 static void
886 cacheneg(DN *dp, int type, int rcode, RR *soarr)
887 {
888 	RR *rp;
889 	DN *soaowner;
890 	ulong ttl;
891 
892 	stats.negcached++;
893 
894 	/* no cache time specified, don't make anything up */
895 	if(soarr != nil){
		/* keep just the first soa */
896 		if(soarr->next != nil){
897 			rrfreelist(soarr->next);
898 			soarr->next = nil;
899 		}
900 		soaowner = soarr->owner;
901 	} else
902 		soaowner = nil;
903 
904 	/* the attach can cause soarr to be freed so mine it now */
905 	if(soarr != nil && soarr->soa != nil)
906 		ttl = soarr->soa->minttl+now;
907 	else
		/*
		 * NOTE(review): the branch above is relative to `now', this one
		 * is not, and elsewhere in this file ttl is compared with
		 * `now'.  confirm whether now+5*Min was intended.
		 */
908 		ttl = 5*Min;
909 
910 	/* add soa and negative RR to the database */
911 	rrattach(soarr, Authoritative);
912 
913 	rp = rralloc(type);
914 	rp->owner = dp;
915 	rp->negative = 1;
916 	rp->negsoaowner = soaowner;
917 	rp->negrcode = rcode;
918 	rp->ttl = ttl;
919 	rrattach(rp, Authoritative);
920 }
921 
922 static int
923 setdestoutns(Dest *p, int n)
924 {
925 	uchar *outns = outsidens(n);
926 
927 	destck(p);
928 	destinit(p);
929 	if (outns == nil) {
930 		if (n == 0)
931 			dnslog("[%d] no outside-ns in ndb", getpid());
932 		return -1;
933 	}
934 	memmove(p->a, outns, sizeof p->a);
935 	p->s = dnlookup("outside-ns-ips", Cin, 1);
936 	return 0;
937 }
938 
939 /*
940  * issue query via UDP or TCP as appropriate.
941  * for TCP, returns with qp->tcpip set from udppkt header.
942  */
/*
 * send one query to the address found at the start of udppkt.
 * udppkt is a udp header followed by len bytes of dns message.
 *
 * Udp: writes header and message to qp->udpfd; returns 0 on success.
 * Tcp: dials the address (giving up after 10 s.), records the peer in
 * qp->tcpip, sets qp->tcpset, and writes the message preceded by its
 * 2-byte big-endian length; returns what dial() returned.
 * returns -1 on failure, including when the address is our own.
 */
943 static int
944 mydnsquery(Query *qp, int medium, uchar *udppkt, int len)
945 {
946 	int rv = -1, nfd;
947 	char *domain;
948 	char conndir[40], net[40];
949 	uchar belen[2];
950 	NetConnInfo *nci;
951 
952 	queryck(qp);
	/* the destination address, as text */
953 	domain = smprint("%I", udppkt);
954 	if (myaddr(domain)) {
955 		dnslog("mydnsquery: trying to send to myself (%s); bzzzt",
956 			domain);
957 		free(domain);
958 		return rv;
959 	}
960 
961 	switch (medium) {
962 	case Udp:
963 		free(domain);
		/* dup is used only to find out whether udpfd is still a valid descriptor */
964 		nfd = dup(qp->udpfd, -1);
965 		if (nfd < 0) {
966 			warning("mydnsquery: qp->udpfd %d: %r", qp->udpfd);
967 			close(qp->udpfd);	/* ensure it's closed */
968 			qp->udpfd = -1;		/* poison it */
969 			return rv;
970 		}
971 		close(nfd);
972 
973 		if (qp->udpfd <= 0)
974 			dnslog("mydnsquery: qp->udpfd %d closed", qp->udpfd);
975 		else {
976 			if (write(qp->udpfd, udppkt, len+Udphdrsize) !=
977 			    len+Udphdrsize)
978 				warning("sending udp msg: %r");
979 			else {
980 				stats.qsent++;
981 				rv = 0;
982 			}
983 		}
984 		break;
985 	case Tcp:
986 		/* send via TCP & keep fd around for reply */
987 		snprint(net, sizeof net, "%s/tcp",
988 			(mntpt[0] != '\0'? mntpt: "/net"));
989 		alarm(10*1000);
990 		qp->tcpfd = rv = dial(netmkaddr(domain, net, "dns"), nil,
991 			conndir, &qp->tcpctlfd);
992 		alarm(0);
993 		if (qp->tcpfd < 0) {
994 			dnslog("can't dial tcp!%s!dns: %r", domain);
995 			free(domain);
996 			break;
997 		}
998 		free(domain);
999 		nci = getnetconninfo(conndir, qp->tcpfd);
1000 		if (nci) {
1001 			parseip(qp->tcpip, nci->rsys);
1002 			freenetconninfo(nci);
1003 		} else
1004 			dnslog("mydnsquery: getnetconninfo failed");
1005 		qp->tcpset = 1;
1006 
1007 		belen[0] = len >> 8;
1008 		belen[1] = len;
		/* NOTE(review): a failed write is only logged; rv still holds the dial result - confirm intended */
1009 		if (write(qp->tcpfd, belen, 2) != 2 ||
1010 		    write(qp->tcpfd, udppkt + Udphdrsize, len) != len)
1011 			warning("sending tcp msg: %r");
1012 		break;
1013 	default:
1014 		sysfatal("mydnsquery: bad medium");
1015 	}
1016 	return rv;
1017 }
1018 
1019 /*
1020  * send query to all UDP destinations or one TCP destination,
1021  * taken from obuf (udp packet) header
1022  */
/*
 * transmit the query held in obuf (udp header plus len bytes of
 * message).  via Tcp it goes to the single address already in obuf's
 * header; via Udp it goes to each destination in qp->dest that is due
 * a (re)transmission, obuf's header being rewritten for each.
 * inns is only used to label the process name ("in"/"out" side).
 *
 * returns -1 if the request's aborttime has passed, if there are no
 * servers, or if no destination is left to try; otherwise 0.
 * the result of mydnsquery() itself is not examined.
 */
1023 static int
1024 xmitquery(Query *qp, int medium, int depth, uchar *obuf, int inns, int len)
1025 {
1026 	int j, n;
1027 	char buf[32];
1028 	Dest *p;
1029 
1030 	queryck(qp);
1031 	if(timems() >= qp->req->aborttime)
1032 		return -1;
1033 
1034 	/*
1035 	 * get a nameserver address if we need one.
1036 	 * serveraddrs populates qp->dest.
1037 	 */
1038 	p = qp->dest;
1039 	destck(p);
1040 	if (qp->ndest < 0 || qp->ndest > Maxdest)
1041 		dnslog("qp->ndest %d out of range", qp->ndest);
	/* fewer destinations filled in than wanted: fetch more addresses */
1042 	if (qp->ndest > qp->curdest - p) {
1043 		j = serveraddrs(qp, qp->curdest - p, depth);
		/* NOTE(review): serveraddrs can return exactly Maxdest, which aborts here - confirm intended */
1044 		if (j < 0 || j >= Maxdest) {
1045 			dnslog("serveraddrs() result %d out of range", j);
1046 			abort();
1047 		}
1048 		qp->curdest = &qp->dest[j];
1049 	}
1050 	destck(qp->curdest);
1051 
1052 	/* no servers, punt */
1053 	if (qp->ndest == 0)
1054 		if (cfg.straddle && cfg.inside) {
1055 			/* get ips of "outside-ns-ips" */
			/* NOTE(review): qp->ndest stays 0, so the udp loop below sends to none of these - confirm */
1056 			p = qp->curdest = qp->dest;
1057 			for(n = 0; n < Maxdest; n++, qp->curdest++)
1058 				if (setdestoutns(qp->curdest, n) < 0)
1059 					break;
1060 		} else {
1061 			/* it's probably just a bogus domain, don't log it */
1062 			// dnslog("xmitquery: %s: no nameservers", qp->dp->name);
1063 			return -1;
1064 		}
1065 
1066 	/* send to first 'qp->ndest' destinations */
	/* j counts destinations not yet exhausted, whether or not sent to this time */
1067 	j = 0;
1068 	if (medium == Tcp) {
1069 		j++;
1070 		queryck(qp);
1071 		assert(qp->dp);
1072 		procsetname("tcp %sside query for %s %s", (inns? "in": "out"),
1073 			qp->dp->name, rrname(qp->type, buf, sizeof buf));
1074 		mydnsquery(qp, medium, obuf, len); /* sets qp->tcpip from obuf */
1075 		if(debug)
1076 			logsend(qp->req->id, depth, qp->tcpip, "", qp->dp->name,
1077 				qp->type);
1078 	} else
1079 		for(; p < &qp->dest[qp->ndest] && p < qp->curdest; p++){
1080 			/* skip destinations we've finished with */
1081 			if(p->nx >= Maxtrans)
1082 				continue;
1083 
1084 			j++;
1085 
1086 			/* exponential backoff of requests */
1087 			if((1<<p->nx) > qp->ndest)
1088 				continue;
1089 
1090 			procsetname("udp %sside query to %I/%s %s %s",
1091 				(inns? "in": "out"), p->a, p->s->name,
1092 				qp->dp->name, rrname(qp->type, buf, sizeof buf));
1093 			if(debug)
1094 				logsend(qp->req->id, depth, p->a, p->s->name,
1095 					qp->dp->name, qp->type);
1096 
1097 			/* fill in UDP destination addr & send it */
1098 			memmove(obuf, p->a, sizeof p->a);
1099 			mydnsquery(qp, medium, obuf, len);
1100 			p->nx++;
1101 		}
1102 	if(j == 0) {
1103 		// dnslog("xmitquery: %s: no destinations left", qp->dp->name);
1104 		return -1;
1105 	}
1106 	return 0;
1107 }
1108 
/*
 * query types that get a lock index of their own; searched by
 * qtype2lck().  slot 0 stands for every type not listed.
 */
1109 static int lckindex[Maxlcks] = {
1110 	0,			/* all others map here */
1111 	Ta,
1112 	Tns,
1113 	Tcname,
1114 	Tsoa,
1115 	Tptr,
1116 	Tmx,
1117 	Ttxt,
1118 	Taaaa,
1119 };
1120 
1121 static int
1122 qtype2lck(int qtype)		/* map query type to querylck index */
1123 {
1124 	int i;
1125 
1126 	for (i = 1; i < nelem(lckindex); i++)
1127 		if (lckindex[i] == qtype)
1128 			return i;
1129 	return 0;
1130 }
1131 
1132 /* is mp a cachable negative response (with Rname set)? */
1133 static int
1134 isnegrname(DNSmsg *mp)
1135 {
1136 	/* TODO: could add || cfg.justforw to RHS of && */
1137 	return mp->an == nil && (mp->flags & Rmask) == Rname;
1138 }
1139 
/*
 * digest the reply *mp received from srcip (destination p) for qp.
 *
 * the reply's RR lists are attached to the cache or freed here.
 * returns:
 *	-1	the reply is to be ignored (server failure, or a bad
 *		delegation with no answers); *mp has been freed
 *	 1	the search is over: answers were attached, or a
 *		negative response was cached
 *	 0	nothing conclusive and no better name servers offered
 * otherwise the result of a netquery() on the name servers that the
 * reply delegated to.
 */
1140 static int
1141 procansw(Query *qp, DNSmsg *mp, uchar *srcip, int depth, Dest *p)
1142 {
1143 	int rv;
1144 //	int lcktype;
1145 	char buf[32];
1146 	DN *ndp;
1147 	Query *nqp;
1148 	RR *tp, *soarr;
1149 
1150 	if (mp->an == nil)
1151 		stats.negans++;
1152 
1153 	/* ignore any error replies */
1154 	if((mp->flags & Rmask) == Rserver){
1155 		stats.negserver++;
1156 		freeanswers(mp);
1157 		if(p != qp->curdest)
1158 			p->code = Rserver;
1159 		return -1;
1160 	}
1161 
1162 	/* ignore any bad delegations */
1163 	if(mp->ns && baddelegation(mp->ns, qp->nsrp, srcip)){
1164 		stats.negbaddeleg++;
1165 		if(mp->an == nil){
1166 			stats.negbdnoans++;
1167 			freeanswers(mp);
1168 			if(p != qp->curdest)
1169 				p->code = Rserver;
1170 			return -1;
1171 		}
		/* there are answers: keep them, drop only the delegation */
1172 		rrfreelist(mp->ns);
1173 		mp->ns = nil;
1174 	}
1175 
1176 	/* remove any soa's from the authority section */
1177 	lock(&dnlock);
1178 	soarr = rrremtype(&mp->ns, Tsoa);
1179 
1180 	/* incorporate answers */
1181 	unique(mp->an);
1182 	unique(mp->ns);
1183 	unique(mp->ar);
1184 	unlock(&dnlock);
1185 	if(mp->an)
1186 		rrattach(mp->an, (mp->flags & Fauth) != 0);
1187 	if(mp->ar)
1188 		rrattach(mp->ar, Notauthoritative);
1189 	if(mp->ns && !cfg.justforw){
		/* save the owner first: it is needed after the attach (see rrlookup below) */
1190 		ndp = mp->ns->owner;
1191 		rrattach(mp->ns, Notauthoritative);
1192 	} else {
1193 		ndp = nil;
1194 		rrfreelist(mp->ns);
1195 		mp->ns = nil;
1196 	}
1197 
1198 	/* free the question */
1199 	if(mp->qd) {
1200 		rrfreelist(mp->qd);
1201 		mp->qd = nil;
1202 	}
1203 
1204 	/*
1205 	 *  Any reply from an authoritative server,
1206 	 *  or a positive reply terminates the search.
1207 	 *  A negative response now also terminates the search.
1208 	 */
	/* from here on mp->an and mp->ns are only tested against nil, never dereferenced */
1209 	if(mp->an != nil || (mp->flags & Fauth)){
1210 		if(isnegrname(mp))
1211 			qp->dp->respcode = Rname;
1212 		else
1213 			qp->dp->respcode = Rok;
1214 
1215 		/*
1216 		 *  cache any negative responses, free soarr.
1217 		 *  negative responses need not be authoritative:
1218 		 *  they can legitimately come from a cache.
1219 		 */
1220 		if( /* (mp->flags & Fauth) && */ mp->an == nil)
1221 			cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
1222 		else
1223 			rrfreelist(soarr);
1224 		return 1;
1225 	} else if (isnegrname(mp)) {
1226 		qp->dp->respcode = Rname;
1227 		/*
1228 		 *  cache negative response.
1229 		 *  negative responses need not be authoritative:
1230 		 *  they can legitimately come from a cache.
1231 		 */
1232 		cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
1233 		return 1;
1234 	}
1235 	stats.negnorname++;
1236 	rrfreelist(soarr);
1237 
1238 	/*
1239 	 *  if we've been given better name servers, recurse.
1240 	 *  if we're a pure resolver, don't recurse, we have
1241 	 *  to forward to a fixed set of named servers.
1242 	 */
1243 	if(!mp->ns || cfg.resolver && cfg.justforw)
1244 		return 0;
1245 	tp = rrlookup(ndp, Tns, NOneg);
	/* servers we are already asking are not "better" */
1246 	if(contains(qp->nsrp, tp)){
1247 		rrfreelist(tp);
1248 		return 0;
1249 	}
1250 	procsetname("recursive query for %s %s", qp->dp->name,
1251 		rrname(qp->type, buf, sizeof buf));
1252 	/*
1253 	 *  we're called from udpquery, called from
1254 	 *  netquery, which current holds qp->dp->querylck,
1255 	 *  so release it now and acquire it upon return.
1256 	 */
1257 //	lcktype = qtype2lck(qp->type);
1258 //	qunlock(&qp->dp->querylck[lcktype]);
1259 
	/* a fresh Query for the same question, aimed at the new servers */
1260 	nqp = emalloc(sizeof *nqp);
1261 	queryinit(nqp, qp->dp, qp->type, qp->req);
1262 	nqp->nsrp = tp;
1263 	rv = netquery(nqp, depth+1);
1264 
1265 //	qlock(&qp->dp->querylck[lcktype]);
1266 	rrfreelist(nqp->nsrp);
1267 	querydestroy(nqp);
1268 	free(nqp);
1269 	return rv;
1270 }
1271 
1272 /*
1273  * send a query via tcp to a single address (from ibuf's udp header)
1274  * and read the answer(s) into mp->an.
1275  */
1276 static int
1277 tcpquery(Query *qp, DNSmsg *mp, int depth, uchar *ibuf, uchar *obuf, int len,
1278 	ulong waitms, int inns, ushort req)
1279 {
1280 	int rv = 0;
1281 	uvlong endms;
1282 
1283 	endms = timems() + waitms;
1284 	if(endms > qp->req->aborttime)
1285 		endms = qp->req->aborttime;
1286 
1287 	if (0)
1288 		dnslog("%s: udp reply truncated; retrying query via tcp to %I",
1289 			qp->dp->name, qp->tcpip);
1290 
1291 	qlock(&qp->tcplock);
1292 	memmove(obuf, ibuf, IPaddrlen);		/* send back to respondent */
1293 	/* sets qp->tcpip from obuf's udp header */
1294 	if (xmitquery(qp, Tcp, depth, obuf, inns, len) < 0 ||
1295 	    readreply(qp, Tcp, req, ibuf, mp, endms) < 0)
1296 		rv = -1;
1297 	if (qp->tcpfd > 0) {
1298 		hangup(qp->tcpctlfd);
1299 		close(qp->tcpctlfd);
1300 		close(qp->tcpfd);
1301 	}
1302 	qp->tcpfd = qp->tcpctlfd = -1;
1303 	qunlock(&qp->tcplock);
1304 	return rv;
1305 }
1306 
1307 /*
1308  *  query name servers.  If the name server returns a pointer to another
1309  *  name server, recurse.
1310  */
1311 static int
1312 queryns(Query *qp, int depth, uchar *ibuf, uchar *obuf, ulong waitms, int inns)
1313 {
1314 	int ndest, len, replywaits, rv;
1315 	ushort req;
1316 	uvlong endms;
1317 	char buf[12];
1318 	uchar srcip[IPaddrlen];
1319 	Dest *p, *np, *dest;
1320 
1321 	/* pack request into a udp message */
1322 	req = rand();
1323 	len = mkreq(qp->dp, qp->type, obuf, Frecurse|Oquery, req);
1324 
1325 	/* no server addresses yet */
1326 	queryck(qp);
1327 	dest = emalloc(Maxdest * sizeof *dest);	/* dest can't be on stack */
1328 	for (p = dest; p < dest + Maxdest; p++)
1329 		destinit(p);
1330 	/* this dest array is local to this call of queryns() */
1331 	free(qp->dest);
1332 	qp->curdest = qp->dest = dest;
1333 
1334 	/*
1335 	 *  transmit udp requests and wait for answers.
1336 	 *  at most Maxtrans attempts to each address.
1337 	 *  each cycle send one more message than the previous.
1338 	 *  retry a query via tcp if its response is truncated.
1339 	 */
1340 	for(ndest = 1; ndest < Maxdest; ndest++){
1341 		qp->ndest = ndest;
1342 		qp->tcpset = 0;
1343 		if (xmitquery(qp, Udp, depth, obuf, inns, len) < 0)
1344 			break;
1345 
1346 		endms = timems() + waitms;
1347 		if(endms > qp->req->aborttime)
1348 			endms = qp->req->aborttime;
1349 
1350 		for(replywaits = 0; replywaits < ndest; replywaits++){
1351 			DNSmsg m;
1352 
1353 			procsetname("reading %sside reply from %I: %s %s from %s",
1354 				(inns? "in": "out"), obuf, qp->dp->name,
1355 				rrname(qp->type, buf, sizeof buf), qp->req->from);
1356 
1357 			/* read udp answer into m */
1358 			if (readreply(qp, Udp, req, ibuf, &m, endms) >= 0)
1359 				memmove(srcip, ibuf, IPaddrlen);
1360 			else if (!(m.flags & Ftrunc)) {
1361 				freeanswers(&m);
1362 				break;		/* timed out on this dest */
1363 			} else {
1364 				/* whoops, it was truncated! ask again via tcp */
1365 				freeanswers(&m);
1366 				rv = tcpquery(qp, &m, depth, ibuf, obuf, len,
1367 					waitms, inns, req);  /* answer in m */
1368 				if (rv < 0) {
1369 					freeanswers(&m);
1370 					break;		/* failed via tcp too */
1371 				}
1372 				memmove(srcip, qp->tcpip, IPaddrlen);
1373 			}
1374 
1375 			/* find responder */
1376 			// dnslog("queryns got reply from %I", srcip);
1377 			for(p = qp->dest; p < qp->curdest; p++)
1378 				if(memcmp(p->a, srcip, sizeof p->a) == 0)
1379 					break;
1380 
1381 			/* remove all addrs of responding server from list */
1382 			for(np = qp->dest; np < qp->curdest; np++)
1383 				if(np->s == p->s)
1384 					p->nx = Maxtrans;
1385 
1386 			/* free or incorporate RRs in m */
1387 			rv = procansw(qp, &m, srcip, depth, p);
1388 			if (rv > 0) {
1389 				free(qp->dest);
1390 				qp->dest = qp->curdest = nil; /* prevent accidents */
1391 				return rv;
1392 			}
1393 		}
1394 	}
1395 
1396 	/* if all servers returned failure, propagate it */
1397 	qp->dp->respcode = Rserver;
1398 	for(p = dest; p < qp->curdest; p++) {
1399 		destck(p);
1400 		if(p->code != Rserver)
1401 			qp->dp->respcode = Rok;
1402 		p->magic = 0;			/* prevent accidents */
1403 	}
1404 
1405 //	if (qp->dp->respcode)
1406 //		dnslog("queryns setting Rserver for %s", qp->dp->name);
1407 
1408 	free(qp->dest);
1409 	qp->dest = qp->curdest = nil;		/* prevent accidents */
1410 	return 0;
1411 }
1412 
1413 /*
1414  *  run a command with a supplied fd as standard input
1415  */
1416 char *
1417 system(int fd, char *cmd)
1418 {
1419 	int pid, p, i;
1420 	static Waitmsg msg;
1421 
1422 	if((pid = fork()) == -1)
1423 		sysfatal("fork failed: %r");
1424 	else if(pid == 0){
1425 		dup(fd, 0);
1426 		close(fd);
1427 		for (i = 3; i < 200; i++)
1428 			close(i);		/* don't leak fds */
1429 		execl("/bin/rc", "rc", "-c", cmd, nil);
1430 		sysfatal("exec rc: %r");
1431 	}
1432 	for(p = waitpid(); p >= 0; p = waitpid())
1433 		if(p == pid)
1434 			return msg.msg;
1435 	return "lost child";
1436 }
1437 
1438 /* compute wait, weighted by probability of success, with bounds */
1439 static ulong
1440 weight(ulong ms, unsigned pcntprob)
1441 {
1442 	ulong wait;
1443 
1444 	wait = (ms * pcntprob) / 100;
1445 	if (wait < Minwaitms)
1446 		wait = Minwaitms;
1447 	if (wait > Maxwaitms)
1448 		wait = Maxwaitms;
1449 	return wait;
1450 }
1451 
1452 /*
1453  * in principle we could use a single descriptor for a udp port
1454  * to send all queries and receive all the answers to them,
1455  * but we'd have to sort out the answers by dns-query id.
1456  */
1457 static int
1458 udpquery(Query *qp, char *mntpt, int depth, int patient, int inns)
1459 {
1460 	int fd, rv;
1461 	long now;
1462 	ulong pcntprob;
1463 	uvlong wait, reqtm;
1464 	char *msg;
1465 	uchar *obuf, *ibuf;
1466 	static QLock mntlck;
1467 	static ulong lastmount;
1468 
1469 	/* use alloced buffers rather than ones from the stack */
1470 	// ibuf = emalloc(Maxudpin+Udphdrsize);
1471 	ibuf = emalloc(64*1024);		/* max. tcp reply size */
1472 	obuf = emalloc(Maxudp+Udphdrsize);
1473 
1474 	fd = udpport(mntpt);
1475 	while (fd < 0 && cfg.straddle && strcmp(mntpt, "/net.alt") == 0) {
1476 		/* HACK: remount /net.alt */
1477 		now = time(nil);
1478 		if (now < lastmount + Remntretry)
1479 			sleep(S2MS(lastmount + Remntretry - now));
1480 		qlock(&mntlck);
1481 		fd = udpport(mntpt);	/* try again under lock */
1482 		if (fd < 0) {
1483 			dnslog("[%d] remounting /net.alt", getpid());
1484 			unmount(nil, "/net.alt");
1485 
1486 			msg = system(open("/dev/null", ORDWR), "outside");
1487 
1488 			lastmount = time(nil);
1489 			if (msg && *msg) {
1490 				dnslog("[%d] can't remount /net.alt: %s",
1491 					getpid(), msg);
1492 				sleep(10*1000);	/* don't spin remounting */
1493 			} else
1494 				fd = udpport(mntpt);
1495 		}
1496 		qunlock(&mntlck);
1497 	}
1498 	if (fd < 0) {
1499 		dnslog("can't get udpport for %s query of name %s: %r",
1500 			mntpt, qp->dp->name);
1501 		sysfatal("out of udp conversations");	/* we're buggered */
1502 	}
1503 
1504 	/*
1505 	 * Our QIP servers are busted and respond to AAAA and CNAME queries
1506 	 * with (sometimes malformed [too short] packets and) no answers and
1507 	 * just NS RRs but not Rname errors.  so make time-to-wait
1508 	 * proportional to estimated probability of an RR of that type existing.
1509 	 */
1510 	if (qp->type >= nelem(likely))
1511 		pcntprob = 35;			/* unpopular query type */
1512 	else
1513 		pcntprob = likely[qp->type];
1514 	reqtm = (patient? 2 * Maxreqtm: Maxreqtm);
1515 	wait = weight(reqtm / 3, pcntprob);	/* time for one udp query */
1516 	qp->req->aborttime = timems() + 3*wait; /* for all udp queries */
1517 
1518 	qp->udpfd = fd;
1519 	rv = queryns(qp, depth, ibuf, obuf, wait, inns);
1520 	close(fd);
1521 	qp->udpfd = -1;
1522 
1523 	free(obuf);
1524 	free(ibuf);
1525 	return rv;
1526 }
1527 
1528 /*
1529  * look up (qp->dp->name, qp->type) rr in dns,
1530  * using nameservers in qp->nsrp.
1531  */
1532 static int
1533 netquery(Query *qp, int depth)
1534 {
1535 	int lock, rv, triedin, inname;
1536 	char buf[32];
1537 	RR *rp;
1538 	DN *dp;
1539 	Querylck *qlp;
1540 	static int whined;
1541 
1542 	rv = 0;				/* pessimism */
1543 	if(depth > 12)			/* in a recursive loop? */
1544 		return 0;
1545 
1546 	slave(qp->req);
1547 	/*
1548 	 * slave might have forked.  if so, the parent process longjmped to
1549 	 * req->mret; we're usually the child slave, but if there are too
1550 	 * many children already, we're still the same process.
1551 	 */
1552 
1553 	/*
1554 	 * don't lock before call to slave so only children can block.
1555 	 * just lock at top-level invocation.
1556 	 */
1557 	lock = depth <= 1 && qp->req->isslave;
1558 	dp = qp->dp;		/* ensure that it doesn't change underfoot */
1559 	qlp = nil;
1560 	if(lock) {
1561 		procsetname("query lock wait: %s %s from %s", dp->name,
1562 			rrname(qp->type, buf, sizeof buf), qp->req->from);
1563 		/*
1564 		 * don't make concurrent queries for this name.
1565 		 * dozens of processes blocking here probably indicates
1566 		 * an error in our dns data that causes us to not
1567 		 * recognise a zone (area) as one of our own, thus
1568 		 * causing us to query other nameservers.
1569 		 */
1570 		qlp = &dp->querylck[qtype2lck(qp->type)];
1571 		qlock(qlp);
1572 		if (qlp->Ref.ref > Maxoutstanding) {
1573 			qunlock(qlp);
1574 			if (!whined) {
1575 				whined = 1;
1576 				dnslog("too many outstanding queries for %s;"
1577 					" dropping this one; no further logging"
1578 					" of drops", dp->name);
1579 			}
1580 			return 0;
1581 		}
1582 		++qlp->Ref.ref;
1583 		qunlock(qlp);
1584 	}
1585 	procsetname("netquery: %s", dp->name);
1586 
1587 	/* prepare server RR's for incremental lookup */
1588 	for(rp = qp->nsrp; rp; rp = rp->next)
1589 		rp->marker = 0;
1590 
1591 	triedin = 0;
1592 
1593 	/*
1594 	 * normal resolvers and servers will just use mntpt for all addresses,
1595 	 * even on the outside.  straddling servers will use mntpt (/net)
1596 	 * for inside addresses and /net.alt for outside addresses,
1597 	 * thus bypassing other inside nameservers.
1598 	 */
1599 	inname = insideaddr(dp->name);
1600 	if (!cfg.straddle || inname) {
1601 		rv = udpquery(qp, mntpt, depth, Hurry, (cfg.inside? Inns: Outns));
1602 		triedin = 1;
1603 	}
1604 
1605 	/*
1606 	 * if we're still looking, are inside, and have an outside domain,
1607 	 * try it on our outside interface, if any.
1608 	 */
1609 	if (rv == 0 && cfg.inside && !inname) {
1610 		if (triedin)
1611 			dnslog(
1612 	   "[%d] netquery: internal nameservers failed for %s; trying external",
1613 				getpid(), dp->name);
1614 
1615 		/* prepare server RR's for incremental lookup */
1616 		for(rp = qp->nsrp; rp; rp = rp->next)
1617 			rp->marker = 0;
1618 
1619 		rv = udpquery(qp, "/net.alt", depth, Patient, Outns);
1620 	}
1621 //	if (rv == 0)		/* could ask /net.alt/dns directly */
1622 //		askoutdns(dp, qp->type);
1623 
1624 	if(lock && qlp) {
1625 		qlock(qlp);
1626 		assert(qlp->Ref.ref > 0);
1627 		qunlock(qlp);
1628 		decref(qlp);
1629 	}
1630 	return rv;
1631 }
1632 
1633 int
1634 seerootns(void)
1635 {
1636 	int rv;
1637 	char root[] = "";
1638 	Request req;
1639 	Query *qp;
1640 
1641 	memset(&req, 0, sizeof req);
1642 	req.isslave = 1;
1643 	req.aborttime = timems() + Maxreqtm;
1644 	req.from = "internal";
1645 	qp = emalloc(sizeof *qp);
1646 	queryinit(qp, dnlookup(root, Cin, 1), Tns, &req);
1647 
1648 	qp->nsrp = dblookup(root, Cin, Tns, 0, 0);
1649 	rv = netquery(qp, 0);
1650 
1651 	rrfreelist(qp->nsrp);
1652 	querydestroy(qp);
1653 	free(qp);
1654 	return rv;
1655 }
1656