xref: /plan9-contrib/sys/src/cmd/ndb/dnresolve.c (revision d9924332ebdbff1ae6e4e81162ea52d38508aa08)
1 /*
2  * domain name resolvers, see rfcs 1035 and 1123
3  */
4 #include <u.h>
5 #include <libc.h>
6 #include <ip.h>
7 #include <bio.h>
8 #include <ndb.h>
9 #include "dns.h"
10 
/* short names for the structures defined later in this file */
typedef struct Dest Dest;
typedef struct Ipaddr Ipaddr;
typedef struct Query Query;
14 
/* resolver limits, time-outs and structure sanity-check values */
enum
{
	Udp, Tcp,		/* transport medium of a query or reply */

	Maxdest=	24,	/* maximum destinations for a request message */
	Maxtrans=	3,	/* maximum transmissions to a server */
	Maxretries=	3, /* cname+actual resends: was 32; have pity on user */
	Maxwaitms=	1000,	/* wait no longer for a remote dns query */
	Minwaitms=	100,	/* willing to wait for a remote dns query */
	Remntretry=	15,	/* min. sec.s between /net.alt remount tries */
	Maxoutstanding=	15,	/* max. outstanding queries per domain name */

	Destmagic=	0xcafebabe,	/* Dest.magic; set by destinit, checked by destck */
	Querymagic=	0xdeadbeef,	/* Query.magic; set by queryinit, checked by queryck */
};
enum { Hurry, Patient, };
enum { Outns, Inns, };	/* presumably values for the `inns' arguments below -- confirm */
32 
/* one node of a singly-linked list of ip addresses */
struct Ipaddr {
	Ipaddr *next;
	uchar	ip[IPaddrlen];
};
37 
/*
 * one destination (name server address) for a query.
 * entries live in the Query.dest array.
 */
struct Dest
{
	uchar	a[IPaddrlen];	/* ip address */
	DN	*s;		/* name server */
	int	nx;		/* number of transmissions */
	int	code;		/* response code; used to clear dp->respcode */

	ulong	magic;		/* Destmagic once destinit has run; see destck */
};
47 
/*
 * Query has a QLock in it, thus it can't be an automatic
 * variable, since each process would see a separate copy
 * of the lock on its stack.
 *
 * dnresolve1 and procansw allocate one with emalloc, set it up
 * with queryinit and tear it down with querydestroy.
 */
struct Query {
	DN	*dp;		/* domain */
	ushort	type;		/* and type to look up */
	Request *req;		/* request being served; supplies id and aborttime */
	RR	*nsrp;		/* name servers to consult */

	/* dest must not be on the stack due to forking in slave() */
	Dest	*dest;		/* array of destinations */
	Dest	*curdest;	/* pointer to one of them */
	int	ndest;		/* presumably the number of dest entries in use -- set outside this view */

	int	udpfd;		/* udp data fd; querydestroy leaves it open */

	QLock	tcplock;	/* only one tcp call at a time per query */
	int	tcpset;		/* set by mydnsquery once a tcp call has been dialed */
	int	tcpfd;		/* if Tcp, read replies from here */
	int	tcpctlfd;	/* ctl fd of the tcp call; hung up and closed with tcpfd */
	uchar	tcpip[IPaddrlen];	/* remote address of the tcp call */

	ulong	magic;		/* Querymagic while the Query is valid; see queryck */
};
74 
/*
 * estimated % probability of such a record existing at all.
 * indexed by RR type; types not listed here are zero.
 * (uses the Plan 9 compiler's `[index] value' initializer form.)
 */
int likely[] = {
	[Ta]		95,
	[Taaaa]		10,
	[Tcname]	15,
	[Tmx]		60,
	[Tns]		90,
	[Tnull]		5,
	[Tptr]		35,
	[Tsoa]		90,
	[Tsrv]		60,
	[Ttxt]		15,
	[Tall]		95,
};
89 
/* forward declarations: both are called before their definitions */
static RR*	dnresolve1(char*, int, int, Request*, int, int);
static int	netquery(Query *, int);
92 
93 /*
94  * reading /proc/pid/args yields either "name args" or "name [display args]",
95  * so return only display args, if any.
96  */
97 static char *
98 procgetname(void)
99 {
100 	int fd, n;
101 	char *lp, *rp;
102 	char buf[256];
103 
104 	snprint(buf, sizeof buf, "#p/%d/args", getpid());
105 	if((fd = open(buf, OREAD)) < 0)
106 		return strdup("");
107 	*buf = '\0';
108 	n = read(fd, buf, sizeof buf-1);
109 	close(fd);
110 	if (n >= 0)
111 		buf[n] = '\0';
112 	if ((lp = strchr(buf, '[')) == nil ||
113 	    (rp = strrchr(buf, ']')) == nil)
114 		return strdup("");
115 	*rp = '\0';
116 	return strdup(lp+1);
117 }
118 
119 /*
120  *  lookup 'type' info for domain name 'name'.  If it doesn't exist, try
121  *  looking it up as a canonical name.
122  *
123  *  this process can be quite slow if time-outs are set too high when querying
124  *  nameservers that just don't respond to certain query types.  in that case,
125  *  there will be multiple udp retries, multiple nameservers will be queried,
126  *  and this will be repeated for a cname query.  the whole thing will be
127  *  retried several times until we get an answer or a time-out.
128  */
129 RR*
130 dnresolve(char *name, int class, int type, Request *req, RR **cn, int depth,
131 	int recurse, int rooted, int *status)
132 {
133 	RR *rp, *nrp, *drp;
134 	DN *dp;
135 	int loops;
136 	char *procname;
137 	char nname[Domlen];
138 
139 	if(status)
140 		*status = 0;
141 
142 	if(depth > 12)			/* in a recursive loop? */
143 		return nil;
144 
145 	procname = procgetname();
146 	/*
147 	 *  hack for systems that don't have resolve search
148 	 *  lists.  Just look up the simple name in the database.
149 	 */
150 	if(!rooted && strchr(name, '.') == nil){
151 		rp = nil;
152 		drp = domainlist(class);
153 		for(nrp = drp; rp == nil && nrp != nil; nrp = nrp->next){
154 			snprint(nname, sizeof nname, "%s.%s", name,
155 				nrp->ptr->name);
156 			rp = dnresolve(nname, class, type, req, cn, depth+1,
157 				recurse, rooted, status);
158 			lock(&dnlock);
159 			rrfreelist(rrremneg(&rp));
160 			unlock(&dnlock);
161 		}
162 		if(drp != nil)
163 			rrfreelist(drp);
164 		procsetname(procname);
165 		free(procname);
166 		return rp;
167 	}
168 
169 	/*
170 	 *  try the name directly
171 	 */
172 	rp = dnresolve1(name, class, type, req, depth, recurse);
173 	if(rp == nil) {
174 		/*
175 		 * try it as a canonical name if we weren't told
176 		 * that the name didn't exist
177 		 */
178 		dp = dnlookup(name, class, 0);
179 		if(type != Tptr && dp->respcode != Rname)
180 			for(loops = 0; rp == nil && loops < Maxretries; loops++){
181 				/* retry cname, then the actual type */
182 				rp = dnresolve1(name, class, Tcname, req,
183 					depth, recurse);
184 				if(rp == nil)
185 					break;
186 
187 				/* rp->host == nil shouldn't happen, but does */
188 				if(rp->negative || rp->host == nil){
189 					rrfreelist(rp);
190 					rp = nil;
191 					break;
192 				}
193 
194 				name = rp->host->name;
195 				lock(&dnlock);
196 				if(cn)
197 					rrcat(cn, rp);
198 				else
199 					rrfreelist(rp);
200 				unlock(&dnlock);
201 
202 				rp = dnresolve1(name, class, type, req,
203 					depth, recurse);
204 			}
205 
206 		/* distinction between not found and not good */
207 		if(rp == nil && status != nil && dp->respcode != Rok)
208 			*status = dp->respcode;
209 	}
210 	procsetname(procname);
211 	free(procname);
212 	return randomize(rp);
213 }
214 
215 static void
216 queryinit(Query *qp, DN *dp, int type, Request *req)
217 {
218 	memset(qp, 0, sizeof *qp);
219 	qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
220 	qp->dp = dp;
221 	qp->type = type;
222 	if (qp->type != type)
223 		dnslog("queryinit: bogus type %d", type);
224 	qp->req = req;
225 	qp->nsrp = nil;
226 	qp->dest = qp->curdest = nil;
227 	qp->magic = Querymagic;
228 }
229 
230 static void
231 queryck(Query *qp)
232 {
233 	assert(qp);
234 	assert(qp->magic == Querymagic);
235 }
236 
237 static void
238 querydestroy(Query *qp)
239 {
240 	queryck(qp);
241 	/* leave udpfd open */
242 	if (qp->tcpfd > 0)
243 		close(qp->tcpfd);
244 	if (qp->tcpctlfd > 0) {
245 		hangup(qp->tcpctlfd);
246 		close(qp->tcpctlfd);
247 	}
248 	free(qp->dest);
249 	memset(qp, 0, sizeof *qp);	/* prevent accidents */
250 	qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
251 }
252 
253 static void
254 destinit(Dest *p)
255 {
256 	memset(p, 0, sizeof *p);
257 	p->magic = Destmagic;
258 }
259 
260 static void
261 destck(Dest *p)
262 {
263 	assert(p);
264 	assert(p->magic == Destmagic);
265 }
266 
267 static void
268 destdestroy(Dest *p)
269 {
270 	USED(p);
271 }
272 
273 /*
274  * if the response to a query hasn't arrived within 100 ms.,
275  * it's unlikely to arrive at all.  after 1 s., it's really unlikely.
276  * queries for missing RRs are likely to produce time-outs rather than
277  * negative responses, so cname and aaaa queries are likely to time out,
278  * thus we don't wait very long for them.
279  */
280 static void
281 notestats(vlong start, int tmout, int type)
282 {
283 	qlock(&stats);
284 	if (tmout) {
285 		stats.tmout++;
286 		if (type == Taaaa)
287 			stats.tmoutv6++;
288 		else if (type == Tcname)
289 			stats.tmoutcname++;
290 	} else {
291 		long wait10ths = NS2MS(nsec() - start) / 100;
292 
293 		if (wait10ths <= 0)
294 			stats.under10ths[0]++;
295 		else if (wait10ths >= nelem(stats.under10ths))
296 			stats.under10ths[nelem(stats.under10ths) - 1]++;
297 		else
298 			stats.under10ths[wait10ths]++;
299 	}
300 	qunlock(&stats);
301 }
302 
303 static void
304 noteinmem(void)
305 {
306 	qlock(&stats);
307 	stats.answinmem++;
308 	qunlock(&stats);
309 }
310 
311 static RR*
312 issuequery(Query *qp, char *name, int class, int depth, int recurse)
313 {
314 	char *cp;
315 	DN *nsdp;
316 	RR *rp, *nsrp, *dbnsrp;
317 
318 	/*
319 	 *  if we're running as just a resolver, query our
320 	 *  designated name servers
321 	 */
322 	if(cfg.resolver){
323 		nsrp = randomize(getdnsservers(class));
324 		if(nsrp != nil) {
325 			qp->nsrp = nsrp;
326 			if(netquery(qp, depth+1)){
327 				rrfreelist(nsrp);
328 				return rrlookup(qp->dp, qp->type, OKneg);
329 			}
330 			rrfreelist(nsrp);
331 		}
332 	}
333 
334 	/*
335  	 *  walk up the domain name looking for
336 	 *  a name server for the domain.
337 	 */
338 	for(cp = name; cp; cp = walkup(cp)){
339 		/*
340 		 *  if this is a local (served by us) domain,
341 		 *  return answer
342 		 */
343 		dbnsrp = randomize(dblookup(cp, class, Tns, 0, 0));
344 		if(dbnsrp && dbnsrp->local){
345 			rp = dblookup(name, class, qp->type, 1, dbnsrp->ttl);
346 			rrfreelist(dbnsrp);
347 			return rp;
348 		}
349 
350 		/*
351 		 *  if recursion isn't set, just accept local
352 		 *  entries
353 		 */
354 		if(recurse == Dontrecurse){
355 			if(dbnsrp)
356 				rrfreelist(dbnsrp);
357 			continue;
358 		}
359 
360 		/* look for ns in cache */
361 		nsdp = dnlookup(cp, class, 0);
362 		nsrp = nil;
363 		if(nsdp)
364 			nsrp = randomize(rrlookup(nsdp, Tns, NOneg));
365 
366 		/* if the entry timed out, ignore it */
367 		if(nsrp && nsrp->ttl < now){
368 			rrfreelist(nsrp);
369 			nsrp = nil;
370 		}
371 
372 		if(nsrp){
373 			rrfreelist(dbnsrp);
374 
375 			/* query the name servers found in cache */
376 			qp->nsrp = nsrp;
377 			if(netquery(qp, depth+1)){
378 				rrfreelist(nsrp);
379 				return rrlookup(qp->dp, qp->type, OKneg);
380 			}
381 			rrfreelist(nsrp);
382 			continue;
383 		}
384 
385 		/* use ns from db */
386 		if(dbnsrp){
387 			/* try the name servers found in db */
388 			qp->nsrp = dbnsrp;
389 			if(netquery(qp, depth+1)){
390 				/* we got an answer */
391 				rrfreelist(dbnsrp);
392 				return rrlookup(qp->dp, qp->type, NOneg);
393 			}
394 			rrfreelist(dbnsrp);
395 		}
396 	}
397 	return nil;
398 }
399 
400 static RR*
401 dnresolve1(char *name, int class, int type, Request *req, int depth,
402 	int recurse)
403 {
404 	Area *area;
405 	DN *dp;
406 	RR *rp;
407 	Query *qp;
408 
409 	if(debug)
410 		dnslog("[%d] dnresolve1 %s %d %d", getpid(), name, type, class);
411 
412 	/* only class Cin implemented so far */
413 	if(class != Cin)
414 		return nil;
415 
416 	dp = dnlookup(name, class, 1);
417 
418 	/*
419 	 *  Try the cache first
420 	 */
421 	rp = rrlookup(dp, type, OKneg);
422 	if(rp)
423 		if(rp->db){
424 			/* unauthoritative db entries are hints */
425 			if(rp->auth) {
426 				noteinmem();
427 				if(debug)
428 					dnslog("[%d] dnresolve1 %s %d %d: auth rr in db",
429 						getpid(), name, type, class);
430 				return rp;
431 			}
432 		} else
433 			/* cached entry must still be valid */
434 			if(rp->ttl > now)
435 				/* but Tall entries are special */
436 				if(type != Tall || rp->query == Tall) {
437 					noteinmem();
438 					if(debug)
439 						dnslog("[%d] dnresolve1 %s %d %d: rr not in db",
440 							getpid(), name, type, class);
441 					return rp;
442 				}
443 	rrfreelist(rp);
444 	rp = nil;		/* accident prevention */
445 	USED(rp);
446 
447 	/*
448 	 * try the cache for a canonical name. if found punt
449 	 * since we'll find it during the canonical name search
450 	 * in dnresolve().
451 	 */
452 	if(type != Tcname){
453 		rp = rrlookup(dp, Tcname, NOneg);
454 		rrfreelist(rp);
455 		if(rp){
456 			if(debug)
457 				dnslog("[%d] dnresolve1 %s %d %d: rr from rrlookup for non-cname",
458 					getpid(), name, type, class);
459 			return nil;
460 		}
461 	}
462 
463 	/*
464 	 * if the domain name is within an area of ours,
465 	 * we should have found its data in memory by now.
466 	 */
467 	area = inmyarea(dp->name);
468 	if (area || strncmp(dp->name, "local#", 6) == 0) {
469 //		char buf[32];
470 
471 //		dnslog("%s %s: no data in area %s", dp->name,
472 //			rrname(type, buf, sizeof buf), area->soarr->owner->name);
473 		return nil;
474 	}
475 
476 	qp = emalloc(sizeof *qp);
477 	queryinit(qp, dp, type, req);
478 	rp = issuequery(qp, name, class, depth, recurse);
479 	querydestroy(qp);
480 	free(qp);
481 	if(rp){
482 		if(debug)
483 			dnslog("[%d] dnresolve1 %s %d %d: rr from query",
484 				getpid(), name, type, class);
485 		return rp;
486 	}
487 
488 	/* settle for a non-authoritative answer */
489 	rp = rrlookup(dp, type, OKneg);
490 	if(rp){
491 		if(debug)
492 			dnslog("[%d] dnresolve1 %s %d %d: rr from rrlookup",
493 				getpid(), name, type, class);
494 		return rp;
495 	}
496 
497 	/* noone answered.  try the database, we might have a chance. */
498 	rp = dblookup(name, class, type, 0, 0);
499 	if (rp) {
500 		if(debug)
501 			dnslog("[%d] dnresolve1 %s %d %d: rr from dblookup",
502 				getpid(), name, type, class);
503 	}else{
504 		if(debug)
505 			dnslog("[%d] dnresolve1 %s %d %d: no rr from dblookup; crapped out",
506 				getpid(), name, type, class);
507 	}
508 	return rp;
509 }
510 
/*
 *  step from a domain name to its parent.
 *  returns a pointer to the text after the first '.' in name;
 *  for a non-empty name with no '.', returns "" (the root);
 *  for the empty name, returns 0: there is nothing above the root.
 */
char*
walkup(char *name)
{
	char *dot;

	dot = strchr(name, '.');
	if(dot != 0)
		return dot + 1;
	if(*name == '\0')
		return 0;
	return "";
}
529 
530 /*
531  *  Get a udp port for sending requests and reading replies.  Put the port
532  *  into "headers" mode.
533  */
534 static char *hmsg = "headers";
535 
536 int
537 udpport(char *mtpt)
538 {
539 	int fd, ctl;
540 	char ds[64], adir[64];
541 
542 	/* get a udp port */
543 	snprint(ds, sizeof ds, "%s/udp!*!0", (mtpt? mtpt: "/net"));
544 	ctl = announce(ds, adir);
545 	if(ctl < 0){
546 		/* warning("can't get udp port"); */
547 		return -1;
548 	}
549 
550 	/* turn on header style interface */
551 	if(write(ctl, hmsg, strlen(hmsg)) != strlen(hmsg)){
552 		close(ctl);
553 		warning(hmsg);
554 		return -1;
555 	}
556 
557 	/* grab the data file */
558 	snprint(ds, sizeof ds, "%s/data", adir);
559 	fd = open(ds, ORDWR);
560 	close(ctl);
561 	if(fd < 0)
562 		warning("can't open udp port %s: %r", ds);
563 	return fd;
564 }
565 
566 void
567 initdnsmsg(DNSmsg *mp, RR *rp, int flags, ushort reqno)
568 {
569 	mp->flags = flags;
570 	mp->id = reqno;
571 	mp->qd = rp;
572 }
573 
574 DNSmsg *
575 newdnsmsg(RR *rp, int flags, ushort reqno)
576 {
577 	DNSmsg *mp;
578 
579 	mp = emalloc(sizeof *mp);
580 	initdnsmsg(mp, rp, flags, reqno);
581 	return mp;
582 }
583 
584 /* generate a DNS UDP query packet */
585 int
586 mkreq(DN *dp, int type, uchar *buf, int flags, ushort reqno)
587 {
588 	DNSmsg m;
589 	int len;
590 	Udphdr *uh = (Udphdr*)buf;
591 	RR *rp;
592 
593 	/* stuff port number into output buffer */
594 	memset(uh, 0, sizeof *uh);
595 	hnputs(uh->rport, 53);
596 
597 	/* make request and convert it to output format */
598 	memset(&m, 0, sizeof m);
599 	rp = rralloc(type);
600 	rp->owner = dp;
601 	initdnsmsg(&m, rp, flags, reqno);
602 	len = convDNS2M(&m, &buf[Udphdrsize], Maxudp);
603 	rrfree(m.qd);
604 	memset(&m, 0, sizeof m);		/* cause trouble */
605 	return len;
606 }
607 
608 void
609 freeanswers(DNSmsg *mp)
610 {
611 	rrfreelist(mp->qd);
612 	rrfreelist(mp->an);
613 	rrfreelist(mp->ns);
614 	rrfreelist(mp->ar);
615 	mp->qd = mp->an = mp->ns = mp->ar = nil;
616 }
617 
618 /* timed read of reply.  sets srcip */
619 static int
620 readnet(Query *qp, int medium, uchar *ibuf, uvlong endms, uchar **replyp,
621 	uchar *srcip)
622 {
623 	int len, fd;
624 	long ms;
625 	vlong startns = nsec();
626 	uchar *reply;
627 	uchar lenbuf[2];
628 
629 	len = -1;			/* pessimism */
630 	ms = endms - NS2MS(startns);
631 	if (ms <= 0)
632 		return -1;		/* taking too long */
633 
634 	reply = ibuf;
635 	memset(srcip, 0, IPaddrlen);
636 	alarm(ms);
637 	if (medium == Udp)
638 		if (qp->udpfd <= 0)
639 			dnslog("readnet: qp->udpfd closed");
640 		else {
641 			len = read(qp->udpfd, ibuf, Udphdrsize+Maxudpin);
642 			alarm(0);
643 			notestats(startns, len < 0, qp->type);
644 			if (len >= IPaddrlen)
645 				memmove(srcip, ibuf, IPaddrlen);
646 			if (len >= Udphdrsize) {
647 				len   -= Udphdrsize;
648 				reply += Udphdrsize;
649 			}
650 		}
651 	else {
652 		if (!qp->tcpset)
653 			dnslog("readnet: tcp params not set");
654 		fd = qp->tcpfd;
655 		if (fd <= 0)
656 			dnslog("readnet: %s: tcp fd unset for dest %I",
657 				qp->dp->name, qp->tcpip);
658 		else if (readn(fd, lenbuf, 2) != 2) {
659 			dnslog("readnet: short read of tcp size from %I",
660 				qp->tcpip);
661 			/* probably a time-out */
662 			notestats(startns, 1, qp->type);
663 		} else {
664 			len = lenbuf[0]<<8 | lenbuf[1];
665 			if (readn(fd, ibuf, len) != len) {
666 				dnslog("readnet: short read of tcp data from %I",
667 					qp->tcpip);
668 				/* probably a time-out */
669 				notestats(startns, 1, qp->type);
670 				len = -1;
671 			}
672 		}
673 		memmove(srcip, qp->tcpip, IPaddrlen);
674 	}
675 	alarm(0);
676 	*replyp = reply;
677 	return len;
678 }
679 
/*
 *  read replies to a request and remember the rrs in the answer(s).
 *  ignore any of the wrong type.
 *  wait at most until endms.
 *
 *  returns 0 once a reply arrives whose id, question owner and question
 *  type match the query; *mp then holds that reply and each answer RR
 *  has been tagged with the query type.
 *  returns -1 on time-out or read failure, and also when a reply has
 *  Ftrunc set (the caller should retry via tcp); in those cases the
 *  RR lists of *mp have already been freed or were never filled in.
 */
static int
readreply(Query *qp, int medium, ushort req, uchar *ibuf, DNSmsg *mp,
	uvlong endms)
{
	int len;
	char *err;
	char tbuf[32];
	uchar *reply;
	uchar srcip[IPaddrlen];
	RR *rp;

	queryck(qp);
	memset(mp, 0, sizeof *mp);
	memset(srcip, 0, sizeof srcip);
	/* never executed; presumably only here so len counts as set -- confirm */
	if (0)
		len = -1;
	/*
	 * each rejected reply is freed by the loop's third expression,
	 * which `continue' and falling off the end of the body both reach.
	 */
	for (; timems() < endms &&
	    (len = readnet(qp, medium, ibuf, endms, &reply, srcip)) >= 0;
	    freeanswers(mp)){
		/* convert into internal format  */
		memset(mp, 0, sizeof *mp);
		err = convM2DNS(reply, len, mp, nil);
		if (mp->flags & Ftrunc) {
			free(err);
			freeanswers(mp);
			/* notify our caller to retry the query via tcp. */
			return -1;
		} else if(err){
			dnslog("readreply: %s: input err, len %d: %s: %I",
				qp->dp->name, len, err, srcip);
			free(err);
			continue;
		}
		if(debug)
			logreply(qp->req->id, srcip, mp);

		/* answering the right question? */
		if(mp->id != req)
			dnslog("%d: id %d instead of %d: %I", qp->req->id,
				mp->id, req, srcip);
		else if(mp->qd == 0)
			dnslog("%d: no question RR: %I", qp->req->id, srcip);
		else if(mp->qd->owner != qp->dp)
			dnslog("%d: owner %s instead of %s: %I", qp->req->id,
				mp->qd->owner->name, qp->dp->name, srcip);
		else if(mp->qd->type != qp->type)
			dnslog("%d: qp->type %d instead of %d: %I",
				qp->req->id, mp->qd->type, qp->type, srcip);
		else {
			/* remember what request this is in answer to */
			for(rp = mp->an; rp; rp = rp->next)
				rp->query = qp->type;
			return 0;
		}
	}
	if (timems() >= endms) {
		;				/* query expired */
	} else if (0) {
		/* disabled diagnostics: this branch is compiled but never run */
		/* this happens routinely when a read times out */
		dnslog("readreply: %s type %s: ns %I read error or eof "
			"(returned %d): %r", qp->dp->name, rrname(qp->type,
			tbuf, sizeof tbuf), srcip, len);
		if (medium == Udp)
			for (rp = qp->nsrp; rp != nil; rp = rp->next)
				if (rp->type == Tns)
					dnslog("readreply: %s: query sent to "
						"ns %s", qp->dp->name,
						rp->host->name);
	}
	return -1;
}
756 
757 /*
758  *	return non-0 if first list includes second list
759  */
760 int
761 contains(RR *rp1, RR *rp2)
762 {
763 	RR *trp1, *trp2;
764 
765 	for(trp2 = rp2; trp2; trp2 = trp2->next){
766 		for(trp1 = rp1; trp1; trp1 = trp1->next)
767 			if(trp1->type == trp2->type)
768 			if(trp1->host == trp2->host)
769 			if(trp1->owner == trp2->owner)
770 				break;
771 		if(trp1 == nil)
772 			return 0;
773 	}
774 	return 1;
775 }
776 
777 
778 /*
779  *  return multicast version if any
780  */
781 int
782 ipisbm(uchar *ip)
783 {
784 	if(isv4(ip)){
785 		if (ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0 ||
786 		    ipcmp(ip, IPv4bcast) == 0)
787 			return 4;
788 	} else
789 		if(ip[0] == 0xff)
790 			return 6;
791 	return 0;
792 }
793 
/*
 *  Get next server address
 *
 *  picks one not-yet-marked name server from qp->nsrp, finds its
 *  addresses (cache, then database, then the network) and appends
 *  them to qp->dest starting at index nd.  returns the new number of
 *  filled-in destinations, or 0 if nd was already at or past Maxdest.
 *
 *  NOTE(review): the fill loop can leave nd == Maxdest, a value that
 *  xmitquery rejects as out of range and aborts on -- confirm whether
 *  that many addresses can occur in practice.
 */
static int
serveraddrs(Query *qp, int nd, int depth)
{
	RR *rp, *arp, *trp;
	Dest *cur;

	if(nd >= Maxdest)
		return 0;

	/*
	 *  look for a server whose address we already know.
	 *  if we find one, mark it so we ignore this on
	 *  subsequent passes.
	 */
	arp = 0;
	for(rp = qp->nsrp; rp; rp = rp->next){
		assert(rp->magic == RRmagic);
		if(rp->marker)
			continue;
		/* v4 address preferred, v6 as fallback; cache before database */
		arp = rrlookup(rp->host, Ta, NOneg);
		if(arp == nil)
			arp = rrlookup(rp->host, Taaaa, NOneg);
		if(arp){
			rp->marker = 1;
			break;
		}
		arp = dblookup(rp->host->name, Cin, Ta, 0, 0);
		if(arp == nil)
			arp = dblookup(rp->host->name, Cin, Taaaa, 0, 0);
		if(arp){
			rp->marker = 1;
			break;
		}
	}

	/*
	 *  if the cache and database lookup didn't find any new
	 *  server addresses, try resolving one via the network.
	 *  Mark any we try to resolve so we don't try a second time.
	 */
	if(arp == 0)
		for(rp = qp->nsrp; rp; rp = rp->next){
			if(rp->marker)
				continue;
			rp->marker = 1;

			/*
			 *  avoid loops looking up a server under itself
			 */
			if(subsume(rp->owner->name, rp->host->name))
				continue;

			arp = dnresolve(rp->host->name, Cin, Ta, qp->req, 0,
				depth+1, Recurse, 1, 0);
			if(arp == nil)
				arp = dnresolve(rp->host->name, Cin, Taaaa,
					qp->req, 0, depth+1, Recurse, 1, 0);
			/* discard negative entries; only real addresses are usable */
			lock(&dnlock);
			rrfreelist(rrremneg(&arp));
			unlock(&dnlock);
			if(arp)
				break;
		}

	/* use any addresses that we found */
	for(trp = arp; trp && nd < Maxdest; trp = trp->next){
		cur = &qp->dest[nd];
		parseip(cur->a, trp->ip->name);
		/*
		 * straddling servers can reject all nameservers if they are all
		 * inside, so be sure to list at least one outside ns at
		 * the end of the ns list in /lib/ndb for `dom='.
		 */
		if (ipisbm(cur->a) ||
		    cfg.straddle && !insideaddr(qp->dp->name) && insidens(cur->a))
			continue;	/* slot nd is reused for the next address */
		cur->nx = 0;
		cur->s = trp->owner;
		cur->code = Rtimeout;	/* until a reply says otherwise */
		nd++;
	}
	rrfreelist(arp);
	return nd;
}
881 
882 /*
883  *  cache negative responses
884  */
885 static void
886 cacheneg(DN *dp, int type, int rcode, RR *soarr)
887 {
888 	RR *rp;
889 	DN *soaowner;
890 	ulong ttl;
891 
892 	stats.negcached++;
893 
894 	/* no cache time specified, don't make anything up */
895 	if(soarr != nil){
896 		if(soarr->next != nil){
897 			rrfreelist(soarr->next);
898 			soarr->next = nil;
899 		}
900 		soaowner = soarr->owner;
901 	} else
902 		soaowner = nil;
903 
904 	/* the attach can cause soarr to be freed so mine it now */
905 	if(soarr != nil && soarr->soa != nil)
906 		ttl = soarr->soa->minttl+now;
907 	else
908 		ttl = 5*Min;
909 
910 	/* add soa and negative RR to the database */
911 	rrattach(soarr, Authoritative);
912 
913 	rp = rralloc(type);
914 	rp->owner = dp;
915 	rp->negative = 1;
916 	rp->negsoaowner = soaowner;
917 	rp->negrcode = rcode;
918 	rp->ttl = ttl;
919 	rrattach(rp, Authoritative);
920 }
921 
922 static int
923 setdestoutns(Dest *p, int n)
924 {
925 	uchar *outns = outsidens(n);
926 
927 	destck(p);
928 	destinit(p);
929 	if (outns == nil) {
930 		if (n == 0)
931 			dnslog("[%d] no outside-ns in ndb", getpid());
932 		return -1;
933 	}
934 	memmove(p->a, outns, sizeof p->a);
935 	p->s = dnlookup("outside-ns-ips", Cin, 1);
936 	return 0;
937 }
938 
/*
 * issue query via UDP or TCP as appropriate.
 * udppkt is a udp packet (header then dns message of len bytes); the
 * address at its start is the destination.  queries addressed to
 * ourselves are refused.
 *
 * Udp: writes the whole packet to qp->udpfd; returns 0 if the write
 * succeeded, else -1.
 * Tcp: dials the destination, leaving the fds in qp->tcpfd and
 * qp->tcpctlfd and the remote address (as reported by getnetconninfo)
 * in qp->tcpip, then sends the message preceded by its 2-byte
 * big-endian length.  returns what dial returned, so a negative value
 * means the dial failed.
 * NOTE(review): a failed tcp write is only logged; the return value
 * still indicates success.
 */
static int
mydnsquery(Query *qp, int medium, uchar *udppkt, int len)
{
	int rv = -1, nfd;
	char *domain;
	char conndir[40];
	uchar belen[2];
	NetConnInfo *nci;

	queryck(qp);
	/* destination address in text form; freed on every path below */
	domain = smprint("%I", udppkt);
	if (myaddr(domain)) {
		dnslog("mydnsquery: trying to send to myself (%s); bzzzt",
			domain);
		free(domain);
		return rv;
	}

	switch (medium) {
	case Udp:
		free(domain);
		/* dup is used only to test that udpfd is still a valid fd */
		nfd = dup(qp->udpfd, -1);
		if (nfd < 0) {
			warning("mydnsquery: qp->udpfd %d: %r", qp->udpfd);
			close(qp->udpfd);	/* ensure it's closed */
			qp->udpfd = -1;		/* poison it */
			return rv;
		}
		close(nfd);

		if (qp->udpfd <= 0)
			dnslog("mydnsquery: qp->udpfd %d closed", qp->udpfd);
		else {
			if (write(qp->udpfd, udppkt, len+Udphdrsize) !=
			    len+Udphdrsize)
				warning("sending udp msg: %r");
			else {
				stats.qsent++;
				rv = 0;
			}
		}
		break;
	case Tcp:
		/* send via TCP & keep fd around for reply */
		alarm(10*1000);		/* don't let the dial hang for more than 10 s. */
		qp->tcpfd = rv = dial(netmkaddr(domain, "tcp", "dns"), nil,
			conndir, &qp->tcpctlfd);
		alarm(0);
		if (qp->tcpfd < 0) {
			dnslog("can't dial tcp!%s!dns: %r", domain);
			free(domain);
			break;
		}
		free(domain);
		nci = getnetconninfo(conndir, qp->tcpfd);
		if (nci) {
			parseip(qp->tcpip, nci->rsys);
			freenetconninfo(nci);
		} else
			dnslog("mydnsquery: getnetconninfo failed");
		qp->tcpset = 1;

		/* over tcp, the message is preceded by its length; no udp header */
		belen[0] = len >> 8;
		belen[1] = len;
		if (write(qp->tcpfd, belen, 2) != 2 ||
		    write(qp->tcpfd, udppkt + Udphdrsize, len) != len)
			warning("sending tcp msg: %r");
		break;
	default:
		sysfatal("mydnsquery: bad medium");
	}
	return rv;
}
1016 
/*
 * send query to all UDP destinations or one TCP destination,
 * taken from obuf (udp packet) header
 *
 * obuf holds the udp header followed by the len-byte dns message;
 * for Udp the header's address is overwritten with each destination
 * in turn.  inns only affects the process name shown while querying.
 *
 * returns -1 if the request's abort time has passed, if there are no
 * name servers to ask, or if no destination remains that may still be
 * sent to; otherwise 0.  the results of the individual sends
 * (mydnsquery) are not examined.
 */
static int
xmitquery(Query *qp, int medium, int depth, uchar *obuf, int inns, int len)
{
	int j, n;
	char buf[32];
	Dest *p;

	queryck(qp);
	if(timems() >= qp->req->aborttime)
		return -1;

	/*
	 * get a nameserver address if we need one.
	 * serveraddrs populates qp->dest.
	 */
	p = qp->dest;
	destck(p);
	if (qp->ndest < 0 || qp->ndest > Maxdest)
		dnslog("qp->ndest %d out of range", qp->ndest);
	/* fewer destinations filled in (curdest - dest) than ndest asks for? */
	if (qp->ndest > qp->curdest - p) {
		j = serveraddrs(qp, qp->curdest - p, depth);
		if (j < 0 || j >= Maxdest) {
			dnslog("serveraddrs() result %d out of range", j);
			abort();
		}
		qp->curdest = &qp->dest[j];
	}
	destck(qp->curdest);

	/* no servers, punt */
	if (qp->ndest == 0)
		if (cfg.straddle && cfg.inside) {
			/* get ips of "outside-ns-ips" */
			p = qp->curdest = qp->dest;
			for(n = 0; n < Maxdest; n++, qp->curdest++)
				if (setdestoutns(qp->curdest, n) < 0)
					break;
		} else {
			/* it's probably just a bogus domain, don't log it */
			// dnslog("xmitquery: %s: no nameservers", qp->dp->name);
			return -1;
		}

	/* send to first 'qp->ndest' destinations */
	j = 0;			/* counts destinations not yet exhausted */
	if (medium == Tcp) {
		j++;
		queryck(qp);
		assert(qp->dp);
		procsetname("tcp %sside query for %s %s", (inns? "in": "out"),
			qp->dp->name, rrname(qp->type, buf, sizeof buf));
		mydnsquery(qp, medium, obuf, len); /* sets qp->tcpip from obuf */
		if(debug)
			logsend(qp->req->id, depth, qp->tcpip, "", qp->dp->name,
				qp->type);
	} else
		for(; p < &qp->dest[qp->ndest] && p < qp->curdest; p++){
			/* skip destinations we've finished with */
			if(p->nx >= Maxtrans)
				continue;

			j++;

			/* exponential backoff of requests */
			if((1<<p->nx) > qp->ndest)
				continue;

			procsetname("udp %sside query to %I/%s %s %s",
				(inns? "in": "out"), p->a, p->s->name,
				qp->dp->name, rrname(qp->type, buf, sizeof buf));
			if(debug)
				logsend(qp->req->id, depth, p->a, p->s->name,
					qp->dp->name, qp->type);

			/* fill in UDP destination addr & send it */
			memmove(obuf, p->a, sizeof p->a);
			mydnsquery(qp, medium, obuf, len);
			p->nx++;
		}
	if(j == 0) {
		// dnslog("xmitquery: %s: no destinations left", qp->dp->name);
		return -1;
	}
	return 0;
}
1106 
/*
 * query types that get an index of their own from qtype2lck; the
 * position in this table is that index.  slots beyond the listed
 * entries (if Maxlcks is larger) are zero.
 */
static int lckindex[Maxlcks] = {
	0,			/* all others map here */
	Ta,
	Tns,
	Tcname,
	Tsoa,
	Tptr,
	Tmx,
	Ttxt,
	Taaaa,
};
1118 
1119 static int
1120 qtype2lck(int qtype)		/* map query type to querylck index */
1121 {
1122 	int i;
1123 
1124 	for (i = 1; i < nelem(lckindex); i++)
1125 		if (lckindex[i] == qtype)
1126 			return i;
1127 	return 0;
1128 }
1129 
1130 /* is mp a cachable negative response (with Rname set)? */
1131 static int
1132 isnegrname(DNSmsg *mp)
1133 {
1134 	/* TODO: could add || cfg.justforw to RHS of && */
1135 	return mp->an == nil && (mp->flags & Rmask) == Rname;
1136 }
1137 
/*
 * process the reply *mp, received from srcip, to the query qp;
 * p is the destination the reply is attributed to.
 *
 * the reply's RR lists are consumed: each is either attached to the
 * cache with rrattach or freed here.
 *
 * returns
 *	-1 if the reply was discarded (server failure, or a bad
 *	   delegation carrying no answers);
 *	 1 if it ends the search: it has answers, is authoritative,
 *	   or is a negative Rname response (which is cached);
 *	 0 if it neither answers nor names name servers we don't
 *	   already have (or we are a pure forwarding resolver);
 *	otherwise the result of a new netquery to the name servers
 *	   that the reply referred us to.
 */
static int
procansw(Query *qp, DNSmsg *mp, uchar *srcip, int depth, Dest *p)
{
	int rv;
//	int lcktype;
	char buf[32];
	DN *ndp;
	Query *nqp;
	RR *tp, *soarr;

	if (mp->an == nil)
		stats.negans++;

	/* ignore any error replies */
	if((mp->flags & Rmask) == Rserver){
		stats.negserver++;
		freeanswers(mp);
		if(p != qp->curdest)
			p->code = Rserver;
		return -1;
	}

	/* ignore any bad delegations */
	if(mp->ns && baddelegation(mp->ns, qp->nsrp, srcip)){
		stats.negbaddeleg++;
		if(mp->an == nil){
			stats.negbdnoans++;
			freeanswers(mp);
			if(p != qp->curdest)
				p->code = Rserver;
			return -1;
		}
		/* keep the answers, but don't trust the delegation */
		rrfreelist(mp->ns);
		mp->ns = nil;
	}

	/* remove any soa's from the authority section */
	lock(&dnlock);
	soarr = rrremtype(&mp->ns, Tsoa);

	/* incorporate answers */
	unique(mp->an);
	unique(mp->ns);
	unique(mp->ar);
	unlock(&dnlock);
	if(mp->an)
		rrattach(mp->an, (mp->flags & Fauth) != 0);
	if(mp->ar)
		rrattach(mp->ar, Notauthoritative);
	if(mp->ns && !cfg.justforw){
		/* remember whose name servers these are before giving the list away */
		ndp = mp->ns->owner;
		rrattach(mp->ns, Notauthoritative);
	} else {
		ndp = nil;
		rrfreelist(mp->ns);
		mp->ns = nil;
	}

	/* free the question */
	if(mp->qd) {
		rrfreelist(mp->qd);
		mp->qd = nil;
	}

	/*
	 *  Any reply from an authoritative server,
	 *  or a positive reply terminates the search.
	 *  A negative response now also terminates the search.
	 */
	if(mp->an != nil || (mp->flags & Fauth)){
		if(isnegrname(mp))
			qp->dp->respcode = Rname;
		else
			qp->dp->respcode = Rok;

		/*
		 *  cache any negative responses, free soarr.
		 *  negative responses need not be authoritative:
		 *  they can legitimately come from a cache.
		 */
		if( /* (mp->flags & Fauth) && */ mp->an == nil)
			cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
		else
			rrfreelist(soarr);
		return 1;
	} else if (isnegrname(mp)) {
		qp->dp->respcode = Rname;
		/*
		 *  cache negative response.
		 *  negative responses need not be authoritative:
		 *  they can legitimately come from a cache.
		 */
		cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
		return 1;
	}
	stats.negnorname++;
	rrfreelist(soarr);

	/*
	 *  if we've been given better name servers, recurse.
	 *  if we're a pure resolver, don't recurse, we have
	 *  to forward to a fixed set of named servers.
	 */
	if(!mp->ns || cfg.resolver && cfg.justforw)
		return 0;
	tp = rrlookup(ndp, Tns, NOneg);
	/* being referred to servers we are already asking gets us nowhere */
	if(contains(qp->nsrp, tp)){
		rrfreelist(tp);
		return 0;
	}
	procsetname("recursive query for %s %s", qp->dp->name,
		rrname(qp->type, buf, sizeof buf));
	/*
	 *  we're called from udpquery, called from
	 *  netquery, which current holds qp->dp->querylck,
	 *  so release it now and acquire it upon return.
	 */
//	lcktype = qtype2lck(qp->type);
//	qunlock(&qp->dp->querylck[lcktype]);

	/* same question, new Query, aimed at the servers we were referred to */
	nqp = emalloc(sizeof *nqp);
	queryinit(nqp, qp->dp, qp->type, qp->req);
	nqp->nsrp = tp;
	rv = netquery(nqp, depth+1);

//	qlock(&qp->dp->querylck[lcktype]);
	rrfreelist(nqp->nsrp);
	querydestroy(nqp);
	free(nqp);
	return rv;
}
1269 
1270 /*
1271  * send a query via tcp to a single address (from ibuf's udp header)
1272  * and read the answer(s) into mp->an.
1273  */
1274 static int
1275 tcpquery(Query *qp, DNSmsg *mp, int depth, uchar *ibuf, uchar *obuf, int len,
1276 	ulong waitms, int inns, ushort req)
1277 {
1278 	int rv = 0;
1279 	uvlong endms;
1280 
1281 	endms = timems() + waitms;
1282 	if(endms > qp->req->aborttime)
1283 		endms = qp->req->aborttime;
1284 
1285 	if (0)
1286 		dnslog("%s: udp reply truncated; retrying query via tcp to %I",
1287 			qp->dp->name, qp->tcpip);
1288 
1289 	qlock(&qp->tcplock);
1290 	memmove(obuf, ibuf, IPaddrlen);		/* send back to respondent */
1291 	/* sets qp->tcpip from obuf's udp header */
1292 	if (xmitquery(qp, Tcp, depth, obuf, inns, len) < 0 ||
1293 	    readreply(qp, Tcp, req, ibuf, mp, endms) < 0)
1294 		rv = -1;
1295 	if (qp->tcpfd > 0) {
1296 		hangup(qp->tcpctlfd);
1297 		close(qp->tcpctlfd);
1298 		close(qp->tcpfd);
1299 	}
1300 	qp->tcpfd = qp->tcpctlfd = -1;
1301 	qunlock(&qp->tcplock);
1302 	return rv;
1303 }
1304 
1305 /*
1306  *  query name servers.  If the name server returns a pointer to another
1307  *  name server, recurse.
1308  */
1309 static int
1310 queryns(Query *qp, int depth, uchar *ibuf, uchar *obuf, ulong waitms, int inns)
1311 {
1312 	int ndest, len, replywaits, rv;
1313 	ushort req;
1314 	uvlong endms;
1315 	char buf[12];
1316 	uchar srcip[IPaddrlen];
1317 	Dest *p, *np, *dest;
1318 
1319 	/* pack request into a udp message */
1320 	req = rand();
1321 	len = mkreq(qp->dp, qp->type, obuf, Frecurse|Oquery, req);
1322 
1323 	/* no server addresses yet */
1324 	queryck(qp);
1325 	dest = emalloc(Maxdest * sizeof *dest);	/* dest can't be on stack */
1326 	for (p = dest; p < dest + Maxdest; p++)
1327 		destinit(p);
1328 	/* this dest array is local to this call of queryns() */
1329 	free(qp->dest);
1330 	qp->curdest = qp->dest = dest;
1331 
1332 	/*
1333 	 *  transmit udp requests and wait for answers.
1334 	 *  at most Maxtrans attempts to each address.
1335 	 *  each cycle send one more message than the previous.
1336 	 *  retry a query via tcp if its response is truncated.
1337 	 */
1338 	for(ndest = 1; ndest < Maxdest; ndest++){
1339 		qp->ndest = ndest;
1340 		qp->tcpset = 0;
1341 		if (xmitquery(qp, Udp, depth, obuf, inns, len) < 0)
1342 			break;
1343 
1344 		endms = timems() + waitms;
1345 		if(endms > qp->req->aborttime)
1346 			endms = qp->req->aborttime;
1347 
1348 		for(replywaits = 0; replywaits < ndest; replywaits++){
1349 			DNSmsg m;
1350 
1351 			procsetname("reading %sside reply from %I: %s %s from %s",
1352 				(inns? "in": "out"), obuf, qp->dp->name,
1353 				rrname(qp->type, buf, sizeof buf), qp->req->from);
1354 
1355 			/* read udp answer into m */
1356 			if (readreply(qp, Udp, req, ibuf, &m, endms) >= 0)
1357 				memmove(srcip, ibuf, IPaddrlen);
1358 			else if (!(m.flags & Ftrunc)) {
1359 				freeanswers(&m);
1360 				break;		/* timed out on this dest */
1361 			} else {
1362 				/* whoops, it was truncated! ask again via tcp */
1363 				freeanswers(&m);
1364 				rv = tcpquery(qp, &m, depth, ibuf, obuf, len,
1365 					waitms, inns, req);  /* answer in m */
1366 				if (rv < 0) {
1367 					freeanswers(&m);
1368 					break;		/* failed via tcp too */
1369 				}
1370 				memmove(srcip, qp->tcpip, IPaddrlen);
1371 			}
1372 
1373 			/* find responder */
1374 			// dnslog("queryns got reply from %I", srcip);
1375 			for(p = qp->dest; p < qp->curdest; p++)
1376 				if(memcmp(p->a, srcip, sizeof p->a) == 0)
1377 					break;
1378 
1379 			/* remove all addrs of responding server from list */
1380 			for(np = qp->dest; np < qp->curdest; np++)
1381 				if(np->s == p->s)
1382 					p->nx = Maxtrans;
1383 
1384 			/* free or incorporate RRs in m */
1385 			rv = procansw(qp, &m, srcip, depth, p);
1386 			if (rv > 0) {
1387 				free(qp->dest);
1388 				qp->dest = qp->curdest = nil; /* prevent accidents */
1389 				return rv;
1390 			}
1391 		}
1392 	}
1393 
1394 	/* if all servers returned failure, propagate it */
1395 	qp->dp->respcode = Rserver;
1396 	for(p = dest; p < qp->curdest; p++) {
1397 		destck(p);
1398 		if(p->code != Rserver)
1399 			qp->dp->respcode = Rok;
1400 		p->magic = 0;			/* prevent accidents */
1401 	}
1402 
1403 //	if (qp->dp->respcode)
1404 //		dnslog("queryns setting Rserver for %s", qp->dp->name);
1405 
1406 	free(qp->dest);
1407 	qp->dest = qp->curdest = nil;		/* prevent accidents */
1408 	return 0;
1409 }
1410 
1411 /*
1412  *  run a command with a supplied fd as standard input
1413  */
1414 char *
1415 system(int fd, char *cmd)
1416 {
1417 	int pid, p, i;
1418 	static Waitmsg msg;
1419 
1420 	if((pid = fork()) == -1)
1421 		sysfatal("fork failed: %r");
1422 	else if(pid == 0){
1423 		dup(fd, 0);
1424 		close(fd);
1425 		for (i = 3; i < 200; i++)
1426 			close(i);		/* don't leak fds */
1427 		execl("/bin/rc", "rc", "-c", cmd, nil);
1428 		sysfatal("exec rc: %r");
1429 	}
1430 	for(p = waitpid(); p >= 0; p = waitpid())
1431 		if(p == pid)
1432 			return msg.msg;
1433 	return "lost child";
1434 }
1435 
1436 /* compute wait, weighted by probability of success, with bounds */
1437 static ulong
1438 weight(ulong ms, unsigned pcntprob)
1439 {
1440 	ulong wait;
1441 
1442 	wait = (ms * pcntprob) / 100;
1443 	if (wait < Minwaitms)
1444 		wait = Minwaitms;
1445 	if (wait > Maxwaitms)
1446 		wait = Maxwaitms;
1447 	return wait;
1448 }
1449 
1450 /*
1451  * in principle we could use a single descriptor for a udp port
1452  * to send all queries and receive all the answers to them,
1453  * but we'd have to sort out the answers by dns-query id.
1454  */
1455 static int
1456 udpquery(Query *qp, char *mntpt, int depth, int patient, int inns)
1457 {
1458 	int fd, rv;
1459 	long now;
1460 	ulong pcntprob;
1461 	uvlong wait, reqtm;
1462 	char *msg;
1463 	uchar *obuf, *ibuf;
1464 	static QLock mntlck;
1465 	static ulong lastmount;
1466 
1467 	/* use alloced buffers rather than ones from the stack */
1468 	// ibuf = emalloc(Maxudpin+Udphdrsize);
1469 	ibuf = emalloc(64*1024);		/* max. tcp reply size */
1470 	obuf = emalloc(Maxudp+Udphdrsize);
1471 
1472 	fd = udpport(mntpt);
1473 	while (fd < 0 && cfg.straddle && strcmp(mntpt, "/net.alt") == 0) {
1474 		/* HACK: remount /net.alt */
1475 		now = time(nil);
1476 		if (now < lastmount + Remntretry)
1477 			sleep(S2MS(lastmount + Remntretry - now));
1478 		qlock(&mntlck);
1479 		fd = udpport(mntpt);	/* try again under lock */
1480 		if (fd < 0) {
1481 			dnslog("[%d] remounting /net.alt", getpid());
1482 			unmount(nil, "/net.alt");
1483 
1484 			msg = system(open("/dev/null", ORDWR), "outside");
1485 
1486 			lastmount = time(nil);
1487 			if (msg && *msg) {
1488 				dnslog("[%d] can't remount /net.alt: %s",
1489 					getpid(), msg);
1490 				sleep(10*1000);	/* don't spin remounting */
1491 			} else
1492 				fd = udpport(mntpt);
1493 		}
1494 		qunlock(&mntlck);
1495 	}
1496 	if (fd < 0) {
1497 		dnslog("can't get udpport for %s query of name %s: %r",
1498 			mntpt, qp->dp->name);
1499 		sysfatal("out of udp conversations");	/* we're buggered */
1500 	}
1501 
1502 	/*
1503 	 * Our QIP servers are busted and respond to AAAA and CNAME queries
1504 	 * with (sometimes malformed [too short] packets and) no answers and
1505 	 * just NS RRs but not Rname errors.  so make time-to-wait
1506 	 * proportional to estimated probability of an RR of that type existing.
1507 	 */
1508 	if (qp->type >= nelem(likely))
1509 		pcntprob = 35;			/* unpopular query type */
1510 	else
1511 		pcntprob = likely[qp->type];
1512 	reqtm = (patient? 2 * Maxreqtm: Maxreqtm);
1513 	wait = weight(reqtm / 3, pcntprob);	/* time for one udp query */
1514 	qp->req->aborttime = timems() + 3*wait; /* for all udp queries */
1515 
1516 	qp->udpfd = fd;
1517 	rv = queryns(qp, depth, ibuf, obuf, wait, inns);
1518 	close(fd);
1519 	qp->udpfd = -1;
1520 
1521 	free(obuf);
1522 	free(ibuf);
1523 	return rv;
1524 }
1525 
1526 /*
1527  * look up (qp->dp->name, qp->type) rr in dns,
1528  * using nameservers in qp->nsrp.
1529  */
1530 static int
1531 netquery(Query *qp, int depth)
1532 {
1533 	int lock, rv, triedin, inname;
1534 	char buf[32];
1535 	RR *rp;
1536 	DN *dp;
1537 	Querylck *qlp;
1538 	static int whined;
1539 
1540 	rv = 0;				/* pessimism */
1541 	if(depth > 12)			/* in a recursive loop? */
1542 		return 0;
1543 
1544 	slave(qp->req);
1545 	/*
1546 	 * slave might have forked.  if so, the parent process longjmped to
1547 	 * req->mret; we're usually the child slave, but if there are too
1548 	 * many children already, we're still the same process.
1549 	 */
1550 
1551 	/*
1552 	 * don't lock before call to slave so only children can block.
1553 	 * just lock at top-level invocation.
1554 	 */
1555 	lock = depth <= 1 && qp->req->isslave;
1556 	dp = qp->dp;		/* ensure that it doesn't change underfoot */
1557 	qlp = nil;
1558 	if(lock) {
1559 		procsetname("query lock wait: %s %s from %s", dp->name,
1560 			rrname(qp->type, buf, sizeof buf), qp->req->from);
1561 		/*
1562 		 * don't make concurrent queries for this name.
1563 		 * dozens of processes blocking here probably indicates
1564 		 * an error in our dns data that causes us to not
1565 		 * recognise a zone (area) as one of our own, thus
1566 		 * causing us to query other nameservers.
1567 		 */
1568 		qlp = &dp->querylck[qtype2lck(qp->type)];
1569 		qlock(qlp);
1570 		if (qlp->Ref.ref > Maxoutstanding) {
1571 			qunlock(qlp);
1572 			if (!whined) {
1573 				whined = 1;
1574 				dnslog("too many outstanding queries for %s;"
1575 					" dropping this one; no further logging"
1576 					" of drops", dp->name);
1577 			}
1578 			return 0;
1579 		}
1580 		++qlp->Ref.ref;
1581 		qunlock(qlp);
1582 	}
1583 	procsetname("netquery: %s", dp->name);
1584 
1585 	/* prepare server RR's for incremental lookup */
1586 	for(rp = qp->nsrp; rp; rp = rp->next)
1587 		rp->marker = 0;
1588 
1589 	triedin = 0;
1590 
1591 	/*
1592 	 * normal resolvers and servers will just use mntpt for all addresses,
1593 	 * even on the outside.  straddling servers will use mntpt (/net)
1594 	 * for inside addresses and /net.alt for outside addresses,
1595 	 * thus bypassing other inside nameservers.
1596 	 */
1597 	inname = insideaddr(dp->name);
1598 	if (!cfg.straddle || inname) {
1599 		rv = udpquery(qp, mntpt, depth, Hurry, (cfg.inside? Inns: Outns));
1600 		triedin = 1;
1601 	}
1602 
1603 	/*
1604 	 * if we're still looking, are inside, and have an outside domain,
1605 	 * try it on our outside interface, if any.
1606 	 */
1607 	if (rv == 0 && cfg.inside && !inname) {
1608 		if (triedin)
1609 			dnslog(
1610 	   "[%d] netquery: internal nameservers failed for %s; trying external",
1611 				getpid(), dp->name);
1612 
1613 		/* prepare server RR's for incremental lookup */
1614 		for(rp = qp->nsrp; rp; rp = rp->next)
1615 			rp->marker = 0;
1616 
1617 		rv = udpquery(qp, "/net.alt", depth, Patient, Outns);
1618 	}
1619 //	if (rv == 0)		/* could ask /net.alt/dns directly */
1620 //		askoutdns(dp, qp->type);
1621 
1622 	if(lock && qlp) {
1623 		qlock(qlp);
1624 		assert(qlp->Ref.ref > 0);
1625 		qunlock(qlp);
1626 		decref(qlp);
1627 	}
1628 	return rv;
1629 }
1630 
1631 int
1632 seerootns(void)
1633 {
1634 	int rv;
1635 	char root[] = "";
1636 	Request req;
1637 	Query *qp;
1638 
1639 	memset(&req, 0, sizeof req);
1640 	req.isslave = 1;
1641 	req.aborttime = timems() + Maxreqtm;
1642 	req.from = "internal";
1643 	qp = emalloc(sizeof *qp);
1644 	queryinit(qp, dnlookup(root, Cin, 1), Tns, &req);
1645 
1646 	qp->nsrp = dblookup(root, Cin, Tns, 0, 0);
1647 	rv = netquery(qp, 0);
1648 
1649 	rrfreelist(qp->nsrp);
1650 	querydestroy(qp);
1651 	free(qp);
1652 	return rv;
1653 }
1654