xref: /plan9/sys/src/cmd/ndb/dnresolve.c (revision 817306324ffbda47c94e9f972adb5081c6257d74)
1 /*
2  * domain name resolvers, see rfcs 1035 and 1123
3  */
4 #include <u.h>
5 #include <libc.h>
6 #include <ip.h>
7 #include <bio.h>
8 #include <ndb.h>
9 #include "dns.h"
10 
/*
 * time-unit conversions.  both are plain arithmetic on the operand, so
 * the result has whatever type the usual conversions give it; S2MS in
 * particular can overflow if the operand's type is too narrow to hold
 * the count of milliseconds.
 */
#define NS2MS(ns) ((ns) / 1000000L)	/* nanoseconds to milliseconds */
#define S2MS(s)   ((s) * 1000)		/* seconds to milliseconds */
13 
14 typedef struct Dest Dest;
15 typedef struct Ipaddr Ipaddr;
16 typedef struct Query Query;
17 
enum
{
	/* Udp and Tcp name the medium over which a query is sent or read */
	Udp, Tcp,
	Maxdest=	24,	/* maximum destinations for a request message */
	Maxtrans=	3,	/* maximum transmissions to a server */
	Destmagic=	0xcafebabe,	/* stamp verified by destck */
	Querymagic=	0xdeadbeef,	/* stamp verified by queryck */
};
/* NOTE(review): presumably values for udpquery's `patient' argument; confirm */
enum { Hurry, Patient, };
/* NOTE(review): presumably values for the `inns' arguments; confirm */
enum { Outns, Inns, };
enum { Remntretry = 15, };	/* min. sec.s between remount attempts */
29 
/* one ip address, linkable into a singly-linked list */
struct Ipaddr {
	Ipaddr *next;		/* next address in the list */
	uchar	ip[IPaddrlen];	/* the address itself */
};
34 
/*
 * a destination for a query: one address of one name server, plus
 * how often we've sent to it and what became of it.
 */
struct Dest
{
	uchar	a[IPaddrlen];	/* ip address */
	DN	*s;		/* name server */
	int	nx;		/* number of transmissions */
	int	code;		/* response code; used to clear dp->respcode */

	ulong	magic;		/* Destmagic while valid; see destinit, destck */
};
44 
/*
 * state of one outstanding query: what is being asked, of whom,
 * and the descriptors used to ask it.  set up by queryinit and
 * torn down by querydestroy.
 */
struct Query {
	DN	*dp;		/* domain */
	int	type;		/* and type to look up */
	Request *req;		/* request on whose behalf we're asking */
	RR	*nsrp;		/* name servers to consult */

	/* dest must not be on the stack due to forking in slave() */
	Dest	*dest;		/* array of destinations */
	Dest	*curdest;	/* pointer to one of them */
	int	ndest;		/* how many destinations xmitquery may send to */

	int	udpfd;		/* can be shared by all udp users */

	QLock	tcplock;	/* only one tcp call at a time per query */
	int	tcpset;		/* set once mydnsquery has dialed via tcp */
	int	tcpfd;		/* if Tcp, read replies from here */
	int	tcpctlfd;	/* control fd of the tcp connection */
	uchar	tcpip[IPaddrlen];	/* remote address of the tcp connection */

	ulong	magic;		/* Querymagic while valid; see queryck */
};
66 
/*
 * estimated % probability of such a record existing at all,
 * indexed by RR type.  types not listed here are implicitly 0.
 * NOTE(review): no use of this table is visible in this part of
 * the file.
 */
int likely[] = {
	[Ta]		95,
	[Taaaa]		10,
	[Tcname]	15,
	[Tmx]		60,
	[Tns]		90,
	[Tnull]		5,
	[Tptr]		35,
	[Tsoa]		90,
	[Tsrv]		60,
	[Ttxt]		15,
	[Tall]		95,
};
81 
82 static RR*	dnresolve1(char*, int, int, Request*, int, int);
83 static int	netquery(Query *, int);
84 
85 /*
86  * reading /proc/pid/args yields either "name" or "name [display args]",
87  * so return only display args, if any.
88  */
89 static char *
90 procgetname(void)
91 {
92 	int fd, n;
93 	char *lp, *rp;
94 	char buf[256];
95 
96 	snprint(buf, sizeof buf, "#p/%d/args", getpid());
97 	if((fd = open(buf, OREAD)) < 0)
98 		return strdup("");
99 	*buf = '\0';
100 	n = read(fd, buf, sizeof buf-1);
101 	close(fd);
102 	if (n >= 0)
103 		buf[n] = '\0';
104 	if ((lp = strchr(buf, '[')) == nil ||
105 	    (rp = strrchr(buf, ']')) == nil)
106 		return strdup("");
107 	*rp = '\0';
108 	return strdup(lp+1);
109 }
110 
111 /*
112  *  lookup 'type' info for domain name 'name'.  If it doesn't exist, try
113  *  looking it up as a canonical name.
114  */
115 RR*
116 dnresolve(char *name, int class, int type, Request *req, RR **cn, int depth,
117 	int recurse, int rooted, int *status)
118 {
119 	RR *rp, *nrp, *drp;
120 	DN *dp;
121 	int loops;
122 	char *procname;
123 	char nname[Domlen];
124 
125 	if(status)
126 		*status = 0;
127 
128 	if(depth > 12)			/* in a recursive loop? */
129 		return nil;
130 
131 	procname = procgetname();
132 	/*
133 	 *  hack for systems that don't have resolve search
134 	 *  lists.  Just look up the simple name in the database.
135 	 */
136 	if(!rooted && strchr(name, '.') == 0){
137 		rp = nil;
138 		drp = domainlist(class);
139 		for(nrp = drp; rp == nil && nrp != nil; nrp = nrp->next){
140 			snprint(nname, sizeof nname, "%s.%s", name,
141 				nrp->ptr->name);
142 			rp = dnresolve(nname, class, type, req, cn, depth+1,
143 				recurse, rooted, status);
144 			rrfreelist(rrremneg(&rp));
145 		}
146 		if(drp != nil)
147 			rrfree(drp);
148 		procsetname(procname);
149 		free(procname);
150 		return rp;
151 	}
152 
153 	/*
154 	 *  try the name directly
155 	 */
156 	rp = dnresolve1(name, class, type, req, depth, recurse);
157 	if(rp) {
158 		procsetname(procname);
159 		free(procname);
160 		return randomize(rp);
161 	}
162 
163 	/* try it as a canonical name if we weren't told the name didn't exist */
164 	dp = dnlookup(name, class, 0);
165 	if(type != Tptr && dp->respcode != Rname)
166 		for(loops = 0; rp == nil && loops < 32; loops++){
167 			rp = dnresolve1(name, class, Tcname, req, depth, recurse);
168 			if(rp == nil)
169 				break;
170 
171 			if(rp->negative){
172 				rrfreelist(rp);
173 				rp = nil;
174 				break;
175 			}
176 
177 			name = rp->host->name;
178 			if(cn)
179 				rrcat(cn, rp);
180 			else
181 				rrfreelist(rp);
182 
183 			rp = dnresolve1(name, class, type, req, depth, recurse);
184 		}
185 
186 	/* distinction between not found and not good */
187 	if(rp == nil && status != nil && dp->respcode != 0)
188 		*status = dp->respcode;
189 
190 	procsetname(procname);
191 	free(procname);
192 	return randomize(rp);
193 }
194 
195 static void
196 queryinit(Query *qp, DN *dp, int type, Request *req)
197 {
198 	memset(qp, 0, sizeof *qp);
199 	qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
200 	qp->dp = dp;
201 	qp->type = type;
202 	qp->req = req;
203 	qp->nsrp = nil;
204 	qp->dest = qp->curdest = nil;
205 	qp->magic = Querymagic;
206 }
207 
208 static void
209 queryck(Query *qp)
210 {
211 	assert(qp);
212 	assert(qp->magic == Querymagic);
213 }
214 
215 static void
216 querydestroy(Query *qp)
217 {
218 	queryck(qp);
219 	/* leave udpfd alone */
220 	if (qp->tcpfd > 0)
221 		close(qp->tcpfd);
222 	if (qp->tcpctlfd > 0) {
223 		hangup(qp->tcpctlfd);
224 		close(qp->tcpctlfd);
225 	}
226 	free(qp->dest);
227 	memset(qp, 0, sizeof *qp);	/* prevent accidents */
228 	qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
229 }
230 
231 static void
232 destinit(Dest *p)
233 {
234 	memset(p, 0, sizeof *p);
235 	p->magic = Destmagic;
236 }
237 
238 static void
239 destck(Dest *p)
240 {
241 	assert(p);
242 	assert(p->magic == Destmagic);
243 }
244 
245 static void
246 destdestroy(Dest *p)
247 {
248 	USED(p);
249 }
250 
251 /*
252  * if the response to a query hasn't arrived within 100 ms.,
253  * it's unlikely to arrive at all.  after 1 s., it's really unlikely.
254  * queries for missing RRs are likely to produce time-outs rather than
255  * negative responses, so cname and aaaa queries are likely to time out,
256  * thus we don't wait very long for them.
257  */
258 static void
259 notestats(vlong start, int tmout, int type)
260 {
261 	qlock(&stats);
262 	if (tmout) {
263 		stats.tmout++;
264 		if (type == Taaaa)
265 			stats.tmoutv6++;
266 		else if (type == Tcname)
267 			stats.tmoutcname++;
268 	} else {
269 		long wait10ths = NS2MS(nsec() - start) / 100;
270 
271 		if (wait10ths <= 0)
272 			stats.under10ths[0]++;
273 		else if (wait10ths >= nelem(stats.under10ths))
274 			stats.under10ths[nelem(stats.under10ths) - 1]++;
275 		else
276 			stats.under10ths[wait10ths]++;
277 	}
278 	qunlock(&stats);
279 }
280 
281 static void
282 noteinmem(void)
283 {
284 	qlock(&stats);
285 	stats.answinmem++;
286 	qunlock(&stats);
287 }
288 
/*
 *  look up RRs of the given type for name, trying in turn: the cache,
 *  our designated name servers (if running as a resolver), and then
 *  name servers for name and each of its parent domains, found in the
 *  cache or the database.  only class Cin is handled.
 *  returns an RR list or nil.  nil is also returned when a cname for
 *  name is cached and type isn't Tcname, leaving dnresolve() to chase it.
 */
static RR*
dnresolve1(char *name, int class, int type, Request *req, int depth,
	int recurse)
{
	DN *dp, *nsdp;
	RR *rp, *nsrp, *dbnsrp;
	char *cp;
	Query query;

	if(debug)
		dnslog("[%d] dnresolve1 %s %d %d", getpid(), name, type, class);

	/* only class Cin implemented so far */
	if(class != Cin)
		return nil;

	dp = dnlookup(name, class, 1);

	/*
	 *  Try the cache first
	 */
	rp = rrlookup(dp, type, OKneg);
	if(rp)
		if(rp->db){
			/* unauthoritative db entries are hints */
			if(rp->auth) {
				noteinmem();
				return rp;
			}
		} else
			/* cached entry must still be valid */
			if(rp->ttl > now)
				/* but Tall entries are special */
				if(type != Tall || rp->query == Tall) {
					noteinmem();
					return rp;
				}

	/* whatever the cache held was unusable (or there was nothing) */
	rrfreelist(rp);

	/*
	 * try the cache for a canonical name. if found punt
	 * since we'll find it during the canonical name search
	 * in dnresolve().
	 */
	if(type != Tcname){
		rp = rrlookup(dp, Tcname, NOneg);
		rrfreelist(rp);
		/* only rp's value is tested here; it has already been freed */
		if(rp)
			return nil;
	}

	queryinit(&query, dp, type, req);

	/*
	 *  if we're running as just a resolver, query our
	 *  designated name servers
	 */
	if(cfg.resolver){
		nsrp = randomize(getdnsservers(class));
		if(nsrp != nil) {
			query.nsrp = nsrp;
			if(netquery(&query, depth+1)){
				rrfreelist(nsrp);
				querydestroy(&query);
				return rrlookup(dp, type, OKneg);
			}
			rrfreelist(nsrp);
		}
	}

	/*
	 *  walk up the domain name looking for
	 *  a name server for the domain.
	 */
	for(cp = name; cp; cp = walkup(cp)){
		/*
		 *  if this is a local (served by us) domain,
		 *  return answer
		 */
		dbnsrp = randomize(dblookup(cp, class, Tns, 0, 0));
		if(dbnsrp && dbnsrp->local){
			rp = dblookup(name, class, type, 1, dbnsrp->ttl);
			rrfreelist(dbnsrp);
			querydestroy(&query);
			return rp;
		}

		/*
		 *  if recursion isn't set, just accept local
		 *  entries
		 */
		if(recurse == Dontrecurse){
			if(dbnsrp)
				rrfreelist(dbnsrp);
			continue;
		}

		/* look for ns in cache */
		nsdp = dnlookup(cp, class, 0);
		nsrp = nil;
		if(nsdp)
			nsrp = randomize(rrlookup(nsdp, Tns, NOneg));

		/* if the entry timed out, ignore it */
		if(nsrp && nsrp->ttl < now){
			rrfreelist(nsrp);
			nsrp = nil;
		}

		if(nsrp){
			/* cached name servers take precedence over db ones */
			rrfreelist(dbnsrp);

			/* query the name servers found in cache */
			query.nsrp = nsrp;
			if(netquery(&query, depth+1)){
				rrfreelist(nsrp);
				querydestroy(&query);
				return rrlookup(dp, type, OKneg);
			}
			rrfreelist(nsrp);
			continue;
		}

		/* use ns from db */
		if(dbnsrp){
			/* try the name servers found in db */
			query.nsrp = dbnsrp;
			if(netquery(&query, depth+1)){
				/* we got an answer */
				rrfreelist(dbnsrp);
				querydestroy(&query);
				return rrlookup(dp, type, NOneg);
			}
			rrfreelist(dbnsrp);
		}
	}
	querydestroy(&query);

	/* settle for a non-authoritative answer */
	rp = rrlookup(dp, type, OKneg);
	if(rp)
		return rp;

	/* noone answered.  try the database, we might have a chance. */
	return dblookup(name, class, type, 0, 0);
}
436 
/*
 *  step from a domain name to its parent.
 *  returns a pointer just past the first '.' in name; if name is
 *  non-empty but has no '.', returns "" (the root); if name is
 *  already empty, returns 0.
 */
char*
walkup(char *name)
{
	char *dot;

	dot = strchr(name, '.');
	if(dot != 0)
		return dot+1;
	if(*name == '\0')
		return 0;
	return "";
}
455 
456 /*
457  *  Get a udp port for sending requests and reading replies.  Put the port
458  *  into "headers" mode.
459  */
460 static char *hmsg = "headers";
461 
462 int
463 udpport(char *mtpt)
464 {
465 	int fd, ctl;
466 	char ds[64], adir[64];
467 
468 	/* get a udp port */
469 	snprint(ds, sizeof ds, "%s/udp!*!0", (mtpt? mtpt: "/net"));
470 	ctl = announce(ds, adir);
471 	if(ctl < 0){
472 		/* warning("can't get udp port"); */
473 		return -1;
474 	}
475 
476 	/* turn on header style interface */
477 	if(write(ctl, hmsg, strlen(hmsg)) , 0){
478 		close(ctl);
479 		warning(hmsg);
480 		return -1;
481 	}
482 
483 	/* grab the data file */
484 	snprint(ds, sizeof ds, "%s/data", adir);
485 	fd = open(ds, ORDWR);
486 	close(ctl);
487 	if(fd < 0)
488 		warning("can't open udp port %s: %r", ds);
489 	return fd;
490 }
491 
492 /* generate a DNS UDP query packet */
493 int
494 mkreq(DN *dp, int type, uchar *buf, int flags, ushort reqno)
495 {
496 	DNSmsg m;
497 	int len;
498 	Udphdr *uh = (Udphdr*)buf;
499 
500 	/* stuff port number into output buffer */
501 	memset(uh, 0, sizeof *uh);
502 	hnputs(uh->rport, 53);
503 
504 	/* make request and convert it to output format */
505 	memset(&m, 0, sizeof m);
506 	m.flags = flags;
507 	m.id = reqno;
508 	m.qd = rralloc(type);
509 	m.qd->owner = dp;
510 	m.qd->type = type;
511 	len = convDNS2M(&m, &buf[Udphdrsize], Maxudp);
512 	rrfree(m.qd);
513 	return len;
514 }
515 
516 /* for alarms in readreply */
517 static void
518 ding(void *x, char *msg)
519 {
520 	USED(x);
521 	if(strcmp(msg, "alarm") == 0)
522 		noted(NCONT);
523 	else
524 		noted(NDFLT);
525 }
526 
527 void
528 freeanswers(DNSmsg *mp)
529 {
530 	rrfreelist(mp->qd);
531 	rrfreelist(mp->an);
532 	rrfreelist(mp->ns);
533 	rrfreelist(mp->ar);
534 	mp->qd = mp->an = mp->ns = mp->ar = nil;
535 }
536 
537 /* sets srcip */
538 static int
539 readnet(Query *qp, int medium, uchar *ibuf, ulong endtime, uchar **replyp,
540 	uchar *srcip)
541 {
542 	int len, fd;
543 	long ms;
544 	vlong startns = nsec();
545 	uchar *reply;
546 	uchar lenbuf[2];
547 
548 	/* timed read of reply */
549 	ms = S2MS(endtime) - NS2MS(startns);
550 	if (ms < 2000)
551 		ms = 2000;	/* give the remote ns a fighting chance */
552 	reply = ibuf;
553 	len = -1;			/* pessimism */
554 	memset(srcip, 0, IPaddrlen);
555 	if (medium == Udp)
556 		if (qp->udpfd <= 0)
557 			dnslog("readnet: qp->udpfd closed");
558 		else {
559 			alarm(ms);
560 			len = read(qp->udpfd, ibuf, Udphdrsize+Maxudpin);
561 			alarm(0);
562 			notestats(startns, len < 0, qp->type);
563 			if (len >= IPaddrlen)
564 				memmove(srcip, ibuf, IPaddrlen);
565 			if (len >= Udphdrsize) {
566 				len   -= Udphdrsize;
567 				reply += Udphdrsize;
568 			}
569 		}
570 	else {
571 		if (!qp->tcpset)
572 			dnslog("readnet: tcp params not set");
573 		alarm(ms);
574 		fd = qp->tcpfd;
575 		if (fd <= 0)
576 			dnslog("readnet: %s: tcp fd unset for dest %I",
577 				qp->dp->name, qp->tcpip);
578 		else if (readn(fd, lenbuf, 2) != 2) {
579 			dnslog("readnet: short read of tcp size from %I",
580 				qp->tcpip);
581 			/* probably a time-out */
582 			notestats(startns, 1, qp->type);
583 		} else {
584 			len = lenbuf[0]<<8 | lenbuf[1];
585 			if (readn(fd, ibuf, len) != len) {
586 				dnslog("readnet: short read of tcp data from %I",
587 					qp->tcpip);
588 				/* probably a time-out */
589 				notestats(startns, 1, qp->type);
590 				len = -1;
591 			}
592 		}
593 		alarm(0);
594 		memmove(srcip, qp->tcpip, IPaddrlen);
595 	}
596 	*replyp = reply;
597 	return len;
598 }
599 
/*
 *  read replies to a request and remember the rrs in the answer(s).
 *  ignore any of the wrong type.
 *  wait at most until endtime.
 *
 *  a reply is accepted only if its id is req and its question matches
 *  qp's domain and type; its answers are then tagged with qp->type
 *  and 0 is returned with the reply in *mp.
 *  returns -1 if time ran out, reading failed, or the reply was
 *  truncated; in the last case mp->flags still has Ftrunc set, since
 *  freeanswers leaves the flags alone.
 */
static int
readreply(Query *qp, int medium, ushort req, uchar *ibuf, DNSmsg *mp,
	ulong endtime)
{
	int len, rv;
	char *err;
	char tbuf[32];
	uchar *reply;
	uchar srcip[IPaddrlen];
	RR *rp;

	/* so that readnet's alarms interrupt its reads instead of killing us */
	notify(ding);

	queryck(qp);
	rv = 0;
	memset(mp, 0, sizeof *mp);
	if (time(nil) >= endtime)
		return -1;		/* timed out before we started */

	memset(srcip, 0, sizeof srcip);
	/* never executed.  NOTE(review): presumably quiets a used-before-set warning for len */
	if (0)
		len = -1;
	/* each rejected reply is freed by the loop's increment before the next read */
	for (; time(nil) < endtime &&
	    (len = readnet(qp, medium, ibuf, endtime, &reply, srcip)) >= 0;
	    freeanswers(mp)){
		/* convert into internal format  */
		memset(mp, 0, sizeof *mp);
		err = convM2DNS(reply, len, mp, nil);
		if (mp->flags & Ftrunc) {
			free(err);
			freeanswers(mp);
			/* notify our caller to retry the query via tcp. */
			return -1;
		} else if(err){
			dnslog("readreply: %s: input err, len %d: %s: %I",
				qp->dp->name, len, err, srcip);
			free(err);
			continue;
		}
		if(debug)
			logreply(qp->req->id, srcip, mp);

		/* answering the right question? */
		if(mp->id != req)
			dnslog("%d: id %d instead of %d: %I", qp->req->id,
				mp->id, req, srcip);
		else if(mp->qd == 0)
			dnslog("%d: no question RR: %I", qp->req->id, srcip);
		else if(mp->qd->owner != qp->dp)
			dnslog("%d: owner %s instead of %s: %I", qp->req->id,
				mp->qd->owner->name, qp->dp->name, srcip);
		else if(mp->qd->type != qp->type)
			dnslog("%d: qp->type %d instead of %d: %I",
				qp->req->id, mp->qd->type, qp->type, srcip);
		else {
			/* remember what request this is in answer to */
			for(rp = mp->an; rp; rp = rp->next)
				rp->query = qp->type;
			return rv;
		}
	}
	if (time(nil) >= endtime) {
		;				/* query expired */
	} else if (0) {
		/* disabled logging, kept for debugging */
		/* this happens routinely when a read times out */
		dnslog("readreply: %s type %s: ns %I read error or eof "
			"(returned %d): %r", qp->dp->name, rrname(qp->type,
			tbuf, sizeof tbuf), srcip, len);
		if (medium == Udp)
			for (rp = qp->nsrp; rp != nil; rp = rp->next)
				if (rp->type == Tns)
					dnslog("readreply: %s: query sent to "
						"ns %s", qp->dp->name,
						rp->host->name);
	}
	return -1;
}
682 
683 /*
684  *	return non-0 if first list includes second list
685  */
686 int
687 contains(RR *rp1, RR *rp2)
688 {
689 	RR *trp1, *trp2;
690 
691 	for(trp2 = rp2; trp2; trp2 = trp2->next){
692 		for(trp1 = rp1; trp1; trp1 = trp1->next)
693 			if(trp1->type == trp2->type)
694 			if(trp1->host == trp2->host)
695 			if(trp1->owner == trp2->owner)
696 				break;
697 		if(trp1 == nil)
698 			return 0;
699 	}
700 	return 1;
701 }
702 
703 
704 /*
705  *  return multicast version if any
706  */
707 int
708 ipisbm(uchar *ip)
709 {
710 	if(isv4(ip)){
711 		if (ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0 ||
712 		    ipcmp(ip, IPv4bcast) == 0)
713 			return 4;
714 	} else
715 		if(ip[0] == 0xff)
716 			return 6;
717 	return 0;
718 }
719 
/*
 *  Get next server address
 *
 *  finds the addresses of one more of qp->nsrp's name servers and
 *  appends them to qp->dest starting at index nd.
 *  returns the new number of filled-in destinations, which is just
 *  nd if no usable address turned up, or 0 if nd is already Maxdest
 *  or more.
 */
static int
serveraddrs(Query *qp, int nd, int depth)
{
	RR *rp, *arp, *trp;
	Dest *cur;

	if(nd >= Maxdest)
		return 0;

	/*
	 *  look for a server whose address we already know.
	 *  if we find one, mark it so we ignore this on
	 *  subsequent passes.
	 */
	arp = 0;
	for(rp = qp->nsrp; rp; rp = rp->next){
		assert(rp->magic == RRmagic);
		if(rp->marker)
			continue;
		/* the cache first, then the database */
		arp = rrlookup(rp->host, Ta, NOneg);
		if(arp){
			rp->marker = 1;
			break;
		}
		arp = dblookup(rp->host->name, Cin, Ta, 0, 0);
		if(arp){
			rp->marker = 1;
			break;
		}
	}

	/*
	 *  if the cache and database lookup didn't find any new
	 *  server addresses, try resolving one via the network.
	 *  Mark any we try to resolve so we don't try a second time.
	 */
	if(arp == 0)
		for(rp = qp->nsrp; rp; rp = rp->next){
			if(rp->marker)
				continue;
			rp->marker = 1;

			/*
			 *  avoid loops looking up a server under itself
			 */
			if(subsume(rp->owner->name, rp->host->name))
				continue;

			arp = dnresolve(rp->host->name, Cin, Ta, qp->req, 0,
				depth+1, Recurse, 1, 0);
			/* negative answers are of no use as addresses */
			rrfreelist(rrremneg(&arp));
			if(arp)
				break;
		}

	/* use any addresses that we found */
	for(trp = arp; trp && nd < Maxdest; trp = trp->next){
		cur = &qp->dest[nd];
		parseip(cur->a, trp->ip->name);
		/*
		 * straddling servers can reject all nameservers if they are all
		 * inside, so be sure to list at least one outside ns at
		 * the end of the ns list in /lib/ndb for `dom='.
		 */
		/* a skipped address's slot is reused by the next one */
		if (ipisbm(cur->a) ||
		    cfg.straddle && !insideaddr(qp->dp->name) && insidens(cur->a))
			continue;
		cur->nx = 0;
		cur->s = trp->owner;
		cur->code = Rtimeout;
		nd++;
	}
	rrfreelist(arp);
	return nd;
}
798 
799 /*
800  *  cache negative responses
801  */
802 static void
803 cacheneg(DN *dp, int type, int rcode, RR *soarr)
804 {
805 	RR *rp;
806 	DN *soaowner;
807 	ulong ttl;
808 
809 	/* no cache time specified, don't make anything up */
810 	if(soarr != nil){
811 		if(soarr->next != nil){
812 			rrfreelist(soarr->next);
813 			soarr->next = nil;
814 		}
815 		soaowner = soarr->owner;
816 	} else
817 		soaowner = nil;
818 
819 	/* the attach can cause soarr to be freed so mine it now */
820 	if(soarr != nil && soarr->soa != nil)
821 		ttl = soarr->soa->minttl+now;
822 	else
823 		ttl = 5*Min;
824 
825 	/* add soa and negative RR to the database */
826 	rrattach(soarr, 1);
827 
828 	rp = rralloc(type);
829 	rp->owner = dp;
830 	rp->negative = 1;
831 	rp->negsoaowner = soaowner;
832 	rp->negrcode = rcode;
833 	rp->ttl = ttl;
834 	rrattach(rp, 1);
835 }
836 
837 static int
838 setdestoutns(Dest *p, int n)
839 {
840 	uchar *outns = outsidens(n);
841 
842 	destck(p);
843 	destinit(p);
844 	if (outns == nil) {
845 		if (n == 0)
846 			dnslog("[%d] no outside-ns in ndb", getpid());
847 		return -1;
848 	}
849 	memmove(p->a, outns, sizeof p->a);
850 	p->s = dnlookup("outside-ns-ips", Cin, 1);
851 	return 0;
852 }
853 
/*
 * issue query via UDP or TCP as appropriate.
 * udppkt holds a udp header followed by a message of len bytes.
 * for UDP, the whole packet is written to qp->udpfd; returns 0 if
 * that worked, else -1.
 * for TCP, the address at the start of udppkt is dialed, qp->tcpfd
 * and qp->tcpctlfd are set, qp->tcpip is set from the connection's
 * remote system (if getnetconninfo succeeds), and the message is sent
 * preceded by its two-byte length; returns what dial returned.
 */
static int
mydnsquery(Query *qp, int medium, uchar *udppkt, int len)
{
	int rv = -1, nfd;
	char *domain;
	char conndir[40];
	uchar belen[2];
	NetConnInfo *nci;

	queryck(qp);
	switch (medium) {
	case Udp:
		/* dup and close merely verifies that udpfd is still open */
		nfd = dup(qp->udpfd, -1);
		if (nfd < 0) {
			warning("mydnsquery: qp->udpfd %d: %r", qp->udpfd);
			close(qp->udpfd);	/* ensure it's closed */
			qp->udpfd = -1;		/* poison it */
			return rv;
		}
		close(nfd);

		if (qp->udpfd <= 0)
			dnslog("mydnsquery: qp->udpfd %d closed", qp->udpfd);
		else {
			if (write(qp->udpfd, udppkt, len+Udphdrsize) !=
			    len+Udphdrsize)
				warning("sending udp msg: %r");
			else {
				stats.qsent++;
				rv = 0;
			}
		}
		break;
	case Tcp:
		/* send via TCP & keep fd around for reply */
		domain = smprint("%I", udppkt);
		alarm(10*1000);		/* don't let the dial hang forever */
		qp->tcpfd = rv = dial(netmkaddr(domain, "tcp", "dns"), nil,
			conndir, &qp->tcpctlfd);
		alarm(0);
		if (qp->tcpfd < 0) {
			dnslog("can't dial tcp!%s!dns: %r", domain);
			free(domain);
			break;
		}
		free(domain);
		nci = getnetconninfo(conndir, qp->tcpfd);
		if (nci) {
			parseip(qp->tcpip, nci->rsys);
			freenetconninfo(nci);
		} else
			dnslog("mydnsquery: getnetconninfo failed");
		qp->tcpset = 1;

		/* over tcp, a big-endian length precedes the message */
		belen[0] = len >> 8;
		belen[1] = len;
		/* a failed write is only logged; rv remains dial's result */
		if (write(qp->tcpfd, belen, 2) != 2 ||
		    write(qp->tcpfd, udppkt + Udphdrsize, len) != len)
			warning("sending tcp msg: %r");
		break;
	default:
		sysfatal("mydnsquery: bad medium");
	}
	return rv;
}
923 
/*
 * send query to all UDP destinations or one TCP destination,
 * taken from obuf (udp packet) header
 *
 * obuf holds a udp header followed by a message of len bytes.
 * returns -1 if the request's abort time has passed, if there are
 * no servers to ask, or if nothing was left to send to; else 0.
 * results of the individual transmissions are not examined.
 */
static int
xmitquery(Query *qp, int medium, int depth, uchar *obuf, int inns, int len)
{
	int j, n;
	char buf[32];
	Dest *p;

	queryck(qp);
	if(time(nil) >= qp->req->aborttime)
		return -1;

	/*
	 * get a nameserver address if we need one.
	 * serveraddrs populates qp->dest.
	 */
	p = qp->dest;
	destck(p);
	if (qp->ndest < 0 || qp->ndest > Maxdest)
		dnslog("qp->ndest %d out of range", qp->ndest);
	/* fewer destinations known than we may send to: find more */
	if (qp->ndest > qp->curdest - p)
		qp->curdest = &qp->dest[serveraddrs(qp, qp->curdest - p, depth)];
	destck(qp->curdest);

	/* no servers, punt */
	if (qp->curdest == qp->dest)
		if (cfg.straddle && cfg.inside) {
			/* get ips of "outside-ns-ips" */
			p = qp->curdest = qp->dest;
			for(n = 0; n < Maxdest; n++, qp->curdest++)
				if (setdestoutns(qp->curdest, n) < 0)
					break;
		} else {
			/* it's probably just a bogus domain, don't log it */
			// dnslog("xmitquery: %s: no nameservers", qp->dp->name);
			return -1;
		}

	/* send to first 'qp->ndest' destinations */
	j = 0;		/* counts destinations not yet finished with */
	if (medium == Tcp) {
		j++;
		queryck(qp);
		assert(qp->dp);
		procsetname("tcp %sside query for %s %s", (inns? "in": "out"),
			qp->dp->name, rrname(qp->type, buf, sizeof buf));
		mydnsquery(qp, medium, obuf, len); /* sets qp->tcpip from obuf */
		if(debug)
			logsend(qp->req->id, depth, qp->tcpip, "", qp->dp->name,
				qp->type);
	} else
		for(; p < &qp->dest[qp->ndest] && p < qp->curdest; p++){
			/* skip destinations we've finished with */
			if(p->nx >= Maxtrans)
				continue;

			j++;

			/* exponential backoff of requests */
			if((1<<p->nx) > qp->ndest)
				continue;

			procsetname("udp %sside query to %I/%s %s %s",
				(inns? "in": "out"), p->a, p->s->name,
				qp->dp->name, rrname(qp->type, buf, sizeof buf));
			if(debug)
				logsend(qp->req->id, depth, p->a, p->s->name,
					qp->dp->name, qp->type);

			/* fill in UDP destination addr & send it */
			memmove(obuf, p->a, sizeof p->a);
			mydnsquery(qp, medium, obuf, len);
			p->nx++;
		}
	if(j == 0) {
		// dnslog("xmitquery: %s: no destinations left", qp->dp->name);
		return -1;
	}
	return 0;
}
1007 
/*
 * digest the reply *mp, which came from srcip in answer to qp.
 * p is the destination that replied, or qp->curdest if the sender
 * wasn't one of ours (in which case *p is not written).
 * returns -1 if the reply was ignored (server failure, or a bad
 * delegation with no answers), 1 if it settles the query (answers,
 * an authoritative reply, or a name error), 0 if it was of no
 * further help, or else the result of querying the better name
 * servers to which it referred us.
 */
static int
procansw(Query *qp, DNSmsg *mp, uchar *srcip, int depth, Dest *p)
{
	int rv;
	char buf[32];
	DN *ndp;
	Query nquery;
	RR *tp, *soarr;

	/* ignore any error replies */
	if((mp->flags & Rmask) == Rserver){
		freeanswers(mp);
		if(p != qp->curdest)
			p->code = Rserver;
		return -1;
	}

	/* ignore any bad delegations */
	if(mp->ns && baddelegation(mp->ns, qp->nsrp, srcip)){
		if(mp->an == nil){
			freeanswers(mp);
			if(p != qp->curdest)
				p->code = Rserver;
			return -1;
		}
		/* there are answers, so drop only the delegation */
		rrfreelist(mp->ns);
		mp->ns = nil;
	}

	/* remove any soa's from the authority section */
	soarr = rrremtype(&mp->ns, Tsoa);

	/* incorporate answers */
	if(mp->an)
		rrattach(mp->an, (mp->flags & Fauth) != 0);
	if(mp->ar)
		rrattach(mp->ar, 0);
	if(mp->ns && !cfg.justforw){
		/* note the delegated domain before giving the RRs away */
		ndp = mp->ns->owner;
		rrattach(mp->ns, 0);
	} else
		ndp = nil;

	/* free the question */
	if(mp->qd) {
		rrfreelist(mp->qd);
		mp->qd = nil;
	}

	/*
	 *  Any reply from an authoritative server,
	 *  or a positive reply terminates the search.
	 *  A negative response now also terminates the search.
	 */
	if(mp->an != nil || (mp->flags & Fauth)){
		if(mp->an == nil && (mp->flags & Rmask) == Rname)
			qp->dp->respcode = Rname;
		else
			qp->dp->respcode = 0;

		/*
		 *  cache any negative responses, free soarr.
		 *  negative responses need not be authoritative:
		 *  they can legitimately come from a cache.
		 */
		if( /* (mp->flags & Fauth) && */ mp->an == nil)
			cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
		else
			rrfreelist(soarr);
		return 1;
	} else if (mp->an == nil && (mp->flags & Rmask) == Rname) {
		qp->dp->respcode = Rname;
		/*
		 *  cache negative response.
		 *  negative responses need not be authoritative:
		 *  they can legitimately come from a cache.
		 */
		cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
		return 1;
	}
	rrfreelist(soarr);

	/*
	 *  if we've been given better name servers, recurse.
	 *  we're called from udpquery, called from
	 *  netquery, which current holds qp->dp->querylck,
	 *  so release it now and acquire it upon return.
	 *  if we're a pure resolver, don't recurse, we have
	 *  to forward to a fixed set of named servers.
	 */
	if(!mp->ns || cfg.resolver && cfg.justforw)
		return 0;
	/*
	 * NOTE(review): if cfg.justforw is set but cfg.resolver is not,
	 * ndp is nil here; confirm that rrlookup copes with that.
	 */
	tp = rrlookup(ndp, Tns, NOneg);
	/* nothing new if we were already asking all of those servers */
	if(contains(qp->nsrp, tp)){
		rrfreelist(tp);
		return 0;
	}
	procsetname("recursive query for %s %s", qp->dp->name,
		rrname(qp->type, buf, sizeof buf));
	qunlock(&qp->dp->querylck);

	queryinit(&nquery, qp->dp, qp->type, qp->req);
	nquery.nsrp = tp;
	rv = netquery(&nquery, depth+1);

	qlock(&qp->dp->querylck);
	rrfreelist(tp);
	querydestroy(&nquery);
	return rv;
}
1118 
1119 /*
1120  * send a query via tcp to a single address (from ibuf's udp header)
1121  * and read the answer(s) into mp->an.
1122  */
1123 static int
1124 tcpquery(Query *qp, DNSmsg *mp, int depth, uchar *ibuf, uchar *obuf, int len,
1125 	int waitsecs, int inns, ushort req)
1126 {
1127 	int rv = 0;
1128 	ulong endtime;
1129 
1130 	endtime = time(nil) + waitsecs;
1131 	if(endtime > qp->req->aborttime)
1132 		endtime = qp->req->aborttime;
1133 
1134 	if (0)
1135 		dnslog("%s: udp reply truncated; retrying query via tcp to %I",
1136 			qp->dp->name, qp->tcpip);
1137 
1138 	qlock(&qp->tcplock);
1139 	memmove(obuf, ibuf, IPaddrlen);		/* send back to respondent */
1140 	/* sets qp->tcpip from obuf's udp header */
1141 	if (xmitquery(qp, Tcp, depth, obuf, inns, len) < 0 ||
1142 	    readreply(qp, Tcp, req, ibuf, mp, endtime) < 0)
1143 		rv = -1;
1144 	if (qp->tcpfd > 0) {
1145 		hangup(qp->tcpctlfd);
1146 		close(qp->tcpctlfd);
1147 		close(qp->tcpfd);
1148 	}
1149 	qp->tcpfd = qp->tcpctlfd = -1;
1150 	qunlock(&qp->tcplock);
1151 	return rv;
1152 }
1153 
1154 /*
1155  *  query name servers.  If the name server returns a pointer to another
1156  *  name server, recurse.
1157  */
1158 static int
1159 netquery1(Query *qp, int depth, uchar *ibuf, uchar *obuf, int waitsecs, int inns)
1160 {
1161 	int ndest, len, replywaits, rv;
1162 	ushort req;
1163 	ulong endtime;
1164 	char buf[12];
1165 	uchar srcip[IPaddrlen];
1166 	Dest *p, *np, *dest;
1167 //	Dest dest[Maxdest];
1168 
1169 	/* pack request into a udp message */
1170 	req = rand();
1171 	len = mkreq(qp->dp, qp->type, obuf, Frecurse|Oquery, req);
1172 
1173 	/* no server addresses yet */
1174 	queryck(qp);
1175 	dest = emalloc(Maxdest * sizeof *dest);	/* dest can't be on stack */
1176 	for (p = dest; p < dest + Maxdest; p++)
1177 		destinit(p);
1178 	qp->curdest = qp->dest = dest;
1179 
1180 	/*
1181 	 *  transmit udp requests and wait for answers.
1182 	 *  at most Maxtrans attempts to each address.
1183 	 *  each cycle send one more message than the previous.
1184 	 *  retry a query via tcp if its response is truncated.
1185 	 */
1186 	for(ndest = 1; ndest < Maxdest; ndest++){
1187 		qp->ndest = ndest;
1188 		qp->tcpset = 0;
1189 		if (xmitquery(qp, Udp, depth, obuf, inns, len) < 0)
1190 			break;
1191 
1192 		endtime = time(nil) + waitsecs;
1193 		if(endtime > qp->req->aborttime)
1194 			endtime = qp->req->aborttime;
1195 
1196 		for(replywaits = 0; replywaits < ndest; replywaits++){
1197 			DNSmsg m;
1198 
1199 			procsetname("reading %sside reply from %I for %s %s",
1200 				(inns? "in": "out"), obuf, qp->dp->name,
1201 				rrname(qp->type, buf, sizeof buf));
1202 
1203 			/* read udp answer into m */
1204 			if (readreply(qp, Udp, req, ibuf, &m, endtime) >= 0)
1205 				memmove(srcip, ibuf, IPaddrlen);
1206 			else if (!(m.flags & Ftrunc)) {
1207 				freeanswers(&m);
1208 				break;		/* timed out on this dest */
1209 			} else {
1210 				/* whoops, it was truncated! ask again via tcp */
1211 				rv = tcpquery(qp, &m, depth, ibuf, obuf, len,
1212 					waitsecs, inns, req);  /* answer in m */
1213 				if (rv < 0) {
1214 					freeanswers(&m);
1215 					break;		/* failed via tcp too */
1216 				}
1217 				memmove(srcip, qp->tcpip, IPaddrlen);
1218 			}
1219 
1220 			/* find responder */
1221 			// dnslog("netquery1 got reply from %I", srcip);
1222 			for(p = qp->dest; p < qp->curdest; p++)
1223 				if(memcmp(p->a, srcip, sizeof p->a) == 0)
1224 					break;
1225 
1226 			/* remove all addrs of responding server from list */
1227 			for(np = qp->dest; np < qp->curdest; np++)
1228 				if(np->s == p->s)
1229 					p->nx = Maxtrans;
1230 
1231 			/* free or incorporate RRs in m */
1232 			rv = procansw(qp, &m, srcip, depth, p);
1233 			if (rv > 0)
1234 				return rv;
1235 		}
1236 	}
1237 
1238 	/* if all servers returned failure, propagate it */
1239 	qp->dp->respcode = Rserver;
1240 	for(p = dest; p < qp->curdest; p++) {
1241 		destck(p);
1242 		if(p->code != Rserver)
1243 			qp->dp->respcode = 0;
1244 		p->magic = 0;			/* prevent accidents */
1245 	}
1246 
1247 //	if (qp->dp->respcode)
1248 //		dnslog("netquery1 setting Rserver for %s", qp->dp->name);
1249 
1250 	free(qp->dest);
1251 	qp->dest = qp->curdest = nil;		/* prevent accidents */
1252 	return 0;
1253 }
1254 
1255 /*
1256  *  run a command with a supplied fd as standard input
1257  */
1258 char *
1259 system(int fd, char *cmd)
1260 {
1261 	int pid, p, i;
1262 	static Waitmsg msg;
1263 
1264 	if((pid = fork()) == -1)
1265 		sysfatal("fork failed: %r");
1266 	else if(pid == 0){
1267 		dup(fd, 0);
1268 		close(fd);
1269 		for (i = 3; i < 200; i++)
1270 			close(i);		/* don't leak fds */
1271 		execl("/bin/rc", "rc", "-c", cmd, nil);
1272 		sysfatal("exec rc: %r");
1273 	}
1274 	for(p = waitpid(); p >= 0; p = waitpid())
1275 		if(p == pid)
1276 			return msg.msg;
1277 	return "lost child";
1278 }
1279 
1280 /*
1281  * in principle we could use a single descriptor for a udp port
1282  * to send all queries and receive all the answers to them,
1283  * but we'd have to sort out the answers by dns-query id.
1284  */
1285 static int
1286 udpquery(Query *qp, char *mntpt, int depth, int patient, int inns)
1287 {
1288 	int fd, rv, wait;
1289 	long now;
1290 	ulong pcntprob;
1291 	char *msg;
1292 	uchar *obuf, *ibuf;
1293 	static QLock mntlck;
1294 	static ulong lastmount;
1295 
1296 	/* use alloced buffers rather than ones from the stack */
1297 	// ibuf = emalloc(Maxudpin+Udphdrsize);
1298 	ibuf = emalloc(64*1024);		/* max. tcp reply size */
1299 	obuf = emalloc(Maxudp+Udphdrsize);
1300 
1301 	fd = udpport(mntpt);
1302 	while (fd < 0 && cfg.straddle && strcmp(mntpt, "/net.alt") == 0) {
1303 		/* HACK: remount /net.alt */
1304 		now = time(nil);
1305 		if (now < lastmount + Remntretry)
1306 			sleep((lastmount + Remntretry - now)*1000);
1307 		qlock(&mntlck);
1308 		fd = udpport(mntpt);	/* try again under lock */
1309 		if (fd < 0) {
1310 			dnslog("[%d] remounting /net.alt", getpid());
1311 			unmount(nil, "/net.alt");
1312 
1313 			msg = system(open("/dev/null", ORDWR), "outside");
1314 
1315 			lastmount = time(nil);
1316 			if (msg && *msg) {
1317 				dnslog("[%d] can't remount /net.alt: %s",
1318 					getpid(), msg);
1319 				sleep(10*1000);		/* don't spin wildly */
1320 			} else
1321 				fd = udpport(mntpt);
1322 		}
1323 		qunlock(&mntlck);
1324 	}
1325 	if (fd < 0) {
1326 		dnslog("can't get udpport for %s query of name %s: %r",
1327 			mntpt, qp->dp->name);
1328 		sysfatal("out of udp conversations");	/* we're buggered */
1329 	}
1330 
1331 	if (qp->type < 0 || qp->type >= nelem(likely))
1332 		pcntprob = 35;
1333 	else
1334 		pcntprob = likely[qp->type];
1335 	if (!patient)
1336 		pcntprob /= 2;
1337 	/*
1338 	 * Our QIP servers are busted, don't answer AAAA
1339 	 * and take forever to answer CNAME if there isn't one.
1340 	 * make time-to-wait proportional to estimated probability of an
1341 	 * RR of that type existing.
1342 	 */
1343 	qp->req->aborttime = time(nil) + (Maxreqtm * pcntprob)/100;
1344 	if (qp->req->aborttime < time(nil) + 2)
1345 		qp->req->aborttime = time(nil) + 2;
1346 	qp->udpfd = fd;
1347 	wait = (15 * pcntprob) / 100;		/* for this outgoing query */
1348 	if (wait < 2)
1349 		wait = 2;
1350 
1351 	rv = netquery1(qp, depth, ibuf, obuf, wait, inns);
1352 	close(fd);
1353 	qp->udpfd = -1;
1354 
1355 	free(obuf);
1356 	free(ibuf);
1357 	return rv;
1358 }
1359 
1360 /* look up (dp->name,type) via *nsrp with results in *reqp */
1361 static int
1362 netquery(Query *qp, int depth)
1363 {
1364 	int lock, rv, triedin, inname;
1365 	RR *rp;
1366 
1367 	if(depth > 12)			/* in a recursive loop? */
1368 		return 0;
1369 
1370 	slave(qp->req);
1371 	/*
1372 	 * slave might have forked.  if so, the parent process longjmped to
1373 	 * req->mret; we're usually the child slave, but if there are too
1374 	 * many children already, we're still the same process.
1375 	 */
1376 
1377 	/* don't lock before call to slave so only children can block */
1378 	if(1)
1379 		lock = qp->req->isslave != 0;
1380 	if(1 && lock) {
1381 		procsetname("query lock wait for %s", qp->dp->name);
1382 		/*
1383 		 * don't make concurrent queries for this name.
1384 		 * dozens of processes blocking here probably indicates
1385 		 * an error in our dns data that causes us to not
1386 		 * recognise a zone (area) as one of our own, thus
1387 		 * causing us to query other nameservers.
1388 		 */
1389 		qlock(&qp->dp->querylck);
1390 	}
1391 	procsetname("netquery: %s", qp->dp->name);
1392 
1393 	/* prepare server RR's for incremental lookup */
1394 	for(rp = qp->nsrp; rp; rp = rp->next)
1395 		rp->marker = 0;
1396 
1397 	rv = 0;				/* pessimism */
1398 	triedin = 0;
1399 
1400 	/*
1401 	 * normal resolvers and servers will just use mntpt for all addresses,
1402 	 * even on the outside.  straddling servers will use mntpt (/net)
1403 	 * for inside addresses and /net.alt for outside addresses,
1404 	 * thus bypassing other inside nameservers.
1405 	 */
1406 	inname = insideaddr(qp->dp->name);
1407 	if (!cfg.straddle || inname) {
1408 		rv = udpquery(qp, mntpt, depth, Hurry, (cfg.inside? Inns: Outns));
1409 		triedin = 1;
1410 	}
1411 
1412 	/*
1413 	 * if we're still looking, are inside, and have an outside domain,
1414 	 * try it on our outside interface, if any.
1415 	 */
1416 	if (rv == 0 && cfg.inside && !inname) {
1417 		if (triedin)
1418 			dnslog(
1419 	   "[%d] netquery: internal nameservers failed for %s; trying external",
1420 				getpid(), qp->dp->name);
1421 
1422 		/* prepare server RR's for incremental lookup */
1423 		for(rp = qp->nsrp; rp; rp = rp->next)
1424 			rp->marker = 0;
1425 
1426 		rv = udpquery(qp, "/net.alt", depth, Patient, Outns);
1427 	}
1428 //	if (rv == 0)		/* could ask /net.alt/dns directly */
1429 //		askoutdns(qp->dp, qp->type);
1430 
1431 	if(1 && lock)
1432 		qunlock(&qp->dp->querylck);
1433 	return rv;
1434 }
1435 
1436 int
1437 seerootns(void)
1438 {
1439 	int rv;
1440 	char root[] = "";
1441 	Request req;
1442 	Query query;
1443 
1444 	memset(&req, 0, sizeof req);
1445 	req.isslave = 1;
1446 	req.aborttime = now + Maxreqtm;
1447 	queryinit(&query, dnlookup(root, Cin, 1), Tns, &req);
1448 	query.nsrp = dblookup(root, Cin, Tns, 0, 0);
1449 	rv = netquery(&query, 0);
1450 	rrfreelist(query.nsrp);
1451 	querydestroy(&query);
1452 	return rv;
1453 }
1454