xref: /plan9-contrib/sys/src/cmd/ndb/dnresolve.c (revision 4e7b95445ffebb86c0c2ffea88eb978bd2b5395c)
1 /*
2  * domain name resolvers, see rfcs 1035 and 1123
3  */
4 #include <u.h>
5 #include <libc.h>
6 #include <ip.h>
7 #include <bio.h>
8 #include <ndb.h>
9 #include "dns.h"
10 
/* forward typedefs for the structures defined below */
typedef struct Dest Dest;
typedef struct Ipaddr Ipaddr;
typedef struct Query Query;
typedef struct Sluggards Sluggards;

enum
{
	Udp, Tcp,		/* media a query may be sent over */
	Maxdest=	24,	/* maximum destinations for a request message */
	Maxtrans=	3,	/* maximum transmissions to a server */
	Destmagic=	0xcafebabe,	/* stamp verified by destck */
	Querymagic=	0xdeadbeef,	/* stamp verified by queryck */
};
24 
/* one node of a singly-linked list of ip addresses */
struct Ipaddr {
	Ipaddr *next;		/* following node, or nil at the tail */
	uchar	ip[IPaddrlen];
};
29 
/* one candidate destination (name server address) for a query */
struct Dest
{
	uchar	a[IPaddrlen];	/* ip address */
	DN	*s;		/* name server */
	int	nx;		/* number of transmissions */
	int	code;		/* response code; used to clear dp->respcode */

	ulong	magic;		/* Destmagic while the entry is valid */
};
39 
/* state of one outstanding lookup of (dp, type) */
struct Query {
	DN	*dp;		/* domain */
	int	type;		/* and type to look up */
	Request *req;
	RR	*nsrp;		/* name servers to consult */

	Dest	*dest;		/* array of destinations */
	Dest	*curdest;	/* pointer to one of them */
	int	ndest;

	int	udpfd;		/* can be shared by all udp users */

	QLock	tcplock;	/* only one tcp call at a time per query */
	int	tcpset;		/* set once mydnsquery has dialed via tcp */
	int	tcpfd;		/* if Tcp, read replies from here */
	int	tcpctlfd;	/* ctl fd of the tcp connection, for hangup */
	uchar	tcpip[IPaddrlen];	/* remote address of the tcp connection */

	ulong	magic;		/* Querymagic while the query is valid */
};
60 
/* a list of sluggardly name servers */
struct Sluggards {
	QLock;			/* guards head and tail */
	Ipaddr *head;
	Ipaddr *tail;
};

/* the single, file-wide sluggard list used by addslug and isaslug */
static Sluggards slugs;
69 
/* forward declarations: these are used before they are defined */
static RR*	dnresolve1(char*, int, int, Request*, int, int);
static int	netquery(Query *, int);
72 
73 static Ipaddr *
74 newslug(void)
75 {
76 	return emalloc(sizeof(Ipaddr));
77 }
78 
79 static void
80 addslug(uchar nsip[])
81 {
82 	Ipaddr *sp;
83 	static uchar zip[IPaddrlen];
84 
85 	if (memcmp(nsip, zip, IPaddrlen) == 0)
86 		return;
87 
88 	qlock(&slugs);
89 	for (sp = slugs.head; sp != nil; sp = sp->next)
90 		if (memcmp(sp->ip, nsip, IPaddrlen) == 0) {
91 			qunlock(&slugs);		/* already know it */
92 			return;
93 		}
94 
95 	if (slugs.head == nil)
96 		slugs.head = slugs.tail = newslug();
97 	else {
98 		slugs.tail->next = newslug();
99 		slugs.tail = slugs.tail->next;
100 	}
101 	memmove(slugs.tail->ip, nsip, IPaddrlen);
102 	qunlock(&slugs);
103 
104 	dnslog("%I is a slug", nsip);
105 }
106 
107 int
108 isaslug(uchar nsip[])
109 {
110 	Ipaddr *sp;
111 
112 	qlock(&slugs);
113 	for (sp = slugs.head; sp != nil; sp = sp->next)
114 		if (memcmp(sp->ip, nsip, IPaddrlen) == 0) {
115 			qunlock(&slugs);
116 			return 1;
117 		}
118 	qunlock(&slugs);
119 	return 0;
120 }
121 
122 /*
123  * reading /proc/pid/args yields either "name" or "name [display args]",
124  * so return only display args, if any.
125  */
126 static char *
127 procgetname(void)
128 {
129 	int fd, n;
130 	char *lp, *rp;
131 	char buf[256];
132 
133 	snprint(buf, sizeof buf, "#p/%d/args", getpid());
134 	if((fd = open(buf, OREAD)) < 0)
135 		return strdup("");
136 	*buf = '\0';
137 	n = read(fd, buf, sizeof buf-1);
138 	close(fd);
139 	if (n >= 0)
140 		buf[n] = '\0';
141 	if ((lp = strchr(buf, '[')) == nil ||
142 	    (rp = strrchr(buf, ']')) == nil)
143 		return strdup("");
144 	*rp = '\0';
145 	return strdup(lp+1);
146 }
147 
148 /*
149  *  lookup 'type' info for domain name 'name'.  If it doesn't exist, try
150  *  looking it up as a canonical name.
151  */
152 RR*
153 dnresolve(char *name, int class, int type, Request *req, RR **cn, int depth,
154 	int recurse, int rooted, int *status)
155 {
156 	RR *rp, *nrp, *drp;
157 	DN *dp;
158 	int loops;
159 	char *procname;
160 	char nname[Domlen];
161 
162 	if(status)
163 		*status = 0;
164 
165 	procname = procgetname();
166 	/*
167 	 *  hack for systems that don't have resolve search
168 	 *  lists.  Just look up the simple name in the database.
169 	 */
170 	if(!rooted && strchr(name, '.') == 0){
171 		rp = nil;
172 		drp = domainlist(class);
173 		for(nrp = drp; nrp != nil; nrp = nrp->next){
174 			snprint(nname, sizeof nname, "%s.%s", name,
175 				nrp->ptr->name);
176 			rp = dnresolve(nname, class, type, req, cn, depth,
177 				recurse, rooted, status);
178 			rrfreelist(rrremneg(&rp));
179 			if(rp != nil)
180 				break;
181 		}
182 		if(drp != nil)
183 			rrfree(drp);
184 		procsetname(procname);
185 		free(procname);
186 		return rp;
187 	}
188 
189 	/*
190 	 *  try the name directly
191 	 */
192 	rp = dnresolve1(name, class, type, req, depth, recurse);
193 	if(rp) {
194 		procsetname(procname);
195 		free(procname);
196 		return randomize(rp);
197 	}
198 
199 	/* try it as a canonical name if we weren't told the name didn't exist */
200 	dp = dnlookup(name, class, 0);
201 	if(type != Tptr && dp->respcode != Rname)
202 		for(loops = 0; rp == nil && loops < 32; loops++){
203 			rp = dnresolve1(name, class, Tcname, req, depth, recurse);
204 			if(rp == nil)
205 				break;
206 
207 			if(rp->negative){
208 				rrfreelist(rp);
209 				rp = nil;
210 				break;
211 			}
212 
213 			name = rp->host->name;
214 			if(cn)
215 				rrcat(cn, rp);
216 			else
217 				rrfreelist(rp);
218 
219 			rp = dnresolve1(name, class, type, req, depth, recurse);
220 		}
221 
222 	/* distinction between not found and not good */
223 	if(rp == nil && status != nil && dp->respcode != 0)
224 		*status = dp->respcode;
225 
226 	procsetname(procname);
227 	free(procname);
228 	return randomize(rp);
229 }
230 
231 static void
232 queryinit(Query *qp, DN *dp, int type, Request *req)
233 {
234 	memset(qp, 0, sizeof *qp);
235 	qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
236 	qp->dp = dp;
237 	qp->type = type;
238 	qp->req = req;
239 	qp->nsrp = nil;
240 	qp->dest = qp->curdest = nil;
241 	qp->magic = Querymagic;
242 }
243 
/* sanity check: qp must be non-nil and carry the Querymagic stamp */
static void
queryck(Query *qp)
{
	assert(qp);
	assert(qp->magic == Querymagic);
}
250 
251 static void
252 querydestroy(Query *qp)
253 {
254 	queryck(qp);
255 	if (qp->udpfd > 0)
256 		close(qp->udpfd);
257 	if (qp->tcpfd > 0)
258 		close(qp->tcpfd);
259 	if (qp->tcpctlfd > 0) {
260 		hangup(qp->tcpctlfd);
261 		close(qp->tcpctlfd);
262 	}
263 	memset(qp, 0, sizeof *qp);	/* prevent accidents */
264 	qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
265 }
266 
267 static void
268 destinit(Dest *p)
269 {
270 	memset(p, 0, sizeof *p);
271 	p->magic = Destmagic;
272 }
273 
/* sanity check: p must be non-nil and carry the Destmagic stamp */
static void
destck(Dest *p)
{
	assert(p);
	assert(p->magic == Destmagic);
}
280 
281 static RR*
282 dnresolve1(char *name, int class, int type, Request *req, int depth,
283 	int recurse)
284 {
285 	DN *dp, *nsdp;
286 	RR *rp, *nsrp, *dbnsrp;
287 	char *cp;
288 	Query query;
289 
290 	if(debug)
291 		dnslog("[%d] dnresolve1 %s %d %d", getpid(), name, type, class);
292 
293 	/* only class Cin implemented so far */
294 	if(class != Cin)
295 		return nil;
296 
297 	dp = dnlookup(name, class, 1);
298 
299 	/*
300 	 *  Try the cache first
301 	 */
302 	rp = rrlookup(dp, type, OKneg);
303 	if(rp)
304 		if(rp->db){
305 			/* unauthoritative db entries are hints */
306 			if(rp->auth)
307 				return rp;
308 		} else
309 			/* cached entry must still be valid */
310 			if(rp->ttl > now)
311 				/* but Tall entries are special */
312 				if(type != Tall || rp->query == Tall)
313 					return rp;
314 
315 	rrfreelist(rp);
316 
317 	/*
318 	 * try the cache for a canonical name. if found punt
319 	 * since we'll find it during the canonical name search
320 	 * in dnresolve().
321 	 */
322 	if(type != Tcname){
323 		rp = rrlookup(dp, Tcname, NOneg);
324 		rrfreelist(rp);
325 		if(rp)
326 			return nil;
327 	}
328 
329 	queryinit(&query, dp, type, req);
330 
331 	/*
332 	 *  if we're running as just a resolver, query our
333 	 *  designated name servers
334 	 */
335 	if(cfg.resolver){
336 		nsrp = randomize(getdnsservers(class));
337 		if(nsrp != nil) {
338 			query.nsrp = nsrp;
339 			if(netquery(&query, depth+1)){
340 				rrfreelist(nsrp);
341 				querydestroy(&query);
342 				return rrlookup(dp, type, OKneg);
343 			}
344 			rrfreelist(nsrp);
345 		}
346 	}
347 
348 	/*
349  	 *  walk up the domain name looking for
350 	 *  a name server for the domain.
351 	 */
352 	for(cp = name; cp; cp = walkup(cp)){
353 		/*
354 		 *  if this is a local (served by us) domain,
355 		 *  return answer
356 		 */
357 		dbnsrp = randomize(dblookup(cp, class, Tns, 0, 0));
358 		if(dbnsrp && dbnsrp->local){
359 			rp = dblookup(name, class, type, 1, dbnsrp->ttl);
360 			rrfreelist(dbnsrp);
361 			querydestroy(&query);
362 			return rp;
363 		}
364 
365 		/*
366 		 *  if recursion isn't set, just accept local
367 		 *  entries
368 		 */
369 		if(recurse == Dontrecurse){
370 			if(dbnsrp)
371 				rrfreelist(dbnsrp);
372 			continue;
373 		}
374 
375 		/* look for ns in cache */
376 		nsdp = dnlookup(cp, class, 0);
377 		nsrp = nil;
378 		if(nsdp)
379 			nsrp = randomize(rrlookup(nsdp, Tns, NOneg));
380 
381 		/* if the entry timed out, ignore it */
382 		if(nsrp && nsrp->ttl < now){
383 			rrfreelist(nsrp);
384 			nsrp = nil;
385 		}
386 
387 		if(nsrp){
388 			rrfreelist(dbnsrp);
389 
390 			/* query the name servers found in cache */
391 			query.nsrp = nsrp;
392 			if(netquery(&query, depth+1)){
393 				rrfreelist(nsrp);
394 				querydestroy(&query);
395 				return rrlookup(dp, type, OKneg);
396 			}
397 			rrfreelist(nsrp);
398 			continue;
399 		}
400 
401 		/* use ns from db */
402 		if(dbnsrp){
403 			/* try the name servers found in db */
404 			query.nsrp = dbnsrp;
405 			if(netquery(&query, depth+1)){
406 				/* we got an answer */
407 				rrfreelist(dbnsrp);
408 				querydestroy(&query);
409 				return rrlookup(dp, type, NOneg);
410 			}
411 			rrfreelist(dbnsrp);
412 		}
413 	}
414 	querydestroy(&query);
415 
416 	/* settle for a non-authoritative answer */
417 	rp = rrlookup(dp, type, OKneg);
418 	if(rp)
419 		return rp;
420 
421 	/* noone answered.  try the database, we might have a chance. */
422 	return dblookup(name, class, type, 0, 0);
423 }
424 
/*
 *  step from a domain name to its parent: return a pointer just past
 *  the first '.' in name.  a non-empty name without a '.' has the
 *  root ("") as its parent; the empty name has no parent and yields 0.
 */
char*
walkup(char *name)
{
	char *dot;

	dot = strchr(name, '.');
	if(dot != 0)
		return dot+1;
	if(*name == '\0')
		return 0;
	return "";
}
443 
444 /*
445  *  Get a udpport for requests and replies.  Put the port
446  *  into "headers" mode.
447  */
448 static char *hmsg = "headers";
449 static char *ohmsg = "oldheaders";
450 
451 int
452 udpport(char *mtpt)
453 {
454 	int fd, ctl;
455 	char ds[64], adir[64];
456 
457 	/* get a udp port */
458 	snprint(ds, sizeof ds, "%s/udp!*!0", (mtpt? mtpt: "/net"));
459 	ctl = announce(ds, adir);
460 	if(ctl < 0){
461 		/* warning("can't get udp port"); */
462 		return -1;
463 	}
464 
465 	/* turn on header style interface */
466 	if(write(ctl, hmsg, strlen(hmsg)) , 0){
467 		close(ctl);
468 		warning(hmsg);
469 		return -1;
470 	}
471 	write(ctl, ohmsg, strlen(ohmsg));
472 
473 	/* grab the data file */
474 	snprint(ds, sizeof ds, "%s/data", adir);
475 	fd = open(ds, ORDWR);
476 	close(ctl);
477 	if(fd < 0)
478 		warning("can't open udp port %s: %r", ds);
479 	return fd;
480 }
481 
482 /* generate a DNS UDP query packet */
483 int
484 mkreq(DN *dp, int type, uchar *buf, int flags, ushort reqno)
485 {
486 	DNSmsg m;
487 	int len;
488 	OUdphdr *uh = (OUdphdr*)buf;
489 
490 	/* stuff port number into output buffer */
491 	memset(uh, 0, sizeof *uh);
492 	hnputs(uh->rport, 53);
493 
494 	/* make request and convert it to output format */
495 	memset(&m, 0, sizeof m);
496 	m.flags = flags;
497 	m.id = reqno;
498 	m.qd = rralloc(type);
499 	m.qd->owner = dp;
500 	m.qd->type = type;
501 	len = convDNS2M(&m, &buf[OUdphdrsize], Maxudp);
502 	rrfree(m.qd);
503 	return len;
504 }
505 
506 /* for alarms in readreply */
507 static void
508 ding(void *x, char *msg)
509 {
510 	USED(x);
511 	if(strcmp(msg, "alarm") == 0)
512 		noted(NCONT);
513 	else
514 		noted(NDFLT);
515 }
516 
517 static void
518 freeanswers(DNSmsg *mp)
519 {
520 	rrfreelist(mp->qd);
521 	rrfreelist(mp->an);
522 	rrfreelist(mp->ns);
523 	rrfreelist(mp->ar);
524 	mp->qd = mp->an = mp->ns = mp->ar = nil;
525 }
526 
527 /* sets srcip */
528 static int
529 readnet(Query *qp, int medium, uchar *ibuf, ulong endtime, uchar **replyp,
530 	uchar *srcip)
531 {
532 	int len, fd;
533 	uchar *reply;
534 	uchar lenbuf[2];
535 
536 	/* timed read of reply */
537 	alarm((endtime - time(nil)) * 1000);
538 	reply = ibuf;
539 	len = -1;			/* pessimism */
540 	memset(srcip, 0, IPaddrlen);
541 	if (medium == Udp) {
542 		if (qp->udpfd <= 0)
543 			dnslog("readnet: qp->udpfd closed");
544 		else {
545 			len = read(qp->udpfd, ibuf, OUdphdrsize+Maxudpin);
546 			if (len >= IPaddrlen)
547 				memmove(srcip, ibuf, IPaddrlen);
548 			if (len >= OUdphdrsize) {
549 				len   -= OUdphdrsize;
550 				reply += OUdphdrsize;
551 			}
552 		}
553 	} else {
554 		if (!qp->tcpset)
555 			dnslog("readnet: tcp params not set");
556 		fd = qp->tcpfd;
557 		if (fd <= 0)
558 			dnslog("readnet: %s: tcp fd unset for dest %I",
559 				qp->dp->name, qp->tcpip);
560 		else if (readn(fd, lenbuf, 2) != 2) {
561 			dnslog("readnet: short read of tcp size from %I",
562 				qp->tcpip);
563 			/*
564 			 * probably a time-out; demote the ns.
565 			 * actually, the problem may be the query, not the ns.
566 			 */
567 			addslug(qp->tcpip);
568 		} else {
569 			len = lenbuf[0]<<8 | lenbuf[1];
570 			if (readn(fd, ibuf, len) != len) {
571 				dnslog("readnet: short read of tcp data from %I",
572 					qp->tcpip);
573 				/* probably a time-out; demote the ns */
574 				addslug(qp->tcpip);
575 				len = -1;
576 			}
577 		}
578 		memmove(srcip, qp->tcpip, IPaddrlen);
579 	}
580 	alarm(0);
581 	*replyp = reply;
582 	return len;
583 }
584 
585 /*
586  *  read replies to a request and remember the rrs in the answer(s).
587  *  ignore any of the wrong type.
588  *  wait at most until endtime.
589  */
590 static int
591 readreply(Query *qp, int medium, ushort req, uchar *ibuf, DNSmsg *mp,
592 	ulong endtime)
593 {
594 	int len = -1, rv;
595 	char *err;
596 	uchar *reply;
597 	uchar srcip[IPaddrlen];
598 	RR *rp;
599 
600 	notify(ding);
601 
602 	queryck(qp);
603 	rv = 0;
604 	memset(mp, 0, sizeof *mp);
605 	if (time(nil) >= endtime)
606 		return -1;		/* timed out before we started */
607 
608 	for (; time(nil) < endtime &&
609 	    (len = readnet(qp, medium, ibuf, endtime, &reply, srcip)) >= 0;
610 	    freeanswers(mp)){
611 		/* convert into internal format  */
612 		memset(mp, 0, sizeof *mp);
613 		err = convM2DNS(reply, len, mp, nil);
614 		if (mp->flags & Ftrunc) {
615 //			dnslog("readreply: %s: truncated reply, len %d from %I",
616 //				qp->dp->name, len, srcip);
617 			/* notify the caller to retry the query via tcp. */
618 			return -1;
619 		} else if(err){
620 			dnslog("readreply: %s: input err, len %d: %s: %I",
621 				qp->dp->name, len, err, srcip);
622 			free(err);
623 			continue;
624 		}
625 		if (err)
626 			free(err);
627 		if(debug)
628 			logreply(qp->req->id, srcip, mp);
629 
630 		/* answering the right question? */
631 		if(mp->id != req)
632 			dnslog("%d: id %d instead of %d: %I", qp->req->id,
633 				mp->id, req, srcip);
634 		else if(mp->qd == 0)
635 			dnslog("%d: no question RR: %I", qp->req->id, srcip);
636 		else if(mp->qd->owner != qp->dp)
637 			dnslog("%d: owner %s instead of %s: %I", qp->req->id,
638 				mp->qd->owner->name, qp->dp->name, srcip);
639 		else if(mp->qd->type != qp->type)
640 			dnslog("%d: qp->type %d instead of %d: %I",
641 				qp->req->id, mp->qd->type, qp->type, srcip);
642 		else {
643 			/* remember what request this is in answer to */
644 			for(rp = mp->an; rp; rp = rp->next)
645 				rp->query = qp->type;
646 			return rv;
647 		}
648 	}
649 	if (time(nil) >= endtime)
650 		addslug(srcip);
651 	else
652 		dnslog("readreply: %s: %I read error or eof (returned %d)",
653 			qp->dp->name, srcip, len);
654 	return -1;
655 }
656 
657 /*
658  *	return non-0 if first list includes second list
659  */
660 int
661 contains(RR *rp1, RR *rp2)
662 {
663 	RR *trp1, *trp2;
664 
665 	for(trp2 = rp2; trp2; trp2 = trp2->next){
666 		for(trp1 = rp1; trp1; trp1 = trp1->next)
667 			if(trp1->type == trp2->type)
668 			if(trp1->host == trp2->host)
669 			if(trp1->owner == trp2->owner)
670 				break;
671 		if(trp1 == nil)
672 			return 0;
673 	}
674 	return 1;
675 }
676 
677 
678 /*
679  *  return multicast version if any
680  */
681 int
682 ipisbm(uchar *ip)
683 {
684 	if(isv4(ip)){
685 		if (ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0 ||
686 		    ipcmp(ip, IPv4bcast) == 0)
687 			return 4;
688 	} else
689 		if(ip[0] == 0xff)
690 			return 6;
691 	return 0;
692 }
693 
694 /*
695  *  Get next server address
696  */
697 static int
698 serveraddrs(Query *qp, int nd, int depth)
699 {
700 	RR *rp, *arp, *trp;
701 	Dest *cur;
702 
703 	if(nd >= Maxdest)
704 		return 0;
705 
706 	/*
707 	 *  look for a server whose address we already know.
708 	 *  if we find one, mark it so we ignore this on
709 	 *  subsequent passes.
710 	 */
711 	arp = 0;
712 	for(rp = qp->nsrp; rp; rp = rp->next){
713 		assert(rp->magic == RRmagic);
714 		if(rp->marker)
715 			continue;
716 		arp = rrlookup(rp->host, Ta, NOneg);
717 		if(arp){
718 			rp->marker = 1;
719 			break;
720 		}
721 		arp = dblookup(rp->host->name, Cin, Ta, 0, 0);
722 		if(arp){
723 			rp->marker = 1;
724 			break;
725 		}
726 	}
727 
728 	/*
729 	 *  if the cache and database lookup didn't find any new
730 	 *  server addresses, try resolving one via the network.
731 	 *  Mark any we try to resolve so we don't try a second time.
732 	 */
733 	if(arp == 0)
734 		for(rp = qp->nsrp; rp; rp = rp->next){
735 			if(rp->marker)
736 				continue;
737 			rp->marker = 1;
738 
739 			/*
740 			 *  avoid loops looking up a server under itself
741 			 */
742 			if(subsume(rp->owner->name, rp->host->name))
743 				continue;
744 
745 			arp = dnresolve(rp->host->name, Cin, Ta, qp->req, 0,
746 				depth+1, Recurse, 1, 0);
747 			rrfreelist(rrremneg(&arp));
748 			if(arp)
749 				break;
750 		}
751 
752 	/* use any addresses that we found */
753 	for(trp = arp; trp && nd < Maxdest; trp = trp->next){
754 		cur = &qp->dest[nd];
755 		parseip(cur->a, trp->ip->name);
756 		/*
757 		 * straddling servers can reject all nameservers if they are all
758 		 * inside, so be sure to list at least one outside ns at
759 		 * the end of the ns list in /lib/ndb for `dom='.
760 		 */
761 		if (ipisbm(cur->a) ||
762 		    cfg.straddle && !insideaddr(qp->dp->name) && insidens(cur->a))
763 			continue;
764 		cur->nx = 0;
765 		cur->s = trp->owner;
766 		cur->code = Rtimeout;
767 		nd++;
768 	}
769 	rrfreelist(arp);
770 	return nd;
771 }
772 
773 /*
774  *  cache negative responses
775  */
776 static void
777 cacheneg(DN *dp, int type, int rcode, RR *soarr)
778 {
779 	RR *rp;
780 	DN *soaowner;
781 	ulong ttl;
782 
783 	/* no cache time specified, don't make anything up */
784 	if(soarr != nil){
785 		if(soarr->next != nil){
786 			rrfreelist(soarr->next);
787 			soarr->next = nil;
788 		}
789 		soaowner = soarr->owner;
790 	} else
791 		soaowner = nil;
792 
793 	/* the attach can cause soarr to be freed so mine it now */
794 	if(soarr != nil && soarr->soa != nil)
795 		ttl = soarr->soa->minttl+now;
796 	else
797 		ttl = 5*Min;
798 
799 	/* add soa and negative RR to the database */
800 	rrattach(soarr, 1);
801 
802 	rp = rralloc(type);
803 	rp->owner = dp;
804 	rp->negative = 1;
805 	rp->negsoaowner = soaowner;
806 	rp->negrcode = rcode;
807 	rp->ttl = ttl;
808 	rrattach(rp, 1);
809 }
810 
811 static int
812 setdestoutns(Dest *p, int n)
813 {
814 	uchar *outns = outsidens(n);
815 
816 	destck(p);
817 	destinit(p);
818 	if (outns == nil) {
819 		if (n == 0)
820 			dnslog("[%d] no outside-ns in ndb", getpid());
821 		return -1;
822 	}
823 	memmove(p->a, outns, sizeof p->a);
824 	p->s = dnlookup("outside-ns-ips", Cin, 1);
825 	return 0;
826 }
827 
828 /*
829  * issue query via UDP or TCP as appropriate.
830  * for TCP, returns with qp->tcpip set from udppkt header.
831  */
832 static int
833 mydnsquery(Query *qp, int medium, uchar *udppkt, int len)
834 {
835 	int rv = -1;
836 	char *domain;
837 	char conndir[40];
838 	NetConnInfo *nci;
839 
840 	queryck(qp);
841 	switch (medium) {
842 	case Udp:
843 		if (qp->udpfd <= 0)
844 			dnslog("mydnsquery: qp->udpfd closed");
845 		else {
846 			if (write(qp->udpfd, udppkt, len+OUdphdrsize) !=
847 			    len+OUdphdrsize)
848 				warning("sending udp msg %r");
849 			rv = 0;
850 		}
851 		break;
852 	case Tcp:
853 		/* send via TCP & keep fd around for reply */
854 		domain = smprint("%I", udppkt);
855 		alarm(10*1000);
856 		qp->tcpfd = rv = dial(netmkaddr(domain, "tcp", "dns"), nil,
857 			conndir, &qp->tcpctlfd);
858 		alarm(0);
859 		if (qp->tcpfd < 0) {
860 			dnslog("can't dial tcp!%s!dns: %r", domain);
861 			addslug(udppkt);
862 		} else {
863 			uchar belen[2];
864 
865 			nci = getnetconninfo(conndir, qp->tcpfd);
866 			if (nci) {
867 				parseip(qp->tcpip, nci->rsys);
868 				freenetconninfo(nci);
869 			} else
870 				dnslog("mydnsquery: getnetconninfo failed");
871 			qp->tcpset = 1;
872 
873 			belen[0] = len >> 8;
874 			belen[1] = len;
875 			if (write(qp->tcpfd, belen, 2) != 2 ||
876 			    write(qp->tcpfd, udppkt + OUdphdrsize, len) != len)
877 				warning("sending tcp msg %r");
878 		}
879 		free(domain);
880 		break;
881 	default:
882 		sysfatal("mydnsquery: bad medium");
883 	}
884 	return rv;
885 }
886 
887 /*
888  * send query to all UDP destinations or one TCP destination,
889  * taken from obuf (udp packet) header
890  */
891 static int
892 xmitquery(Query *qp, int medium, int depth, uchar *obuf, int inns, int len)
893 {
894 	int j, n;
895 	char buf[32];
896 	Dest *p;
897 
898 	queryck(qp);
899 	if(time(nil) >= qp->req->aborttime)
900 		return -1;
901 
902 	/*
903 	 * get a nameserver address if we need one.
904 	 * serveraddrs populates qp->dest.
905 	 */
906 	p = qp->dest;
907 	destck(p);
908 	if (qp->ndest < 0 || qp->ndest > Maxdest)
909 		dnslog("qp->ndest %d out of range", qp->ndest);
910 	if (qp->ndest > qp->curdest - p)
911 		qp->curdest = &qp->dest[serveraddrs(qp, qp->curdest - p, depth)];
912 	destck(qp->curdest);
913 
914 	/* no servers, punt */
915 	if (qp->curdest == qp->dest)
916 		if (cfg.straddle && cfg.inside) {
917 			/* get ips of "outside-ns-ips" */
918 			p = qp->curdest = qp->dest;
919 			for(n = 0; n < Maxdest; n++, qp->curdest++)
920 				if (setdestoutns(qp->curdest, n) < 0)
921 					break;
922 		} else {
923 			/* it's probably just a bogus domain, don't log it */
924 			// dnslog("xmitquery: %s: no nameservers", qp->dp->name);
925 			return -1;
926 		}
927 
928 	/* send to first 'qp->ndest' destinations */
929 	j = 0;
930 	if (medium == Tcp) {
931 		j++;
932 		queryck(qp);
933 		assert(qp->dp);
934 		procsetname("tcp %sside query for %s %s", (inns? "in": "out"),
935 			qp->dp->name, rrname(qp->type, buf, sizeof buf));
936 		mydnsquery(qp, medium, obuf, len); /* sets qp->tcpip from obuf */
937 		if(debug)
938 			logsend(qp->req->id, depth, qp->tcpip, "", qp->dp->name,
939 				qp->type);
940 	} else
941 		for(; p < &qp->dest[qp->ndest] && p < qp->curdest; p++){
942 			/* skip destinations we've finished with */
943 			if(p->nx >= Maxtrans)
944 				continue;
945 
946 			j++;
947 
948 			/* exponential backoff of requests */
949 			if((1<<p->nx) > qp->ndest)
950 				continue;
951 
952 			procsetname("udp %sside query to %I/%s %s %s",
953 				(inns? "in": "out"), p->a, p->s->name,
954 				qp->dp->name, rrname(qp->type, buf, sizeof buf));
955 			if(debug)
956 				logsend(qp->req->id, depth, p->a, p->s->name,
957 					qp->dp->name, qp->type);
958 
959 			/* fill in UDP destination addr & send it */
960 			memmove(obuf, p->a, sizeof p->a);
961 			mydnsquery(qp, medium, obuf, len);
962 			p->nx++;
963 		}
964 	if(j == 0) {
965 		// dnslog("xmitquery: %s: no destinations left", qp->dp->name);
966 		return -1;
967 	}
968 	return 0;
969 }
970 
971 static int
972 procansw(Query *qp, DNSmsg *mp, uchar *srcip, int depth, Dest *p)
973 {
974 	int rv;
975 	char buf[32];
976 	DN *ndp;
977 	Query nquery;
978 	RR *tp, *soarr;
979 
980 	/* ignore any error replies */
981 	if((mp->flags & Rmask) == Rserver){
982 		rrfreelist(mp->qd);
983 		rrfreelist(mp->an);
984 		rrfreelist(mp->ar);
985 		rrfreelist(mp->ns);
986 		if(p != qp->curdest)
987 			p->code = Rserver;
988 		return -1;
989 	}
990 
991 	/* ignore any bad delegations */
992 	if(mp->ns && baddelegation(mp->ns, qp->nsrp, srcip)){
993 		rrfreelist(mp->ns);
994 		mp->ns = nil;
995 		if(mp->an == nil){
996 			rrfreelist(mp->qd);
997 			rrfreelist(mp->ar);
998 			if(p != qp->curdest)
999 				p->code = Rserver;
1000 			return -1;
1001 		}
1002 	}
1003 
1004 	/* remove any soa's from the authority section */
1005 	soarr = rrremtype(&mp->ns, Tsoa);
1006 
1007 	/* incorporate answers */
1008 	if(mp->an)
1009 		rrattach(mp->an, (mp->flags & Fauth) != 0);
1010 	if(mp->ar)
1011 		rrattach(mp->ar, 0);
1012 	if(mp->ns){
1013 		ndp = mp->ns->owner;
1014 		rrattach(mp->ns, 0);
1015 	} else
1016 		ndp = nil;
1017 
1018 	/* free the question */
1019 	if(mp->qd)
1020 		rrfreelist(mp->qd);
1021 
1022 	/*
1023 	 *  Any reply from an authoritative server,
1024 	 *  or a positive reply terminates the search
1025 	 */
1026 	if(mp->an != nil || (mp->flags & Fauth)){
1027 		if(mp->an == nil && (mp->flags & Rmask) == Rname)
1028 			qp->dp->respcode = Rname;
1029 		else
1030 			qp->dp->respcode = 0;
1031 
1032 		/*
1033 		 *  cache any negative responses, free soarr
1034 		 */
1035 		if((mp->flags & Fauth) && mp->an == nil)
1036 			cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
1037 		else
1038 			rrfreelist(soarr);
1039 		return 1;
1040 	}
1041 	rrfreelist(soarr);
1042 
1043 	/*
1044 	 *  if we've been given better name servers,
1045 	 *  recurse.  we're called from udpquery, called from
1046 	 *  netquery, which current holds qp->dp->querylck,
1047 	 *  so release it now and acquire it upon return.
1048 	 */
1049 	if(!mp->ns)
1050 		return 0;
1051 	tp = rrlookup(ndp, Tns, NOneg);
1052 	if(contains(qp->nsrp, tp)){
1053 		rrfreelist(tp);
1054 		return 0;
1055 	}
1056 	procsetname("recursive query for %s %s", qp->dp->name,
1057 		rrname(qp->type, buf, sizeof buf));
1058 //	qunlock(&qp->dp->querylck);
1059 
1060 	queryinit(&nquery, qp->dp, qp->type, qp->req);
1061 	nquery.nsrp = tp;
1062 	rv = netquery(&nquery, depth+1);
1063 
1064 //	qlock(&qp->dp->querylck);
1065 	rrfreelist(tp);
1066 	querydestroy(&nquery);
1067 	return rv;
1068 }
1069 
1070 /*
1071  * send a query via tcp to a single address (from ibuf's udp header)
1072  * and read the answer(s) into mp->an.
1073  */
1074 static int
1075 tcpquery(Query *qp, DNSmsg *mp, int depth, uchar *ibuf, uchar *obuf, int len,
1076 	int waitsecs, int inns, ushort req)
1077 {
1078 	int rv = 0;
1079 	ulong endtime;
1080 
1081 	endtime = time(nil) + waitsecs;
1082 	if(endtime > qp->req->aborttime)
1083 		endtime = qp->req->aborttime;
1084 
1085 	dnslog("%s: udp reply truncated; retrying query via tcp to %I",
1086 		qp->dp->name, qp->tcpip);
1087 
1088 	qlock(&qp->tcplock);
1089 	memmove(obuf, ibuf, IPaddrlen);		/* send back to respondent */
1090 	/* sets qp->tcpip from obuf's udp header */
1091 	if (xmitquery(qp, Tcp, depth, obuf, inns, len) < 0 ||
1092 	    readreply(qp, Tcp, req, ibuf, mp, endtime) < 0)
1093 		rv = -1;
1094 	if (qp->tcpfd > 0) {
1095 		hangup(qp->tcpctlfd);
1096 		close(qp->tcpctlfd);
1097 		close(qp->tcpfd);
1098 	}
1099 	qp->tcpfd = qp->tcpctlfd = -1;
1100 	qunlock(&qp->tcplock);
1101 	return rv;
1102 }
1103 
1104 /*
1105  *  query name servers.  If the name server returns a pointer to another
1106  *  name server, recurse.
1107  */
1108 static int
1109 netquery1(Query *qp, int depth, uchar *ibuf, uchar *obuf, int waitsecs, int inns)
1110 {
1111 	int ndest, len, replywaits, rv;
1112 	ushort req;
1113 	ulong endtime;
1114 	char buf[12];
1115 	uchar srcip[IPaddrlen];
1116 	DNSmsg m;
1117 	Dest *p, *np;
1118 	Dest dest[Maxdest];
1119 
1120 	/* pack request into a udp message */
1121 	req = rand();
1122 	len = mkreq(qp->dp, qp->type, obuf, Frecurse|Oquery, req);
1123 
1124 	/* no server addresses yet */
1125 	queryck(qp);
1126 	for (p = dest; p < dest + nelem(dest); p++)
1127 		destinit(p);
1128 	qp->curdest = qp->dest = dest;
1129 
1130 	/*
1131 	 *  transmit udp requests and wait for answers.
1132 	 *  at most Maxtrans attempts to each address.
1133 	 *  each cycle send one more message than the previous.
1134 	 *  retry a query via tcp if its response is truncated.
1135 	 */
1136 	for(ndest = 1; ndest < Maxdest; ndest++){
1137 		qp->ndest = ndest;
1138 		qp->tcpset = 0;
1139 		if (xmitquery(qp, Udp, depth, obuf, inns, len) < 0)
1140 			break;
1141 
1142 		endtime = time(nil) + waitsecs;
1143 		if(endtime > qp->req->aborttime)
1144 			endtime = qp->req->aborttime;
1145 
1146 		for(replywaits = 0; replywaits < ndest; replywaits++){
1147 			procsetname("reading %sside reply from %s%I for %s %s",
1148 				(inns? "in": "out"),
1149 				(isaslug(qp->tcpip)? "sluggard ": ""), obuf,
1150 				qp->dp->name, rrname(qp->type, buf, sizeof buf));
1151 
1152 			/* read udp answer */
1153 			if (readreply(qp, Udp, req, ibuf, &m, endtime) >= 0)
1154 				memmove(srcip, ibuf, IPaddrlen);
1155 			else if (!(m.flags & Ftrunc)) {
1156 				addslug(ibuf);
1157 				break;		/* timed out on this dest */
1158 			} else {
1159 				/* whoops, it was truncated! ask again via tcp */
1160 				rv = tcpquery(qp, &m, depth, ibuf, obuf, len,
1161 					waitsecs, inns, req);
1162 				if (rv < 0)
1163 					break;		/* failed via tcp too */
1164 				memmove(srcip, qp->tcpip, IPaddrlen);
1165 			}
1166 
1167 			/* find responder */
1168 			// dnslog("netquery1 got reply from %I", srcip);
1169 			for(p = qp->dest; p < qp->curdest; p++)
1170 				if(memcmp(p->a, srcip, sizeof p->a) == 0)
1171 					break;
1172 
1173 			/* remove all addrs of responding server from list */
1174 			for(np = qp->dest; np < qp->curdest; np++)
1175 				if(np->s == p->s)
1176 					p->nx = Maxtrans;
1177 
1178 			rv = procansw(qp, &m, srcip, depth, p);
1179 			if (rv > 0)
1180 				return rv;
1181 		}
1182 	}
1183 
1184 	/* if all servers returned failure, propagate it */
1185 	qp->dp->respcode = Rserver;
1186 	for(p = dest; p < qp->curdest; p++) {
1187 		destck(p);
1188 		if(p->code != Rserver)
1189 			qp->dp->respcode = 0;
1190 		p->magic = 0;			/* prevent accidents */
1191 	}
1192 
1193 //	if (qp->dp->respcode)
1194 //		dnslog("netquery1 setting Rserver for %s", qp->dp->name);
1195 
1196 	qp->dest = qp->curdest = nil;		/* prevent accidents */
1197 	return 0;
1198 }
1199 
1200 /*
1201  *  run a command with a supplied fd as standard input
1202  */
1203 char *
1204 system(int fd, char *cmd)
1205 {
1206 	int pid, p, i;
1207 	static Waitmsg msg;
1208 
1209 	if((pid = fork()) == -1)
1210 		sysfatal("fork failed: %r");
1211 	else if(pid == 0){
1212 		dup(fd, 0);
1213 		close(fd);
1214 		for (i = 3; i < 200; i++)
1215 			close(i);		/* don't leak fds */
1216 		execl("/bin/rc", "rc", "-c", cmd, nil);
1217 		sysfatal("exec rc: %r");
1218 	}
1219 	for(p = waitpid(); p >= 0; p = waitpid())
1220 		if(p == pid)
1221 			return msg.msg;
1222 	return "lost child";
1223 }
1224 
/* values for the `patient' argument of udpquery */
enum { Hurry, Patient, };
/* values for the `inns' argument of udpquery */
enum { Outns, Inns, };
enum { Remntretry = 15, };	/* min. sec.s between remount attempts */
1228 
1229 static int
1230 udpquery(Query *qp, char *mntpt, int depth, int patient, int inns)
1231 {
1232 	int fd, rv = 0;
1233 	long now;
1234 	char *msg;
1235 	uchar *obuf, *ibuf;
1236 	static QLock mntlck;
1237 	static ulong lastmount;
1238 
1239 	/* use alloced buffers rather than ones from the stack */
1240 	// ibuf = emalloc(Maxudpin+OUdphdrsize);
1241 	ibuf = emalloc(64*1024);		/* max. tcp reply size */
1242 	obuf = emalloc(Maxudp+OUdphdrsize);
1243 
1244 	fd = udpport(mntpt);
1245 	while (fd < 0 && cfg.straddle && strcmp(mntpt, "/net.alt") == 0) {
1246 		/* HACK: remount /net.alt */
1247 		now = time(nil);
1248 		if (now < lastmount + Remntretry)
1249 			sleep((lastmount + Remntretry - now)*1000);
1250 		qlock(&mntlck);
1251 		fd = udpport(mntpt);	/* try again under lock */
1252 		if (fd < 0) {
1253 			dnslog("[%d] remounting /net.alt", getpid());
1254 			unmount(nil, "/net.alt");
1255 
1256 			msg = system(open("/dev/null", ORDWR), "outside");
1257 
1258 			lastmount = time(nil);
1259 			if (msg && *msg) {
1260 				dnslog("[%d] can't remount /net.alt: %s",
1261 					getpid(), msg);
1262 				sleep(10*1000);		/* don't spin wildly */
1263 			} else
1264 				fd = udpport(mntpt);
1265 		}
1266 		qunlock(&mntlck);
1267 	}
1268 	if(fd >= 0) {
1269 		qp->req->aborttime = time(nil) + (patient? Maxreqtm: Maxreqtm/2);
1270 		qp->udpfd = fd;
1271 		/* tune; was (patient? 15: 10) */
1272 		rv = netquery1(qp, depth, ibuf, obuf, (patient? 10: 5), inns);
1273 		close(fd);
1274 	} else
1275 		dnslog("can't get udpport for %s query of name %s: %r",
1276 			mntpt, qp->dp->name);
1277 
1278 	free(obuf);
1279 	free(ibuf);
1280 	return rv;
1281 }
1282 
1283 /* look up (dp->name,type) via *nsrp with results in *reqp */
1284 static int
1285 netquery(Query *qp, int depth)
1286 {
1287 	int lock, rv, triedin, inname;
1288 	RR *rp;
1289 
1290 	if(depth > 12)			/* in a recursive loop? */
1291 		return 0;
1292 
1293 	slave(qp->req);
1294 	/*
1295 	 * slave might have forked.  if so, the parent process longjmped to
1296 	 * req->mret; we're usually the child slave, but if there are too
1297 	 * many children already, we're still the same process.
1298 	 */
1299 
1300 	/* don't lock before call to slave so only children can block */
1301 	if (0)
1302 		lock = qp->req->isslave != 0;
1303 	if(0 && lock) {
1304 		procsetname("query lock wait for %s", qp->dp->name);
1305 		/*
1306 		 * don't make concurrent queries for this name.
1307 		 *
1308 		 * this seemed like a good idea, to avoid swamping
1309 		 * an overloaded ns, but in practice, dns processes
1310 		 * pile up quickly and dns becomes unresponsive for a while.
1311 		 */
1312 		qlock(&qp->dp->querylck);
1313 	}
1314 	procsetname("netquery: %s", qp->dp->name);
1315 
1316 	/* prepare server RR's for incremental lookup */
1317 	for(rp = qp->nsrp; rp; rp = rp->next)
1318 		rp->marker = 0;
1319 
1320 	rv = 0;				/* pessimism */
1321 	triedin = 0;
1322 	qp->nsrp = qp->nsrp;
1323 	/*
1324 	 * normal resolvers and servers will just use mntpt for all addresses,
1325 	 * even on the outside.  straddling servers will use mntpt (/net)
1326 	 * for inside addresses and /net.alt for outside addresses,
1327 	 * thus bypassing other inside nameservers.
1328 	 */
1329 	inname = insideaddr(qp->dp->name);
1330 	if (!cfg.straddle || inname) {
1331 		rv = udpquery(qp, mntpt, depth, Hurry, (cfg.inside? Inns: Outns));
1332 		triedin = 1;
1333 	}
1334 
1335 	/*
1336 	 * if we're still looking, are inside, and have an outside domain,
1337 	 * try it on our outside interface, if any.
1338 	 */
1339 	if (rv == 0 && cfg.inside && !inname) {
1340 		if (triedin)
1341 			dnslog(
1342 	   "[%d] netquery: internal nameservers failed for %s; trying external",
1343 				getpid(), qp->dp->name);
1344 
1345 		/* prepare server RR's for incremental lookup */
1346 		for(rp = qp->nsrp; rp; rp = rp->next)
1347 			rp->marker = 0;
1348 
1349 		rv = udpquery(qp, "/net.alt", depth, Patient, Outns);
1350 	}
1351 //	if (rv == 0)		/* could ask /net.alt/dns directly */
1352 //		askoutdns(qp->dp, qp->type);
1353 
1354 	if(0 && lock)
1355 		qunlock(&qp->dp->querylck);
1356 	return rv;
1357 }
1358 
1359 int
1360 seerootns(void)
1361 {
1362 	int rv;
1363 	char root[] = "";
1364 	Request req;
1365 	Query query;
1366 
1367 	memset(&req, 0, sizeof req);
1368 	req.isslave = 1;
1369 	req.aborttime = now + Maxreqtm;
1370 	queryinit(&query, dnlookup(root, Cin, 1), Tns, &req);
1371 	query.nsrp = dblookup(root, Cin, Tns, 0, 0);
1372 	rv = netquery(&query, 0);
1373 	querydestroy(&query);
1374 	return rv;
1375 }
1376