xref: /openbsd-src/usr.sbin/nsd/query.c (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1 /*
2  * query.c -- nsd(8) the resolver.
3  *
4  * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
5  *
6  * See LICENSE for the license.
7  *
8  */
9 
10 #include "config.h"
11 
12 #include <sys/types.h>
13 #include <sys/socket.h>
14 #include <netinet/in.h>
15 #include <arpa/inet.h>
16 #include <assert.h>
17 #include <ctype.h>
18 #include <errno.h>
19 #include <limits.h>
20 #include <stddef.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <time.h>
25 #include <unistd.h>
26 #include <netdb.h>
27 
28 #include "answer.h"
29 #include "axfr.h"
30 #include "dns.h"
31 #include "dname.h"
32 #include "nsd.h"
33 #include "namedb.h"
34 #include "query.h"
35 #include "util.h"
36 #include "options.h"
37 #include "nsec3.h"
38 #include "tsig.h"
39 
40 /* [Bug #253] Adding unnecessary NS RRset may lead to undesired truncation.
41  * This function determines if the final response packet needs the NS RRset
42  * included. Currently, it will only return negative if QTYPE == DNSKEY|DS.
43  * This way, resolvers won't fallback to TCP unnecessarily when priming
44  * trust anchors.
45  */
46 static int answer_needs_ns(struct query  *query);
47 
48 static int add_rrset(struct query  *query,
49 		     answer_type    *answer,
50 		     rr_section_type section,
51 		     domain_type    *owner,
52 		     rrset_type     *rrset);
53 
54 static void answer_authoritative(struct nsd	  *nsd,
55 				 struct query     *q,
56 				 answer_type      *answer,
57 				 size_t            domain_number,
58 				 int               exact,
59 				 domain_type      *closest_match,
60 				 domain_type      *closest_encloser,
61 				 const dname_type *qname);
62 
63 static void answer_lookup_zone(struct nsd *nsd, struct query *q,
64 			       answer_type *answer, size_t domain_number,
65 			       int exact, domain_type *closest_match,
66 			       domain_type *closest_encloser,
67 			       const dname_type *qname);
68 
69 void
70 query_put_dname_offset(struct query *q, domain_type *domain, uint16_t offset)
71 {
72 	assert(q);
73 	assert(domain);
74 	assert(domain->number > 0);
75 
76 	if (offset > MAX_COMPRESSION_OFFSET)
77 		return;
78 	if (q->compressed_dname_count >= MAX_COMPRESSED_DNAMES)
79 		return;
80 
81 	q->compressed_dname_offsets[domain->number] = offset;
82 	q->compressed_dnames[q->compressed_dname_count] = domain;
83 	++q->compressed_dname_count;
84 }
85 
86 void
87 query_clear_dname_offsets(struct query *q, size_t max_offset)
88 {
89 	while (q->compressed_dname_count > 0
90 	       && (q->compressed_dname_offsets[q->compressed_dnames[q->compressed_dname_count - 1]->number]
91 		   >= max_offset))
92 	{
93 		q->compressed_dname_offsets[q->compressed_dnames[q->compressed_dname_count - 1]->number] = 0;
94 		--q->compressed_dname_count;
95 	}
96 }
97 
98 void
99 query_clear_compression_tables(struct query *q)
100 {
101 	uint16_t i;
102 
103 	for (i = 0; i < q->compressed_dname_count; ++i) {
104 		assert(q->compressed_dnames);
105 		q->compressed_dname_offsets[q->compressed_dnames[i]->number] = 0;
106 	}
107 	q->compressed_dname_count = 0;
108 }
109 
110 void
111 query_add_compression_domain(struct query *q, domain_type *domain, uint16_t offset)
112 {
113 	while (domain->parent) {
114 		DEBUG(DEBUG_NAME_COMPRESSION, 2,
115 		      (LOG_INFO, "query dname: %s, number: %lu, offset: %u\n",
116 		       domain_to_string(domain),
117 		       (unsigned long) domain->number,
118 		       offset));
119 		query_put_dname_offset(q, domain, offset);
120 		offset += label_length(dname_name(domain_dname(domain))) + 1;
121 		domain = domain->parent;
122 	}
123 }
124 
125 /*
126  * Generate an error response with the specified RCODE.
127  */
128 query_state_type
129 query_error (struct query *q, nsd_rc_type rcode)
130 {
131 	if (rcode == NSD_RC_DISCARD) {
132 		return QUERY_DISCARDED;
133 	}
134 
135 	buffer_clear(q->packet);
136 
137 	QR_SET(q->packet);	   /* This is an answer.  */
138 	AD_CLR(q->packet);
139 	RCODE_SET(q->packet, (int) rcode); /* Error code.  */
140 
141 	/* Truncate the question as well... */
142 	QDCOUNT_SET(q->packet, 0);
143 	ANCOUNT_SET(q->packet, 0);
144 	NSCOUNT_SET(q->packet, 0);
145 	ARCOUNT_SET(q->packet, 0);
146 	buffer_set_position(q->packet, QHEADERSZ);
147 	return QUERY_PROCESSED;
148 }
149 
150 static int
151 query_ratelimit_err(nsd_type* nsd)
152 {
153 	time_t now = time(NULL);
154 	if(nsd->err_limit_time == now) {
155 		/* see if limit is exceeded for this second */
156 		if(nsd->err_limit_count++ > ERROR_RATELIMIT)
157 			return 1;
158 	} else {
159 		/* new second, new limits */
160 		nsd->err_limit_time = now;
161 		nsd->err_limit_count = 1;
162 	}
163 	return 0;
164 }
165 
166 static query_state_type
167 query_formerr (struct query *query, nsd_type* nsd)
168 {
169 	int opcode = OPCODE(query->packet);
170 	if(query_ratelimit_err(nsd))
171 		return QUERY_DISCARDED;
172 	FLAGS_SET(query->packet, FLAGS(query->packet) & 0x0100U);
173 			/* Preserve the RD flag. Clear the rest. */
174 	OPCODE_SET(query->packet, opcode);
175 	return query_error(query, NSD_RC_FORMAT);
176 }
177 
178 static void
179 query_cleanup(void *data)
180 {
181 	query_type *query = (query_type *) data;
182 	region_destroy(query->region);
183 }
184 
185 query_type *
186 query_create(region_type *region, uint16_t *compressed_dname_offsets,
187 	size_t compressed_dname_size)
188 {
189 	query_type *query
190 		= (query_type *) region_alloc_zero(region, sizeof(query_type));
191 	/* create region with large block size, because the initial chunk
192 	   saves many mallocs in the server */
193 	query->region = region_create_custom(xalloc, free, 16384, 16384/8, 32, 0);
194 	query->compressed_dname_offsets = compressed_dname_offsets;
195 	query->packet = buffer_create(region, QIOBUFSZ);
196 	region_add_cleanup(region, query_cleanup, query);
197 	query->compressed_dname_offsets_size = compressed_dname_size;
198 	tsig_create_record(&query->tsig, region);
199 	query->tsig_prepare_it = 1;
200 	query->tsig_update_it = 1;
201 	query->tsig_sign_it = 1;
202 	return query;
203 }
204 
205 void
206 query_reset(query_type *q, size_t maxlen, int is_tcp)
207 {
208 	/*
209 	 * As long as less than 4Kb (region block size) has been used,
210 	 * this call to free_all is free, the block is saved for re-use,
211 	 * so no malloc() or free() calls are done.
212 	 * at present use of the region is for:
213 	 *   o query qname dname_type (255 max).
214 	 *   o wildcard expansion domain_type (7*ptr+u32+2bytes)+(5*ptr nsec3)
215 	 *   o wildcard expansion for additional section domain_type.
216 	 *   o nsec3 hashed name(s) (3 dnames for a nonexist_proof,
217 	 *     one proof per wildcard and for nx domain).
218 	 */
219 	region_free_all(q->region);
220 	q->addrlen = sizeof(q->addr);
221 	q->maxlen = maxlen;
222 	q->reserved_space = 0;
223 	buffer_clear(q->packet);
224 	edns_init_record(&q->edns);
225 	tsig_init_record(&q->tsig, NULL, NULL);
226 	q->tsig_prepare_it = 1;
227 	q->tsig_update_it = 1;
228 	q->tsig_sign_it = 1;
229 	q->tcp = is_tcp;
230 	q->qname = NULL;
231 	q->qtype = 0;
232 	q->qclass = 0;
233 	q->zone = NULL;
234 	q->opcode = 0;
235 	q->cname_count = 0;
236 	q->delegation_domain = NULL;
237 	q->delegation_rrset = NULL;
238 	q->compressed_dname_count = 0;
239 	q->number_temporary_domains = 0;
240 
241 	q->axfr_is_done = 0;
242 	q->axfr_zone = NULL;
243 	q->axfr_current_domain = NULL;
244 	q->axfr_current_rrset = NULL;
245 	q->axfr_current_rr = 0;
246 
247 #ifdef RATELIMIT
248 	q->wildcard_domain = NULL;
249 #endif
250 }
251 
252 /* get a temporary domain number (or 0=failure) */
253 static domain_type*
254 query_get_tempdomain(struct query *q)
255 {
256 	static domain_type d[EXTRA_DOMAIN_NUMBERS];
257 	if(q->number_temporary_domains >= EXTRA_DOMAIN_NUMBERS)
258 		return 0;
259 	q->number_temporary_domains ++;
260 	memset(&d[q->number_temporary_domains-1], 0, sizeof(domain_type));
261 	d[q->number_temporary_domains-1].number = q->compressed_dname_offsets_size +
262 		q->number_temporary_domains - 1;
263 	return &d[q->number_temporary_domains-1];
264 }
265 
266 static void
267 query_addtxt(struct query  *q,
268 	     const uint8_t *dname,
269 	     uint16_t       klass,
270 	     uint32_t       ttl,
271 	     const char    *txt)
272 {
273 	size_t txt_length = strlen(txt);
274 	uint8_t len = (uint8_t) txt_length;
275 
276 	assert(txt_length <= UCHAR_MAX);
277 
278 	/* Add the dname */
279 	if (dname >= buffer_begin(q->packet)
280 	    && dname <= buffer_current(q->packet))
281 	{
282 		buffer_write_u16(q->packet,
283 				 0xc000 | (dname - buffer_begin(q->packet)));
284 	} else {
285 		buffer_write(q->packet, dname + 1, *dname);
286 	}
287 
288 	buffer_write_u16(q->packet, TYPE_TXT);
289 	buffer_write_u16(q->packet, klass);
290 	buffer_write_u32(q->packet, ttl);
291 	buffer_write_u16(q->packet, len + 1);
292 	buffer_write_u8(q->packet, len);
293 	buffer_write(q->packet, txt, len);
294 }
295 
296 /*
297  * Parse the question section of a query.  The normalized query name
298  * is stored in QUERY->name, the class in QUERY->klass, and the type
299  * in QUERY->type.
300  */
301 static int
302 process_query_section(query_type *query)
303 {
304 	uint8_t qnamebuf[MAXDOMAINLEN];
305 
306 	buffer_set_position(query->packet, QHEADERSZ);
307 	/* Lets parse the query name and convert it to lower case.  */
308 	if(!packet_read_query_section(query->packet, qnamebuf,
309 		&query->qtype, &query->qclass))
310 		return 0;
311 	query->qname = dname_make(query->region, qnamebuf, 1);
312 	return 1;
313 }
314 
315 
316 /*
317  * Process an optional EDNS OPT record.  Sets QUERY->EDNS to 0 if
318  * there was no EDNS record, to -1 if there was an invalid or
319  * unsupported EDNS record, and to 1 otherwise.  Updates QUERY->MAXLEN
320  * if the EDNS record specifies a maximum supported response length.
321  *
322  * Return NSD_RC_FORMAT on failure, NSD_RC_OK on success.
323  */
324 static nsd_rc_type
325 process_edns(nsd_type* nsd, struct query *q)
326 {
327 	if (q->edns.status == EDNS_ERROR) {
328 		/* The only error is VERSION not implemented */
329 		return NSD_RC_FORMAT;
330 	}
331 
332 	if (q->edns.status == EDNS_OK) {
333 		/* Only care about UDP size larger than normal... */
334 		if (!q->tcp && q->edns.maxlen > UDP_MAX_MESSAGE_LEN) {
335 			size_t edns_size;
336 #if defined(INET6)
337 			if (q->addr.ss_family == AF_INET6) {
338 				edns_size = nsd->ipv6_edns_size;
339 			} else
340 #endif
341 			edns_size = nsd->ipv4_edns_size;
342 
343 			if (q->edns.maxlen < edns_size) {
344 				q->maxlen = q->edns.maxlen;
345 			} else {
346 				q->maxlen = edns_size;
347 			}
348 
349 #if defined(INET6) && !defined(IPV6_USE_MIN_MTU) && !defined(IPV6_MTU)
350 			/*
351 			 * Use IPv6 minimum MTU to avoid sending
352 			 * packets that are too large for some links.
353 			 * IPv6 will not automatically fragment in
354 			 * this case (unlike IPv4).
355 			 */
356 			if (q->addr.ss_family == AF_INET6
357 			    && q->maxlen > IPV6_MIN_MTU)
358 			{
359 				q->maxlen = IPV6_MIN_MTU;
360 			}
361 #endif
362 		}
363 
364 		/* Strip the OPT resource record off... */
365 		buffer_set_position(q->packet, q->edns.position);
366 		buffer_set_limit(q->packet, q->edns.position);
367 		ARCOUNT_SET(q->packet, ARCOUNT(q->packet) - 1);
368 	}
369 	return NSD_RC_OK;
370 }
371 
372 /*
373  * Processes TSIG.
374  * Sets error when tsig does not verify on the query.
375  */
376 static nsd_rc_type
377 process_tsig(struct query* q)
378 {
379 	if(q->tsig.status == TSIG_ERROR)
380 		return NSD_RC_FORMAT;
381 	if(q->tsig.status == TSIG_OK) {
382 		if(!tsig_from_query(&q->tsig)) {
383 			char a[128];
384 			addr2str(&q->addr, a, sizeof(a));
385 			log_msg(LOG_ERR, "query: bad tsig (%s) for key %s from %s",
386 				tsig_error(q->tsig.error_code),
387 				dname_to_string(q->tsig.key_name, NULL), a);
388 			return NSD_RC_NOTAUTH;
389 		}
390 		buffer_set_limit(q->packet, q->tsig.position);
391 		ARCOUNT_SET(q->packet, ARCOUNT(q->packet) - 1);
392 		tsig_prepare(&q->tsig);
393 		tsig_update(&q->tsig, q->packet, buffer_limit(q->packet));
394 		if(!tsig_verify(&q->tsig)) {
395 			char a[128];
396 			addr2str(&q->addr, a, sizeof(a));
397 			log_msg(LOG_ERR, "query: bad tsig signature for key %s from %s",
398 				dname_to_string(q->tsig.key->name, NULL), a);
399 			return NSD_RC_NOTAUTH;
400 		}
401 		DEBUG(DEBUG_XFRD,1, (LOG_INFO, "query good tsig signature for %s",
402 			dname_to_string(q->tsig.key->name, NULL)));
403 	}
404 	return NSD_RC_OK;
405 }
406 
407 /*
408  * Check notify acl and forward to xfrd (or return an error).
409  */
410 static query_state_type
411 answer_notify(struct nsd* nsd, struct query *query)
412 {
413 	int acl_num, acl_num_xfr;
414 	acl_options_t *why;
415 	nsd_rc_type rc;
416 
417 	zone_options_t* zone_opt;
418 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "got notify %s processing acl",
419 		dname_to_string(query->qname, NULL)));
420 
421 	zone_opt = zone_options_find(nsd->options, query->qname);
422 	if(!zone_opt)
423 		return query_error(query, NSD_RC_NXDOMAIN);
424 
425 	if(!nsd->this_child) /* we are in debug mode or something */
426 		return query_error(query, NSD_RC_SERVFAIL);
427 
428 	if(!tsig_find_rr(&query->tsig, query->packet)) {
429 		DEBUG(DEBUG_XFRD,2, (LOG_ERR, "bad tsig RR format"));
430 		return query_error(query, NSD_RC_FORMAT);
431 	}
432 	rc = process_tsig(query);
433 	if(rc != NSD_RC_OK)
434 		return query_error(query, rc);
435 
436 	/* check if it passes acl */
437 	if((acl_num = acl_check_incoming(zone_opt->pattern->allow_notify, query,
438 		&why)) != -1)
439 	{
440 		sig_atomic_t mode = NSD_PASS_TO_XFRD;
441 		int s = nsd->this_child->parent_fd;
442 		uint16_t sz;
443 		uint32_t acl_send = htonl(acl_num);
444 		uint32_t acl_xfr;
445 		size_t pos;
446 
447 		/* Find priority candidate for request XFR. -1 if no match */
448 		acl_num_xfr = acl_check_incoming(
449 			zone_opt->pattern->request_xfr, query, NULL);
450 
451 		acl_xfr = htonl(acl_num_xfr);
452 
453 		assert(why);
454 		DEBUG(DEBUG_XFRD,1, (LOG_INFO, "got notify %s passed acl %s %s",
455 			dname_to_string(query->qname, NULL),
456 			why->ip_address_spec,
457 			why->nokey?"NOKEY":
458 			(why->blocked?"BLOCKED":why->key_name)));
459 		sz = buffer_limit(query->packet);
460 		if(buffer_limit(query->packet) > MAX_PACKET_SIZE)
461 			return query_error(query, NSD_RC_SERVFAIL);
462 		/* forward to xfrd for processing
463 		   Note. Blocking IPC I/O, but acl is OK. */
464 		sz = htons(sz);
465 		if(!write_socket(s, &mode, sizeof(mode)) ||
466 			!write_socket(s, &sz, sizeof(sz)) ||
467 			!write_socket(s, buffer_begin(query->packet),
468 				buffer_limit(query->packet)) ||
469 			!write_socket(s, &acl_send, sizeof(acl_send)) ||
470 			!write_socket(s, &acl_xfr, sizeof(acl_xfr))) {
471 			log_msg(LOG_ERR, "error in IPC notify server2main, %s",
472 				strerror(errno));
473 			return query_error(query, NSD_RC_SERVFAIL);
474 		}
475 		if(verbosity >= 1) {
476 			uint32_t serial = 0;
477 			char address[128];
478 			addr2str(&query->addr, address, sizeof(address));
479 			if(packet_find_notify_serial(query->packet, &serial))
480 			  VERBOSITY(1, (LOG_INFO, "notify for %s from %s serial %u",
481 				dname_to_string(query->qname, NULL), address,
482 				(unsigned)serial));
483 			else
484 			  VERBOSITY(1, (LOG_INFO, "notify for %s from %s",
485 				dname_to_string(query->qname, NULL), address));
486 		}
487 
488 		/* create notify reply - keep same query contents */
489 		QR_SET(query->packet);         /* This is an answer.  */
490 		AA_SET(query->packet);	   /* we are authoritative. */
491 		ANCOUNT_SET(query->packet, 0);
492 		NSCOUNT_SET(query->packet, 0);
493 		ARCOUNT_SET(query->packet, 0);
494 		RCODE_SET(query->packet, RCODE_OK); /* Error code.  */
495 		/* position is right after the query */
496 		pos = buffer_position(query->packet);
497 		buffer_clear(query->packet);
498 		buffer_set_position(query->packet, pos);
499 		/* tsig is added in add_additional later (if needed) */
500 		return QUERY_PROCESSED;
501 	}
502 
503 	if (verbosity >= 2) {
504 		char address[128];
505 		addr2str(&query->addr, address, sizeof(address));
506 		VERBOSITY(2, (LOG_INFO, "notify for %s from %s refused, %s%s",
507 			dname_to_string(query->qname, NULL),
508 			address,
509 			why?why->key_name:"no acl matches",
510 			why?why->ip_address_spec:"."));
511 	}
512 
513 	return query_error(query, NSD_RC_REFUSE);
514 }
515 
516 
517 /*
518  * Answer a query in the CHAOS class.
519  */
520 static query_state_type
521 answer_chaos(struct nsd *nsd, query_type *q)
522 {
523 	AA_CLR(q->packet);
524 	switch (q->qtype) {
525 	case TYPE_ANY:
526 	case TYPE_TXT:
527 		if ((q->qname->name_size == 11
528 		     && memcmp(dname_name(q->qname), "\002id\006server", 11) == 0) ||
529 		    (q->qname->name_size ==  15
530 		     && memcmp(dname_name(q->qname), "\010hostname\004bind", 15) == 0))
531 		{
532 			/* Add ID */
533 			query_addtxt(q,
534 				     buffer_begin(q->packet) + QHEADERSZ,
535 				     CLASS_CH,
536 				     0,
537 				     nsd->identity);
538 			ANCOUNT_SET(q->packet, ANCOUNT(q->packet) + 1);
539 		} else if ((q->qname->name_size == 16
540 			    && memcmp(dname_name(q->qname), "\007version\006server", 16) == 0) ||
541 			   (q->qname->name_size == 14
542 			    && memcmp(dname_name(q->qname), "\007version\004bind", 14) == 0))
543 		{
544 			if(!nsd->options->hide_version) {
545 				/* Add version */
546 				query_addtxt(q,
547 				     buffer_begin(q->packet) + QHEADERSZ,
548 				     CLASS_CH,
549 				     0,
550 				     nsd->version);
551 				ANCOUNT_SET(q->packet, ANCOUNT(q->packet) + 1);
552 			} else {
553 				RCODE_SET(q->packet, RCODE_REFUSE);
554 			}
555 		} else {
556 			RCODE_SET(q->packet, RCODE_REFUSE);
557 		}
558 		break;
559 	default:
560 		RCODE_SET(q->packet, RCODE_REFUSE);
561 		break;
562 	}
563 
564 	return QUERY_PROCESSED;
565 }
566 
567 
568 /*
569  * Find the covering NSEC for a non-existent domain name.  Normally
570  * the NSEC will be located at CLOSEST_MATCH, except when it is an
571  * empty non-terminal.  In this case the NSEC may be located at the
572  * previous domain name (in canonical ordering).
573  */
574 static domain_type *
575 find_covering_nsec(domain_type *closest_match,
576 		   zone_type   *zone,
577 		   rrset_type **nsec_rrset)
578 {
579 	assert(closest_match);
580 	assert(nsec_rrset);
581 
582 	/* loop away temporary created domains. For real ones it is &RBTREE_NULL */
583 	while (closest_match->rnode == NULL)
584 		closest_match = closest_match->parent;
585 	while (closest_match) {
586 		*nsec_rrset = domain_find_rrset(closest_match, zone, TYPE_NSEC);
587 		if (*nsec_rrset) {
588 			return closest_match;
589 		}
590 		if (closest_match == zone->apex) {
591 			/* Don't look outside the current zone.  */
592 			return NULL;
593 		}
594 		closest_match = domain_previous(closest_match);
595 	}
596 	return NULL;
597 }
598 
599 
600 struct additional_rr_types
601 {
602 	uint16_t        rr_type;
603 	rr_section_type rr_section;
604 };
605 
606 struct additional_rr_types default_additional_rr_types[] = {
607 	{ TYPE_A, ADDITIONAL_A_SECTION },
608 	{ TYPE_AAAA, ADDITIONAL_AAAA_SECTION },
609 	{ 0, (rr_section_type) 0 }
610 };
611 
612 struct additional_rr_types swap_aaaa_additional_rr_types[] = {
613 	{ TYPE_AAAA, ADDITIONAL_A_SECTION },
614 	{ TYPE_A, ADDITIONAL_AAAA_SECTION },
615 	{ 0, (rr_section_type) 0 }
616 };
617 
618 struct additional_rr_types rt_additional_rr_types[] = {
619 	{ TYPE_A, ADDITIONAL_A_SECTION },
620 	{ TYPE_AAAA, ADDITIONAL_AAAA_SECTION },
621 	{ TYPE_X25, ADDITIONAL_OTHER_SECTION },
622 	{ TYPE_ISDN, ADDITIONAL_OTHER_SECTION },
623 	{ 0, (rr_section_type) 0 }
624 };
625 
626 static void
627 add_additional_rrsets(struct query *query, answer_type *answer,
628 		      rrset_type *master_rrset, size_t rdata_index,
629 		      int allow_glue, struct additional_rr_types types[])
630 {
631 	size_t i;
632 
633 	assert(query);
634 	assert(answer);
635 	assert(master_rrset);
636 	assert(rdata_atom_is_domain(rrset_rrtype(master_rrset), rdata_index));
637 
638 	for (i = 0; i < master_rrset->rr_count; ++i) {
639 		int j;
640 		domain_type *additional = rdata_atom_domain(master_rrset->rrs[i].rdatas[rdata_index]);
641 		domain_type *match = additional;
642 
643 		assert(additional);
644 
645 		if (!allow_glue && domain_is_glue(match, query->zone))
646 			continue;
647 
648 		/*
649 		 * Check to see if we need to generate the dependent
650 		 * based on a wildcard domain.
651 		 */
652 		while (!match->is_existing) {
653 			match = match->parent;
654 		}
655 		if (additional != match && domain_wildcard_child(match)) {
656 			domain_type *wildcard_child = domain_wildcard_child(match);
657 			domain_type *temp = (domain_type *) region_alloc(
658 				query->region, sizeof(domain_type));
659 			temp->rnode = NULL;
660 			temp->dname = additional->dname;
661 			temp->number = additional->number;
662 			temp->parent = match;
663 			temp->wildcard_child_closest_match = temp;
664 			temp->rrsets = wildcard_child->rrsets;
665 			temp->is_existing = wildcard_child->is_existing;
666 			additional = temp;
667 		}
668 
669 		for (j = 0; types[j].rr_type != 0; ++j) {
670 			rrset_type *rrset = domain_find_rrset(
671 				additional, query->zone, types[j].rr_type);
672 			if (rrset) {
673 				answer_add_rrset(answer, types[j].rr_section,
674 						 additional, rrset);
675 			}
676 		}
677 	}
678 }
679 
680 static int
681 answer_needs_ns(struct query* query)
682 {
683 	assert(query);
684 	/* Currently, only troublesome for DNSKEY and DS,
685          * cuz their RRSETs are quite large. */
686 	return (query->qtype != TYPE_DNSKEY && query->qtype != TYPE_DS);
687 }
688 
689 static int
690 add_rrset(struct query   *query,
691 	  answer_type    *answer,
692 	  rr_section_type section,
693 	  domain_type    *owner,
694 	  rrset_type     *rrset)
695 {
696 	int result;
697 
698 	assert(query);
699 	assert(answer);
700 	assert(owner);
701 	assert(rrset);
702 	assert(rrset_rrclass(rrset) == CLASS_IN);
703 
704 	result = answer_add_rrset(answer, section, owner, rrset);
705 	switch (rrset_rrtype(rrset)) {
706 	case TYPE_NS:
707 #if defined(INET6)
708 		/* if query over IPv6, swap A and AAAA; put AAAA first */
709 		add_additional_rrsets(query, answer, rrset, 0, 1,
710 			(query->addr.ss_family == AF_INET6)?
711 			swap_aaaa_additional_rr_types:
712 			default_additional_rr_types);
713 #else
714 		add_additional_rrsets(query, answer, rrset, 0, 1,
715 				      default_additional_rr_types);
716 #endif
717 		break;
718 	case TYPE_MB:
719 		add_additional_rrsets(query, answer, rrset, 0, 0,
720 				      default_additional_rr_types);
721 		break;
722 	case TYPE_MX:
723 	case TYPE_KX:
724 		add_additional_rrsets(query, answer, rrset, 1, 0,
725 				      default_additional_rr_types);
726 		break;
727 	case TYPE_RT:
728 		add_additional_rrsets(query, answer, rrset, 1, 0,
729 				      rt_additional_rr_types);
730 		break;
731 	default:
732 		break;
733 	}
734 
735 	return result;
736 }
737 
738 
739 /* returns 0 on error, or the domain number for to_name.
740    from_name is changes to to_name by the DNAME rr.
741    DNAME rr is from src to dest.
742    closest encloser encloses the to_name. */
743 static size_t
744 query_synthesize_cname(struct query* q, struct answer* answer, const dname_type* from_name,
745 	const dname_type* to_name, domain_type* src, domain_type* to_closest_encloser,
746 	domain_type** to_closest_match, uint32_t ttl)
747 {
748 	/* add temporary domains for from_name and to_name and all
749 	   their (not allocated yet) parents */
750 	/* any domains below src are not_existing (because of DNAME at src) */
751 	int i;
752 	domain_type* cname_domain;
753 	domain_type* cname_dest;
754 	rrset_type* rrset;
755 
756 	/* allocate source part */
757 	domain_type* lastparent = src;
758 	assert(q && answer && from_name && to_name && src && to_closest_encloser);
759 	assert(to_closest_match);
760 	for(i=0; i < from_name->label_count - domain_dname(src)->label_count; i++)
761 	{
762 		domain_type* newdom = query_get_tempdomain(q);
763 		if(!newdom)
764 			return 0;
765 		newdom->is_existing = 1;
766 		newdom->parent = lastparent;
767 		newdom->dname
768 			= dname_partial_copy(q->region,
769 			from_name, domain_dname(src)->label_count + i + 1);
770 		if(dname_compare(domain_dname(newdom), q->qname) == 0) {
771 			/* 0 good for query name, otherwise new number */
772 			newdom->number = 0;
773 		}
774 		DEBUG(DEBUG_QUERY,2, (LOG_INFO, "created temp domain src %d. %s nr %d", i,
775 			domain_to_string(newdom), (int)newdom->number));
776 		lastparent = newdom;
777 	}
778 	cname_domain = lastparent;
779 
780 	/* allocate dest part */
781 	lastparent = to_closest_encloser;
782 	for(i=0; i < to_name->label_count - domain_dname(to_closest_encloser)->label_count;
783 		i++)
784 	{
785 		domain_type* newdom = query_get_tempdomain(q);
786 		if(!newdom)
787 			return 0;
788 		newdom->is_existing = 0;
789 		newdom->parent = lastparent;
790 		newdom->dname
791 			= dname_partial_copy(q->region,
792 			to_name, domain_dname(to_closest_encloser)->label_count + i + 1);
793 		DEBUG(DEBUG_QUERY,2, (LOG_INFO, "created temp domain dest %d. %s nr %d", i,
794 			domain_to_string(newdom), (int)newdom->number));
795 		lastparent = newdom;
796 	}
797 	cname_dest = lastparent;
798 	*to_closest_match = cname_dest;
799 
800 	/* allocate the CNAME RR */
801 	rrset = (rrset_type*) region_alloc(q->region, sizeof(rrset_type));
802 	memset(rrset, 0, sizeof(rrset_type));
803 	rrset->zone = q->zone;
804 	rrset->rr_count = 1;
805 	rrset->rrs = (rr_type*) region_alloc(q->region, sizeof(rr_type));
806 	memset(rrset->rrs, 0, sizeof(rr_type));
807 	rrset->rrs->owner = cname_domain;
808 	rrset->rrs->ttl = ttl;
809 	rrset->rrs->type = TYPE_CNAME;
810 	rrset->rrs->klass = CLASS_IN;
811 	rrset->rrs->rdata_count = 1;
812 	rrset->rrs->rdatas = (rdata_atom_type*)region_alloc(q->region,
813 		sizeof(rdata_atom_type));
814 	rrset->rrs->rdatas->domain = cname_dest;
815 
816 	if(!add_rrset(q, answer, ANSWER_SECTION, cname_domain, rrset)) {
817 		log_msg(LOG_ERR, "could not add synthesized CNAME rrset to packet");
818 	}
819 
820 	return cname_dest->number;
821 }
822 
823 /*
824  * Answer delegation information.
825  *
826  * DNSSEC: Include the DS RRset if present.  Otherwise include an NSEC
827  * record proving the DS RRset does not exist.
828  */
829 static void
830 answer_delegation(query_type *query, answer_type *answer)
831 {
832 	assert(answer);
833 	assert(query->delegation_domain);
834 	assert(query->delegation_rrset);
835 
836 	if (query->cname_count == 0) {
837 		AA_CLR(query->packet);
838 	} else {
839 		AA_SET(query->packet);
840 	}
841 
842 	add_rrset(query,
843 		  answer,
844 		  AUTHORITY_SECTION,
845 		  query->delegation_domain,
846 		  query->delegation_rrset);
847 	if (query->edns.dnssec_ok && zone_is_secure(query->zone)) {
848 		rrset_type *rrset;
849 		if ((rrset = domain_find_rrset(query->delegation_domain, query->zone, TYPE_DS))) {
850 			add_rrset(query, answer, AUTHORITY_SECTION,
851 				  query->delegation_domain, rrset);
852 #ifdef NSEC3
853 		} else if (query->zone->nsec3_param) {
854 			nsec3_answer_delegation(query, answer);
855 #endif
856 		} else if ((rrset = domain_find_rrset(query->delegation_domain, query->zone, TYPE_NSEC))) {
857 			add_rrset(query, answer, AUTHORITY_SECTION,
858 				  query->delegation_domain, rrset);
859 		}
860 	}
861 }
862 
863 
864 /*
865  * Answer SOA information.
866  */
867 static void
868 answer_soa(struct query *query, answer_type *answer)
869 {
870 	if (query->qclass != CLASS_ANY) {
871 		add_rrset(query, answer,
872 			  AUTHORITY_SECTION,
873 			  query->zone->apex,
874 			  query->zone->soa_nx_rrset);
875 	}
876 }
877 
878 
879 /*
880  * Answer that the domain name exists but there is no RRset with the
881  * requested type.
882  *
883  * DNSSEC: Include the correct NSEC record proving that the type does
884  * not exist.  In the wildcard no data (3.1.3.4) case the wildcard IS
885  * NOT expanded, so the ORIGINAL parameter must point to the original
886  * wildcard entry, not to the generated entry.
887  */
888 static void
889 answer_nodata(struct query *query, answer_type *answer, domain_type *original)
890 {
891 	if (query->cname_count == 0) {
892 		answer_soa(query, answer);
893 	}
894 
895 #ifdef NSEC3
896 	if (query->edns.dnssec_ok && query->zone->nsec3_param) {
897 		nsec3_answer_nodata(query, answer, original);
898 	} else
899 #endif
900 	if (query->edns.dnssec_ok && zone_is_secure(query->zone)) {
901 		domain_type *nsec_domain;
902 		rrset_type *nsec_rrset;
903 
904 		nsec_domain = find_covering_nsec(original, query->zone, &nsec_rrset);
905 		if (nsec_domain) {
906 			add_rrset(query, answer, AUTHORITY_SECTION, nsec_domain, nsec_rrset);
907 		}
908 	}
909 }
910 
911 static void
912 answer_nxdomain(query_type *query, answer_type *answer)
913 {
914 	RCODE_SET(query->packet, RCODE_NXDOMAIN);
915 	answer_soa(query, answer);
916 }
917 
918 
919 /*
920  * Answer domain information (or SOA if we do not have an RRset for
921  * the type specified by the query).
922  */
923 static void
924 answer_domain(struct nsd* nsd, struct query *q, answer_type *answer,
925 	      domain_type *domain, domain_type *original)
926 {
927 	rrset_type *rrset;
928 
929 	if (q->qtype == TYPE_ANY) {
930 		int added = 0;
931 		for (rrset = domain_find_any_rrset(domain, q->zone); rrset; rrset = rrset->next) {
932 			if (rrset->zone == q->zone
933 #ifdef NSEC3
934 				&& rrset_rrtype(rrset) != TYPE_NSEC3
935 #endif
936 			    /*
937 			     * Don't include the RRSIG RRset when
938 			     * DNSSEC is used, because it is added
939 			     * automatically on an per-RRset basis.
940 			     */
941 			    && !(q->edns.dnssec_ok
942 				 && zone_is_secure(q->zone)
943 				 && rrset_rrtype(rrset) == TYPE_RRSIG))
944 			{
945 				add_rrset(q, answer, ANSWER_SECTION, domain, rrset);
946 				++added;
947 			}
948 		}
949 		if (added == 0) {
950 			answer_nodata(q, answer, original);
951 			return;
952 		}
953 #ifdef NSEC3
954 	} else if (q->qtype == TYPE_NSEC3) {
955 		answer_nodata(q, answer, original);
956 		return;
957 #endif
958 	} else if ((rrset = domain_find_rrset(domain, q->zone, q->qtype))) {
959 		add_rrset(q, answer, ANSWER_SECTION, domain, rrset);
960 	} else if ((rrset = domain_find_rrset(domain, q->zone, TYPE_CNAME))) {
961 		int added;
962 
963 		/*
964 		 * If the CNAME is not added it is already in the
965 		 * answer, so we have a CNAME loop.  Don't follow the
966 		 * CNAME target in this case.
967 		 */
968 		added = add_rrset(q, answer, ANSWER_SECTION, domain, rrset);
969 		assert(rrset->rr_count > 0);
970 		if (added) {
971 			/* only process first CNAME record */
972 			domain_type *closest_match = rdata_atom_domain(rrset->rrs[0].rdatas[0]);
973 			domain_type *closest_encloser = closest_match;
974 			zone_type* origzone = q->zone;
975 			++q->cname_count;
976 
977 			answer_lookup_zone(nsd, q, answer, closest_match->number,
978 					     closest_match == closest_encloser,
979 					     closest_match, closest_encloser,
980 					     domain_dname(closest_match));
981 			q->zone = origzone;
982 		}
983 		return;
984 	} else {
985 		answer_nodata(q, answer, original);
986 		return;
987 	}
988 
989 	if (q->qclass != CLASS_ANY && q->zone->ns_rrset && answer_needs_ns(q)) {
990 		add_rrset(q, answer, OPTIONAL_AUTHORITY_SECTION, q->zone->apex,
991 			  q->zone->ns_rrset);
992 	}
993 }
994 
995 
996 /*
997  * Answer with authoritative data.  If a wildcard is matched the owner
998  * name will be expanded to the domain name specified by
999  * DOMAIN_NUMBER.  DOMAIN_NUMBER 0 (zero) is reserved for the original
1000  * query name.
1001  *
1002  * DNSSEC: Include the necessary NSEC records in case the request
1003  * domain name does not exist and/or a wildcard match does not exist.
1004  */
1005 static void
1006 answer_authoritative(struct nsd   *nsd,
1007 		     struct query *q,
1008 		     answer_type  *answer,
1009 		     size_t        domain_number,
1010 		     int           exact,
1011 		     domain_type  *closest_match,
1012 		     domain_type  *closest_encloser,
1013 		     const dname_type *qname)
1014 {
1015 	domain_type *match;
1016 	domain_type *original = closest_match;
1017 	domain_type *dname_ce;
1018 	rrset_type *rrset;
1019 
1020 #ifdef NSEC3
1021 	if(exact && domain_has_only_NSEC3(closest_match, q->zone)) {
1022 		exact = 0; /* pretend it does not exist */
1023 		if(closest_encloser->parent)
1024 			closest_encloser = closest_encloser->parent;
1025 	}
1026 #endif /* NSEC3 */
1027 	if((dname_ce = find_dname_above(closest_encloser, q->zone)) != NULL) {
1028 		/* occlude the found data, the DNAME is closest_encloser */
1029 		closest_encloser = dname_ce;
1030 		exact = 0;
1031 	}
1032 
1033 	if (exact) {
1034 		match = closest_match;
1035 	} else if ((rrset=domain_find_rrset(closest_encloser, q->zone, TYPE_DNAME))) {
1036 		/* process DNAME */
1037 		const dname_type* name = qname;
1038 		domain_type *dest = rdata_atom_domain(rrset->rrs[0].rdatas[0]);
1039 		int added;
1040 		assert(rrset->rr_count > 0);
1041 		if(domain_number != 0) /* we followed CNAMEs or DNAMEs */
1042 			name = domain_dname(closest_match);
1043 		DEBUG(DEBUG_QUERY,2, (LOG_INFO, "expanding DNAME for q=%s", dname_to_string(name, NULL)));
1044 		DEBUG(DEBUG_QUERY,2, (LOG_INFO, "->src is %s",
1045 			domain_to_string(closest_encloser)));
1046 		DEBUG(DEBUG_QUERY,2, (LOG_INFO, "->dest is %s",
1047 			domain_to_string(dest)));
1048 		/* if the DNAME set is not added we have a loop, do not follow */
1049 		added = add_rrset(q, answer, ANSWER_SECTION, closest_encloser, rrset);
1050 		if(added) {
1051 			domain_type* src = closest_encloser;
1052 			const dname_type* newname = dname_replace(q->region, name,
1053 				domain_dname(src), domain_dname(dest));
1054 			size_t newnum = 0;
1055 			zone_type* origzone = q->zone;
1056 			++q->cname_count;
1057 			if(!newname) { /* newname too long */
1058 				RCODE_SET(q->packet, RCODE_YXDOMAIN);
1059 				return;
1060 			}
1061 			DEBUG(DEBUG_QUERY,2, (LOG_INFO, "->result is %s", dname_to_string(newname, NULL)));
1062 			/* follow the DNAME */
1063 			exact = namedb_lookup(nsd->db, newname, &closest_match, &closest_encloser);
1064 			/* synthesize CNAME record */
1065 			newnum = query_synthesize_cname(q, answer, name, newname,
1066 				src, closest_encloser, &closest_match, rrset->rrs[0].ttl);
1067 			if(!newnum) {
1068 				/* could not synthesize the CNAME. */
1069 				/* return previous CNAMEs to make resolver recurse for us */
1070 				return;
1071 			}
1072 
1073 			answer_lookup_zone(nsd, q, answer, newnum,
1074 				closest_match == closest_encloser,
1075 				closest_match, closest_encloser, newname);
1076 			q->zone = origzone;
1077 		}
1078 		if(!added)  /* log the error so operator can find looping recursors */
1079 			log_msg(LOG_INFO, "DNAME processing stopped due to loop, qname %s",
1080 				dname_to_string(q->qname, NULL));
1081 		return;
1082 	} else if (domain_wildcard_child(closest_encloser)) {
1083 		/* Generate the domain from the wildcard.  */
1084 		domain_type *wildcard_child = domain_wildcard_child(closest_encloser);
1085 #ifdef RATELIMIT
1086 		q->wildcard_domain = wildcard_child;
1087 #endif
1088 
1089 		match = (domain_type *) region_alloc(q->region,
1090 						     sizeof(domain_type));
1091 		match->rnode = NULL;
1092 		match->dname = wildcard_child->dname;
1093 		match->parent = closest_encloser;
1094 		match->wildcard_child_closest_match = match;
1095 		match->number = domain_number;
1096 		match->rrsets = wildcard_child->rrsets;
1097 		match->is_existing = wildcard_child->is_existing;
1098 #ifdef NSEC3
1099 		match->nsec3 = wildcard_child->nsec3;
1100 		/* copy over these entries:
1101 		match->nsec3_is_exact = wildcard_child->nsec3_is_exact;
1102 		match->nsec3_cover = wildcard_child->nsec3_cover;
1103 		match->nsec3_wcard_child_cover = wildcard_child->nsec3_wcard_child_cover;
1104 		match->nsec3_ds_parent_is_exact = wildcard_child->nsec3_ds_parent_is_exact;
1105 		match->nsec3_ds_parent_cover = wildcard_child->nsec3_ds_parent_cover;
1106 		*/
1107 
1108 		if (q->edns.dnssec_ok && q->zone->nsec3_param) {
1109 			/* Only add nsec3 wildcard data when do bit is set */
1110 			nsec3_answer_wildcard(q, answer, wildcard_child, qname);
1111 		}
1112 #endif
1113 
1114 		/*
1115 		 * Remember the original domain in case a Wildcard No
1116 		 * Data (3.1.3.4) response needs to be generated.  In
1117 		 * this particular case the wildcard IS NOT
1118 		 * expanded.
1119 		 */
1120 		original = wildcard_child;
1121 	} else {
1122 		match = NULL;
1123 	}
1124 
1125 	/* Authoritative zone.  */
1126 #ifdef NSEC3
1127 	if (q->edns.dnssec_ok && q->zone->nsec3_param) {
1128 		nsec3_answer_authoritative(&match, q, answer,
1129 			closest_encloser, qname);
1130 	} else
1131 #endif
1132 	if (q->edns.dnssec_ok && zone_is_secure(q->zone)) {
1133 		if (match != closest_encloser) {
1134 			domain_type *nsec_domain;
1135 			rrset_type *nsec_rrset;
1136 
1137 			/*
1138 			 * No match found or generated from wildcard,
1139 			 * include NSEC record.
1140 			 */
1141 			nsec_domain = find_covering_nsec(closest_match, q->zone, &nsec_rrset);
1142 			if (nsec_domain) {
1143 				add_rrset(q, answer, AUTHORITY_SECTION, nsec_domain, nsec_rrset);
1144 			}
1145 		}
1146 		if (!match) {
1147 			domain_type *nsec_domain;
1148 			rrset_type *nsec_rrset;
1149 
1150 			/*
1151 			 * No match and no wildcard.  Include NSEC
1152 			 * proving there is no wildcard.
1153 			 */
1154 			nsec_domain = find_covering_nsec(closest_encloser->wildcard_child_closest_match, q->zone, &nsec_rrset);
1155 			if (nsec_domain) {
1156 				add_rrset(q, answer, AUTHORITY_SECTION, nsec_domain, nsec_rrset);
1157 			}
1158 		}
1159 	}
1160 
1161 #ifdef NSEC3
1162 	if (RCODE(q->packet)!=RCODE_OK) {
1163 		return; /* nsec3 collision failure */
1164 	}
1165 #endif
1166 	if (match) {
1167 		answer_domain(nsd, q, answer, match, original);
1168 	} else {
1169 		answer_nxdomain(q, answer);
1170 	}
1171 }
1172 
1173 /*
1174  * qname may be different after CNAMEs have been followed from query->qname.
1175  */
1176 static void
1177 answer_lookup_zone(struct nsd *nsd, struct query *q, answer_type *answer,
1178 	size_t domain_number, int exact, domain_type *closest_match,
1179 	domain_type *closest_encloser, const dname_type *qname)
1180 {
1181 	q->zone = domain_find_zone(nsd->db, closest_encloser);
1182 	if (!q->zone) {
1183 		/* no zone for this */
1184 		if(q->cname_count == 0)
1185 			RCODE_SET(q->packet, RCODE_REFUSE);
1186 		return;
1187 	}
1188 	if(!q->zone->apex || !q->zone->soa_rrset) {
1189 		/* zone is configured but not loaded */
1190 		if(q->cname_count == 0)
1191 			RCODE_SET(q->packet, RCODE_SERVFAIL);
1192 		return;
1193 	}
1194 	/* now move up the closest encloser until it exists, previous
1195 	 * (possibly empty) closest encloser was useful to finding the zone
1196 	 * (for empty zones too), but now we want actual data nodes */
1197 	if (closest_encloser && !closest_encloser->is_existing) {
1198 		exact = 0;
1199 		while (closest_encloser != NULL && !closest_encloser->is_existing)
1200 			closest_encloser = closest_encloser->parent;
1201 	}
1202 
1203 	/*
1204 	 * See RFC 4035 (DNSSEC protocol) section 3.1.4.1 Responding
1205 	 * to Queries for DS RRs.
1206 	 */
1207 	if (exact && q->qtype == TYPE_DS && closest_encloser == q->zone->apex) {
1208 		/*
1209 		 * Type DS query at a zone cut, use the responsible
1210 		 * parent zone to generate the answer if we are
1211 		 * authoritative for the parent zone.
1212 		 */
1213 		zone_type *zone = domain_find_parent_zone(nsd->db, q->zone);
1214 		if (zone)
1215 			q->zone = zone;
1216 	}
1217 
1218 	/* see if the zone has expired (for secondary zones) */
1219 	if(q->zone && q->zone->opts && q->zone->opts->pattern &&
1220 		q->zone->opts->pattern->request_xfr != 0 && !q->zone->is_ok) {
1221 		if(q->cname_count == 0)
1222 			RCODE_SET(q->packet, RCODE_SERVFAIL);
1223 		return;
1224 	}
1225 
1226 	if (exact && q->qtype == TYPE_DS && closest_encloser == q->zone->apex) {
1227 		/*
1228 		 * Type DS query at the zone apex (and the server is
1229 		 * not authoritative for the parent zone).
1230 		 */
1231 		if (q->qclass == CLASS_ANY) {
1232 			AA_CLR(q->packet);
1233 		} else {
1234 			AA_SET(q->packet);
1235 		}
1236 		answer_nodata(q, answer, closest_encloser);
1237 	} else {
1238 		q->delegation_domain = domain_find_ns_rrsets(
1239 			closest_encloser, q->zone, &q->delegation_rrset);
1240 		if(q->delegation_domain && find_dname_above(q->delegation_domain, q->zone)) {
1241 			q->delegation_domain = NULL; /* use higher DNAME */
1242 		}
1243 
1244 		if (!q->delegation_domain
1245 		    || (exact && q->qtype == TYPE_DS && closest_encloser == q->delegation_domain))
1246 		{
1247 			if (q->qclass == CLASS_ANY) {
1248 				AA_CLR(q->packet);
1249 			} else {
1250 				AA_SET(q->packet);
1251 			}
1252 			answer_authoritative(nsd, q, answer, domain_number, exact,
1253 					     closest_match, closest_encloser, qname);
1254 		}
1255 		else {
1256 			answer_delegation(q, answer);
1257 		}
1258 	}
1259 }
1260 
1261 static void
1262 answer_query(struct nsd *nsd, struct query *q)
1263 {
1264 	domain_type *closest_match;
1265 	domain_type *closest_encloser;
1266 	int exact;
1267 	uint16_t offset;
1268 	answer_type answer;
1269 
1270 	answer_init(&answer);
1271 
1272 	exact = namedb_lookup(nsd->db, q->qname, &closest_match, &closest_encloser);
1273 
1274 	answer_lookup_zone(nsd, q, &answer, 0, exact, closest_match,
1275 		closest_encloser, q->qname);
1276 	ZTATUP2(nsd, q->zone, opcode, q->opcode);
1277 	ZTATUP2(nsd, q->zone, qtype, q->qtype);
1278 	ZTATUP2(nsd, q->zone, qclass, q->qclass);
1279 
1280 	offset = dname_label_offsets(q->qname)[domain_dname(closest_encloser)->label_count - 1] + QHEADERSZ;
1281 	query_add_compression_domain(q, closest_encloser, offset);
1282 	encode_answer(q, &answer);
1283 	query_clear_compression_tables(q);
1284 }
1285 
1286 void
1287 query_prepare_response(query_type *q, nsd_type *nsd)
1288 {
1289 	uint16_t flags;
1290 
1291 	/*
1292 	 * Preserve the data up-to the current packet's limit.
1293 	 */
1294 	buffer_set_position(q->packet, buffer_limit(q->packet));
1295 	buffer_set_limit(q->packet, buffer_capacity(q->packet));
1296 
1297 	/*
1298 	 * Reserve space for the EDNS records if required.
1299 	 */
1300 	q->reserved_space = edns_reserved_space(&q->edns);
1301 	q->reserved_space += tsig_reserved_space(&q->tsig);
1302 	if(q->edns.nsid == 1 && nsd->nsid_len > 0 &&
1303 		q->edns.status != EDNS_NOT_PRESENT)
1304 		q->reserved_space += OPT_HDR + nsd->nsid_len;
1305 
1306 	/* Update the flags.  */
1307 	flags = FLAGS(q->packet);
1308 	flags &= 0x0100U;	/* Preserve the RD flag.  */
1309 				/* CD flag must be cleared for auth answers */
1310 	flags |= 0x8000U;	/* Set the QR flag.  */
1311 	FLAGS_SET(q->packet, flags);
1312 }
1313 
1314 /*
1315  * Processes the query.
1316  *
1317  */
1318 query_state_type
1319 query_process(query_type *q, nsd_type *nsd)
1320 {
1321 	/* The query... */
1322 	nsd_rc_type rc;
1323 	query_state_type query_state;
1324 	uint16_t arcount;
1325 
1326 	/* Sanity checks */
1327 	if (buffer_limit(q->packet) < QHEADERSZ) {
1328 		/* packet too small to contain DNS header.
1329 		Now packet investigation macros will work without problems. */
1330 		return QUERY_DISCARDED;
1331 	}
1332 	if (QR(q->packet)) {
1333 		/* Not a query? Drop it on the floor. */
1334 		return QUERY_DISCARDED;
1335 	}
1336 
1337 	/* check opcode early on, because new opcodes may have different
1338 	 * specification of the meaning of the rest of the packet */
1339 	q->opcode = OPCODE(q->packet);
1340 	if(q->opcode != OPCODE_QUERY && q->opcode != OPCODE_NOTIFY) {
1341 		if(query_ratelimit_err(nsd))
1342 			return QUERY_DISCARDED;
1343 		return query_error(q, NSD_RC_IMPL);
1344 	}
1345 
1346 	if (RCODE(q->packet) != RCODE_OK || !process_query_section(q)) {
1347 		return query_formerr(q, nsd);
1348 	}
1349 
1350 	/* Update statistics.  */
1351 	STATUP2(nsd, opcode, q->opcode);
1352 	STATUP2(nsd, qtype, q->qtype);
1353 	STATUP2(nsd, qclass, q->qclass);
1354 
1355 	if (q->opcode != OPCODE_QUERY) {
1356 		if (q->opcode == OPCODE_NOTIFY) {
1357 			return answer_notify(nsd, q);
1358 		} else {
1359 			if(query_ratelimit_err(nsd))
1360 				return QUERY_DISCARDED;
1361 			return query_error(q, NSD_RC_IMPL);
1362 		}
1363 	}
1364 
1365 	/* Dont bother to answer more than one question at once... */
1366 	if (QDCOUNT(q->packet) != 1) {
1367 		FLAGS_SET(q->packet, 0);
1368 		return query_formerr(q, nsd);
1369 	}
1370 	/* Ignore settings of flags */
1371 
1372 	/* Dont allow any records in the answer or authority section...
1373 	   except for IXFR queries. */
1374 	if (ANCOUNT(q->packet) != 0 ||
1375 		(q->qtype!=TYPE_IXFR && NSCOUNT(q->packet) != 0)) {
1376 		return query_formerr(q, nsd);
1377 	}
1378 	if(q->qtype==TYPE_IXFR && NSCOUNT(q->packet) > 0) {
1379 		int i; /* skip ixfr soa information data here */
1380 		for(i=0; i< NSCOUNT(q->packet); i++)
1381 			if(!packet_skip_rr(q->packet, 0))
1382 				return query_formerr(q, nsd);
1383 	}
1384 
1385 	arcount = ARCOUNT(q->packet);
1386 	if (arcount > 0) {
1387 		/* According to draft-ietf-dnsext-rfc2671bis-edns0-10:
1388 		 * "The placement flexibility for the OPT RR does not
1389 		 * override the need for the TSIG or SIG(0) RRs to be
1390 		 * the last in the additional section whenever they are
1391 		 * present."
1392 		 * So we should not have to check for TSIG RR before
1393 		 * OPT RR. Keep the code for backwards compatibility.
1394 		 */
1395 
1396 		/* see if tsig is before edns record */
1397 		if (!tsig_parse_rr(&q->tsig, q->packet))
1398 			return query_formerr(q, nsd);
1399 		if(q->tsig.status != TSIG_NOT_PRESENT)
1400 			--arcount;
1401 	}
1402 	/* See if there is an OPT RR. */
1403 	if (arcount > 0) {
1404 		if (edns_parse_record(&q->edns, q->packet))
1405 			--arcount;
1406 	}
1407 	/* See if there is a TSIG RR. */
1408 	if (arcount > 0 && q->tsig.status == TSIG_NOT_PRESENT) {
1409 		/* see if tsig is after the edns record */
1410 		if (!tsig_parse_rr(&q->tsig, q->packet))
1411 			return query_formerr(q, nsd);
1412 		if(q->tsig.status != TSIG_NOT_PRESENT)
1413 			--arcount;
1414 	}
1415 	/* If more RRs left in Add. Section, FORMERR. */
1416 	if (arcount > 0) {
1417 		return query_formerr(q, nsd);
1418 	}
1419 
1420 	/* Do we have any trailing garbage? */
1421 #ifdef	STRICT_MESSAGE_PARSE
1422 	if (buffer_remaining(q->packet) > 0) {
1423 		/* If we're strict.... */
1424 		return query_formerr(q, nsd);
1425 	}
1426 #endif
1427 	/* Remove trailing garbage.  */
1428 	buffer_set_limit(q->packet, buffer_position(q->packet));
1429 
1430 	rc = process_tsig(q);
1431 	if (rc != NSD_RC_OK) {
1432 		return query_error(q, rc);
1433 	}
1434 	rc = process_edns(nsd, q);
1435 	if (rc != NSD_RC_OK) {
1436 		/* We should not return FORMERR, but BADVERS (=16).
1437 		 * BADVERS is created with Ext. RCODE, followed by RCODE.
1438 		 * Ext. RCODE is set to 1, RCODE must be 0 (getting 0x10 = 16).
1439 		 * Thus RCODE = NOERROR = NSD_RC_OK. */
1440 		return query_error(q, NSD_RC_OK);
1441 	}
1442 
1443 	query_prepare_response(q, nsd);
1444 
1445 	if (q->qclass != CLASS_IN && q->qclass != CLASS_ANY) {
1446 		if (q->qclass == CLASS_CH) {
1447 			return answer_chaos(nsd, q);
1448 		} else {
1449 			return query_error(q, NSD_RC_REFUSE);
1450 		}
1451 	}
1452 
1453 	query_state = answer_axfr_ixfr(nsd, q);
1454 	if (query_state == QUERY_PROCESSED || query_state == QUERY_IN_AXFR) {
1455 		return query_state;
1456 	}
1457 
1458 	answer_query(nsd, q);
1459 
1460 	return QUERY_PROCESSED;
1461 }
1462 
1463 void
1464 query_add_optional(query_type *q, nsd_type *nsd)
1465 {
1466 	struct edns_data *edns = &nsd->edns_ipv4;
1467 #if defined(INET6)
1468 	if (q->addr.ss_family == AF_INET6) {
1469 		edns = &nsd->edns_ipv6;
1470 	}
1471 #endif
1472 	if (RCODE(q->packet) == RCODE_FORMAT) {
1473 		return;
1474 	}
1475 	switch (q->edns.status) {
1476 	case EDNS_NOT_PRESENT:
1477 		break;
1478 	case EDNS_OK:
1479 		if (q->edns.dnssec_ok)	edns->ok[7] = 0x80;
1480 		else			edns->ok[7] = 0x00;
1481 		buffer_write(q->packet, edns->ok, OPT_LEN);
1482 		if (nsd->nsid_len > 0 && q->edns.nsid == 1 && buffer_available(
1483 			q->packet, OPT_RDATA+OPT_HDR+nsd->nsid_len)) {
1484 			/* rdata length */
1485 			buffer_write(q->packet, edns->rdata_nsid, OPT_RDATA);
1486 			/* nsid opt header */
1487 			buffer_write(q->packet, edns->nsid, OPT_HDR);
1488 			/* nsid payload */
1489 			buffer_write(q->packet, nsd->nsid, nsd->nsid_len);
1490 		}  else {
1491 			/* fill with NULLs */
1492 			buffer_write(q->packet, edns->rdata_none, OPT_RDATA);
1493 		}
1494 		ARCOUNT_SET(q->packet, ARCOUNT(q->packet) + 1);
1495 		STATUP(nsd, edns);
1496 		ZTATUP(nsd, q->zone, edns);
1497 		break;
1498 	case EDNS_ERROR:
1499 		if (q->edns.dnssec_ok)	edns->error[7] = 0x80;
1500 		else			edns->error[7] = 0x00;
1501 		buffer_write(q->packet, edns->error, OPT_LEN);
1502 		buffer_write(q->packet, edns->rdata_none, OPT_RDATA);
1503 		ARCOUNT_SET(q->packet, ARCOUNT(q->packet) + 1);
1504 		STATUP(nsd, ednserr);
1505 		ZTATUP(nsd, q->zone, ednserr);
1506 		break;
1507 	}
1508 
1509 	if (q->tsig.status != TSIG_NOT_PRESENT) {
1510 		if (q->tsig.status == TSIG_ERROR ||
1511 			q->tsig.error_code != TSIG_ERROR_NOERROR) {
1512 			tsig_error_reply(&q->tsig);
1513 			tsig_append_rr(&q->tsig, q->packet);
1514 			ARCOUNT_SET(q->packet, ARCOUNT(q->packet) + 1);
1515 		} else if(q->tsig.status == TSIG_OK &&
1516 			q->tsig.error_code == TSIG_ERROR_NOERROR)
1517 		{
1518 			if(q->tsig_prepare_it)
1519 				tsig_prepare(&q->tsig);
1520 			if(q->tsig_update_it)
1521 				tsig_update(&q->tsig, q->packet, buffer_position(q->packet));
1522 			if(q->tsig_sign_it) {
1523 				tsig_sign(&q->tsig);
1524 				tsig_append_rr(&q->tsig, q->packet);
1525 				ARCOUNT_SET(q->packet, ARCOUNT(q->packet) + 1);
1526 			}
1527 		}
1528 	}
1529 }
1530