xref: /netbsd-src/external/mpl/bind/dist/lib/ns/xfrout.c (revision bcda20f65a8566e103791ec395f7f499ef322704)
1 /*	$NetBSD: xfrout.c,v 1.15 2025/01/26 16:25:46 christos Exp $	*/
2 
3 /*
4  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
5  *
6  * SPDX-License-Identifier: MPL-2.0
7  *
8  * This Source Code Form is subject to the terms of the Mozilla Public
9  * License, v. 2.0. If a copy of the MPL was not distributed with this
10  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
11  *
12  * See the COPYRIGHT file distributed with this work for additional
13  * information regarding copyright ownership.
14  */
15 
16 #include <inttypes.h>
17 #include <stdbool.h>
18 
19 #include <isc/formatcheck.h>
20 #include <isc/mem.h>
21 #include <isc/netmgr.h>
22 #include <isc/result.h>
23 #include <isc/stats.h>
24 #include <isc/util.h>
25 
26 #include <dns/db.h>
27 #include <dns/dbiterator.h>
28 #include <dns/dlz.h>
29 #include <dns/fixedname.h>
30 #include <dns/journal.h>
31 #include <dns/message.h>
32 #include <dns/peer.h>
33 #include <dns/rdataclass.h>
34 #include <dns/rdatalist.h>
35 #include <dns/rdataset.h>
36 #include <dns/rdatasetiter.h>
37 #include <dns/rriterator.h>
38 #include <dns/soa.h>
39 #include <dns/stats.h>
40 #include <dns/tsig.h>
41 #include <dns/view.h>
42 #include <dns/zone.h>
43 #include <dns/zt.h>
44 
45 #include <ns/client.h>
46 #include <ns/log.h>
47 #include <ns/server.h>
48 #include <ns/stats.h>
49 #include <ns/xfrout.h>
50 
51 /*! \file
52  * \brief
53  * Outgoing AXFR and IXFR.
54  */
55 
56 /*
57  * TODO:
58  *  - IXFR over UDP
59  */
60 
/*
 * Argument bundles for the logging calls in this file: each macro expands
 * to the leading arguments of isc_log_write() (log context, category,
 * module and, for most of them, level), so call sites only add the
 * format string and its parameters.
 */

/* Log context, category and module shared by all transfer-out messages. */
#define XFROUT_COMMON_LOGARGS \
	ns_lctx, DNS_LOGCATEGORY_XFER_OUT, NS_LOGMODULE_XFER_OUT

/* Common arguments plus the ISC_LOG_INFO level. */
#define XFROUT_PROTOCOL_LOGARGS XFROUT_COMMON_LOGARGS, ISC_LOG_INFO

/* Common arguments plus debug level 'n'. */
#define XFROUT_DEBUG_LOGARGS(n) XFROUT_COMMON_LOGARGS, ISC_LOG_DEBUG(n)

/* Common arguments plus the level used when dumping individual RRs. */
#define XFROUT_RR_LOGARGS XFROUT_COMMON_LOGARGS, XFROUT_RR_LOGLEVEL

/* Debug level at which log_rr() writes individual RRs. */
#define XFROUT_RR_LOGLEVEL ISC_LOG_DEBUG(8)
/*%
 * Fail unconditionally and log as a client error.
 *
 * Stores 'code' in the enclosing function's 'result' variable, logs
 * 'msg' through the enclosing function's 'client', and jumps to the
 * enclosing function's 'failure' label.
 *
 * 'code' is evaluated exactly once: the log text is derived from the
 * stored 'result' rather than from a second expansion of 'code'.
 *
 * The test against ISC_R_SUCCESS is there to keep the Solaris compiler
 * from complaining about "end-of-loop code not reached".
 */
#define FAILC(code, msg)                                                 \
	do {                                                             \
		result = (code);                                         \
		ns_client_log(client, DNS_LOGCATEGORY_XFER_OUT,          \
			      NS_LOGMODULE_XFER_OUT, ISC_LOG_INFO,       \
			      "bad zone transfer request: %s (%s)", msg, \
			      isc_result_totext(result));                \
		if (result != ISC_R_SUCCESS)                             \
			goto failure;                                    \
	} while (0)
87 
/*%
 * Like FAILC(), but the log message also identifies the zone being
 * asked for: 'question' (a name) and 'rdclass' are formatted into
 * local buffers and logged as '<name>/<class>'.
 *
 * Stores 'code' in the enclosing function's 'result' variable and jumps
 * to its 'failure' label.  Every macro argument is evaluated exactly
 * once: the log text is derived from the stored 'result' rather than
 * from a second expansion of 'code'.
 */
#define FAILQ(code, msg, question, rdclass)                                  \
	do {                                                                 \
		char _buf1[DNS_NAME_FORMATSIZE];                             \
		char _buf2[DNS_RDATACLASS_FORMATSIZE];                       \
		result = (code);                                             \
		dns_name_format(question, _buf1, sizeof(_buf1));             \
		dns_rdataclass_format(rdclass, _buf2, sizeof(_buf2));        \
		ns_client_log(client, DNS_LOGCATEGORY_XFER_OUT,              \
			      NS_LOGMODULE_XFER_OUT, ISC_LOG_INFO,           \
			      "bad zone transfer request: '%s/%s': %s (%s)", \
			      _buf1, _buf2, msg, isc_result_totext(result)); \
		if (result != ISC_R_SUCCESS)                                 \
			goto failure;                                        \
	} while (0)
102 
/*%
 * Evaluate 'op' once, store its value in the enclosing function's
 * 'result' variable, and jump to the enclosing function's 'failure'
 * label unless it is ISC_R_SUCCESS.
 */
#define CHECK(op)                              \
	do {                                   \
		result = (op);                 \
		if (result != ISC_R_SUCCESS) { \
			goto failure;          \
		}                              \
	} while (0)
109 
110 /**************************************************************************/
111 
112 static void
113 inc_stats(ns_client_t *client, dns_zone_t *zone, isc_statscounter_t counter) {
114 	ns_stats_increment(client->manager->sctx->nsstats, counter);
115 	if (zone != NULL) {
116 		isc_stats_t *zonestats = dns_zone_getrequeststats(zone);
117 		if (zonestats != NULL) {
118 			isc_stats_increment(zonestats, counter);
119 		}
120 	}
121 }
122 
123 /**************************************************************************/
124 
125 /*% Log an RR (for debugging) */
126 
127 static void
128 log_rr(dns_name_t *name, dns_rdata_t *rdata, uint32_t ttl) {
129 	isc_result_t result;
130 	isc_buffer_t buf;
131 	char mem[2000];
132 	dns_rdatalist_t rdl;
133 	dns_rdataset_t rds;
134 	dns_rdata_t rd = DNS_RDATA_INIT;
135 
136 	dns_rdatalist_init(&rdl);
137 	rdl.type = rdata->type;
138 	rdl.rdclass = rdata->rdclass;
139 	rdl.ttl = ttl;
140 	if (rdata->type == dns_rdatatype_sig ||
141 	    rdata->type == dns_rdatatype_rrsig)
142 	{
143 		rdl.covers = dns_rdata_covers(rdata);
144 	} else {
145 		rdl.covers = dns_rdatatype_none;
146 	}
147 	dns_rdataset_init(&rds);
148 	dns_rdata_init(&rd);
149 	dns_rdata_clone(rdata, &rd);
150 	ISC_LIST_APPEND(rdl.rdata, &rd, link);
151 	dns_rdatalist_tordataset(&rdl, &rds);
152 
153 	isc_buffer_init(&buf, mem, sizeof(mem));
154 	result = dns_rdataset_totext(&rds, name, false, false, &buf);
155 
156 	/*
157 	 * We could use xfrout_log(), but that would produce
158 	 * very long lines with a repetitive prefix.
159 	 */
160 	if (result == ISC_R_SUCCESS) {
161 		/*
162 		 * Get rid of final newline.
163 		 */
164 		INSIST(buf.used >= 1 &&
165 		       ((char *)buf.base)[buf.used - 1] == '\n');
166 		buf.used--;
167 
168 		isc_log_write(XFROUT_RR_LOGARGS, "%.*s",
169 			      (int)isc_buffer_usedlength(&buf),
170 			      (char *)isc_buffer_base(&buf));
171 	} else {
172 		isc_log_write(XFROUT_RR_LOGARGS, "<RR too large to print>");
173 	}
174 }
175 
176 /**************************************************************************/
177 /*
178  * An 'rrstream_t' is a polymorphic iterator that returns
179  * a stream of resource records.  There are multiple implementations,
180  * e.g. for generating AXFR and IXFR records streams.
181  */
182 
/* Table of operations implemented by each concrete stream type. */
typedef struct rrstream_methods rrstream_methods_t;

/*
 * Common header of every stream.  The concrete stream structures in
 * this file embed it as their first member ('common') and are cast
 * to and from 'rrstream_t *'.
 */
typedef struct rrstream {
	isc_mem_t *mctx;	     /* memory context the stream was allocated from */
	rrstream_methods_t *methods; /* operations for this stream type */
} rrstream_t;

struct rrstream_methods {
	/* Position the stream on its first RR. */
	isc_result_t (*first)(rrstream_t *);
	/* Advance the stream to its next RR. */
	isc_result_t (*next)(rrstream_t *);
	/* Return owner name, TTL and rdata of the current RR. */
	void (*current)(rrstream_t *, dns_name_t **, uint32_t *,
			dns_rdata_t **);
	/* Pause iteration; a no-op for stream types with nothing to pause. */
	void (*pause)(rrstream_t *);
	/* Release the stream and everything it owns. */
	void (*destroy)(rrstream_t **);
};
198 
199 static void
200 rrstream_noop_pause(rrstream_t *rs) {
201 	UNUSED(rs);
202 }
203 
204 /**************************************************************************/
205 /*
206  * An 'ixfr_rrstream_t' is an 'rrstream_t' that returns
207  * an IXFR-like RR stream from a journal file.
208  *
 * The SOAs at the beginning of each sequence of additions
 * or deletions are included in the stream, but the extra
 * SOAs at the beginning and end of the entire transfer are
 * not included.
213  */
214 
typedef struct ixfr_rrstream {
	rrstream_t common;	/* must be first: cast to/from rrstream_t */
	dns_journal_t *journal; /* open journal being iterated, or NULL */
} ixfr_rrstream_t;

/*
 * Forward declarations: ixfr_rrstream_create() needs both the destroy
 * function and the methods table before they are defined.
 */
static void
ixfr_rrstream_destroy(rrstream_t **sp);

static rrstream_methods_t ixfr_rrstream_methods;
225 
226 /*
227  * Returns: anything dns_journal_open() or dns_journal_iter_init()
228  * may return.
229  */
230 
231 static isc_result_t
232 ixfr_rrstream_create(isc_mem_t *mctx, const char *journal_filename,
233 		     uint32_t begin_serial, uint32_t end_serial, size_t *sizep,
234 		     rrstream_t **sp) {
235 	isc_result_t result;
236 	ixfr_rrstream_t *s = NULL;
237 
238 	INSIST(sp != NULL && *sp == NULL);
239 
240 	s = isc_mem_get(mctx, sizeof(*s));
241 	s->common.mctx = NULL;
242 	isc_mem_attach(mctx, &s->common.mctx);
243 	s->common.methods = &ixfr_rrstream_methods;
244 	s->journal = NULL;
245 
246 	CHECK(dns_journal_open(mctx, journal_filename, DNS_JOURNAL_READ,
247 			       &s->journal));
248 	CHECK(dns_journal_iter_init(s->journal, begin_serial, end_serial,
249 				    sizep));
250 
251 	*sp = (rrstream_t *)s;
252 	return ISC_R_SUCCESS;
253 
254 failure:
255 	ixfr_rrstream_destroy((rrstream_t **)(void *)&s);
256 	return result;
257 }
258 
259 static isc_result_t
260 ixfr_rrstream_first(rrstream_t *rs) {
261 	ixfr_rrstream_t *s = (ixfr_rrstream_t *)rs;
262 	return dns_journal_first_rr(s->journal);
263 }
264 
265 static isc_result_t
266 ixfr_rrstream_next(rrstream_t *rs) {
267 	ixfr_rrstream_t *s = (ixfr_rrstream_t *)rs;
268 	return dns_journal_next_rr(s->journal);
269 }
270 
271 static void
272 ixfr_rrstream_current(rrstream_t *rs, dns_name_t **name, uint32_t *ttl,
273 		      dns_rdata_t **rdata) {
274 	ixfr_rrstream_t *s = (ixfr_rrstream_t *)rs;
275 	dns_journal_current_rr(s->journal, name, ttl, rdata);
276 }
277 
278 static void
279 ixfr_rrstream_destroy(rrstream_t **rsp) {
280 	ixfr_rrstream_t *s = (ixfr_rrstream_t *)*rsp;
281 	if (s->journal != NULL) {
282 		dns_journal_destroy(&s->journal);
283 	}
284 	isc_mem_putanddetach(&s->common.mctx, s, sizeof(*s));
285 }
286 
287 static rrstream_methods_t ixfr_rrstream_methods = {
288 	ixfr_rrstream_first, ixfr_rrstream_next, ixfr_rrstream_current,
289 	rrstream_noop_pause, ixfr_rrstream_destroy
290 };
291 
292 /**************************************************************************/
293 /*
294  * An 'axfr_rrstream_t' is an 'rrstream_t' that returns
295  * an AXFR-like RR stream from a database.
296  *
297  * The SOAs at the beginning and end of the transfer are
298  * not included in the stream.
299  */
300 
typedef struct axfr_rrstream {
	rrstream_t common;   /* must be first: cast to/from rrstream_t */
	dns_rriterator_t it; /* iterator over the database's RRs */
	bool it_valid;	     /* true once 'it' has been initialized, so
			      * destroy knows whether to tear it down */
} axfr_rrstream_t;

/*
 * Forward declarations: axfr_rrstream_create() needs both the destroy
 * function and the methods table before they are defined.
 */
static void
axfr_rrstream_destroy(rrstream_t **rsp);

static rrstream_methods_t axfr_rrstream_methods;
314 
315 static isc_result_t
316 axfr_rrstream_create(isc_mem_t *mctx, dns_db_t *db, dns_dbversion_t *ver,
317 		     rrstream_t **sp) {
318 	axfr_rrstream_t *s;
319 	isc_result_t result;
320 
321 	INSIST(sp != NULL && *sp == NULL);
322 
323 	s = isc_mem_get(mctx, sizeof(*s));
324 	s->common.mctx = NULL;
325 	isc_mem_attach(mctx, &s->common.mctx);
326 	s->common.methods = &axfr_rrstream_methods;
327 	s->it_valid = false;
328 
329 	CHECK(dns_rriterator_init(&s->it, db, ver, 0));
330 	s->it_valid = true;
331 
332 	*sp = (rrstream_t *)s;
333 	return ISC_R_SUCCESS;
334 
335 failure:
336 	axfr_rrstream_destroy((rrstream_t **)(void *)&s);
337 	return result;
338 }
339 
340 static isc_result_t
341 axfr_rrstream_first(rrstream_t *rs) {
342 	axfr_rrstream_t *s = (axfr_rrstream_t *)rs;
343 	isc_result_t result;
344 	result = dns_rriterator_first(&s->it);
345 	if (result != ISC_R_SUCCESS) {
346 		return result;
347 	}
348 	/* Skip SOA records. */
349 	for (;;) {
350 		dns_name_t *name_dummy = NULL;
351 		uint32_t ttl_dummy;
352 		dns_rdata_t *rdata = NULL;
353 		dns_rriterator_current(&s->it, &name_dummy, &ttl_dummy, NULL,
354 				       &rdata);
355 		if (rdata->type != dns_rdatatype_soa) {
356 			break;
357 		}
358 		result = dns_rriterator_next(&s->it);
359 		if (result != ISC_R_SUCCESS) {
360 			break;
361 		}
362 	}
363 	return result;
364 }
365 
366 static isc_result_t
367 axfr_rrstream_next(rrstream_t *rs) {
368 	axfr_rrstream_t *s = (axfr_rrstream_t *)rs;
369 	isc_result_t result;
370 
371 	/* Skip SOA records. */
372 	for (;;) {
373 		dns_name_t *name_dummy = NULL;
374 		uint32_t ttl_dummy;
375 		dns_rdata_t *rdata = NULL;
376 		result = dns_rriterator_next(&s->it);
377 		if (result != ISC_R_SUCCESS) {
378 			break;
379 		}
380 		dns_rriterator_current(&s->it, &name_dummy, &ttl_dummy, NULL,
381 				       &rdata);
382 		if (rdata->type != dns_rdatatype_soa) {
383 			break;
384 		}
385 	}
386 	return result;
387 }
388 
389 static void
390 axfr_rrstream_current(rrstream_t *rs, dns_name_t **name, uint32_t *ttl,
391 		      dns_rdata_t **rdata) {
392 	axfr_rrstream_t *s = (axfr_rrstream_t *)rs;
393 	dns_rriterator_current(&s->it, name, ttl, NULL, rdata);
394 }
395 
396 static void
397 axfr_rrstream_pause(rrstream_t *rs) {
398 	axfr_rrstream_t *s = (axfr_rrstream_t *)rs;
399 	dns_rriterator_pause(&s->it);
400 }
401 
402 static void
403 axfr_rrstream_destroy(rrstream_t **rsp) {
404 	axfr_rrstream_t *s = (axfr_rrstream_t *)*rsp;
405 	if (s->it_valid) {
406 		dns_rriterator_destroy(&s->it);
407 	}
408 	isc_mem_putanddetach(&s->common.mctx, s, sizeof(*s));
409 }
410 
411 static rrstream_methods_t axfr_rrstream_methods = {
412 	axfr_rrstream_first, axfr_rrstream_next, axfr_rrstream_current,
413 	axfr_rrstream_pause, axfr_rrstream_destroy
414 };
415 
416 /**************************************************************************/
417 /*
418  * An 'soa_rrstream_t' is a degenerate 'rrstream_t' that returns
419  * a single SOA record.
420  */
421 
typedef struct soa_rrstream {
	rrstream_t common;	    /* must be first: cast to/from rrstream_t */
	dns_difftuple_t *soa_tuple; /* the SOA returned by the stream; set by
				     * soa_rrstream_create(), freed on destroy */
} soa_rrstream_t;

/*
 * Forward declarations: soa_rrstream_create() needs both the destroy
 * function and the methods table before they are defined.
 */
static void
soa_rrstream_destroy(rrstream_t **rsp);

static rrstream_methods_t soa_rrstream_methods;
434 
435 static isc_result_t
436 soa_rrstream_create(isc_mem_t *mctx, dns_db_t *db, dns_dbversion_t *ver,
437 		    rrstream_t **sp) {
438 	soa_rrstream_t *s;
439 	isc_result_t result;
440 
441 	INSIST(sp != NULL && *sp == NULL);
442 
443 	s = isc_mem_get(mctx, sizeof(*s));
444 	s->common.mctx = NULL;
445 	isc_mem_attach(mctx, &s->common.mctx);
446 	s->common.methods = &soa_rrstream_methods;
447 	s->soa_tuple = NULL;
448 
449 	CHECK(dns_db_createsoatuple(db, ver, mctx, DNS_DIFFOP_EXISTS,
450 				    &s->soa_tuple));
451 
452 	*sp = (rrstream_t *)s;
453 	return ISC_R_SUCCESS;
454 
455 failure:
456 	soa_rrstream_destroy((rrstream_t **)(void *)&s);
457 	return result;
458 }
459 
460 static isc_result_t
461 soa_rrstream_first(rrstream_t *rs) {
462 	UNUSED(rs);
463 	return ISC_R_SUCCESS;
464 }
465 
466 static isc_result_t
467 soa_rrstream_next(rrstream_t *rs) {
468 	UNUSED(rs);
469 	return ISC_R_NOMORE;
470 }
471 
472 static void
473 soa_rrstream_current(rrstream_t *rs, dns_name_t **name, uint32_t *ttl,
474 		     dns_rdata_t **rdata) {
475 	soa_rrstream_t *s = (soa_rrstream_t *)rs;
476 	*name = &s->soa_tuple->name;
477 	*ttl = s->soa_tuple->ttl;
478 	*rdata = &s->soa_tuple->rdata;
479 }
480 
481 static void
482 soa_rrstream_destroy(rrstream_t **rsp) {
483 	soa_rrstream_t *s = (soa_rrstream_t *)*rsp;
484 	if (s->soa_tuple != NULL) {
485 		dns_difftuple_free(&s->soa_tuple);
486 	}
487 	isc_mem_putanddetach(&s->common.mctx, s, sizeof(*s));
488 }
489 
490 static rrstream_methods_t soa_rrstream_methods = {
491 	soa_rrstream_first, soa_rrstream_next, soa_rrstream_current,
492 	rrstream_noop_pause, soa_rrstream_destroy
493 };
494 
495 /**************************************************************************/
496 /*
 * A 'compound_rrstream_t' object owns a soa_rrstream
498  * and another rrstream, the "data stream".  It returns
499  * a concatenated stream consisting of the soa_rrstream, then
500  * the data stream, then the soa_rrstream again.
501  *
502  * The component streams are owned by the compound_rrstream_t
503  * and are destroyed with it.
504  */
505 
typedef struct compound_rrstream {
	rrstream_t common;	   /* must be first: cast to/from rrstream_t */
	rrstream_t *components[3]; /* [0] SOA stream, [1] data stream,
				    * [2] the same pointer as [0] */
	int state;		   /* index of the component being read;
				    * -1 until the 'first' method is called */
	isc_result_t result;	   /* result of the latest first/next call
				    * made on a component */
} compound_rrstream_t;

/*
 * Forward declarations.
 */
static void
compound_rrstream_destroy(rrstream_t **rsp);

static isc_result_t
compound_rrstream_next(rrstream_t *rs);

static rrstream_methods_t compound_rrstream_methods;
523 
524 /*
525  * Requires:
526  *	soa_stream != NULL && *soa_stream != NULL
527  *	data_stream != NULL && *data_stream != NULL
528  *	sp != NULL && *sp == NULL
529  *
530  * Ensures:
531  *	*soa_stream == NULL
532  *	*data_stream == NULL
533  *	*sp points to a valid compound_rrstream_t
534  *	The soa and data streams will be destroyed
535  *	when the compound_rrstream_t is destroyed.
536  */
537 static isc_result_t
538 compound_rrstream_create(isc_mem_t *mctx, rrstream_t **soa_stream,
539 			 rrstream_t **data_stream, rrstream_t **sp) {
540 	compound_rrstream_t *s;
541 
542 	INSIST(sp != NULL && *sp == NULL);
543 
544 	s = isc_mem_get(mctx, sizeof(*s));
545 	s->common.mctx = NULL;
546 	isc_mem_attach(mctx, &s->common.mctx);
547 	s->common.methods = &compound_rrstream_methods;
548 	s->components[0] = *soa_stream;
549 	s->components[1] = *data_stream;
550 	s->components[2] = *soa_stream;
551 	s->state = -1;
552 	s->result = ISC_R_FAILURE;
553 
554 	*data_stream = NULL;
555 	*soa_stream = NULL;
556 	*sp = (rrstream_t *)s;
557 	return ISC_R_SUCCESS;
558 }
559 
560 static isc_result_t
561 compound_rrstream_first(rrstream_t *rs) {
562 	compound_rrstream_t *s = (compound_rrstream_t *)rs;
563 	s->state = 0;
564 	do {
565 		rrstream_t *curstream = s->components[s->state];
566 		s->result = curstream->methods->first(curstream);
567 	} while (s->result == ISC_R_NOMORE && s->state < 2);
568 	return s->result;
569 }
570 
571 static isc_result_t
572 compound_rrstream_next(rrstream_t *rs) {
573 	compound_rrstream_t *s = (compound_rrstream_t *)rs;
574 	rrstream_t *curstream = s->components[s->state];
575 	s->result = curstream->methods->next(curstream);
576 	while (s->result == ISC_R_NOMORE) {
577 		/*
578 		 * Make sure locks held by the current stream
579 		 * are released before we switch streams.
580 		 */
581 		curstream->methods->pause(curstream);
582 		if (s->state == 2) {
583 			return ISC_R_NOMORE;
584 		}
585 		s->state++;
586 		curstream = s->components[s->state];
587 		s->result = curstream->methods->first(curstream);
588 	}
589 	return s->result;
590 }
591 
592 static void
593 compound_rrstream_current(rrstream_t *rs, dns_name_t **name, uint32_t *ttl,
594 			  dns_rdata_t **rdata) {
595 	compound_rrstream_t *s = (compound_rrstream_t *)rs;
596 	rrstream_t *curstream;
597 	INSIST(0 <= s->state && s->state < 3);
598 	INSIST(s->result == ISC_R_SUCCESS);
599 	curstream = s->components[s->state];
600 	curstream->methods->current(curstream, name, ttl, rdata);
601 }
602 
603 static void
604 compound_rrstream_pause(rrstream_t *rs) {
605 	compound_rrstream_t *s = (compound_rrstream_t *)rs;
606 	rrstream_t *curstream;
607 	INSIST(0 <= s->state && s->state < 3);
608 	curstream = s->components[s->state];
609 	curstream->methods->pause(curstream);
610 }
611 
612 static void
613 compound_rrstream_destroy(rrstream_t **rsp) {
614 	compound_rrstream_t *s = (compound_rrstream_t *)*rsp;
615 	s->components[0]->methods->destroy(&s->components[0]);
616 	s->components[1]->methods->destroy(&s->components[1]);
617 	s->components[2] = NULL; /* Copy of components[0]. */
618 	isc_mem_putanddetach(&s->common.mctx, s, sizeof(*s));
619 }
620 
621 static rrstream_methods_t compound_rrstream_methods = {
622 	compound_rrstream_first, compound_rrstream_next,
623 	compound_rrstream_current, compound_rrstream_pause,
624 	compound_rrstream_destroy
625 };
626 
627 /**************************************************************************/
628 
/*%
 * Structure holding outgoing transfer statistics.
 *
 * One instance is embedded in each xfrout_ctx_t (member 'stats').
 */
struct xfr_stats {
	uint64_t nmsg;	  /*%< Number of messages sent */
	uint64_t nrecs;	  /*%< Number of records sent */
	uint64_t nbytes;  /*%< Number of bytes sent */
	isc_time_t start; /*%< Start time of the transfer */
	isc_time_t end;	  /*%< End time of the transfer */
};
639 
/*%
 * An 'xfrout_ctx_t' contains the state of an outgoing AXFR or IXFR
 * in progress.
 *
 * It groups what identifies the request (id, qname, qtype, qclass),
 * the zone/database/version involved, the RR stream being sent,
 * the buffers used to build messages, TSIG state, progress flags,
 * statistics and timers.
 */
typedef struct {
	isc_mem_t *mctx;
	ns_client_t *client;
	unsigned int id;       /* ID of request */
	dns_name_t *qname;     /* Question name of request */
	dns_rdatatype_t qtype; /* dns_rdatatype_{a,i}xfr */
	dns_rdataclass_t qclass;
	dns_zone_t *zone; /* (necessary for stats) */
	dns_db_t *db;
	dns_dbversion_t *ver;
	rrstream_t *stream;  /* The XFR RR stream */
	bool question_added; /* QUESTION section sent? */
	bool end_of_stream;  /* EOS has been reached */
	isc_buffer_t buf;    /* Buffer for message owner
			      * names and rdatas */
	isc_buffer_t txbuf;  /* Transmit message buffer */
	size_t cbytes;	     /* Length of current message */
	/* NOTE(review): presumably the storage backing the buffers
	 * above and its size -- confirm against xfrout_ctx_create(). */
	void *txmem;
	unsigned int txmemlen;
	dns_tsigkey_t *tsigkey; /* Key used to create TSIG */
	isc_buffer_t *lasttsig; /* the last TSIG */
	bool verified_tsig;	/* verified request MAC */
	/* NOTE(review): presumably selects the many-answers transfer
	 * format over one-answer -- confirm against sendstream(). */
	bool many_answers;
	int sends; /* Send in progress */
	bool shuttingdown;
	/* NOTE(review): presumably set for single-SOA "poll" responses
	 * (cf. 'is_poll' in ns_xfr_start()) -- confirm. */
	bool poll;
	const char *mnemonic;	/* Style of transfer */
	uint32_t end_serial;	/* Serial number after XFR is done */
	struct xfr_stats stats; /*%< Transfer statistics */

	/* Timeouts */
	uint64_t maxtime; /*%< Maximum XFR timeout (in ms) */
	isc_nm_timer_t *maxtime_timer;

	uint64_t idletime; /*%< XFR idle timeout (in ms) */

	/* Delayed send */
	isc_nm_timer_t *delayed_send_timer;
} xfrout_ctx_t;
683 
/*
 * Forward declarations of the file-local helpers that operate on an
 * xfrout_ctx_t (presumably defined further down in this file).
 */

/* Build a transfer context from the request parameters; the new
 * context is handed back through 'xfrp'. */
static void
xfrout_ctx_create(isc_mem_t *mctx, ns_client_t *client, unsigned int id,
		  dns_name_t *qname, dns_rdatatype_t qtype,
		  dns_rdataclass_t qclass, dns_zone_t *zone, dns_db_t *db,
		  dns_dbversion_t *ver, rrstream_t *stream,
		  dns_tsigkey_t *tsigkey, isc_buffer_t *lasttsig,
		  bool verified_tsig, unsigned int maxtime,
		  unsigned int idletime, bool many_answers,
		  xfrout_ctx_t **xfrp);

static void
sendstream(xfrout_ctx_t *xfr);

/* Signature matches a netmgr completion callback: (handle, result, arg). */
static void
xfrout_senddone(isc_nmhandle_t *handle, isc_result_t result, void *arg);

static void
xfrout_fail(xfrout_ctx_t *xfr, isc_result_t result, const char *msg);

static void
xfrout_maybe_destroy(xfrout_ctx_t *xfr);

static void
xfrout_ctx_destroy(xfrout_ctx_t **xfrp);

static void
xfrout_client_timeout(void *arg, isc_result_t result);

/* printf-style logger: 'fmt' is argument 5, its values start at 6. */
static void
xfrout_log1(ns_client_t *client, dns_name_t *zonename, dns_rdataclass_t rdclass,
	    int level, const char *fmt, ...) ISC_FORMAT_PRINTF(5, 6);

/* printf-style logger: 'fmt' is argument 3, its values start at 4. */
static void
xfrout_log(xfrout_ctx_t *xfr, int level, const char *fmt, ...)
	ISC_FORMAT_PRINTF(3, 4);

static void
xfrout_delayed_timeout(void *arg, isc_result_t result);
722 
723 /**************************************************************************/
724 
725 void
726 ns_xfr_start(ns_client_t *client, dns_rdatatype_t reqtype) {
727 	isc_result_t result;
728 	dns_name_t *question_name;
729 	dns_rdataset_t *question_rdataset;
730 	dns_zone_t *zone = NULL, *raw = NULL, *mayberaw;
731 	dns_db_t *db = NULL;
732 	dns_dbversion_t *ver = NULL;
733 	dns_rdataclass_t question_class;
734 	rrstream_t *soa_stream = NULL;
735 	rrstream_t *data_stream = NULL;
736 	rrstream_t *stream = NULL;
737 	dns_difftuple_t *current_soa_tuple = NULL;
738 	dns_name_t *soa_name;
739 	dns_rdataset_t *soa_rdataset;
740 	dns_rdata_t soa_rdata = DNS_RDATA_INIT;
741 	bool have_soa = false;
742 	const char *mnemonic = NULL;
743 	isc_mem_t *mctx = client->manager->mctx;
744 	dns_message_t *request = client->message;
745 	xfrout_ctx_t *xfr = NULL;
746 	dns_transfer_format_t format = client->view->transfer_format;
747 	isc_netaddr_t na;
748 	dns_peer_t *peer = NULL;
749 	isc_buffer_t *tsigbuf = NULL;
750 	char *journalfile;
751 	char msg[NS_CLIENT_ACLMSGSIZE("zone transfer")];
752 	char keyname[DNS_NAME_FORMATSIZE];
753 	bool is_poll = false;
754 	bool is_dlz = false;
755 	bool is_ixfr = false;
756 	bool useviewacl = false;
757 	uint32_t begin_serial = 0, current_serial;
758 
759 	switch (reqtype) {
760 	case dns_rdatatype_axfr:
761 		mnemonic = "AXFR";
762 		break;
763 	case dns_rdatatype_ixfr:
764 		mnemonic = "IXFR";
765 		break;
766 	default:
767 		UNREACHABLE();
768 	}
769 
770 	ns_client_log(client, DNS_LOGCATEGORY_XFER_OUT, NS_LOGMODULE_XFER_OUT,
771 		      ISC_LOG_DEBUG(6), "%s request", mnemonic);
772 	/*
773 	 * Apply quota.
774 	 */
775 	result = isc_quota_acquire(&client->manager->sctx->xfroutquota);
776 	if (result != ISC_R_SUCCESS) {
777 		isc_log_write(XFROUT_COMMON_LOGARGS, ISC_LOG_WARNING,
778 			      "%s request denied: %s", mnemonic,
779 			      isc_result_totext(result));
780 		goto max_quota;
781 	}
782 
783 	/*
784 	 * Interpret the question section.
785 	 */
786 	result = dns_message_firstname(request, DNS_SECTION_QUESTION);
787 	INSIST(result == ISC_R_SUCCESS);
788 
789 	/*
790 	 * The question section must contain exactly one question, and
791 	 * it must be for AXFR/IXFR as appropriate.
792 	 */
793 	question_name = NULL;
794 	dns_message_currentname(request, DNS_SECTION_QUESTION, &question_name);
795 	question_rdataset = ISC_LIST_HEAD(question_name->list);
796 	question_class = question_rdataset->rdclass;
797 	INSIST(question_rdataset->type == reqtype);
798 	if (ISC_LIST_NEXT(question_rdataset, link) != NULL) {
799 		FAILC(DNS_R_FORMERR, "multiple questions");
800 	}
801 	result = dns_message_nextname(request, DNS_SECTION_QUESTION);
802 	if (result != ISC_R_NOMORE) {
803 		FAILC(DNS_R_FORMERR, "multiple questions");
804 	}
805 
806 	result = dns_view_findzone(client->view, question_name,
807 				   DNS_ZTFIND_EXACT, &zone);
808 	if (result != ISC_R_SUCCESS || dns_zone_gettype(zone) == dns_zone_dlz) {
809 		/*
810 		 * The normal zone table does not have a match, or this is
811 		 * marked in the zone table as a DLZ zone. Check the DLZ
812 		 * databases for a match.
813 		 */
814 		if (!ISC_LIST_EMPTY(client->view->dlz_searched)) {
815 			result = dns_dlzallowzonexfr(client->view,
816 						     question_name,
817 						     &client->peeraddr, &db);
818 			if (result == ISC_R_DEFAULT) {
819 				useviewacl = true;
820 				result = ISC_R_SUCCESS;
821 			}
822 			if (result == ISC_R_NOPERM) {
823 				char _buf1[DNS_NAME_FORMATSIZE];
824 				char _buf2[DNS_RDATACLASS_FORMATSIZE];
825 
826 				result = DNS_R_REFUSED;
827 				dns_name_format(question_name, _buf1,
828 						sizeof(_buf1));
829 				dns_rdataclass_format(question_class, _buf2,
830 						      sizeof(_buf2));
831 				ns_client_log(client, DNS_LOGCATEGORY_SECURITY,
832 					      NS_LOGMODULE_XFER_OUT,
833 					      ISC_LOG_ERROR,
834 					      "zone transfer '%s/%s' denied",
835 					      _buf1, _buf2);
836 				goto failure;
837 			}
838 			if (result != ISC_R_SUCCESS) {
839 				FAILQ(DNS_R_NOTAUTH, "non-authoritative zone",
840 				      question_name, question_class);
841 			}
842 			is_dlz = true;
843 		} else {
844 			/*
845 			 * not DLZ and not in normal zone table, we are
846 			 * not authoritative
847 			 */
848 			FAILQ(DNS_R_NOTAUTH, "non-authoritative zone",
849 			      question_name, question_class);
850 		}
851 	} else {
852 		/* zone table has a match */
853 		switch (dns_zone_gettype(zone)) {
854 		/*
855 		 * Primary, secondary, and mirror zones are OK for transfer.
856 		 */
857 		case dns_zone_primary:
858 		case dns_zone_secondary:
859 		case dns_zone_mirror:
860 		case dns_zone_dlz:
861 			break;
862 		default:
863 			FAILQ(DNS_R_NOTAUTH, "non-authoritative zone",
864 			      question_name, question_class);
865 		}
866 		CHECK(dns_zone_getdb(zone, &db));
867 		dns_db_currentversion(db, &ver);
868 	}
869 
870 	xfrout_log1(client, question_name, question_class, ISC_LOG_DEBUG(6),
871 		    "%s question section OK", mnemonic);
872 
873 	/*
874 	 * Check the authority section.  Look for a SOA record with
875 	 * the same name and class as the question.
876 	 */
877 	for (result = dns_message_firstname(request, DNS_SECTION_AUTHORITY);
878 	     result == ISC_R_SUCCESS;
879 	     result = dns_message_nextname(request, DNS_SECTION_AUTHORITY))
880 	{
881 		soa_name = NULL;
882 		dns_message_currentname(request, DNS_SECTION_AUTHORITY,
883 					&soa_name);
884 
885 		/*
886 		 * Ignore data whose owner name is not the zone apex.
887 		 */
888 		if (!dns_name_equal(soa_name, question_name)) {
889 			continue;
890 		}
891 
892 		for (soa_rdataset = ISC_LIST_HEAD(soa_name->list);
893 		     soa_rdataset != NULL;
894 		     soa_rdataset = ISC_LIST_NEXT(soa_rdataset, link))
895 		{
896 			/*
897 			 * Ignore non-SOA data.
898 			 */
899 			if (soa_rdataset->type != dns_rdatatype_soa) {
900 				continue;
901 			}
902 			if (soa_rdataset->rdclass != question_class) {
903 				continue;
904 			}
905 
906 			CHECK(dns_rdataset_first(soa_rdataset));
907 			dns_rdataset_current(soa_rdataset, &soa_rdata);
908 			result = dns_rdataset_next(soa_rdataset);
909 			if (result == ISC_R_SUCCESS) {
910 				FAILC(DNS_R_FORMERR, "IXFR authority section "
911 						     "has multiple SOAs");
912 			}
913 			have_soa = true;
914 			goto got_soa;
915 		}
916 	}
917 got_soa:
918 	if (result != ISC_R_NOMORE) {
919 		CHECK(result);
920 	}
921 
922 	xfrout_log1(client, question_name, question_class, ISC_LOG_DEBUG(6),
923 		    "%s authority section OK", mnemonic);
924 
925 	/*
926 	 * If not a DLZ zone or we are falling back to the view's transfer
927 	 * ACL, decide whether to allow this transfer.
928 	 */
929 	if (!is_dlz || useviewacl) {
930 		dns_acl_t *acl;
931 
932 		ns_client_aclmsg("zone transfer", question_name, reqtype,
933 				 client->view->rdclass, msg, sizeof(msg));
934 		if (useviewacl) {
935 			acl = client->view->transferacl;
936 		} else {
937 			acl = dns_zone_getxfracl(zone);
938 		}
939 		CHECK(ns_client_checkacl(client, NULL, msg, acl, true,
940 					 ISC_LOG_ERROR));
941 	}
942 
943 	/*
944 	 * AXFR over UDP is not possible.
945 	 */
946 	if (reqtype == dns_rdatatype_axfr &&
947 	    (client->attributes & NS_CLIENTATTR_TCP) == 0)
948 	{
949 		FAILC(DNS_R_FORMERR, "attempted AXFR over UDP");
950 	}
951 
952 	/*
953 	 * Look up the requesting server in the peer table.
954 	 */
955 	isc_netaddr_fromsockaddr(&na, &client->peeraddr);
956 	(void)dns_peerlist_peerbyaddr(client->view->peers, &na, &peer);
957 
958 	/*
959 	 * Decide on the transfer format (one-answer or many-answers).
960 	 */
961 	if (peer != NULL) {
962 		(void)dns_peer_gettransferformat(peer, &format);
963 	}
964 
965 	/*
966 	 * Get a dynamically allocated copy of the current SOA.
967 	 */
968 	if (is_dlz) {
969 		dns_db_currentversion(db, &ver);
970 	}
971 
972 	CHECK(dns_db_createsoatuple(db, ver, mctx, DNS_DIFFOP_EXISTS,
973 				    &current_soa_tuple));
974 
975 	current_serial = dns_soa_getserial(&current_soa_tuple->rdata);
976 	if (reqtype == dns_rdatatype_ixfr) {
977 		size_t jsize;
978 		uint64_t dbsize;
979 
980 		if (!have_soa) {
981 			FAILC(DNS_R_FORMERR, "IXFR request missing SOA");
982 		}
983 
984 		begin_serial = dns_soa_getserial(&soa_rdata);
985 
986 		/*
987 		 * RFC1995 says "If an IXFR query with the same or
988 		 * newer version number than that of the server
989 		 * is received, it is replied to with a single SOA
990 		 * record of the server's current version, just as
991 		 * in AXFR".  The claim about AXFR is incorrect,
992 		 * but other than that, we do as the RFC says.
993 		 *
994 		 * Sending a single SOA record is also how we refuse
995 		 * IXFR over UDP (currently, we always do).
996 		 */
997 		if (DNS_SERIAL_GE(begin_serial, current_serial) ||
998 		    (client->attributes & NS_CLIENTATTR_TCP) == 0)
999 		{
1000 			CHECK(soa_rrstream_create(mctx, db, ver, &stream));
1001 			is_poll = true;
1002 			goto have_stream;
1003 		}
1004 
1005 		/*
1006 		 * Outgoing IXFR may have been disabled for this peer
1007 		 * or globally.
1008 		 */
1009 		if ((client->attributes & NS_CLIENTATTR_TCP) != 0) {
1010 			bool provide_ixfr;
1011 
1012 			provide_ixfr = client->view->provideixfr;
1013 			if (peer != NULL) {
1014 				(void)dns_peer_getprovideixfr(peer,
1015 							      &provide_ixfr);
1016 			}
1017 			if (!provide_ixfr) {
1018 				xfrout_log1(client, question_name,
1019 					    question_class, ISC_LOG_DEBUG(4),
1020 					    "IXFR delta response disabled due "
1021 					    "to 'provide-ixfr no;' being set");
1022 				mnemonic = "AXFR-style IXFR";
1023 				goto axfr_fallback;
1024 			}
1025 		}
1026 
1027 		journalfile = is_dlz ? NULL : dns_zone_getjournal(zone);
1028 		if (journalfile != NULL) {
1029 			result = ixfr_rrstream_create(
1030 				mctx, journalfile, begin_serial, current_serial,
1031 				&jsize, &data_stream);
1032 		} else {
1033 			result = ISC_R_NOTFOUND;
1034 		}
1035 		if (result == ISC_R_NOTFOUND || result == ISC_R_RANGE) {
1036 			xfrout_log1(client, question_name, question_class,
1037 				    ISC_LOG_INFO,
1038 				    "IXFR version not in journal, "
1039 				    "falling back to AXFR");
1040 			mnemonic = "AXFR-style IXFR";
1041 			goto axfr_fallback;
1042 		}
1043 		CHECK(result);
1044 
1045 		result = dns_db_getsize(db, ver, NULL, &dbsize);
1046 		if (result == ISC_R_SUCCESS) {
1047 			uint32_t ratio = dns_zone_getixfrratio(zone);
1048 			if (ratio != 0 && ((100 * jsize) / dbsize) > ratio) {
1049 				data_stream->methods->destroy(&data_stream);
1050 				data_stream = NULL;
1051 				xfrout_log1(client, question_name,
1052 					    question_class, ISC_LOG_INFO,
1053 					    "IXFR delta size (%zu bytes) "
1054 					    "exceeds the maximum ratio to "
1055 					    "database size "
1056 					    "(%" PRIu64 " bytes), "
1057 					    "falling back to AXFR",
1058 					    jsize, dbsize);
1059 				mnemonic = "AXFR-style IXFR";
1060 				goto axfr_fallback;
1061 			} else {
1062 				xfrout_log1(client, question_name,
1063 					    question_class, ISC_LOG_DEBUG(4),
1064 					    "IXFR delta size (%zu bytes); "
1065 					    "database size "
1066 					    "(%" PRIu64 " bytes)",
1067 					    jsize, dbsize);
1068 			}
1069 		}
1070 		is_ixfr = true;
1071 	} else {
1072 	axfr_fallback:
1073 		CHECK(axfr_rrstream_create(mctx, db, ver, &data_stream));
1074 	}
1075 
1076 	/*
1077 	 * Bracket the data stream with SOAs.
1078 	 */
1079 	CHECK(soa_rrstream_create(mctx, db, ver, &soa_stream));
1080 	CHECK(compound_rrstream_create(mctx, &soa_stream, &data_stream,
1081 				       &stream));
1082 	soa_stream = NULL;
1083 	data_stream = NULL;
1084 
1085 have_stream:
1086 	CHECK(dns_message_getquerytsig(request, mctx, &tsigbuf));
1087 	/*
1088 	 * Create the xfrout context object.  This transfers the ownership
1089 	 * of "stream", "db", "ver", and "quota" to the xfrout context object.
1090 	 */
1091 
1092 	if (is_dlz) {
1093 		xfrout_ctx_create(mctx, client, request->id, question_name,
1094 				  reqtype, question_class, zone, db, ver,
1095 				  stream, dns_message_gettsigkey(request),
1096 				  tsigbuf, request->verified_sig, 3600, 3600,
1097 				  (format == dns_many_answers) ? true : false,
1098 				  &xfr);
1099 	} else {
1100 		xfrout_ctx_create(
1101 			mctx, client, request->id, question_name, reqtype,
1102 			question_class, zone, db, ver, stream,
1103 			dns_message_gettsigkey(request), tsigbuf,
1104 			request->verified_sig, dns_zone_getmaxxfrout(zone),
1105 			dns_zone_getidleout(zone),
1106 			(format == dns_many_answers) ? true : false, &xfr);
1107 	}
1108 
1109 	xfr->end_serial = current_serial;
1110 	xfr->mnemonic = mnemonic;
1111 	stream = NULL;
1112 
1113 	CHECK(xfr->stream->methods->first(xfr->stream));
1114 
1115 	if (xfr->tsigkey != NULL) {
1116 		dns_name_format(xfr->tsigkey->name, keyname, sizeof(keyname));
1117 	} else {
1118 		keyname[0] = '\0';
1119 	}
1120 	xfr->poll = is_poll;
1121 	if (is_poll) {
1122 		xfr->mnemonic = "IXFR poll response";
1123 		xfrout_log1(client, question_name, question_class,
1124 			    ISC_LOG_DEBUG(1), "IXFR poll up to date%s%s",
1125 			    (xfr->tsigkey != NULL) ? ": TSIG " : "", keyname);
1126 	} else if (is_ixfr) {
1127 		xfrout_log1(client, question_name, question_class, ISC_LOG_INFO,
1128 			    "%s started%s%s (serial %u -> %u)", mnemonic,
1129 			    (xfr->tsigkey != NULL) ? ": TSIG " : "", keyname,
1130 			    begin_serial, current_serial);
1131 	} else {
1132 		xfrout_log1(client, question_name, question_class, ISC_LOG_INFO,
1133 			    "%s started%s%s (serial %u)", mnemonic,
1134 			    (xfr->tsigkey != NULL) ? ": TSIG " : "", keyname,
1135 			    current_serial);
1136 	}
1137 
1138 	if (zone != NULL) {
1139 		dns_zone_getraw(zone, &raw);
1140 		mayberaw = (raw != NULL) ? raw : zone;
1141 		if ((client->attributes & NS_CLIENTATTR_WANTEXPIRE) != 0 &&
1142 		    (dns_zone_gettype(mayberaw) == dns_zone_secondary ||
1143 		     dns_zone_gettype(mayberaw) == dns_zone_mirror))
1144 		{
1145 			isc_time_t expiretime;
1146 			uint32_t secs;
1147 			dns_zone_getexpiretime(zone, &expiretime);
1148 			secs = isc_time_seconds(&expiretime);
1149 			if (secs >= client->now && result == ISC_R_SUCCESS) {
1150 				client->attributes |= NS_CLIENTATTR_HAVEEXPIRE;
1151 				client->expire = secs - client->now;
1152 			}
1153 		}
1154 		if (raw != NULL) {
1155 			dns_zone_detach(&raw);
1156 		}
1157 	}
1158 
1159 	/* Start the timers */
1160 	if (xfr->maxtime > 0) {
1161 		xfrout_log(xfr, ISC_LOG_DEBUG(1),
1162 			   "starting maxtime timer %" PRIu64 " ms",
1163 			   xfr->maxtime);
1164 		isc_nm_timer_start(xfr->maxtime_timer, xfr->maxtime);
1165 	}
1166 
1167 	/*
1168 	 * Hand the context over to sendstream().  Set xfr to NULL;
1169 	 * sendstream() is responsible for either passing the
1170 	 * context on to a later event handler or destroying it.
1171 	 */
1172 	sendstream(xfr);
1173 	xfr = NULL;
1174 
1175 	result = ISC_R_SUCCESS;
1176 
1177 failure:
1178 	if (result == DNS_R_REFUSED) {
1179 		inc_stats(client, zone, ns_statscounter_xfrrej);
1180 	}
1181 	if (current_soa_tuple != NULL) {
1182 		dns_difftuple_free(&current_soa_tuple);
1183 	}
1184 	if (stream != NULL) {
1185 		stream->methods->destroy(&stream);
1186 	}
1187 	if (soa_stream != NULL) {
1188 		soa_stream->methods->destroy(&soa_stream);
1189 	}
1190 	if (data_stream != NULL) {
1191 		data_stream->methods->destroy(&data_stream);
1192 	}
1193 	if (ver != NULL) {
1194 		dns_db_closeversion(db, &ver, false);
1195 	}
1196 	if (db != NULL) {
1197 		dns_db_detach(&db);
1198 	}
1199 	if (zone != NULL) {
1200 		dns_zone_detach(&zone);
1201 	}
1202 
1203 	if (xfr != NULL) {
1204 		xfrout_fail(xfr, result, "setting up zone transfer");
1205 	} else if (result != ISC_R_SUCCESS) {
1206 		isc_quota_release(&client->manager->sctx->xfroutquota);
1207 	max_quota:
1208 		ns_client_log(client, DNS_LOGCATEGORY_XFER_OUT,
1209 			      NS_LOGMODULE_XFER_OUT, ISC_LOG_DEBUG(3),
1210 			      "zone transfer setup failed");
1211 		ns_client_error(client, result);
1212 		isc_nmhandle_detach(&client->reqhandle);
1213 	}
1214 }
1215 
1216 static void
1217 xfrout_ctx_create(isc_mem_t *mctx, ns_client_t *client, unsigned int id,
1218 		  dns_name_t *qname, dns_rdatatype_t qtype,
1219 		  dns_rdataclass_t qclass, dns_zone_t *zone, dns_db_t *db,
1220 		  dns_dbversion_t *ver, rrstream_t *stream,
1221 		  dns_tsigkey_t *tsigkey, isc_buffer_t *lasttsig,
1222 		  bool verified_tsig, unsigned int maxtime,
1223 		  unsigned int idletime, bool many_answers,
1224 		  xfrout_ctx_t **xfrp) {
1225 	xfrout_ctx_t *xfr = NULL;
1226 	unsigned int len = NS_CLIENT_TCP_BUFFER_SIZE;
1227 	void *mem = NULL;
1228 
1229 	REQUIRE(xfrp != NULL && *xfrp == NULL);
1230 
1231 	xfr = isc_mem_get(mctx, sizeof(*xfr));
1232 	*xfr = (xfrout_ctx_t){
1233 		.client = client,
1234 		.id = id,
1235 		.qname = qname,
1236 		.qtype = qtype,
1237 		.qclass = qclass,
1238 		.maxtime = maxtime * 1000,   /* in milliseconds */
1239 		.idletime = idletime * 1000, /* In milliseconds */
1240 		.tsigkey = tsigkey,
1241 		.lasttsig = lasttsig,
1242 		.verified_tsig = verified_tsig,
1243 		.many_answers = many_answers,
1244 	};
1245 
1246 	isc_mem_attach(mctx, &xfr->mctx);
1247 
1248 	if (zone != NULL) { /* zone will be NULL if it's DLZ */
1249 		dns_zone_attach(zone, &xfr->zone);
1250 	}
1251 	dns_db_attach(db, &xfr->db);
1252 	dns_db_attachversion(db, ver, &xfr->ver);
1253 
1254 	xfr->stats.start = isc_time_now();
1255 
1256 	isc_nm_timer_create(xfr->client->handle, xfrout_client_timeout, xfr,
1257 			    &xfr->maxtime_timer);
1258 
1259 	isc_nm_timer_create(xfr->client->handle, xfrout_delayed_timeout, xfr,
1260 			    &xfr->delayed_send_timer);
1261 
1262 	/*
1263 	 * Allocate a temporary buffer for the uncompressed response
1264 	 * message data.  The buffer size must be 65535 bytes
1265 	 * (NS_CLIENT_TCP_BUFFER_SIZE): small enough that compressed
1266 	 * data will fit in a single TCP message, and big enough to
1267 	 * hold a maximum-sized RR.
1268 	 *
1269 	 * Note that although 65535-byte RRs are allowed in principle, they
1270 	 * cannot be zone-transferred (at least not if uncompressible),
1271 	 * because the message and RR headers would push the size of the
1272 	 * TCP message over the 65535 byte limit.
1273 	 */
1274 	mem = isc_mem_get(mctx, len);
1275 	isc_buffer_init(&xfr->buf, mem, len);
1276 
1277 	/*
1278 	 * Allocate another temporary buffer for the compressed
1279 	 * response message.
1280 	 */
1281 	mem = isc_mem_get(mctx, len);
1282 	isc_buffer_init(&xfr->txbuf, (char *)mem, len);
1283 	xfr->txmem = mem;
1284 	xfr->txmemlen = len;
1285 
1286 	/*
1287 	 * These MUST be after the last "goto failure;" / CHECK to
1288 	 * prevent a double free by the caller.
1289 	 */
1290 	xfr->stream = stream;
1291 
1292 	*xfrp = xfr;
1293 }
1294 
1295 static void
1296 xfrout_send(xfrout_ctx_t *xfr) {
1297 	const bool is_tcp = ((xfr->client->attributes & NS_CLIENTATTR_TCP) !=
1298 			     0);
1299 
1300 	if (is_tcp) {
1301 		isc_region_t used;
1302 
1303 		isc_buffer_usedregion(&xfr->txbuf, &used);
1304 
1305 		isc_nmhandle_attach(xfr->client->handle,
1306 				    &xfr->client->sendhandle);
1307 		if (xfr->idletime > 0) {
1308 			isc_nmhandle_setwritetimeout(xfr->client->sendhandle,
1309 						     xfr->idletime);
1310 		}
1311 		isc_nm_send(xfr->client->sendhandle, &used, xfrout_senddone,
1312 			    xfr);
1313 		xfr->sends++;
1314 		xfr->cbytes = used.length;
1315 	} else {
1316 		ns_client_send(xfr->client);
1317 		xfr->stream->methods->pause(xfr->stream);
1318 		isc_nmhandle_detach(&xfr->client->reqhandle);
1319 		xfrout_ctx_destroy(&xfr);
1320 	}
1321 }
1322 
1323 static void
1324 xfrout_delayed_timeout(void *arg, isc_result_t result) {
1325 	xfrout_ctx_t *xfr = (xfrout_ctx_t *)arg;
1326 	UNUSED(result);
1327 
1328 	isc_nm_timer_stop(xfr->delayed_send_timer);
1329 	xfrout_send(xfr);
1330 }
1331 
1332 static void
1333 xfrout_enqueue_send(xfrout_ctx_t *xfr) {
1334 	uint64_t timeout = 0;
1335 
1336 	/*
1337 	 * System test helper options to simulate network issues.
1338 	 *
1339 	 * Both "transferslowly" and "transferstuck" are not meant to be
1340 	 * used together (and are not actually used this way).
1341 	 */
1342 	if (ns_server_getoption(xfr->client->manager->sctx,
1343 				NS_SERVER_TRANSFERSLOWLY))
1344 	{
1345 		/* Sleep for a bit over a second. */
1346 		timeout = 1000;
1347 	} else if (ns_server_getoption(xfr->client->manager->sctx,
1348 				       NS_SERVER_TRANSFERSTUCK))
1349 	{
1350 		/* Sleep for a bit over a minute. */
1351 		timeout = 60 * 1000;
1352 	}
1353 
1354 	if (timeout == 0) {
1355 		xfrout_send(xfr);
1356 		return;
1357 	}
1358 
1359 	/* delay */
1360 	isc_nm_timer_start(xfr->delayed_send_timer, timeout);
1361 }
1362 
1363 /*
1364  * Arrange to send as much as we can of "stream" without blocking.
1365  *
1366  * Requires:
1367  *	The stream iterator is initialized and points at an RR,
1368  *      or possibly at the end of the stream (that is, the
1369  *      _first method of the iterator has been called).
1370  */
1371 static void
1372 sendstream(xfrout_ctx_t *xfr) {
1373 	dns_message_t *tcpmsg = NULL;
1374 	dns_message_t *msg = NULL; /* Client message if UDP, tcpmsg if TCP */
1375 	isc_result_t result;
1376 	dns_rdataset_t *qrdataset;
1377 	dns_name_t *msgname = NULL;
1378 	dns_rdata_t *msgrdata = NULL;
1379 	dns_rdatalist_t *msgrdl = NULL;
1380 	dns_rdataset_t *msgrds = NULL;
1381 	dns_compress_t cctx;
1382 	bool cleanup_cctx = false;
1383 	bool is_tcp;
1384 	int n_rrs;
1385 
1386 	isc_buffer_clear(&xfr->buf);
1387 	isc_buffer_clear(&xfr->txbuf);
1388 
1389 	is_tcp = ((xfr->client->attributes & NS_CLIENTATTR_TCP) != 0);
1390 	if (!is_tcp) {
1391 		/*
1392 		 * In the UDP case, we put the response data directly into
1393 		 * the client message.
1394 		 */
1395 		msg = xfr->client->message;
1396 		CHECK(dns_message_reply(msg, true));
1397 	} else {
1398 		/*
1399 		 * TCP. Build a response dns_message_t, temporarily storing
1400 		 * the raw, uncompressed owner names and RR data contiguously
1401 		 * in xfr->buf.  We know that if the uncompressed data fits
1402 		 * in xfr->buf, the compressed data will surely fit in a TCP
1403 		 * message.
1404 		 */
1405 
1406 		dns_message_create(xfr->mctx, NULL, NULL,
1407 				   DNS_MESSAGE_INTENTRENDER, &tcpmsg);
1408 		msg = tcpmsg;
1409 
1410 		msg->id = xfr->id;
1411 		msg->rcode = dns_rcode_noerror;
1412 		msg->flags = DNS_MESSAGEFLAG_QR | DNS_MESSAGEFLAG_AA;
1413 		if ((xfr->client->attributes & NS_CLIENTATTR_RA) != 0) {
1414 			msg->flags |= DNS_MESSAGEFLAG_RA;
1415 		}
1416 		CHECK(dns_message_settsigkey(msg, xfr->tsigkey));
1417 		dns_message_setquerytsig(msg, xfr->lasttsig);
1418 		if (xfr->lasttsig != NULL) {
1419 			isc_buffer_free(&xfr->lasttsig);
1420 		}
1421 		msg->verified_sig = xfr->verified_tsig;
1422 
1423 		/*
1424 		 * Add a EDNS option to the message?
1425 		 */
1426 		if ((xfr->client->attributes & NS_CLIENTATTR_WANTOPT) != 0) {
1427 			dns_rdataset_t *opt = NULL;
1428 
1429 			CHECK(ns_client_addopt(xfr->client, msg, &opt));
1430 			CHECK(dns_message_setopt(msg, opt));
1431 			/*
1432 			 * Add to first message only.
1433 			 */
1434 			xfr->client->attributes &= ~NS_CLIENTATTR_WANTNSID;
1435 			xfr->client->attributes &= ~NS_CLIENTATTR_HAVEEXPIRE;
1436 		}
1437 
1438 		/*
1439 		 * Account for reserved space.
1440 		 */
1441 		if (xfr->tsigkey != NULL) {
1442 			INSIST(msg->reserved != 0U);
1443 		}
1444 		isc_buffer_add(&xfr->buf, msg->reserved);
1445 
1446 		/*
1447 		 * Include a question section in the first message only.
1448 		 * BIND 8.2.1 will not recognize an IXFR if it does not
1449 		 * have a question section.
1450 		 */
1451 		if (!xfr->question_added) {
1452 			dns_name_t *qname = NULL;
1453 			isc_region_t r;
1454 
1455 			/*
1456 			 * Reserve space for the 12-byte message header
1457 			 * and 4 bytes of question.
1458 			 */
1459 			isc_buffer_add(&xfr->buf, 12 + 4);
1460 
1461 			qrdataset = NULL;
1462 			dns_message_gettemprdataset(msg, &qrdataset);
1463 			dns_rdataset_makequestion(qrdataset,
1464 						  xfr->client->message->rdclass,
1465 						  xfr->qtype);
1466 
1467 			dns_message_gettempname(msg, &qname);
1468 			isc_buffer_availableregion(&xfr->buf, &r);
1469 			INSIST(r.length >= xfr->qname->length);
1470 			r.length = xfr->qname->length;
1471 			isc_buffer_putmem(&xfr->buf, xfr->qname->ndata,
1472 					  xfr->qname->length);
1473 			dns_name_fromregion(qname, &r);
1474 			ISC_LIST_INIT(qname->list);
1475 			ISC_LIST_APPEND(qname->list, qrdataset, link);
1476 
1477 			dns_message_addname(msg, qname, DNS_SECTION_QUESTION);
1478 			xfr->question_added = true;
1479 		} else {
1480 			/*
1481 			 * Reserve space for the 12-byte message header
1482 			 */
1483 			isc_buffer_add(&xfr->buf, 12);
1484 			msg->tcp_continuation = 1;
1485 		}
1486 	}
1487 
1488 	/*
1489 	 * Try to fit in as many RRs as possible, unless "one-answer"
1490 	 * format has been requested.
1491 	 */
1492 	for (n_rrs = 0;; n_rrs++) {
1493 		dns_name_t *name = NULL;
1494 		uint32_t ttl;
1495 		dns_rdata_t *rdata = NULL;
1496 
1497 		unsigned int size;
1498 		isc_region_t r;
1499 
1500 		msgname = NULL;
1501 		msgrdata = NULL;
1502 		msgrdl = NULL;
1503 		msgrds = NULL;
1504 
1505 		xfr->stream->methods->current(xfr->stream, &name, &ttl, &rdata);
1506 		size = name->length + 10 + rdata->length;
1507 		isc_buffer_availableregion(&xfr->buf, &r);
1508 		if (size >= r.length) {
1509 			/*
1510 			 * RR would not fit.  If there are other RRs in the
1511 			 * buffer, send them now and leave this RR to the
1512 			 * next message.  If this RR overflows the buffer
1513 			 * all by itself, fail.
1514 			 *
1515 			 * In theory some RRs might fit in a TCP message
1516 			 * when compressed even if they do not fit when
1517 			 * uncompressed, but surely we don't want
1518 			 * to send such monstrosities to an unsuspecting
1519 			 * secondary.
1520 			 */
1521 			if (n_rrs == 0) {
1522 				xfrout_log(xfr, ISC_LOG_WARNING,
1523 					   "RR too large for zone transfer "
1524 					   "(%d bytes)",
1525 					   size);
1526 				/* XXX DNS_R_RRTOOLARGE? */
1527 				result = ISC_R_NOSPACE;
1528 				goto failure;
1529 			}
1530 			break;
1531 		}
1532 
1533 		if (isc_log_wouldlog(ns_lctx, XFROUT_RR_LOGLEVEL)) {
1534 			log_rr(name, rdata, ttl); /* XXX */
1535 		}
1536 
1537 		dns_message_gettempname(msg, &msgname);
1538 		isc_buffer_availableregion(&xfr->buf, &r);
1539 		INSIST(r.length >= name->length);
1540 		r.length = name->length;
1541 		isc_buffer_putmem(&xfr->buf, name->ndata, name->length);
1542 		dns_name_fromregion(msgname, &r);
1543 
1544 		/* Reserve space for RR header. */
1545 		isc_buffer_add(&xfr->buf, 10);
1546 
1547 		dns_message_gettemprdata(msg, &msgrdata);
1548 		isc_buffer_availableregion(&xfr->buf, &r);
1549 		r.length = rdata->length;
1550 		isc_buffer_putmem(&xfr->buf, rdata->data, rdata->length);
1551 		dns_rdata_init(msgrdata);
1552 		dns_rdata_fromregion(msgrdata, rdata->rdclass, rdata->type, &r);
1553 
1554 		dns_message_gettemprdatalist(msg, &msgrdl);
1555 		msgrdl->type = rdata->type;
1556 		msgrdl->rdclass = rdata->rdclass;
1557 		msgrdl->ttl = ttl;
1558 		if (rdata->type == dns_rdatatype_sig ||
1559 		    rdata->type == dns_rdatatype_rrsig)
1560 		{
1561 			msgrdl->covers = dns_rdata_covers(rdata);
1562 		} else {
1563 			msgrdl->covers = dns_rdatatype_none;
1564 		}
1565 		ISC_LIST_APPEND(msgrdl->rdata, msgrdata, link);
1566 
1567 		dns_message_gettemprdataset(msg, &msgrds);
1568 		dns_rdatalist_tordataset(msgrdl, msgrds);
1569 
1570 		ISC_LIST_APPEND(msgname->list, msgrds, link);
1571 
1572 		dns_message_addname(msg, msgname, DNS_SECTION_ANSWER);
1573 		msgname = NULL;
1574 
1575 		xfr->stats.nrecs++;
1576 
1577 		result = xfr->stream->methods->next(xfr->stream);
1578 		if (result == ISC_R_NOMORE) {
1579 			xfr->end_of_stream = true;
1580 			break;
1581 		}
1582 		CHECK(result);
1583 
1584 		if (!xfr->many_answers) {
1585 			break;
1586 		}
1587 		/*
1588 		 * At this stage, at least 1 RR has been rendered into
1589 		 * the message. Check if we want to clamp this message
1590 		 * here (TCP only).
1591 		 */
1592 		if ((isc_buffer_usedlength(&xfr->buf) >=
1593 		     xfr->client->manager->sctx->transfer_tcp_message_size) &&
1594 		    is_tcp)
1595 		{
1596 			break;
1597 		}
1598 	}
1599 
1600 	if (is_tcp) {
1601 		dns_compress_init(&cctx, xfr->mctx,
1602 				  DNS_COMPRESS_CASE | DNS_COMPRESS_LARGE);
1603 		cleanup_cctx = true;
1604 		CHECK(dns_message_renderbegin(msg, &cctx, &xfr->txbuf));
1605 		CHECK(dns_message_rendersection(msg, DNS_SECTION_QUESTION, 0));
1606 		CHECK(dns_message_rendersection(msg, DNS_SECTION_ANSWER, 0));
1607 		CHECK(dns_message_renderend(msg));
1608 		dns_compress_invalidate(&cctx);
1609 		cleanup_cctx = false;
1610 
1611 		xfrout_log(xfr, ISC_LOG_DEBUG(8),
1612 			   "sending TCP message of %d bytes",
1613 			   isc_buffer_usedlength(&xfr->txbuf));
1614 
1615 		xfrout_enqueue_send(xfr);
1616 	} else {
1617 		xfrout_log(xfr, ISC_LOG_DEBUG(8), "sending IXFR UDP response");
1618 
1619 		xfrout_enqueue_send(xfr);
1620 		return;
1621 	}
1622 
1623 	/* Advance lasttsig to be the last TSIG generated */
1624 	CHECK(dns_message_getquerytsig(msg, xfr->mctx, &xfr->lasttsig));
1625 
1626 failure:
1627 	if (tcpmsg != NULL) {
1628 		dns_message_detach(&tcpmsg);
1629 	}
1630 
1631 	if (cleanup_cctx) {
1632 		dns_compress_invalidate(&cctx);
1633 	}
1634 	/*
1635 	 * Make sure to release any locks held by database
1636 	 * iterators before returning from the event handler.
1637 	 */
1638 	xfr->stream->methods->pause(xfr->stream);
1639 
1640 	if (result == ISC_R_SUCCESS) {
1641 		return;
1642 	}
1643 
1644 	xfrout_fail(xfr, result, "sending zone data");
1645 }
1646 
1647 static void
1648 xfrout_ctx_destroy(xfrout_ctx_t **xfrp) {
1649 	xfrout_ctx_t *xfr = *xfrp;
1650 	*xfrp = NULL;
1651 
1652 	INSIST(xfr->sends == 0);
1653 
1654 	isc_nm_timer_stop(xfr->delayed_send_timer);
1655 	isc_nm_timer_detach(&xfr->delayed_send_timer);
1656 
1657 	isc_nm_timer_stop(xfr->maxtime_timer);
1658 	isc_nm_timer_detach(&xfr->maxtime_timer);
1659 
1660 	if (xfr->stream != NULL) {
1661 		xfr->stream->methods->destroy(&xfr->stream);
1662 	}
1663 	if (xfr->buf.base != NULL) {
1664 		isc_mem_put(xfr->mctx, xfr->buf.base, xfr->buf.length);
1665 	}
1666 	if (xfr->txmem != NULL) {
1667 		isc_mem_put(xfr->mctx, xfr->txmem, xfr->txmemlen);
1668 	}
1669 	if (xfr->lasttsig != NULL) {
1670 		isc_buffer_free(&xfr->lasttsig);
1671 	}
1672 
1673 	isc_quota_release(&xfr->client->manager->sctx->xfroutquota);
1674 
1675 	if (xfr->ver != NULL) {
1676 		dns_db_closeversion(xfr->db, &xfr->ver, false);
1677 	}
1678 	if (xfr->zone != NULL) {
1679 		dns_zone_detach(&xfr->zone);
1680 	}
1681 	if (xfr->db != NULL) {
1682 		dns_db_detach(&xfr->db);
1683 	}
1684 
1685 	isc_mem_putanddetach(&xfr->mctx, xfr, sizeof(*xfr));
1686 }
1687 
1688 static void
1689 xfrout_senddone(isc_nmhandle_t *handle, isc_result_t result, void *arg) {
1690 	xfrout_ctx_t *xfr = (xfrout_ctx_t *)arg;
1691 
1692 	REQUIRE((xfr->client->attributes & NS_CLIENTATTR_TCP) != 0);
1693 
1694 	INSIST(handle == xfr->client->handle);
1695 
1696 	xfr->sends--;
1697 	INSIST(xfr->sends == 0);
1698 
1699 	isc_nmhandle_detach(&xfr->client->sendhandle);
1700 
1701 	/*
1702 	 * Update transfer statistics if sending succeeded, accounting for the
1703 	 * two-byte TCP length prefix included in the number of bytes sent.
1704 	 */
1705 	if (result == ISC_R_SUCCESS) {
1706 		xfr->stats.nmsg++;
1707 		xfr->stats.nbytes += xfr->cbytes;
1708 	}
1709 
1710 	if (xfr->shuttingdown) {
1711 		xfrout_maybe_destroy(xfr);
1712 	} else if (result != ISC_R_SUCCESS) {
1713 		xfrout_fail(xfr, result, "send");
1714 	} else if (!xfr->end_of_stream) {
1715 		sendstream(xfr);
1716 	} else {
1717 		/* End of zone transfer stream. */
1718 		uint64_t msecs, persec;
1719 
1720 		inc_stats(xfr->client, xfr->zone, ns_statscounter_xfrdone);
1721 		xfr->stats.end = isc_time_now();
1722 		msecs = isc_time_microdiff(&xfr->stats.end, &xfr->stats.start);
1723 		msecs /= 1000;
1724 		if (msecs == 0) {
1725 			msecs = 1;
1726 		}
1727 		persec = (xfr->stats.nbytes * 1000) / msecs;
1728 		xfrout_log(xfr, xfr->poll ? ISC_LOG_DEBUG(1) : ISC_LOG_INFO,
1729 			   "%s ended: "
1730 			   "%" PRIu64 " messages, %" PRIu64 " records, "
1731 			   "%" PRIu64 " bytes, "
1732 			   "%u.%03u secs (%u bytes/sec) (serial %u)",
1733 			   xfr->mnemonic, xfr->stats.nmsg, xfr->stats.nrecs,
1734 			   xfr->stats.nbytes, (unsigned int)(msecs / 1000),
1735 			   (unsigned int)(msecs % 1000), (unsigned int)persec,
1736 			   xfr->end_serial);
1737 
1738 		/*
1739 		 * We're done, unreference the handle and destroy the xfr
1740 		 * context.
1741 		 */
1742 		isc_nmhandle_detach(&xfr->client->reqhandle);
1743 		xfrout_ctx_destroy(&xfr);
1744 	}
1745 }
1746 
1747 static void
1748 xfrout_fail(xfrout_ctx_t *xfr, isc_result_t result, const char *msg) {
1749 	xfr->shuttingdown = true;
1750 	xfrout_log(xfr, ISC_LOG_ERROR, "%s: %s", msg,
1751 		   isc_result_totext(result));
1752 	xfrout_maybe_destroy(xfr);
1753 }
1754 
1755 static void
1756 xfrout_maybe_destroy(xfrout_ctx_t *xfr) {
1757 	REQUIRE(xfr->shuttingdown);
1758 
1759 	ns_client_drop(xfr->client, ISC_R_CANCELED);
1760 	isc_nmhandle_detach(&xfr->client->reqhandle);
1761 	xfrout_ctx_destroy(&xfr);
1762 }
1763 
1764 static void
1765 xfrout_client_timeout(void *arg, isc_result_t result) {
1766 	xfrout_ctx_t *xfr = (xfrout_ctx_t *)arg;
1767 
1768 	xfr->shuttingdown = true;
1769 	xfrout_log(xfr, ISC_LOG_ERROR, "%s: %s", "aborted",
1770 		   isc_result_totext(result));
1771 }
1772 
1773 /*
1774  * Log outgoing zone transfer messages in a format like
1775  * <client>: transfer of <zone>: <message>
1776  */
1777 
1778 static void
1779 xfrout_logv(ns_client_t *client, dns_name_t *zonename, dns_rdataclass_t rdclass,
1780 	    int level, const char *fmt, va_list ap) ISC_FORMAT_PRINTF(5, 0);
1781 
1782 static void
1783 xfrout_logv(ns_client_t *client, dns_name_t *zonename, dns_rdataclass_t rdclass,
1784 	    int level, const char *fmt, va_list ap) {
1785 	char msgbuf[2048];
1786 	char namebuf[DNS_NAME_FORMATSIZE];
1787 	char classbuf[DNS_RDATACLASS_FORMATSIZE];
1788 
1789 	dns_name_format(zonename, namebuf, sizeof(namebuf));
1790 	dns_rdataclass_format(rdclass, classbuf, sizeof(classbuf));
1791 	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
1792 	ns_client_log(client, DNS_LOGCATEGORY_XFER_OUT, NS_LOGMODULE_XFER_OUT,
1793 		      level, "transfer of '%s/%s': %s", namebuf, classbuf,
1794 		      msgbuf);
1795 }
1796 
1797 /*
1798  * Logging function for use when a xfrout_ctx_t has not yet been created.
1799  */
1800 static void
1801 xfrout_log1(ns_client_t *client, dns_name_t *zonename, dns_rdataclass_t rdclass,
1802 	    int level, const char *fmt, ...) {
1803 	va_list ap;
1804 	va_start(ap, fmt);
1805 	xfrout_logv(client, zonename, rdclass, level, fmt, ap);
1806 	va_end(ap);
1807 }
1808 
1809 /*
1810  * Logging function for use when there is a xfrout_ctx_t.
1811  */
1812 static void
1813 xfrout_log(xfrout_ctx_t *xfr, int level, const char *fmt, ...) {
1814 	va_list ap;
1815 	va_start(ap, fmt);
1816 	xfrout_logv(xfr->client, xfr->qname, xfr->qclass, level, fmt, ap);
1817 	va_end(ap);
1818 }
1819