xref: /netbsd-src/external/mpl/dhcp/bind/dist/lib/isc/lex.c (revision 4afad4b7fa6d4a0d3dedf41d1587a7250710ae54)
1 /*	$NetBSD: lex.c,v 1.1 2024/02/18 20:57:49 christos Exp $	*/
2 
3 /*
4  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
5  *
6  * SPDX-License-Identifier: MPL-2.0
7  *
8  * This Source Code Form is subject to the terms of the Mozilla Public
9  * License, v. 2.0. If a copy of the MPL was not distributed with this
10  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
11  *
12  * See the COPYRIGHT file distributed with this work for additional
13  * information regarding copyright ownership.
14  */
15 
16 /*! \file */
17 
18 #include <ctype.h>
19 #include <errno.h>
20 #include <inttypes.h>
21 #include <stdbool.h>
22 #include <stdlib.h>
23 
24 #include <isc/buffer.h>
25 #include <isc/file.h>
26 #include <isc/lex.h>
27 #include <isc/mem.h>
28 #include <isc/parseint.h>
29 #include <isc/print.h>
30 #include <isc/stdio.h>
31 #include <isc/string.h>
32 #include <isc/util.h>
33 
/*
 * One entry on a lexer's stack of input sources.  Entries are created by
 * new_source() and released by isc_lex_close().
 */
34 typedef struct inputsource {
35 	isc_result_t result; /* sticky status; isc_lex_gettoken() returns it at once if not ISC_R_SUCCESS */
36 	bool is_file; /* true: 'input' is a FILE *; false: 'input' is an isc_buffer_t * */
37 	bool need_close; /* fclose() 'input' when the source is closed */
38 	bool at_eof; /* set once 'input' has been read to its end */
39 	bool last_was_eol; /* lexer's last_was_eol at open time; restored on close */
40 	isc_buffer_t *pushback; /* characters read from 'input' for the current token */
41 	unsigned int ignored; /* consumed length of 'pushback' when the token had not started yet */
42 	void *input; /* the underlying stream or buffer (see is_file) */
43 	char *name; /* copy of the source name, owned by this structure */
44 	unsigned long line; /* current line number, starting at 1 */
45 	unsigned long saved_line; /* 'line' at the start of the last isc_lex_gettoken() call */
46 	ISC_LINK(struct inputsource) link;
47 } inputsource;
48 
/*
 * Magic number stored in isc_lex.magic by isc_lex_create() and cleared by
 * isc_lex_destroy(); VALID_LEX() checks a lexer pointer against it.
 */
49 #define LEX_MAGIC    ISC_MAGIC('L', 'e', 'x', '!')
50 #define VALID_LEX(l) ISC_MAGIC_VALID(l, LEX_MAGIC)
51 
/*
 * Lexer state shared by all input sources opened on the lexer.
 */
52 struct isc_lex {
53 	/* Unlocked. */
54 	unsigned int magic; /* LEX_MAGIC while the lexer is valid */
55 	isc_mem_t *mctx; /* memory context used for every allocation */
56 	size_t max_token; /* token capacity of 'data'; 'data' holds max_token + 1 bytes */
57 	char *data; /* NUL-terminated text of the token being built */
58 	unsigned int comments; /* ISC_LEXCOMMENT_* styles currently recognised */
59 	bool comment_ok; /* comments are only looked for while this is true */
60 	bool last_was_eol; /* true at start of input and after an end of line */
61 	unsigned int brace_count; /* unmatched '{' seen while reading brace text */
62 	unsigned int paren_count; /* unmatched '(' seen in DNS multi-line mode */
63 	unsigned int saved_paren_count; /* paren_count at the start of the last isc_lex_gettoken() */
64 	isc_lexspecials_t specials; /* indexed by character; non-zero marks a special */
65 	LIST(struct inputsource) sources; /* stack of inputs; the head is the current one */
66 };
67 
68 static isc_result_t
grow_data(isc_lex_t * lex,size_t * remainingp,char ** currp,char ** prevp)69 grow_data(isc_lex_t *lex, size_t *remainingp, char **currp, char **prevp) {
70 	char *tmp;
71 
72 	tmp = isc_mem_get(lex->mctx, lex->max_token * 2 + 1);
73 	memmove(tmp, lex->data, lex->max_token + 1);
74 	*currp = tmp + (*currp - lex->data);
75 	if (*prevp != NULL) {
76 		*prevp = tmp + (*prevp - lex->data);
77 	}
78 	isc_mem_put(lex->mctx, lex->data, lex->max_token + 1);
79 	lex->data = tmp;
80 	*remainingp += lex->max_token;
81 	lex->max_token *= 2;
82 	return (ISC_R_SUCCESS);
83 }
84 
85 isc_result_t
isc_lex_create(isc_mem_t * mctx,size_t max_token,isc_lex_t ** lexp)86 isc_lex_create(isc_mem_t *mctx, size_t max_token, isc_lex_t **lexp) {
87 	isc_lex_t *lex;
88 
89 	/*
90 	 * Create a lexer.
91 	 */
92 	REQUIRE(lexp != NULL && *lexp == NULL);
93 
94 	if (max_token == 0U) {
95 		max_token = 1;
96 	}
97 
98 	lex = isc_mem_get(mctx, sizeof(*lex));
99 	lex->data = isc_mem_get(mctx, max_token + 1);
100 	lex->mctx = mctx;
101 	lex->max_token = max_token;
102 	lex->comments = 0;
103 	lex->comment_ok = true;
104 	lex->last_was_eol = true;
105 	lex->brace_count = 0;
106 	lex->paren_count = 0;
107 	lex->saved_paren_count = 0;
108 	memset(lex->specials, 0, 256);
109 	INIT_LIST(lex->sources);
110 	lex->magic = LEX_MAGIC;
111 
112 	*lexp = lex;
113 
114 	return (ISC_R_SUCCESS);
115 }
116 
117 void
isc_lex_destroy(isc_lex_t ** lexp)118 isc_lex_destroy(isc_lex_t **lexp) {
119 	isc_lex_t *lex;
120 
121 	/*
122 	 * Destroy the lexer.
123 	 */
124 
125 	REQUIRE(lexp != NULL);
126 	lex = *lexp;
127 	*lexp = NULL;
128 	REQUIRE(VALID_LEX(lex));
129 
130 	while (!EMPTY(lex->sources)) {
131 		RUNTIME_CHECK(isc_lex_close(lex) == ISC_R_SUCCESS);
132 	}
133 	if (lex->data != NULL) {
134 		isc_mem_put(lex->mctx, lex->data, lex->max_token + 1);
135 	}
136 	lex->magic = 0;
137 	isc_mem_put(lex->mctx, lex, sizeof(*lex));
138 }
139 
140 unsigned int
isc_lex_getcomments(isc_lex_t * lex)141 isc_lex_getcomments(isc_lex_t *lex) {
142 	/*
143 	 * Return the current lexer commenting styles.
144 	 */
145 
146 	REQUIRE(VALID_LEX(lex));
147 
148 	return (lex->comments);
149 }
150 
151 void
isc_lex_setcomments(isc_lex_t * lex,unsigned int comments)152 isc_lex_setcomments(isc_lex_t *lex, unsigned int comments) {
153 	/*
154 	 * Set allowed lexer commenting styles.
155 	 */
156 
157 	REQUIRE(VALID_LEX(lex));
158 
159 	lex->comments = comments;
160 }
161 
162 void
isc_lex_getspecials(isc_lex_t * lex,isc_lexspecials_t specials)163 isc_lex_getspecials(isc_lex_t *lex, isc_lexspecials_t specials) {
164 	/*
165 	 * Put the current list of specials into 'specials'.
166 	 */
167 
168 	REQUIRE(VALID_LEX(lex));
169 
170 	memmove(specials, lex->specials, 256);
171 }
172 
173 void
isc_lex_setspecials(isc_lex_t * lex,isc_lexspecials_t specials)174 isc_lex_setspecials(isc_lex_t *lex, isc_lexspecials_t specials) {
175 	/*
176 	 * The characters in 'specials' are returned as tokens.  Along with
177 	 * whitespace, they delimit strings and numbers.
178 	 */
179 
180 	REQUIRE(VALID_LEX(lex));
181 
182 	memmove(lex->specials, specials, 256);
183 }
184 
185 static isc_result_t
new_source(isc_lex_t * lex,bool is_file,bool need_close,void * input,const char * name)186 new_source(isc_lex_t *lex, bool is_file, bool need_close, void *input,
187 	   const char *name) {
188 	inputsource *source;
189 
190 	source = isc_mem_get(lex->mctx, sizeof(*source));
191 	source->result = ISC_R_SUCCESS;
192 	source->is_file = is_file;
193 	source->need_close = need_close;
194 	source->at_eof = false;
195 	source->last_was_eol = lex->last_was_eol;
196 	source->input = input;
197 	source->name = isc_mem_strdup(lex->mctx, name);
198 	source->pushback = NULL;
199 	isc_buffer_allocate(lex->mctx, &source->pushback,
200 			    (unsigned int)lex->max_token);
201 	source->ignored = 0;
202 	source->line = 1;
203 	ISC_LIST_INITANDPREPEND(lex->sources, source, link);
204 
205 	return (ISC_R_SUCCESS);
206 }
207 
208 isc_result_t
isc_lex_openfile(isc_lex_t * lex,const char * filename)209 isc_lex_openfile(isc_lex_t *lex, const char *filename) {
210 	isc_result_t result;
211 	FILE *stream = NULL;
212 
213 	/*
214 	 * Open 'filename' and make it the current input source for 'lex'.
215 	 */
216 
217 	REQUIRE(VALID_LEX(lex));
218 
219 	result = isc_stdio_open(filename, "r", &stream);
220 	if (result != ISC_R_SUCCESS) {
221 		return (result);
222 	}
223 
224 	result = new_source(lex, true, true, stream, filename);
225 	if (result != ISC_R_SUCCESS) {
226 		(void)fclose(stream);
227 	}
228 	return (result);
229 }
230 
231 isc_result_t
isc_lex_openstream(isc_lex_t * lex,FILE * stream)232 isc_lex_openstream(isc_lex_t *lex, FILE *stream) {
233 	char name[128];
234 
235 	/*
236 	 * Make 'stream' the current input source for 'lex'.
237 	 */
238 
239 	REQUIRE(VALID_LEX(lex));
240 
241 	snprintf(name, sizeof(name), "stream-%p", stream);
242 
243 	return (new_source(lex, true, false, stream, name));
244 }
245 
246 isc_result_t
isc_lex_openbuffer(isc_lex_t * lex,isc_buffer_t * buffer)247 isc_lex_openbuffer(isc_lex_t *lex, isc_buffer_t *buffer) {
248 	char name[128];
249 
250 	/*
251 	 * Make 'buffer' the current input source for 'lex'.
252 	 */
253 
254 	REQUIRE(VALID_LEX(lex));
255 
256 	snprintf(name, sizeof(name), "buffer-%p", buffer);
257 
258 	return (new_source(lex, false, false, buffer, name));
259 }
260 
261 isc_result_t
isc_lex_close(isc_lex_t * lex)262 isc_lex_close(isc_lex_t *lex) {
263 	inputsource *source;
264 
265 	/*
266 	 * Close the most recently opened object (i.e. file or buffer).
267 	 */
268 
269 	REQUIRE(VALID_LEX(lex));
270 
271 	source = HEAD(lex->sources);
272 	if (source == NULL) {
273 		return (ISC_R_NOMORE);
274 	}
275 
276 	ISC_LIST_UNLINK(lex->sources, source, link);
277 	lex->last_was_eol = source->last_was_eol;
278 	if (source->is_file) {
279 		if (source->need_close) {
280 			(void)fclose((FILE *)(source->input));
281 		}
282 	}
283 	isc_mem_free(lex->mctx, source->name);
284 	isc_buffer_free(&source->pushback);
285 	isc_mem_put(lex->mctx, source, sizeof(*source));
286 
287 	return (ISC_R_SUCCESS);
288 }
289 
/*
 * States of the tokenizer loop in isc_lex_gettoken().
 */
290 typedef enum {
291 	lexstate_start, /* no token has been started yet */
292 	lexstate_crlf, /* saw '\r' with ISC_LEXOPT_EOL set; an EOL token follows */
293 	lexstate_string, /* collecting an unquoted string */
294 	lexstate_number, /* collecting a number */
295 	lexstate_maybecomment, /* saw '/'; the next character decides if it is a comment */
296 	lexstate_ccomment, /* inside a C-style comment */
297 	lexstate_ccommentend, /* inside a C-style comment, just after a '*' */
298 	lexstate_eatline, /* discarding characters up to newline or EOF */
299 	lexstate_qstring, /* inside a double-quoted string */
300 	lexstate_btext, /* inside brace-delimited text */
301 	lexstate_vpair, /* collecting the value part after '=' */
302 	lexstate_vpairstart, /* just after the '=' of a value pair */
303 	lexstate_qvpair, /* inside a double-quoted value after '=' */
304 } lexstate;
305 
/*
 * The two options that isc_lex_gettoken() switches off while it is inside
 * DNS multi-line parentheses or brace text.
 */
306 #define IWSEOL (ISC_LEXOPT_INITIALWS | ISC_LEXOPT_EOL)
307 
308 static void
pushback(inputsource * source,int c)309 pushback(inputsource *source, int c) {
310 	REQUIRE(source->pushback->current > 0);
311 	if (c == EOF) {
312 		source->at_eof = false;
313 		return;
314 	}
315 	source->pushback->current--;
316 	if (c == '\n') {
317 		source->line--;
318 	}
319 }
320 
321 static isc_result_t
pushandgrow(isc_lex_t * lex,inputsource * source,int c)322 pushandgrow(isc_lex_t *lex, inputsource *source, int c) {
323 	if (isc_buffer_availablelength(source->pushback) == 0) {
324 		isc_buffer_t *tbuf = NULL;
325 		unsigned int oldlen;
326 		isc_region_t used;
327 		isc_result_t result;
328 
329 		oldlen = isc_buffer_length(source->pushback);
330 		isc_buffer_allocate(lex->mctx, &tbuf, oldlen * 2);
331 		isc_buffer_usedregion(source->pushback, &used);
332 		result = isc_buffer_copyregion(tbuf, &used);
333 		INSIST(result == ISC_R_SUCCESS);
334 		tbuf->current = source->pushback->current;
335 		isc_buffer_free(&source->pushback);
336 		source->pushback = tbuf;
337 	}
338 	isc_buffer_putuint8(source->pushback, (uint8_t)c);
339 	return (ISC_R_SUCCESS);
340 }
341 
/*
 * Read the next token from the current input source (the head of
 * lex->sources) and store it in '*tokenp'.
 *
 * 'options' is a bitmask of ISC_LEXOPT_* flags selecting which kinds of
 * token are recognised and reported (end of line, end of file, initial
 * whitespace, numbers, quoted strings, value pairs, brace text, ...).
 *
 * Text-valued tokens refer to lex->data, which is reset at the start of
 * every call and may be reallocated by grow_data(), so the text is only
 * valid until the next call.
 *
 * Returns:
 *	ISC_R_SUCCESS		a token was stored in '*tokenp'
 *	ISC_R_NOMORE		no source is open and ISC_LEXOPT_NOMORE is
 *				not set
 *	ISC_R_EOF		end of input and ISC_LEXOPT_EOF is not set
 *	ISC_R_UNBALANCED	end of input with open parentheses or braces,
 *				a ')' without a matching '(', or a '{' seen
 *				while the brace count is already non-zero
 *	ISC_R_UNBALANCEDQUOTES	unescaped newline inside a quoted string
 *				without ISC_LEXOPT_QSTRINGMULTILINE
 *	ISC_R_UNEXPECTEDEND	end of input inside a C-style comment, a
 *				quoted string, brace text, or right after
 *				an escape character
 *	ISC_R_IOERROR		read error on a file source; the error is
 *				remembered in the source and returned by
 *				every later call
 *	other			any failure other than ISC_R_BADNUMBER from
 *				isc_parse_uint32()
 */
342 isc_result_t
isc_lex_gettoken(isc_lex_t * lex,unsigned int options,isc_token_t * tokenp)343 isc_lex_gettoken(isc_lex_t *lex, unsigned int options, isc_token_t *tokenp) {
344 	inputsource *source;
345 	int c;
346 	bool done = false;
347 	bool no_comments = false;
348 	bool escaped = false;
349 	lexstate state = lexstate_start;
350 	lexstate saved_state = lexstate_start;
351 	isc_buffer_t *buffer;
352 	FILE *stream;
353 	char *curr, *prev;
354 	size_t remaining;
355 	uint32_t as_ulong;
356 	unsigned int saved_options;
357 	isc_result_t result;
358 
359 	/*
360 	 * Get the next token.
361 	 */
362 
363 	REQUIRE(VALID_LEX(lex));
364 	source = HEAD(lex->sources);
365 	REQUIRE(tokenp != NULL);
366 
367 	if (source == NULL) {
368 		if ((options & ISC_LEXOPT_NOMORE) != 0) {
369 			tokenp->type = isc_tokentype_nomore;
370 			return (ISC_R_SUCCESS);
371 		}
372 		return (ISC_R_NOMORE);
373 	}
374 
375 	if (source->result != ISC_R_SUCCESS) {
376 		return (source->result);
377 	}
378 
	/* Snapshot the state that isc_lex_ungettoken() restores. */
379 	lex->saved_paren_count = lex->paren_count;
380 	source->saved_line = source->line;
381 
382 	if (isc_buffer_remaininglength(source->pushback) == 0 && source->at_eof)
383 	{
384 		if ((options & ISC_LEXOPT_DNSMULTILINE) != 0 &&
385 		    lex->paren_count != 0)
386 		{
387 			lex->paren_count = 0;
388 			return (ISC_R_UNBALANCED);
389 		}
390 		if ((options & ISC_LEXOPT_BTEXT) != 0 && lex->brace_count != 0)
391 		{
392 			lex->brace_count = 0;
393 			return (ISC_R_UNBALANCED);
394 		}
395 		if ((options & ISC_LEXOPT_EOF) != 0) {
396 			tokenp->type = isc_tokentype_eof;
397 			return (ISC_R_SUCCESS);
398 		}
399 		return (ISC_R_EOF);
400 	}
401 
402 	isc_buffer_compact(source->pushback);
403 
	/*
	 * While inside DNS multi-line parentheses, end of line and initial
	 * whitespace are not reported; saved_options keeps the caller's
	 * flags so they can be reinstated at the closing parenthesis.
	 */
404 	saved_options = options;
405 	if ((options & ISC_LEXOPT_DNSMULTILINE) != 0 && lex->paren_count > 0) {
406 		options &= ~IWSEOL;
407 	}
408 
409 	curr = lex->data;
410 	*curr = '\0';
411 
412 	prev = NULL;
413 	remaining = lex->max_token;
414 
415 #ifdef HAVE_FLOCKFILE
416 	if (source->is_file) {
417 		flockfile(source->input);
418 	}
419 #endif /* ifdef HAVE_FLOCKFILE */
420 
421 	do {
		/*
		 * Refill: when nothing is left in the pushback buffer, read
		 * one more character from the underlying input and append
		 * it to that buffer.
		 */
422 		if (isc_buffer_remaininglength(source->pushback) == 0) {
423 			if (source->is_file) {
424 				stream = source->input;
425 
426 #if defined(HAVE_FLOCKFILE) && defined(HAVE_GETC_UNLOCKED)
427 				c = getc_unlocked(stream);
428 #else  /* if defined(HAVE_FLOCKFILE) && defined(HAVE_GETC_UNLOCKED) */
429 				c = getc(stream);
430 #endif /* if defined(HAVE_FLOCKFILE) && defined(HAVE_GETC_UNLOCKED) */
431 				if (c == EOF) {
432 					if (ferror(stream)) {
433 						source->result = ISC_R_IOERROR;
434 						result = source->result;
435 						goto done;
436 					}
437 					source->at_eof = true;
438 				}
439 			} else {
440 				buffer = source->input;
441 
442 				if (buffer->current == buffer->used) {
443 					c = EOF;
444 					source->at_eof = true;
445 				} else {
446 					c = *((unsigned char *)buffer->base +
447 					      buffer->current);
448 					buffer->current++;
449 				}
450 			}
451 			if (c != EOF) {
452 				source->result = pushandgrow(lex, source, c);
453 				if (source->result != ISC_R_SUCCESS) {
454 					result = source->result;
455 					goto done;
456 				}
457 			}
458 		}
459 
460 		if (!source->at_eof) {
461 			if (state == lexstate_start) {
462 				/* Token has not started yet. */
463 				source->ignored = isc_buffer_consumedlength(
464 					source->pushback);
465 			}
466 			c = isc_buffer_getuint8(source->pushback);
467 		} else {
468 			c = EOF;
469 		}
470 
471 		if (c == '\n') {
472 			source->line++;
473 		}
474 
		/*
		 * Comment detection.  The current state is remembered in
		 * saved_state so that lexing resumes there once the comment
		 * has been skipped.
		 */
475 		if (lex->comment_ok && !no_comments) {
476 			if (!escaped && c == ';' &&
477 			    ((lex->comments & ISC_LEXCOMMENT_DNSMASTERFILE) !=
478 			     0))
479 			{
480 				saved_state = state;
481 				state = lexstate_eatline;
482 				no_comments = true;
483 				continue;
484 			} else if (c == '/' &&
485 				   (lex->comments &
486 				    (ISC_LEXCOMMENT_C |
487 				     ISC_LEXCOMMENT_CPLUSPLUS)) != 0)
488 			{
489 				saved_state = state;
490 				state = lexstate_maybecomment;
491 				no_comments = true;
492 				continue;
493 			} else if (c == '#' && ((lex->comments &
494 						 ISC_LEXCOMMENT_SHELL) != 0))
495 			{
496 				saved_state = state;
497 				state = lexstate_eatline;
498 				no_comments = true;
499 				continue;
500 			}
501 		}
502 
		/*
		 * Jumping to no_read re-dispatches on the current value of
		 * 'c' in the (possibly changed) state without reading a new
		 * character.
		 */
503 	no_read:
504 		/* INSIST(c == EOF || (c >= 0 && c <= 255)); */
505 		switch (state) {
506 		case lexstate_start:
507 			if (c == EOF) {
508 				lex->last_was_eol = false;
509 				if ((options & ISC_LEXOPT_DNSMULTILINE) != 0 &&
510 				    lex->paren_count != 0)
511 				{
512 					lex->paren_count = 0;
513 					result = ISC_R_UNBALANCED;
514 					goto done;
515 				}
516 				if ((options & ISC_LEXOPT_BTEXT) != 0 &&
517 				    lex->brace_count != 0)
518 				{
519 					lex->brace_count = 0;
520 					result = ISC_R_UNBALANCED;
521 					goto done;
522 				}
523 				if ((options & ISC_LEXOPT_EOF) == 0) {
524 					result = ISC_R_EOF;
525 					goto done;
526 				}
527 				tokenp->type = isc_tokentype_eof;
528 				done = true;
529 			} else if (c == ' ' || c == '\t') {
530 				if (lex->last_was_eol &&
531 				    (options & ISC_LEXOPT_INITIALWS) != 0)
532 				{
533 					lex->last_was_eol = false;
534 					tokenp->type = isc_tokentype_initialws;
535 					tokenp->value.as_char = c;
536 					done = true;
537 				}
538 			} else if (c == '\n') {
539 				if ((options & ISC_LEXOPT_EOL) != 0) {
540 					tokenp->type = isc_tokentype_eol;
541 					done = true;
542 				}
543 				lex->last_was_eol = true;
544 			} else if (c == '\r') {
545 				if ((options & ISC_LEXOPT_EOL) != 0) {
546 					state = lexstate_crlf;
547 				}
548 			} else if (c == '"' &&
549 				   (options & ISC_LEXOPT_QSTRING) != 0)
550 			{
551 				lex->last_was_eol = false;
552 				no_comments = true;
553 				state = lexstate_qstring;
554 			} else if (lex->specials[c]) {
555 				lex->last_was_eol = false;
556 				if ((c == '(' || c == ')') &&
557 				    (options & ISC_LEXOPT_DNSMULTILINE) != 0)
558 				{
559 					if (c == '(') {
560 						if (lex->paren_count == 0) {
561 							options &= ~IWSEOL;
562 						}
563 						lex->paren_count++;
564 					} else {
565 						if (lex->paren_count == 0) {
566 							result =
567 								ISC_R_UNBALANCED;
568 							goto done;
569 						}
570 						lex->paren_count--;
571 						if (lex->paren_count == 0) {
572 							options = saved_options;
573 						}
574 					}
575 					continue;
576 				} else if (c == '{' &&
577 					   (options & ISC_LEXOPT_BTEXT) != 0)
578 				{
579 					if (lex->brace_count != 0) {
580 						result = ISC_R_UNBALANCED;
581 						goto done;
582 					}
583 					lex->brace_count++;
584 					options &= ~IWSEOL;
585 					state = lexstate_btext;
586 					no_comments = true;
587 					continue;
588 				}
589 				tokenp->type = isc_tokentype_special;
590 				tokenp->value.as_char = c;
591 				done = true;
592 			} else if (isdigit((unsigned char)c) &&
593 				   (options & ISC_LEXOPT_NUMBER) != 0)
594 			{
595 				lex->last_was_eol = false;
596 				if ((options & ISC_LEXOPT_OCTAL) != 0 &&
597 				    (c == '8' || c == '9'))
598 				{
599 					state = lexstate_string;
600 				} else {
601 					state = lexstate_number;
602 				}
603 				goto no_read;
604 			} else {
605 				lex->last_was_eol = false;
606 				state = lexstate_string;
607 				goto no_read;
608 			}
609 			break;
610 		case lexstate_crlf:
611 			if (c != '\n') {
612 				pushback(source, c);
613 			}
614 			tokenp->type = isc_tokentype_eol;
615 			done = true;
616 			lex->last_was_eol = true;
617 			break;
618 		case lexstate_number:
619 			if (c == EOF || !isdigit((unsigned char)c)) {
620 				if (c == ' ' || c == '\t' || c == '\r' ||
621 				    c == '\n' || c == EOF || lex->specials[c])
622 				{
623 					int base;
624 					if ((options & ISC_LEXOPT_OCTAL) != 0) {
625 						base = 8;
626 					} else if ((options &
627 						    ISC_LEXOPT_CNUMBER) != 0)
628 					{
629 						base = 0;
630 					} else {
631 						base = 10;
632 					}
633 					pushback(source, c);
634 
635 					result = isc_parse_uint32(
636 						&as_ulong, lex->data, base);
637 					if (result == ISC_R_SUCCESS) {
638 						tokenp->type =
639 							isc_tokentype_number;
640 						tokenp->value.as_ulong =
641 							as_ulong;
642 					} else if (result == ISC_R_BADNUMBER) {
643 						isc_tokenvalue_t *v;
644 
						/* Not a valid number: hand the text back as a string token. */
645 						tokenp->type =
646 							isc_tokentype_string;
647 						v = &(tokenp->value);
648 						v->as_textregion.base =
649 							lex->data;
650 						v->as_textregion.length =
651 							(unsigned int)(lex->max_token -
652 								       remaining);
653 					} else {
654 						goto done;
655 					}
656 					done = true;
657 					continue;
658 				} else if ((options & ISC_LEXOPT_CNUMBER) ==
659 						   0 ||
660 					   ((c != 'x' && c != 'X') ||
661 					    (curr != &lex->data[1]) ||
662 					    (lex->data[0] != '0')))
663 				{
664 					/* Above test supports hex numbers */
665 					state = lexstate_string;
666 				}
667 			} else if ((options & ISC_LEXOPT_OCTAL) != 0 &&
668 				   (c == '8' || c == '9'))
669 			{
670 				state = lexstate_string;
671 			}
672 			if (remaining == 0U) {
673 				result = grow_data(lex, &remaining, &curr,
674 						   &prev);
675 				if (result != ISC_R_SUCCESS) {
676 					goto done;
677 				}
678 			}
679 			INSIST(remaining > 0U);
680 			*curr++ = c;
681 			*curr = '\0';
682 			remaining--;
683 			break;
684 		case lexstate_string:
685 			if (!escaped && c == '=' &&
686 			    (options & ISC_LEXOPT_VPAIR) != 0)
687 			{
688 				if (remaining == 0U) {
689 					result = grow_data(lex, &remaining,
690 							   &curr, &prev);
691 					if (result != ISC_R_SUCCESS) {
692 						goto done;
693 					}
694 				}
695 				INSIST(remaining > 0U);
696 				*curr++ = c;
697 				*curr = '\0';
698 				remaining--;
699 				state = lexstate_vpairstart;
700 				break;
701 			}
702 			FALLTHROUGH;
703 		case lexstate_vpairstart:
704 			if (state == lexstate_vpairstart) {
705 				if (c == '"' &&
706 				    (options & ISC_LEXOPT_QVPAIR) != 0)
707 				{
708 					no_comments = true;
709 					state = lexstate_qvpair;
710 					break;
711 				}
712 				state = lexstate_vpair;
713 			}
714 			FALLTHROUGH;
715 		case lexstate_vpair:
716 			/*
717 			 * EOF needs to be checked before lex->specials[c]
718 			 * as lex->specials[EOF] is not a good idea.
719 			 */
720 			if (c == '\r' || c == '\n' || c == EOF ||
721 			    (!escaped &&
722 			     (c == ' ' || c == '\t' || lex->specials[c])))
723 			{
724 				pushback(source, c);
725 				if (source->result != ISC_R_SUCCESS) {
726 					result = source->result;
727 					goto done;
728 				}
729 				if (escaped && c == EOF) {
730 					result = ISC_R_UNEXPECTEDEND;
731 					goto done;
732 				}
733 				tokenp->type = (state == lexstate_string)
734 						       ? isc_tokentype_string
735 						       : isc_tokentype_vpair;
736 				tokenp->value.as_textregion.base = lex->data;
737 				tokenp->value.as_textregion.length =
738 					(unsigned int)(lex->max_token -
739 						       remaining);
740 				done = true;
741 				continue;
742 			}
743 			if ((options & ISC_LEXOPT_ESCAPE) != 0) {
744 				escaped = (!escaped && c == '\\') ? true
745 								  : false;
746 			}
747 			if (remaining == 0U) {
748 				result = grow_data(lex, &remaining, &curr,
749 						   &prev);
750 				if (result != ISC_R_SUCCESS) {
751 					goto done;
752 				}
753 			}
754 			INSIST(remaining > 0U);
755 			*curr++ = c;
756 			*curr = '\0';
757 			remaining--;
758 			break;
759 		case lexstate_maybecomment:
760 			if (c == '*' && (lex->comments & ISC_LEXCOMMENT_C) != 0)
761 			{
762 				state = lexstate_ccomment;
763 				continue;
764 			} else if (c == '/' && (lex->comments &
765 						ISC_LEXCOMMENT_CPLUSPLUS) != 0)
766 			{
767 				state = lexstate_eatline;
768 				continue;
769 			}
			/*
			 * Not a comment after all: give back the character
			 * just read and process the earlier '/' in the
			 * state that was interrupted.
			 */
770 			pushback(source, c);
771 			c = '/';
772 			no_comments = false;
773 			state = saved_state;
774 			goto no_read;
775 		case lexstate_ccomment:
776 			if (c == EOF) {
777 				result = ISC_R_UNEXPECTEDEND;
778 				goto done;
779 			}
780 			if (c == '*') {
781 				state = lexstate_ccommentend;
782 			}
783 			break;
784 		case lexstate_ccommentend:
785 			if (c == EOF) {
786 				result = ISC_R_UNEXPECTEDEND;
787 				goto done;
788 			}
789 			if (c == '/') {
790 				/*
791 				 * C-style comments become a single space.
792 				 * We do this to ensure that a comment will
793 				 * act as a delimiter for strings and
794 				 * numbers.
795 				 */
796 				c = ' ';
797 				no_comments = false;
798 				state = saved_state;
799 				goto no_read;
800 			} else if (c != '*') {
801 				state = lexstate_ccomment;
802 			}
803 			break;
804 		case lexstate_eatline:
805 			if ((c == '\n') || (c == EOF)) {
806 				no_comments = false;
807 				state = saved_state;
808 				goto no_read;
809 			}
810 			break;
811 		case lexstate_qstring:
812 		case lexstate_qvpair:
813 			if (c == EOF) {
814 				result = ISC_R_UNEXPECTEDEND;
815 				goto done;
816 			}
817 			if (c == '"') {
818 				if (escaped) {
819 					escaped = false;
820 					/*
821 					 * Overwrite the preceding backslash.
822 					 */
823 					INSIST(prev != NULL);
824 					*prev = '"';
825 				} else {
826 					tokenp->type =
827 						(state == lexstate_qstring)
828 							? isc_tokentype_qstring
829 							: isc_tokentype_qvpair;
830 					tokenp->value.as_textregion.base =
831 						lex->data;
832 					tokenp->value.as_textregion.length =
833 						(unsigned int)(lex->max_token -
834 							       remaining);
835 					no_comments = false;
836 					done = true;
837 				}
838 			} else {
839 				if (c == '\n' && !escaped &&
840 				    (options & ISC_LEXOPT_QSTRINGMULTILINE) ==
841 					    0)
842 				{
843 					pushback(source, c);
844 					result = ISC_R_UNBALANCEDQUOTES;
845 					goto done;
846 				}
847 				if (c == '\\' && !escaped) {
848 					escaped = true;
849 				} else {
850 					escaped = false;
851 				}
852 				if (remaining == 0U) {
853 					result = grow_data(lex, &remaining,
854 							   &curr, &prev);
855 					if (result != ISC_R_SUCCESS) {
856 						goto done;
857 					}
858 				}
859 				INSIST(remaining > 0U);
860 				prev = curr;
861 				*curr++ = c;
862 				*curr = '\0';
863 				remaining--;
864 			}
865 			break;
866 		case lexstate_btext:
867 			if (c == EOF) {
868 				result = ISC_R_UNEXPECTEDEND;
869 				goto done;
870 			}
871 			if (c == '{') {
872 				if (escaped) {
873 					escaped = false;
874 				} else {
875 					lex->brace_count++;
876 				}
877 			} else if (c == '}') {
878 				if (escaped) {
879 					escaped = false;
880 				} else {
881 					INSIST(lex->brace_count > 0);
882 					lex->brace_count--;
883 				}
884 
885 				if (lex->brace_count == 0) {
886 					tokenp->type = isc_tokentype_btext;
887 					tokenp->value.as_textregion.base =
888 						lex->data;
889 					tokenp->value.as_textregion.length =
890 						(unsigned int)(lex->max_token -
891 							       remaining);
892 					no_comments = false;
893 					done = true;
894 					break;
895 				}
896 			}
897 
898 			if (c == '\\' && !escaped) {
899 				escaped = true;
900 			} else {
901 				escaped = false;
902 			}
903 
904 			if (remaining == 0U) {
905 				result = grow_data(lex, &remaining, &curr,
906 						   &prev);
907 				if (result != ISC_R_SUCCESS) {
908 					goto done;
909 				}
910 			}
911 			INSIST(remaining > 0U);
912 			prev = curr;
913 			*curr++ = c;
914 			*curr = '\0';
915 			remaining--;
916 			break;
917 		default:
918 			FATAL_ERROR(__FILE__, __LINE__, "Unexpected state %d",
919 				    state);
920 		}
921 	} while (!done);
922 
923 	result = ISC_R_SUCCESS;
	/* Common exit: release the stream lock taken before the loop. */
924 done:
925 #ifdef HAVE_FLOCKFILE
926 	if (source->is_file) {
927 		funlockfile(source->input);
928 	}
929 #endif /* ifdef HAVE_FLOCKFILE */
930 	return (result);
931 }
932 
933 isc_result_t
isc_lex_getmastertoken(isc_lex_t * lex,isc_token_t * token,isc_tokentype_t expect,bool eol)934 isc_lex_getmastertoken(isc_lex_t *lex, isc_token_t *token,
935 		       isc_tokentype_t expect, bool eol) {
936 	unsigned int options = ISC_LEXOPT_EOL | ISC_LEXOPT_EOF |
937 			       ISC_LEXOPT_DNSMULTILINE | ISC_LEXOPT_ESCAPE;
938 	isc_result_t result;
939 
940 	if (expect == isc_tokentype_vpair) {
941 		options |= ISC_LEXOPT_VPAIR;
942 	} else if (expect == isc_tokentype_qvpair) {
943 		options |= ISC_LEXOPT_VPAIR;
944 		options |= ISC_LEXOPT_QVPAIR;
945 	} else if (expect == isc_tokentype_qstring) {
946 		options |= ISC_LEXOPT_QSTRING;
947 	} else if (expect == isc_tokentype_number) {
948 		options |= ISC_LEXOPT_NUMBER;
949 	}
950 	result = isc_lex_gettoken(lex, options, token);
951 	if (result == ISC_R_RANGE) {
952 		isc_lex_ungettoken(lex, token);
953 	}
954 	if (result != ISC_R_SUCCESS) {
955 		return (result);
956 	}
957 
958 	if (eol && ((token->type == isc_tokentype_eol) ||
959 		    (token->type == isc_tokentype_eof)))
960 	{
961 		return (ISC_R_SUCCESS);
962 	}
963 	if (token->type == isc_tokentype_string &&
964 	    (expect == isc_tokentype_qstring || expect == isc_tokentype_qvpair))
965 	{
966 		return (ISC_R_SUCCESS);
967 	}
968 	if (token->type == isc_tokentype_vpair &&
969 	    expect == isc_tokentype_qvpair)
970 	{
971 		return (ISC_R_SUCCESS);
972 	}
973 	if (token->type != expect) {
974 		isc_lex_ungettoken(lex, token);
975 		if (token->type == isc_tokentype_eol ||
976 		    token->type == isc_tokentype_eof)
977 		{
978 			return (ISC_R_UNEXPECTEDEND);
979 		}
980 		if (expect == isc_tokentype_number) {
981 			return (ISC_R_BADNUMBER);
982 		}
983 		return (ISC_R_UNEXPECTEDTOKEN);
984 	}
985 	return (ISC_R_SUCCESS);
986 }
987 
988 isc_result_t
isc_lex_getoctaltoken(isc_lex_t * lex,isc_token_t * token,bool eol)989 isc_lex_getoctaltoken(isc_lex_t *lex, isc_token_t *token, bool eol) {
990 	unsigned int options = ISC_LEXOPT_EOL | ISC_LEXOPT_EOF |
991 			       ISC_LEXOPT_DNSMULTILINE | ISC_LEXOPT_ESCAPE |
992 			       ISC_LEXOPT_NUMBER | ISC_LEXOPT_OCTAL;
993 	isc_result_t result;
994 
995 	result = isc_lex_gettoken(lex, options, token);
996 	if (result == ISC_R_RANGE) {
997 		isc_lex_ungettoken(lex, token);
998 	}
999 	if (result != ISC_R_SUCCESS) {
1000 		return (result);
1001 	}
1002 
1003 	if (eol && ((token->type == isc_tokentype_eol) ||
1004 		    (token->type == isc_tokentype_eof)))
1005 	{
1006 		return (ISC_R_SUCCESS);
1007 	}
1008 	if (token->type != isc_tokentype_number) {
1009 		isc_lex_ungettoken(lex, token);
1010 		if (token->type == isc_tokentype_eol ||
1011 		    token->type == isc_tokentype_eof)
1012 		{
1013 			return (ISC_R_UNEXPECTEDEND);
1014 		}
1015 		return (ISC_R_BADNUMBER);
1016 	}
1017 	return (ISC_R_SUCCESS);
1018 }
1019 
1020 void
isc_lex_ungettoken(isc_lex_t * lex,isc_token_t * tokenp)1021 isc_lex_ungettoken(isc_lex_t *lex, isc_token_t *tokenp) {
1022 	inputsource *source;
1023 	/*
1024 	 * Unget the current token.
1025 	 */
1026 
1027 	REQUIRE(VALID_LEX(lex));
1028 	source = HEAD(lex->sources);
1029 	REQUIRE(source != NULL);
1030 	REQUIRE(tokenp != NULL);
1031 	REQUIRE(isc_buffer_consumedlength(source->pushback) != 0 ||
1032 		tokenp->type == isc_tokentype_eof);
1033 
1034 	UNUSED(tokenp);
1035 
1036 	isc_buffer_first(source->pushback);
1037 	lex->paren_count = lex->saved_paren_count;
1038 	source->line = source->saved_line;
1039 	source->at_eof = false;
1040 }
1041 
1042 void
isc_lex_getlasttokentext(isc_lex_t * lex,isc_token_t * tokenp,isc_region_t * r)1043 isc_lex_getlasttokentext(isc_lex_t *lex, isc_token_t *tokenp, isc_region_t *r) {
1044 	inputsource *source;
1045 
1046 	REQUIRE(VALID_LEX(lex));
1047 	source = HEAD(lex->sources);
1048 	REQUIRE(source != NULL);
1049 	REQUIRE(tokenp != NULL);
1050 	REQUIRE(isc_buffer_consumedlength(source->pushback) != 0 ||
1051 		tokenp->type == isc_tokentype_eof);
1052 
1053 	UNUSED(tokenp);
1054 
1055 	INSIST(source->ignored <= isc_buffer_consumedlength(source->pushback));
1056 	r->base = (unsigned char *)isc_buffer_base(source->pushback) +
1057 		  source->ignored;
1058 	r->length = isc_buffer_consumedlength(source->pushback) -
1059 		    source->ignored;
1060 }
1061 
1062 char *
isc_lex_getsourcename(isc_lex_t * lex)1063 isc_lex_getsourcename(isc_lex_t *lex) {
1064 	inputsource *source;
1065 
1066 	REQUIRE(VALID_LEX(lex));
1067 	source = HEAD(lex->sources);
1068 
1069 	if (source == NULL) {
1070 		return (NULL);
1071 	}
1072 
1073 	return (source->name);
1074 }
1075 
1076 unsigned long
isc_lex_getsourceline(isc_lex_t * lex)1077 isc_lex_getsourceline(isc_lex_t *lex) {
1078 	inputsource *source;
1079 
1080 	REQUIRE(VALID_LEX(lex));
1081 	source = HEAD(lex->sources);
1082 
1083 	if (source == NULL) {
1084 		return (0);
1085 	}
1086 
1087 	return (source->line);
1088 }
1089 
1090 isc_result_t
isc_lex_setsourcename(isc_lex_t * lex,const char * name)1091 isc_lex_setsourcename(isc_lex_t *lex, const char *name) {
1092 	inputsource *source;
1093 	char *newname;
1094 
1095 	REQUIRE(VALID_LEX(lex));
1096 	source = HEAD(lex->sources);
1097 
1098 	if (source == NULL) {
1099 		return (ISC_R_NOTFOUND);
1100 	}
1101 	newname = isc_mem_strdup(lex->mctx, name);
1102 	isc_mem_free(lex->mctx, source->name);
1103 	source->name = newname;
1104 	return (ISC_R_SUCCESS);
1105 }
1106 
1107 isc_result_t
isc_lex_setsourceline(isc_lex_t * lex,unsigned long line)1108 isc_lex_setsourceline(isc_lex_t *lex, unsigned long line) {
1109 	inputsource *source;
1110 
1111 	REQUIRE(VALID_LEX(lex));
1112 	source = HEAD(lex->sources);
1113 
1114 	if (source == NULL) {
1115 		return (ISC_R_NOTFOUND);
1116 	}
1117 
1118 	source->line = line;
1119 	return (ISC_R_SUCCESS);
1120 }
1121 
1122 bool
isc_lex_isfile(isc_lex_t * lex)1123 isc_lex_isfile(isc_lex_t *lex) {
1124 	inputsource *source;
1125 
1126 	REQUIRE(VALID_LEX(lex));
1127 
1128 	source = HEAD(lex->sources);
1129 
1130 	if (source == NULL) {
1131 		return (false);
1132 	}
1133 
1134 	return (source->is_file);
1135 }
1136