xref: /openbsd-src/usr.bin/dig/lib/isc/lex.c (revision f84b1df5a16cdd762c93854218de246e79975d3b)
1 /*
2  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
3  *
4  * Permission to use, copy, modify, and/or distribute this software for any
5  * purpose with or without fee is hereby granted, provided that the above
6  * copyright notice and this permission notice appear in all copies.
7  *
8  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
9  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
10  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
11  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
12  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
13  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
14  * PERFORMANCE OF THIS SOFTWARE.
15  */
16 
17 /* $Id: lex.c,v 1.14 2022/01/17 18:19:51 naddy Exp $ */
18 
19 /*! \file */
20 
21 #include <ctype.h>
22 #include <stdlib.h>
23 
24 #include <isc/buffer.h>
25 
26 #include <isc/lex.h>
27 
28 #include <errno.h>
29 #include <string.h>
30 #include <isc/util.h>
31 
32 #include "unix/errno2result.h"
33 
34 typedef struct inputsource {
35 	isc_result_t			result;
36 	int			is_file;
37 	int			need_close;
38 	int			at_eof;
39 	int			last_was_eol;
40 	isc_buffer_t *			pushback;
41 	unsigned int			ignored;
42 	void *				input;
43 	char *				name;
44 	unsigned long			line;
45 	unsigned long			saved_line;
46 	ISC_LINK(struct inputsource)	link;
47 } inputsource;
48 
49 struct isc_lex {
50 	/* Unlocked. */
51 	size_t				max_token;
52 	char *				data;
53 	unsigned int			comments;
54 	int			comment_ok;
55 	int			last_was_eol;
56 	unsigned int			paren_count;
57 	unsigned int			saved_paren_count;
58 	isc_lexspecials_t		specials;
59 	LIST(struct inputsource)	sources;
60 };
61 
62 static inline isc_result_t
63 grow_data(isc_lex_t *lex, size_t *remainingp, char **currp, char **prevp) {
64 	char *tmp;
65 
66 	tmp = malloc(lex->max_token * 2 + 1);
67 	if (tmp == NULL)
68 		return (ISC_R_NOMEMORY);
69 	memmove(tmp, lex->data, lex->max_token + 1);
70 	*currp = tmp + (*currp - lex->data);
71 	if (*prevp != NULL)
72 		*prevp = tmp + (*prevp - lex->data);
73 	free(lex->data);
74 	lex->data = tmp;
75 	*remainingp += lex->max_token;
76 	lex->max_token *= 2;
77 	return (ISC_R_SUCCESS);
78 }
79 
80 isc_result_t
81 isc_lex_create(size_t max_token, isc_lex_t **lexp) {
82 	isc_lex_t *lex;
83 
84 	/*
85 	 * Create a lexer.
86 	 */
87 	REQUIRE(lexp != NULL && *lexp == NULL);
88 
89 	if (max_token == 0U)
90 		max_token = 1;
91 
92 	lex = malloc(sizeof(*lex));
93 	if (lex == NULL)
94 		return (ISC_R_NOMEMORY);
95 	lex->data = malloc(max_token + 1);
96 	if (lex->data == NULL) {
97 		free(lex);
98 		return (ISC_R_NOMEMORY);
99 	}
100 	lex->max_token = max_token;
101 	lex->comments = 0;
102 	lex->comment_ok = 1;
103 	lex->last_was_eol = 1;
104 	lex->paren_count = 0;
105 	lex->saved_paren_count = 0;
106 	memset(lex->specials, 0, 256);
107 	INIT_LIST(lex->sources);
108 
109 	*lexp = lex;
110 
111 	return (ISC_R_SUCCESS);
112 }
113 
114 void
115 isc_lex_destroy(isc_lex_t **lexp) {
116 	isc_lex_t *lex;
117 
118 	/*
119 	 * Destroy the lexer.
120 	 */
121 
122 	REQUIRE(lexp != NULL);
123 	lex = *lexp;
124 
125 	while (!EMPTY(lex->sources))
126 		RUNTIME_CHECK(isc_lex_close(lex) == ISC_R_SUCCESS);
127 	if (lex->data != NULL)
128 		free(lex->data);
129 	free(lex);
130 
131 	*lexp = NULL;
132 }
133 
134 void
135 isc_lex_setcomments(isc_lex_t *lex, unsigned int comments) {
136 	/*
137 	 * Set allowed lexer commenting styles.
138 	 */
139 
140 	lex->comments = comments;
141 }
142 
143 void
144 isc_lex_setspecials(isc_lex_t *lex, isc_lexspecials_t specials) {
145 	/*
146 	 * The characters in 'specials' are returned as tokens.  Along with
147 	 * whitespace, they delimit strings and numbers.
148 	 */
149 
150 	memmove(lex->specials, specials, 256);
151 }
152 
153 static inline isc_result_t
154 new_source(isc_lex_t *lex, int is_file, int need_close,
155 	   void *input, const char *name)
156 {
157 	inputsource *source;
158 	isc_result_t result;
159 
160 	source = malloc(sizeof(*source));
161 	if (source == NULL)
162 		return (ISC_R_NOMEMORY);
163 	source->result = ISC_R_SUCCESS;
164 	source->is_file = is_file;
165 	source->need_close = need_close;
166 	source->at_eof = 0;
167 	source->last_was_eol = lex->last_was_eol;
168 	source->input = input;
169 	source->name = strdup(name);
170 	if (source->name == NULL) {
171 		free(source);
172 		return (ISC_R_NOMEMORY);
173 	}
174 	source->pushback = NULL;
175 	result = isc_buffer_allocate(&source->pushback,
176 				     (unsigned int)lex->max_token);
177 	if (result != ISC_R_SUCCESS) {
178 		free(source->name);
179 		free(source);
180 		return (result);
181 	}
182 	source->ignored = 0;
183 	source->line = 1;
184 	ISC_LIST_INITANDPREPEND(lex->sources, source, link);
185 
186 	return (ISC_R_SUCCESS);
187 }
188 
189 isc_result_t
190 isc_lex_openfile(isc_lex_t *lex, const char *filename) {
191 	isc_result_t result = ISC_R_SUCCESS;
192 	FILE *stream = NULL;
193 
194 	/*
195 	 * Open 'filename' and make it the current input source for 'lex'.
196 	 */
197 
198 	if ((stream = fopen(filename, "r")) == NULL)
199 		return (isc__errno2result(errno));
200 
201 	result = new_source(lex, 1, 1, stream, filename);
202 	if (result != ISC_R_SUCCESS)
203 		(void)fclose(stream);
204 	return (result);
205 }
206 
207 isc_result_t
208 isc_lex_close(isc_lex_t *lex) {
209 	inputsource *source;
210 
211 	/*
212 	 * Close the most recently opened object (i.e. file or buffer).
213 	 */
214 
215 	source = HEAD(lex->sources);
216 	if (source == NULL)
217 		return (ISC_R_NOMORE);
218 
219 	ISC_LIST_UNLINK(lex->sources, source, link);
220 	lex->last_was_eol = source->last_was_eol;
221 	if (source->is_file) {
222 		if (source->need_close)
223 			(void)fclose((FILE *)(source->input));
224 	}
225 	free(source->name);
226 	isc_buffer_free(&source->pushback);
227 	free(source);
228 
229 	return (ISC_R_SUCCESS);
230 }
231 
/*
 * States of the tokenizer loop in isc_lex_gettoken().
 */
typedef enum {
	lexstate_start,		/* no token begun yet */
	lexstate_string,	/* collecting an unquoted string */
	lexstate_maybecomment,	/* saw '/', next character decides */
	lexstate_ccomment,	/* inside a C-style comment */
	lexstate_ccommentend,	/* saw '*' inside a C-style comment */
	lexstate_eatline,	/* discarding the rest of the line */
	lexstate_qstring	/* inside a double-quoted string */
} lexstate;

/* Both "initial whitespace" and "end of line" token options together. */
#define IWSEOL (ISC_LEXOPT_INITIALWS | ISC_LEXOPT_EOL)
243 
244 static void
245 pushback(inputsource *source, int c) {
246 	REQUIRE(source->pushback->current > 0);
247 	if (c == EOF) {
248 		source->at_eof = 0;
249 		return;
250 	}
251 	source->pushback->current--;
252 	if (c == '\n')
253 		source->line--;
254 }
255 
256 static isc_result_t
257 pushandgrow(inputsource *source, int c) {
258 	if (isc_buffer_availablelength(source->pushback) == 0) {
259 		isc_buffer_t *tbuf = NULL;
260 		unsigned int oldlen;
261 		isc_region_t used;
262 		isc_result_t result;
263 
264 		oldlen = isc_buffer_length(source->pushback);
265 		result = isc_buffer_allocate(&tbuf, oldlen * 2);
266 		if (result != ISC_R_SUCCESS)
267 			return (result);
268 		isc_buffer_usedregion(source->pushback, &used);
269 		result = isc_buffer_copyregion(tbuf, &used);
270 		INSIST(result == ISC_R_SUCCESS);
271 		tbuf->current = source->pushback->current;
272 		isc_buffer_free(&source->pushback);
273 		source->pushback = tbuf;
274 	}
275 	isc_buffer_putuint8(source->pushback, (uint8_t)c);
276 	return (ISC_R_SUCCESS);
277 }
278 
279 isc_result_t
280 isc_lex_gettoken(isc_lex_t *lex, unsigned int options, isc_token_t *tokenp) {
281 	inputsource *source;
282 	int c;
283 	int done = 0;
284 	int no_comments = 0;
285 	int escaped = 0;
286 	lexstate state = lexstate_start;
287 	lexstate saved_state = lexstate_start;
288 	isc_buffer_t *buffer;
289 	FILE *stream;
290 	char *curr, *prev;
291 	size_t remaining;
292 	isc_result_t result;
293 
294 	/*
295 	 * Get the next token.
296 	 */
297 
298 	source = HEAD(lex->sources);
299 	REQUIRE(tokenp != NULL);
300 
301 	if (source == NULL) {
302 		if ((options & ISC_LEXOPT_NOMORE) != 0) {
303 			tokenp->type = isc_tokentype_nomore;
304 			return (ISC_R_SUCCESS);
305 		}
306 		return (ISC_R_NOMORE);
307 	}
308 
309 	if (source->result != ISC_R_SUCCESS)
310 		return (source->result);
311 
312 	lex->saved_paren_count = lex->paren_count;
313 	source->saved_line = source->line;
314 
315 	if (isc_buffer_remaininglength(source->pushback) == 0 &&
316 	    source->at_eof)
317 	{
318 		if ((options & ISC_LEXOPT_EOF) != 0) {
319 			tokenp->type = isc_tokentype_eof;
320 			return (ISC_R_SUCCESS);
321 		}
322 		return (ISC_R_EOF);
323 	}
324 
325 	isc_buffer_compact(source->pushback);
326 
327 	curr = lex->data;
328 	*curr = '\0';
329 
330 	prev = NULL;
331 	remaining = lex->max_token;
332 
333 	if (source->is_file)
334 		flockfile(source->input);
335 
336 	do {
337 		if (isc_buffer_remaininglength(source->pushback) == 0) {
338 			if (source->is_file) {
339 				stream = source->input;
340 
341 				c = getc_unlocked(stream);
342 				if (c == EOF) {
343 					if (ferror(stream)) {
344 						source->result = ISC_R_IOERROR;
345 						result = source->result;
346 						goto done;
347 					}
348 					source->at_eof = 1;
349 				}
350 			} else {
351 				buffer = source->input;
352 
353 				if (buffer->current == buffer->used) {
354 					c = EOF;
355 					source->at_eof = 1;
356 				} else {
357 					c = *((unsigned char *)buffer->base +
358 					      buffer->current);
359 					buffer->current++;
360 				}
361 			}
362 			if (c != EOF) {
363 				source->result = pushandgrow(source, c);
364 				if (source->result != ISC_R_SUCCESS) {
365 					result = source->result;
366 					goto done;
367 				}
368 			}
369 		}
370 
371 		if (!source->at_eof) {
372 			if (state == lexstate_start)
373 				/* Token has not started yet. */
374 				source->ignored =
375 				   isc_buffer_consumedlength(source->pushback);
376 			c = isc_buffer_getuint8(source->pushback);
377 		} else {
378 			c = EOF;
379 		}
380 
381 		if (c == '\n')
382 			source->line++;
383 
384 		if (lex->comment_ok && !no_comments) {
385 			if (c == '/' &&
386 				   (lex->comments &
387 				    (ISC_LEXCOMMENT_C|
388 				     ISC_LEXCOMMENT_CPLUSPLUS)) != 0) {
389 				saved_state = state;
390 				state = lexstate_maybecomment;
391 				no_comments = 1;
392 				continue;
393 			} else if (c == '#' &&
394 				   ((lex->comments & ISC_LEXCOMMENT_SHELL)
395 				    != 0)) {
396 				saved_state = state;
397 				state = lexstate_eatline;
398 				no_comments = 1;
399 				continue;
400 			}
401 		}
402 
403 	no_read:
404 		/* INSIST(c == EOF || (c >= 0 && c <= 255)); */
405 		switch (state) {
406 		case lexstate_start:
407 			if (c == EOF) {
408 				lex->last_was_eol = 0;
409 				if ((options & ISC_LEXOPT_EOF) == 0) {
410 					result = ISC_R_EOF;
411 					goto done;
412 				}
413 				tokenp->type = isc_tokentype_eof;
414 				done = 1;
415 			} else if (c == ' ' || c == '\t') {
416 				lex->last_was_eol = 0;
417 			} else if (c == '\n') {
418 				lex->last_was_eol = 1;
419 			} else if (c == '\r') {
420 				lex->last_was_eol = 0;
421 			} else if (c == '"' &&
422 				   (options & ISC_LEXOPT_QSTRING) != 0) {
423 				lex->last_was_eol = 0;
424 				no_comments = 1;
425 				state = lexstate_qstring;
426 			} else if (lex->specials[c]) {
427 				lex->last_was_eol = 0;
428 				tokenp->type = isc_tokentype_special;
429 				tokenp->value.as_char = c;
430 				done = 1;
431 			} else {
432 				lex->last_was_eol = 0;
433 				state = lexstate_string;
434 				goto no_read;
435 			}
436 			break;
437 		case lexstate_string:
438 			/*
439 			 * EOF needs to be checked before lex->specials[c]
440 			 * as lex->specials[EOF] is not a good idea.
441 			 */
442 			if (c == '\r' || c == '\n' || c == EOF ||
443 			    (!escaped &&
444 			     (c == ' ' || c == '\t' || lex->specials[c]))) {
445 				pushback(source, c);
446 				if (source->result != ISC_R_SUCCESS) {
447 					result = source->result;
448 					goto done;
449 				}
450 				tokenp->type = isc_tokentype_string;
451 				tokenp->value.as_textregion.base = lex->data;
452 				tokenp->value.as_textregion.length =
453 					(unsigned int)
454 					(lex->max_token - remaining);
455 				done = 1;
456 				continue;
457 			}
458 			if (remaining == 0U) {
459 				result = grow_data(lex, &remaining,
460 						   &curr, &prev);
461 				if (result != ISC_R_SUCCESS)
462 					goto done;
463 			}
464 			INSIST(remaining > 0U);
465 			*curr++ = c;
466 			*curr = '\0';
467 			remaining--;
468 			break;
469 		case lexstate_maybecomment:
470 			if (c == '*' &&
471 			    (lex->comments & ISC_LEXCOMMENT_C) != 0) {
472 				state = lexstate_ccomment;
473 				continue;
474 			} else if (c == '/' &&
475 			    (lex->comments & ISC_LEXCOMMENT_CPLUSPLUS) != 0) {
476 				state = lexstate_eatline;
477 				continue;
478 			}
479 			pushback(source, c);
480 			c = '/';
481 			no_comments = 0;
482 			state = saved_state;
483 			goto no_read;
484 		case lexstate_ccomment:
485 			if (c == EOF) {
486 				result = ISC_R_UNEXPECTEDEND;
487 				goto done;
488 			}
489 			if (c == '*')
490 				state = lexstate_ccommentend;
491 			break;
492 		case lexstate_ccommentend:
493 			if (c == EOF) {
494 				result = ISC_R_UNEXPECTEDEND;
495 				goto done;
496 			}
497 			if (c == '/') {
498 				/*
499 				 * C-style comments become a single space.
500 				 * We do this to ensure that a comment will
501 				 * act as a delimiter for strings and
502 				 * numbers.
503 				 */
504 				c = ' ';
505 				no_comments = 0;
506 				state = saved_state;
507 				goto no_read;
508 			} else if (c != '*')
509 				state = lexstate_ccomment;
510 			break;
511 		case lexstate_eatline:
512 			if ((c == '\n') || (c == EOF)) {
513 				no_comments = 0;
514 				state = saved_state;
515 				goto no_read;
516 			}
517 			break;
518 		case lexstate_qstring:
519 			if (c == EOF) {
520 				result = ISC_R_UNEXPECTEDEND;
521 				goto done;
522 			}
523 			if (c == '"') {
524 				if (escaped) {
525 					escaped = 0;
526 					/*
527 					 * Overwrite the preceding backslash.
528 					 */
529 					INSIST(prev != NULL);
530 					*prev = '"';
531 				} else {
532 					tokenp->type = isc_tokentype_qstring;
533 					tokenp->value.as_textregion.base =
534 						lex->data;
535 					tokenp->value.as_textregion.length =
536 						(unsigned int)
537 						(lex->max_token - remaining);
538 					no_comments = 0;
539 					done = 1;
540 				}
541 			} else {
542 				if (c == '\n' && !escaped &&
543 			    (options & ISC_LEXOPT_QSTRINGMULTILINE) == 0) {
544 					pushback(source, c);
545 					result = ISC_R_UNBALANCEDQUOTES;
546 					goto done;
547 				}
548 				if (c == '\\' && !escaped)
549 					escaped = 1;
550 				else
551 					escaped = 0;
552 				if (remaining == 0U) {
553 					result = grow_data(lex, &remaining,
554 							   &curr, &prev);
555 					if (result != ISC_R_SUCCESS)
556 						goto done;
557 				}
558 				INSIST(remaining > 0U);
559 				prev = curr;
560 				*curr++ = c;
561 				*curr = '\0';
562 				remaining--;
563 			}
564 			break;
565 		default:
566 			FATAL_ERROR(__FILE__, __LINE__, "Unexpected state %d",
567 				    state);
568 			/* Does not return. */
569 		}
570 
571 	} while (!done);
572 
573 	result = ISC_R_SUCCESS;
574  done:
575 	if (source->is_file)
576 		funlockfile(source->input);
577 	return (result);
578 }
579 
580 void
581 isc_lex_ungettoken(isc_lex_t *lex, isc_token_t *tokenp) {
582 	inputsource *source;
583 	/*
584 	 * Unget the current token.
585 	 */
586 
587 	source = HEAD(lex->sources);
588 	REQUIRE(source != NULL);
589 	REQUIRE(tokenp != NULL);
590 	REQUIRE(isc_buffer_consumedlength(source->pushback) != 0 ||
591 		tokenp->type == isc_tokentype_eof);
592 
593 	UNUSED(tokenp);
594 
595 	isc_buffer_first(source->pushback);
596 	lex->paren_count = lex->saved_paren_count;
597 	source->line = source->saved_line;
598 	source->at_eof = 0;
599 }
600 
601 void
602 isc_lex_getlasttokentext(isc_lex_t *lex, isc_token_t *tokenp, isc_region_t *r)
603 {
604 	inputsource *source;
605 
606 	source = HEAD(lex->sources);
607 	REQUIRE(source != NULL);
608 	REQUIRE(tokenp != NULL);
609 	REQUIRE(isc_buffer_consumedlength(source->pushback) != 0 ||
610 		tokenp->type == isc_tokentype_eof);
611 
612 	UNUSED(tokenp);
613 
614 	INSIST(source->ignored <= isc_buffer_consumedlength(source->pushback));
615 	r->base = (unsigned char *)isc_buffer_base(source->pushback) +
616 		  source->ignored;
617 	r->length = isc_buffer_consumedlength(source->pushback) -
618 		    source->ignored;
619 }
620 
621 char *
622 isc_lex_getsourcename(isc_lex_t *lex) {
623 	inputsource *source;
624 
625 	source = HEAD(lex->sources);
626 
627 	if (source == NULL)
628 		return (NULL);
629 
630 	return (source->name);
631 }
632 
633 unsigned long
634 isc_lex_getsourceline(isc_lex_t *lex) {
635 	inputsource *source;
636 
637 	source = HEAD(lex->sources);
638 
639 	if (source == NULL)
640 		return (0);
641 
642 	return (source->line);
643 }
644