xref: /openbsd-src/usr.bin/dig/lib/isc/lex.c (revision fcde59b201a29a2b4570b00b71e7aa25d61cb5c1)
1 /*
2  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
3  *
4  * Permission to use, copy, modify, and/or distribute this software for any
5  * purpose with or without fee is hereby granted, provided that the above
6  * copyright notice and this permission notice appear in all copies.
7  *
8  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
9  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
10  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
11  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
12  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
13  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
14  * PERFORMANCE OF THIS SOFTWARE.
15  */
16 
17 /* $Id: lex.c,v 1.13 2020/10/19 14:53:11 florian Exp $ */
18 
19 /*! \file */
20 
21 #include <ctype.h>
22 #include <stdlib.h>
23 
24 #include <isc/buffer.h>
25 
26 #include <isc/lex.h>
27 
28 #include <errno.h>
29 #include <string.h>
30 #include <isc/util.h>
31 
32 #include "unix/errno2result.h"
33 
34 typedef struct inputsource {
35 	isc_result_t			result;
36 	int			is_file;
37 	int			need_close;
38 	int			at_eof;
39 	int			last_was_eol;
40 	isc_buffer_t *			pushback;
41 	unsigned int			ignored;
42 	void *				input;
43 	char *				name;
44 	unsigned long			line;
45 	unsigned long			saved_line;
46 	ISC_LINK(struct inputsource)	link;
47 } inputsource;
48 
49 struct isc_lex {
50 	/* Unlocked. */
51 	size_t				max_token;
52 	char *				data;
53 	unsigned int			comments;
54 	int			comment_ok;
55 	int			last_was_eol;
56 	unsigned int			paren_count;
57 	unsigned int			saved_paren_count;
58 	isc_lexspecials_t		specials;
59 	LIST(struct inputsource)	sources;
60 };
61 
62 static inline isc_result_t
63 grow_data(isc_lex_t *lex, size_t *remainingp, char **currp, char **prevp) {
64 	char *tmp;
65 
66 	tmp = malloc(lex->max_token * 2 + 1);
67 	if (tmp == NULL)
68 		return (ISC_R_NOMEMORY);
69 	memmove(tmp, lex->data, lex->max_token + 1);
70 	*currp = tmp + (*currp - lex->data);
71 	if (*prevp != NULL)
72 		*prevp = tmp + (*prevp - lex->data);
73 	free(lex->data);
74 	lex->data = tmp;
75 	*remainingp += lex->max_token;
76 	lex->max_token *= 2;
77 	return (ISC_R_SUCCESS);
78 }
79 
80 isc_result_t
81 isc_lex_create(size_t max_token, isc_lex_t **lexp) {
82 	isc_lex_t *lex;
83 
84 	/*
85 	 * Create a lexer.
86 	 */
87 	REQUIRE(lexp != NULL && *lexp == NULL);
88 
89 	if (max_token == 0U)
90 		max_token = 1;
91 
92 	lex = malloc(sizeof(*lex));
93 	if (lex == NULL)
94 		return (ISC_R_NOMEMORY);
95 	lex->data = malloc(max_token + 1);
96 	if (lex->data == NULL) {
97 		free(lex);
98 		return (ISC_R_NOMEMORY);
99 	}
100 	lex->max_token = max_token;
101 	lex->comments = 0;
102 	lex->comment_ok = 1;
103 	lex->last_was_eol = 1;
104 	lex->paren_count = 0;
105 	lex->saved_paren_count = 0;
106 	memset(lex->specials, 0, 256);
107 	INIT_LIST(lex->sources);
108 
109 	*lexp = lex;
110 
111 	return (ISC_R_SUCCESS);
112 }
113 
114 void
115 isc_lex_destroy(isc_lex_t **lexp) {
116 	isc_lex_t *lex;
117 
118 	/*
119 	 * Destroy the lexer.
120 	 */
121 
122 	REQUIRE(lexp != NULL);
123 	lex = *lexp;
124 
125 	while (!EMPTY(lex->sources))
126 		RUNTIME_CHECK(isc_lex_close(lex) == ISC_R_SUCCESS);
127 	if (lex->data != NULL)
128 		free(lex->data);
129 	free(lex);
130 
131 	*lexp = NULL;
132 }
133 
134 void
135 isc_lex_setcomments(isc_lex_t *lex, unsigned int comments) {
136 	/*
137 	 * Set allowed lexer commenting styles.
138 	 */
139 
140 	lex->comments = comments;
141 }
142 
143 void
144 isc_lex_setspecials(isc_lex_t *lex, isc_lexspecials_t specials) {
145 	/*
146 	 * The characters in 'specials' are returned as tokens.  Along with
147 	 * whitespace, they delimit strings and numbers.
148 	 */
149 
150 	memmove(lex->specials, specials, 256);
151 }
152 
153 static inline isc_result_t
154 new_source(isc_lex_t *lex, int is_file, int need_close,
155 	   void *input, const char *name)
156 {
157 	inputsource *source;
158 	isc_result_t result;
159 
160 	source = malloc(sizeof(*source));
161 	if (source == NULL)
162 		return (ISC_R_NOMEMORY);
163 	source->result = ISC_R_SUCCESS;
164 	source->is_file = is_file;
165 	source->need_close = need_close;
166 	source->at_eof = 0;
167 	source->last_was_eol = lex->last_was_eol;
168 	source->input = input;
169 	source->name = strdup(name);
170 	if (source->name == NULL) {
171 		free(source);
172 		return (ISC_R_NOMEMORY);
173 	}
174 	source->pushback = NULL;
175 	result = isc_buffer_allocate(&source->pushback,
176 				     (unsigned int)lex->max_token);
177 	if (result != ISC_R_SUCCESS) {
178 		free(source->name);
179 		free(source);
180 		return (result);
181 	}
182 	source->ignored = 0;
183 	source->line = 1;
184 	ISC_LIST_INITANDPREPEND(lex->sources, source, link);
185 
186 	return (ISC_R_SUCCESS);
187 }
188 
189 isc_result_t
190 isc_lex_openfile(isc_lex_t *lex, const char *filename) {
191 	isc_result_t result = ISC_R_SUCCESS;
192 	FILE *stream = NULL;
193 
194 	/*
195 	 * Open 'filename' and make it the current input source for 'lex'.
196 	 */
197 
198 	if ((stream = fopen(filename, "r")) == NULL)
199 		return (isc__errno2result(errno));
200 
201 	result = new_source(lex, 1, 1, stream, filename);
202 	if (result != ISC_R_SUCCESS)
203 		(void)fclose(stream);
204 	return (result);
205 }
206 
207 isc_result_t
208 isc_lex_close(isc_lex_t *lex) {
209 	inputsource *source;
210 
211 	/*
212 	 * Close the most recently opened object (i.e. file or buffer).
213 	 */
214 
215 	source = HEAD(lex->sources);
216 	if (source == NULL)
217 		return (ISC_R_NOMORE);
218 
219 	ISC_LIST_UNLINK(lex->sources, source, link);
220 	lex->last_was_eol = source->last_was_eol;
221 	if (source->is_file) {
222 		if (source->need_close)
223 			(void)fclose((FILE *)(source->input));
224 	}
225 	free(source->name);
226 	isc_buffer_free(&source->pushback);
227 	free(source);
228 
229 	return (ISC_R_SUCCESS);
230 }
231 
/*
 * States of the tokenizer loop in isc_lex_gettoken().
 */
typedef enum {
	lexstate_start = 0,	/* between tokens */
	lexstate_string,	/* inside an unquoted string */
	lexstate_maybecomment,	/* saw '/', waiting for '*' or '/' */
	lexstate_ccomment,	/* inside a C-style comment */
	lexstate_ccommentend,	/* saw '*' inside a C-style comment */
	lexstate_eatline,	/* discarding up to newline or EOF */
	lexstate_qstring	/* inside a quoted string */
} lexstate;

/* Both the initial-whitespace and end-of-line option bits. */
#define IWSEOL (ISC_LEXOPT_INITIALWS | ISC_LEXOPT_EOL)
243 
244 static void
245 pushback(inputsource *source, int c) {
246 	REQUIRE(source->pushback->current > 0);
247 	if (c == EOF) {
248 		source->at_eof = 0;
249 		return;
250 	}
251 	source->pushback->current--;
252 	if (c == '\n')
253 		source->line--;
254 }
255 
256 static isc_result_t
257 pushandgrow(inputsource *source, int c) {
258 	if (isc_buffer_availablelength(source->pushback) == 0) {
259 		isc_buffer_t *tbuf = NULL;
260 		unsigned int oldlen;
261 		isc_region_t used;
262 		isc_result_t result;
263 
264 		oldlen = isc_buffer_length(source->pushback);
265 		result = isc_buffer_allocate(&tbuf, oldlen * 2);
266 		if (result != ISC_R_SUCCESS)
267 			return (result);
268 		isc_buffer_usedregion(source->pushback, &used);
269 		result = isc_buffer_copyregion(tbuf, &used);
270 		INSIST(result == ISC_R_SUCCESS);
271 		tbuf->current = source->pushback->current;
272 		isc_buffer_free(&source->pushback);
273 		source->pushback = tbuf;
274 	}
275 	isc_buffer_putuint8(source->pushback, (uint8_t)c);
276 	return (ISC_R_SUCCESS);
277 }
278 
279 isc_result_t
280 isc_lex_gettoken(isc_lex_t *lex, unsigned int options, isc_token_t *tokenp) {
281 	inputsource *source;
282 	int c;
283 	int done = 0;
284 	int no_comments = 0;
285 	int escaped = 0;
286 	lexstate state = lexstate_start;
287 	lexstate saved_state = lexstate_start;
288 	isc_buffer_t *buffer;
289 	FILE *stream;
290 	char *curr, *prev;
291 	size_t remaining;
292 	unsigned int saved_options;
293 	isc_result_t result;
294 
295 	/*
296 	 * Get the next token.
297 	 */
298 
299 	source = HEAD(lex->sources);
300 	REQUIRE(tokenp != NULL);
301 
302 	if (source == NULL) {
303 		if ((options & ISC_LEXOPT_NOMORE) != 0) {
304 			tokenp->type = isc_tokentype_nomore;
305 			return (ISC_R_SUCCESS);
306 		}
307 		return (ISC_R_NOMORE);
308 	}
309 
310 	if (source->result != ISC_R_SUCCESS)
311 		return (source->result);
312 
313 	lex->saved_paren_count = lex->paren_count;
314 	source->saved_line = source->line;
315 
316 	if (isc_buffer_remaininglength(source->pushback) == 0 &&
317 	    source->at_eof)
318 	{
319 		if ((options & ISC_LEXOPT_EOF) != 0) {
320 			tokenp->type = isc_tokentype_eof;
321 			return (ISC_R_SUCCESS);
322 		}
323 		return (ISC_R_EOF);
324 	}
325 
326 	isc_buffer_compact(source->pushback);
327 
328 	saved_options = options;
329 
330 	curr = lex->data;
331 	*curr = '\0';
332 
333 	prev = NULL;
334 	remaining = lex->max_token;
335 
336 	if (source->is_file)
337 		flockfile(source->input);
338 
339 	do {
340 		if (isc_buffer_remaininglength(source->pushback) == 0) {
341 			if (source->is_file) {
342 				stream = source->input;
343 
344 				c = getc_unlocked(stream);
345 				if (c == EOF) {
346 					if (ferror(stream)) {
347 						source->result = ISC_R_IOERROR;
348 						result = source->result;
349 						goto done;
350 					}
351 					source->at_eof = 1;
352 				}
353 			} else {
354 				buffer = source->input;
355 
356 				if (buffer->current == buffer->used) {
357 					c = EOF;
358 					source->at_eof = 1;
359 				} else {
360 					c = *((unsigned char *)buffer->base +
361 					      buffer->current);
362 					buffer->current++;
363 				}
364 			}
365 			if (c != EOF) {
366 				source->result = pushandgrow(source, c);
367 				if (source->result != ISC_R_SUCCESS) {
368 					result = source->result;
369 					goto done;
370 				}
371 			}
372 		}
373 
374 		if (!source->at_eof) {
375 			if (state == lexstate_start)
376 				/* Token has not started yet. */
377 				source->ignored =
378 				   isc_buffer_consumedlength(source->pushback);
379 			c = isc_buffer_getuint8(source->pushback);
380 		} else {
381 			c = EOF;
382 		}
383 
384 		if (c == '\n')
385 			source->line++;
386 
387 		if (lex->comment_ok && !no_comments) {
388 			if (c == '/' &&
389 				   (lex->comments &
390 				    (ISC_LEXCOMMENT_C|
391 				     ISC_LEXCOMMENT_CPLUSPLUS)) != 0) {
392 				saved_state = state;
393 				state = lexstate_maybecomment;
394 				no_comments = 1;
395 				continue;
396 			} else if (c == '#' &&
397 				   ((lex->comments & ISC_LEXCOMMENT_SHELL)
398 				    != 0)) {
399 				saved_state = state;
400 				state = lexstate_eatline;
401 				no_comments = 1;
402 				continue;
403 			}
404 		}
405 
406 	no_read:
407 		/* INSIST(c == EOF || (c >= 0 && c <= 255)); */
408 		switch (state) {
409 		case lexstate_start:
410 			if (c == EOF) {
411 				lex->last_was_eol = 0;
412 				if ((options & ISC_LEXOPT_EOF) == 0) {
413 					result = ISC_R_EOF;
414 					goto done;
415 				}
416 				tokenp->type = isc_tokentype_eof;
417 				done = 1;
418 			} else if (c == ' ' || c == '\t') {
419 				lex->last_was_eol = 0;
420 			} else if (c == '\n') {
421 				lex->last_was_eol = 1;
422 			} else if (c == '\r') {
423 				lex->last_was_eol = 0;
424 			} else if (c == '"' &&
425 				   (options & ISC_LEXOPT_QSTRING) != 0) {
426 				lex->last_was_eol = 0;
427 				no_comments = 1;
428 				state = lexstate_qstring;
429 			} else if (lex->specials[c]) {
430 				lex->last_was_eol = 0;
431 				tokenp->type = isc_tokentype_special;
432 				tokenp->value.as_char = c;
433 				done = 1;
434 			} else {
435 				lex->last_was_eol = 0;
436 				state = lexstate_string;
437 				goto no_read;
438 			}
439 			break;
440 		case lexstate_string:
441 			/*
442 			 * EOF needs to be checked before lex->specials[c]
443 			 * as lex->specials[EOF] is not a good idea.
444 			 */
445 			if (c == '\r' || c == '\n' || c == EOF ||
446 			    (!escaped &&
447 			     (c == ' ' || c == '\t' || lex->specials[c]))) {
448 				pushback(source, c);
449 				if (source->result != ISC_R_SUCCESS) {
450 					result = source->result;
451 					goto done;
452 				}
453 				tokenp->type = isc_tokentype_string;
454 				tokenp->value.as_textregion.base = lex->data;
455 				tokenp->value.as_textregion.length =
456 					(unsigned int)
457 					(lex->max_token - remaining);
458 				done = 1;
459 				continue;
460 			}
461 			if (remaining == 0U) {
462 				result = grow_data(lex, &remaining,
463 						   &curr, &prev);
464 				if (result != ISC_R_SUCCESS)
465 					goto done;
466 			}
467 			INSIST(remaining > 0U);
468 			*curr++ = c;
469 			*curr = '\0';
470 			remaining--;
471 			break;
472 		case lexstate_maybecomment:
473 			if (c == '*' &&
474 			    (lex->comments & ISC_LEXCOMMENT_C) != 0) {
475 				state = lexstate_ccomment;
476 				continue;
477 			} else if (c == '/' &&
478 			    (lex->comments & ISC_LEXCOMMENT_CPLUSPLUS) != 0) {
479 				state = lexstate_eatline;
480 				continue;
481 			}
482 			pushback(source, c);
483 			c = '/';
484 			no_comments = 0;
485 			state = saved_state;
486 			goto no_read;
487 		case lexstate_ccomment:
488 			if (c == EOF) {
489 				result = ISC_R_UNEXPECTEDEND;
490 				goto done;
491 			}
492 			if (c == '*')
493 				state = lexstate_ccommentend;
494 			break;
495 		case lexstate_ccommentend:
496 			if (c == EOF) {
497 				result = ISC_R_UNEXPECTEDEND;
498 				goto done;
499 			}
500 			if (c == '/') {
501 				/*
502 				 * C-style comments become a single space.
503 				 * We do this to ensure that a comment will
504 				 * act as a delimiter for strings and
505 				 * numbers.
506 				 */
507 				c = ' ';
508 				no_comments = 0;
509 				state = saved_state;
510 				goto no_read;
511 			} else if (c != '*')
512 				state = lexstate_ccomment;
513 			break;
514 		case lexstate_eatline:
515 			if ((c == '\n') || (c == EOF)) {
516 				no_comments = 0;
517 				state = saved_state;
518 				goto no_read;
519 			}
520 			break;
521 		case lexstate_qstring:
522 			if (c == EOF) {
523 				result = ISC_R_UNEXPECTEDEND;
524 				goto done;
525 			}
526 			if (c == '"') {
527 				if (escaped) {
528 					escaped = 0;
529 					/*
530 					 * Overwrite the preceding backslash.
531 					 */
532 					INSIST(prev != NULL);
533 					*prev = '"';
534 				} else {
535 					tokenp->type = isc_tokentype_qstring;
536 					tokenp->value.as_textregion.base =
537 						lex->data;
538 					tokenp->value.as_textregion.length =
539 						(unsigned int)
540 						(lex->max_token - remaining);
541 					no_comments = 0;
542 					done = 1;
543 				}
544 			} else {
545 				if (c == '\n' && !escaped &&
546 			    (options & ISC_LEXOPT_QSTRINGMULTILINE) == 0) {
547 					pushback(source, c);
548 					result = ISC_R_UNBALANCEDQUOTES;
549 					goto done;
550 				}
551 				if (c == '\\' && !escaped)
552 					escaped = 1;
553 				else
554 					escaped = 0;
555 				if (remaining == 0U) {
556 					result = grow_data(lex, &remaining,
557 							   &curr, &prev);
558 					if (result != ISC_R_SUCCESS)
559 						goto done;
560 				}
561 				INSIST(remaining > 0U);
562 				prev = curr;
563 				*curr++ = c;
564 				*curr = '\0';
565 				remaining--;
566 			}
567 			break;
568 		default:
569 			FATAL_ERROR(__FILE__, __LINE__, "Unexpected state %d",
570 				    state);
571 			/* Does not return. */
572 		}
573 
574 	} while (!done);
575 
576 	result = ISC_R_SUCCESS;
577  done:
578 	if (source->is_file)
579 		funlockfile(source->input);
580 	return (result);
581 }
582 
583 void
584 isc_lex_ungettoken(isc_lex_t *lex, isc_token_t *tokenp) {
585 	inputsource *source;
586 	/*
587 	 * Unget the current token.
588 	 */
589 
590 	source = HEAD(lex->sources);
591 	REQUIRE(source != NULL);
592 	REQUIRE(tokenp != NULL);
593 	REQUIRE(isc_buffer_consumedlength(source->pushback) != 0 ||
594 		tokenp->type == isc_tokentype_eof);
595 
596 	UNUSED(tokenp);
597 
598 	isc_buffer_first(source->pushback);
599 	lex->paren_count = lex->saved_paren_count;
600 	source->line = source->saved_line;
601 	source->at_eof = 0;
602 }
603 
604 void
605 isc_lex_getlasttokentext(isc_lex_t *lex, isc_token_t *tokenp, isc_region_t *r)
606 {
607 	inputsource *source;
608 
609 	source = HEAD(lex->sources);
610 	REQUIRE(source != NULL);
611 	REQUIRE(tokenp != NULL);
612 	REQUIRE(isc_buffer_consumedlength(source->pushback) != 0 ||
613 		tokenp->type == isc_tokentype_eof);
614 
615 	UNUSED(tokenp);
616 
617 	INSIST(source->ignored <= isc_buffer_consumedlength(source->pushback));
618 	r->base = (unsigned char *)isc_buffer_base(source->pushback) +
619 		  source->ignored;
620 	r->length = isc_buffer_consumedlength(source->pushback) -
621 		    source->ignored;
622 }
623 
624 char *
625 isc_lex_getsourcename(isc_lex_t *lex) {
626 	inputsource *source;
627 
628 	source = HEAD(lex->sources);
629 
630 	if (source == NULL)
631 		return (NULL);
632 
633 	return (source->name);
634 }
635 
636 unsigned long
637 isc_lex_getsourceline(isc_lex_t *lex) {
638 	inputsource *source;
639 
640 	source = HEAD(lex->sources);
641 
642 	if (source == NULL)
643 		return (0);
644 
645 	return (source->line);
646 }
647