xref: /openbsd-src/usr.bin/dig/lib/isc/lex.c (revision 1a8dbaac879b9f3335ad7fb25429ce63ac1d6bac)
1 /*
2  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
3  *
4  * Permission to use, copy, modify, and/or distribute this software for any
5  * purpose with or without fee is hereby granted, provided that the above
6  * copyright notice and this permission notice appear in all copies.
7  *
8  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
9  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
10  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
11  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
12  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
13  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
14  * PERFORMANCE OF THIS SOFTWARE.
15  */
16 
17 /* $Id: lex.c,v 1.12 2020/09/14 08:40:44 florian Exp $ */
18 
19 /*! \file */
20 
21 #include <ctype.h>
22 #include <stdlib.h>
23 
24 #include <isc/buffer.h>
25 
26 #include <isc/lex.h>
27 
28 #include <errno.h>
29 #include <string.h>
30 #include <isc/util.h>
31 
32 #include "unix/errno2result.h"
33 
34 typedef struct inputsource {
35 	isc_result_t			result;
36 	int			is_file;
37 	int			need_close;
38 	int			at_eof;
39 	int			last_was_eol;
40 	isc_buffer_t *			pushback;
41 	unsigned int			ignored;
42 	void *				input;
43 	char *				name;
44 	unsigned long			line;
45 	unsigned long			saved_line;
46 	ISC_LINK(struct inputsource)	link;
47 } inputsource;
48 
49 struct isc_lex {
50 	/* Unlocked. */
51 	size_t				max_token;
52 	char *				data;
53 	unsigned int			comments;
54 	int			comment_ok;
55 	int			last_was_eol;
56 	unsigned int			paren_count;
57 	unsigned int			saved_paren_count;
58 	isc_lexspecials_t		specials;
59 	LIST(struct inputsource)	sources;
60 };
61 
62 static inline isc_result_t
63 grow_data(isc_lex_t *lex, size_t *remainingp, char **currp, char **prevp) {
64 	char *tmp;
65 
66 	tmp = malloc(lex->max_token * 2 + 1);
67 	if (tmp == NULL)
68 		return (ISC_R_NOMEMORY);
69 	memmove(tmp, lex->data, lex->max_token + 1);
70 	*currp = tmp + (*currp - lex->data);
71 	if (*prevp != NULL)
72 		*prevp = tmp + (*prevp - lex->data);
73 	free(lex->data);
74 	lex->data = tmp;
75 	*remainingp += lex->max_token;
76 	lex->max_token *= 2;
77 	return (ISC_R_SUCCESS);
78 }
79 
80 isc_result_t
81 isc_lex_create(size_t max_token, isc_lex_t **lexp) {
82 	isc_lex_t *lex;
83 
84 	/*
85 	 * Create a lexer.
86 	 */
87 	REQUIRE(lexp != NULL && *lexp == NULL);
88 
89 	if (max_token == 0U)
90 		max_token = 1;
91 
92 	lex = malloc(sizeof(*lex));
93 	if (lex == NULL)
94 		return (ISC_R_NOMEMORY);
95 	lex->data = malloc(max_token + 1);
96 	if (lex->data == NULL) {
97 		free(lex);
98 		return (ISC_R_NOMEMORY);
99 	}
100 	lex->max_token = max_token;
101 	lex->comments = 0;
102 	lex->comment_ok = 1;
103 	lex->last_was_eol = 1;
104 	lex->paren_count = 0;
105 	lex->saved_paren_count = 0;
106 	memset(lex->specials, 0, 256);
107 	INIT_LIST(lex->sources);
108 
109 	*lexp = lex;
110 
111 	return (ISC_R_SUCCESS);
112 }
113 
114 void
115 isc_lex_destroy(isc_lex_t **lexp) {
116 	isc_lex_t *lex;
117 
118 	/*
119 	 * Destroy the lexer.
120 	 */
121 
122 	REQUIRE(lexp != NULL);
123 	lex = *lexp;
124 
125 	while (!EMPTY(lex->sources))
126 		RUNTIME_CHECK(isc_lex_close(lex) == ISC_R_SUCCESS);
127 	if (lex->data != NULL)
128 		free(lex->data);
129 	free(lex);
130 
131 	*lexp = NULL;
132 }
133 
134 void
135 isc_lex_setcomments(isc_lex_t *lex, unsigned int comments) {
136 	/*
137 	 * Set allowed lexer commenting styles.
138 	 */
139 
140 	lex->comments = comments;
141 }
142 
143 void
144 isc_lex_setspecials(isc_lex_t *lex, isc_lexspecials_t specials) {
145 	/*
146 	 * The characters in 'specials' are returned as tokens.  Along with
147 	 * whitespace, they delimit strings and numbers.
148 	 */
149 
150 	memmove(lex->specials, specials, 256);
151 }
152 
153 static inline isc_result_t
154 new_source(isc_lex_t *lex, int is_file, int need_close,
155 	   void *input, const char *name)
156 {
157 	inputsource *source;
158 	isc_result_t result;
159 
160 	source = malloc(sizeof(*source));
161 	if (source == NULL)
162 		return (ISC_R_NOMEMORY);
163 	source->result = ISC_R_SUCCESS;
164 	source->is_file = is_file;
165 	source->need_close = need_close;
166 	source->at_eof = 0;
167 	source->last_was_eol = lex->last_was_eol;
168 	source->input = input;
169 	source->name = strdup(name);
170 	if (source->name == NULL) {
171 		free(source);
172 		return (ISC_R_NOMEMORY);
173 	}
174 	source->pushback = NULL;
175 	result = isc_buffer_allocate(&source->pushback,
176 				     (unsigned int)lex->max_token);
177 	if (result != ISC_R_SUCCESS) {
178 		free(source->name);
179 		free(source);
180 		return (result);
181 	}
182 	source->ignored = 0;
183 	source->line = 1;
184 	ISC_LIST_INITANDPREPEND(lex->sources, source, link);
185 
186 	return (ISC_R_SUCCESS);
187 }
188 
189 isc_result_t
190 isc_lex_openfile(isc_lex_t *lex, const char *filename) {
191 	isc_result_t result = ISC_R_SUCCESS;
192 	FILE *stream = NULL;
193 
194 	/*
195 	 * Open 'filename' and make it the current input source for 'lex'.
196 	 */
197 
198 	if ((stream = fopen(filename, "r")) == NULL)
199 		return (isc__errno2result(errno));
200 
201 	result = new_source(lex, 1, 1, stream, filename);
202 	if (result != ISC_R_SUCCESS)
203 		(void)fclose(stream);
204 	return (result);
205 }
206 
207 isc_result_t
208 isc_lex_close(isc_lex_t *lex) {
209 	inputsource *source;
210 
211 	/*
212 	 * Close the most recently opened object (i.e. file or buffer).
213 	 */
214 
215 	source = HEAD(lex->sources);
216 	if (source == NULL)
217 		return (ISC_R_NOMORE);
218 
219 	ISC_LIST_UNLINK(lex->sources, source, link);
220 	lex->last_was_eol = source->last_was_eol;
221 	if (source->is_file) {
222 		if (source->need_close)
223 			(void)fclose((FILE *)(source->input));
224 	}
225 	free(source->name);
226 	isc_buffer_free(&source->pushback);
227 	free(source);
228 
229 	return (ISC_R_SUCCESS);
230 }
231 
/*
 * States of the tokenizer loop in isc_lex_gettoken().
 */
typedef enum {
	lexstate_start,		/* no token in progress yet */
	lexstate_string,	/* accumulating an unquoted string */
	lexstate_maybecomment,	/* saw '/', next character decides */
	lexstate_ccomment,	/* inside a C-style comment */
	lexstate_ccommentend,	/* inside a C-style comment, just saw '*' */
	lexstate_eatline,	/* discarding the rest of the line */
	lexstate_qstring	/* inside a double-quoted string */
} lexstate;

/* Both "initial whitespace" and "end of line" reporting options. */
#define IWSEOL (ISC_LEXOPT_INITIALWS | ISC_LEXOPT_EOL)
243 
244 static void
245 pushback(inputsource *source, int c) {
246 	REQUIRE(source->pushback->current > 0);
247 	if (c == EOF) {
248 		source->at_eof = 0;
249 		return;
250 	}
251 	source->pushback->current--;
252 	if (c == '\n')
253 		source->line--;
254 }
255 
256 static isc_result_t
257 pushandgrow(inputsource *source, int c) {
258 	if (isc_buffer_availablelength(source->pushback) == 0) {
259 		isc_buffer_t *tbuf = NULL;
260 		unsigned int oldlen;
261 		isc_region_t used;
262 		isc_result_t result;
263 
264 		oldlen = isc_buffer_length(source->pushback);
265 		result = isc_buffer_allocate(&tbuf, oldlen * 2);
266 		if (result != ISC_R_SUCCESS)
267 			return (result);
268 		isc_buffer_usedregion(source->pushback, &used);
269 		result = isc_buffer_copyregion(tbuf, &used);
270 		INSIST(result == ISC_R_SUCCESS);
271 		tbuf->current = source->pushback->current;
272 		isc_buffer_free(&source->pushback);
273 		source->pushback = tbuf;
274 	}
275 	isc_buffer_putuint8(source->pushback, (uint8_t)c);
276 	return (ISC_R_SUCCESS);
277 }
278 
279 isc_result_t
280 isc_lex_gettoken(isc_lex_t *lex, unsigned int options, isc_token_t *tokenp) {
281 	inputsource *source;
282 	int c;
283 	int done = 0;
284 	int no_comments = 0;
285 	int escaped = 0;
286 	lexstate state = lexstate_start;
287 	lexstate saved_state = lexstate_start;
288 	isc_buffer_t *buffer;
289 	FILE *stream;
290 	char *curr, *prev;
291 	size_t remaining;
292 	unsigned int saved_options;
293 	isc_result_t result;
294 
295 	/*
296 	 * Get the next token.
297 	 */
298 
299 	source = HEAD(lex->sources);
300 	REQUIRE(tokenp != NULL);
301 
302 	if (source == NULL) {
303 		if ((options & ISC_LEXOPT_NOMORE) != 0) {
304 			tokenp->type = isc_tokentype_nomore;
305 			return (ISC_R_SUCCESS);
306 		}
307 		return (ISC_R_NOMORE);
308 	}
309 
310 	if (source->result != ISC_R_SUCCESS)
311 		return (source->result);
312 
313 	lex->saved_paren_count = lex->paren_count;
314 	source->saved_line = source->line;
315 
316 	if (isc_buffer_remaininglength(source->pushback) == 0 &&
317 	    source->at_eof)
318 	{
319 		if ((options & ISC_LEXOPT_EOF) != 0) {
320 			tokenp->type = isc_tokentype_eof;
321 			return (ISC_R_SUCCESS);
322 		}
323 		return (ISC_R_EOF);
324 	}
325 
326 	isc_buffer_compact(source->pushback);
327 
328 	saved_options = options;
329 
330 	curr = lex->data;
331 	*curr = '\0';
332 
333 	prev = NULL;
334 	remaining = lex->max_token;
335 
336 	if (source->is_file)
337 		flockfile(source->input);
338 
339 	do {
340 		if (isc_buffer_remaininglength(source->pushback) == 0) {
341 			if (source->is_file) {
342 				stream = source->input;
343 
344 				c = getc_unlocked(stream);
345 				if (c == EOF) {
346 					if (ferror(stream)) {
347 						source->result = ISC_R_IOERROR;
348 						result = source->result;
349 						goto done;
350 					}
351 					source->at_eof = 1;
352 				}
353 			} else {
354 				buffer = source->input;
355 
356 				if (buffer->current == buffer->used) {
357 					c = EOF;
358 					source->at_eof = 1;
359 				} else {
360 					c = *((unsigned char *)buffer->base +
361 					      buffer->current);
362 					buffer->current++;
363 				}
364 			}
365 			if (c != EOF) {
366 				source->result = pushandgrow(source, c);
367 				if (source->result != ISC_R_SUCCESS) {
368 					result = source->result;
369 					goto done;
370 				}
371 			}
372 		}
373 
374 		if (!source->at_eof) {
375 			if (state == lexstate_start)
376 				/* Token has not started yet. */
377 				source->ignored =
378 				   isc_buffer_consumedlength(source->pushback);
379 			c = isc_buffer_getuint8(source->pushback);
380 		} else {
381 			c = EOF;
382 		}
383 
384 		if (c == '\n')
385 			source->line++;
386 
387 		if (lex->comment_ok && !no_comments) {
388 			if (c == '/' &&
389 				   (lex->comments &
390 				    (ISC_LEXCOMMENT_C|
391 				     ISC_LEXCOMMENT_CPLUSPLUS)) != 0) {
392 				saved_state = state;
393 				state = lexstate_maybecomment;
394 				no_comments = 1;
395 				continue;
396 			} else if (c == '#' &&
397 				   ((lex->comments & ISC_LEXCOMMENT_SHELL)
398 				    != 0)) {
399 				saved_state = state;
400 				state = lexstate_eatline;
401 				no_comments = 1;
402 				continue;
403 			}
404 		}
405 
406 	no_read:
407 		/* INSIST(c == EOF || (c >= 0 && c <= 255)); */
408 		switch (state) {
409 		case lexstate_start:
410 			if (c == EOF) {
411 				lex->last_was_eol = 0;
412 				if ((options & ISC_LEXOPT_EOF) == 0) {
413 					result = ISC_R_EOF;
414 					goto done;
415 				}
416 				tokenp->type = isc_tokentype_eof;
417 				done = 1;
418 			} else if (c == '\n') {
419 				lex->last_was_eol = 1;
420 			} else if (c == '"' &&
421 				   (options & ISC_LEXOPT_QSTRING) != 0) {
422 				lex->last_was_eol = 0;
423 				no_comments = 1;
424 				state = lexstate_qstring;
425 			} else if (lex->specials[c]) {
426 				lex->last_was_eol = 0;
427 				tokenp->type = isc_tokentype_special;
428 				tokenp->value.as_char = c;
429 				done = 1;
430 			} else {
431 				lex->last_was_eol = 0;
432 				state = lexstate_string;
433 				goto no_read;
434 			}
435 			break;
436 		case lexstate_string:
437 			/*
438 			 * EOF needs to be checked before lex->specials[c]
439 			 * as lex->specials[EOF] is not a good idea.
440 			 */
441 			if (c == '\r' || c == '\n' || c == EOF ||
442 			    (!escaped &&
443 			     (c == ' ' || c == '\t' || lex->specials[c]))) {
444 				pushback(source, c);
445 				if (source->result != ISC_R_SUCCESS) {
446 					result = source->result;
447 					goto done;
448 				}
449 				tokenp->type = isc_tokentype_string;
450 				tokenp->value.as_textregion.base = lex->data;
451 				tokenp->value.as_textregion.length =
452 					(unsigned int)
453 					(lex->max_token - remaining);
454 				done = 1;
455 				continue;
456 			}
457 			if (remaining == 0U) {
458 				result = grow_data(lex, &remaining,
459 						   &curr, &prev);
460 				if (result != ISC_R_SUCCESS)
461 					goto done;
462 			}
463 			INSIST(remaining > 0U);
464 			*curr++ = c;
465 			*curr = '\0';
466 			remaining--;
467 			break;
468 		case lexstate_maybecomment:
469 			if (c == '*' &&
470 			    (lex->comments & ISC_LEXCOMMENT_C) != 0) {
471 				state = lexstate_ccomment;
472 				continue;
473 			} else if (c == '/' &&
474 			    (lex->comments & ISC_LEXCOMMENT_CPLUSPLUS) != 0) {
475 				state = lexstate_eatline;
476 				continue;
477 			}
478 			pushback(source, c);
479 			c = '/';
480 			no_comments = 0;
481 			state = saved_state;
482 			goto no_read;
483 		case lexstate_ccomment:
484 			if (c == EOF) {
485 				result = ISC_R_UNEXPECTEDEND;
486 				goto done;
487 			}
488 			if (c == '*')
489 				state = lexstate_ccommentend;
490 			break;
491 		case lexstate_ccommentend:
492 			if (c == EOF) {
493 				result = ISC_R_UNEXPECTEDEND;
494 				goto done;
495 			}
496 			if (c == '/') {
497 				/*
498 				 * C-style comments become a single space.
499 				 * We do this to ensure that a comment will
500 				 * act as a delimiter for strings and
501 				 * numbers.
502 				 */
503 				c = ' ';
504 				no_comments = 0;
505 				state = saved_state;
506 				goto no_read;
507 			} else if (c != '*')
508 				state = lexstate_ccomment;
509 			break;
510 		case lexstate_eatline:
511 			if ((c == '\n') || (c == EOF)) {
512 				no_comments = 0;
513 				state = saved_state;
514 				goto no_read;
515 			}
516 			break;
517 		case lexstate_qstring:
518 			if (c == EOF) {
519 				result = ISC_R_UNEXPECTEDEND;
520 				goto done;
521 			}
522 			if (c == '"') {
523 				if (escaped) {
524 					escaped = 0;
525 					/*
526 					 * Overwrite the preceding backslash.
527 					 */
528 					INSIST(prev != NULL);
529 					*prev = '"';
530 				} else {
531 					tokenp->type = isc_tokentype_qstring;
532 					tokenp->value.as_textregion.base =
533 						lex->data;
534 					tokenp->value.as_textregion.length =
535 						(unsigned int)
536 						(lex->max_token - remaining);
537 					no_comments = 0;
538 					done = 1;
539 				}
540 			} else {
541 				if (c == '\n' && !escaped &&
542 			    (options & ISC_LEXOPT_QSTRINGMULTILINE) == 0) {
543 					pushback(source, c);
544 					result = ISC_R_UNBALANCEDQUOTES;
545 					goto done;
546 				}
547 				if (c == '\\' && !escaped)
548 					escaped = 1;
549 				else
550 					escaped = 0;
551 				if (remaining == 0U) {
552 					result = grow_data(lex, &remaining,
553 							   &curr, &prev);
554 					if (result != ISC_R_SUCCESS)
555 						goto done;
556 				}
557 				INSIST(remaining > 0U);
558 				prev = curr;
559 				*curr++ = c;
560 				*curr = '\0';
561 				remaining--;
562 			}
563 			break;
564 		default:
565 			FATAL_ERROR(__FILE__, __LINE__, "Unexpected state %d",
566 				    state);
567 			/* Does not return. */
568 		}
569 
570 	} while (!done);
571 
572 	result = ISC_R_SUCCESS;
573  done:
574 	if (source->is_file)
575 		funlockfile(source->input);
576 	return (result);
577 }
578 
579 void
580 isc_lex_ungettoken(isc_lex_t *lex, isc_token_t *tokenp) {
581 	inputsource *source;
582 	/*
583 	 * Unget the current token.
584 	 */
585 
586 	source = HEAD(lex->sources);
587 	REQUIRE(source != NULL);
588 	REQUIRE(tokenp != NULL);
589 	REQUIRE(isc_buffer_consumedlength(source->pushback) != 0 ||
590 		tokenp->type == isc_tokentype_eof);
591 
592 	UNUSED(tokenp);
593 
594 	isc_buffer_first(source->pushback);
595 	lex->paren_count = lex->saved_paren_count;
596 	source->line = source->saved_line;
597 	source->at_eof = 0;
598 }
599 
600 void
601 isc_lex_getlasttokentext(isc_lex_t *lex, isc_token_t *tokenp, isc_region_t *r)
602 {
603 	inputsource *source;
604 
605 	source = HEAD(lex->sources);
606 	REQUIRE(source != NULL);
607 	REQUIRE(tokenp != NULL);
608 	REQUIRE(isc_buffer_consumedlength(source->pushback) != 0 ||
609 		tokenp->type == isc_tokentype_eof);
610 
611 	UNUSED(tokenp);
612 
613 	INSIST(source->ignored <= isc_buffer_consumedlength(source->pushback));
614 	r->base = (unsigned char *)isc_buffer_base(source->pushback) +
615 		  source->ignored;
616 	r->length = isc_buffer_consumedlength(source->pushback) -
617 		    source->ignored;
618 }
619 
620 char *
621 isc_lex_getsourcename(isc_lex_t *lex) {
622 	inputsource *source;
623 
624 	source = HEAD(lex->sources);
625 
626 	if (source == NULL)
627 		return (NULL);
628 
629 	return (source->name);
630 }
631 
632 unsigned long
633 isc_lex_getsourceline(isc_lex_t *lex) {
634 	inputsource *source;
635 
636 	source = HEAD(lex->sources);
637 
638 	if (source == NULL)
639 		return (0);
640 
641 	return (source->line);
642 }
643