1 /* 2 * Copyright (C) Internet Systems Consortium, Inc. ("ISC") 3 * 4 * Permission to use, copy, modify, and/or distribute this software for any 5 * purpose with or without fee is hereby granted, provided that the above 6 * copyright notice and this permission notice appear in all copies. 7 * 8 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH 9 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 10 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, 11 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 12 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 13 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 14 * PERFORMANCE OF THIS SOFTWARE. 15 */ 16 17 /* $Id: lex.c,v 1.13 2020/10/19 14:53:11 florian Exp $ */ 18 19 /*! \file */ 20 21 #include <ctype.h> 22 #include <stdlib.h> 23 24 #include <isc/buffer.h> 25 26 #include <isc/lex.h> 27 28 #include <errno.h> 29 #include <string.h> 30 #include <isc/util.h> 31 32 #include "unix/errno2result.h" 33 34 typedef struct inputsource { 35 isc_result_t result; 36 int is_file; 37 int need_close; 38 int at_eof; 39 int last_was_eol; 40 isc_buffer_t * pushback; 41 unsigned int ignored; 42 void * input; 43 char * name; 44 unsigned long line; 45 unsigned long saved_line; 46 ISC_LINK(struct inputsource) link; 47 } inputsource; 48 49 struct isc_lex { 50 /* Unlocked. */ 51 size_t max_token; 52 char * data; 53 unsigned int comments; 54 int comment_ok; 55 int last_was_eol; 56 unsigned int paren_count; 57 unsigned int saved_paren_count; 58 isc_lexspecials_t specials; 59 LIST(struct inputsource) sources; 60 }; 61 62 static inline isc_result_t 63 grow_data(isc_lex_t *lex, size_t *remainingp, char **currp, char **prevp) { 64 char *tmp; 65 66 tmp = malloc(lex->max_token * 2 + 1); 67 if (tmp == NULL) 68 return (ISC_R_NOMEMORY); 69 memmove(tmp, lex->data, lex->max_token + 1); 70 *currp = tmp + (*currp - lex->data); 71 if (*prevp != NULL) 72 *prevp = tmp + (*prevp - lex->data); 73 free(lex->data); 74 lex->data = tmp; 75 *remainingp += lex->max_token; 76 lex->max_token *= 2; 77 return (ISC_R_SUCCESS); 78 } 79 80 isc_result_t 81 isc_lex_create(size_t max_token, isc_lex_t **lexp) { 82 isc_lex_t *lex; 83 84 /* 85 * Create a lexer. 86 */ 87 REQUIRE(lexp != NULL && *lexp == NULL); 88 89 if (max_token == 0U) 90 max_token = 1; 91 92 lex = malloc(sizeof(*lex)); 93 if (lex == NULL) 94 return (ISC_R_NOMEMORY); 95 lex->data = malloc(max_token + 1); 96 if (lex->data == NULL) { 97 free(lex); 98 return (ISC_R_NOMEMORY); 99 } 100 lex->max_token = max_token; 101 lex->comments = 0; 102 lex->comment_ok = 1; 103 lex->last_was_eol = 1; 104 lex->paren_count = 0; 105 lex->saved_paren_count = 0; 106 memset(lex->specials, 0, 256); 107 INIT_LIST(lex->sources); 108 109 *lexp = lex; 110 111 return (ISC_R_SUCCESS); 112 } 113 114 void 115 isc_lex_destroy(isc_lex_t **lexp) { 116 isc_lex_t *lex; 117 118 /* 119 * Destroy the lexer. 120 */ 121 122 REQUIRE(lexp != NULL); 123 lex = *lexp; 124 125 while (!EMPTY(lex->sources)) 126 RUNTIME_CHECK(isc_lex_close(lex) == ISC_R_SUCCESS); 127 if (lex->data != NULL) 128 free(lex->data); 129 free(lex); 130 131 *lexp = NULL; 132 } 133 134 void 135 isc_lex_setcomments(isc_lex_t *lex, unsigned int comments) { 136 /* 137 * Set allowed lexer commenting styles. 138 */ 139 140 lex->comments = comments; 141 } 142 143 void 144 isc_lex_setspecials(isc_lex_t *lex, isc_lexspecials_t specials) { 145 /* 146 * The characters in 'specials' are returned as tokens. Along with 147 * whitespace, they delimit strings and numbers. 148 */ 149 150 memmove(lex->specials, specials, 256); 151 } 152 153 static inline isc_result_t 154 new_source(isc_lex_t *lex, int is_file, int need_close, 155 void *input, const char *name) 156 { 157 inputsource *source; 158 isc_result_t result; 159 160 source = malloc(sizeof(*source)); 161 if (source == NULL) 162 return (ISC_R_NOMEMORY); 163 source->result = ISC_R_SUCCESS; 164 source->is_file = is_file; 165 source->need_close = need_close; 166 source->at_eof = 0; 167 source->last_was_eol = lex->last_was_eol; 168 source->input = input; 169 source->name = strdup(name); 170 if (source->name == NULL) { 171 free(source); 172 return (ISC_R_NOMEMORY); 173 } 174 source->pushback = NULL; 175 result = isc_buffer_allocate(&source->pushback, 176 (unsigned int)lex->max_token); 177 if (result != ISC_R_SUCCESS) { 178 free(source->name); 179 free(source); 180 return (result); 181 } 182 source->ignored = 0; 183 source->line = 1; 184 ISC_LIST_INITANDPREPEND(lex->sources, source, link); 185 186 return (ISC_R_SUCCESS); 187 } 188 189 isc_result_t 190 isc_lex_openfile(isc_lex_t *lex, const char *filename) { 191 isc_result_t result = ISC_R_SUCCESS; 192 FILE *stream = NULL; 193 194 /* 195 * Open 'filename' and make it the current input source for 'lex'. 196 */ 197 198 if ((stream = fopen(filename, "r")) == NULL) 199 return (isc__errno2result(errno)); 200 201 result = new_source(lex, 1, 1, stream, filename); 202 if (result != ISC_R_SUCCESS) 203 (void)fclose(stream); 204 return (result); 205 } 206 207 isc_result_t 208 isc_lex_close(isc_lex_t *lex) { 209 inputsource *source; 210 211 /* 212 * Close the most recently opened object (i.e. file or buffer). 213 */ 214 215 source = HEAD(lex->sources); 216 if (source == NULL) 217 return (ISC_R_NOMORE); 218 219 ISC_LIST_UNLINK(lex->sources, source, link); 220 lex->last_was_eol = source->last_was_eol; 221 if (source->is_file) { 222 if (source->need_close) 223 (void)fclose((FILE *)(source->input)); 224 } 225 free(source->name); 226 isc_buffer_free(&source->pushback); 227 free(source); 228 229 return (ISC_R_SUCCESS); 230 } 231 232 typedef enum { 233 lexstate_start, 234 lexstate_string, 235 lexstate_maybecomment, 236 lexstate_ccomment, 237 lexstate_ccommentend, 238 lexstate_eatline, 239 lexstate_qstring 240 } lexstate; 241 242 #define IWSEOL (ISC_LEXOPT_INITIALWS | ISC_LEXOPT_EOL) 243 244 static void 245 pushback(inputsource *source, int c) { 246 REQUIRE(source->pushback->current > 0); 247 if (c == EOF) { 248 source->at_eof = 0; 249 return; 250 } 251 source->pushback->current--; 252 if (c == '\n') 253 source->line--; 254 } 255 256 static isc_result_t 257 pushandgrow(inputsource *source, int c) { 258 if (isc_buffer_availablelength(source->pushback) == 0) { 259 isc_buffer_t *tbuf = NULL; 260 unsigned int oldlen; 261 isc_region_t used; 262 isc_result_t result; 263 264 oldlen = isc_buffer_length(source->pushback); 265 result = isc_buffer_allocate(&tbuf, oldlen * 2); 266 if (result != ISC_R_SUCCESS) 267 return (result); 268 isc_buffer_usedregion(source->pushback, &used); 269 result = isc_buffer_copyregion(tbuf, &used); 270 INSIST(result == ISC_R_SUCCESS); 271 tbuf->current = source->pushback->current; 272 isc_buffer_free(&source->pushback); 273 source->pushback = tbuf; 274 } 275 isc_buffer_putuint8(source->pushback, (uint8_t)c); 276 return (ISC_R_SUCCESS); 277 } 278 279 isc_result_t 280 isc_lex_gettoken(isc_lex_t *lex, unsigned int options, isc_token_t *tokenp) { 281 inputsource *source; 282 int c; 283 int done = 0; 284 int no_comments = 0; 285 int escaped = 0; 286 lexstate state = lexstate_start; 287 lexstate saved_state = lexstate_start; 288 isc_buffer_t *buffer; 289 FILE *stream; 290 char *curr, *prev; 291 size_t remaining; 292 unsigned int saved_options; 293 isc_result_t result; 294 295 /* 296 * Get the next token. 297 */ 298 299 source = HEAD(lex->sources); 300 REQUIRE(tokenp != NULL); 301 302 if (source == NULL) { 303 if ((options & ISC_LEXOPT_NOMORE) != 0) { 304 tokenp->type = isc_tokentype_nomore; 305 return (ISC_R_SUCCESS); 306 } 307 return (ISC_R_NOMORE); 308 } 309 310 if (source->result != ISC_R_SUCCESS) 311 return (source->result); 312 313 lex->saved_paren_count = lex->paren_count; 314 source->saved_line = source->line; 315 316 if (isc_buffer_remaininglength(source->pushback) == 0 && 317 source->at_eof) 318 { 319 if ((options & ISC_LEXOPT_EOF) != 0) { 320 tokenp->type = isc_tokentype_eof; 321 return (ISC_R_SUCCESS); 322 } 323 return (ISC_R_EOF); 324 } 325 326 isc_buffer_compact(source->pushback); 327 328 saved_options = options; 329 330 curr = lex->data; 331 *curr = '\0'; 332 333 prev = NULL; 334 remaining = lex->max_token; 335 336 if (source->is_file) 337 flockfile(source->input); 338 339 do { 340 if (isc_buffer_remaininglength(source->pushback) == 0) { 341 if (source->is_file) { 342 stream = source->input; 343 344 c = getc_unlocked(stream); 345 if (c == EOF) { 346 if (ferror(stream)) { 347 source->result = ISC_R_IOERROR; 348 result = source->result; 349 goto done; 350 } 351 source->at_eof = 1; 352 } 353 } else { 354 buffer = source->input; 355 356 if (buffer->current == buffer->used) { 357 c = EOF; 358 source->at_eof = 1; 359 } else { 360 c = *((unsigned char *)buffer->base + 361 buffer->current); 362 buffer->current++; 363 } 364 } 365 if (c != EOF) { 366 source->result = pushandgrow(source, c); 367 if (source->result != ISC_R_SUCCESS) { 368 result = source->result; 369 goto done; 370 } 371 } 372 } 373 374 if (!source->at_eof) { 375 if (state == lexstate_start) 376 /* Token has not started yet. */ 377 source->ignored = 378 isc_buffer_consumedlength(source->pushback); 379 c = isc_buffer_getuint8(source->pushback); 380 } else { 381 c = EOF; 382 } 383 384 if (c == '\n') 385 source->line++; 386 387 if (lex->comment_ok && !no_comments) { 388 if (c == '/' && 389 (lex->comments & 390 (ISC_LEXCOMMENT_C| 391 ISC_LEXCOMMENT_CPLUSPLUS)) != 0) { 392 saved_state = state; 393 state = lexstate_maybecomment; 394 no_comments = 1; 395 continue; 396 } else if (c == '#' && 397 ((lex->comments & ISC_LEXCOMMENT_SHELL) 398 != 0)) { 399 saved_state = state; 400 state = lexstate_eatline; 401 no_comments = 1; 402 continue; 403 } 404 } 405 406 no_read: 407 /* INSIST(c == EOF || (c >= 0 && c <= 255)); */ 408 switch (state) { 409 case lexstate_start: 410 if (c == EOF) { 411 lex->last_was_eol = 0; 412 if ((options & ISC_LEXOPT_EOF) == 0) { 413 result = ISC_R_EOF; 414 goto done; 415 } 416 tokenp->type = isc_tokentype_eof; 417 done = 1; 418 } else if (c == ' ' || c == '\t') { 419 lex->last_was_eol = 0; 420 } else if (c == '\n') { 421 lex->last_was_eol = 1; 422 } else if (c == '\r') { 423 lex->last_was_eol = 0; 424 } else if (c == '"' && 425 (options & ISC_LEXOPT_QSTRING) != 0) { 426 lex->last_was_eol = 0; 427 no_comments = 1; 428 state = lexstate_qstring; 429 } else if (lex->specials[c]) { 430 lex->last_was_eol = 0; 431 tokenp->type = isc_tokentype_special; 432 tokenp->value.as_char = c; 433 done = 1; 434 } else { 435 lex->last_was_eol = 0; 436 state = lexstate_string; 437 goto no_read; 438 } 439 break; 440 case lexstate_string: 441 /* 442 * EOF needs to be checked before lex->specials[c] 443 * as lex->specials[EOF] is not a good idea. 444 */ 445 if (c == '\r' || c == '\n' || c == EOF || 446 (!escaped && 447 (c == ' ' || c == '\t' || lex->specials[c]))) { 448 pushback(source, c); 449 if (source->result != ISC_R_SUCCESS) { 450 result = source->result; 451 goto done; 452 } 453 tokenp->type = isc_tokentype_string; 454 tokenp->value.as_textregion.base = lex->data; 455 tokenp->value.as_textregion.length = 456 (unsigned int) 457 (lex->max_token - remaining); 458 done = 1; 459 continue; 460 } 461 if (remaining == 0U) { 462 result = grow_data(lex, &remaining, 463 &curr, &prev); 464 if (result != ISC_R_SUCCESS) 465 goto done; 466 } 467 INSIST(remaining > 0U); 468 *curr++ = c; 469 *curr = '\0'; 470 remaining--; 471 break; 472 case lexstate_maybecomment: 473 if (c == '*' && 474 (lex->comments & ISC_LEXCOMMENT_C) != 0) { 475 state = lexstate_ccomment; 476 continue; 477 } else if (c == '/' && 478 (lex->comments & ISC_LEXCOMMENT_CPLUSPLUS) != 0) { 479 state = lexstate_eatline; 480 continue; 481 } 482 pushback(source, c); 483 c = '/'; 484 no_comments = 0; 485 state = saved_state; 486 goto no_read; 487 case lexstate_ccomment: 488 if (c == EOF) { 489 result = ISC_R_UNEXPECTEDEND; 490 goto done; 491 } 492 if (c == '*') 493 state = lexstate_ccommentend; 494 break; 495 case lexstate_ccommentend: 496 if (c == EOF) { 497 result = ISC_R_UNEXPECTEDEND; 498 goto done; 499 } 500 if (c == '/') { 501 /* 502 * C-style comments become a single space. 503 * We do this to ensure that a comment will 504 * act as a delimiter for strings and 505 * numbers. 506 */ 507 c = ' '; 508 no_comments = 0; 509 state = saved_state; 510 goto no_read; 511 } else if (c != '*') 512 state = lexstate_ccomment; 513 break; 514 case lexstate_eatline: 515 if ((c == '\n') || (c == EOF)) { 516 no_comments = 0; 517 state = saved_state; 518 goto no_read; 519 } 520 break; 521 case lexstate_qstring: 522 if (c == EOF) { 523 result = ISC_R_UNEXPECTEDEND; 524 goto done; 525 } 526 if (c == '"') { 527 if (escaped) { 528 escaped = 0; 529 /* 530 * Overwrite the preceding backslash. 531 */ 532 INSIST(prev != NULL); 533 *prev = '"'; 534 } else { 535 tokenp->type = isc_tokentype_qstring; 536 tokenp->value.as_textregion.base = 537 lex->data; 538 tokenp->value.as_textregion.length = 539 (unsigned int) 540 (lex->max_token - remaining); 541 no_comments = 0; 542 done = 1; 543 } 544 } else { 545 if (c == '\n' && !escaped && 546 (options & ISC_LEXOPT_QSTRINGMULTILINE) == 0) { 547 pushback(source, c); 548 result = ISC_R_UNBALANCEDQUOTES; 549 goto done; 550 } 551 if (c == '\\' && !escaped) 552 escaped = 1; 553 else 554 escaped = 0; 555 if (remaining == 0U) { 556 result = grow_data(lex, &remaining, 557 &curr, &prev); 558 if (result != ISC_R_SUCCESS) 559 goto done; 560 } 561 INSIST(remaining > 0U); 562 prev = curr; 563 *curr++ = c; 564 *curr = '\0'; 565 remaining--; 566 } 567 break; 568 default: 569 FATAL_ERROR(__FILE__, __LINE__, "Unexpected state %d", 570 state); 571 /* Does not return. */ 572 } 573 574 } while (!done); 575 576 result = ISC_R_SUCCESS; 577 done: 578 if (source->is_file) 579 funlockfile(source->input); 580 return (result); 581 } 582 583 void 584 isc_lex_ungettoken(isc_lex_t *lex, isc_token_t *tokenp) { 585 inputsource *source; 586 /* 587 * Unget the current token. 588 */ 589 590 source = HEAD(lex->sources); 591 REQUIRE(source != NULL); 592 REQUIRE(tokenp != NULL); 593 REQUIRE(isc_buffer_consumedlength(source->pushback) != 0 || 594 tokenp->type == isc_tokentype_eof); 595 596 UNUSED(tokenp); 597 598 isc_buffer_first(source->pushback); 599 lex->paren_count = lex->saved_paren_count; 600 source->line = source->saved_line; 601 source->at_eof = 0; 602 } 603 604 void 605 isc_lex_getlasttokentext(isc_lex_t *lex, isc_token_t *tokenp, isc_region_t *r) 606 { 607 inputsource *source; 608 609 source = HEAD(lex->sources); 610 REQUIRE(source != NULL); 611 REQUIRE(tokenp != NULL); 612 REQUIRE(isc_buffer_consumedlength(source->pushback) != 0 || 613 tokenp->type == isc_tokentype_eof); 614 615 UNUSED(tokenp); 616 617 INSIST(source->ignored <= isc_buffer_consumedlength(source->pushback)); 618 r->base = (unsigned char *)isc_buffer_base(source->pushback) + 619 source->ignored; 620 r->length = isc_buffer_consumedlength(source->pushback) - 621 source->ignored; 622 } 623 624 char * 625 isc_lex_getsourcename(isc_lex_t *lex) { 626 inputsource *source; 627 628 source = HEAD(lex->sources); 629 630 if (source == NULL) 631 return (NULL); 632 633 return (source->name); 634 } 635 636 unsigned long 637 isc_lex_getsourceline(isc_lex_t *lex) { 638 inputsource *source; 639 640 source = HEAD(lex->sources); 641 642 if (source == NULL) 643 return (0); 644 645 return (source->line); 646 } 647