1 /* 2 * Copyright (C) Internet Systems Consortium, Inc. ("ISC") 3 * 4 * Permission to use, copy, modify, and/or distribute this software for any 5 * purpose with or without fee is hereby granted, provided that the above 6 * copyright notice and this permission notice appear in all copies. 7 * 8 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH 9 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 10 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, 11 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 12 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 13 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 14 * PERFORMANCE OF THIS SOFTWARE. 15 */ 16 17 /* $Id: lex.c,v 1.12 2020/09/14 08:40:44 florian Exp $ */ 18 19 /*! \file */ 20 21 #include <ctype.h> 22 #include <stdlib.h> 23 24 #include <isc/buffer.h> 25 26 #include <isc/lex.h> 27 28 #include <errno.h> 29 #include <string.h> 30 #include <isc/util.h> 31 32 #include "unix/errno2result.h" 33 34 typedef struct inputsource { 35 isc_result_t result; 36 int is_file; 37 int need_close; 38 int at_eof; 39 int last_was_eol; 40 isc_buffer_t * pushback; 41 unsigned int ignored; 42 void * input; 43 char * name; 44 unsigned long line; 45 unsigned long saved_line; 46 ISC_LINK(struct inputsource) link; 47 } inputsource; 48 49 struct isc_lex { 50 /* Unlocked. */ 51 size_t max_token; 52 char * data; 53 unsigned int comments; 54 int comment_ok; 55 int last_was_eol; 56 unsigned int paren_count; 57 unsigned int saved_paren_count; 58 isc_lexspecials_t specials; 59 LIST(struct inputsource) sources; 60 }; 61 62 static inline isc_result_t 63 grow_data(isc_lex_t *lex, size_t *remainingp, char **currp, char **prevp) { 64 char *tmp; 65 66 tmp = malloc(lex->max_token * 2 + 1); 67 if (tmp == NULL) 68 return (ISC_R_NOMEMORY); 69 memmove(tmp, lex->data, lex->max_token + 1); 70 *currp = tmp + (*currp - lex->data); 71 if (*prevp != NULL) 72 *prevp = tmp + (*prevp - lex->data); 73 free(lex->data); 74 lex->data = tmp; 75 *remainingp += lex->max_token; 76 lex->max_token *= 2; 77 return (ISC_R_SUCCESS); 78 } 79 80 isc_result_t 81 isc_lex_create(size_t max_token, isc_lex_t **lexp) { 82 isc_lex_t *lex; 83 84 /* 85 * Create a lexer. 86 */ 87 REQUIRE(lexp != NULL && *lexp == NULL); 88 89 if (max_token == 0U) 90 max_token = 1; 91 92 lex = malloc(sizeof(*lex)); 93 if (lex == NULL) 94 return (ISC_R_NOMEMORY); 95 lex->data = malloc(max_token + 1); 96 if (lex->data == NULL) { 97 free(lex); 98 return (ISC_R_NOMEMORY); 99 } 100 lex->max_token = max_token; 101 lex->comments = 0; 102 lex->comment_ok = 1; 103 lex->last_was_eol = 1; 104 lex->paren_count = 0; 105 lex->saved_paren_count = 0; 106 memset(lex->specials, 0, 256); 107 INIT_LIST(lex->sources); 108 109 *lexp = lex; 110 111 return (ISC_R_SUCCESS); 112 } 113 114 void 115 isc_lex_destroy(isc_lex_t **lexp) { 116 isc_lex_t *lex; 117 118 /* 119 * Destroy the lexer. 120 */ 121 122 REQUIRE(lexp != NULL); 123 lex = *lexp; 124 125 while (!EMPTY(lex->sources)) 126 RUNTIME_CHECK(isc_lex_close(lex) == ISC_R_SUCCESS); 127 if (lex->data != NULL) 128 free(lex->data); 129 free(lex); 130 131 *lexp = NULL; 132 } 133 134 void 135 isc_lex_setcomments(isc_lex_t *lex, unsigned int comments) { 136 /* 137 * Set allowed lexer commenting styles. 138 */ 139 140 lex->comments = comments; 141 } 142 143 void 144 isc_lex_setspecials(isc_lex_t *lex, isc_lexspecials_t specials) { 145 /* 146 * The characters in 'specials' are returned as tokens. Along with 147 * whitespace, they delimit strings and numbers. 148 */ 149 150 memmove(lex->specials, specials, 256); 151 } 152 153 static inline isc_result_t 154 new_source(isc_lex_t *lex, int is_file, int need_close, 155 void *input, const char *name) 156 { 157 inputsource *source; 158 isc_result_t result; 159 160 source = malloc(sizeof(*source)); 161 if (source == NULL) 162 return (ISC_R_NOMEMORY); 163 source->result = ISC_R_SUCCESS; 164 source->is_file = is_file; 165 source->need_close = need_close; 166 source->at_eof = 0; 167 source->last_was_eol = lex->last_was_eol; 168 source->input = input; 169 source->name = strdup(name); 170 if (source->name == NULL) { 171 free(source); 172 return (ISC_R_NOMEMORY); 173 } 174 source->pushback = NULL; 175 result = isc_buffer_allocate(&source->pushback, 176 (unsigned int)lex->max_token); 177 if (result != ISC_R_SUCCESS) { 178 free(source->name); 179 free(source); 180 return (result); 181 } 182 source->ignored = 0; 183 source->line = 1; 184 ISC_LIST_INITANDPREPEND(lex->sources, source, link); 185 186 return (ISC_R_SUCCESS); 187 } 188 189 isc_result_t 190 isc_lex_openfile(isc_lex_t *lex, const char *filename) { 191 isc_result_t result = ISC_R_SUCCESS; 192 FILE *stream = NULL; 193 194 /* 195 * Open 'filename' and make it the current input source for 'lex'. 196 */ 197 198 if ((stream = fopen(filename, "r")) == NULL) 199 return (isc__errno2result(errno)); 200 201 result = new_source(lex, 1, 1, stream, filename); 202 if (result != ISC_R_SUCCESS) 203 (void)fclose(stream); 204 return (result); 205 } 206 207 isc_result_t 208 isc_lex_close(isc_lex_t *lex) { 209 inputsource *source; 210 211 /* 212 * Close the most recently opened object (i.e. file or buffer). 213 */ 214 215 source = HEAD(lex->sources); 216 if (source == NULL) 217 return (ISC_R_NOMORE); 218 219 ISC_LIST_UNLINK(lex->sources, source, link); 220 lex->last_was_eol = source->last_was_eol; 221 if (source->is_file) { 222 if (source->need_close) 223 (void)fclose((FILE *)(source->input)); 224 } 225 free(source->name); 226 isc_buffer_free(&source->pushback); 227 free(source); 228 229 return (ISC_R_SUCCESS); 230 } 231 232 typedef enum { 233 lexstate_start, 234 lexstate_string, 235 lexstate_maybecomment, 236 lexstate_ccomment, 237 lexstate_ccommentend, 238 lexstate_eatline, 239 lexstate_qstring 240 } lexstate; 241 242 #define IWSEOL (ISC_LEXOPT_INITIALWS | ISC_LEXOPT_EOL) 243 244 static void 245 pushback(inputsource *source, int c) { 246 REQUIRE(source->pushback->current > 0); 247 if (c == EOF) { 248 source->at_eof = 0; 249 return; 250 } 251 source->pushback->current--; 252 if (c == '\n') 253 source->line--; 254 } 255 256 static isc_result_t 257 pushandgrow(inputsource *source, int c) { 258 if (isc_buffer_availablelength(source->pushback) == 0) { 259 isc_buffer_t *tbuf = NULL; 260 unsigned int oldlen; 261 isc_region_t used; 262 isc_result_t result; 263 264 oldlen = isc_buffer_length(source->pushback); 265 result = isc_buffer_allocate(&tbuf, oldlen * 2); 266 if (result != ISC_R_SUCCESS) 267 return (result); 268 isc_buffer_usedregion(source->pushback, &used); 269 result = isc_buffer_copyregion(tbuf, &used); 270 INSIST(result == ISC_R_SUCCESS); 271 tbuf->current = source->pushback->current; 272 isc_buffer_free(&source->pushback); 273 source->pushback = tbuf; 274 } 275 isc_buffer_putuint8(source->pushback, (uint8_t)c); 276 return (ISC_R_SUCCESS); 277 } 278 279 isc_result_t 280 isc_lex_gettoken(isc_lex_t *lex, unsigned int options, isc_token_t *tokenp) { 281 inputsource *source; 282 int c; 283 int done = 0; 284 int no_comments = 0; 285 int escaped = 0; 286 lexstate state = lexstate_start; 287 lexstate saved_state = lexstate_start; 288 isc_buffer_t *buffer; 289 FILE *stream; 290 char *curr, *prev; 291 size_t remaining; 292 unsigned int saved_options; 293 isc_result_t result; 294 295 /* 296 * Get the next token. 297 */ 298 299 source = HEAD(lex->sources); 300 REQUIRE(tokenp != NULL); 301 302 if (source == NULL) { 303 if ((options & ISC_LEXOPT_NOMORE) != 0) { 304 tokenp->type = isc_tokentype_nomore; 305 return (ISC_R_SUCCESS); 306 } 307 return (ISC_R_NOMORE); 308 } 309 310 if (source->result != ISC_R_SUCCESS) 311 return (source->result); 312 313 lex->saved_paren_count = lex->paren_count; 314 source->saved_line = source->line; 315 316 if (isc_buffer_remaininglength(source->pushback) == 0 && 317 source->at_eof) 318 { 319 if ((options & ISC_LEXOPT_EOF) != 0) { 320 tokenp->type = isc_tokentype_eof; 321 return (ISC_R_SUCCESS); 322 } 323 return (ISC_R_EOF); 324 } 325 326 isc_buffer_compact(source->pushback); 327 328 saved_options = options; 329 330 curr = lex->data; 331 *curr = '\0'; 332 333 prev = NULL; 334 remaining = lex->max_token; 335 336 if (source->is_file) 337 flockfile(source->input); 338 339 do { 340 if (isc_buffer_remaininglength(source->pushback) == 0) { 341 if (source->is_file) { 342 stream = source->input; 343 344 c = getc_unlocked(stream); 345 if (c == EOF) { 346 if (ferror(stream)) { 347 source->result = ISC_R_IOERROR; 348 result = source->result; 349 goto done; 350 } 351 source->at_eof = 1; 352 } 353 } else { 354 buffer = source->input; 355 356 if (buffer->current == buffer->used) { 357 c = EOF; 358 source->at_eof = 1; 359 } else { 360 c = *((unsigned char *)buffer->base + 361 buffer->current); 362 buffer->current++; 363 } 364 } 365 if (c != EOF) { 366 source->result = pushandgrow(source, c); 367 if (source->result != ISC_R_SUCCESS) { 368 result = source->result; 369 goto done; 370 } 371 } 372 } 373 374 if (!source->at_eof) { 375 if (state == lexstate_start) 376 /* Token has not started yet. */ 377 source->ignored = 378 isc_buffer_consumedlength(source->pushback); 379 c = isc_buffer_getuint8(source->pushback); 380 } else { 381 c = EOF; 382 } 383 384 if (c == '\n') 385 source->line++; 386 387 if (lex->comment_ok && !no_comments) { 388 if (c == '/' && 389 (lex->comments & 390 (ISC_LEXCOMMENT_C| 391 ISC_LEXCOMMENT_CPLUSPLUS)) != 0) { 392 saved_state = state; 393 state = lexstate_maybecomment; 394 no_comments = 1; 395 continue; 396 } else if (c == '#' && 397 ((lex->comments & ISC_LEXCOMMENT_SHELL) 398 != 0)) { 399 saved_state = state; 400 state = lexstate_eatline; 401 no_comments = 1; 402 continue; 403 } 404 } 405 406 no_read: 407 /* INSIST(c == EOF || (c >= 0 && c <= 255)); */ 408 switch (state) { 409 case lexstate_start: 410 if (c == EOF) { 411 lex->last_was_eol = 0; 412 if ((options & ISC_LEXOPT_EOF) == 0) { 413 result = ISC_R_EOF; 414 goto done; 415 } 416 tokenp->type = isc_tokentype_eof; 417 done = 1; 418 } else if (c == '\n') { 419 lex->last_was_eol = 1; 420 } else if (c == '"' && 421 (options & ISC_LEXOPT_QSTRING) != 0) { 422 lex->last_was_eol = 0; 423 no_comments = 1; 424 state = lexstate_qstring; 425 } else if (lex->specials[c]) { 426 lex->last_was_eol = 0; 427 tokenp->type = isc_tokentype_special; 428 tokenp->value.as_char = c; 429 done = 1; 430 } else { 431 lex->last_was_eol = 0; 432 state = lexstate_string; 433 goto no_read; 434 } 435 break; 436 case lexstate_string: 437 /* 438 * EOF needs to be checked before lex->specials[c] 439 * as lex->specials[EOF] is not a good idea. 440 */ 441 if (c == '\r' || c == '\n' || c == EOF || 442 (!escaped && 443 (c == ' ' || c == '\t' || lex->specials[c]))) { 444 pushback(source, c); 445 if (source->result != ISC_R_SUCCESS) { 446 result = source->result; 447 goto done; 448 } 449 tokenp->type = isc_tokentype_string; 450 tokenp->value.as_textregion.base = lex->data; 451 tokenp->value.as_textregion.length = 452 (unsigned int) 453 (lex->max_token - remaining); 454 done = 1; 455 continue; 456 } 457 if (remaining == 0U) { 458 result = grow_data(lex, &remaining, 459 &curr, &prev); 460 if (result != ISC_R_SUCCESS) 461 goto done; 462 } 463 INSIST(remaining > 0U); 464 *curr++ = c; 465 *curr = '\0'; 466 remaining--; 467 break; 468 case lexstate_maybecomment: 469 if (c == '*' && 470 (lex->comments & ISC_LEXCOMMENT_C) != 0) { 471 state = lexstate_ccomment; 472 continue; 473 } else if (c == '/' && 474 (lex->comments & ISC_LEXCOMMENT_CPLUSPLUS) != 0) { 475 state = lexstate_eatline; 476 continue; 477 } 478 pushback(source, c); 479 c = '/'; 480 no_comments = 0; 481 state = saved_state; 482 goto no_read; 483 case lexstate_ccomment: 484 if (c == EOF) { 485 result = ISC_R_UNEXPECTEDEND; 486 goto done; 487 } 488 if (c == '*') 489 state = lexstate_ccommentend; 490 break; 491 case lexstate_ccommentend: 492 if (c == EOF) { 493 result = ISC_R_UNEXPECTEDEND; 494 goto done; 495 } 496 if (c == '/') { 497 /* 498 * C-style comments become a single space. 499 * We do this to ensure that a comment will 500 * act as a delimiter for strings and 501 * numbers. 502 */ 503 c = ' '; 504 no_comments = 0; 505 state = saved_state; 506 goto no_read; 507 } else if (c != '*') 508 state = lexstate_ccomment; 509 break; 510 case lexstate_eatline: 511 if ((c == '\n') || (c == EOF)) { 512 no_comments = 0; 513 state = saved_state; 514 goto no_read; 515 } 516 break; 517 case lexstate_qstring: 518 if (c == EOF) { 519 result = ISC_R_UNEXPECTEDEND; 520 goto done; 521 } 522 if (c == '"') { 523 if (escaped) { 524 escaped = 0; 525 /* 526 * Overwrite the preceding backslash. 527 */ 528 INSIST(prev != NULL); 529 *prev = '"'; 530 } else { 531 tokenp->type = isc_tokentype_qstring; 532 tokenp->value.as_textregion.base = 533 lex->data; 534 tokenp->value.as_textregion.length = 535 (unsigned int) 536 (lex->max_token - remaining); 537 no_comments = 0; 538 done = 1; 539 } 540 } else { 541 if (c == '\n' && !escaped && 542 (options & ISC_LEXOPT_QSTRINGMULTILINE) == 0) { 543 pushback(source, c); 544 result = ISC_R_UNBALANCEDQUOTES; 545 goto done; 546 } 547 if (c == '\\' && !escaped) 548 escaped = 1; 549 else 550 escaped = 0; 551 if (remaining == 0U) { 552 result = grow_data(lex, &remaining, 553 &curr, &prev); 554 if (result != ISC_R_SUCCESS) 555 goto done; 556 } 557 INSIST(remaining > 0U); 558 prev = curr; 559 *curr++ = c; 560 *curr = '\0'; 561 remaining--; 562 } 563 break; 564 default: 565 FATAL_ERROR(__FILE__, __LINE__, "Unexpected state %d", 566 state); 567 /* Does not return. */ 568 } 569 570 } while (!done); 571 572 result = ISC_R_SUCCESS; 573 done: 574 if (source->is_file) 575 funlockfile(source->input); 576 return (result); 577 } 578 579 void 580 isc_lex_ungettoken(isc_lex_t *lex, isc_token_t *tokenp) { 581 inputsource *source; 582 /* 583 * Unget the current token. 584 */ 585 586 source = HEAD(lex->sources); 587 REQUIRE(source != NULL); 588 REQUIRE(tokenp != NULL); 589 REQUIRE(isc_buffer_consumedlength(source->pushback) != 0 || 590 tokenp->type == isc_tokentype_eof); 591 592 UNUSED(tokenp); 593 594 isc_buffer_first(source->pushback); 595 lex->paren_count = lex->saved_paren_count; 596 source->line = source->saved_line; 597 source->at_eof = 0; 598 } 599 600 void 601 isc_lex_getlasttokentext(isc_lex_t *lex, isc_token_t *tokenp, isc_region_t *r) 602 { 603 inputsource *source; 604 605 source = HEAD(lex->sources); 606 REQUIRE(source != NULL); 607 REQUIRE(tokenp != NULL); 608 REQUIRE(isc_buffer_consumedlength(source->pushback) != 0 || 609 tokenp->type == isc_tokentype_eof); 610 611 UNUSED(tokenp); 612 613 INSIST(source->ignored <= isc_buffer_consumedlength(source->pushback)); 614 r->base = (unsigned char *)isc_buffer_base(source->pushback) + 615 source->ignored; 616 r->length = isc_buffer_consumedlength(source->pushback) - 617 source->ignored; 618 } 619 620 char * 621 isc_lex_getsourcename(isc_lex_t *lex) { 622 inputsource *source; 623 624 source = HEAD(lex->sources); 625 626 if (source == NULL) 627 return (NULL); 628 629 return (source->name); 630 } 631 632 unsigned long 633 isc_lex_getsourceline(isc_lex_t *lex) { 634 inputsource *source; 635 636 source = HEAD(lex->sources); 637 638 if (source == NULL) 639 return (0); 640 641 return (source->line); 642 } 643