1 /* 2 * Copyright (C) Internet Systems Consortium, Inc. ("ISC") 3 * 4 * Permission to use, copy, modify, and/or distribute this software for any 5 * purpose with or without fee is hereby granted, provided that the above 6 * copyright notice and this permission notice appear in all copies. 7 * 8 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH 9 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 10 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, 11 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 12 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 13 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 14 * PERFORMANCE OF THIS SOFTWARE. 15 */ 16 17 /* $Id: lex.c,v 1.14 2022/01/17 18:19:51 naddy Exp $ */ 18 19 /*! \file */ 20 21 #include <ctype.h> 22 #include <stdlib.h> 23 24 #include <isc/buffer.h> 25 26 #include <isc/lex.h> 27 28 #include <errno.h> 29 #include <string.h> 30 #include <isc/util.h> 31 32 #include "unix/errno2result.h" 33 34 typedef struct inputsource { 35 isc_result_t result; 36 int is_file; 37 int need_close; 38 int at_eof; 39 int last_was_eol; 40 isc_buffer_t * pushback; 41 unsigned int ignored; 42 void * input; 43 char * name; 44 unsigned long line; 45 unsigned long saved_line; 46 ISC_LINK(struct inputsource) link; 47 } inputsource; 48 49 struct isc_lex { 50 /* Unlocked. */ 51 size_t max_token; 52 char * data; 53 unsigned int comments; 54 int comment_ok; 55 int last_was_eol; 56 unsigned int paren_count; 57 unsigned int saved_paren_count; 58 isc_lexspecials_t specials; 59 LIST(struct inputsource) sources; 60 }; 61 62 static inline isc_result_t 63 grow_data(isc_lex_t *lex, size_t *remainingp, char **currp, char **prevp) { 64 char *tmp; 65 66 tmp = malloc(lex->max_token * 2 + 1); 67 if (tmp == NULL) 68 return (ISC_R_NOMEMORY); 69 memmove(tmp, lex->data, lex->max_token + 1); 70 *currp = tmp + (*currp - lex->data); 71 if (*prevp != NULL) 72 *prevp = tmp + (*prevp - lex->data); 73 free(lex->data); 74 lex->data = tmp; 75 *remainingp += lex->max_token; 76 lex->max_token *= 2; 77 return (ISC_R_SUCCESS); 78 } 79 80 isc_result_t 81 isc_lex_create(size_t max_token, isc_lex_t **lexp) { 82 isc_lex_t *lex; 83 84 /* 85 * Create a lexer. 86 */ 87 REQUIRE(lexp != NULL && *lexp == NULL); 88 89 if (max_token == 0U) 90 max_token = 1; 91 92 lex = malloc(sizeof(*lex)); 93 if (lex == NULL) 94 return (ISC_R_NOMEMORY); 95 lex->data = malloc(max_token + 1); 96 if (lex->data == NULL) { 97 free(lex); 98 return (ISC_R_NOMEMORY); 99 } 100 lex->max_token = max_token; 101 lex->comments = 0; 102 lex->comment_ok = 1; 103 lex->last_was_eol = 1; 104 lex->paren_count = 0; 105 lex->saved_paren_count = 0; 106 memset(lex->specials, 0, 256); 107 INIT_LIST(lex->sources); 108 109 *lexp = lex; 110 111 return (ISC_R_SUCCESS); 112 } 113 114 void 115 isc_lex_destroy(isc_lex_t **lexp) { 116 isc_lex_t *lex; 117 118 /* 119 * Destroy the lexer. 120 */ 121 122 REQUIRE(lexp != NULL); 123 lex = *lexp; 124 125 while (!EMPTY(lex->sources)) 126 RUNTIME_CHECK(isc_lex_close(lex) == ISC_R_SUCCESS); 127 if (lex->data != NULL) 128 free(lex->data); 129 free(lex); 130 131 *lexp = NULL; 132 } 133 134 void 135 isc_lex_setcomments(isc_lex_t *lex, unsigned int comments) { 136 /* 137 * Set allowed lexer commenting styles. 138 */ 139 140 lex->comments = comments; 141 } 142 143 void 144 isc_lex_setspecials(isc_lex_t *lex, isc_lexspecials_t specials) { 145 /* 146 * The characters in 'specials' are returned as tokens. Along with 147 * whitespace, they delimit strings and numbers. 148 */ 149 150 memmove(lex->specials, specials, 256); 151 } 152 153 static inline isc_result_t 154 new_source(isc_lex_t *lex, int is_file, int need_close, 155 void *input, const char *name) 156 { 157 inputsource *source; 158 isc_result_t result; 159 160 source = malloc(sizeof(*source)); 161 if (source == NULL) 162 return (ISC_R_NOMEMORY); 163 source->result = ISC_R_SUCCESS; 164 source->is_file = is_file; 165 source->need_close = need_close; 166 source->at_eof = 0; 167 source->last_was_eol = lex->last_was_eol; 168 source->input = input; 169 source->name = strdup(name); 170 if (source->name == NULL) { 171 free(source); 172 return (ISC_R_NOMEMORY); 173 } 174 source->pushback = NULL; 175 result = isc_buffer_allocate(&source->pushback, 176 (unsigned int)lex->max_token); 177 if (result != ISC_R_SUCCESS) { 178 free(source->name); 179 free(source); 180 return (result); 181 } 182 source->ignored = 0; 183 source->line = 1; 184 ISC_LIST_INITANDPREPEND(lex->sources, source, link); 185 186 return (ISC_R_SUCCESS); 187 } 188 189 isc_result_t 190 isc_lex_openfile(isc_lex_t *lex, const char *filename) { 191 isc_result_t result = ISC_R_SUCCESS; 192 FILE *stream = NULL; 193 194 /* 195 * Open 'filename' and make it the current input source for 'lex'. 196 */ 197 198 if ((stream = fopen(filename, "r")) == NULL) 199 return (isc__errno2result(errno)); 200 201 result = new_source(lex, 1, 1, stream, filename); 202 if (result != ISC_R_SUCCESS) 203 (void)fclose(stream); 204 return (result); 205 } 206 207 isc_result_t 208 isc_lex_close(isc_lex_t *lex) { 209 inputsource *source; 210 211 /* 212 * Close the most recently opened object (i.e. file or buffer). 213 */ 214 215 source = HEAD(lex->sources); 216 if (source == NULL) 217 return (ISC_R_NOMORE); 218 219 ISC_LIST_UNLINK(lex->sources, source, link); 220 lex->last_was_eol = source->last_was_eol; 221 if (source->is_file) { 222 if (source->need_close) 223 (void)fclose((FILE *)(source->input)); 224 } 225 free(source->name); 226 isc_buffer_free(&source->pushback); 227 free(source); 228 229 return (ISC_R_SUCCESS); 230 } 231 232 typedef enum { 233 lexstate_start, 234 lexstate_string, 235 lexstate_maybecomment, 236 lexstate_ccomment, 237 lexstate_ccommentend, 238 lexstate_eatline, 239 lexstate_qstring 240 } lexstate; 241 242 #define IWSEOL (ISC_LEXOPT_INITIALWS | ISC_LEXOPT_EOL) 243 244 static void 245 pushback(inputsource *source, int c) { 246 REQUIRE(source->pushback->current > 0); 247 if (c == EOF) { 248 source->at_eof = 0; 249 return; 250 } 251 source->pushback->current--; 252 if (c == '\n') 253 source->line--; 254 } 255 256 static isc_result_t 257 pushandgrow(inputsource *source, int c) { 258 if (isc_buffer_availablelength(source->pushback) == 0) { 259 isc_buffer_t *tbuf = NULL; 260 unsigned int oldlen; 261 isc_region_t used; 262 isc_result_t result; 263 264 oldlen = isc_buffer_length(source->pushback); 265 result = isc_buffer_allocate(&tbuf, oldlen * 2); 266 if (result != ISC_R_SUCCESS) 267 return (result); 268 isc_buffer_usedregion(source->pushback, &used); 269 result = isc_buffer_copyregion(tbuf, &used); 270 INSIST(result == ISC_R_SUCCESS); 271 tbuf->current = source->pushback->current; 272 isc_buffer_free(&source->pushback); 273 source->pushback = tbuf; 274 } 275 isc_buffer_putuint8(source->pushback, (uint8_t)c); 276 return (ISC_R_SUCCESS); 277 } 278 279 isc_result_t 280 isc_lex_gettoken(isc_lex_t *lex, unsigned int options, isc_token_t *tokenp) { 281 inputsource *source; 282 int c; 283 int done = 0; 284 int no_comments = 0; 285 int escaped = 0; 286 lexstate state = lexstate_start; 287 lexstate saved_state = lexstate_start; 288 isc_buffer_t *buffer; 289 FILE *stream; 290 char *curr, *prev; 291 size_t remaining; 292 isc_result_t result; 293 294 /* 295 * Get the next token. 296 */ 297 298 source = HEAD(lex->sources); 299 REQUIRE(tokenp != NULL); 300 301 if (source == NULL) { 302 if ((options & ISC_LEXOPT_NOMORE) != 0) { 303 tokenp->type = isc_tokentype_nomore; 304 return (ISC_R_SUCCESS); 305 } 306 return (ISC_R_NOMORE); 307 } 308 309 if (source->result != ISC_R_SUCCESS) 310 return (source->result); 311 312 lex->saved_paren_count = lex->paren_count; 313 source->saved_line = source->line; 314 315 if (isc_buffer_remaininglength(source->pushback) == 0 && 316 source->at_eof) 317 { 318 if ((options & ISC_LEXOPT_EOF) != 0) { 319 tokenp->type = isc_tokentype_eof; 320 return (ISC_R_SUCCESS); 321 } 322 return (ISC_R_EOF); 323 } 324 325 isc_buffer_compact(source->pushback); 326 327 curr = lex->data; 328 *curr = '\0'; 329 330 prev = NULL; 331 remaining = lex->max_token; 332 333 if (source->is_file) 334 flockfile(source->input); 335 336 do { 337 if (isc_buffer_remaininglength(source->pushback) == 0) { 338 if (source->is_file) { 339 stream = source->input; 340 341 c = getc_unlocked(stream); 342 if (c == EOF) { 343 if (ferror(stream)) { 344 source->result = ISC_R_IOERROR; 345 result = source->result; 346 goto done; 347 } 348 source->at_eof = 1; 349 } 350 } else { 351 buffer = source->input; 352 353 if (buffer->current == buffer->used) { 354 c = EOF; 355 source->at_eof = 1; 356 } else { 357 c = *((unsigned char *)buffer->base + 358 buffer->current); 359 buffer->current++; 360 } 361 } 362 if (c != EOF) { 363 source->result = pushandgrow(source, c); 364 if (source->result != ISC_R_SUCCESS) { 365 result = source->result; 366 goto done; 367 } 368 } 369 } 370 371 if (!source->at_eof) { 372 if (state == lexstate_start) 373 /* Token has not started yet. */ 374 source->ignored = 375 isc_buffer_consumedlength(source->pushback); 376 c = isc_buffer_getuint8(source->pushback); 377 } else { 378 c = EOF; 379 } 380 381 if (c == '\n') 382 source->line++; 383 384 if (lex->comment_ok && !no_comments) { 385 if (c == '/' && 386 (lex->comments & 387 (ISC_LEXCOMMENT_C| 388 ISC_LEXCOMMENT_CPLUSPLUS)) != 0) { 389 saved_state = state; 390 state = lexstate_maybecomment; 391 no_comments = 1; 392 continue; 393 } else if (c == '#' && 394 ((lex->comments & ISC_LEXCOMMENT_SHELL) 395 != 0)) { 396 saved_state = state; 397 state = lexstate_eatline; 398 no_comments = 1; 399 continue; 400 } 401 } 402 403 no_read: 404 /* INSIST(c == EOF || (c >= 0 && c <= 255)); */ 405 switch (state) { 406 case lexstate_start: 407 if (c == EOF) { 408 lex->last_was_eol = 0; 409 if ((options & ISC_LEXOPT_EOF) == 0) { 410 result = ISC_R_EOF; 411 goto done; 412 } 413 tokenp->type = isc_tokentype_eof; 414 done = 1; 415 } else if (c == ' ' || c == '\t') { 416 lex->last_was_eol = 0; 417 } else if (c == '\n') { 418 lex->last_was_eol = 1; 419 } else if (c == '\r') { 420 lex->last_was_eol = 0; 421 } else if (c == '"' && 422 (options & ISC_LEXOPT_QSTRING) != 0) { 423 lex->last_was_eol = 0; 424 no_comments = 1; 425 state = lexstate_qstring; 426 } else if (lex->specials[c]) { 427 lex->last_was_eol = 0; 428 tokenp->type = isc_tokentype_special; 429 tokenp->value.as_char = c; 430 done = 1; 431 } else { 432 lex->last_was_eol = 0; 433 state = lexstate_string; 434 goto no_read; 435 } 436 break; 437 case lexstate_string: 438 /* 439 * EOF needs to be checked before lex->specials[c] 440 * as lex->specials[EOF] is not a good idea. 441 */ 442 if (c == '\r' || c == '\n' || c == EOF || 443 (!escaped && 444 (c == ' ' || c == '\t' || lex->specials[c]))) { 445 pushback(source, c); 446 if (source->result != ISC_R_SUCCESS) { 447 result = source->result; 448 goto done; 449 } 450 tokenp->type = isc_tokentype_string; 451 tokenp->value.as_textregion.base = lex->data; 452 tokenp->value.as_textregion.length = 453 (unsigned int) 454 (lex->max_token - remaining); 455 done = 1; 456 continue; 457 } 458 if (remaining == 0U) { 459 result = grow_data(lex, &remaining, 460 &curr, &prev); 461 if (result != ISC_R_SUCCESS) 462 goto done; 463 } 464 INSIST(remaining > 0U); 465 *curr++ = c; 466 *curr = '\0'; 467 remaining--; 468 break; 469 case lexstate_maybecomment: 470 if (c == '*' && 471 (lex->comments & ISC_LEXCOMMENT_C) != 0) { 472 state = lexstate_ccomment; 473 continue; 474 } else if (c == '/' && 475 (lex->comments & ISC_LEXCOMMENT_CPLUSPLUS) != 0) { 476 state = lexstate_eatline; 477 continue; 478 } 479 pushback(source, c); 480 c = '/'; 481 no_comments = 0; 482 state = saved_state; 483 goto no_read; 484 case lexstate_ccomment: 485 if (c == EOF) { 486 result = ISC_R_UNEXPECTEDEND; 487 goto done; 488 } 489 if (c == '*') 490 state = lexstate_ccommentend; 491 break; 492 case lexstate_ccommentend: 493 if (c == EOF) { 494 result = ISC_R_UNEXPECTEDEND; 495 goto done; 496 } 497 if (c == '/') { 498 /* 499 * C-style comments become a single space. 500 * We do this to ensure that a comment will 501 * act as a delimiter for strings and 502 * numbers. 503 */ 504 c = ' '; 505 no_comments = 0; 506 state = saved_state; 507 goto no_read; 508 } else if (c != '*') 509 state = lexstate_ccomment; 510 break; 511 case lexstate_eatline: 512 if ((c == '\n') || (c == EOF)) { 513 no_comments = 0; 514 state = saved_state; 515 goto no_read; 516 } 517 break; 518 case lexstate_qstring: 519 if (c == EOF) { 520 result = ISC_R_UNEXPECTEDEND; 521 goto done; 522 } 523 if (c == '"') { 524 if (escaped) { 525 escaped = 0; 526 /* 527 * Overwrite the preceding backslash. 528 */ 529 INSIST(prev != NULL); 530 *prev = '"'; 531 } else { 532 tokenp->type = isc_tokentype_qstring; 533 tokenp->value.as_textregion.base = 534 lex->data; 535 tokenp->value.as_textregion.length = 536 (unsigned int) 537 (lex->max_token - remaining); 538 no_comments = 0; 539 done = 1; 540 } 541 } else { 542 if (c == '\n' && !escaped && 543 (options & ISC_LEXOPT_QSTRINGMULTILINE) == 0) { 544 pushback(source, c); 545 result = ISC_R_UNBALANCEDQUOTES; 546 goto done; 547 } 548 if (c == '\\' && !escaped) 549 escaped = 1; 550 else 551 escaped = 0; 552 if (remaining == 0U) { 553 result = grow_data(lex, &remaining, 554 &curr, &prev); 555 if (result != ISC_R_SUCCESS) 556 goto done; 557 } 558 INSIST(remaining > 0U); 559 prev = curr; 560 *curr++ = c; 561 *curr = '\0'; 562 remaining--; 563 } 564 break; 565 default: 566 FATAL_ERROR(__FILE__, __LINE__, "Unexpected state %d", 567 state); 568 /* Does not return. */ 569 } 570 571 } while (!done); 572 573 result = ISC_R_SUCCESS; 574 done: 575 if (source->is_file) 576 funlockfile(source->input); 577 return (result); 578 } 579 580 void 581 isc_lex_ungettoken(isc_lex_t *lex, isc_token_t *tokenp) { 582 inputsource *source; 583 /* 584 * Unget the current token. 585 */ 586 587 source = HEAD(lex->sources); 588 REQUIRE(source != NULL); 589 REQUIRE(tokenp != NULL); 590 REQUIRE(isc_buffer_consumedlength(source->pushback) != 0 || 591 tokenp->type == isc_tokentype_eof); 592 593 UNUSED(tokenp); 594 595 isc_buffer_first(source->pushback); 596 lex->paren_count = lex->saved_paren_count; 597 source->line = source->saved_line; 598 source->at_eof = 0; 599 } 600 601 void 602 isc_lex_getlasttokentext(isc_lex_t *lex, isc_token_t *tokenp, isc_region_t *r) 603 { 604 inputsource *source; 605 606 source = HEAD(lex->sources); 607 REQUIRE(source != NULL); 608 REQUIRE(tokenp != NULL); 609 REQUIRE(isc_buffer_consumedlength(source->pushback) != 0 || 610 tokenp->type == isc_tokentype_eof); 611 612 UNUSED(tokenp); 613 614 INSIST(source->ignored <= isc_buffer_consumedlength(source->pushback)); 615 r->base = (unsigned char *)isc_buffer_base(source->pushback) + 616 source->ignored; 617 r->length = isc_buffer_consumedlength(source->pushback) - 618 source->ignored; 619 } 620 621 char * 622 isc_lex_getsourcename(isc_lex_t *lex) { 623 inputsource *source; 624 625 source = HEAD(lex->sources); 626 627 if (source == NULL) 628 return (NULL); 629 630 return (source->name); 631 } 632 633 unsigned long 634 isc_lex_getsourceline(isc_lex_t *lex) { 635 inputsource *source; 636 637 source = HEAD(lex->sources); 638 639 if (source == NULL) 640 return (0); 641 642 return (source->line); 643 } 644