1 /* 2 * Copyright (C) Internet Systems Consortium, Inc. ("ISC") 3 * 4 * Permission to use, copy, modify, and/or distribute this software for any 5 * purpose with or without fee is hereby granted, provided that the above 6 * copyright notice and this permission notice appear in all copies. 7 * 8 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH 9 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 10 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, 11 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 12 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 13 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 14 * PERFORMANCE OF THIS SOFTWARE. 15 */ 16 17 /* $Id: lex.c,v 1.15 2022/06/25 12:14:18 jsg Exp $ */ 18 19 /*! \file */ 20 21 #include <stdlib.h> 22 23 #include <isc/buffer.h> 24 25 #include <isc/lex.h> 26 27 #include <errno.h> 28 #include <string.h> 29 #include <isc/util.h> 30 31 #include "unix/errno2result.h" 32 33 typedef struct inputsource { 34 isc_result_t result; 35 int is_file; 36 int need_close; 37 int at_eof; 38 int last_was_eol; 39 isc_buffer_t * pushback; 40 unsigned int ignored; 41 void * input; 42 char * name; 43 unsigned long line; 44 unsigned long saved_line; 45 ISC_LINK(struct inputsource) link; 46 } inputsource; 47 48 struct isc_lex { 49 /* Unlocked. */ 50 size_t max_token; 51 char * data; 52 unsigned int comments; 53 int comment_ok; 54 int last_was_eol; 55 unsigned int paren_count; 56 unsigned int saved_paren_count; 57 isc_lexspecials_t specials; 58 LIST(struct inputsource) sources; 59 }; 60 61 static inline isc_result_t 62 grow_data(isc_lex_t *lex, size_t *remainingp, char **currp, char **prevp) { 63 char *tmp; 64 65 tmp = malloc(lex->max_token * 2 + 1); 66 if (tmp == NULL) 67 return (ISC_R_NOMEMORY); 68 memmove(tmp, lex->data, lex->max_token + 1); 69 *currp = tmp + (*currp - lex->data); 70 if (*prevp != NULL) 71 *prevp = tmp + (*prevp - lex->data); 72 free(lex->data); 73 lex->data = tmp; 74 *remainingp += lex->max_token; 75 lex->max_token *= 2; 76 return (ISC_R_SUCCESS); 77 } 78 79 isc_result_t 80 isc_lex_create(size_t max_token, isc_lex_t **lexp) { 81 isc_lex_t *lex; 82 83 /* 84 * Create a lexer. 85 */ 86 REQUIRE(lexp != NULL && *lexp == NULL); 87 88 if (max_token == 0U) 89 max_token = 1; 90 91 lex = malloc(sizeof(*lex)); 92 if (lex == NULL) 93 return (ISC_R_NOMEMORY); 94 lex->data = malloc(max_token + 1); 95 if (lex->data == NULL) { 96 free(lex); 97 return (ISC_R_NOMEMORY); 98 } 99 lex->max_token = max_token; 100 lex->comments = 0; 101 lex->comment_ok = 1; 102 lex->last_was_eol = 1; 103 lex->paren_count = 0; 104 lex->saved_paren_count = 0; 105 memset(lex->specials, 0, 256); 106 INIT_LIST(lex->sources); 107 108 *lexp = lex; 109 110 return (ISC_R_SUCCESS); 111 } 112 113 void 114 isc_lex_destroy(isc_lex_t **lexp) { 115 isc_lex_t *lex; 116 117 /* 118 * Destroy the lexer. 119 */ 120 121 REQUIRE(lexp != NULL); 122 lex = *lexp; 123 124 while (!EMPTY(lex->sources)) 125 RUNTIME_CHECK(isc_lex_close(lex) == ISC_R_SUCCESS); 126 if (lex->data != NULL) 127 free(lex->data); 128 free(lex); 129 130 *lexp = NULL; 131 } 132 133 void 134 isc_lex_setcomments(isc_lex_t *lex, unsigned int comments) { 135 /* 136 * Set allowed lexer commenting styles. 137 */ 138 139 lex->comments = comments; 140 } 141 142 void 143 isc_lex_setspecials(isc_lex_t *lex, isc_lexspecials_t specials) { 144 /* 145 * The characters in 'specials' are returned as tokens. Along with 146 * whitespace, they delimit strings and numbers. 147 */ 148 149 memmove(lex->specials, specials, 256); 150 } 151 152 static inline isc_result_t 153 new_source(isc_lex_t *lex, int is_file, int need_close, 154 void *input, const char *name) 155 { 156 inputsource *source; 157 isc_result_t result; 158 159 source = malloc(sizeof(*source)); 160 if (source == NULL) 161 return (ISC_R_NOMEMORY); 162 source->result = ISC_R_SUCCESS; 163 source->is_file = is_file; 164 source->need_close = need_close; 165 source->at_eof = 0; 166 source->last_was_eol = lex->last_was_eol; 167 source->input = input; 168 source->name = strdup(name); 169 if (source->name == NULL) { 170 free(source); 171 return (ISC_R_NOMEMORY); 172 } 173 source->pushback = NULL; 174 result = isc_buffer_allocate(&source->pushback, 175 (unsigned int)lex->max_token); 176 if (result != ISC_R_SUCCESS) { 177 free(source->name); 178 free(source); 179 return (result); 180 } 181 source->ignored = 0; 182 source->line = 1; 183 ISC_LIST_INITANDPREPEND(lex->sources, source, link); 184 185 return (ISC_R_SUCCESS); 186 } 187 188 isc_result_t 189 isc_lex_openfile(isc_lex_t *lex, const char *filename) { 190 isc_result_t result = ISC_R_SUCCESS; 191 FILE *stream = NULL; 192 193 /* 194 * Open 'filename' and make it the current input source for 'lex'. 195 */ 196 197 if ((stream = fopen(filename, "r")) == NULL) 198 return (isc__errno2result(errno)); 199 200 result = new_source(lex, 1, 1, stream, filename); 201 if (result != ISC_R_SUCCESS) 202 (void)fclose(stream); 203 return (result); 204 } 205 206 isc_result_t 207 isc_lex_close(isc_lex_t *lex) { 208 inputsource *source; 209 210 /* 211 * Close the most recently opened object (i.e. file or buffer). 212 */ 213 214 source = HEAD(lex->sources); 215 if (source == NULL) 216 return (ISC_R_NOMORE); 217 218 ISC_LIST_UNLINK(lex->sources, source, link); 219 lex->last_was_eol = source->last_was_eol; 220 if (source->is_file) { 221 if (source->need_close) 222 (void)fclose((FILE *)(source->input)); 223 } 224 free(source->name); 225 isc_buffer_free(&source->pushback); 226 free(source); 227 228 return (ISC_R_SUCCESS); 229 } 230 231 typedef enum { 232 lexstate_start, 233 lexstate_string, 234 lexstate_maybecomment, 235 lexstate_ccomment, 236 lexstate_ccommentend, 237 lexstate_eatline, 238 lexstate_qstring 239 } lexstate; 240 241 static void 242 pushback(inputsource *source, int c) { 243 REQUIRE(source->pushback->current > 0); 244 if (c == EOF) { 245 source->at_eof = 0; 246 return; 247 } 248 source->pushback->current--; 249 if (c == '\n') 250 source->line--; 251 } 252 253 static isc_result_t 254 pushandgrow(inputsource *source, int c) { 255 if (isc_buffer_availablelength(source->pushback) == 0) { 256 isc_buffer_t *tbuf = NULL; 257 unsigned int oldlen; 258 isc_region_t used; 259 isc_result_t result; 260 261 oldlen = isc_buffer_length(source->pushback); 262 result = isc_buffer_allocate(&tbuf, oldlen * 2); 263 if (result != ISC_R_SUCCESS) 264 return (result); 265 isc_buffer_usedregion(source->pushback, &used); 266 result = isc_buffer_copyregion(tbuf, &used); 267 INSIST(result == ISC_R_SUCCESS); 268 tbuf->current = source->pushback->current; 269 isc_buffer_free(&source->pushback); 270 source->pushback = tbuf; 271 } 272 isc_buffer_putuint8(source->pushback, (uint8_t)c); 273 return (ISC_R_SUCCESS); 274 } 275 276 isc_result_t 277 isc_lex_gettoken(isc_lex_t *lex, unsigned int options, isc_token_t *tokenp) { 278 inputsource *source; 279 int c; 280 int done = 0; 281 int no_comments = 0; 282 int escaped = 0; 283 lexstate state = lexstate_start; 284 lexstate saved_state = lexstate_start; 285 isc_buffer_t *buffer; 286 FILE *stream; 287 char *curr, *prev; 288 size_t remaining; 289 isc_result_t result; 290 291 /* 292 * Get the next token. 293 */ 294 295 source = HEAD(lex->sources); 296 REQUIRE(tokenp != NULL); 297 298 if (source == NULL) { 299 if ((options & ISC_LEXOPT_NOMORE) != 0) { 300 tokenp->type = isc_tokentype_nomore; 301 return (ISC_R_SUCCESS); 302 } 303 return (ISC_R_NOMORE); 304 } 305 306 if (source->result != ISC_R_SUCCESS) 307 return (source->result); 308 309 lex->saved_paren_count = lex->paren_count; 310 source->saved_line = source->line; 311 312 if (isc_buffer_remaininglength(source->pushback) == 0 && 313 source->at_eof) 314 { 315 if ((options & ISC_LEXOPT_EOF) != 0) { 316 tokenp->type = isc_tokentype_eof; 317 return (ISC_R_SUCCESS); 318 } 319 return (ISC_R_EOF); 320 } 321 322 isc_buffer_compact(source->pushback); 323 324 curr = lex->data; 325 *curr = '\0'; 326 327 prev = NULL; 328 remaining = lex->max_token; 329 330 if (source->is_file) 331 flockfile(source->input); 332 333 do { 334 if (isc_buffer_remaininglength(source->pushback) == 0) { 335 if (source->is_file) { 336 stream = source->input; 337 338 c = getc_unlocked(stream); 339 if (c == EOF) { 340 if (ferror(stream)) { 341 source->result = ISC_R_IOERROR; 342 result = source->result; 343 goto done; 344 } 345 source->at_eof = 1; 346 } 347 } else { 348 buffer = source->input; 349 350 if (buffer->current == buffer->used) { 351 c = EOF; 352 source->at_eof = 1; 353 } else { 354 c = *((unsigned char *)buffer->base + 355 buffer->current); 356 buffer->current++; 357 } 358 } 359 if (c != EOF) { 360 source->result = pushandgrow(source, c); 361 if (source->result != ISC_R_SUCCESS) { 362 result = source->result; 363 goto done; 364 } 365 } 366 } 367 368 if (!source->at_eof) { 369 if (state == lexstate_start) 370 /* Token has not started yet. */ 371 source->ignored = 372 isc_buffer_consumedlength(source->pushback); 373 c = isc_buffer_getuint8(source->pushback); 374 } else { 375 c = EOF; 376 } 377 378 if (c == '\n') 379 source->line++; 380 381 if (lex->comment_ok && !no_comments) { 382 if (c == '/' && 383 (lex->comments & 384 (ISC_LEXCOMMENT_C| 385 ISC_LEXCOMMENT_CPLUSPLUS)) != 0) { 386 saved_state = state; 387 state = lexstate_maybecomment; 388 no_comments = 1; 389 continue; 390 } else if (c == '#' && 391 ((lex->comments & ISC_LEXCOMMENT_SHELL) 392 != 0)) { 393 saved_state = state; 394 state = lexstate_eatline; 395 no_comments = 1; 396 continue; 397 } 398 } 399 400 no_read: 401 /* INSIST(c == EOF || (c >= 0 && c <= 255)); */ 402 switch (state) { 403 case lexstate_start: 404 if (c == EOF) { 405 lex->last_was_eol = 0; 406 if ((options & ISC_LEXOPT_EOF) == 0) { 407 result = ISC_R_EOF; 408 goto done; 409 } 410 tokenp->type = isc_tokentype_eof; 411 done = 1; 412 } else if (c == ' ' || c == '\t') { 413 lex->last_was_eol = 0; 414 } else if (c == '\n') { 415 lex->last_was_eol = 1; 416 } else if (c == '\r') { 417 lex->last_was_eol = 0; 418 } else if (c == '"' && 419 (options & ISC_LEXOPT_QSTRING) != 0) { 420 lex->last_was_eol = 0; 421 no_comments = 1; 422 state = lexstate_qstring; 423 } else if (lex->specials[c]) { 424 lex->last_was_eol = 0; 425 tokenp->type = isc_tokentype_special; 426 tokenp->value.as_char = c; 427 done = 1; 428 } else { 429 lex->last_was_eol = 0; 430 state = lexstate_string; 431 goto no_read; 432 } 433 break; 434 case lexstate_string: 435 /* 436 * EOF needs to be checked before lex->specials[c] 437 * as lex->specials[EOF] is not a good idea. 438 */ 439 if (c == '\r' || c == '\n' || c == EOF || 440 (!escaped && 441 (c == ' ' || c == '\t' || lex->specials[c]))) { 442 pushback(source, c); 443 if (source->result != ISC_R_SUCCESS) { 444 result = source->result; 445 goto done; 446 } 447 tokenp->type = isc_tokentype_string; 448 tokenp->value.as_textregion.base = lex->data; 449 tokenp->value.as_textregion.length = 450 (unsigned int) 451 (lex->max_token - remaining); 452 done = 1; 453 continue; 454 } 455 if (remaining == 0U) { 456 result = grow_data(lex, &remaining, 457 &curr, &prev); 458 if (result != ISC_R_SUCCESS) 459 goto done; 460 } 461 INSIST(remaining > 0U); 462 *curr++ = c; 463 *curr = '\0'; 464 remaining--; 465 break; 466 case lexstate_maybecomment: 467 if (c == '*' && 468 (lex->comments & ISC_LEXCOMMENT_C) != 0) { 469 state = lexstate_ccomment; 470 continue; 471 } else if (c == '/' && 472 (lex->comments & ISC_LEXCOMMENT_CPLUSPLUS) != 0) { 473 state = lexstate_eatline; 474 continue; 475 } 476 pushback(source, c); 477 c = '/'; 478 no_comments = 0; 479 state = saved_state; 480 goto no_read; 481 case lexstate_ccomment: 482 if (c == EOF) { 483 result = ISC_R_UNEXPECTEDEND; 484 goto done; 485 } 486 if (c == '*') 487 state = lexstate_ccommentend; 488 break; 489 case lexstate_ccommentend: 490 if (c == EOF) { 491 result = ISC_R_UNEXPECTEDEND; 492 goto done; 493 } 494 if (c == '/') { 495 /* 496 * C-style comments become a single space. 497 * We do this to ensure that a comment will 498 * act as a delimiter for strings and 499 * numbers. 500 */ 501 c = ' '; 502 no_comments = 0; 503 state = saved_state; 504 goto no_read; 505 } else if (c != '*') 506 state = lexstate_ccomment; 507 break; 508 case lexstate_eatline: 509 if ((c == '\n') || (c == EOF)) { 510 no_comments = 0; 511 state = saved_state; 512 goto no_read; 513 } 514 break; 515 case lexstate_qstring: 516 if (c == EOF) { 517 result = ISC_R_UNEXPECTEDEND; 518 goto done; 519 } 520 if (c == '"') { 521 if (escaped) { 522 escaped = 0; 523 /* 524 * Overwrite the preceding backslash. 525 */ 526 INSIST(prev != NULL); 527 *prev = '"'; 528 } else { 529 tokenp->type = isc_tokentype_qstring; 530 tokenp->value.as_textregion.base = 531 lex->data; 532 tokenp->value.as_textregion.length = 533 (unsigned int) 534 (lex->max_token - remaining); 535 no_comments = 0; 536 done = 1; 537 } 538 } else { 539 if (c == '\n' && !escaped && 540 (options & ISC_LEXOPT_QSTRINGMULTILINE) == 0) { 541 pushback(source, c); 542 result = ISC_R_UNBALANCEDQUOTES; 543 goto done; 544 } 545 if (c == '\\' && !escaped) 546 escaped = 1; 547 else 548 escaped = 0; 549 if (remaining == 0U) { 550 result = grow_data(lex, &remaining, 551 &curr, &prev); 552 if (result != ISC_R_SUCCESS) 553 goto done; 554 } 555 INSIST(remaining > 0U); 556 prev = curr; 557 *curr++ = c; 558 *curr = '\0'; 559 remaining--; 560 } 561 break; 562 default: 563 FATAL_ERROR(__FILE__, __LINE__, "Unexpected state %d", 564 state); 565 /* Does not return. */ 566 } 567 568 } while (!done); 569 570 result = ISC_R_SUCCESS; 571 done: 572 if (source->is_file) 573 funlockfile(source->input); 574 return (result); 575 } 576 577 void 578 isc_lex_ungettoken(isc_lex_t *lex, isc_token_t *tokenp) { 579 inputsource *source; 580 /* 581 * Unget the current token. 582 */ 583 584 source = HEAD(lex->sources); 585 REQUIRE(source != NULL); 586 REQUIRE(tokenp != NULL); 587 REQUIRE(isc_buffer_consumedlength(source->pushback) != 0 || 588 tokenp->type == isc_tokentype_eof); 589 590 UNUSED(tokenp); 591 592 isc_buffer_first(source->pushback); 593 lex->paren_count = lex->saved_paren_count; 594 source->line = source->saved_line; 595 source->at_eof = 0; 596 } 597 598 void 599 isc_lex_getlasttokentext(isc_lex_t *lex, isc_token_t *tokenp, isc_region_t *r) 600 { 601 inputsource *source; 602 603 source = HEAD(lex->sources); 604 REQUIRE(source != NULL); 605 REQUIRE(tokenp != NULL); 606 REQUIRE(isc_buffer_consumedlength(source->pushback) != 0 || 607 tokenp->type == isc_tokentype_eof); 608 609 UNUSED(tokenp); 610 611 INSIST(source->ignored <= isc_buffer_consumedlength(source->pushback)); 612 r->base = (unsigned char *)isc_buffer_base(source->pushback) + 613 source->ignored; 614 r->length = isc_buffer_consumedlength(source->pushback) - 615 source->ignored; 616 } 617 618 char * 619 isc_lex_getsourcename(isc_lex_t *lex) { 620 inputsource *source; 621 622 source = HEAD(lex->sources); 623 624 if (source == NULL) 625 return (NULL); 626 627 return (source->name); 628 } 629 630 unsigned long 631 isc_lex_getsourceline(isc_lex_t *lex) { 632 inputsource *source; 633 634 source = HEAD(lex->sources); 635 636 if (source == NULL) 637 return (0); 638 639 return (source->line); 640 } 641