1 /* Data and functions related to line maps and input files. 2 Copyright (C) 2004-2020 Free Software Foundation, Inc. 3 4 This file is part of GCC. 5 6 GCC is free software; you can redistribute it and/or modify it under 7 the terms of the GNU General Public License as published by the Free 8 Software Foundation; either version 3, or (at your option) any later 9 version. 10 11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 12 WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GCC; see the file COPYING3. If not see 18 <http://www.gnu.org/licenses/>. */ 19 20 #include "config.h" 21 #include "system.h" 22 #include "coretypes.h" 23 #include "intl.h" 24 #include "diagnostic.h" 25 #include "diagnostic-core.h" 26 #include "selftest.h" 27 #include "cpplib.h" 28 29 #ifndef HAVE_ICONV 30 #define HAVE_ICONV 0 31 #endif 32 33 /* This is a cache used by get_next_line to store the content of a 34 file to be searched for file lines. */ 35 class fcache 36 { 37 public: 38 /* These are information used to store a line boundary. */ 39 class line_info 40 { 41 public: 42 /* The line number. It starts from 1. */ 43 size_t line_num; 44 45 /* The position (byte count) of the beginning of the line, 46 relative to the file data pointer. This starts at zero. */ 47 size_t start_pos; 48 49 /* The position (byte count) of the last byte of the line. This 50 normally points to the '\n' character, or to one byte after the 51 last byte of the file, if the file doesn't contain a '\n' 52 character. */ 53 size_t end_pos; 54 55 line_info (size_t l, size_t s, size_t e) 56 : line_num (l), start_pos (s), end_pos (e) 57 {} 58 59 line_info () 60 :line_num (0), start_pos (0), end_pos (0) 61 {} 62 }; 63 64 /* The number of time this file has been accessed. 
This is used 65 to designate which file cache to evict from the cache 66 array. */ 67 unsigned use_count; 68 69 /* The file_path is the key for identifying a particular file in 70 the cache. 71 For libcpp-using code, the underlying buffer for this field is 72 owned by the corresponding _cpp_file within the cpp_reader. */ 73 const char *file_path; 74 75 FILE *fp; 76 77 /* This points to the content of the file that we've read so 78 far. */ 79 char *data; 80 81 /* The size of the DATA array above.*/ 82 size_t size; 83 84 /* The number of bytes read from the underlying file so far. This 85 must be less (or equal) than SIZE above. */ 86 size_t nb_read; 87 88 /* The index of the beginning of the current line. */ 89 size_t line_start_idx; 90 91 /* The number of the previous line read. This starts at 1. Zero 92 means we've read no line so far. */ 93 size_t line_num; 94 95 /* This is the total number of lines of the current file. At the 96 moment, we try to get this information from the line map 97 subsystem. Note that this is just a hint. When using the C++ 98 front-end, this hint is correct because the input file is then 99 completely tokenized before parsing starts; so the line map knows 100 the number of lines before compilation really starts. For e.g, 101 the C front-end, it can happen that we start emitting diagnostics 102 before the line map has seen the end of the file. */ 103 size_t total_lines; 104 105 /* Could this file be missing a trailing newline on its final line? 106 Initially true (to cope with empty files), set to true/false 107 as each line is read. */ 108 bool missing_trailing_newline; 109 110 /* This is a record of the beginning and end of the lines we've seen 111 while reading the file. This is useful to avoid walking the data 112 from the beginning when we are asked to read a line that is 113 before LINE_START_IDX above. Note that the maximum size of this 114 record is fcache_line_record_size, so that the memory consumption 115 doesn't explode. 
We thus scale total_lines down to 116 fcache_line_record_size. */ 117 vec<line_info, va_heap> line_record; 118 119 fcache (); 120 ~fcache (); 121 }; 122 123 /* Current position in real source file. */ 124 125 location_t input_location = UNKNOWN_LOCATION; 126 127 class line_maps *line_table; 128 129 /* A stashed copy of "line_table" for use by selftest::line_table_test. 130 This needs to be a global so that it can be a GC root, and thus 131 prevent the stashed copy from being garbage-collected if the GC runs 132 during a line_table_test. */ 133 134 class line_maps *saved_line_table; 135 136 static fcache *fcache_tab; 137 static const size_t fcache_tab_size = 16; 138 static const size_t fcache_buffer_size = 4 * 1024; 139 static const size_t fcache_line_record_size = 100; 140 141 /* Expand the source location LOC into a human readable location. If 142 LOC resolves to a builtin location, the file name of the readable 143 location is set to the string "<built-in>". If EXPANSION_POINT_P is 144 TRUE and LOC is virtual, then it is resolved to the expansion 145 point of the involved macro. Otherwise, it is resolved to the 146 spelling location of the token. 147 148 When resolving to the spelling location of the token, if the 149 resulting location is for a built-in location (that is, it has no 150 associated line/column) in the context of a macro expansion, the 151 returned location is the first one (while unwinding the macro 152 location towards its expansion point) that is in real source 153 code. 154 155 ASPECT controls which part of the location to use. 
*/ 156 157 static expanded_location 158 expand_location_1 (location_t loc, 159 bool expansion_point_p, 160 enum location_aspect aspect) 161 { 162 expanded_location xloc; 163 const line_map_ordinary *map; 164 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT; 165 tree block = NULL; 166 167 if (IS_ADHOC_LOC (loc)) 168 { 169 block = LOCATION_BLOCK (loc); 170 loc = LOCATION_LOCUS (loc); 171 } 172 173 memset (&xloc, 0, sizeof (xloc)); 174 175 if (loc >= RESERVED_LOCATION_COUNT) 176 { 177 if (!expansion_point_p) 178 { 179 /* We want to resolve LOC to its spelling location. 180 181 But if that spelling location is a reserved location that 182 appears in the context of a macro expansion (like for a 183 location for a built-in token), let's consider the first 184 location (toward the expansion point) that is not reserved; 185 that is, the first location that is in real source code. */ 186 loc = linemap_unwind_to_first_non_reserved_loc (line_table, 187 loc, NULL); 188 lrk = LRK_SPELLING_LOCATION; 189 } 190 loc = linemap_resolve_location (line_table, loc, lrk, &map); 191 192 /* loc is now either in an ordinary map, or is a reserved location. 193 If it is a compound location, the caret is in a spelling location, 194 but the start/finish might still be a virtual location. 195 Depending of what the caller asked for, we may need to recurse 196 one level in order to resolve any virtual locations in the 197 end-points. */ 198 switch (aspect) 199 { 200 default: 201 gcc_unreachable (); 202 /* Fall through. 
*/ 203 case LOCATION_ASPECT_CARET: 204 break; 205 case LOCATION_ASPECT_START: 206 { 207 location_t start = get_start (loc); 208 if (start != loc) 209 return expand_location_1 (start, expansion_point_p, aspect); 210 } 211 break; 212 case LOCATION_ASPECT_FINISH: 213 { 214 location_t finish = get_finish (loc); 215 if (finish != loc) 216 return expand_location_1 (finish, expansion_point_p, aspect); 217 } 218 break; 219 } 220 xloc = linemap_expand_location (line_table, map, loc); 221 } 222 223 xloc.data = block; 224 if (loc <= BUILTINS_LOCATION) 225 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>"); 226 227 return xloc; 228 } 229 230 /* Initialize the set of cache used for files accessed by caret 231 diagnostic. */ 232 233 static void 234 diagnostic_file_cache_init (void) 235 { 236 if (fcache_tab == NULL) 237 fcache_tab = new fcache[fcache_tab_size]; 238 } 239 240 /* Free the resources used by the set of cache used for files accessed 241 by caret diagnostic. */ 242 243 void 244 diagnostic_file_cache_fini (void) 245 { 246 if (fcache_tab) 247 { 248 delete [] (fcache_tab); 249 fcache_tab = NULL; 250 } 251 } 252 253 /* Return the total lines number that have been read so far by the 254 line map (in the preprocessor) so far. For languages like C++ that 255 entirely preprocess the input file before starting to parse, this 256 equals the actual number of lines of the file. */ 257 258 static size_t 259 total_lines_num (const char *file_path) 260 { 261 size_t r = 0; 262 location_t l = 0; 263 if (linemap_get_file_highest_location (line_table, file_path, &l)) 264 { 265 gcc_assert (l >= RESERVED_LOCATION_COUNT); 266 expanded_location xloc = expand_location (l); 267 r = xloc.line; 268 } 269 return r; 270 } 271 272 /* Lookup the cache used for the content of a given file accessed by 273 caret diagnostic. Return the found cached file, or NULL if no 274 cached file was found. 
*/ 275 276 static fcache* 277 lookup_file_in_cache_tab (const char *file_path) 278 { 279 if (file_path == NULL) 280 return NULL; 281 282 diagnostic_file_cache_init (); 283 284 /* This will contain the found cached file. */ 285 fcache *r = NULL; 286 for (unsigned i = 0; i < fcache_tab_size; ++i) 287 { 288 fcache *c = &fcache_tab[i]; 289 if (c->file_path && !strcmp (c->file_path, file_path)) 290 { 291 ++c->use_count; 292 r = c; 293 } 294 } 295 296 if (r) 297 ++r->use_count; 298 299 return r; 300 } 301 302 /* Purge any mention of FILENAME from the cache of files used for 303 printing source code. For use in selftests when working 304 with tempfiles. */ 305 306 void 307 diagnostics_file_cache_forcibly_evict_file (const char *file_path) 308 { 309 gcc_assert (file_path); 310 311 fcache *r = lookup_file_in_cache_tab (file_path); 312 if (!r) 313 /* Not found. */ 314 return; 315 316 r->file_path = NULL; 317 if (r->fp) 318 fclose (r->fp); 319 r->fp = NULL; 320 r->nb_read = 0; 321 r->line_start_idx = 0; 322 r->line_num = 0; 323 r->line_record.truncate (0); 324 r->use_count = 0; 325 r->total_lines = 0; 326 r->missing_trailing_newline = true; 327 } 328 329 /* Return the file cache that has been less used, recently, or the 330 first empty one. If HIGHEST_USE_COUNT is non-null, 331 *HIGHEST_USE_COUNT is set to the highest use count of the entries 332 in the cache table. */ 333 334 static fcache* 335 evicted_cache_tab_entry (unsigned *highest_use_count) 336 { 337 diagnostic_file_cache_init (); 338 339 fcache *to_evict = &fcache_tab[0]; 340 unsigned huc = to_evict->use_count; 341 for (unsigned i = 1; i < fcache_tab_size; ++i) 342 { 343 fcache *c = &fcache_tab[i]; 344 bool c_is_empty = (c->file_path == NULL); 345 346 if (c->use_count < to_evict->use_count 347 || (to_evict->file_path && c_is_empty)) 348 /* We evict C because it's either an entry with a lower use 349 count or one that is empty. 
*/ 350 to_evict = c; 351 352 if (huc < c->use_count) 353 huc = c->use_count; 354 355 if (c_is_empty) 356 /* We've reached the end of the cache; subsequent elements are 357 all empty. */ 358 break; 359 } 360 361 if (highest_use_count) 362 *highest_use_count = huc; 363 364 return to_evict; 365 } 366 367 /* Create the cache used for the content of a given file to be 368 accessed by caret diagnostic. This cache is added to an array of 369 cache and can be retrieved by lookup_file_in_cache_tab. This 370 function returns the created cache. Note that only the last 371 fcache_tab_size files are cached. */ 372 373 static fcache* 374 add_file_to_cache_tab (const char *file_path) 375 { 376 377 FILE *fp = fopen (file_path, "r"); 378 if (fp == NULL) 379 return NULL; 380 381 unsigned highest_use_count = 0; 382 fcache *r = evicted_cache_tab_entry (&highest_use_count); 383 r->file_path = file_path; 384 if (r->fp) 385 fclose (r->fp); 386 r->fp = fp; 387 r->nb_read = 0; 388 r->line_start_idx = 0; 389 r->line_num = 0; 390 r->line_record.truncate (0); 391 /* Ensure that this cache entry doesn't get evicted next time 392 add_file_to_cache_tab is called. */ 393 r->use_count = ++highest_use_count; 394 r->total_lines = total_lines_num (file_path); 395 r->missing_trailing_newline = true; 396 397 return r; 398 } 399 400 /* Lookup the cache used for the content of a given file accessed by 401 caret diagnostic. If no cached file was found, create a new cache 402 for this file, add it to the array of cached file and return 403 it. */ 404 405 static fcache* 406 lookup_or_add_file_to_cache_tab (const char *file_path) 407 { 408 fcache *r = lookup_file_in_cache_tab (file_path); 409 if (r == NULL) 410 r = add_file_to_cache_tab (file_path); 411 return r; 412 } 413 414 /* Default constructor for a cache of file used by caret 415 diagnostic. 
*/ 416 417 fcache::fcache () 418 : use_count (0), file_path (NULL), fp (NULL), data (0), 419 size (0), nb_read (0), line_start_idx (0), line_num (0), 420 total_lines (0), missing_trailing_newline (true) 421 { 422 line_record.create (0); 423 } 424 425 /* Destructor for a cache of file used by caret diagnostic. */ 426 427 fcache::~fcache () 428 { 429 if (fp) 430 { 431 fclose (fp); 432 fp = NULL; 433 } 434 if (data) 435 { 436 XDELETEVEC (data); 437 data = 0; 438 } 439 line_record.release (); 440 } 441 442 /* Returns TRUE iff the cache would need to be filled with data coming 443 from the file. That is, either the cache is empty or full or the 444 current line is empty. Note that if the cache is full, it would 445 need to be extended and filled again. */ 446 447 static bool 448 needs_read (fcache *c) 449 { 450 return (c->nb_read == 0 451 || c->nb_read == c->size 452 || (c->line_start_idx >= c->nb_read - 1)); 453 } 454 455 /* Return TRUE iff the cache is full and thus needs to be 456 extended. */ 457 458 static bool 459 needs_grow (fcache *c) 460 { 461 return c->nb_read == c->size; 462 } 463 464 /* Grow the cache if it needs to be extended. */ 465 466 static void 467 maybe_grow (fcache *c) 468 { 469 if (!needs_grow (c)) 470 return; 471 472 size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2; 473 c->data = XRESIZEVEC (char, c->data, size); 474 c->size = size; 475 } 476 477 /* Read more data into the cache. Extends the cache if need be. 478 Returns TRUE iff new data could be read. */ 479 480 static bool 481 read_data (fcache *c) 482 { 483 if (feof (c->fp) || ferror (c->fp)) 484 return false; 485 486 maybe_grow (c); 487 488 char * from = c->data + c->nb_read; 489 size_t to_read = c->size - c->nb_read; 490 size_t nb_read = fread (from, 1, to_read, c->fp); 491 492 if (ferror (c->fp)) 493 return false; 494 495 c->nb_read += nb_read; 496 return !!nb_read; 497 } 498 499 /* Read new data iff the cache needs to be filled with more data 500 coming from the file FP. 
Return TRUE iff the cache was filled with 501 mode data. */ 502 503 static bool 504 maybe_read_data (fcache *c) 505 { 506 if (!needs_read (c)) 507 return false; 508 return read_data (c); 509 } 510 511 /* Read a new line from file FP, using C as a cache for the data 512 coming from the file. Upon successful completion, *LINE is set to 513 the beginning of the line found. *LINE points directly in the 514 line cache and is only valid until the next call of get_next_line. 515 *LINE_LEN is set to the length of the line. Note that the line 516 does not contain any terminal delimiter. This function returns 517 true if some data was read or process from the cache, false 518 otherwise. Note that subsequent calls to get_next_line might 519 make the content of *LINE invalid. */ 520 521 static bool 522 get_next_line (fcache *c, char **line, ssize_t *line_len) 523 { 524 /* Fill the cache with data to process. */ 525 maybe_read_data (c); 526 527 size_t remaining_size = c->nb_read - c->line_start_idx; 528 if (remaining_size == 0) 529 /* There is no more data to process. */ 530 return false; 531 532 char *line_start = c->data + c->line_start_idx; 533 534 char *next_line_start = NULL; 535 size_t len = 0; 536 char *line_end = (char *) memchr (line_start, '\n', remaining_size); 537 if (line_end == NULL) 538 { 539 /* We haven't found the end-of-line delimiter in the cache. 540 Fill the cache with more data from the file and look for the 541 '\n'. */ 542 while (maybe_read_data (c)) 543 { 544 line_start = c->data + c->line_start_idx; 545 remaining_size = c->nb_read - c->line_start_idx; 546 line_end = (char *) memchr (line_start, '\n', remaining_size); 547 if (line_end != NULL) 548 { 549 next_line_start = line_end + 1; 550 break; 551 } 552 } 553 if (line_end == NULL) 554 { 555 /* We've loadded all the file into the cache and still no 556 '\n'. Let's say the line ends up at one byte passed the 557 end of the file. 
This is to stay consistent with the case 558 of when the line ends up with a '\n' and line_end points to 559 that terminal '\n'. That consistency is useful below in 560 the len calculation. */ 561 line_end = c->data + c->nb_read ; 562 c->missing_trailing_newline = true; 563 } 564 else 565 c->missing_trailing_newline = false; 566 } 567 else 568 { 569 next_line_start = line_end + 1; 570 c->missing_trailing_newline = false; 571 } 572 573 if (ferror (c->fp)) 574 return false; 575 576 /* At this point, we've found the end of the of line. It either 577 points to the '\n' or to one byte after the last byte of the 578 file. */ 579 gcc_assert (line_end != NULL); 580 581 len = line_end - line_start; 582 583 if (c->line_start_idx < c->nb_read) 584 *line = line_start; 585 586 ++c->line_num; 587 588 /* Before we update our line record, make sure the hint about the 589 total number of lines of the file is correct. If it's not, then 590 we give up recording line boundaries from now on. */ 591 bool update_line_record = true; 592 if (c->line_num > c->total_lines) 593 update_line_record = false; 594 595 /* Now update our line record so that re-reading lines from the 596 before c->line_start_idx is faster. */ 597 if (update_line_record 598 && c->line_record.length () < fcache_line_record_size) 599 { 600 /* If the file lines fits in the line record, we just record all 601 its lines ...*/ 602 if (c->total_lines <= fcache_line_record_size 603 && c->line_num > c->line_record.length ()) 604 c->line_record.safe_push (fcache::line_info (c->line_num, 605 c->line_start_idx, 606 line_end - c->data)); 607 else if (c->total_lines > fcache_line_record_size) 608 { 609 /* ... otherwise, we just scale total_lines down to 610 (fcache_line_record_size lines. 
*/ 611 size_t n = (c->line_num * fcache_line_record_size) / c->total_lines; 612 if (c->line_record.length () == 0 613 || n >= c->line_record.length ()) 614 c->line_record.safe_push (fcache::line_info (c->line_num, 615 c->line_start_idx, 616 line_end - c->data)); 617 } 618 } 619 620 /* Update c->line_start_idx so that it points to the next line to be 621 read. */ 622 if (next_line_start) 623 c->line_start_idx = next_line_start - c->data; 624 else 625 /* We didn't find any terminal '\n'. Let's consider that the end 626 of line is the end of the data in the cache. The next 627 invocation of get_next_line will either read more data from the 628 underlying file or return false early because we've reached the 629 end of the file. */ 630 c->line_start_idx = c->nb_read; 631 632 *line_len = len; 633 634 return true; 635 } 636 637 /* Consume the next bytes coming from the cache (or from its 638 underlying file if there are remaining unread bytes in the file) 639 until we reach the next end-of-line (or end-of-file). There is no 640 copying from the cache involved. Return TRUE upon successful 641 completion. */ 642 643 static bool 644 goto_next_line (fcache *cache) 645 { 646 char *l; 647 ssize_t len; 648 649 return get_next_line (cache, &l, &len); 650 } 651 652 /* Read an arbitrary line number LINE_NUM from the file cached in C. 653 If the line was read successfully, *LINE points to the beginning 654 of the line in the file cache and *LINE_LEN is the length of the 655 line. *LINE is not nul-terminated, but may contain zero bytes. 656 *LINE is only valid until the next call of read_line_num. 657 This function returns bool if a line was read. */ 658 659 static bool 660 read_line_num (fcache *c, size_t line_num, 661 char **line, ssize_t *line_len) 662 { 663 gcc_assert (line_num > 0); 664 665 if (line_num <= c->line_num) 666 { 667 /* We've been asked to read lines that are before c->line_num. 
668 So lets use our line record (if it's not empty) to try to 669 avoid re-reading the file from the beginning again. */ 670 671 if (c->line_record.is_empty ()) 672 { 673 c->line_start_idx = 0; 674 c->line_num = 0; 675 } 676 else 677 { 678 fcache::line_info *i = NULL; 679 if (c->total_lines <= fcache_line_record_size) 680 { 681 /* In languages where the input file is not totally 682 preprocessed up front, the c->total_lines hint 683 can be smaller than the number of lines of the 684 file. In that case, only the first 685 c->total_lines have been recorded. 686 687 Otherwise, the first c->total_lines we've read have 688 their start/end recorded here. */ 689 i = (line_num <= c->total_lines) 690 ? &c->line_record[line_num - 1] 691 : &c->line_record[c->total_lines - 1]; 692 gcc_assert (i->line_num <= line_num); 693 } 694 else 695 { 696 /* So the file had more lines than our line record 697 size. Thus the number of lines we've recorded has 698 been scaled down to fcache_line_reacord_size. Let's 699 pick the start/end of the recorded line that is 700 closest to line_num. */ 701 size_t n = (line_num <= c->total_lines) 702 ? line_num * fcache_line_record_size / c->total_lines 703 : c ->line_record.length () - 1; 704 if (n < c->line_record.length ()) 705 { 706 i = &c->line_record[n]; 707 gcc_assert (i->line_num <= line_num); 708 } 709 } 710 711 if (i && i->line_num == line_num) 712 { 713 /* We have the start/end of the line. */ 714 *line = c->data + i->start_pos; 715 *line_len = i->end_pos - i->start_pos; 716 return true; 717 } 718 719 if (i) 720 { 721 c->line_start_idx = i->start_pos; 722 c->line_num = i->line_num - 1; 723 } 724 else 725 { 726 c->line_start_idx = 0; 727 c->line_num = 0; 728 } 729 } 730 } 731 732 /* Let's walk from line c->line_num up to line_num - 1, without 733 copying any line. */ 734 while (c->line_num < line_num - 1) 735 if (!goto_next_line (c)) 736 return false; 737 738 /* The line we want is the next one. Let's read and copy it back to 739 the caller. 
*/ 740 return get_next_line (c, line, line_len); 741 } 742 743 /* Return the physical source line that corresponds to FILE_PATH/LINE. 744 The line is not nul-terminated. The returned pointer is only 745 valid until the next call of location_get_source_line. 746 Note that the line can contain several null characters, 747 so the returned value's length has the actual length of the line. 748 If the function fails, a NULL char_span is returned. */ 749 750 char_span 751 location_get_source_line (const char *file_path, int line) 752 { 753 char *buffer = NULL; 754 ssize_t len; 755 756 if (line == 0) 757 return char_span (NULL, 0); 758 759 fcache *c = lookup_or_add_file_to_cache_tab (file_path); 760 if (c == NULL) 761 return char_span (NULL, 0); 762 763 bool read = read_line_num (c, line, &buffer, &len); 764 if (!read) 765 return char_span (NULL, 0); 766 767 return char_span (buffer, len); 768 } 769 770 /* Determine if FILE_PATH missing a trailing newline on its final line. 771 Only valid to call once all of the file has been loaded, by 772 requesting a line number beyond the end of the file. */ 773 774 bool 775 location_missing_trailing_newline (const char *file_path) 776 { 777 fcache *c = lookup_or_add_file_to_cache_tab (file_path); 778 if (c == NULL) 779 return false; 780 781 return c->missing_trailing_newline; 782 } 783 784 /* Test if the location originates from the spelling location of a 785 builtin-tokens. That is, return TRUE if LOC is a (possibly 786 virtual) location of a built-in token that appears in the expansion 787 list of a macro. Please note that this function also works on 788 tokens that result from built-in tokens. For instance, the 789 function would return true if passed a token "4" that is the result 790 of the expansion of the built-in __LINE__ macro. 
*/ 791 bool 792 is_location_from_builtin_token (location_t loc) 793 { 794 const line_map_ordinary *map = NULL; 795 loc = linemap_resolve_location (line_table, loc, 796 LRK_SPELLING_LOCATION, &map); 797 return loc == BUILTINS_LOCATION; 798 } 799 800 /* Expand the source location LOC into a human readable location. If 801 LOC is virtual, it resolves to the expansion point of the involved 802 macro. If LOC resolves to a builtin location, the file name of the 803 readable location is set to the string "<built-in>". */ 804 805 expanded_location 806 expand_location (location_t loc) 807 { 808 return expand_location_1 (loc, /*expansion_point_p=*/true, 809 LOCATION_ASPECT_CARET); 810 } 811 812 /* Expand the source location LOC into a human readable location. If 813 LOC is virtual, it resolves to the expansion location of the 814 relevant macro. If LOC resolves to a builtin location, the file 815 name of the readable location is set to the string 816 "<built-in>". */ 817 818 expanded_location 819 expand_location_to_spelling_point (location_t loc, 820 enum location_aspect aspect) 821 { 822 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect); 823 } 824 825 /* The rich_location class within libcpp requires a way to expand 826 location_t instances, and relies on the client code 827 providing a symbol named 828 linemap_client_expand_location_to_spelling_point 829 to do this. 830 831 This is the implementation for libcommon.a (all host binaries), 832 which simply calls into expand_location_1. */ 833 834 expanded_location 835 linemap_client_expand_location_to_spelling_point (location_t loc, 836 enum location_aspect aspect) 837 { 838 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect); 839 } 840 841 842 /* If LOCATION is in a system header and if it is a virtual location for 843 a token coming from the expansion of a macro, unwind it to the 844 location of the expansion point of the macro. Otherwise, just return 845 LOCATION. 
846 847 This is used for instance when we want to emit diagnostics about a 848 token that may be located in a macro that is itself defined in a 849 system header, for example, for the NULL macro. In such a case, if 850 LOCATION were passed directly to diagnostic functions such as 851 warning_at, the diagnostic would be suppressed (unless 852 -Wsystem-headers). */ 853 854 location_t 855 expansion_point_location_if_in_system_header (location_t location) 856 { 857 if (in_system_header_at (location)) 858 location = linemap_resolve_location (line_table, location, 859 LRK_MACRO_EXPANSION_POINT, 860 NULL); 861 return location; 862 } 863 864 /* If LOCATION is a virtual location for a token coming from the expansion 865 of a macro, unwind to the location of the expansion point of the macro. */ 866 867 location_t 868 expansion_point_location (location_t location) 869 { 870 return linemap_resolve_location (line_table, location, 871 LRK_MACRO_EXPANSION_POINT, NULL); 872 } 873 874 /* Construct a location with caret at CARET, ranging from START to 875 finish e.g. 876 877 11111111112 878 12345678901234567890 879 522 880 523 return foo + bar; 881 ~~~~^~~~~ 882 524 883 884 The location's caret is at the "+", line 523 column 15, but starts 885 earlier, at the "f" of "foo" at column 11. The finish is at the "r" 886 of "bar" at column 19. */ 887 888 location_t 889 make_location (location_t caret, location_t start, location_t finish) 890 { 891 location_t pure_loc = get_pure_location (caret); 892 source_range src_range; 893 src_range.m_start = get_start (start); 894 src_range.m_finish = get_finish (finish); 895 location_t combined_loc = COMBINE_LOCATION_DATA (line_table, 896 pure_loc, 897 src_range, 898 NULL); 899 return combined_loc; 900 } 901 902 /* Same as above, but taking a source range rather than two locations. 
*/ 903 904 location_t 905 make_location (location_t caret, source_range src_range) 906 { 907 location_t pure_loc = get_pure_location (caret); 908 return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL); 909 } 910 911 /* An expanded_location stores the column in byte units. This function 912 converts that column to display units. That requires reading the associated 913 source line in order to calculate the display width. If that cannot be done 914 for any reason, then returns the byte column as a fallback. */ 915 int 916 location_compute_display_column (expanded_location exploc) 917 { 918 if (!(exploc.file && *exploc.file && exploc.line && exploc.column)) 919 return exploc.column; 920 char_span line = location_get_source_line (exploc.file, exploc.line); 921 /* If line is NULL, this function returns exploc.column which is the 922 desired fallback. */ 923 return cpp_byte_column_to_display_column (line.get_buffer (), line.length (), 924 exploc.column); 925 } 926 927 /* Dump statistics to stderr about the memory usage of the line_table 928 set of line maps. This also displays some statistics about macro 929 expansion. 
*/ 930 931 void 932 dump_line_table_statistics (void) 933 { 934 struct linemap_stats s; 935 long total_used_map_size, 936 macro_maps_size, 937 total_allocated_map_size; 938 939 memset (&s, 0, sizeof (s)); 940 941 linemap_get_statistics (line_table, &s); 942 943 macro_maps_size = s.macro_maps_used_size 944 + s.macro_maps_locations_size; 945 946 total_allocated_map_size = s.ordinary_maps_allocated_size 947 + s.macro_maps_allocated_size 948 + s.macro_maps_locations_size; 949 950 total_used_map_size = s.ordinary_maps_used_size 951 + s.macro_maps_used_size 952 + s.macro_maps_locations_size; 953 954 fprintf (stderr, "Number of expanded macros: %5ld\n", 955 s.num_expanded_macros); 956 if (s.num_expanded_macros != 0) 957 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n", 958 s.num_macro_tokens / s.num_expanded_macros); 959 fprintf (stderr, 960 "\nLine Table allocations during the " 961 "compilation process\n"); 962 fprintf (stderr, "Number of ordinary maps used: " PRsa (5) "\n", 963 SIZE_AMOUNT (s.num_ordinary_maps_used)); 964 fprintf (stderr, "Ordinary map used size: " PRsa (5) "\n", 965 SIZE_AMOUNT (s.ordinary_maps_used_size)); 966 fprintf (stderr, "Number of ordinary maps allocated: " PRsa (5) "\n", 967 SIZE_AMOUNT (s.num_ordinary_maps_allocated)); 968 fprintf (stderr, "Ordinary maps allocated size: " PRsa (5) "\n", 969 SIZE_AMOUNT (s.ordinary_maps_allocated_size)); 970 fprintf (stderr, "Number of macro maps used: " PRsa (5) "\n", 971 SIZE_AMOUNT (s.num_macro_maps_used)); 972 fprintf (stderr, "Macro maps used size: " PRsa (5) "\n", 973 SIZE_AMOUNT (s.macro_maps_used_size)); 974 fprintf (stderr, "Macro maps locations size: " PRsa (5) "\n", 975 SIZE_AMOUNT (s.macro_maps_locations_size)); 976 fprintf (stderr, "Macro maps size: " PRsa (5) "\n", 977 SIZE_AMOUNT (macro_maps_size)); 978 fprintf (stderr, "Duplicated maps locations size: " PRsa (5) "\n", 979 SIZE_AMOUNT (s.duplicated_macro_maps_locations_size)); 980 fprintf (stderr, "Total allocated maps 
size: " PRsa (5) "\n", 981 SIZE_AMOUNT (total_allocated_map_size)); 982 fprintf (stderr, "Total used maps size: " PRsa (5) "\n", 983 SIZE_AMOUNT (total_used_map_size)); 984 fprintf (stderr, "Ad-hoc table size: " PRsa (5) "\n", 985 SIZE_AMOUNT (s.adhoc_table_size)); 986 fprintf (stderr, "Ad-hoc table entries used: " PRsa (5) "\n", 987 SIZE_AMOUNT (s.adhoc_table_entries_used)); 988 fprintf (stderr, "optimized_ranges: " PRsa (5) "\n", 989 SIZE_AMOUNT (line_table->num_optimized_ranges)); 990 fprintf (stderr, "unoptimized_ranges: " PRsa (5) "\n", 991 SIZE_AMOUNT (line_table->num_unoptimized_ranges)); 992 993 fprintf (stderr, "\n"); 994 } 995 996 /* Get location one beyond the final location in ordinary map IDX. */ 997 998 static location_t 999 get_end_location (class line_maps *set, unsigned int idx) 1000 { 1001 if (idx == LINEMAPS_ORDINARY_USED (set) - 1) 1002 return set->highest_location; 1003 1004 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1); 1005 return MAP_START_LOCATION (next_map); 1006 } 1007 1008 /* Helper function for write_digit_row. */ 1009 1010 static void 1011 write_digit (FILE *stream, int digit) 1012 { 1013 fputc ('0' + (digit % 10), stream); 1014 } 1015 1016 /* Helper function for dump_location_info. 1017 Write a row of numbers to STREAM, numbering a source line, 1018 giving the units, tens, hundreds etc of the column number. */ 1019 1020 static void 1021 write_digit_row (FILE *stream, int indent, 1022 const line_map_ordinary *map, 1023 location_t loc, int max_col, int divisor) 1024 { 1025 fprintf (stream, "%*c", indent, ' '); 1026 fprintf (stream, "|"); 1027 for (int column = 1; column < max_col; column++) 1028 { 1029 location_t column_loc = loc + (column << map->m_range_bits); 1030 write_digit (stream, column_loc / divisor); 1031 } 1032 fprintf (stream, "\n"); 1033 } 1034 1035 /* Write a half-closed (START) / half-open (END) interval of 1036 location_t to STREAM. 
*/ 1037 1038 static void 1039 dump_location_range (FILE *stream, 1040 location_t start, location_t end) 1041 { 1042 fprintf (stream, 1043 " location_t interval: %u <= loc < %u\n", 1044 start, end); 1045 } 1046 1047 /* Write a labelled description of a half-closed (START) / half-open (END) 1048 interval of location_t to STREAM. */ 1049 1050 static void 1051 dump_labelled_location_range (FILE *stream, 1052 const char *name, 1053 location_t start, location_t end) 1054 { 1055 fprintf (stream, "%s\n", name); 1056 dump_location_range (stream, start, end); 1057 fprintf (stream, "\n"); 1058 } 1059 1060 /* Write a visualization of the locations in the line_table to STREAM. */ 1061 1062 void 1063 dump_location_info (FILE *stream) 1064 { 1065 /* Visualize the reserved locations. */ 1066 dump_labelled_location_range (stream, "RESERVED LOCATIONS", 1067 0, RESERVED_LOCATION_COUNT); 1068 1069 /* Visualize the ordinary line_map instances, rendering the sources. */ 1070 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++) 1071 { 1072 location_t end_location = get_end_location (line_table, idx); 1073 /* half-closed: doesn't include this one. 
*/ 1074 1075 const line_map_ordinary *map 1076 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx); 1077 fprintf (stream, "ORDINARY MAP: %i\n", idx); 1078 dump_location_range (stream, 1079 MAP_START_LOCATION (map), end_location); 1080 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map)); 1081 fprintf (stream, " starting at line: %i\n", 1082 ORDINARY_MAP_STARTING_LINE_NUMBER (map)); 1083 fprintf (stream, " column and range bits: %i\n", 1084 map->m_column_and_range_bits); 1085 fprintf (stream, " column bits: %i\n", 1086 map->m_column_and_range_bits - map->m_range_bits); 1087 fprintf (stream, " range bits: %i\n", 1088 map->m_range_bits); 1089 const char * reason; 1090 switch (map->reason) { 1091 case LC_ENTER: 1092 reason = "LC_ENTER"; 1093 break; 1094 case LC_LEAVE: 1095 reason = "LC_LEAVE"; 1096 break; 1097 case LC_RENAME: 1098 reason = "LC_RENAME"; 1099 break; 1100 case LC_RENAME_VERBATIM: 1101 reason = "LC_RENAME_VERBATIM"; 1102 break; 1103 case LC_ENTER_MACRO: 1104 reason = "LC_RENAME_MACRO"; 1105 break; 1106 default: 1107 reason = "Unknown"; 1108 } 1109 fprintf (stream, " reason: %d (%s)\n", map->reason, reason); 1110 1111 const line_map_ordinary *includer_map 1112 = linemap_included_from_linemap (line_table, map); 1113 fprintf (stream, " included from location: %d", 1114 linemap_included_from (map)); 1115 if (includer_map) { 1116 fprintf (stream, " (in ordinary map %d)", 1117 int (includer_map - line_table->info_ordinary.maps)); 1118 } 1119 fprintf (stream, "\n"); 1120 1121 /* Render the span of source lines that this "map" covers. */ 1122 for (location_t loc = MAP_START_LOCATION (map); 1123 loc < end_location; 1124 loc += (1 << map->m_range_bits) ) 1125 { 1126 gcc_assert (pure_location_p (line_table, loc) ); 1127 1128 expanded_location exploc 1129 = linemap_expand_location (line_table, map, loc); 1130 1131 if (exploc.column == 0) 1132 { 1133 /* Beginning of a new source line: draw the line. 
*/ 1134 1135 char_span line_text = location_get_source_line (exploc.file, 1136 exploc.line); 1137 if (!line_text) 1138 break; 1139 fprintf (stream, 1140 "%s:%3i|loc:%5i|%.*s\n", 1141 exploc.file, exploc.line, 1142 loc, 1143 (int)line_text.length (), line_text.get_buffer ()); 1144 1145 /* "loc" is at column 0, which means "the whole line". 1146 Render the locations *within* the line, by underlining 1147 it, showing the location_t numeric values 1148 at each column. */ 1149 size_t max_col = (1 << map->m_column_and_range_bits) - 1; 1150 if (max_col > line_text.length ()) 1151 max_col = line_text.length () + 1; 1152 1153 int len_lnum = num_digits (exploc.line); 1154 if (len_lnum < 3) 1155 len_lnum = 3; 1156 int len_loc = num_digits (loc); 1157 if (len_loc < 5) 1158 len_loc = 5; 1159 1160 int indent = 6 + strlen (exploc.file) + len_lnum + len_loc; 1161 1162 /* Thousands. */ 1163 if (end_location > 999) 1164 write_digit_row (stream, indent, map, loc, max_col, 1000); 1165 1166 /* Hundreds. */ 1167 if (end_location > 99) 1168 write_digit_row (stream, indent, map, loc, max_col, 100); 1169 1170 /* Tens. */ 1171 write_digit_row (stream, indent, map, loc, max_col, 10); 1172 1173 /* Units. */ 1174 write_digit_row (stream, indent, map, loc, max_col, 1); 1175 } 1176 } 1177 fprintf (stream, "\n"); 1178 } 1179 1180 /* Visualize unallocated values. */ 1181 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS", 1182 line_table->highest_location, 1183 LINEMAPS_MACRO_LOWEST_LOCATION (line_table)); 1184 1185 /* Visualize the macro line_map instances, rendering the sources. */ 1186 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++) 1187 { 1188 /* Each macro map that is allocated owns location_t values 1189 that are *lower* that the one before them. 1190 Hence it's meaningful to view them either in order of ascending 1191 source locations, or in order of ascending macro map index. 
*/ 1192 const bool ascending_location_ts = true; 1193 unsigned int idx = (ascending_location_ts 1194 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1)) 1195 : i); 1196 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx); 1197 fprintf (stream, "MACRO %i: %s (%u tokens)\n", 1198 idx, 1199 linemap_map_get_macro_name (map), 1200 MACRO_MAP_NUM_MACRO_TOKENS (map)); 1201 dump_location_range (stream, 1202 map->start_location, 1203 (map->start_location 1204 + MACRO_MAP_NUM_MACRO_TOKENS (map))); 1205 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map), 1206 "expansion point is location %i", 1207 MACRO_MAP_EXPANSION_POINT_LOCATION (map)); 1208 fprintf (stream, " map->start_location: %u\n", 1209 map->start_location); 1210 1211 fprintf (stream, " macro_locations:\n"); 1212 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++) 1213 { 1214 location_t x = MACRO_MAP_LOCATIONS (map)[2 * i]; 1215 location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1]; 1216 1217 /* linemap_add_macro_token encodes token numbers in an expansion 1218 by putting them after MAP_START_LOCATION. */ 1219 1220 /* I'm typically seeing 4 uninitialized entries at the end of 1221 0xafafafaf. 1222 This appears to be due to macro.c:replace_args 1223 adding 2 extra args for padding tokens; presumably there may 1224 be a leading and/or trailing padding token injected, 1225 each for 2 more location slots. 1226 This would explain there being up to 4 location_ts slots 1227 that may be uninitialized. 
*/ 1228 1229 fprintf (stream, " %u: %u, %u\n", 1230 i, 1231 x, 1232 y); 1233 if (x == y) 1234 { 1235 if (x < MAP_START_LOCATION (map)) 1236 inform (x, "token %u has %<x-location == y-location == %u%>", 1237 i, x); 1238 else 1239 fprintf (stream, 1240 "x-location == y-location == %u encodes token # %u\n", 1241 x, x - MAP_START_LOCATION (map)); 1242 } 1243 else 1244 { 1245 inform (x, "token %u has %<x-location == %u%>", i, x); 1246 inform (x, "token %u has %<y-location == %u%>", i, y); 1247 } 1248 } 1249 fprintf (stream, "\n"); 1250 } 1251 1252 /* It appears that MAX_LOCATION_T itself is never assigned to a 1253 macro map, presumably due to an off-by-one error somewhere 1254 between the logic in linemap_enter_macro and 1255 LINEMAPS_MACRO_LOWEST_LOCATION. */ 1256 dump_labelled_location_range (stream, "MAX_LOCATION_T", 1257 MAX_LOCATION_T, 1258 MAX_LOCATION_T + 1); 1259 1260 /* Visualize ad-hoc values. */ 1261 dump_labelled_location_range (stream, "AD-HOC LOCATIONS", 1262 MAX_LOCATION_T + 1, UINT_MAX); 1263 } 1264 1265 /* string_concat's constructor. */ 1266 1267 string_concat::string_concat (int num, location_t *locs) 1268 : m_num (num) 1269 { 1270 m_locs = ggc_vec_alloc <location_t> (num); 1271 for (int i = 0; i < num; i++) 1272 m_locs[i] = locs[i]; 1273 } 1274 1275 /* string_concat_db's constructor. */ 1276 1277 string_concat_db::string_concat_db () 1278 { 1279 m_table = hash_map <location_hash, string_concat *>::create_ggc (64); 1280 } 1281 1282 /* Record that a string concatenation occurred, covering NUM 1283 string literal tokens. LOCS is an array of size NUM, containing the 1284 locations of the tokens. A copy of LOCS is taken. 
*/ 1285 1286 void 1287 string_concat_db::record_string_concatenation (int num, location_t *locs) 1288 { 1289 gcc_assert (num > 1); 1290 gcc_assert (locs); 1291 1292 location_t key_loc = get_key_loc (locs[0]); 1293 1294 string_concat *concat 1295 = new (ggc_alloc <string_concat> ()) string_concat (num, locs); 1296 m_table->put (key_loc, concat); 1297 } 1298 1299 /* Determine if LOC was the location of the initial token of a 1300 concatenation of string literal tokens. 1301 If so, *OUT_NUM is written to with the number of tokens, and 1302 *OUT_LOCS with the location of an array of locations of the 1303 tokens, and return true. *OUT_LOCS is a borrowed pointer to 1304 storage owned by the string_concat_db. 1305 Otherwise, return false. */ 1306 1307 bool 1308 string_concat_db::get_string_concatenation (location_t loc, 1309 int *out_num, 1310 location_t **out_locs) 1311 { 1312 gcc_assert (out_num); 1313 gcc_assert (out_locs); 1314 1315 location_t key_loc = get_key_loc (loc); 1316 1317 string_concat **concat = m_table->get (key_loc); 1318 if (!concat) 1319 return false; 1320 1321 *out_num = (*concat)->m_num; 1322 *out_locs =(*concat)->m_locs; 1323 return true; 1324 } 1325 1326 /* Internal function. Canonicalize LOC into a form suitable for 1327 use as a key within the database, stripping away macro expansion, 1328 ad-hoc information, and range information, using the location of 1329 the start of LOC within an ordinary linemap. */ 1330 1331 location_t 1332 string_concat_db::get_key_loc (location_t loc) 1333 { 1334 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION, 1335 NULL); 1336 1337 loc = get_range_from_loc (line_table, loc).m_start; 1338 1339 return loc; 1340 } 1341 1342 /* Helper class for use within get_substring_ranges_for_loc. 1343 An vec of cpp_string with responsibility for releasing all of the 1344 str->text for each str in the vector. 
*/ 1345 1346 class auto_cpp_string_vec : public auto_vec <cpp_string> 1347 { 1348 public: 1349 auto_cpp_string_vec (int alloc) 1350 : auto_vec <cpp_string> (alloc) {} 1351 1352 ~auto_cpp_string_vec () 1353 { 1354 /* Clean up the copies within this vec. */ 1355 int i; 1356 cpp_string *str; 1357 FOR_EACH_VEC_ELT (*this, i, str) 1358 free (const_cast <unsigned char *> (str->text)); 1359 } 1360 }; 1361 1362 /* Attempt to populate RANGES with source location information on the 1363 individual characters within the string literal found at STRLOC. 1364 If CONCATS is non-NULL, then any string literals that the token at 1365 STRLOC was concatenated with are also added to RANGES. 1366 1367 Return NULL if successful, or an error message if any errors occurred (in 1368 which case RANGES may be only partially populated and should not 1369 be used). 1370 1371 This is implemented by re-parsing the relevant source line(s). */ 1372 1373 static const char * 1374 get_substring_ranges_for_loc (cpp_reader *pfile, 1375 string_concat_db *concats, 1376 location_t strloc, 1377 enum cpp_ttype type, 1378 cpp_substring_ranges &ranges) 1379 { 1380 gcc_assert (pfile); 1381 1382 if (strloc == UNKNOWN_LOCATION) 1383 return "unknown location"; 1384 1385 /* Reparsing the strings requires accurate location information. 1386 If -ftrack-macro-expansion has been overridden from its default 1387 of 2, then we might have a location of a macro expansion point, 1388 rather than the location of the literal itself. 1389 Avoid this by requiring that we have full macro expansion tracking 1390 for substring locations to be available. */ 1391 if (cpp_get_options (pfile)->track_macro_expansion != 2) 1392 return "track_macro_expansion != 2"; 1393 1394 /* If #line or # 44 "file"-style directives are present, then there's 1395 no guarantee that the line numbers we have can be used to locate 1396 the strings. 
For example, we might have a .i file with # directives 1397 pointing back to lines within a .c file, but the .c file might 1398 have been edited since the .i file was created. 1399 In such a case, the safest course is to disable on-demand substring 1400 locations. */ 1401 if (line_table->seen_line_directive) 1402 return "seen line directive"; 1403 1404 /* If string concatenation has occurred at STRLOC, get the locations 1405 of all of the literal tokens making up the compound string. 1406 Otherwise, just use STRLOC. */ 1407 int num_locs = 1; 1408 location_t *strlocs = &strloc; 1409 if (concats) 1410 concats->get_string_concatenation (strloc, &num_locs, &strlocs); 1411 1412 auto_cpp_string_vec strs (num_locs); 1413 auto_vec <cpp_string_location_reader> loc_readers (num_locs); 1414 for (int i = 0; i < num_locs; i++) 1415 { 1416 /* Get range of strloc. We will use it to locate the start and finish 1417 of the literal token within the line. */ 1418 source_range src_range = get_range_from_loc (line_table, strlocs[i]); 1419 1420 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table)) 1421 { 1422 /* If the string token was within a macro expansion, then we can 1423 cope with it for the simple case where we have a single token. 1424 Otherwise, bail out. */ 1425 if (src_range.m_start != src_range.m_finish) 1426 return "macro expansion"; 1427 } 1428 else 1429 { 1430 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS) 1431 /* If so, we can't reliably determine where the token started within 1432 its line. */ 1433 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS"; 1434 1435 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS) 1436 /* If so, we can't reliably determine where the token finished 1437 within its line. 
*/ 1438 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS"; 1439 } 1440 1441 expanded_location start 1442 = expand_location_to_spelling_point (src_range.m_start, 1443 LOCATION_ASPECT_START); 1444 expanded_location finish 1445 = expand_location_to_spelling_point (src_range.m_finish, 1446 LOCATION_ASPECT_FINISH); 1447 if (start.file != finish.file) 1448 return "range endpoints are in different files"; 1449 if (start.line != finish.line) 1450 return "range endpoints are on different lines"; 1451 if (start.column > finish.column) 1452 return "range endpoints are reversed"; 1453 1454 char_span line = location_get_source_line (start.file, start.line); 1455 if (!line) 1456 return "unable to read source line"; 1457 1458 /* Determine the location of the literal (including quotes 1459 and leading prefix chars, such as the 'u' in a u"" 1460 token). */ 1461 size_t literal_length = finish.column - start.column + 1; 1462 1463 /* Ensure that we don't crash if we got the wrong location. */ 1464 if (start.column < 1) 1465 return "zero start column"; 1466 if (line.length () < (start.column - 1 + literal_length)) 1467 return "line is not wide enough"; 1468 1469 char_span literal = line.subspan (start.column - 1, literal_length); 1470 1471 cpp_string from; 1472 from.len = literal_length; 1473 /* Make a copy of the literal, to avoid having to rely on 1474 the lifetime of the copy of the line within the cache. 1475 This will be released by the auto_cpp_string_vec dtor. */ 1476 from.text = (unsigned char *)literal.xstrdup (); 1477 strs.safe_push (from); 1478 1479 /* For very long lines, a new linemap could have started 1480 halfway through the token. 1481 Ensure that the loc_reader uses the linemap of the 1482 *end* of the token for its start location. 
*/ 1483 const line_map_ordinary *start_ord_map; 1484 linemap_resolve_location (line_table, src_range.m_start, 1485 LRK_SPELLING_LOCATION, &start_ord_map); 1486 const line_map_ordinary *final_ord_map; 1487 linemap_resolve_location (line_table, src_range.m_finish, 1488 LRK_SPELLING_LOCATION, &final_ord_map); 1489 if (start_ord_map == NULL || final_ord_map == NULL) 1490 return "failed to get ordinary maps"; 1491 /* Bulletproofing. We ought to only have different ordinary maps 1492 for start vs finish due to line-length jumps. */ 1493 if (start_ord_map != final_ord_map 1494 && start_ord_map->to_file != final_ord_map->to_file) 1495 return "start and finish are spelled in different ordinary maps"; 1496 /* The file from linemap_resolve_location ought to match that from 1497 expand_location_to_spelling_point. */ 1498 if (start_ord_map->to_file != start.file) 1499 return "mismatching file after resolving linemap"; 1500 1501 location_t start_loc 1502 = linemap_position_for_line_and_column (line_table, final_ord_map, 1503 start.line, start.column); 1504 1505 cpp_string_location_reader loc_reader (start_loc, line_table); 1506 loc_readers.safe_push (loc_reader); 1507 } 1508 1509 /* Rerun cpp_interpret_string, or rather, a modified version of it. */ 1510 const char *err = cpp_interpret_string_ranges (pfile, strs.address (), 1511 loc_readers.address (), 1512 num_locs, &ranges, type); 1513 if (err) 1514 return err; 1515 1516 /* Success: "ranges" should now contain information on the string. */ 1517 return NULL; 1518 } 1519 1520 /* Attempt to populate *OUT_LOC with source location information on the 1521 given characters within the string literal found at STRLOC. 1522 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution 1523 character set. 
1524 1525 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7 1526 and string literal "012345\n789" 1527 *OUT_LOC is written to with: 1528 "012345\n789" 1529 ~^~~~~ 1530 1531 If CONCATS is non-NULL, then any string literals that the token at 1532 STRLOC was concatenated with are also considered. 1533 1534 This is implemented by re-parsing the relevant source line(s). 1535 1536 Return NULL if successful, or an error message if any errors occurred. 1537 Error messages are intended for GCC developers (to help debugging) rather 1538 than for end-users. */ 1539 1540 const char * 1541 get_location_within_string (cpp_reader *pfile, 1542 string_concat_db *concats, 1543 location_t strloc, 1544 enum cpp_ttype type, 1545 int caret_idx, int start_idx, int end_idx, 1546 location_t *out_loc) 1547 { 1548 gcc_checking_assert (caret_idx >= 0); 1549 gcc_checking_assert (start_idx >= 0); 1550 gcc_checking_assert (end_idx >= 0); 1551 gcc_assert (out_loc); 1552 1553 cpp_substring_ranges ranges; 1554 const char *err 1555 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges); 1556 if (err) 1557 return err; 1558 1559 if (caret_idx >= ranges.get_num_ranges ()) 1560 return "caret_idx out of range"; 1561 if (start_idx >= ranges.get_num_ranges ()) 1562 return "start_idx out of range"; 1563 if (end_idx >= ranges.get_num_ranges ()) 1564 return "end_idx out of range"; 1565 1566 *out_loc = make_location (ranges.get_range (caret_idx).m_start, 1567 ranges.get_range (start_idx).m_start, 1568 ranges.get_range (end_idx).m_finish); 1569 return NULL; 1570 } 1571 1572 #if CHECKING_P 1573 1574 namespace selftest { 1575 1576 /* Selftests of location handling. */ 1577 1578 /* Attempt to populate *OUT_RANGE with source location information on the 1579 given character within the string literal found at STRLOC. 1580 CHAR_IDX refers to an offset within the execution character set. 
1581 If CONCATS is non-NULL, then any string literals that the token at 1582 STRLOC was concatenated with are also considered. 1583 1584 This is implemented by re-parsing the relevant source line(s). 1585 1586 Return NULL if successful, or an error message if any errors occurred. 1587 Error messages are intended for GCC developers (to help debugging) rather 1588 than for end-users. */ 1589 1590 static const char * 1591 get_source_range_for_char (cpp_reader *pfile, 1592 string_concat_db *concats, 1593 location_t strloc, 1594 enum cpp_ttype type, 1595 int char_idx, 1596 source_range *out_range) 1597 { 1598 gcc_checking_assert (char_idx >= 0); 1599 gcc_assert (out_range); 1600 1601 cpp_substring_ranges ranges; 1602 const char *err 1603 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges); 1604 if (err) 1605 return err; 1606 1607 if (char_idx >= ranges.get_num_ranges ()) 1608 return "char_idx out of range"; 1609 1610 *out_range = ranges.get_range (char_idx); 1611 return NULL; 1612 } 1613 1614 /* As get_source_range_for_char, but write to *OUT the number 1615 of ranges that are available. */ 1616 1617 static const char * 1618 get_num_source_ranges_for_substring (cpp_reader *pfile, 1619 string_concat_db *concats, 1620 location_t strloc, 1621 enum cpp_ttype type, 1622 int *out) 1623 { 1624 gcc_assert (out); 1625 1626 cpp_substring_ranges ranges; 1627 const char *err 1628 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges); 1629 1630 if (err) 1631 return err; 1632 1633 *out = ranges.get_num_ranges (); 1634 return NULL; 1635 } 1636 1637 /* Selftests of location handling. */ 1638 1639 /* Verify that compare() on linenum_type handles comparisons over the full 1640 range of the type. 
*/ 1641 1642 static void 1643 test_linenum_comparisons () 1644 { 1645 linenum_type min_line (0); 1646 linenum_type max_line (0xffffffff); 1647 ASSERT_EQ (0, compare (min_line, min_line)); 1648 ASSERT_EQ (0, compare (max_line, max_line)); 1649 1650 ASSERT_GT (compare (max_line, min_line), 0); 1651 ASSERT_LT (compare (min_line, max_line), 0); 1652 } 1653 1654 /* Helper function for verifying location data: when location_t 1655 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated 1656 as having column 0. */ 1657 1658 static bool 1659 should_have_column_data_p (location_t loc) 1660 { 1661 if (IS_ADHOC_LOC (loc)) 1662 loc = get_location_from_adhoc_loc (line_table, loc); 1663 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS) 1664 return false; 1665 return true; 1666 } 1667 1668 /* Selftest for should_have_column_data_p. */ 1669 1670 static void 1671 test_should_have_column_data_p () 1672 { 1673 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT)); 1674 ASSERT_TRUE 1675 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS)); 1676 ASSERT_FALSE 1677 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1)); 1678 } 1679 1680 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN 1681 on LOC. */ 1682 1683 static void 1684 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum, 1685 location_t loc) 1686 { 1687 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc)); 1688 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc)); 1689 /* If location_t values are sufficiently high, then column numbers 1690 will be unavailable and LOCATION_COLUMN (loc) will be 0. 1691 When close to the threshold, column numbers *may* be present: if 1692 the final linemap before the threshold contains a line that straddles 1693 the threshold, locations in that line have column information. 
*/ 1694 if (should_have_column_data_p (loc)) 1695 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc)); 1696 } 1697 1698 /* Various selftests involve constructing a line table and one or more 1699 line maps within it. 1700 1701 For maximum test coverage we want to run these tests with a variety 1702 of situations: 1703 - line_table->default_range_bits: some frontends use a non-zero value 1704 and others use zero 1705 - the fallback modes within line-map.c: there are various threshold 1706 values for location_t beyond line-map.c changes 1707 behavior (disabling of the range-packing optimization, disabling 1708 of column-tracking). We can exercise these by starting the line_table 1709 at interesting values at or near these thresholds. 1710 1711 The following struct describes a particular case within our test 1712 matrix. */ 1713 1714 class line_table_case 1715 { 1716 public: 1717 line_table_case (int default_range_bits, int base_location) 1718 : m_default_range_bits (default_range_bits), 1719 m_base_location (base_location) 1720 {} 1721 1722 int m_default_range_bits; 1723 int m_base_location; 1724 }; 1725 1726 /* Constructor. Store the old value of line_table, and create a new 1727 one, using sane defaults. */ 1728 1729 line_table_test::line_table_test () 1730 { 1731 gcc_assert (saved_line_table == NULL); 1732 saved_line_table = line_table; 1733 line_table = ggc_alloc<line_maps> (); 1734 linemap_init (line_table, BUILTINS_LOCATION); 1735 gcc_assert (saved_line_table->reallocator); 1736 line_table->reallocator = saved_line_table->reallocator; 1737 gcc_assert (saved_line_table->round_alloc_size); 1738 line_table->round_alloc_size = saved_line_table->round_alloc_size; 1739 line_table->default_range_bits = 0; 1740 } 1741 1742 /* Constructor. Store the old value of line_table, and create a new 1743 one, using the sitation described in CASE_. 
*/ 1744 1745 line_table_test::line_table_test (const line_table_case &case_) 1746 { 1747 gcc_assert (saved_line_table == NULL); 1748 saved_line_table = line_table; 1749 line_table = ggc_alloc<line_maps> (); 1750 linemap_init (line_table, BUILTINS_LOCATION); 1751 gcc_assert (saved_line_table->reallocator); 1752 line_table->reallocator = saved_line_table->reallocator; 1753 gcc_assert (saved_line_table->round_alloc_size); 1754 line_table->round_alloc_size = saved_line_table->round_alloc_size; 1755 line_table->default_range_bits = case_.m_default_range_bits; 1756 if (case_.m_base_location) 1757 { 1758 line_table->highest_location = case_.m_base_location; 1759 line_table->highest_line = case_.m_base_location; 1760 } 1761 } 1762 1763 /* Destructor. Restore the old value of line_table. */ 1764 1765 line_table_test::~line_table_test () 1766 { 1767 gcc_assert (saved_line_table != NULL); 1768 line_table = saved_line_table; 1769 saved_line_table = NULL; 1770 } 1771 1772 /* Verify basic operation of ordinary linemaps. */ 1773 1774 static void 1775 test_accessing_ordinary_linemaps (const line_table_case &case_) 1776 { 1777 line_table_test ltt (case_); 1778 1779 /* Build a simple linemap describing some locations. */ 1780 linemap_add (line_table, LC_ENTER, false, "foo.c", 0); 1781 1782 linemap_line_start (line_table, 1, 100); 1783 location_t loc_a = linemap_position_for_column (line_table, 1); 1784 location_t loc_b = linemap_position_for_column (line_table, 23); 1785 1786 linemap_line_start (line_table, 2, 100); 1787 location_t loc_c = linemap_position_for_column (line_table, 1); 1788 location_t loc_d = linemap_position_for_column (line_table, 17); 1789 1790 /* Example of a very long line. */ 1791 linemap_line_start (line_table, 3, 2000); 1792 location_t loc_e = linemap_position_for_column (line_table, 700); 1793 1794 /* Transitioning back to a short line. 
*/ 1795 linemap_line_start (line_table, 4, 0); 1796 location_t loc_back_to_short = linemap_position_for_column (line_table, 100); 1797 1798 if (should_have_column_data_p (loc_back_to_short)) 1799 { 1800 /* Verify that we switched to short lines in the linemap. */ 1801 line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table); 1802 ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits); 1803 } 1804 1805 /* Example of a line that will eventually be seen to be longer 1806 than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is 1807 below that. */ 1808 linemap_line_start (line_table, 5, 2000); 1809 1810 location_t loc_start_of_very_long_line 1811 = linemap_position_for_column (line_table, 2000); 1812 location_t loc_too_wide 1813 = linemap_position_for_column (line_table, 4097); 1814 location_t loc_too_wide_2 1815 = linemap_position_for_column (line_table, 4098); 1816 1817 /* ...and back to a sane line length. */ 1818 linemap_line_start (line_table, 6, 100); 1819 location_t loc_sane_again = linemap_position_for_column (line_table, 10); 1820 1821 linemap_add (line_table, LC_LEAVE, false, NULL, 0); 1822 1823 /* Multiple files. */ 1824 linemap_add (line_table, LC_ENTER, false, "bar.c", 0); 1825 linemap_line_start (line_table, 1, 200); 1826 location_t loc_f = linemap_position_for_column (line_table, 150); 1827 linemap_add (line_table, LC_LEAVE, false, NULL, 0); 1828 1829 /* Verify that we can recover the location info. */ 1830 assert_loceq ("foo.c", 1, 1, loc_a); 1831 assert_loceq ("foo.c", 1, 23, loc_b); 1832 assert_loceq ("foo.c", 2, 1, loc_c); 1833 assert_loceq ("foo.c", 2, 17, loc_d); 1834 assert_loceq ("foo.c", 3, 700, loc_e); 1835 assert_loceq ("foo.c", 4, 100, loc_back_to_short); 1836 1837 /* In the very wide line, the initial location should be fully tracked. */ 1838 assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line); 1839 /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should 1840 be disabled. 
*/ 1841 assert_loceq ("foo.c", 5, 0, loc_too_wide); 1842 assert_loceq ("foo.c", 5, 0, loc_too_wide_2); 1843 /*...and column-tracking should be re-enabled for subsequent lines. */ 1844 assert_loceq ("foo.c", 6, 10, loc_sane_again); 1845 1846 assert_loceq ("bar.c", 1, 150, loc_f); 1847 1848 ASSERT_FALSE (is_location_from_builtin_token (loc_a)); 1849 ASSERT_TRUE (pure_location_p (line_table, loc_a)); 1850 1851 /* Verify using make_location to build a range, and extracting data 1852 back from it. */ 1853 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d); 1854 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d)); 1855 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d)); 1856 source_range src_range = get_range_from_loc (line_table, range_c_b_d); 1857 ASSERT_EQ (loc_b, src_range.m_start); 1858 ASSERT_EQ (loc_d, src_range.m_finish); 1859 } 1860 1861 /* Verify various properties of UNKNOWN_LOCATION. */ 1862 1863 static void 1864 test_unknown_location () 1865 { 1866 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION)); 1867 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION)); 1868 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION)); 1869 } 1870 1871 /* Verify various properties of BUILTINS_LOCATION. */ 1872 1873 static void 1874 test_builtins () 1875 { 1876 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION); 1877 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION); 1878 } 1879 1880 /* Regression test for make_location. 1881 Ensure that we use pure locations for the start/finish of the range, 1882 rather than storing a packed or ad-hoc range as the start/finish. */ 1883 1884 static void 1885 test_make_location_nonpure_range_endpoints (const line_table_case &case_) 1886 { 1887 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c 1888 with C++ frontend. 1889 ....................0000000001111111111222. 1890 ....................1234567890123456789012. 
*/ 1891 const char *content = " r += !aaa == bbb;\n"; 1892 temp_source_file tmp (SELFTEST_LOCATION, ".C", content); 1893 line_table_test ltt (case_); 1894 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1); 1895 1896 const location_t c11 = linemap_position_for_column (line_table, 11); 1897 const location_t c12 = linemap_position_for_column (line_table, 12); 1898 const location_t c13 = linemap_position_for_column (line_table, 13); 1899 const location_t c14 = linemap_position_for_column (line_table, 14); 1900 const location_t c21 = linemap_position_for_column (line_table, 21); 1901 1902 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS) 1903 return; 1904 1905 /* Use column 13 for the caret location, arbitrarily, to verify that we 1906 handle start != caret. */ 1907 const location_t aaa = make_location (c13, c12, c14); 1908 ASSERT_EQ (c13, get_pure_location (aaa)); 1909 ASSERT_EQ (c12, get_start (aaa)); 1910 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa))); 1911 ASSERT_EQ (c14, get_finish (aaa)); 1912 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa))); 1913 1914 /* Make a location using a location with a range as the start-point. */ 1915 const location_t not_aaa = make_location (c11, aaa, c14); 1916 ASSERT_EQ (c11, get_pure_location (not_aaa)); 1917 /* It should use the start location of the range, not store the range 1918 itself. */ 1919 ASSERT_EQ (c12, get_start (not_aaa)); 1920 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa))); 1921 ASSERT_EQ (c14, get_finish (not_aaa)); 1922 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa))); 1923 1924 /* Similarly, make a location with a range as the end-point. 
*/ 1925 const location_t aaa_eq_bbb = make_location (c12, c12, c21); 1926 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb)); 1927 ASSERT_EQ (c12, get_start (aaa_eq_bbb)); 1928 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb))); 1929 ASSERT_EQ (c21, get_finish (aaa_eq_bbb)); 1930 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb))); 1931 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb); 1932 /* It should use the finish location of the range, not store the range 1933 itself. */ 1934 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb)); 1935 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb)); 1936 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb))); 1937 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb)); 1938 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb))); 1939 } 1940 1941 /* Verify reading of input files (e.g. for caret-based diagnostics). */ 1942 1943 static void 1944 test_reading_source_line () 1945 { 1946 /* Create a tempfile and write some text to it. */ 1947 temp_source_file tmp (SELFTEST_LOCATION, ".txt", 1948 "01234567890123456789\n" 1949 "This is the test text\n" 1950 "This is the 3rd line"); 1951 1952 /* Read back a specific line from the tempfile. 
*/ 1953 char_span source_line = location_get_source_line (tmp.get_filename (), 3); 1954 ASSERT_TRUE (source_line); 1955 ASSERT_TRUE (source_line.get_buffer () != NULL); 1956 ASSERT_EQ (20, source_line.length ()); 1957 ASSERT_TRUE (!strncmp ("This is the 3rd line", 1958 source_line.get_buffer (), source_line.length ())); 1959 1960 source_line = location_get_source_line (tmp.get_filename (), 2); 1961 ASSERT_TRUE (source_line); 1962 ASSERT_TRUE (source_line.get_buffer () != NULL); 1963 ASSERT_EQ (21, source_line.length ()); 1964 ASSERT_TRUE (!strncmp ("This is the test text", 1965 source_line.get_buffer (), source_line.length ())); 1966 1967 source_line = location_get_source_line (tmp.get_filename (), 4); 1968 ASSERT_FALSE (source_line); 1969 ASSERT_TRUE (source_line.get_buffer () == NULL); 1970 } 1971 1972 /* Tests of lexing. */ 1973 1974 /* Verify that token TOK from PARSER has cpp_token_as_text 1975 equal to EXPECTED_TEXT. */ 1976 1977 #define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT) \ 1978 SELFTEST_BEGIN_STMT \ 1979 unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK)); \ 1980 ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt); \ 1981 SELFTEST_END_STMT 1982 1983 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM, 1984 and ranges from EXP_START_COL to EXP_FINISH_COL. 1985 Use LOC as the effective location of the selftest. */ 1986 1987 static void 1988 assert_token_loc_eq (const location &loc, 1989 const cpp_token *tok, 1990 const char *exp_filename, int exp_linenum, 1991 int exp_start_col, int exp_finish_col) 1992 { 1993 location_t tok_loc = tok->src_loc; 1994 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc)); 1995 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc)); 1996 1997 /* If location_t values are sufficiently high, then column numbers 1998 will be unavailable. 
*/ 1999 if (!should_have_column_data_p (tok_loc)) 2000 return; 2001 2002 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc)); 2003 source_range tok_range = get_range_from_loc (line_table, tok_loc); 2004 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start)); 2005 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish)); 2006 } 2007 2008 /* Use assert_token_loc_eq to verify the TOK->src_loc, using 2009 SELFTEST_LOCATION as the effective location of the selftest. */ 2010 2011 #define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \ 2012 EXP_START_COL, EXP_FINISH_COL) \ 2013 assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME), \ 2014 (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL)) 2015 2016 /* Test of lexing a file using libcpp, verifying tokens and their 2017 location information. */ 2018 2019 static void 2020 test_lexer (const line_table_case &case_) 2021 { 2022 /* Create a tempfile and write some text to it. */ 2023 const char *content = 2024 /*00000000011111111112222222222333333.3333444444444.455555555556 2025 12345678901234567890123456789012345.6789012345678.901234567890. */ 2026 ("test_name /* c-style comment */\n" 2027 " \"test literal\"\n" 2028 " // test c++-style comment\n" 2029 " 42\n"); 2030 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content); 2031 2032 line_table_test ltt (case_); 2033 2034 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table); 2035 2036 const char *fname = cpp_read_main_file (parser, tmp.get_filename ()); 2037 ASSERT_NE (fname, NULL); 2038 2039 /* Verify that we get the expected tokens back, with the correct 2040 location information. 
*/ 2041 2042 location_t loc; 2043 const cpp_token *tok; 2044 tok = cpp_get_token_with_location (parser, &loc); 2045 ASSERT_NE (tok, NULL); 2046 ASSERT_EQ (tok->type, CPP_NAME); 2047 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name"); 2048 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9); 2049 2050 tok = cpp_get_token_with_location (parser, &loc); 2051 ASSERT_NE (tok, NULL); 2052 ASSERT_EQ (tok->type, CPP_STRING); 2053 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\""); 2054 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48); 2055 2056 tok = cpp_get_token_with_location (parser, &loc); 2057 ASSERT_NE (tok, NULL); 2058 ASSERT_EQ (tok->type, CPP_NUMBER); 2059 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42"); 2060 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5); 2061 2062 tok = cpp_get_token_with_location (parser, &loc); 2063 ASSERT_NE (tok, NULL); 2064 ASSERT_EQ (tok->type, CPP_EOF); 2065 2066 cpp_finish (parser, NULL); 2067 cpp_destroy (parser); 2068 } 2069 2070 /* Forward decls. */ 2071 2072 class lexer_test; 2073 class lexer_test_options; 2074 2075 /* A class for specifying options of a lexer_test. 2076 The "apply" vfunc is called during the lexer_test constructor. */ 2077 2078 class lexer_test_options 2079 { 2080 public: 2081 virtual void apply (lexer_test &) = 0; 2082 }; 2083 2084 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy 2085 in its dtor. 2086 2087 This is needed by struct lexer_test to ensure that the cleanup of the 2088 cpp_reader happens *after* the cleanup of the temp_source_file. */ 2089 2090 class cpp_reader_ptr 2091 { 2092 public: 2093 cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {} 2094 2095 ~cpp_reader_ptr () 2096 { 2097 cpp_finish (m_ptr, NULL); 2098 cpp_destroy (m_ptr); 2099 } 2100 2101 operator cpp_reader * () const { return m_ptr; } 2102 2103 private: 2104 cpp_reader *m_ptr; 2105 }; 2106 2107 /* A struct for writing lexer tests. 
*/

class lexer_test
{
 public:
  lexer_test (const line_table_case &case_,
              const char *content,
              lexer_test_options *options);
  ~lexer_test ();

  const cpp_token *get_token ();

  /* The declaration order of the fields below is significant, since
     it fixes their construction order and (reversed) their
     destruction order:
     - m_ltt comes first, because the cpp_reader_ptr uses it.
     - m_parser comes before m_tempfile so that the cpp_reader is
       cleaned up *after* the temp_source_file: the filenames in
       input.c's input cache are owned by the cpp_reader, and they
       must still be alive when ~temp_source_file evicts the
       filename.  */
  line_table_test m_ltt;
  cpp_reader_ptr m_parser;
  temp_source_file m_tempfile;
  string_concat_db m_concats;
  bool m_implicitly_expect_EOF;
};

/* Use an EBCDIC encoding for the execution charset, specifically
   IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").

   This exercises iconv integration within libcpp.
   Not every build of iconv supports the given charset,
   so we need to flag this error and handle it gracefully.
*/ 2138 2139 class ebcdic_execution_charset : public lexer_test_options 2140 { 2141 public: 2142 ebcdic_execution_charset () : m_num_iconv_errors (0) 2143 { 2144 gcc_assert (s_singleton == NULL); 2145 s_singleton = this; 2146 } 2147 ~ebcdic_execution_charset () 2148 { 2149 gcc_assert (s_singleton == this); 2150 s_singleton = NULL; 2151 } 2152 2153 void apply (lexer_test &test) FINAL OVERRIDE 2154 { 2155 cpp_options *cpp_opts = cpp_get_options (test.m_parser); 2156 cpp_opts->narrow_charset = "IBM1047"; 2157 2158 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser); 2159 callbacks->diagnostic = on_diagnostic; 2160 } 2161 2162 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED, 2163 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED, 2164 enum cpp_warning_reason reason ATTRIBUTE_UNUSED, 2165 rich_location *richloc ATTRIBUTE_UNUSED, 2166 const char *msgid, va_list *ap ATTRIBUTE_UNUSED) 2167 ATTRIBUTE_FPTR_PRINTF(5,0) 2168 { 2169 gcc_assert (s_singleton); 2170 /* Avoid exgettext from picking this up, it is translated in libcpp. */ 2171 const char *msg = "conversion from %s to %s not supported by iconv"; 2172 #ifdef ENABLE_NLS 2173 msg = dgettext ("cpplib", msg); 2174 #endif 2175 /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc 2176 when the local iconv build doesn't support the conversion. */ 2177 if (strcmp (msgid, msg) == 0) 2178 { 2179 s_singleton->m_num_iconv_errors++; 2180 return true; 2181 } 2182 2183 /* Otherwise, we have an unexpected error. */ 2184 abort (); 2185 } 2186 2187 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; } 2188 2189 private: 2190 static ebcdic_execution_charset *s_singleton; 2191 int m_num_iconv_errors; 2192 }; 2193 2194 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton; 2195 2196 /* A lexer_test_options subclass that records a list of diagnostic 2197 messages emitted by the lexer. 
*/ 2198 2199 class lexer_diagnostic_sink : public lexer_test_options 2200 { 2201 public: 2202 lexer_diagnostic_sink () 2203 { 2204 gcc_assert (s_singleton == NULL); 2205 s_singleton = this; 2206 } 2207 ~lexer_diagnostic_sink () 2208 { 2209 gcc_assert (s_singleton == this); 2210 s_singleton = NULL; 2211 2212 int i; 2213 char *str; 2214 FOR_EACH_VEC_ELT (m_diagnostics, i, str) 2215 free (str); 2216 } 2217 2218 void apply (lexer_test &test) FINAL OVERRIDE 2219 { 2220 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser); 2221 callbacks->diagnostic = on_diagnostic; 2222 } 2223 2224 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED, 2225 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED, 2226 enum cpp_warning_reason reason ATTRIBUTE_UNUSED, 2227 rich_location *richloc ATTRIBUTE_UNUSED, 2228 const char *msgid, va_list *ap) 2229 ATTRIBUTE_FPTR_PRINTF(5,0) 2230 { 2231 char *msg = xvasprintf (msgid, *ap); 2232 s_singleton->m_diagnostics.safe_push (msg); 2233 return true; 2234 } 2235 2236 auto_vec<char *> m_diagnostics; 2237 2238 private: 2239 static lexer_diagnostic_sink *s_singleton; 2240 }; 2241 2242 lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton; 2243 2244 /* Constructor. Override line_table with a new instance based on CASE_, 2245 and write CONTENT to a tempfile. Create a cpp_reader, and use it to 2246 start parsing the tempfile. */ 2247 2248 lexer_test::lexer_test (const line_table_case &case_, const char *content, 2249 lexer_test_options *options) 2250 : m_ltt (case_), 2251 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)), 2252 /* Create a tempfile and write the text to it. */ 2253 m_tempfile (SELFTEST_LOCATION, ".c", content), 2254 m_concats (), 2255 m_implicitly_expect_EOF (true) 2256 { 2257 if (options) 2258 options->apply (*this); 2259 2260 cpp_init_iconv (m_parser); 2261 2262 /* Parse the file. 
*/ 2263 const char *fname = cpp_read_main_file (m_parser, 2264 m_tempfile.get_filename ()); 2265 ASSERT_NE (fname, NULL); 2266 } 2267 2268 /* Destructor. By default, verify that the next token in m_parser is EOF. */ 2269 2270 lexer_test::~lexer_test () 2271 { 2272 location_t loc; 2273 const cpp_token *tok; 2274 2275 if (m_implicitly_expect_EOF) 2276 { 2277 tok = cpp_get_token_with_location (m_parser, &loc); 2278 ASSERT_NE (tok, NULL); 2279 ASSERT_EQ (tok->type, CPP_EOF); 2280 } 2281 } 2282 2283 /* Get the next token from m_parser. */ 2284 2285 const cpp_token * 2286 lexer_test::get_token () 2287 { 2288 location_t loc; 2289 const cpp_token *tok; 2290 2291 tok = cpp_get_token_with_location (m_parser, &loc); 2292 ASSERT_NE (tok, NULL); 2293 return tok; 2294 } 2295 2296 /* Verify that locations within string literals are correctly handled. */ 2297 2298 /* Verify get_source_range_for_substring for token(s) at STRLOC, 2299 using the string concatenation database for TEST. 2300 2301 Assert that the character at index IDX is on EXPECTED_LINE, 2302 and that it begins at column EXPECTED_START_COL and ends at 2303 EXPECTED_FINISH_COL (unless the locations are beyond 2304 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their 2305 columns). 
*/ 2306 2307 static void 2308 assert_char_at_range (const location &loc, 2309 lexer_test& test, 2310 location_t strloc, enum cpp_ttype type, int idx, 2311 int expected_line, int expected_start_col, 2312 int expected_finish_col) 2313 { 2314 cpp_reader *pfile = test.m_parser; 2315 string_concat_db *concats = &test.m_concats; 2316 2317 source_range actual_range = source_range(); 2318 const char *err 2319 = get_source_range_for_char (pfile, concats, strloc, type, idx, 2320 &actual_range); 2321 if (should_have_column_data_p (strloc)) 2322 ASSERT_EQ_AT (loc, NULL, err); 2323 else 2324 { 2325 ASSERT_STREQ_AT (loc, 2326 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", 2327 err); 2328 return; 2329 } 2330 2331 int actual_start_line = LOCATION_LINE (actual_range.m_start); 2332 ASSERT_EQ_AT (loc, expected_line, actual_start_line); 2333 int actual_finish_line = LOCATION_LINE (actual_range.m_finish); 2334 ASSERT_EQ_AT (loc, expected_line, actual_finish_line); 2335 2336 if (should_have_column_data_p (actual_range.m_start)) 2337 { 2338 int actual_start_col = LOCATION_COLUMN (actual_range.m_start); 2339 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col); 2340 } 2341 if (should_have_column_data_p (actual_range.m_finish)) 2342 { 2343 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish); 2344 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col); 2345 } 2346 } 2347 2348 /* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for 2349 the effective location of any errors. */ 2350 2351 #define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \ 2352 EXPECTED_START_COL, EXPECTED_FINISH_COL) \ 2353 assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \ 2354 (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \ 2355 (EXPECTED_FINISH_COL)) 2356 2357 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC, 2358 using the string concatenation database for TEST. 
2359 2360 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */ 2361 2362 static void 2363 assert_num_substring_ranges (const location &loc, 2364 lexer_test& test, 2365 location_t strloc, 2366 enum cpp_ttype type, 2367 int expected_num_ranges) 2368 { 2369 cpp_reader *pfile = test.m_parser; 2370 string_concat_db *concats = &test.m_concats; 2371 2372 int actual_num_ranges = -1; 2373 const char *err 2374 = get_num_source_ranges_for_substring (pfile, concats, strloc, type, 2375 &actual_num_ranges); 2376 if (should_have_column_data_p (strloc)) 2377 ASSERT_EQ_AT (loc, NULL, err); 2378 else 2379 { 2380 ASSERT_STREQ_AT (loc, 2381 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", 2382 err); 2383 return; 2384 } 2385 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges); 2386 } 2387 2388 /* Macro for calling assert_num_substring_ranges, supplying 2389 SELFTEST_LOCATION for the effective location of any errors. */ 2390 2391 #define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \ 2392 EXPECTED_NUM_RANGES) \ 2393 assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \ 2394 (TYPE), (EXPECTED_NUM_RANGES)) 2395 2396 2397 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC 2398 returns an error (using the string concatenation database for TEST). 
*/ 2399 2400 static void 2401 assert_has_no_substring_ranges (const location &loc, 2402 lexer_test& test, 2403 location_t strloc, 2404 enum cpp_ttype type, 2405 const char *expected_err) 2406 { 2407 cpp_reader *pfile = test.m_parser; 2408 string_concat_db *concats = &test.m_concats; 2409 cpp_substring_ranges ranges; 2410 const char *actual_err 2411 = get_substring_ranges_for_loc (pfile, concats, strloc, 2412 type, ranges); 2413 if (should_have_column_data_p (strloc)) 2414 ASSERT_STREQ_AT (loc, expected_err, actual_err); 2415 else 2416 ASSERT_STREQ_AT (loc, 2417 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", 2418 actual_err); 2419 } 2420 2421 #define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR) \ 2422 assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \ 2423 (STRLOC), (TYPE), (ERR)) 2424 2425 /* Lex a simple string literal. Verify the substring location data, before 2426 and after running cpp_interpret_string on it. */ 2427 2428 static void 2429 test_lexer_string_locations_simple (const line_table_case &case_) 2430 { 2431 /* Digits 0-9 (with 0 at column 10), the simple way. 2432 ....................000000000.11111111112.2222222223333333333 2433 ....................123456789.01234567890.1234567890123456789 2434 We add a trailing comment to ensure that we correctly locate 2435 the end of the string literal token. */ 2436 const char *content = " \"0123456789\" /* not a string */\n"; 2437 lexer_test test (case_, content, NULL); 2438 2439 /* Verify that we get the expected token back, with the correct 2440 location information. */ 2441 const cpp_token *tok = test.get_token (); 2442 ASSERT_EQ (tok->type, CPP_STRING); 2443 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\""); 2444 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20); 2445 2446 /* At this point in lexing, the quote characters are treated as part of 2447 the string (they are stripped off by cpp_interpret_string). 
*/ 2448 2449 ASSERT_EQ (tok->val.str.len, 12); 2450 2451 /* Verify that cpp_interpret_string works. */ 2452 cpp_string dst_string; 2453 const enum cpp_ttype type = CPP_STRING; 2454 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, 2455 &dst_string, type); 2456 ASSERT_TRUE (result); 2457 ASSERT_STREQ ("0123456789", (const char *)dst_string.text); 2458 free (const_cast <unsigned char *> (dst_string.text)); 2459 2460 /* Verify ranges of individual characters. This no longer includes the 2461 opening quote, but does include the closing quote. */ 2462 for (int i = 0; i <= 10; i++) 2463 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 2464 10 + i, 10 + i); 2465 2466 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11); 2467 } 2468 2469 /* As test_lexer_string_locations_simple, but use an EBCDIC execution 2470 encoding. */ 2471 2472 static void 2473 test_lexer_string_locations_ebcdic (const line_table_case &case_) 2474 { 2475 /* EBCDIC support requires iconv. */ 2476 if (!HAVE_ICONV) 2477 return; 2478 2479 /* Digits 0-9 (with 0 at column 10), the simple way. 2480 ....................000000000.11111111112.2222222223333333333 2481 ....................123456789.01234567890.1234567890123456789 2482 We add a trailing comment to ensure that we correctly locate 2483 the end of the string literal token. */ 2484 const char *content = " \"0123456789\" /* not a string */\n"; 2485 ebcdic_execution_charset use_ebcdic; 2486 lexer_test test (case_, content, &use_ebcdic); 2487 2488 /* Verify that we get the expected token back, with the correct 2489 location information. */ 2490 const cpp_token *tok = test.get_token (); 2491 ASSERT_EQ (tok->type, CPP_STRING); 2492 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\""); 2493 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20); 2494 2495 /* At this point in lexing, the quote characters are treated as part of 2496 the string (they are stripped off by cpp_interpret_string). 
*/ 2497 2498 ASSERT_EQ (tok->val.str.len, 12); 2499 2500 /* The remainder of the test requires an iconv implementation that 2501 can convert from UTF-8 to the EBCDIC encoding requested above. */ 2502 if (use_ebcdic.iconv_errors_occurred_p ()) 2503 return; 2504 2505 /* Verify that cpp_interpret_string works. */ 2506 cpp_string dst_string; 2507 const enum cpp_ttype type = CPP_STRING; 2508 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, 2509 &dst_string, type); 2510 ASSERT_TRUE (result); 2511 /* We should now have EBCDIC-encoded text, specifically 2512 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047"). 2513 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */ 2514 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9", 2515 (const char *)dst_string.text); 2516 free (const_cast <unsigned char *> (dst_string.text)); 2517 2518 /* Verify that we don't attempt to record substring location information 2519 for such cases. */ 2520 ASSERT_HAS_NO_SUBSTRING_RANGES 2521 (test, tok->src_loc, type, 2522 "execution character set != source character set"); 2523 } 2524 2525 /* Lex a string literal containing a hex-escaped character. 2526 Verify the substring location data, before and after running 2527 cpp_interpret_string on it. */ 2528 2529 static void 2530 test_lexer_string_locations_hex (const line_table_case &case_) 2531 { 2532 /* Digits 0-9, expressing digit 5 in ASCII as "\x35" 2533 and with a space in place of digit 6, to terminate the escaped 2534 hex code. 2535 ....................000000000.111111.11112222. 2536 ....................123456789.012345.67890123. */ 2537 const char *content = " \"01234\\x35 789\"\n"; 2538 lexer_test test (case_, content, NULL); 2539 2540 /* Verify that we get the expected token back, with the correct 2541 location information. 
*/ 2542 const cpp_token *tok = test.get_token (); 2543 ASSERT_EQ (tok->type, CPP_STRING); 2544 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\""); 2545 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23); 2546 2547 /* At this point in lexing, the quote characters are treated as part of 2548 the string (they are stripped off by cpp_interpret_string). */ 2549 ASSERT_EQ (tok->val.str.len, 15); 2550 2551 /* Verify that cpp_interpret_string works. */ 2552 cpp_string dst_string; 2553 const enum cpp_ttype type = CPP_STRING; 2554 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, 2555 &dst_string, type); 2556 ASSERT_TRUE (result); 2557 ASSERT_STREQ ("012345 789", (const char *)dst_string.text); 2558 free (const_cast <unsigned char *> (dst_string.text)); 2559 2560 /* Verify ranges of individual characters. This no longer includes the 2561 opening quote, but does include the closing quote. */ 2562 for (int i = 0; i <= 4; i++) 2563 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); 2564 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18); 2565 for (int i = 6; i <= 10; i++) 2566 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i); 2567 2568 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11); 2569 } 2570 2571 /* Lex a string literal containing an octal-escaped character. 2572 Verify the substring location data after running cpp_interpret_string 2573 on it. */ 2574 2575 static void 2576 test_lexer_string_locations_oct (const line_table_case &case_) 2577 { 2578 /* Digits 0-9, expressing digit 5 in ASCII as "\065" 2579 and with a space in place of digit 6, to terminate the escaped 2580 octal code. 
2581 ....................000000000.111111.11112222.2222223333333333444 2582 ....................123456789.012345.67890123.4567890123456789012 */ 2583 const char *content = " \"01234\\065 789\" /* not a string */\n"; 2584 lexer_test test (case_, content, NULL); 2585 2586 /* Verify that we get the expected token back, with the correct 2587 location information. */ 2588 const cpp_token *tok = test.get_token (); 2589 ASSERT_EQ (tok->type, CPP_STRING); 2590 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\""); 2591 2592 /* Verify that cpp_interpret_string works. */ 2593 cpp_string dst_string; 2594 const enum cpp_ttype type = CPP_STRING; 2595 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, 2596 &dst_string, type); 2597 ASSERT_TRUE (result); 2598 ASSERT_STREQ ("012345 789", (const char *)dst_string.text); 2599 free (const_cast <unsigned char *> (dst_string.text)); 2600 2601 /* Verify ranges of individual characters. This no longer includes the 2602 opening quote, but does include the closing quote. */ 2603 for (int i = 0; i < 5; i++) 2604 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); 2605 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18); 2606 for (int i = 6; i <= 10; i++) 2607 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i); 2608 2609 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11); 2610 } 2611 2612 /* Test of string literal containing letter escapes. */ 2613 2614 static void 2615 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_) 2616 { 2617 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar. 2618 .....................000000000.1.11111.1.1.11222.22222223333333 2619 .....................123456789.0.12345.6.7.89012.34567890123456. */ 2620 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n"); 2621 lexer_test test (case_, content, NULL); 2622 2623 /* Verify that we get the expected tokens back. 
*/ 2624 const cpp_token *tok = test.get_token (); 2625 ASSERT_EQ (tok->type, CPP_STRING); 2626 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\""); 2627 2628 /* Verify ranges of individual characters. */ 2629 /* "\t". */ 2630 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, 2631 0, 1, 10, 11); 2632 /* "foo". */ 2633 for (int i = 1; i <= 3; i++) 2634 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, 2635 i, 1, 11 + i, 11 + i); 2636 /* "\\" and "\n". */ 2637 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, 2638 4, 1, 15, 16); 2639 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, 2640 5, 1, 17, 18); 2641 2642 /* "bar" and closing quote for nul-terminator. */ 2643 for (int i = 6; i <= 9; i++) 2644 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, 2645 i, 1, 13 + i, 13 + i); 2646 2647 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10); 2648 } 2649 2650 /* Another test of a string literal containing a letter escape. 2651 Based on string seen in 2652 printf ("%-%\n"); 2653 in gcc.dg/format/c90-printf-1.c. */ 2654 2655 static void 2656 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_) 2657 { 2658 /* .....................000000000.1111.11.1111.22222222223. 2659 .....................123456789.0123.45.6789.01234567890. */ 2660 const char *content = (" \"%-%\\n\" /* non-str */\n"); 2661 lexer_test test (case_, content, NULL); 2662 2663 /* Verify that we get the expected tokens back. */ 2664 const cpp_token *tok = test.get_token (); 2665 ASSERT_EQ (tok->type, CPP_STRING); 2666 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\""); 2667 2668 /* Verify ranges of individual characters. */ 2669 /* "%-%". */ 2670 for (int i = 0; i < 3; i++) 2671 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, 2672 i, 1, 10 + i, 10 + i); 2673 /* "\n". */ 2674 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, 2675 3, 1, 13, 14); 2676 2677 /* Closing quote for nul-terminator. 
*/ 2678 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, 2679 4, 1, 15, 15); 2680 2681 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5); 2682 } 2683 2684 /* Lex a string literal containing UCN 4 characters. 2685 Verify the substring location data after running cpp_interpret_string 2686 on it. */ 2687 2688 static void 2689 test_lexer_string_locations_ucn4 (const line_table_case &case_) 2690 { 2691 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed 2692 as UCN 4. 2693 ....................000000000.111111.111122.222222223.33333333344444 2694 ....................123456789.012345.678901.234567890.12345678901234 */ 2695 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n"; 2696 lexer_test test (case_, content, NULL); 2697 2698 /* Verify that we get the expected token back, with the correct 2699 location information. */ 2700 const cpp_token *tok = test.get_token (); 2701 ASSERT_EQ (tok->type, CPP_STRING); 2702 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\""); 2703 2704 /* Verify that cpp_interpret_string works. 2705 The string should be encoded in the execution character 2706 set. Assuming that is UTF-8, we should have the following: 2707 ----------- ---- ----- ------- ---------------- 2708 Byte offset Byte Octal Unicode Source Column(s) 2709 ----------- ---- ----- ------- ---------------- 2710 0 0x30 '0' 10 2711 1 0x31 '1' 11 2712 2 0x32 '2' 12 2713 3 0x33 '3' 13 2714 4 0x34 '4' 14 2715 5 0xE2 \342 U+2174 15-20 2716 6 0x85 \205 (cont) 15-20 2717 7 0xB4 \264 (cont) 15-20 2718 8 0xE2 \342 U+2175 21-26 2719 9 0x85 \205 (cont) 21-26 2720 10 0xB5 \265 (cont) 21-26 2721 11 0x37 '7' 27 2722 12 0x38 '8' 28 2723 13 0x39 '9' 29 2724 14 0x00 30 (closing quote) 2725 ----------- ---- ----- ------- ---------------. 
*/ 2726 2727 cpp_string dst_string; 2728 const enum cpp_ttype type = CPP_STRING; 2729 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, 2730 &dst_string, type); 2731 ASSERT_TRUE (result); 2732 ASSERT_STREQ ("01234\342\205\264\342\205\265789", 2733 (const char *)dst_string.text); 2734 free (const_cast <unsigned char *> (dst_string.text)); 2735 2736 /* Verify ranges of individual characters. This no longer includes the 2737 opening quote, but does include the closing quote. 2738 '01234'. */ 2739 for (int i = 0; i <= 4; i++) 2740 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); 2741 /* U+2174. */ 2742 for (int i = 5; i <= 7; i++) 2743 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20); 2744 /* U+2175. */ 2745 for (int i = 8; i <= 10; i++) 2746 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26); 2747 /* '789' and nul terminator */ 2748 for (int i = 11; i <= 14; i++) 2749 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i); 2750 2751 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15); 2752 } 2753 2754 /* Lex a string literal containing UCN 8 characters. 2755 Verify the substring location data after running cpp_interpret_string 2756 on it. */ 2757 2758 static void 2759 test_lexer_string_locations_ucn8 (const line_table_case &case_) 2760 { 2761 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8. 2762 ....................000000000.111111.1111222222.2222333333333.344444 2763 ....................123456789.012345.6789012345.6789012345678.901234 */ 2764 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n"; 2765 lexer_test test (case_, content, NULL); 2766 2767 /* Verify that we get the expected token back, with the correct 2768 location information. 
*/ 2769 const cpp_token *tok = test.get_token (); 2770 ASSERT_EQ (tok->type, CPP_STRING); 2771 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, 2772 "\"01234\\U00002174\\U00002175789\""); 2773 2774 /* Verify that cpp_interpret_string works. 2775 The UTF-8 encoding of the string is identical to that from 2776 the ucn4 testcase above; the only difference is the column 2777 locations. */ 2778 cpp_string dst_string; 2779 const enum cpp_ttype type = CPP_STRING; 2780 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, 2781 &dst_string, type); 2782 ASSERT_TRUE (result); 2783 ASSERT_STREQ ("01234\342\205\264\342\205\265789", 2784 (const char *)dst_string.text); 2785 free (const_cast <unsigned char *> (dst_string.text)); 2786 2787 /* Verify ranges of individual characters. This no longer includes the 2788 opening quote, but does include the closing quote. 2789 '01234'. */ 2790 for (int i = 0; i <= 4; i++) 2791 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); 2792 /* U+2174. */ 2793 for (int i = 5; i <= 7; i++) 2794 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24); 2795 /* U+2175. */ 2796 for (int i = 8; i <= 10; i++) 2797 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34); 2798 /* '789' at columns 35-37 */ 2799 for (int i = 11; i <= 13; i++) 2800 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i); 2801 /* Closing quote/nul-terminator at column 38. */ 2802 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38); 2803 2804 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15); 2805 } 2806 2807 /* Fetch a big-endian 32-bit value and convert to host endianness. 
*/ 2808 2809 static uint32_t 2810 uint32_from_big_endian (const uint32_t *ptr_be_value) 2811 { 2812 const unsigned char *buf = (const unsigned char *)ptr_be_value; 2813 return (((uint32_t) buf[0] << 24) 2814 | ((uint32_t) buf[1] << 16) 2815 | ((uint32_t) buf[2] << 8) 2816 | (uint32_t) buf[3]); 2817 } 2818 2819 /* Lex a wide string literal and verify that attempts to read substring 2820 location data from it fail gracefully. */ 2821 2822 static void 2823 test_lexer_string_locations_wide_string (const line_table_case &case_) 2824 { 2825 /* Digits 0-9. 2826 ....................000000000.11111111112.22222222233333 2827 ....................123456789.01234567890.12345678901234 */ 2828 const char *content = " L\"0123456789\" /* non-str */\n"; 2829 lexer_test test (case_, content, NULL); 2830 2831 /* Verify that we get the expected token back, with the correct 2832 location information. */ 2833 const cpp_token *tok = test.get_token (); 2834 ASSERT_EQ (tok->type, CPP_WSTRING); 2835 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\""); 2836 2837 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */ 2838 cpp_string dst_string; 2839 const enum cpp_ttype type = CPP_WSTRING; 2840 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, 2841 &dst_string, type); 2842 ASSERT_TRUE (result); 2843 /* The cpp_reader defaults to big-endian with 2844 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should 2845 now be encoded as UTF-32BE. */ 2846 const uint32_t *be32_chars = (const uint32_t *)dst_string.text; 2847 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0])); 2848 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5])); 2849 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9])); 2850 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10])); 2851 free (const_cast <unsigned char *> (dst_string.text)); 2852 2853 /* We don't yet support generating substring location information 2854 for L"" strings. 
*/ 2855 ASSERT_HAS_NO_SUBSTRING_RANGES 2856 (test, tok->src_loc, type, 2857 "execution character set != source character set"); 2858 } 2859 2860 /* Fetch a big-endian 16-bit value and convert to host endianness. */ 2861 2862 static uint16_t 2863 uint16_from_big_endian (const uint16_t *ptr_be_value) 2864 { 2865 const unsigned char *buf = (const unsigned char *)ptr_be_value; 2866 return ((uint16_t) buf[0] << 8) | (uint16_t) buf[1]; 2867 } 2868 2869 /* Lex a u"" string literal and verify that attempts to read substring 2870 location data from it fail gracefully. */ 2871 2872 static void 2873 test_lexer_string_locations_string16 (const line_table_case &case_) 2874 { 2875 /* Digits 0-9. 2876 ....................000000000.11111111112.22222222233333 2877 ....................123456789.01234567890.12345678901234 */ 2878 const char *content = " u\"0123456789\" /* non-str */\n"; 2879 lexer_test test (case_, content, NULL); 2880 2881 /* Verify that we get the expected token back, with the correct 2882 location information. */ 2883 const cpp_token *tok = test.get_token (); 2884 ASSERT_EQ (tok->type, CPP_STRING16); 2885 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\""); 2886 2887 /* Verify that cpp_interpret_string works, using CPP_STRING16. */ 2888 cpp_string dst_string; 2889 const enum cpp_ttype type = CPP_STRING16; 2890 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, 2891 &dst_string, type); 2892 ASSERT_TRUE (result); 2893 2894 /* The cpp_reader defaults to big-endian, so dst_string should 2895 now be encoded as UTF-16BE. 
*/ 2896 const uint16_t *be16_chars = (const uint16_t *)dst_string.text; 2897 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0])); 2898 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5])); 2899 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9])); 2900 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10])); 2901 free (const_cast <unsigned char *> (dst_string.text)); 2902 2903 /* We don't yet support generating substring location information 2904 for L"" strings. */ 2905 ASSERT_HAS_NO_SUBSTRING_RANGES 2906 (test, tok->src_loc, type, 2907 "execution character set != source character set"); 2908 } 2909 2910 /* Lex a U"" string literal and verify that attempts to read substring 2911 location data from it fail gracefully. */ 2912 2913 static void 2914 test_lexer_string_locations_string32 (const line_table_case &case_) 2915 { 2916 /* Digits 0-9. 2917 ....................000000000.11111111112.22222222233333 2918 ....................123456789.01234567890.12345678901234 */ 2919 const char *content = " U\"0123456789\" /* non-str */\n"; 2920 lexer_test test (case_, content, NULL); 2921 2922 /* Verify that we get the expected token back, with the correct 2923 location information. */ 2924 const cpp_token *tok = test.get_token (); 2925 ASSERT_EQ (tok->type, CPP_STRING32); 2926 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\""); 2927 2928 /* Verify that cpp_interpret_string works, using CPP_STRING32. */ 2929 cpp_string dst_string; 2930 const enum cpp_ttype type = CPP_STRING32; 2931 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, 2932 &dst_string, type); 2933 ASSERT_TRUE (result); 2934 2935 /* The cpp_reader defaults to big-endian, so dst_string should 2936 now be encoded as UTF-32BE. 
*/ 2937 const uint32_t *be32_chars = (const uint32_t *)dst_string.text; 2938 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0])); 2939 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5])); 2940 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9])); 2941 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10])); 2942 free (const_cast <unsigned char *> (dst_string.text)); 2943 2944 /* We don't yet support generating substring location information 2945 for L"" strings. */ 2946 ASSERT_HAS_NO_SUBSTRING_RANGES 2947 (test, tok->src_loc, type, 2948 "execution character set != source character set"); 2949 } 2950 2951 /* Lex a u8-string literal. 2952 Verify the substring location data after running cpp_interpret_string 2953 on it. */ 2954 2955 static void 2956 test_lexer_string_locations_u8 (const line_table_case &case_) 2957 { 2958 /* Digits 0-9. 2959 ....................000000000.11111111112.22222222233333 2960 ....................123456789.01234567890.12345678901234 */ 2961 const char *content = " u8\"0123456789\" /* non-str */\n"; 2962 lexer_test test (case_, content, NULL); 2963 2964 /* Verify that we get the expected token back, with the correct 2965 location information. */ 2966 const cpp_token *tok = test.get_token (); 2967 ASSERT_EQ (tok->type, CPP_UTF8STRING); 2968 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\""); 2969 2970 /* Verify that cpp_interpret_string works. */ 2971 cpp_string dst_string; 2972 const enum cpp_ttype type = CPP_STRING; 2973 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, 2974 &dst_string, type); 2975 ASSERT_TRUE (result); 2976 ASSERT_STREQ ("0123456789", (const char *)dst_string.text); 2977 free (const_cast <unsigned char *> (dst_string.text)); 2978 2979 /* Verify ranges of individual characters. This no longer includes the 2980 opening quote, but does include the closing quote. 
*/ 2981 for (int i = 0; i <= 10; i++) 2982 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); 2983 } 2984 2985 /* Lex a string literal containing UTF-8 source characters. 2986 Verify the substring location data after running cpp_interpret_string 2987 on it. */ 2988 2989 static void 2990 test_lexer_string_locations_utf8_source (const line_table_case &case_) 2991 { 2992 /* This string literal is written out to the source file as UTF-8, 2993 and is of the form "before mojibake after", where "mojibake" 2994 is written as the following four unicode code points: 2995 U+6587 CJK UNIFIED IDEOGRAPH-6587 2996 U+5B57 CJK UNIFIED IDEOGRAPH-5B57 2997 U+5316 CJK UNIFIED IDEOGRAPH-5316 2998 U+3051 HIRAGANA LETTER KE. 2999 Each of these is 3 bytes wide when encoded in UTF-8, whereas the 3000 "before" and "after" are 1 byte per unicode character. 3001 3002 The numbering shown are "columns", which are *byte* numbers within 3003 the line, rather than unicode character numbers. 3004 3005 .................... 000000000.1111111. 3006 .................... 123456789.0123456. */ 3007 const char *content = (" \"before " 3008 /* U+6587 CJK UNIFIED IDEOGRAPH-6587 3009 UTF-8: 0xE6 0x96 0x87 3010 C octal escaped UTF-8: \346\226\207 3011 "column" numbers: 17-19. */ 3012 "\346\226\207" 3013 3014 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57 3015 UTF-8: 0xE5 0xAD 0x97 3016 C octal escaped UTF-8: \345\255\227 3017 "column" numbers: 20-22. */ 3018 "\345\255\227" 3019 3020 /* U+5316 CJK UNIFIED IDEOGRAPH-5316 3021 UTF-8: 0xE5 0x8C 0x96 3022 C octal escaped UTF-8: \345\214\226 3023 "column" numbers: 23-25. */ 3024 "\345\214\226" 3025 3026 /* U+3051 HIRAGANA LETTER KE 3027 UTF-8: 0xE3 0x81 0x91 3028 C octal escaped UTF-8: \343\201\221 3029 "column" numbers: 26-28. */ 3030 "\343\201\221" 3031 3032 /* column numbers 29 onwards 3033 2333333.33334444444444 3034 9012345.67890123456789. 
*/ 3035 " after\" /* non-str */\n"); 3036 lexer_test test (case_, content, NULL); 3037 3038 /* Verify that we get the expected token back, with the correct 3039 location information. */ 3040 const cpp_token *tok = test.get_token (); 3041 ASSERT_EQ (tok->type, CPP_STRING); 3042 ASSERT_TOKEN_AS_TEXT_EQ 3043 (test.m_parser, tok, 3044 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\""); 3045 3046 /* Verify that cpp_interpret_string works. */ 3047 cpp_string dst_string; 3048 const enum cpp_ttype type = CPP_STRING; 3049 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, 3050 &dst_string, type); 3051 ASSERT_TRUE (result); 3052 ASSERT_STREQ 3053 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after", 3054 (const char *)dst_string.text); 3055 free (const_cast <unsigned char *> (dst_string.text)); 3056 3057 /* Verify ranges of individual characters. This no longer includes the 3058 opening quote, but does include the closing quote. 3059 Assuming that both source and execution encodings are UTF-8, we have 3060 a run of 25 octets in each, plus the NUL terminator. */ 3061 for (int i = 0; i < 25; i++) 3062 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); 3063 /* NUL-terminator should use the closing quote at column 35. */ 3064 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35); 3065 3066 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26); 3067 } 3068 3069 /* Test of string literal concatenation. */ 3070 3071 static void 3072 test_lexer_string_locations_concatenation_1 (const line_table_case &case_) 3073 { 3074 /* Digits 0-9. 3075 .....................000000000.111111.11112222222222 3076 .....................123456789.012345.67890123456789. */ 3077 const char *content = (" \"01234\" /* non-str */\n" 3078 " \"56789\" /* non-str */\n"); 3079 lexer_test test (case_, content, NULL); 3080 3081 location_t input_locs[2]; 3082 3083 /* Verify that we get the expected tokens back. 
*/ 3084 auto_vec <cpp_string> input_strings; 3085 const cpp_token *tok_a = test.get_token (); 3086 ASSERT_EQ (tok_a->type, CPP_STRING); 3087 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\""); 3088 input_strings.safe_push (tok_a->val.str); 3089 input_locs[0] = tok_a->src_loc; 3090 3091 const cpp_token *tok_b = test.get_token (); 3092 ASSERT_EQ (tok_b->type, CPP_STRING); 3093 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\""); 3094 input_strings.safe_push (tok_b->val.str); 3095 input_locs[1] = tok_b->src_loc; 3096 3097 /* Verify that cpp_interpret_string works. */ 3098 cpp_string dst_string; 3099 const enum cpp_ttype type = CPP_STRING; 3100 bool result = cpp_interpret_string (test.m_parser, 3101 input_strings.address (), 2, 3102 &dst_string, type); 3103 ASSERT_TRUE (result); 3104 ASSERT_STREQ ("0123456789", (const char *)dst_string.text); 3105 free (const_cast <unsigned char *> (dst_string.text)); 3106 3107 /* Simulate c-lex.c's lex_string in order to record concatenation. */ 3108 test.m_concats.record_string_concatenation (2, input_locs); 3109 3110 location_t initial_loc = input_locs[0]; 3111 3112 /* "01234" on line 1. */ 3113 for (int i = 0; i <= 4; i++) 3114 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i); 3115 /* "56789" in line 2, plus its closing quote for the nul terminator. */ 3116 for (int i = 5; i <= 10; i++) 3117 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i); 3118 3119 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11); 3120 } 3121 3122 /* Another test of string literal concatenation. */ 3123 3124 static void 3125 test_lexer_string_locations_concatenation_2 (const line_table_case &case_) 3126 { 3127 /* Digits 0-9. 3128 .....................000000000.111.11111112222222 3129 .....................123456789.012.34567890123456. 
*/ 3130 const char *content = (" \"01\" /* non-str */\n" 3131 " \"23\" /* non-str */\n" 3132 " \"45\" /* non-str */\n" 3133 " \"67\" /* non-str */\n" 3134 " \"89\" /* non-str */\n"); 3135 lexer_test test (case_, content, NULL); 3136 3137 auto_vec <cpp_string> input_strings; 3138 location_t input_locs[5]; 3139 3140 /* Verify that we get the expected tokens back. */ 3141 for (int i = 0; i < 5; i++) 3142 { 3143 const cpp_token *tok = test.get_token (); 3144 ASSERT_EQ (tok->type, CPP_STRING); 3145 input_strings.safe_push (tok->val.str); 3146 input_locs[i] = tok->src_loc; 3147 } 3148 3149 /* Verify that cpp_interpret_string works. */ 3150 cpp_string dst_string; 3151 const enum cpp_ttype type = CPP_STRING; 3152 bool result = cpp_interpret_string (test.m_parser, 3153 input_strings.address (), 5, 3154 &dst_string, type); 3155 ASSERT_TRUE (result); 3156 ASSERT_STREQ ("0123456789", (const char *)dst_string.text); 3157 free (const_cast <unsigned char *> (dst_string.text)); 3158 3159 /* Simulate c-lex.c's lex_string in order to record concatenation. */ 3160 test.m_concats.record_string_concatenation (5, input_locs); 3161 3162 location_t initial_loc = input_locs[0]; 3163 3164 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can 3165 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS 3166 and expect get_source_range_for_substring to fail. 3167 However, for a string concatenation test, we can have a case 3168 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS, 3169 but subsequent strings can be after it. 3170 Attempting to detect this within assert_char_at_range 3171 would overcomplicate the logic for the common test cases, so 3172 we detect it here. */ 3173 if (should_have_column_data_p (input_locs[0]) 3174 && !should_have_column_data_p (input_locs[4])) 3175 { 3176 /* Verify that get_source_range_for_substring gracefully rejects 3177 this case. 
*/ 3178 source_range actual_range; 3179 const char *err 3180 = get_source_range_for_char (test.m_parser, &test.m_concats, 3181 initial_loc, type, 0, &actual_range); 3182 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err); 3183 return; 3184 } 3185 3186 for (int i = 0; i < 5; i++) 3187 for (int j = 0; j < 2; j++) 3188 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j, 3189 i + 1, 10 + j, 10 + j); 3190 3191 /* NUL-terminator should use the final closing quote at line 5 column 12. */ 3192 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12); 3193 3194 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11); 3195 } 3196 3197 /* Another test of string literal concatenation, this time combined with 3198 various kinds of escaped characters. */ 3199 3200 static void 3201 test_lexer_string_locations_concatenation_3 (const line_table_case &case_) 3202 { 3203 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35" 3204 digit 6 in ASCII as octal "\066", concatenating multiple strings. */ 3205 const char *content 3206 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555 3207 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */ 3208 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n"); 3209 lexer_test test (case_, content, NULL); 3210 3211 auto_vec <cpp_string> input_strings; 3212 location_t input_locs[4]; 3213 3214 /* Verify that we get the expected tokens back. */ 3215 for (int i = 0; i < 4; i++) 3216 { 3217 const cpp_token *tok = test.get_token (); 3218 ASSERT_EQ (tok->type, CPP_STRING); 3219 input_strings.safe_push (tok->val.str); 3220 input_locs[i] = tok->src_loc; 3221 } 3222 3223 /* Verify that cpp_interpret_string works. 
*/ 3224 cpp_string dst_string; 3225 const enum cpp_ttype type = CPP_STRING; 3226 bool result = cpp_interpret_string (test.m_parser, 3227 input_strings.address (), 4, 3228 &dst_string, type); 3229 ASSERT_TRUE (result); 3230 ASSERT_STREQ ("0123456789", (const char *)dst_string.text); 3231 free (const_cast <unsigned char *> (dst_string.text)); 3232 3233 /* Simulate c-lex.c's lex_string in order to record concatenation. */ 3234 test.m_concats.record_string_concatenation (4, input_locs); 3235 3236 location_t initial_loc = input_locs[0]; 3237 3238 for (int i = 0; i <= 4; i++) 3239 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i); 3240 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22); 3241 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30); 3242 for (int i = 7; i <= 9; i++) 3243 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i); 3244 3245 /* NUL-terminator should use the location of the final closing quote. */ 3246 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38); 3247 3248 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11); 3249 } 3250 3251 /* Test of string literal in a macro. */ 3252 3253 static void 3254 test_lexer_string_locations_macro (const line_table_case &case_) 3255 { 3256 /* Digits 0-9. 3257 .....................0000000001111111111.22222222223. 3258 .....................1234567890123456789.01234567890. */ 3259 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n" 3260 " MACRO"); 3261 lexer_test test (case_, content, NULL); 3262 3263 /* Verify that we get the expected tokens back. */ 3264 const cpp_token *tok = test.get_token (); 3265 ASSERT_EQ (tok->type, CPP_PADDING); 3266 3267 tok = test.get_token (); 3268 ASSERT_EQ (tok->type, CPP_STRING); 3269 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\""); 3270 3271 /* Verify ranges of individual characters. We ought to 3272 see columns within the macro definition. 
*/ 3273 for (int i = 0; i <= 10; i++) 3274 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, 3275 i, 1, 20 + i, 20 + i); 3276 3277 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11); 3278 3279 tok = test.get_token (); 3280 ASSERT_EQ (tok->type, CPP_PADDING); 3281 } 3282 3283 /* Test of stringification of a macro argument. */ 3284 3285 static void 3286 test_lexer_string_locations_stringified_macro_argument 3287 (const line_table_case &case_) 3288 { 3289 /* .....................000000000111111111122222222223. 3290 .....................123456789012345678901234567890. */ 3291 const char *content = ("#define MACRO(X) #X /* non-str */\n" 3292 "MACRO(foo)\n"); 3293 lexer_test test (case_, content, NULL); 3294 3295 /* Verify that we get the expected token back. */ 3296 const cpp_token *tok = test.get_token (); 3297 ASSERT_EQ (tok->type, CPP_PADDING); 3298 3299 tok = test.get_token (); 3300 ASSERT_EQ (tok->type, CPP_STRING); 3301 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\""); 3302 3303 /* We don't support getting the location of a stringified macro 3304 argument. Verify that it fails gracefully. */ 3305 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 3306 "cpp_interpret_string_1 failed"); 3307 3308 tok = test.get_token (); 3309 ASSERT_EQ (tok->type, CPP_PADDING); 3310 3311 tok = test.get_token (); 3312 ASSERT_EQ (tok->type, CPP_PADDING); 3313 } 3314 3315 /* Ensure that we are fail gracefully if something attempts to pass 3316 in a location that isn't a string literal token. Seen on this code: 3317 3318 const char a[] = " %d "; 3319 __builtin_printf (a, 0.5); 3320 ^ 3321 3322 when c-format.c erroneously used the indicated one-character 3323 location as the format string location, leading to a read past the 3324 end of a string buffer in cpp_interpret_string_1. */ 3325 3326 static void 3327 test_lexer_string_locations_non_string (const line_table_case &case_) 3328 { 3329 /* .....................000000000111111111122222222223. 
3330 .....................123456789012345678901234567890. */ 3331 const char *content = (" a\n"); 3332 lexer_test test (case_, content, NULL); 3333 3334 /* Verify that we get the expected token back. */ 3335 const cpp_token *tok = test.get_token (); 3336 ASSERT_EQ (tok->type, CPP_NAME); 3337 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a"); 3338 3339 /* At this point, libcpp is attempting to interpret the name as a 3340 string literal, despite it not starting with a quote. We don't detect 3341 that, but we should at least fail gracefully. */ 3342 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 3343 "cpp_interpret_string_1 failed"); 3344 } 3345 3346 /* Ensure that we can read substring information for a token which 3347 starts in one linemap and ends in another . Adapted from 3348 gcc.dg/cpp/pr69985.c. */ 3349 3350 static void 3351 test_lexer_string_locations_long_line (const line_table_case &case_) 3352 { 3353 /* .....................000000.000111111111 3354 .....................123456.789012346789. */ 3355 const char *content = ("/* A very long line, so that we start a new line map. */\n" 3356 " \"0123456789012345678901234567890123456789" 3357 "0123456789012345678901234567890123456789" 3358 "0123456789012345678901234567890123456789" 3359 "0123456789\"\n"); 3360 3361 lexer_test test (case_, content, NULL); 3362 3363 /* Verify that we get the expected token back. */ 3364 const cpp_token *tok = test.get_token (); 3365 ASSERT_EQ (tok->type, CPP_STRING); 3366 3367 if (!should_have_column_data_p (line_table->highest_location)) 3368 return; 3369 3370 /* Verify ranges of individual characters. */ 3371 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131); 3372 for (int i = 0; i < 131; i++) 3373 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, 3374 i, 2, 7 + i, 7 + i); 3375 } 3376 3377 /* Test of locations within a raw string that doesn't contain a newline. 
*/ 3378 3379 static void 3380 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_) 3381 { 3382 /* .....................00.0000000111111111122. 3383 .....................12.3456789012345678901. */ 3384 const char *content = ("R\"foo(0123456789)foo\"\n"); 3385 lexer_test test (case_, content, NULL); 3386 3387 /* Verify that we get the expected token back. */ 3388 const cpp_token *tok = test.get_token (); 3389 ASSERT_EQ (tok->type, CPP_STRING); 3390 3391 /* Verify that cpp_interpret_string works. */ 3392 cpp_string dst_string; 3393 const enum cpp_ttype type = CPP_STRING; 3394 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, 3395 &dst_string, type); 3396 ASSERT_TRUE (result); 3397 ASSERT_STREQ ("0123456789", (const char *)dst_string.text); 3398 free (const_cast <unsigned char *> (dst_string.text)); 3399 3400 if (!should_have_column_data_p (line_table->highest_location)) 3401 return; 3402 3403 /* 0-9, plus the nil terminator. */ 3404 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11); 3405 for (int i = 0; i < 11; i++) 3406 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, 3407 i, 1, 7 + i, 7 + i); 3408 } 3409 3410 /* Test of locations within a raw string that contains a newline. */ 3411 3412 static void 3413 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_) 3414 { 3415 /* .....................00.0000. 3416 .....................12.3456. */ 3417 const char *content = ("R\"foo(\n" 3418 /* .....................00000. 3419 .....................12345. */ 3420 "hello\n" 3421 "world\n" 3422 /* .....................00000. 3423 .....................12345. */ 3424 ")foo\"\n"); 3425 lexer_test test (case_, content, NULL); 3426 3427 /* Verify that we get the expected token back. */ 3428 const cpp_token *tok = test.get_token (); 3429 ASSERT_EQ (tok->type, CPP_STRING); 3430 3431 /* Verify that cpp_interpret_string works. 
*/ 3432 cpp_string dst_string; 3433 const enum cpp_ttype type = CPP_STRING; 3434 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, 3435 &dst_string, type); 3436 ASSERT_TRUE (result); 3437 ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text); 3438 free (const_cast <unsigned char *> (dst_string.text)); 3439 3440 if (!should_have_column_data_p (line_table->highest_location)) 3441 return; 3442 3443 /* Currently we don't support locations within raw strings that 3444 contain newlines. */ 3445 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type, 3446 "range endpoints are on different lines"); 3447 } 3448 3449 /* Test of parsing an unterminated raw string. */ 3450 3451 static void 3452 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_) 3453 { 3454 const char *content = "R\"ouch()ouCh\" /* etc */"; 3455 3456 lexer_diagnostic_sink diagnostics; 3457 lexer_test test (case_, content, &diagnostics); 3458 test.m_implicitly_expect_EOF = false; 3459 3460 /* Attempt to parse the raw string. */ 3461 const cpp_token *tok = test.get_token (); 3462 ASSERT_EQ (tok->type, CPP_EOF); 3463 3464 ASSERT_EQ (1, diagnostics.m_diagnostics.length ()); 3465 /* We expect the message "unterminated raw string" 3466 in the "cpplib" translation domain. 3467 It's not clear that dgettext is available on all supported hosts, 3468 so this assertion is commented-out for now. 3469 ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"), 3470 diagnostics.m_diagnostics[0]); 3471 */ 3472 } 3473 3474 /* Test of lexing char constants. */ 3475 3476 static void 3477 test_lexer_char_constants (const line_table_case &case_) 3478 { 3479 /* Various char constants. 3480 .....................0000000001111111111.22222222223. 3481 .....................1234567890123456789.01234567890. 
*/ 3482 const char *content = (" 'a'\n" 3483 " u'a'\n" 3484 " U'a'\n" 3485 " L'a'\n" 3486 " 'abc'\n"); 3487 lexer_test test (case_, content, NULL); 3488 3489 /* Verify that we get the expected tokens back. */ 3490 /* 'a'. */ 3491 const cpp_token *tok = test.get_token (); 3492 ASSERT_EQ (tok->type, CPP_CHAR); 3493 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'"); 3494 3495 unsigned int chars_seen; 3496 int unsignedp; 3497 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok, 3498 &chars_seen, &unsignedp); 3499 ASSERT_EQ (cc, 'a'); 3500 ASSERT_EQ (chars_seen, 1); 3501 3502 /* u'a'. */ 3503 tok = test.get_token (); 3504 ASSERT_EQ (tok->type, CPP_CHAR16); 3505 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'"); 3506 3507 /* U'a'. */ 3508 tok = test.get_token (); 3509 ASSERT_EQ (tok->type, CPP_CHAR32); 3510 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'"); 3511 3512 /* L'a'. */ 3513 tok = test.get_token (); 3514 ASSERT_EQ (tok->type, CPP_WCHAR); 3515 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'"); 3516 3517 /* 'abc' (c-char-sequence). */ 3518 tok = test.get_token (); 3519 ASSERT_EQ (tok->type, CPP_CHAR); 3520 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'"); 3521 } 3522 /* A table of interesting location_t values, giving one axis of our test 3523 matrix. */ 3524 3525 static const location_t boundary_locations[] = { 3526 /* Zero means "don't override the default values for a new line_table". */ 3527 0, 3528 3529 /* An arbitrary non-zero value that isn't close to one of 3530 the boundary values below. */ 3531 0x10000, 3532 3533 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */ 3534 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100, 3535 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1, 3536 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES, 3537 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1, 3538 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100, 3539 3540 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. 
*/ 3541 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100, 3542 LINE_MAP_MAX_LOCATION_WITH_COLS - 1, 3543 LINE_MAP_MAX_LOCATION_WITH_COLS, 3544 LINE_MAP_MAX_LOCATION_WITH_COLS + 1, 3545 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100, 3546 }; 3547 3548 /* Run TESTCASE multiple times, once for each case in our test matrix. */ 3549 3550 void 3551 for_each_line_table_case (void (*testcase) (const line_table_case &)) 3552 { 3553 /* As noted above in the description of struct line_table_case, 3554 we want to explore a test matrix of interesting line_table 3555 situations, running various selftests for each case within the 3556 matrix. */ 3557 3558 /* Run all tests with: 3559 (a) line_table->default_range_bits == 0, and 3560 (b) line_table->default_range_bits == 5. */ 3561 int num_cases_tested = 0; 3562 for (int default_range_bits = 0; default_range_bits <= 5; 3563 default_range_bits += 5) 3564 { 3565 /* ...and use each of the "interesting" location values as 3566 the starting location within line_table. */ 3567 const int num_boundary_locations 3568 = sizeof (boundary_locations) / sizeof (boundary_locations[0]); 3569 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++) 3570 { 3571 line_table_case c (default_range_bits, boundary_locations[loc_idx]); 3572 3573 testcase (c); 3574 3575 num_cases_tested++; 3576 } 3577 } 3578 3579 /* Verify that we fully covered the test matrix. */ 3580 ASSERT_EQ (num_cases_tested, 2 * 12); 3581 } 3582 3583 /* Verify that when presented with a consecutive pair of locations with 3584 a very large line offset, we don't attempt to consolidate them into 3585 a single ordinary linemap where the line offsets within the line map 3586 would lead to overflow (PR lto/88147). 
*/ 3587 3588 static void 3589 test_line_offset_overflow () 3590 { 3591 line_table_test ltt (line_table_case (5, 0)); 3592 3593 linemap_add (line_table, LC_ENTER, false, "foo.c", 0); 3594 linemap_line_start (line_table, 1, 100); 3595 location_t loc_a = linemap_line_start (line_table, 2578, 255); 3596 assert_loceq ("foo.c", 2578, 0, loc_a); 3597 3598 const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table); 3599 ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13); 3600 ASSERT_EQ (ordmap_a->m_range_bits, 5); 3601 3602 location_t loc_b = linemap_line_start (line_table, 404198, 512); 3603 assert_loceq ("foo.c", 404198, 0, loc_b); 3604 3605 /* We should have started a new linemap, rather than attempting to store 3606 a very large line offset. */ 3607 const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table); 3608 ASSERT_NE (ordmap_a, ordmap_b); 3609 } 3610 3611 void test_cpp_utf8 () 3612 { 3613 /* Verify that wcwidth of invalid UTF-8 or control bytes is 1. */ 3614 { 3615 int w_bad = cpp_display_width ("\xf0!\x9f!\x98!\x82!", 8); 3616 ASSERT_EQ (8, w_bad); 3617 int w_ctrl = cpp_display_width ("\r\t\n\v\0\1", 6); 3618 ASSERT_EQ (6, w_ctrl); 3619 } 3620 3621 /* Verify that wcwidth of valid UTF-8 is as expected. */ 3622 { 3623 const int w_pi = cpp_display_width ("\xcf\x80", 2); 3624 ASSERT_EQ (1, w_pi); 3625 const int w_emoji = cpp_display_width ("\xf0\x9f\x98\x82", 4); 3626 ASSERT_EQ (2, w_emoji); 3627 const int w_umlaut_precomposed = cpp_display_width ("\xc3\xbf", 2); 3628 ASSERT_EQ (1, w_umlaut_precomposed); 3629 const int w_umlaut_combining = cpp_display_width ("y\xcc\x88", 3); 3630 ASSERT_EQ (1, w_umlaut_combining); 3631 const int w_han = cpp_display_width ("\xe4\xb8\xba", 3); 3632 ASSERT_EQ (2, w_han); 3633 const int w_ascii = cpp_display_width ("GCC", 3); 3634 ASSERT_EQ (3, w_ascii); 3635 const int w_mixed = cpp_display_width ("\xcf\x80 = 3.14 \xf0\x9f\x98\x82" 3636 "\x9f! 
\xe4\xb8\xba y\xcc\x88", 24); 3637 ASSERT_EQ (18, w_mixed); 3638 } 3639 3640 /* Verify that cpp_byte_column_to_display_column can go past the end, 3641 and similar edge cases. */ 3642 { 3643 const char *str 3644 /* Display columns. 3645 111111112345 */ 3646 = "\xcf\x80 abc"; 3647 /* 111122223456 3648 Byte columns. */ 3649 3650 ASSERT_EQ (5, cpp_display_width (str, 6)); 3651 ASSERT_EQ (105, cpp_byte_column_to_display_column (str, 6, 106)); 3652 ASSERT_EQ (10000, cpp_byte_column_to_display_column (NULL, 0, 10000)); 3653 ASSERT_EQ (0, cpp_byte_column_to_display_column (NULL, 10000, 0)); 3654 } 3655 3656 /* Verify that cpp_display_column_to_byte_column can go past the end, 3657 and similar edge cases, and check invertibility. */ 3658 { 3659 const char *str 3660 /* Display columns. 3661 000000000000000000000000000000000000011 3662 111111112222222234444444455555555678901 */ 3663 = "\xf0\x9f\x98\x82 \xf0\x9f\x98\x82 hello"; 3664 /* 000000000000000000000000000000000111111 3665 111122223333444456666777788889999012345 3666 Byte columns. */ 3667 ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 2)); 3668 ASSERT_EQ (15, cpp_display_column_to_byte_column (str, 15, 11)); 3669 ASSERT_EQ (115, cpp_display_column_to_byte_column (str, 15, 111)); 3670 ASSERT_EQ (10000, cpp_display_column_to_byte_column (NULL, 0, 10000)); 3671 ASSERT_EQ (0, cpp_display_column_to_byte_column (NULL, 10000, 0)); 3672 3673 /* Verify that we do not interrupt a UTF-8 sequence. */ 3674 ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 1)); 3675 3676 for (int byte_col = 1; byte_col <= 15; ++byte_col) 3677 { 3678 const int disp_col = cpp_byte_column_to_display_column (str, 15, 3679 byte_col); 3680 const int byte_col2 = cpp_display_column_to_byte_column (str, 15, 3681 disp_col); 3682 3683 /* If we ask for the display column in the middle of a UTF-8 3684 sequence, it will return the length of the partial sequence, 3685 matching the behavior of GCC before display column support. 
3686 Otherwise check the round trip was successful. */ 3687 if (byte_col < 4) 3688 ASSERT_EQ (byte_col, disp_col); 3689 else if (byte_col >= 6 && byte_col < 9) 3690 ASSERT_EQ (3 + (byte_col - 5), disp_col); 3691 else 3692 ASSERT_EQ (byte_col2, byte_col); 3693 } 3694 } 3695 3696 } 3697 3698 /* Run all of the selftests within this file. */ 3699 3700 void 3701 input_c_tests () 3702 { 3703 test_linenum_comparisons (); 3704 test_should_have_column_data_p (); 3705 test_unknown_location (); 3706 test_builtins (); 3707 for_each_line_table_case (test_make_location_nonpure_range_endpoints); 3708 3709 for_each_line_table_case (test_accessing_ordinary_linemaps); 3710 for_each_line_table_case (test_lexer); 3711 for_each_line_table_case (test_lexer_string_locations_simple); 3712 for_each_line_table_case (test_lexer_string_locations_ebcdic); 3713 for_each_line_table_case (test_lexer_string_locations_hex); 3714 for_each_line_table_case (test_lexer_string_locations_oct); 3715 for_each_line_table_case (test_lexer_string_locations_letter_escape_1); 3716 for_each_line_table_case (test_lexer_string_locations_letter_escape_2); 3717 for_each_line_table_case (test_lexer_string_locations_ucn4); 3718 for_each_line_table_case (test_lexer_string_locations_ucn8); 3719 for_each_line_table_case (test_lexer_string_locations_wide_string); 3720 for_each_line_table_case (test_lexer_string_locations_string16); 3721 for_each_line_table_case (test_lexer_string_locations_string32); 3722 for_each_line_table_case (test_lexer_string_locations_u8); 3723 for_each_line_table_case (test_lexer_string_locations_utf8_source); 3724 for_each_line_table_case (test_lexer_string_locations_concatenation_1); 3725 for_each_line_table_case (test_lexer_string_locations_concatenation_2); 3726 for_each_line_table_case (test_lexer_string_locations_concatenation_3); 3727 for_each_line_table_case (test_lexer_string_locations_macro); 3728 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument); 3729 
for_each_line_table_case (test_lexer_string_locations_non_string); 3730 for_each_line_table_case (test_lexer_string_locations_long_line); 3731 for_each_line_table_case (test_lexer_string_locations_raw_string_one_line); 3732 for_each_line_table_case (test_lexer_string_locations_raw_string_multiline); 3733 for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated); 3734 for_each_line_table_case (test_lexer_char_constants); 3735 3736 test_reading_source_line (); 3737 3738 test_line_offset_overflow (); 3739 3740 test_cpp_utf8 (); 3741 } 3742 3743 } // namespace selftest 3744 3745 #endif /* CHECKING_P */ 3746