1 /* Data and functions related to line maps and input files.
2 Copyright (C) 2004-2022 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "intl.h"
24 #include "diagnostic.h"
25 #include "selftest.h"
26 #include "cpplib.h"
27
28 #ifndef HAVE_ICONV
29 #define HAVE_ICONV 0
30 #endif
31
32 /* Input charset configuration. */
/* Fallback charset callback: whatever file is asked about, report
   that no input charset is configured by returning a null pointer.  */

static const char *
default_charset_callback (const char * /* file_path */)
{
  const char *const no_charset = nullptr;
  return no_charset;
}
37
38 void
initialize_input_context(diagnostic_input_charset_callback ccb,bool should_skip_bom)39 file_cache::initialize_input_context (diagnostic_input_charset_callback ccb,
40 bool should_skip_bom)
41 {
42 in_context.ccb = (ccb ? ccb : default_charset_callback);
43 in_context.should_skip_bom = should_skip_bom;
44 }
45
46 /* This is a cache used by get_next_line to store the content of a
47 file to be searched for file lines. */
48 class file_cache_slot
49 {
50 public:
51 file_cache_slot ();
52 ~file_cache_slot ();
53
54 bool read_line_num (size_t line_num,
55 char ** line, ssize_t *line_len);
56
57 /* Accessors. */
get_file_path() const58 const char *get_file_path () const { return m_file_path; }
get_use_count() const59 unsigned get_use_count () const { return m_use_count; }
missing_trailing_newline_p() const60 bool missing_trailing_newline_p () const
61 {
62 return m_missing_trailing_newline;
63 }
64
inc_use_count()65 void inc_use_count () { m_use_count++; }
66
67 bool create (const file_cache::input_context &in_context,
68 const char *file_path, FILE *fp, unsigned highest_use_count);
69 void evict ();
70
71 private:
72 /* These are information used to store a line boundary. */
73 class line_info
74 {
75 public:
76 /* The line number. It starts from 1. */
77 size_t line_num;
78
79 /* The position (byte count) of the beginning of the line,
80 relative to the file data pointer. This starts at zero. */
81 size_t start_pos;
82
83 /* The position (byte count) of the last byte of the line. This
84 normally points to the '\n' character, or to one byte after the
85 last byte of the file, if the file doesn't contain a '\n'
86 character. */
87 size_t end_pos;
88
line_info(size_t l,size_t s,size_t e)89 line_info (size_t l, size_t s, size_t e)
90 : line_num (l), start_pos (s), end_pos (e)
91 {}
92
line_info()93 line_info ()
94 :line_num (0), start_pos (0), end_pos (0)
95 {}
96 };
97
98 bool needs_read_p () const;
99 bool needs_grow_p () const;
100 void maybe_grow ();
101 bool read_data ();
102 bool maybe_read_data ();
103 bool get_next_line (char **line, ssize_t *line_len);
104 bool read_next_line (char ** line, ssize_t *line_len);
105 bool goto_next_line ();
106
107 static const size_t buffer_size = 4 * 1024;
108 static const size_t line_record_size = 100;
109
110 /* The number of time this file has been accessed. This is used
111 to designate which file cache to evict from the cache
112 array. */
113 unsigned m_use_count;
114
115 /* The file_path is the key for identifying a particular file in
116 the cache.
117 For libcpp-using code, the underlying buffer for this field is
118 owned by the corresponding _cpp_file within the cpp_reader. */
119 const char *m_file_path;
120
121 FILE *m_fp;
122
123 /* This points to the content of the file that we've read so
124 far. */
125 char *m_data;
126
127 /* The allocated buffer to be freed may start a little earlier than DATA,
128 e.g. if a UTF8 BOM was skipped at the beginning. */
129 int m_alloc_offset;
130
131 /* The size of the DATA array above.*/
132 size_t m_size;
133
134 /* The number of bytes read from the underlying file so far. This
135 must be less (or equal) than SIZE above. */
136 size_t m_nb_read;
137
138 /* The index of the beginning of the current line. */
139 size_t m_line_start_idx;
140
141 /* The number of the previous line read. This starts at 1. Zero
142 means we've read no line so far. */
143 size_t m_line_num;
144
145 /* This is the total number of lines of the current file. At the
146 moment, we try to get this information from the line map
147 subsystem. Note that this is just a hint. When using the C++
148 front-end, this hint is correct because the input file is then
149 completely tokenized before parsing starts; so the line map knows
150 the number of lines before compilation really starts. For e.g,
151 the C front-end, it can happen that we start emitting diagnostics
152 before the line map has seen the end of the file. */
153 size_t m_total_lines;
154
155 /* Could this file be missing a trailing newline on its final line?
156 Initially true (to cope with empty files), set to true/false
157 as each line is read. */
158 bool m_missing_trailing_newline;
159
160 /* This is a record of the beginning and end of the lines we've seen
161 while reading the file. This is useful to avoid walking the data
162 from the beginning when we are asked to read a line that is
163 before LINE_START_IDX above. Note that the maximum size of this
164 record is line_record_size, so that the memory consumption
165 doesn't explode. We thus scale total_lines down to
166 line_record_size. */
167 vec<line_info, va_heap> m_line_record;
168
offset_buffer(int offset)169 void offset_buffer (int offset)
170 {
171 gcc_assert (offset < 0 ? m_alloc_offset + offset >= 0
172 : (size_t) offset <= m_size);
173 gcc_assert (m_data);
174 m_alloc_offset += offset;
175 m_data += offset;
176 m_size -= offset;
177 }
178
179 };
180
181 /* Current position in real source file. */
182
183 location_t input_location = UNKNOWN_LOCATION;
184
185 class line_maps *line_table;
186
187 /* A stashed copy of "line_table" for use by selftest::line_table_test.
188 This needs to be a global so that it can be a GC root, and thus
189 prevent the stashed copy from being garbage-collected if the GC runs
190 during a line_table_test. */
191
192 class line_maps *saved_line_table;
193
194 /* Expand the source location LOC into a human readable location. If
195 LOC resolves to a builtin location, the file name of the readable
196 location is set to the string "<built-in>". If EXPANSION_POINT_P is
197 TRUE and LOC is virtual, then it is resolved to the expansion
198 point of the involved macro. Otherwise, it is resolved to the
199 spelling location of the token.
200
201 When resolving to the spelling location of the token, if the
202 resulting location is for a built-in location (that is, it has no
203 associated line/column) in the context of a macro expansion, the
204 returned location is the first one (while unwinding the macro
205 location towards its expansion point) that is in real source
206 code.
207
208 ASPECT controls which part of the location to use. */
209
210 static expanded_location
expand_location_1(location_t loc,bool expansion_point_p,enum location_aspect aspect)211 expand_location_1 (location_t loc,
212 bool expansion_point_p,
213 enum location_aspect aspect)
214 {
215 expanded_location xloc;
216 const line_map_ordinary *map;
217 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
218 tree block = NULL;
219
220 if (IS_ADHOC_LOC (loc))
221 {
222 block = LOCATION_BLOCK (loc);
223 loc = LOCATION_LOCUS (loc);
224 }
225
226 memset (&xloc, 0, sizeof (xloc));
227
228 if (loc >= RESERVED_LOCATION_COUNT)
229 {
230 if (!expansion_point_p)
231 {
232 /* We want to resolve LOC to its spelling location.
233
234 But if that spelling location is a reserved location that
235 appears in the context of a macro expansion (like for a
236 location for a built-in token), let's consider the first
237 location (toward the expansion point) that is not reserved;
238 that is, the first location that is in real source code. */
239 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
240 loc, NULL);
241 lrk = LRK_SPELLING_LOCATION;
242 }
243 loc = linemap_resolve_location (line_table, loc, lrk, &map);
244
245 /* loc is now either in an ordinary map, or is a reserved location.
246 If it is a compound location, the caret is in a spelling location,
247 but the start/finish might still be a virtual location.
248 Depending of what the caller asked for, we may need to recurse
249 one level in order to resolve any virtual locations in the
250 end-points. */
251 switch (aspect)
252 {
253 default:
254 gcc_unreachable ();
255 /* Fall through. */
256 case LOCATION_ASPECT_CARET:
257 break;
258 case LOCATION_ASPECT_START:
259 {
260 location_t start = get_start (loc);
261 if (start != loc)
262 return expand_location_1 (start, expansion_point_p, aspect);
263 }
264 break;
265 case LOCATION_ASPECT_FINISH:
266 {
267 location_t finish = get_finish (loc);
268 if (finish != loc)
269 return expand_location_1 (finish, expansion_point_p, aspect);
270 }
271 break;
272 }
273 xloc = linemap_expand_location (line_table, map, loc);
274 }
275
276 xloc.data = block;
277 if (loc <= BUILTINS_LOCATION)
278 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
279
280 return xloc;
281 }
282
283 /* Initialize the set of cache used for files accessed by caret
284 diagnostic. */
285
286 static void
diagnostic_file_cache_init(void)287 diagnostic_file_cache_init (void)
288 {
289 gcc_assert (global_dc);
290 if (global_dc->m_file_cache == NULL)
291 global_dc->m_file_cache = new file_cache ();
292 }
293
294 /* Free the resources used by the set of cache used for files accessed
295 by caret diagnostic. */
296
297 void
diagnostic_file_cache_fini(void)298 diagnostic_file_cache_fini (void)
299 {
300 if (global_dc->m_file_cache)
301 {
302 delete global_dc->m_file_cache;
303 global_dc->m_file_cache = NULL;
304 }
305 }
306
307 /* Return the total lines number that have been read so far by the
308 line map (in the preprocessor) so far. For languages like C++ that
309 entirely preprocess the input file before starting to parse, this
310 equals the actual number of lines of the file. */
311
312 static size_t
total_lines_num(const char * file_path)313 total_lines_num (const char *file_path)
314 {
315 size_t r = 0;
316 location_t l = 0;
317 if (linemap_get_file_highest_location (line_table, file_path, &l))
318 {
319 gcc_assert (l >= RESERVED_LOCATION_COUNT);
320 expanded_location xloc = expand_location (l);
321 r = xloc.line;
322 }
323 return r;
324 }
325
326 /* Lookup the cache used for the content of a given file accessed by
327 caret diagnostic. Return the found cached file, or NULL if no
328 cached file was found. */
329
330 file_cache_slot *
lookup_file(const char * file_path)331 file_cache::lookup_file (const char *file_path)
332 {
333 gcc_assert (file_path);
334
335 /* This will contain the found cached file. */
336 file_cache_slot *r = NULL;
337 for (unsigned i = 0; i < num_file_slots; ++i)
338 {
339 file_cache_slot *c = &m_file_slots[i];
340 if (c->get_file_path () && !strcmp (c->get_file_path (), file_path))
341 {
342 c->inc_use_count ();
343 r = c;
344 }
345 }
346
347 if (r)
348 r->inc_use_count ();
349
350 return r;
351 }
352
353 /* Purge any mention of FILENAME from the cache of files used for
354 printing source code. For use in selftests when working
355 with tempfiles. */
356
357 void
diagnostics_file_cache_forcibly_evict_file(const char * file_path)358 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
359 {
360 gcc_assert (file_path);
361
362 if (!global_dc->m_file_cache)
363 return;
364
365 global_dc->m_file_cache->forcibly_evict_file (file_path);
366 }
367
368 void
forcibly_evict_file(const char * file_path)369 file_cache::forcibly_evict_file (const char *file_path)
370 {
371 gcc_assert (file_path);
372
373 file_cache_slot *r = lookup_file (file_path);
374 if (!r)
375 /* Not found. */
376 return;
377
378 r->evict ();
379 }
380
381 void
evict()382 file_cache_slot::evict ()
383 {
384 m_file_path = NULL;
385 if (m_fp)
386 fclose (m_fp);
387 m_fp = NULL;
388 m_nb_read = 0;
389 m_line_start_idx = 0;
390 m_line_num = 0;
391 m_line_record.truncate (0);
392 m_use_count = 0;
393 m_total_lines = 0;
394 m_missing_trailing_newline = true;
395 }
396
397 /* Return the file cache that has been less used, recently, or the
398 first empty one. If HIGHEST_USE_COUNT is non-null,
399 *HIGHEST_USE_COUNT is set to the highest use count of the entries
400 in the cache table. */
401
402 file_cache_slot*
evicted_cache_tab_entry(unsigned * highest_use_count)403 file_cache::evicted_cache_tab_entry (unsigned *highest_use_count)
404 {
405 diagnostic_file_cache_init ();
406
407 file_cache_slot *to_evict = &m_file_slots[0];
408 unsigned huc = to_evict->get_use_count ();
409 for (unsigned i = 1; i < num_file_slots; ++i)
410 {
411 file_cache_slot *c = &m_file_slots[i];
412 bool c_is_empty = (c->get_file_path () == NULL);
413
414 if (c->get_use_count () < to_evict->get_use_count ()
415 || (to_evict->get_file_path () && c_is_empty))
416 /* We evict C because it's either an entry with a lower use
417 count or one that is empty. */
418 to_evict = c;
419
420 if (huc < c->get_use_count ())
421 huc = c->get_use_count ();
422
423 if (c_is_empty)
424 /* We've reached the end of the cache; subsequent elements are
425 all empty. */
426 break;
427 }
428
429 if (highest_use_count)
430 *highest_use_count = huc;
431
432 return to_evict;
433 }
434
435 /* Create the cache used for the content of a given file to be
436 accessed by caret diagnostic. This cache is added to an array of
437 cache and can be retrieved by lookup_file_in_cache_tab. This
438 function returns the created cache. Note that only the last
439 num_file_slots files are cached. */
440
441 file_cache_slot*
add_file(const char * file_path)442 file_cache::add_file (const char *file_path)
443 {
444
445 FILE *fp = fopen (file_path, "r");
446 if (fp == NULL)
447 return NULL;
448
449 unsigned highest_use_count = 0;
450 file_cache_slot *r = evicted_cache_tab_entry (&highest_use_count);
451 if (!r->create (in_context, file_path, fp, highest_use_count))
452 return NULL;
453 return r;
454 }
455
456 /* Populate this slot for use on FILE_PATH and FP, dropping any
457 existing cached content within it. */
458
459 bool
create(const file_cache::input_context & in_context,const char * file_path,FILE * fp,unsigned highest_use_count)460 file_cache_slot::create (const file_cache::input_context &in_context,
461 const char *file_path, FILE *fp,
462 unsigned highest_use_count)
463 {
464 m_file_path = file_path;
465 if (m_fp)
466 fclose (m_fp);
467 m_fp = fp;
468 if (m_alloc_offset)
469 offset_buffer (-m_alloc_offset);
470 m_nb_read = 0;
471 m_line_start_idx = 0;
472 m_line_num = 0;
473 m_line_record.truncate (0);
474 /* Ensure that this cache entry doesn't get evicted next time
475 add_file_to_cache_tab is called. */
476 m_use_count = ++highest_use_count;
477 m_total_lines = total_lines_num (file_path);
478 m_missing_trailing_newline = true;
479
480
481 /* Check the input configuration to determine if we need to do any
482 transformations, such as charset conversion or BOM skipping. */
483 if (const char *input_charset = in_context.ccb (file_path))
484 {
485 /* Need a full-blown conversion of the input charset. */
486 fclose (m_fp);
487 m_fp = NULL;
488 const cpp_converted_source cs
489 = cpp_get_converted_source (file_path, input_charset);
490 if (!cs.data)
491 return false;
492 if (m_data)
493 XDELETEVEC (m_data);
494 m_data = cs.data;
495 m_nb_read = m_size = cs.len;
496 m_alloc_offset = cs.data - cs.to_free;
497 }
498 else if (in_context.should_skip_bom)
499 {
500 if (read_data ())
501 {
502 const int offset = cpp_check_utf8_bom (m_data, m_nb_read);
503 offset_buffer (offset);
504 m_nb_read -= offset;
505 }
506 }
507
508 return true;
509 }
510
511 /* file_cache's ctor. */
512
file_cache()513 file_cache::file_cache ()
514 : m_file_slots (new file_cache_slot[num_file_slots])
515 {
516 initialize_input_context (nullptr, false);
517 }
518
519 /* file_cache's dtor. */
520
~file_cache()521 file_cache::~file_cache ()
522 {
523 delete[] m_file_slots;
524 }
525
526 /* Lookup the cache used for the content of a given file accessed by
527 caret diagnostic. If no cached file was found, create a new cache
528 for this file, add it to the array of cached file and return
529 it. */
530
531 file_cache_slot*
lookup_or_add_file(const char * file_path)532 file_cache::lookup_or_add_file (const char *file_path)
533 {
534 file_cache_slot *r = lookup_file (file_path);
535 if (r == NULL)
536 r = add_file (file_path);
537 return r;
538 }
539
540 /* Default constructor for a cache of file used by caret
541 diagnostic. */
542
file_cache_slot()543 file_cache_slot::file_cache_slot ()
544 : m_use_count (0), m_file_path (NULL), m_fp (NULL), m_data (0),
545 m_alloc_offset (0), m_size (0), m_nb_read (0), m_line_start_idx (0),
546 m_line_num (0), m_total_lines (0), m_missing_trailing_newline (true)
547 {
548 m_line_record.create (0);
549 }
550
551 /* Destructor for a cache of file used by caret diagnostic. */
552
~file_cache_slot()553 file_cache_slot::~file_cache_slot ()
554 {
555 if (m_fp)
556 {
557 fclose (m_fp);
558 m_fp = NULL;
559 }
560 if (m_data)
561 {
562 offset_buffer (-m_alloc_offset);
563 XDELETEVEC (m_data);
564 m_data = 0;
565 }
566 m_line_record.release ();
567 }
568
569 /* Returns TRUE iff the cache would need to be filled with data coming
570 from the file. That is, either the cache is empty or full or the
571 current line is empty. Note that if the cache is full, it would
572 need to be extended and filled again. */
573
574 bool
needs_read_p() const575 file_cache_slot::needs_read_p () const
576 {
577 return m_fp && (m_nb_read == 0
578 || m_nb_read == m_size
579 || (m_line_start_idx >= m_nb_read - 1));
580 }
581
582 /* Return TRUE iff the cache is full and thus needs to be
583 extended. */
584
585 bool
needs_grow_p() const586 file_cache_slot::needs_grow_p () const
587 {
588 return m_nb_read == m_size;
589 }
590
591 /* Grow the cache if it needs to be extended. */
592
593 void
maybe_grow()594 file_cache_slot::maybe_grow ()
595 {
596 if (!needs_grow_p ())
597 return;
598
599 if (!m_data)
600 {
601 gcc_assert (m_size == 0 && m_alloc_offset == 0);
602 m_size = buffer_size;
603 m_data = XNEWVEC (char, m_size);
604 }
605 else
606 {
607 const int offset = m_alloc_offset;
608 offset_buffer (-offset);
609 m_size *= 2;
610 m_data = XRESIZEVEC (char, m_data, m_size);
611 offset_buffer (offset);
612 }
613 }
614
615 /* Read more data into the cache. Extends the cache if need be.
616 Returns TRUE iff new data could be read. */
617
618 bool
read_data()619 file_cache_slot::read_data ()
620 {
621 if (feof (m_fp) || ferror (m_fp))
622 return false;
623
624 maybe_grow ();
625
626 char * from = m_data + m_nb_read;
627 size_t to_read = m_size - m_nb_read;
628 size_t nb_read = fread (from, 1, to_read, m_fp);
629
630 if (ferror (m_fp))
631 return false;
632
633 m_nb_read += nb_read;
634 return !!nb_read;
635 }
636
637 /* Read new data iff the cache needs to be filled with more data
638 coming from the file FP. Return TRUE iff the cache was filled with
639 mode data. */
640
641 bool
maybe_read_data()642 file_cache_slot::maybe_read_data ()
643 {
644 if (!needs_read_p ())
645 return false;
646 return read_data ();
647 }
648
649 /* Read a new line from file FP, using C as a cache for the data
650 coming from the file. Upon successful completion, *LINE is set to
651 the beginning of the line found. *LINE points directly in the
652 line cache and is only valid until the next call of get_next_line.
653 *LINE_LEN is set to the length of the line. Note that the line
654 does not contain any terminal delimiter. This function returns
655 true if some data was read or process from the cache, false
656 otherwise. Note that subsequent calls to get_next_line might
657 make the content of *LINE invalid. */
658
659 bool
get_next_line(char ** line,ssize_t * line_len)660 file_cache_slot::get_next_line (char **line, ssize_t *line_len)
661 {
662 /* Fill the cache with data to process. */
663 maybe_read_data ();
664
665 size_t remaining_size = m_nb_read - m_line_start_idx;
666 if (remaining_size == 0)
667 /* There is no more data to process. */
668 return false;
669
670 char *line_start = m_data + m_line_start_idx;
671
672 char *next_line_start = NULL;
673 size_t len = 0;
674 char *line_end = (char *) memchr (line_start, '\n', remaining_size);
675 if (line_end == NULL)
676 {
677 /* We haven't found the end-of-line delimiter in the cache.
678 Fill the cache with more data from the file and look for the
679 '\n'. */
680 while (maybe_read_data ())
681 {
682 line_start = m_data + m_line_start_idx;
683 remaining_size = m_nb_read - m_line_start_idx;
684 line_end = (char *) memchr (line_start, '\n', remaining_size);
685 if (line_end != NULL)
686 {
687 next_line_start = line_end + 1;
688 break;
689 }
690 }
691 if (line_end == NULL)
692 {
693 /* We've loadded all the file into the cache and still no
694 '\n'. Let's say the line ends up at one byte passed the
695 end of the file. This is to stay consistent with the case
696 of when the line ends up with a '\n' and line_end points to
697 that terminal '\n'. That consistency is useful below in
698 the len calculation. */
699 line_end = m_data + m_nb_read ;
700 m_missing_trailing_newline = true;
701 }
702 else
703 m_missing_trailing_newline = false;
704 }
705 else
706 {
707 next_line_start = line_end + 1;
708 m_missing_trailing_newline = false;
709 }
710
711 if (m_fp && ferror (m_fp))
712 return false;
713
714 /* At this point, we've found the end of the of line. It either
715 points to the '\n' or to one byte after the last byte of the
716 file. */
717 gcc_assert (line_end != NULL);
718
719 len = line_end - line_start;
720
721 if (m_line_start_idx < m_nb_read)
722 *line = line_start;
723
724 ++m_line_num;
725
726 /* Before we update our line record, make sure the hint about the
727 total number of lines of the file is correct. If it's not, then
728 we give up recording line boundaries from now on. */
729 bool update_line_record = true;
730 if (m_line_num > m_total_lines)
731 update_line_record = false;
732
733 /* Now update our line record so that re-reading lines from the
734 before m_line_start_idx is faster. */
735 if (update_line_record
736 && m_line_record.length () < line_record_size)
737 {
738 /* If the file lines fits in the line record, we just record all
739 its lines ...*/
740 if (m_total_lines <= line_record_size
741 && m_line_num > m_line_record.length ())
742 m_line_record.safe_push
743 (file_cache_slot::line_info (m_line_num,
744 m_line_start_idx,
745 line_end - m_data));
746 else if (m_total_lines > line_record_size)
747 {
748 /* ... otherwise, we just scale total_lines down to
749 (line_record_size lines. */
750 size_t n = (m_line_num * line_record_size) / m_total_lines;
751 if (m_line_record.length () == 0
752 || n >= m_line_record.length ())
753 m_line_record.safe_push
754 (file_cache_slot::line_info (m_line_num,
755 m_line_start_idx,
756 line_end - m_data));
757 }
758 }
759
760 /* Update m_line_start_idx so that it points to the next line to be
761 read. */
762 if (next_line_start)
763 m_line_start_idx = next_line_start - m_data;
764 else
765 /* We didn't find any terminal '\n'. Let's consider that the end
766 of line is the end of the data in the cache. The next
767 invocation of get_next_line will either read more data from the
768 underlying file or return false early because we've reached the
769 end of the file. */
770 m_line_start_idx = m_nb_read;
771
772 *line_len = len;
773
774 return true;
775 }
776
777 /* Consume the next bytes coming from the cache (or from its
778 underlying file if there are remaining unread bytes in the file)
779 until we reach the next end-of-line (or end-of-file). There is no
780 copying from the cache involved. Return TRUE upon successful
781 completion. */
782
783 bool
goto_next_line()784 file_cache_slot::goto_next_line ()
785 {
786 char *l;
787 ssize_t len;
788
789 return get_next_line (&l, &len);
790 }
791
792 /* Read an arbitrary line number LINE_NUM from the file cached in C.
793 If the line was read successfully, *LINE points to the beginning
794 of the line in the file cache and *LINE_LEN is the length of the
795 line. *LINE is not nul-terminated, but may contain zero bytes.
796 *LINE is only valid until the next call of read_line_num.
797 This function returns bool if a line was read. */
798
799 bool
read_line_num(size_t line_num,char ** line,ssize_t * line_len)800 file_cache_slot::read_line_num (size_t line_num,
801 char ** line, ssize_t *line_len)
802 {
803 gcc_assert (line_num > 0);
804
805 if (line_num <= m_line_num)
806 {
807 /* We've been asked to read lines that are before m_line_num.
808 So lets use our line record (if it's not empty) to try to
809 avoid re-reading the file from the beginning again. */
810
811 if (m_line_record.is_empty ())
812 {
813 m_line_start_idx = 0;
814 m_line_num = 0;
815 }
816 else
817 {
818 file_cache_slot::line_info *i = NULL;
819 if (m_total_lines <= line_record_size)
820 {
821 /* In languages where the input file is not totally
822 preprocessed up front, the m_total_lines hint
823 can be smaller than the number of lines of the
824 file. In that case, only the first
825 m_total_lines have been recorded.
826
827 Otherwise, the first m_total_lines we've read have
828 their start/end recorded here. */
829 i = (line_num <= m_total_lines)
830 ? &m_line_record[line_num - 1]
831 : &m_line_record[m_total_lines - 1];
832 gcc_assert (i->line_num <= line_num);
833 }
834 else
835 {
836 /* So the file had more lines than our line record
837 size. Thus the number of lines we've recorded has
838 been scaled down to line_record_size. Let's
839 pick the start/end of the recorded line that is
840 closest to line_num. */
841 size_t n = (line_num <= m_total_lines)
842 ? line_num * line_record_size / m_total_lines
843 : m_line_record.length () - 1;
844 if (n < m_line_record.length ())
845 {
846 i = &m_line_record[n];
847 gcc_assert (i->line_num <= line_num);
848 }
849 }
850
851 if (i && i->line_num == line_num)
852 {
853 /* We have the start/end of the line. */
854 *line = m_data + i->start_pos;
855 *line_len = i->end_pos - i->start_pos;
856 return true;
857 }
858
859 if (i)
860 {
861 m_line_start_idx = i->start_pos;
862 m_line_num = i->line_num - 1;
863 }
864 else
865 {
866 m_line_start_idx = 0;
867 m_line_num = 0;
868 }
869 }
870 }
871
872 /* Let's walk from line m_line_num up to line_num - 1, without
873 copying any line. */
874 while (m_line_num < line_num - 1)
875 if (!goto_next_line ())
876 return false;
877
878 /* The line we want is the next one. Let's read and copy it back to
879 the caller. */
880 return get_next_line (line, line_len);
881 }
882
883 /* Return the physical source line that corresponds to FILE_PATH/LINE.
884 The line is not nul-terminated. The returned pointer is only
885 valid until the next call of location_get_source_line.
886 Note that the line can contain several null characters,
887 so the returned value's length has the actual length of the line.
888 If the function fails, a NULL char_span is returned. */
889
890 char_span
location_get_source_line(const char * file_path,int line)891 location_get_source_line (const char *file_path, int line)
892 {
893 char *buffer = NULL;
894 ssize_t len;
895
896 if (line == 0)
897 return char_span (NULL, 0);
898
899 if (file_path == NULL)
900 return char_span (NULL, 0);
901
902 diagnostic_file_cache_init ();
903
904 file_cache_slot *c = global_dc->m_file_cache->lookup_or_add_file (file_path);
905 if (c == NULL)
906 return char_span (NULL, 0);
907
908 bool read = c->read_line_num (line, &buffer, &len);
909 if (!read)
910 return char_span (NULL, 0);
911
912 return char_span (buffer, len);
913 }
914
915 /* Determine if FILE_PATH missing a trailing newline on its final line.
916 Only valid to call once all of the file has been loaded, by
917 requesting a line number beyond the end of the file. */
918
919 bool
location_missing_trailing_newline(const char * file_path)920 location_missing_trailing_newline (const char *file_path)
921 {
922 diagnostic_file_cache_init ();
923
924 file_cache_slot *c = global_dc->m_file_cache->lookup_or_add_file (file_path);
925 if (c == NULL)
926 return false;
927
928 return c->missing_trailing_newline_p ();
929 }
930
931 /* Test if the location originates from the spelling location of a
932 builtin-tokens. That is, return TRUE if LOC is a (possibly
933 virtual) location of a built-in token that appears in the expansion
934 list of a macro. Please note that this function also works on
935 tokens that result from built-in tokens. For instance, the
936 function would return true if passed a token "4" that is the result
937 of the expansion of the built-in __LINE__ macro. */
938 bool
is_location_from_builtin_token(location_t loc)939 is_location_from_builtin_token (location_t loc)
940 {
941 const line_map_ordinary *map = NULL;
942 loc = linemap_resolve_location (line_table, loc,
943 LRK_SPELLING_LOCATION, &map);
944 return loc == BUILTINS_LOCATION;
945 }
946
947 /* Expand the source location LOC into a human readable location. If
948 LOC is virtual, it resolves to the expansion point of the involved
949 macro. If LOC resolves to a builtin location, the file name of the
950 readable location is set to the string "<built-in>". */
951
952 expanded_location
expand_location(location_t loc)953 expand_location (location_t loc)
954 {
955 return expand_location_1 (loc, /*expansion_point_p=*/true,
956 LOCATION_ASPECT_CARET);
957 }
958
959 /* Expand the source location LOC into a human readable location. If
960 LOC is virtual, it resolves to the expansion location of the
961 relevant macro. If LOC resolves to a builtin location, the file
962 name of the readable location is set to the string
963 "<built-in>". */
964
965 expanded_location
expand_location_to_spelling_point(location_t loc,enum location_aspect aspect)966 expand_location_to_spelling_point (location_t loc,
967 enum location_aspect aspect)
968 {
969 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
970 }
971
972 /* The rich_location class within libcpp requires a way to expand
973 location_t instances, and relies on the client code
974 providing a symbol named
975 linemap_client_expand_location_to_spelling_point
976 to do this.
977
978 This is the implementation for libcommon.a (all host binaries),
979 which simply calls into expand_location_1. */
980
981 expanded_location
linemap_client_expand_location_to_spelling_point(location_t loc,enum location_aspect aspect)982 linemap_client_expand_location_to_spelling_point (location_t loc,
983 enum location_aspect aspect)
984 {
985 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
986 }
987
988
989 /* If LOCATION is in a system header and if it is a virtual location
990 for a token coming from the expansion of a macro, unwind it to
991 the location of the expansion point of the macro. If the expansion
992 point is also in a system header return the original LOCATION.
993 Otherwise, return the location of the expansion point.
994
995 This is used for instance when we want to emit diagnostics about a
996 token that may be located in a macro that is itself defined in a
997 system header, for example, for the NULL macro. In such a case, if
998 LOCATION were passed directly to diagnostic functions such as
999 warning_at, the diagnostic would be suppressed (unless
1000 -Wsystem-headers). */
1001
1002 location_t
expansion_point_location_if_in_system_header(location_t location)1003 expansion_point_location_if_in_system_header (location_t location)
1004 {
1005 if (!in_system_header_at (location))
1006 return location;
1007
1008 location_t xloc = linemap_resolve_location (line_table, location,
1009 LRK_MACRO_EXPANSION_POINT,
1010 NULL);
1011 return in_system_header_at (xloc) ? location : xloc;
1012 }
1013
1014 /* If LOCATION is a virtual location for a token coming from the expansion
1015 of a macro, unwind to the location of the expansion point of the macro. */
1016
1017 location_t
expansion_point_location(location_t location)1018 expansion_point_location (location_t location)
1019 {
1020 return linemap_resolve_location (line_table, location,
1021 LRK_MACRO_EXPANSION_POINT, NULL);
1022 }
1023
/* Construct a location with caret at CARET, ranging from START to
   FINISH, e.g.
1026
1027 11111111112
1028 12345678901234567890
1029 522
1030 523 return foo + bar;
1031 ~~~~^~~~~
1032 524
1033
1034 The location's caret is at the "+", line 523 column 15, but starts
1035 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
1036 of "bar" at column 19. */
1037
1038 location_t
make_location(location_t caret,location_t start,location_t finish)1039 make_location (location_t caret, location_t start, location_t finish)
1040 {
1041 location_t pure_loc = get_pure_location (caret);
1042 source_range src_range;
1043 src_range.m_start = get_start (start);
1044 src_range.m_finish = get_finish (finish);
1045 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
1046 pure_loc,
1047 src_range,
1048 NULL);
1049 return combined_loc;
1050 }
1051
1052 /* Same as above, but taking a source range rather than two locations. */
1053
1054 location_t
make_location(location_t caret,source_range src_range)1055 make_location (location_t caret, source_range src_range)
1056 {
1057 location_t pure_loc = get_pure_location (caret);
1058 return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
1059 }
1060
1061 /* An expanded_location stores the column in byte units. This function
1062 converts that column to display units. That requires reading the associated
1063 source line in order to calculate the display width. If that cannot be done
1064 for any reason, then returns the byte column as a fallback. */
1065 int
location_compute_display_column(expanded_location exploc,const cpp_char_column_policy & policy)1066 location_compute_display_column (expanded_location exploc,
1067 const cpp_char_column_policy &policy)
1068 {
1069 if (!(exploc.file && *exploc.file && exploc.line && exploc.column))
1070 return exploc.column;
1071 char_span line = location_get_source_line (exploc.file, exploc.line);
1072 /* If line is NULL, this function returns exploc.column which is the
1073 desired fallback. */
1074 return cpp_byte_column_to_display_column (line.get_buffer (), line.length (),
1075 exploc.column, policy);
1076 }
1077
1078 /* Dump statistics to stderr about the memory usage of the line_table
1079 set of line maps. This also displays some statistics about macro
1080 expansion. */
1081
1082 void
dump_line_table_statistics(void)1083 dump_line_table_statistics (void)
1084 {
1085 struct linemap_stats s;
1086 long total_used_map_size,
1087 macro_maps_size,
1088 total_allocated_map_size;
1089
1090 memset (&s, 0, sizeof (s));
1091
1092 linemap_get_statistics (line_table, &s);
1093
1094 macro_maps_size = s.macro_maps_used_size
1095 + s.macro_maps_locations_size;
1096
1097 total_allocated_map_size = s.ordinary_maps_allocated_size
1098 + s.macro_maps_allocated_size
1099 + s.macro_maps_locations_size;
1100
1101 total_used_map_size = s.ordinary_maps_used_size
1102 + s.macro_maps_used_size
1103 + s.macro_maps_locations_size;
1104
1105 fprintf (stderr, "Number of expanded macros: %5ld\n",
1106 s.num_expanded_macros);
1107 if (s.num_expanded_macros != 0)
1108 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
1109 s.num_macro_tokens / s.num_expanded_macros);
1110 fprintf (stderr,
1111 "\nLine Table allocations during the "
1112 "compilation process\n");
1113 fprintf (stderr, "Number of ordinary maps used: " PRsa (5) "\n",
1114 SIZE_AMOUNT (s.num_ordinary_maps_used));
1115 fprintf (stderr, "Ordinary map used size: " PRsa (5) "\n",
1116 SIZE_AMOUNT (s.ordinary_maps_used_size));
1117 fprintf (stderr, "Number of ordinary maps allocated: " PRsa (5) "\n",
1118 SIZE_AMOUNT (s.num_ordinary_maps_allocated));
1119 fprintf (stderr, "Ordinary maps allocated size: " PRsa (5) "\n",
1120 SIZE_AMOUNT (s.ordinary_maps_allocated_size));
1121 fprintf (stderr, "Number of macro maps used: " PRsa (5) "\n",
1122 SIZE_AMOUNT (s.num_macro_maps_used));
1123 fprintf (stderr, "Macro maps used size: " PRsa (5) "\n",
1124 SIZE_AMOUNT (s.macro_maps_used_size));
1125 fprintf (stderr, "Macro maps locations size: " PRsa (5) "\n",
1126 SIZE_AMOUNT (s.macro_maps_locations_size));
1127 fprintf (stderr, "Macro maps size: " PRsa (5) "\n",
1128 SIZE_AMOUNT (macro_maps_size));
1129 fprintf (stderr, "Duplicated maps locations size: " PRsa (5) "\n",
1130 SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
1131 fprintf (stderr, "Total allocated maps size: " PRsa (5) "\n",
1132 SIZE_AMOUNT (total_allocated_map_size));
1133 fprintf (stderr, "Total used maps size: " PRsa (5) "\n",
1134 SIZE_AMOUNT (total_used_map_size));
1135 fprintf (stderr, "Ad-hoc table size: " PRsa (5) "\n",
1136 SIZE_AMOUNT (s.adhoc_table_size));
1137 fprintf (stderr, "Ad-hoc table entries used: " PRsa (5) "\n",
1138 SIZE_AMOUNT (s.adhoc_table_entries_used));
1139 fprintf (stderr, "optimized_ranges: " PRsa (5) "\n",
1140 SIZE_AMOUNT (line_table->num_optimized_ranges));
1141 fprintf (stderr, "unoptimized_ranges: " PRsa (5) "\n",
1142 SIZE_AMOUNT (line_table->num_unoptimized_ranges));
1143
1144 fprintf (stderr, "\n");
1145 }
1146
1147 /* Get location one beyond the final location in ordinary map IDX. */
1148
1149 static location_t
get_end_location(class line_maps * set,unsigned int idx)1150 get_end_location (class line_maps *set, unsigned int idx)
1151 {
1152 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
1153 return set->highest_location;
1154
1155 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
1156 return MAP_START_LOCATION (next_map);
1157 }
1158
1159 /* Helper function for write_digit_row. */
1160
static void
write_digit (FILE *stream, int digit)
{
  /* Write the decimal units digit of DIGIT to STREAM.

     write_digit_row passes values derived from location_t, which can
     exceed INT_MAX and so arrive here as negative ints.  Reduce modulo
     10 in unsigned arithmetic so that the result is always in [0, 9];
     a signed "%" would yield a negative remainder and emit a
     non-digit character.  */
  const unsigned int units = static_cast <unsigned int> (digit) % 10;
  fputc (static_cast <int> ('0' + units), stream);
}
1166
1167 /* Helper function for dump_location_info.
1168 Write a row of numbers to STREAM, numbering a source line,
1169 giving the units, tens, hundreds etc of the column number. */
1170
1171 static void
write_digit_row(FILE * stream,int indent,const line_map_ordinary * map,location_t loc,int max_col,int divisor)1172 write_digit_row (FILE *stream, int indent,
1173 const line_map_ordinary *map,
1174 location_t loc, int max_col, int divisor)
1175 {
1176 fprintf (stream, "%*c", indent, ' ');
1177 fprintf (stream, "|");
1178 for (int column = 1; column < max_col; column++)
1179 {
1180 location_t column_loc = loc + (column << map->m_range_bits);
1181 write_digit (stream, column_loc / divisor);
1182 }
1183 fprintf (stream, "\n");
1184 }
1185
1186 /* Write a half-closed (START) / half-open (END) interval of
1187 location_t to STREAM. */
1188
1189 static void
dump_location_range(FILE * stream,location_t start,location_t end)1190 dump_location_range (FILE *stream,
1191 location_t start, location_t end)
1192 {
1193 fprintf (stream,
1194 " location_t interval: %u <= loc < %u\n",
1195 start, end);
1196 }
1197
1198 /* Write a labelled description of a half-closed (START) / half-open (END)
1199 interval of location_t to STREAM. */
1200
1201 static void
dump_labelled_location_range(FILE * stream,const char * name,location_t start,location_t end)1202 dump_labelled_location_range (FILE *stream,
1203 const char *name,
1204 location_t start, location_t end)
1205 {
1206 fprintf (stream, "%s\n", name);
1207 dump_location_range (stream, start, end);
1208 fprintf (stream, "\n");
1209 }
1210
1211 /* Write a visualization of the locations in the line_table to STREAM. */
1212
1213 void
dump_location_info(FILE * stream)1214 dump_location_info (FILE *stream)
1215 {
1216 /* Visualize the reserved locations. */
1217 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1218 0, RESERVED_LOCATION_COUNT);
1219
1220 /* Visualize the ordinary line_map instances, rendering the sources. */
1221 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1222 {
1223 location_t end_location = get_end_location (line_table, idx);
1224 /* half-closed: doesn't include this one. */
1225
1226 const line_map_ordinary *map
1227 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1228 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1229 dump_location_range (stream,
1230 MAP_START_LOCATION (map), end_location);
1231 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1232 fprintf (stream, " starting at line: %i\n",
1233 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1234 fprintf (stream, " column and range bits: %i\n",
1235 map->m_column_and_range_bits);
1236 fprintf (stream, " column bits: %i\n",
1237 map->m_column_and_range_bits - map->m_range_bits);
1238 fprintf (stream, " range bits: %i\n",
1239 map->m_range_bits);
1240 const char * reason;
1241 switch (map->reason) {
1242 case LC_ENTER:
1243 reason = "LC_ENTER";
1244 break;
1245 case LC_LEAVE:
1246 reason = "LC_LEAVE";
1247 break;
1248 case LC_RENAME:
1249 reason = "LC_RENAME";
1250 break;
1251 case LC_RENAME_VERBATIM:
1252 reason = "LC_RENAME_VERBATIM";
1253 break;
1254 case LC_ENTER_MACRO:
1255 reason = "LC_RENAME_MACRO";
1256 break;
1257 default:
1258 reason = "Unknown";
1259 }
1260 fprintf (stream, " reason: %d (%s)\n", map->reason, reason);
1261
1262 const line_map_ordinary *includer_map
1263 = linemap_included_from_linemap (line_table, map);
1264 fprintf (stream, " included from location: %d",
1265 linemap_included_from (map));
1266 if (includer_map) {
1267 fprintf (stream, " (in ordinary map %d)",
1268 int (includer_map - line_table->info_ordinary.maps));
1269 }
1270 fprintf (stream, "\n");
1271
1272 /* Render the span of source lines that this "map" covers. */
1273 for (location_t loc = MAP_START_LOCATION (map);
1274 loc < end_location;
1275 loc += (1 << map->m_range_bits) )
1276 {
1277 gcc_assert (pure_location_p (line_table, loc) );
1278
1279 expanded_location exploc
1280 = linemap_expand_location (line_table, map, loc);
1281
1282 if (exploc.column == 0)
1283 {
1284 /* Beginning of a new source line: draw the line. */
1285
1286 char_span line_text = location_get_source_line (exploc.file,
1287 exploc.line);
1288 if (!line_text)
1289 break;
1290 fprintf (stream,
1291 "%s:%3i|loc:%5i|%.*s\n",
1292 exploc.file, exploc.line,
1293 loc,
1294 (int)line_text.length (), line_text.get_buffer ());
1295
1296 /* "loc" is at column 0, which means "the whole line".
1297 Render the locations *within* the line, by underlining
1298 it, showing the location_t numeric values
1299 at each column. */
1300 size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1301 if (max_col > line_text.length ())
1302 max_col = line_text.length () + 1;
1303
1304 int len_lnum = num_digits (exploc.line);
1305 if (len_lnum < 3)
1306 len_lnum = 3;
1307 int len_loc = num_digits (loc);
1308 if (len_loc < 5)
1309 len_loc = 5;
1310
1311 int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
1312
1313 /* Thousands. */
1314 if (end_location > 999)
1315 write_digit_row (stream, indent, map, loc, max_col, 1000);
1316
1317 /* Hundreds. */
1318 if (end_location > 99)
1319 write_digit_row (stream, indent, map, loc, max_col, 100);
1320
1321 /* Tens. */
1322 write_digit_row (stream, indent, map, loc, max_col, 10);
1323
1324 /* Units. */
1325 write_digit_row (stream, indent, map, loc, max_col, 1);
1326 }
1327 }
1328 fprintf (stream, "\n");
1329 }
1330
1331 /* Visualize unallocated values. */
1332 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1333 line_table->highest_location,
1334 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1335
1336 /* Visualize the macro line_map instances, rendering the sources. */
1337 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1338 {
1339 /* Each macro map that is allocated owns location_t values
1340 that are *lower* that the one before them.
1341 Hence it's meaningful to view them either in order of ascending
1342 source locations, or in order of ascending macro map index. */
1343 const bool ascending_location_ts = true;
1344 unsigned int idx = (ascending_location_ts
1345 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1346 : i);
1347 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1348 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1349 idx,
1350 linemap_map_get_macro_name (map),
1351 MACRO_MAP_NUM_MACRO_TOKENS (map));
1352 dump_location_range (stream,
1353 map->start_location,
1354 (map->start_location
1355 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1356 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1357 "expansion point is location %i",
1358 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1359 fprintf (stream, " map->start_location: %u\n",
1360 map->start_location);
1361
1362 fprintf (stream, " macro_locations:\n");
1363 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1364 {
1365 location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
1366 location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1367
1368 /* linemap_add_macro_token encodes token numbers in an expansion
1369 by putting them after MAP_START_LOCATION. */
1370
1371 /* I'm typically seeing 4 uninitialized entries at the end of
1372 0xafafafaf.
1373 This appears to be due to macro.cc:replace_args
1374 adding 2 extra args for padding tokens; presumably there may
1375 be a leading and/or trailing padding token injected,
1376 each for 2 more location slots.
1377 This would explain there being up to 4 location_ts slots
1378 that may be uninitialized. */
1379
1380 fprintf (stream, " %u: %u, %u\n",
1381 i,
1382 x,
1383 y);
1384 if (x == y)
1385 {
1386 if (x < MAP_START_LOCATION (map))
1387 inform (x, "token %u has %<x-location == y-location == %u%>",
1388 i, x);
1389 else
1390 fprintf (stream,
1391 "x-location == y-location == %u encodes token # %u\n",
1392 x, x - MAP_START_LOCATION (map));
1393 }
1394 else
1395 {
1396 inform (x, "token %u has %<x-location == %u%>", i, x);
1397 inform (x, "token %u has %<y-location == %u%>", i, y);
1398 }
1399 }
1400 fprintf (stream, "\n");
1401 }
1402
1403 /* It appears that MAX_LOCATION_T itself is never assigned to a
1404 macro map, presumably due to an off-by-one error somewhere
1405 between the logic in linemap_enter_macro and
1406 LINEMAPS_MACRO_LOWEST_LOCATION. */
1407 dump_labelled_location_range (stream, "MAX_LOCATION_T",
1408 MAX_LOCATION_T,
1409 MAX_LOCATION_T + 1);
1410
1411 /* Visualize ad-hoc values. */
1412 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1413 MAX_LOCATION_T + 1, UINT_MAX);
1414 }
1415
1416 /* string_concat's constructor. */
1417
string_concat(int num,location_t * locs)1418 string_concat::string_concat (int num, location_t *locs)
1419 : m_num (num)
1420 {
1421 m_locs = ggc_vec_alloc <location_t> (num);
1422 for (int i = 0; i < num; i++)
1423 m_locs[i] = locs[i];
1424 }
1425
1426 /* string_concat_db's constructor. */
1427
string_concat_db()1428 string_concat_db::string_concat_db ()
1429 {
1430 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1431 }
1432
1433 /* Record that a string concatenation occurred, covering NUM
1434 string literal tokens. LOCS is an array of size NUM, containing the
1435 locations of the tokens. A copy of LOCS is taken. */
1436
1437 void
record_string_concatenation(int num,location_t * locs)1438 string_concat_db::record_string_concatenation (int num, location_t *locs)
1439 {
1440 gcc_assert (num > 1);
1441 gcc_assert (locs);
1442
1443 location_t key_loc = get_key_loc (locs[0]);
1444 /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values:
1445 any data now recorded under key 'key_loc' would be overwritten by a
1446 subsequent call with the same key 'key_loc'. */
1447 if (RESERVED_LOCATION_P (key_loc))
1448 return;
1449
1450 string_concat *concat
1451 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1452 m_table->put (key_loc, concat);
1453 }
1454
1455 /* Determine if LOC was the location of the initial token of a
1456 concatenation of string literal tokens.
1457 If so, *OUT_NUM is written to with the number of tokens, and
1458 *OUT_LOCS with the location of an array of locations of the
1459 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1460 storage owned by the string_concat_db.
1461 Otherwise, return false. */
1462
1463 bool
get_string_concatenation(location_t loc,int * out_num,location_t ** out_locs)1464 string_concat_db::get_string_concatenation (location_t loc,
1465 int *out_num,
1466 location_t **out_locs)
1467 {
1468 gcc_assert (out_num);
1469 gcc_assert (out_locs);
1470
1471 location_t key_loc = get_key_loc (loc);
1472 /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values; see
1473 discussion in 'string_concat_db::record_string_concatenation'. */
1474 if (RESERVED_LOCATION_P (key_loc))
1475 return false;
1476
1477 string_concat **concat = m_table->get (key_loc);
1478 if (!concat)
1479 return false;
1480
1481 *out_num = (*concat)->m_num;
1482 *out_locs =(*concat)->m_locs;
1483 return true;
1484 }
1485
1486 /* Internal function. Canonicalize LOC into a form suitable for
1487 use as a key within the database, stripping away macro expansion,
1488 ad-hoc information, and range information, using the location of
1489 the start of LOC within an ordinary linemap. */
1490
1491 location_t
get_key_loc(location_t loc)1492 string_concat_db::get_key_loc (location_t loc)
1493 {
1494 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1495 NULL);
1496
1497 loc = get_range_from_loc (line_table, loc).m_start;
1498
1499 return loc;
1500 }
1501
/* Helper class for use within get_substring_ranges_for_loc.
   A vec of cpp_string with responsibility for releasing all of the
   str->text for each str in the vector.  */
1505
1506 class auto_cpp_string_vec : public auto_vec <cpp_string>
1507 {
1508 public:
auto_cpp_string_vec(int alloc)1509 auto_cpp_string_vec (int alloc)
1510 : auto_vec <cpp_string> (alloc) {}
1511
~auto_cpp_string_vec()1512 ~auto_cpp_string_vec ()
1513 {
1514 /* Clean up the copies within this vec. */
1515 int i;
1516 cpp_string *str;
1517 FOR_EACH_VEC_ELT (*this, i, str)
1518 free (const_cast <unsigned char *> (str->text));
1519 }
1520 };
1521
1522 /* Attempt to populate RANGES with source location information on the
1523 individual characters within the string literal found at STRLOC.
1524 If CONCATS is non-NULL, then any string literals that the token at
1525 STRLOC was concatenated with are also added to RANGES.
1526
1527 Return NULL if successful, or an error message if any errors occurred (in
1528 which case RANGES may be only partially populated and should not
1529 be used).
1530
1531 This is implemented by re-parsing the relevant source line(s). */
1532
static const char *
get_substring_ranges_for_loc (cpp_reader *pfile,
                              string_concat_db *concats,
                              location_t strloc,
                              enum cpp_ttype type,
                              cpp_substring_ranges &ranges)
{
  /* Overview: reject situations in which the source cannot be trusted,
     then, for each literal token, validate its range, copy its text out
     of the source line and build a location reader for it; finally hand
     everything to cpp_interpret_string_ranges.  Every failure path
     returns a short string naming the reason.  */

  gcc_assert (pfile);

  if (strloc == UNKNOWN_LOCATION)
    return "unknown location";

  /* Reparsing the strings requires accurate location information.
     If -ftrack-macro-expansion has been overridden from its default
     of 2, then we might have a location of a macro expansion point,
     rather than the location of the literal itself.
     Avoid this by requiring that we have full macro expansion tracking
     for substring locations to be available.  */
  if (cpp_get_options (pfile)->track_macro_expansion != 2)
    return "track_macro_expansion != 2";

  /* If #line or # 44 "file"-style directives are present, then there's
     no guarantee that the line numbers we have can be used to locate
     the strings.  For example, we might have a .i file with # directives
     pointing back to lines within a .c file, but the .c file might
     have been edited since the .i file was created.
     In such a case, the safest course is to disable on-demand substring
     locations.  */
  if (line_table->seen_line_directive)
    return "seen line directive";

  /* If string concatenation has occurred at STRLOC, get the locations
     of all of the literal tokens making up the compound string.
     Otherwise, just use STRLOC.  */
  int num_locs = 1;
  location_t *strlocs = &strloc;
  /* The return value is deliberately ignored: when no concatenation is
     recorded, get_string_concatenation leaves NUM_LOCS and STRLOCS
     untouched, so the single-token defaults above remain in effect.  */
  if (concats)
    concats->get_string_concatenation (strloc, &num_locs, &strlocs);

  /* STRS receives a private copy of each literal's text and LOC_READERS
     the matching location reader; both are pushed once per iteration,
     so they stay in step with each other.  */
  auto_cpp_string_vec strs (num_locs);
  auto_vec <cpp_string_location_reader> loc_readers (num_locs);
  for (int i = 0; i < num_locs; i++)
    {
      /* Get range of strloc.  We will use it to locate the start and finish
         of the literal token within the line.  */
      source_range src_range = get_range_from_loc (line_table, strlocs[i]);

      if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
        {
          /* If the string token was within a macro expansion, then we can
             cope with it for the simple case where we have a single token.
             Otherwise, bail out.  */
          if (src_range.m_start != src_range.m_finish)
            return "macro expansion";
        }
      else
        {
          if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
            /* If so, we can't reliably determine where the token started within
               its line.  */
            return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";

          if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
            /* If so, we can't reliably determine where the token finished
               within its line.  */
            return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
        }

      /* Expand both endpoints and require that they describe a forward
         range within a single line of a single file.  */
      expanded_location start
        = expand_location_to_spelling_point (src_range.m_start,
                                             LOCATION_ASPECT_START);
      expanded_location finish
        = expand_location_to_spelling_point (src_range.m_finish,
                                             LOCATION_ASPECT_FINISH);
      if (start.file != finish.file)
        return "range endpoints are in different files";
      if (start.line != finish.line)
        return "range endpoints are on different lines";
      if (start.column > finish.column)
        return "range endpoints are reversed";

      char_span line = location_get_source_line (start.file, start.line);
      if (!line)
        return "unable to read source line";

      /* Determine the location of the literal (including quotes
         and leading prefix chars, such as the 'u' in a u""
         token).  */
      size_t literal_length = finish.column - start.column + 1;

      /* Ensure that we don't crash if we got the wrong location.  */
      if (start.column < 1)
        return "zero start column";
      if (line.length () < (start.column - 1 + literal_length))
        return "line is not wide enough";

      /* Columns are 1-based, hence the "- 1" for the span offset.  */
      char_span literal = line.subspan (start.column - 1, literal_length);

      cpp_string from;
      from.len = literal_length;
      /* Make a copy of the literal, to avoid having to rely on
         the lifetime of the copy of the line within the cache.
         This will be released by the auto_cpp_string_vec dtor.  */
      from.text = (unsigned char *)literal.xstrdup ();
      strs.safe_push (from);

      /* For very long lines, a new linemap could have started
         halfway through the token.
         Ensure that the loc_reader uses the linemap of the
         *end* of the token for its start location.  */
      const line_map_ordinary *start_ord_map;
      linemap_resolve_location (line_table, src_range.m_start,
                                LRK_SPELLING_LOCATION, &start_ord_map);
      const line_map_ordinary *final_ord_map;
      linemap_resolve_location (line_table, src_range.m_finish,
                                LRK_SPELLING_LOCATION, &final_ord_map);
      if (start_ord_map == NULL || final_ord_map == NULL)
        return "failed to get ordinary maps";
      /* Bulletproofing.  We ought to only have different ordinary maps
         for start vs finish due to line-length jumps.  */
      if (start_ord_map != final_ord_map
          && start_ord_map->to_file != final_ord_map->to_file)
        return "start and finish are spelled in different ordinary maps";
      /* The file from linemap_resolve_location ought to match that from
         expand_location_to_spelling_point.  */
      if (start_ord_map->to_file != start.file)
        return "mismatching file after resolving linemap";

      /* Re-derive the start location within the map of the token's end
         (see the comment about very long lines above).  */
      location_t start_loc
        = linemap_position_for_line_and_column (line_table, final_ord_map,
                                                start.line, start.column);

      cpp_string_location_reader loc_reader (start_loc, line_table);
      loc_readers.safe_push (loc_reader);
    }

  /* Rerun cpp_interpret_string, or rather, a modified version of it.  */
  const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
                                                 loc_readers.address (),
                                                 num_locs, &ranges, type);
  if (err)
    return err;

  /* Success: "ranges" should now contain information on the string.  */
  return NULL;
}
1679
1680 /* Attempt to populate *OUT_LOC with source location information on the
1681 given characters within the string literal found at STRLOC.
1682 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1683 character set.
1684
1685 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1686 and string literal "012345\n789"
1687 *OUT_LOC is written to with:
1688 "012345\n789"
1689 ~^~~~~
1690
1691 If CONCATS is non-NULL, then any string literals that the token at
1692 STRLOC was concatenated with are also considered.
1693
1694 This is implemented by re-parsing the relevant source line(s).
1695
1696 Return NULL if successful, or an error message if any errors occurred.
1697 Error messages are intended for GCC developers (to help debugging) rather
1698 than for end-users. */
1699
1700 const char *
get_location_within_string(cpp_reader * pfile,string_concat_db * concats,location_t strloc,enum cpp_ttype type,int caret_idx,int start_idx,int end_idx,location_t * out_loc)1701 get_location_within_string (cpp_reader *pfile,
1702 string_concat_db *concats,
1703 location_t strloc,
1704 enum cpp_ttype type,
1705 int caret_idx, int start_idx, int end_idx,
1706 location_t *out_loc)
1707 {
1708 gcc_checking_assert (caret_idx >= 0);
1709 gcc_checking_assert (start_idx >= 0);
1710 gcc_checking_assert (end_idx >= 0);
1711 gcc_assert (out_loc);
1712
1713 cpp_substring_ranges ranges;
1714 const char *err
1715 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1716 if (err)
1717 return err;
1718
1719 if (caret_idx >= ranges.get_num_ranges ())
1720 return "caret_idx out of range";
1721 if (start_idx >= ranges.get_num_ranges ())
1722 return "start_idx out of range";
1723 if (end_idx >= ranges.get_num_ranges ())
1724 return "end_idx out of range";
1725
1726 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1727 ranges.get_range (start_idx).m_start,
1728 ranges.get_range (end_idx).m_finish);
1729 return NULL;
1730 }
1731
1732 #if CHECKING_P
1733
1734 namespace selftest {
1735
1736 /* Selftests of location handling. */
1737
1738 /* Attempt to populate *OUT_RANGE with source location information on the
1739 given character within the string literal found at STRLOC.
1740 CHAR_IDX refers to an offset within the execution character set.
1741 If CONCATS is non-NULL, then any string literals that the token at
1742 STRLOC was concatenated with are also considered.
1743
1744 This is implemented by re-parsing the relevant source line(s).
1745
1746 Return NULL if successful, or an error message if any errors occurred.
1747 Error messages are intended for GCC developers (to help debugging) rather
1748 than for end-users. */
1749
1750 static const char *
get_source_range_for_char(cpp_reader * pfile,string_concat_db * concats,location_t strloc,enum cpp_ttype type,int char_idx,source_range * out_range)1751 get_source_range_for_char (cpp_reader *pfile,
1752 string_concat_db *concats,
1753 location_t strloc,
1754 enum cpp_ttype type,
1755 int char_idx,
1756 source_range *out_range)
1757 {
1758 gcc_checking_assert (char_idx >= 0);
1759 gcc_assert (out_range);
1760
1761 cpp_substring_ranges ranges;
1762 const char *err
1763 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1764 if (err)
1765 return err;
1766
1767 if (char_idx >= ranges.get_num_ranges ())
1768 return "char_idx out of range";
1769
1770 *out_range = ranges.get_range (char_idx);
1771 return NULL;
1772 }
1773
1774 /* As get_source_range_for_char, but write to *OUT the number
1775 of ranges that are available. */
1776
1777 static const char *
get_num_source_ranges_for_substring(cpp_reader * pfile,string_concat_db * concats,location_t strloc,enum cpp_ttype type,int * out)1778 get_num_source_ranges_for_substring (cpp_reader *pfile,
1779 string_concat_db *concats,
1780 location_t strloc,
1781 enum cpp_ttype type,
1782 int *out)
1783 {
1784 gcc_assert (out);
1785
1786 cpp_substring_ranges ranges;
1787 const char *err
1788 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1789
1790 if (err)
1791 return err;
1792
1793 *out = ranges.get_num_ranges ();
1794 return NULL;
1795 }
1796
1797 /* Selftests of location handling. */
1798
1799 /* Verify that compare() on linenum_type handles comparisons over the full
1800 range of the type. */
1801
static void
test_linenum_comparisons ()
{
  /* The two extremes used here: zero and all 32 bits set.  */
  linenum_type min_line (0);
  linenum_type max_line (0xffffffff);

  /* Each value compares equal to itself.  */
  ASSERT_EQ (0, compare (min_line, min_line));
  ASSERT_EQ (0, compare (max_line, max_line));

  /* The ordering between the extremes must have the right sign in both
     directions.  */
  ASSERT_GT (compare (max_line, min_line), 0);
  ASSERT_LT (compare (min_line, max_line), 0);
}
1813
1814 /* Helper function for verifying location data: when location_t
1815 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1816 as having column 0. */
1817
1818 static bool
should_have_column_data_p(location_t loc)1819 should_have_column_data_p (location_t loc)
1820 {
1821 if (IS_ADHOC_LOC (loc))
1822 loc = get_location_from_adhoc_loc (line_table, loc);
1823 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1824 return false;
1825 return true;
1826 }
1827
1828 /* Selftest for should_have_column_data_p. */
1829
static void
test_should_have_column_data_p ()
{
  /* A low location is well within the threshold.  */
  ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
  /* The threshold value itself is still expected to have column data...  */
  ASSERT_TRUE
    (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
  /* ...but the very next value is not.  */
  ASSERT_FALSE
    (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
}
1839
1840 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1841 on LOC. */
1842
static void
assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
              location_t loc)
{
  /* File name and line number are always checked.  */
  ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
  ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
  /* If location_t values are sufficiently high, then column numbers
     will be unavailable and LOCATION_COLUMN (loc) will be 0.
     When close to the threshold, column numbers *may* be present: if
     the final linemap before the threshold contains a line that straddles
     the threshold, locations in that line have column information.  */
  /* Hence EXP_COLNUM is only checked when LOC is expected to carry
     column data.  */
  if (should_have_column_data_p (loc))
    ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
}
1857
/* Many of the selftests build a line table and populate it with one or
   more line maps.

   To maximize coverage, such tests are run under a matrix of
   configurations, varying:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.cc: there are various threshold
     values for location_t beyond which line-map.cc changes behavior
     (disabling of the range-packing optimization, disabling of
     column-tracking).  These are exercised by starting the line_table
     at interesting values at or near these thresholds.

   A line_table_case describes one cell of that matrix.  */

class line_table_case
{
public:
  line_table_case (int default_range_bits, int base_location)
  : m_default_range_bits (default_range_bits),
    m_base_location (base_location)
  {
  }

  /* Value to install as line_table->default_range_bits.  */
  int m_default_range_bits;

  /* If non-zero, the location_t value at which the line table starts.  */
  int m_base_location;
};
1885
1886 /* Constructor. Store the old value of line_table, and create a new
1887 one, using sane defaults. */
1888
line_table_test()1889 line_table_test::line_table_test ()
1890 {
1891 gcc_assert (saved_line_table == NULL);
1892 saved_line_table = line_table;
1893 line_table = ggc_alloc<line_maps> ();
1894 linemap_init (line_table, BUILTINS_LOCATION);
1895 gcc_assert (saved_line_table->reallocator);
1896 line_table->reallocator = saved_line_table->reallocator;
1897 gcc_assert (saved_line_table->round_alloc_size);
1898 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1899 line_table->default_range_bits = 0;
1900 }
1901
1902 /* Constructor. Store the old value of line_table, and create a new
1903 one, using the sitation described in CASE_. */
1904
line_table_test(const line_table_case & case_)1905 line_table_test::line_table_test (const line_table_case &case_)
1906 {
1907 gcc_assert (saved_line_table == NULL);
1908 saved_line_table = line_table;
1909 line_table = ggc_alloc<line_maps> ();
1910 linemap_init (line_table, BUILTINS_LOCATION);
1911 gcc_assert (saved_line_table->reallocator);
1912 line_table->reallocator = saved_line_table->reallocator;
1913 gcc_assert (saved_line_table->round_alloc_size);
1914 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1915 line_table->default_range_bits = case_.m_default_range_bits;
1916 if (case_.m_base_location)
1917 {
1918 line_table->highest_location = case_.m_base_location;
1919 line_table->highest_line = case_.m_base_location;
1920 }
1921 }
1922
1923 /* Destructor. Restore the old value of line_table. */
1924
~line_table_test()1925 line_table_test::~line_table_test ()
1926 {
1927 gcc_assert (saved_line_table != NULL);
1928 line_table = saved_line_table;
1929 saved_line_table = NULL;
1930 }
1931
1932 /* Verify basic operation of ordinary linemaps. */
1933
1934 static void
test_accessing_ordinary_linemaps(const line_table_case & case_)1935 test_accessing_ordinary_linemaps (const line_table_case &case_)
1936 {
1937 line_table_test ltt (case_);
1938
1939 /* Build a simple linemap describing some locations. */
1940 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1941
1942 linemap_line_start (line_table, 1, 100);
1943 location_t loc_a = linemap_position_for_column (line_table, 1);
1944 location_t loc_b = linemap_position_for_column (line_table, 23);
1945
1946 linemap_line_start (line_table, 2, 100);
1947 location_t loc_c = linemap_position_for_column (line_table, 1);
1948 location_t loc_d = linemap_position_for_column (line_table, 17);
1949
1950 /* Example of a very long line. */
1951 linemap_line_start (line_table, 3, 2000);
1952 location_t loc_e = linemap_position_for_column (line_table, 700);
1953
1954 /* Transitioning back to a short line. */
1955 linemap_line_start (line_table, 4, 0);
1956 location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1957
1958 if (should_have_column_data_p (loc_back_to_short))
1959 {
1960 /* Verify that we switched to short lines in the linemap. */
1961 line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1962 ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1963 }
1964
1965 /* Example of a line that will eventually be seen to be longer
1966 than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1967 below that. */
1968 linemap_line_start (line_table, 5, 2000);
1969
1970 location_t loc_start_of_very_long_line
1971 = linemap_position_for_column (line_table, 2000);
1972 location_t loc_too_wide
1973 = linemap_position_for_column (line_table, 4097);
1974 location_t loc_too_wide_2
1975 = linemap_position_for_column (line_table, 4098);
1976
1977 /* ...and back to a sane line length. */
1978 linemap_line_start (line_table, 6, 100);
1979 location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1980
1981 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1982
1983 /* Multiple files. */
1984 linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1985 linemap_line_start (line_table, 1, 200);
1986 location_t loc_f = linemap_position_for_column (line_table, 150);
1987 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1988
1989 /* Verify that we can recover the location info. */
1990 assert_loceq ("foo.c", 1, 1, loc_a);
1991 assert_loceq ("foo.c", 1, 23, loc_b);
1992 assert_loceq ("foo.c", 2, 1, loc_c);
1993 assert_loceq ("foo.c", 2, 17, loc_d);
1994 assert_loceq ("foo.c", 3, 700, loc_e);
1995 assert_loceq ("foo.c", 4, 100, loc_back_to_short);
1996
1997 /* In the very wide line, the initial location should be fully tracked. */
1998 assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1999 /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
2000 be disabled. */
2001 assert_loceq ("foo.c", 5, 0, loc_too_wide);
2002 assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
2003 /*...and column-tracking should be re-enabled for subsequent lines. */
2004 assert_loceq ("foo.c", 6, 10, loc_sane_again);
2005
2006 assert_loceq ("bar.c", 1, 150, loc_f);
2007
2008 ASSERT_FALSE (is_location_from_builtin_token (loc_a));
2009 ASSERT_TRUE (pure_location_p (line_table, loc_a));
2010
2011 /* Verify using make_location to build a range, and extracting data
2012 back from it. */
2013 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
2014 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
2015 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
2016 source_range src_range = get_range_from_loc (line_table, range_c_b_d);
2017 ASSERT_EQ (loc_b, src_range.m_start);
2018 ASSERT_EQ (loc_d, src_range.m_finish);
2019 }
2020
2021 /* Verify various properties of UNKNOWN_LOCATION. */
2022
2023 static void
test_unknown_location()2024 test_unknown_location ()
2025 {
2026 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
2027 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
2028 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
2029 }
2030
2031 /* Verify various properties of BUILTINS_LOCATION. */
2032
2033 static void
test_builtins()2034 test_builtins ()
2035 {
2036 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
2037 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
2038 }
2039
2040 /* Regression test for make_location.
2041 Ensure that we use pure locations for the start/finish of the range,
2042 rather than storing a packed or ad-hoc range as the start/finish. */
2043
2044 static void
test_make_location_nonpure_range_endpoints(const line_table_case & case_)2045 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
2046 {
2047 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
2048 with C++ frontend.
2049 ....................0000000001111111111222.
2050 ....................1234567890123456789012. */
2051 const char *content = " r += !aaa == bbb;\n";
2052 temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
2053 line_table_test ltt (case_);
2054 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
2055
2056 const location_t c11 = linemap_position_for_column (line_table, 11);
2057 const location_t c12 = linemap_position_for_column (line_table, 12);
2058 const location_t c13 = linemap_position_for_column (line_table, 13);
2059 const location_t c14 = linemap_position_for_column (line_table, 14);
2060 const location_t c21 = linemap_position_for_column (line_table, 21);
2061
2062 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
2063 return;
2064
2065 /* Use column 13 for the caret location, arbitrarily, to verify that we
2066 handle start != caret. */
2067 const location_t aaa = make_location (c13, c12, c14);
2068 ASSERT_EQ (c13, get_pure_location (aaa));
2069 ASSERT_EQ (c12, get_start (aaa));
2070 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
2071 ASSERT_EQ (c14, get_finish (aaa));
2072 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
2073
2074 /* Make a location using a location with a range as the start-point. */
2075 const location_t not_aaa = make_location (c11, aaa, c14);
2076 ASSERT_EQ (c11, get_pure_location (not_aaa));
2077 /* It should use the start location of the range, not store the range
2078 itself. */
2079 ASSERT_EQ (c12, get_start (not_aaa));
2080 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
2081 ASSERT_EQ (c14, get_finish (not_aaa));
2082 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
2083
2084 /* Similarly, make a location with a range as the end-point. */
2085 const location_t aaa_eq_bbb = make_location (c12, c12, c21);
2086 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
2087 ASSERT_EQ (c12, get_start (aaa_eq_bbb));
2088 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
2089 ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
2090 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
2091 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
2092 /* It should use the finish location of the range, not store the range
2093 itself. */
2094 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
2095 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
2096 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
2097 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
2098 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
2099 }
2100
2101 /* Verify reading of input files (e.g. for caret-based diagnostics). */
2102
2103 static void
test_reading_source_line()2104 test_reading_source_line ()
2105 {
2106 /* Create a tempfile and write some text to it. */
2107 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
2108 "01234567890123456789\n"
2109 "This is the test text\n"
2110 "This is the 3rd line");
2111
2112 /* Read back a specific line from the tempfile. */
2113 char_span source_line = location_get_source_line (tmp.get_filename (), 3);
2114 ASSERT_TRUE (source_line);
2115 ASSERT_TRUE (source_line.get_buffer () != NULL);
2116 ASSERT_EQ (20, source_line.length ());
2117 ASSERT_TRUE (!strncmp ("This is the 3rd line",
2118 source_line.get_buffer (), source_line.length ()));
2119
2120 source_line = location_get_source_line (tmp.get_filename (), 2);
2121 ASSERT_TRUE (source_line);
2122 ASSERT_TRUE (source_line.get_buffer () != NULL);
2123 ASSERT_EQ (21, source_line.length ());
2124 ASSERT_TRUE (!strncmp ("This is the test text",
2125 source_line.get_buffer (), source_line.length ()));
2126
2127 source_line = location_get_source_line (tmp.get_filename (), 4);
2128 ASSERT_FALSE (source_line);
2129 ASSERT_TRUE (source_line.get_buffer () == NULL);
2130 }
2131
2132 /* Tests of lexing. */
2133
/* Assert that cpp_token_as_text on token TOK, as lexed by PARSER,
   yields a string equal to EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    unsigned char *token_text_ = cpp_token_as_text ((PARSER), (TOK));   \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)token_text_);          \
  SELFTEST_END_STMT
2142
2143 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
2144 and ranges from EXP_START_COL to EXP_FINISH_COL.
2145 Use LOC as the effective location of the selftest. */
2146
2147 static void
assert_token_loc_eq(const location & loc,const cpp_token * tok,const char * exp_filename,int exp_linenum,int exp_start_col,int exp_finish_col)2148 assert_token_loc_eq (const location &loc,
2149 const cpp_token *tok,
2150 const char *exp_filename, int exp_linenum,
2151 int exp_start_col, int exp_finish_col)
2152 {
2153 location_t tok_loc = tok->src_loc;
2154 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
2155 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
2156
2157 /* If location_t values are sufficiently high, then column numbers
2158 will be unavailable. */
2159 if (!should_have_column_data_p (tok_loc))
2160 return;
2161
2162 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
2163 source_range tok_range = get_range_from_loc (line_table, tok_loc);
2164 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
2165 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
2166 }
2167
2168 /* Use assert_token_loc_eq to verify the TOK->src_loc, using
2169 SELFTEST_LOCATION as the effective location of the selftest. */
2170
2171 #define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \
2172 EXP_START_COL, EXP_FINISH_COL) \
2173 assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME), \
2174 (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
2175
2176 /* Test of lexing a file using libcpp, verifying tokens and their
2177 location information. */
2178
2179 static void
test_lexer(const line_table_case & case_)2180 test_lexer (const line_table_case &case_)
2181 {
2182 /* Create a tempfile and write some text to it. */
2183 const char *content =
2184 /*00000000011111111112222222222333333.3333444444444.455555555556
2185 12345678901234567890123456789012345.6789012345678.901234567890. */
2186 ("test_name /* c-style comment */\n"
2187 " \"test literal\"\n"
2188 " // test c++-style comment\n"
2189 " 42\n");
2190 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
2191
2192 line_table_test ltt (case_);
2193
2194 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
2195
2196 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
2197 ASSERT_NE (fname, NULL);
2198
2199 /* Verify that we get the expected tokens back, with the correct
2200 location information. */
2201
2202 location_t loc;
2203 const cpp_token *tok;
2204 tok = cpp_get_token_with_location (parser, &loc);
2205 ASSERT_NE (tok, NULL);
2206 ASSERT_EQ (tok->type, CPP_NAME);
2207 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2208 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2209
2210 tok = cpp_get_token_with_location (parser, &loc);
2211 ASSERT_NE (tok, NULL);
2212 ASSERT_EQ (tok->type, CPP_STRING);
2213 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2214 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2215
2216 tok = cpp_get_token_with_location (parser, &loc);
2217 ASSERT_NE (tok, NULL);
2218 ASSERT_EQ (tok->type, CPP_NUMBER);
2219 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2220 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2221
2222 tok = cpp_get_token_with_location (parser, &loc);
2223 ASSERT_NE (tok, NULL);
2224 ASSERT_EQ (tok->type, CPP_EOF);
2225
2226 cpp_finish (parser, NULL);
2227 cpp_destroy (parser);
2228 }
2229
/* Forward decls.  */

class lexer_test;
class lexer_test_options;

/* A class for specifying options of a lexer_test.
   The "apply" vfunc is called during the lexer_test constructor, with
   the lexer_test under construction as its argument.  */

class lexer_test_options
{
 public:
  /* This is a polymorphic base class, so give it a virtual destructor:
     destroying a subclass through a lexer_test_options pointer is then
     well-defined.  */
  virtual ~lexer_test_options () {}

  virtual void apply (lexer_test &) = 0;
};
2243
2244 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2245 in its dtor.
2246
2247 This is needed by struct lexer_test to ensure that the cleanup of the
2248 cpp_reader happens *after* the cleanup of the temp_source_file. */
2249
2250 class cpp_reader_ptr
2251 {
2252 public:
cpp_reader_ptr(cpp_reader * ptr)2253 cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2254
~cpp_reader_ptr()2255 ~cpp_reader_ptr ()
2256 {
2257 cpp_finish (m_ptr, NULL);
2258 cpp_destroy (m_ptr);
2259 }
2260
operator cpp_reader*() const2261 operator cpp_reader * () const { return m_ptr; }
2262
2263 private:
2264 cpp_reader *m_ptr;
2265 };
2266
2267 /* A struct for writing lexer tests. */
2268
2269 class lexer_test
2270 {
2271 public:
2272 lexer_test (const line_table_case &case_, const char *content,
2273 lexer_test_options *options);
2274 ~lexer_test ();
2275
2276 const cpp_token *get_token ();
2277
2278 /* The ordering of these fields matters.
2279 The line_table_test must be first, since the cpp_reader_ptr
2280 uses it.
2281 The cpp_reader must be cleaned up *after* the temp_source_file
2282 since the filenames in input.cc's input cache are owned by the
2283 cpp_reader; in particular, when ~temp_source_file evicts the
2284 filename the filenames must still be alive. */
2285 line_table_test m_ltt;
2286 cpp_reader_ptr m_parser;
2287 temp_source_file m_tempfile;
2288 string_concat_db m_concats;
2289 bool m_implicitly_expect_EOF;
2290 };
2291
2292 /* Use an EBCDIC encoding for the execution charset, specifically
2293 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2294
2295 This exercises iconv integration within libcpp.
2296 Not every build of iconv supports the given charset,
2297 so we need to flag this error and handle it gracefully. */
2298
2299 class ebcdic_execution_charset : public lexer_test_options
2300 {
2301 public:
ebcdic_execution_charset()2302 ebcdic_execution_charset () : m_num_iconv_errors (0)
2303 {
2304 gcc_assert (s_singleton == NULL);
2305 s_singleton = this;
2306 }
~ebcdic_execution_charset()2307 ~ebcdic_execution_charset ()
2308 {
2309 gcc_assert (s_singleton == this);
2310 s_singleton = NULL;
2311 }
2312
apply(lexer_test & test)2313 void apply (lexer_test &test) FINAL OVERRIDE
2314 {
2315 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2316 cpp_opts->narrow_charset = "IBM1047";
2317
2318 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2319 callbacks->diagnostic = on_diagnostic;
2320 }
2321
on_diagnostic(cpp_reader * pfile ATTRIBUTE_UNUSED,enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,enum cpp_warning_reason reason ATTRIBUTE_UNUSED,rich_location * richloc ATTRIBUTE_UNUSED,const char * msgid,va_list * ap ATTRIBUTE_UNUSED)2322 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2323 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2324 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2325 rich_location *richloc ATTRIBUTE_UNUSED,
2326 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2327 ATTRIBUTE_FPTR_PRINTF(5,0)
2328 {
2329 gcc_assert (s_singleton);
2330 /* Avoid exgettext from picking this up, it is translated in libcpp. */
2331 const char *msg = "conversion from %s to %s not supported by iconv";
2332 #ifdef ENABLE_NLS
2333 msg = dgettext ("cpplib", msg);
2334 #endif
2335 /* Detect and record errors emitted by libcpp/charset.cc:init_iconv_desc
2336 when the local iconv build doesn't support the conversion. */
2337 if (strcmp (msgid, msg) == 0)
2338 {
2339 s_singleton->m_num_iconv_errors++;
2340 return true;
2341 }
2342
2343 /* Otherwise, we have an unexpected error. */
2344 abort ();
2345 }
2346
iconv_errors_occurred_p() const2347 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2348
2349 private:
2350 static ebcdic_execution_charset *s_singleton;
2351 int m_num_iconv_errors;
2352 };
2353
2354 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2355
2356 /* A lexer_test_options subclass that records a list of diagnostic
2357 messages emitted by the lexer. */
2358
2359 class lexer_diagnostic_sink : public lexer_test_options
2360 {
2361 public:
lexer_diagnostic_sink()2362 lexer_diagnostic_sink ()
2363 {
2364 gcc_assert (s_singleton == NULL);
2365 s_singleton = this;
2366 }
~lexer_diagnostic_sink()2367 ~lexer_diagnostic_sink ()
2368 {
2369 gcc_assert (s_singleton == this);
2370 s_singleton = NULL;
2371
2372 int i;
2373 char *str;
2374 FOR_EACH_VEC_ELT (m_diagnostics, i, str)
2375 free (str);
2376 }
2377
apply(lexer_test & test)2378 void apply (lexer_test &test) FINAL OVERRIDE
2379 {
2380 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2381 callbacks->diagnostic = on_diagnostic;
2382 }
2383
on_diagnostic(cpp_reader * pfile ATTRIBUTE_UNUSED,enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,enum cpp_warning_reason reason ATTRIBUTE_UNUSED,rich_location * richloc ATTRIBUTE_UNUSED,const char * msgid,va_list * ap)2384 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2385 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2386 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2387 rich_location *richloc ATTRIBUTE_UNUSED,
2388 const char *msgid, va_list *ap)
2389 ATTRIBUTE_FPTR_PRINTF(5,0)
2390 {
2391 char *msg = xvasprintf (msgid, *ap);
2392 s_singleton->m_diagnostics.safe_push (msg);
2393 return true;
2394 }
2395
2396 auto_vec<char *> m_diagnostics;
2397
2398 private:
2399 static lexer_diagnostic_sink *s_singleton;
2400 };
2401
2402 lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
2403
2404 /* Constructor. Override line_table with a new instance based on CASE_,
2405 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2406 start parsing the tempfile. */
2407
lexer_test(const line_table_case & case_,const char * content,lexer_test_options * options)2408 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2409 lexer_test_options *options)
2410 : m_ltt (case_),
2411 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2412 /* Create a tempfile and write the text to it. */
2413 m_tempfile (SELFTEST_LOCATION, ".c", content),
2414 m_concats (),
2415 m_implicitly_expect_EOF (true)
2416 {
2417 if (options)
2418 options->apply (*this);
2419
2420 cpp_init_iconv (m_parser);
2421
2422 /* Parse the file. */
2423 const char *fname = cpp_read_main_file (m_parser,
2424 m_tempfile.get_filename ());
2425 ASSERT_NE (fname, NULL);
2426 }
2427
2428 /* Destructor. By default, verify that the next token in m_parser is EOF. */
2429
~lexer_test()2430 lexer_test::~lexer_test ()
2431 {
2432 location_t loc;
2433 const cpp_token *tok;
2434
2435 if (m_implicitly_expect_EOF)
2436 {
2437 tok = cpp_get_token_with_location (m_parser, &loc);
2438 ASSERT_NE (tok, NULL);
2439 ASSERT_EQ (tok->type, CPP_EOF);
2440 }
2441 }
2442
2443 /* Get the next token from m_parser. */
2444
2445 const cpp_token *
get_token()2446 lexer_test::get_token ()
2447 {
2448 location_t loc;
2449 const cpp_token *tok;
2450
2451 tok = cpp_get_token_with_location (m_parser, &loc);
2452 ASSERT_NE (tok, NULL);
2453 return tok;
2454 }
2455
2456 /* Verify that locations within string literals are correctly handled. */
2457
2458 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2459 using the string concatenation database for TEST.
2460
2461 Assert that the character at index IDX is on EXPECTED_LINE,
2462 and that it begins at column EXPECTED_START_COL and ends at
2463 EXPECTED_FINISH_COL (unless the locations are beyond
2464 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2465 columns). */
2466
2467 static void
assert_char_at_range(const location & loc,lexer_test & test,location_t strloc,enum cpp_ttype type,int idx,int expected_line,int expected_start_col,int expected_finish_col)2468 assert_char_at_range (const location &loc,
2469 lexer_test& test,
2470 location_t strloc, enum cpp_ttype type, int idx,
2471 int expected_line, int expected_start_col,
2472 int expected_finish_col)
2473 {
2474 cpp_reader *pfile = test.m_parser;
2475 string_concat_db *concats = &test.m_concats;
2476
2477 source_range actual_range = source_range();
2478 const char *err
2479 = get_source_range_for_char (pfile, concats, strloc, type, idx,
2480 &actual_range);
2481 if (should_have_column_data_p (strloc))
2482 ASSERT_EQ_AT (loc, NULL, err);
2483 else
2484 {
2485 ASSERT_STREQ_AT (loc,
2486 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2487 err);
2488 return;
2489 }
2490
2491 int actual_start_line = LOCATION_LINE (actual_range.m_start);
2492 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2493 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2494 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2495
2496 if (should_have_column_data_p (actual_range.m_start))
2497 {
2498 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2499 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2500 }
2501 if (should_have_column_data_p (actual_range.m_finish))
2502 {
2503 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2504 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2505 }
2506 }
2507
2508 /* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
2509 the effective location of any errors. */
2510
2511 #define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
2512 EXPECTED_START_COL, EXPECTED_FINISH_COL) \
2513 assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
2514 (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
2515 (EXPECTED_FINISH_COL))
2516
2517 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2518 using the string concatenation database for TEST.
2519
2520 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2521
2522 static void
assert_num_substring_ranges(const location & loc,lexer_test & test,location_t strloc,enum cpp_ttype type,int expected_num_ranges)2523 assert_num_substring_ranges (const location &loc,
2524 lexer_test& test,
2525 location_t strloc,
2526 enum cpp_ttype type,
2527 int expected_num_ranges)
2528 {
2529 cpp_reader *pfile = test.m_parser;
2530 string_concat_db *concats = &test.m_concats;
2531
2532 int actual_num_ranges = -1;
2533 const char *err
2534 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2535 &actual_num_ranges);
2536 if (should_have_column_data_p (strloc))
2537 ASSERT_EQ_AT (loc, NULL, err);
2538 else
2539 {
2540 ASSERT_STREQ_AT (loc,
2541 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2542 err);
2543 return;
2544 }
2545 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2546 }
2547
2548 /* Macro for calling assert_num_substring_ranges, supplying
2549 SELFTEST_LOCATION for the effective location of any errors. */
2550
2551 #define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
2552 EXPECTED_NUM_RANGES) \
2553 assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
2554 (TYPE), (EXPECTED_NUM_RANGES))
2555
2556
2557 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2558 returns an error (using the string concatenation database for TEST). */
2559
2560 static void
assert_has_no_substring_ranges(const location & loc,lexer_test & test,location_t strloc,enum cpp_ttype type,const char * expected_err)2561 assert_has_no_substring_ranges (const location &loc,
2562 lexer_test& test,
2563 location_t strloc,
2564 enum cpp_ttype type,
2565 const char *expected_err)
2566 {
2567 cpp_reader *pfile = test.m_parser;
2568 string_concat_db *concats = &test.m_concats;
2569 cpp_substring_ranges ranges;
2570 const char *actual_err
2571 = get_substring_ranges_for_loc (pfile, concats, strloc,
2572 type, ranges);
2573 if (should_have_column_data_p (strloc))
2574 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2575 else
2576 ASSERT_STREQ_AT (loc,
2577 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2578 actual_err);
2579 }
2580
2581 #define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR) \
2582 assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
2583 (STRLOC), (TYPE), (ERR))
2584
2585 /* Lex a simple string literal. Verify the substring location data, before
2586 and after running cpp_interpret_string on it. */
2587
2588 static void
test_lexer_string_locations_simple(const line_table_case & case_)2589 test_lexer_string_locations_simple (const line_table_case &case_)
2590 {
2591 /* Digits 0-9 (with 0 at column 10), the simple way.
2592 ....................000000000.11111111112.2222222223333333333
2593 ....................123456789.01234567890.1234567890123456789
2594 We add a trailing comment to ensure that we correctly locate
2595 the end of the string literal token. */
2596 const char *content = " \"0123456789\" /* not a string */\n";
2597 lexer_test test (case_, content, NULL);
2598
2599 /* Verify that we get the expected token back, with the correct
2600 location information. */
2601 const cpp_token *tok = test.get_token ();
2602 ASSERT_EQ (tok->type, CPP_STRING);
2603 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2604 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2605
2606 /* At this point in lexing, the quote characters are treated as part of
2607 the string (they are stripped off by cpp_interpret_string). */
2608
2609 ASSERT_EQ (tok->val.str.len, 12);
2610
2611 /* Verify that cpp_interpret_string works. */
2612 cpp_string dst_string;
2613 const enum cpp_ttype type = CPP_STRING;
2614 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2615 &dst_string, type);
2616 ASSERT_TRUE (result);
2617 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2618 free (const_cast <unsigned char *> (dst_string.text));
2619
2620 /* Verify ranges of individual characters. This no longer includes the
2621 opening quote, but does include the closing quote. */
2622 for (int i = 0; i <= 10; i++)
2623 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2624 10 + i, 10 + i);
2625
2626 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2627 }
2628
2629 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2630 encoding. */
2631
2632 static void
test_lexer_string_locations_ebcdic(const line_table_case & case_)2633 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2634 {
2635 /* EBCDIC support requires iconv. */
2636 if (!HAVE_ICONV)
2637 return;
2638
2639 /* Digits 0-9 (with 0 at column 10), the simple way.
2640 ....................000000000.11111111112.2222222223333333333
2641 ....................123456789.01234567890.1234567890123456789
2642 We add a trailing comment to ensure that we correctly locate
2643 the end of the string literal token. */
2644 const char *content = " \"0123456789\" /* not a string */\n";
2645 ebcdic_execution_charset use_ebcdic;
2646 lexer_test test (case_, content, &use_ebcdic);
2647
2648 /* Verify that we get the expected token back, with the correct
2649 location information. */
2650 const cpp_token *tok = test.get_token ();
2651 ASSERT_EQ (tok->type, CPP_STRING);
2652 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2653 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2654
2655 /* At this point in lexing, the quote characters are treated as part of
2656 the string (they are stripped off by cpp_interpret_string). */
2657
2658 ASSERT_EQ (tok->val.str.len, 12);
2659
2660 /* The remainder of the test requires an iconv implementation that
2661 can convert from UTF-8 to the EBCDIC encoding requested above. */
2662 if (use_ebcdic.iconv_errors_occurred_p ())
2663 return;
2664
2665 /* Verify that cpp_interpret_string works. */
2666 cpp_string dst_string;
2667 const enum cpp_ttype type = CPP_STRING;
2668 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2669 &dst_string, type);
2670 ASSERT_TRUE (result);
2671 /* We should now have EBCDIC-encoded text, specifically
2672 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2673 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2674 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2675 (const char *)dst_string.text);
2676 free (const_cast <unsigned char *> (dst_string.text));
2677
2678 /* Verify that we don't attempt to record substring location information
2679 for such cases. */
2680 ASSERT_HAS_NO_SUBSTRING_RANGES
2681 (test, tok->src_loc, type,
2682 "execution character set != source character set");
2683 }
2684
2685 /* Lex a string literal containing a hex-escaped character.
2686 Verify the substring location data, before and after running
2687 cpp_interpret_string on it. */
2688
2689 static void
test_lexer_string_locations_hex(const line_table_case & case_)2690 test_lexer_string_locations_hex (const line_table_case &case_)
2691 {
2692 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2693 and with a space in place of digit 6, to terminate the escaped
2694 hex code.
2695 ....................000000000.111111.11112222.
2696 ....................123456789.012345.67890123. */
2697 const char *content = " \"01234\\x35 789\"\n";
2698 lexer_test test (case_, content, NULL);
2699
2700 /* Verify that we get the expected token back, with the correct
2701 location information. */
2702 const cpp_token *tok = test.get_token ();
2703 ASSERT_EQ (tok->type, CPP_STRING);
2704 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2705 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2706
2707 /* At this point in lexing, the quote characters are treated as part of
2708 the string (they are stripped off by cpp_interpret_string). */
2709 ASSERT_EQ (tok->val.str.len, 15);
2710
2711 /* Verify that cpp_interpret_string works. */
2712 cpp_string dst_string;
2713 const enum cpp_ttype type = CPP_STRING;
2714 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2715 &dst_string, type);
2716 ASSERT_TRUE (result);
2717 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2718 free (const_cast <unsigned char *> (dst_string.text));
2719
2720 /* Verify ranges of individual characters. This no longer includes the
2721 opening quote, but does include the closing quote. */
2722 for (int i = 0; i <= 4; i++)
2723 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2724 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2725 for (int i = 6; i <= 10; i++)
2726 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2727
2728 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2729 }
2730
2731 /* Lex a string literal containing an octal-escaped character.
2732 Verify the substring location data after running cpp_interpret_string
2733 on it. */
2734
2735 static void
test_lexer_string_locations_oct(const line_table_case & case_)2736 test_lexer_string_locations_oct (const line_table_case &case_)
2737 {
2738 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2739 and with a space in place of digit 6, to terminate the escaped
2740 octal code.
2741 ....................000000000.111111.11112222.2222223333333333444
2742 ....................123456789.012345.67890123.4567890123456789012 */
2743 const char *content = " \"01234\\065 789\" /* not a string */\n";
2744 lexer_test test (case_, content, NULL);
2745
2746 /* Verify that we get the expected token back, with the correct
2747 location information. */
2748 const cpp_token *tok = test.get_token ();
2749 ASSERT_EQ (tok->type, CPP_STRING);
2750 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2751
2752 /* Verify that cpp_interpret_string works. */
2753 cpp_string dst_string;
2754 const enum cpp_ttype type = CPP_STRING;
2755 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2756 &dst_string, type);
2757 ASSERT_TRUE (result);
2758 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2759 free (const_cast <unsigned char *> (dst_string.text));
2760
2761 /* Verify ranges of individual characters. This no longer includes the
2762 opening quote, but does include the closing quote. */
2763 for (int i = 0; i < 5; i++)
2764 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2765 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2766 for (int i = 6; i <= 10; i++)
2767 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2768
2769 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2770 }
2771
2772 /* Test of string literal containing letter escapes. */
2773
2774 static void
test_lexer_string_locations_letter_escape_1(const line_table_case & case_)2775 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2776 {
2777 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2778 .....................000000000.1.11111.1.1.11222.22222223333333
2779 .....................123456789.0.12345.6.7.89012.34567890123456. */
2780 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2781 lexer_test test (case_, content, NULL);
2782
2783 /* Verify that we get the expected tokens back. */
2784 const cpp_token *tok = test.get_token ();
2785 ASSERT_EQ (tok->type, CPP_STRING);
2786 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2787
2788 /* Verify ranges of individual characters. */
2789 /* "\t". */
2790 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2791 0, 1, 10, 11);
2792 /* "foo". */
2793 for (int i = 1; i <= 3; i++)
2794 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2795 i, 1, 11 + i, 11 + i);
2796 /* "\\" and "\n". */
2797 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2798 4, 1, 15, 16);
2799 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2800 5, 1, 17, 18);
2801
2802 /* "bar" and closing quote for nul-terminator. */
2803 for (int i = 6; i <= 9; i++)
2804 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2805 i, 1, 13 + i, 13 + i);
2806
2807 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2808 }
2809
2810 /* Another test of a string literal containing a letter escape.
2811 Based on string seen in
2812 printf ("%-%\n");
2813 in gcc.dg/format/c90-printf-1.c. */
2814
2815 static void
test_lexer_string_locations_letter_escape_2(const line_table_case & case_)2816 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2817 {
2818 /* .....................000000000.1111.11.1111.22222222223.
2819 .....................123456789.0123.45.6789.01234567890. */
2820 const char *content = (" \"%-%\\n\" /* non-str */\n");
2821 lexer_test test (case_, content, NULL);
2822
2823 /* Verify that we get the expected tokens back. */
2824 const cpp_token *tok = test.get_token ();
2825 ASSERT_EQ (tok->type, CPP_STRING);
2826 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2827
2828 /* Verify ranges of individual characters. */
2829 /* "%-%". */
2830 for (int i = 0; i < 3; i++)
2831 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2832 i, 1, 10 + i, 10 + i);
2833 /* "\n". */
2834 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2835 3, 1, 13, 14);
2836
2837 /* Closing quote for nul-terminator. */
2838 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2839 4, 1, 15, 15);
2840
2841 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2842 }
2843
2844 /* Lex a string literal containing UCN 4 characters.
2845 Verify the substring location data after running cpp_interpret_string
2846 on it. */
2847
2848 static void
test_lexer_string_locations_ucn4(const line_table_case & case_)2849 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2850 {
2851 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2852 as UCN 4.
2853 ....................000000000.111111.111122.222222223.33333333344444
2854 ....................123456789.012345.678901.234567890.12345678901234 */
2855 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2856 lexer_test test (case_, content, NULL);
2857
2858 /* Verify that we get the expected token back, with the correct
2859 location information. */
2860 const cpp_token *tok = test.get_token ();
2861 ASSERT_EQ (tok->type, CPP_STRING);
2862 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2863
2864 /* Verify that cpp_interpret_string works.
2865 The string should be encoded in the execution character
2866 set. Assuming that is UTF-8, we should have the following:
2867 ----------- ---- ----- ------- ----------------
2868 Byte offset Byte Octal Unicode Source Column(s)
2869 ----------- ---- ----- ------- ----------------
2870 0 0x30 '0' 10
2871 1 0x31 '1' 11
2872 2 0x32 '2' 12
2873 3 0x33 '3' 13
2874 4 0x34 '4' 14
2875 5 0xE2 \342 U+2174 15-20
2876 6 0x85 \205 (cont) 15-20
2877 7 0xB4 \264 (cont) 15-20
2878 8 0xE2 \342 U+2175 21-26
2879 9 0x85 \205 (cont) 21-26
2880 10 0xB5 \265 (cont) 21-26
2881 11 0x37 '7' 27
2882 12 0x38 '8' 28
2883 13 0x39 '9' 29
2884 14 0x00 30 (closing quote)
2885 ----------- ---- ----- ------- ---------------. */
2886
2887 cpp_string dst_string;
2888 const enum cpp_ttype type = CPP_STRING;
2889 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2890 &dst_string, type);
2891 ASSERT_TRUE (result);
2892 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2893 (const char *)dst_string.text);
2894 free (const_cast <unsigned char *> (dst_string.text));
2895
2896 /* Verify ranges of individual characters. This no longer includes the
2897 opening quote, but does include the closing quote.
2898 '01234'. */
2899 for (int i = 0; i <= 4; i++)
2900 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2901 /* U+2174. */
2902 for (int i = 5; i <= 7; i++)
2903 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2904 /* U+2175. */
2905 for (int i = 8; i <= 10; i++)
2906 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2907 /* '789' and nul terminator */
2908 for (int i = 11; i <= 14; i++)
2909 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2910
2911 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2912 }
2913
2914 /* Lex a string literal containing UCN 8 characters.
2915 Verify the substring location data after running cpp_interpret_string
2916 on it. */
2917
2918 static void
test_lexer_string_locations_ucn8(const line_table_case & case_)2919 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2920 {
2921 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2922 ....................000000000.111111.1111222222.2222333333333.344444
2923 ....................123456789.012345.6789012345.6789012345678.901234 */
2924 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2925 lexer_test test (case_, content, NULL);
2926
2927 /* Verify that we get the expected token back, with the correct
2928 location information. */
2929 const cpp_token *tok = test.get_token ();
2930 ASSERT_EQ (tok->type, CPP_STRING);
2931 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2932 "\"01234\\U00002174\\U00002175789\"");
2933
2934 /* Verify that cpp_interpret_string works.
2935 The UTF-8 encoding of the string is identical to that from
2936 the ucn4 testcase above; the only difference is the column
2937 locations. */
2938 cpp_string dst_string;
2939 const enum cpp_ttype type = CPP_STRING;
2940 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2941 &dst_string, type);
2942 ASSERT_TRUE (result);
2943 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2944 (const char *)dst_string.text);
2945 free (const_cast <unsigned char *> (dst_string.text));
2946
2947 /* Verify ranges of individual characters. This no longer includes the
2948 opening quote, but does include the closing quote.
2949 '01234'. */
2950 for (int i = 0; i <= 4; i++)
2951 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2952 /* U+2174. */
2953 for (int i = 5; i <= 7; i++)
2954 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2955 /* U+2175. */
2956 for (int i = 8; i <= 10; i++)
2957 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2958 /* '789' at columns 35-37 */
2959 for (int i = 11; i <= 13; i++)
2960 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2961 /* Closing quote/nul-terminator at column 38. */
2962 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
2963
2964 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2965 }
2966
/* Read the 32-bit big-endian value stored at PTR_BE_VALUE and return it
   in host byte order.  The storage is examined one octet at a time, most
   significant octet first, so the result does not depend on the host's
   endianness.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *octets
    = reinterpret_cast <const unsigned char *> (ptr_be_value);
  uint32_t host_value = 0;
  /* Fold the four octets in, shifting the accumulated value up by one
     octet before each new one is appended.  */
  for (int idx = 0; idx < 4; idx++)
    host_value = (host_value << 8) | (uint32_t) octets[idx];
  return host_value;
}
2978
2979 /* Lex a wide string literal and verify that attempts to read substring
2980 location data from it fail gracefully. */
2981
2982 static void
test_lexer_string_locations_wide_string(const line_table_case & case_)2983 test_lexer_string_locations_wide_string (const line_table_case &case_)
2984 {
2985 /* Digits 0-9.
2986 ....................000000000.11111111112.22222222233333
2987 ....................123456789.01234567890.12345678901234 */
2988 const char *content = " L\"0123456789\" /* non-str */\n";
2989 lexer_test test (case_, content, NULL);
2990
2991 /* Verify that we get the expected token back, with the correct
2992 location information. */
2993 const cpp_token *tok = test.get_token ();
2994 ASSERT_EQ (tok->type, CPP_WSTRING);
2995 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2996
2997 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2998 cpp_string dst_string;
2999 const enum cpp_ttype type = CPP_WSTRING;
3000 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3001 &dst_string, type);
3002 ASSERT_TRUE (result);
3003 /* The cpp_reader defaults to big-endian with
3004 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
3005 now be encoded as UTF-32BE. */
3006 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3007 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3008 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3009 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3010 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3011 free (const_cast <unsigned char *> (dst_string.text));
3012
3013 /* We don't yet support generating substring location information
3014 for L"" strings. */
3015 ASSERT_HAS_NO_SUBSTRING_RANGES
3016 (test, tok->src_loc, type,
3017 "execution character set != source character set");
3018 }
3019
/* Read the 16-bit big-endian value stored at PTR_BE_VALUE and return it
   in host byte order.  The storage is examined one octet at a time, so
   the result does not depend on the host's endianness.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *octets
    = reinterpret_cast <const unsigned char *> (ptr_be_value);
  /* The most significant octet is stored first.  */
  const unsigned int high_octet = octets[0];
  const unsigned int low_octet = octets[1];
  return static_cast <uint16_t> ((high_octet << 8) | low_octet);
}
3028
3029 /* Lex a u"" string literal and verify that attempts to read substring
3030 location data from it fail gracefully. */
3031
3032 static void
test_lexer_string_locations_string16(const line_table_case & case_)3033 test_lexer_string_locations_string16 (const line_table_case &case_)
3034 {
3035 /* Digits 0-9.
3036 ....................000000000.11111111112.22222222233333
3037 ....................123456789.01234567890.12345678901234 */
3038 const char *content = " u\"0123456789\" /* non-str */\n";
3039 lexer_test test (case_, content, NULL);
3040
3041 /* Verify that we get the expected token back, with the correct
3042 location information. */
3043 const cpp_token *tok = test.get_token ();
3044 ASSERT_EQ (tok->type, CPP_STRING16);
3045 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
3046
3047 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
3048 cpp_string dst_string;
3049 const enum cpp_ttype type = CPP_STRING16;
3050 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3051 &dst_string, type);
3052 ASSERT_TRUE (result);
3053
3054 /* The cpp_reader defaults to big-endian, so dst_string should
3055 now be encoded as UTF-16BE. */
3056 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
3057 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
3058 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
3059 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
3060 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
3061 free (const_cast <unsigned char *> (dst_string.text));
3062
3063 /* We don't yet support generating substring location information
3064 for L"" strings. */
3065 ASSERT_HAS_NO_SUBSTRING_RANGES
3066 (test, tok->src_loc, type,
3067 "execution character set != source character set");
3068 }
3069
3070 /* Lex a U"" string literal and verify that attempts to read substring
3071 location data from it fail gracefully. */
3072
3073 static void
test_lexer_string_locations_string32(const line_table_case & case_)3074 test_lexer_string_locations_string32 (const line_table_case &case_)
3075 {
3076 /* Digits 0-9.
3077 ....................000000000.11111111112.22222222233333
3078 ....................123456789.01234567890.12345678901234 */
3079 const char *content = " U\"0123456789\" /* non-str */\n";
3080 lexer_test test (case_, content, NULL);
3081
3082 /* Verify that we get the expected token back, with the correct
3083 location information. */
3084 const cpp_token *tok = test.get_token ();
3085 ASSERT_EQ (tok->type, CPP_STRING32);
3086 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
3087
3088 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
3089 cpp_string dst_string;
3090 const enum cpp_ttype type = CPP_STRING32;
3091 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3092 &dst_string, type);
3093 ASSERT_TRUE (result);
3094
3095 /* The cpp_reader defaults to big-endian, so dst_string should
3096 now be encoded as UTF-32BE. */
3097 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3098 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3099 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3100 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3101 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3102 free (const_cast <unsigned char *> (dst_string.text));
3103
3104 /* We don't yet support generating substring location information
3105 for L"" strings. */
3106 ASSERT_HAS_NO_SUBSTRING_RANGES
3107 (test, tok->src_loc, type,
3108 "execution character set != source character set");
3109 }
3110
3111 /* Lex a u8-string literal.
3112 Verify the substring location data after running cpp_interpret_string
3113 on it. */
3114
3115 static void
test_lexer_string_locations_u8(const line_table_case & case_)3116 test_lexer_string_locations_u8 (const line_table_case &case_)
3117 {
3118 /* Digits 0-9.
3119 ....................000000000.11111111112.22222222233333
3120 ....................123456789.01234567890.12345678901234 */
3121 const char *content = " u8\"0123456789\" /* non-str */\n";
3122 lexer_test test (case_, content, NULL);
3123
3124 /* Verify that we get the expected token back, with the correct
3125 location information. */
3126 const cpp_token *tok = test.get_token ();
3127 ASSERT_EQ (tok->type, CPP_UTF8STRING);
3128 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
3129
3130 /* Verify that cpp_interpret_string works. */
3131 cpp_string dst_string;
3132 const enum cpp_ttype type = CPP_STRING;
3133 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3134 &dst_string, type);
3135 ASSERT_TRUE (result);
3136 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3137 free (const_cast <unsigned char *> (dst_string.text));
3138
3139 /* Verify ranges of individual characters. This no longer includes the
3140 opening quote, but does include the closing quote. */
3141 for (int i = 0; i <= 10; i++)
3142 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3143 }
3144
3145 /* Lex a string literal containing UTF-8 source characters.
3146 Verify the substring location data after running cpp_interpret_string
3147 on it. */
3148
3149 static void
test_lexer_string_locations_utf8_source(const line_table_case & case_)3150 test_lexer_string_locations_utf8_source (const line_table_case &case_)
3151 {
3152 /* This string literal is written out to the source file as UTF-8,
3153 and is of the form "before mojibake after", where "mojibake"
3154 is written as the following four unicode code points:
3155 U+6587 CJK UNIFIED IDEOGRAPH-6587
3156 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3157 U+5316 CJK UNIFIED IDEOGRAPH-5316
3158 U+3051 HIRAGANA LETTER KE.
3159 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
3160 "before" and "after" are 1 byte per unicode character.
3161
3162 The numbering shown are "columns", which are *byte* numbers within
3163 the line, rather than unicode character numbers.
3164
3165 .................... 000000000.1111111.
3166 .................... 123456789.0123456. */
3167 const char *content = (" \"before "
3168 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
3169 UTF-8: 0xE6 0x96 0x87
3170 C octal escaped UTF-8: \346\226\207
3171 "column" numbers: 17-19. */
3172 "\346\226\207"
3173
3174 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3175 UTF-8: 0xE5 0xAD 0x97
3176 C octal escaped UTF-8: \345\255\227
3177 "column" numbers: 20-22. */
3178 "\345\255\227"
3179
3180 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
3181 UTF-8: 0xE5 0x8C 0x96
3182 C octal escaped UTF-8: \345\214\226
3183 "column" numbers: 23-25. */
3184 "\345\214\226"
3185
3186 /* U+3051 HIRAGANA LETTER KE
3187 UTF-8: 0xE3 0x81 0x91
3188 C octal escaped UTF-8: \343\201\221
3189 "column" numbers: 26-28. */
3190 "\343\201\221"
3191
3192 /* column numbers 29 onwards
3193 2333333.33334444444444
3194 9012345.67890123456789. */
3195 " after\" /* non-str */\n");
3196 lexer_test test (case_, content, NULL);
3197
3198 /* Verify that we get the expected token back, with the correct
3199 location information. */
3200 const cpp_token *tok = test.get_token ();
3201 ASSERT_EQ (tok->type, CPP_STRING);
3202 ASSERT_TOKEN_AS_TEXT_EQ
3203 (test.m_parser, tok,
3204 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3205
3206 /* Verify that cpp_interpret_string works. */
3207 cpp_string dst_string;
3208 const enum cpp_ttype type = CPP_STRING;
3209 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3210 &dst_string, type);
3211 ASSERT_TRUE (result);
3212 ASSERT_STREQ
3213 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3214 (const char *)dst_string.text);
3215 free (const_cast <unsigned char *> (dst_string.text));
3216
3217 /* Verify ranges of individual characters. This no longer includes the
3218 opening quote, but does include the closing quote.
3219 Assuming that both source and execution encodings are UTF-8, we have
3220 a run of 25 octets in each, plus the NUL terminator. */
3221 for (int i = 0; i < 25; i++)
3222 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3223 /* NUL-terminator should use the closing quote at column 35. */
3224 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
3225
3226 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3227 }
3228
3229 /* Test of string literal concatenation. */
3230
3231 static void
test_lexer_string_locations_concatenation_1(const line_table_case & case_)3232 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3233 {
3234 /* Digits 0-9.
3235 .....................000000000.111111.11112222222222
3236 .....................123456789.012345.67890123456789. */
3237 const char *content = (" \"01234\" /* non-str */\n"
3238 " \"56789\" /* non-str */\n");
3239 lexer_test test (case_, content, NULL);
3240
3241 location_t input_locs[2];
3242
3243 /* Verify that we get the expected tokens back. */
3244 auto_vec <cpp_string> input_strings;
3245 const cpp_token *tok_a = test.get_token ();
3246 ASSERT_EQ (tok_a->type, CPP_STRING);
3247 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3248 input_strings.safe_push (tok_a->val.str);
3249 input_locs[0] = tok_a->src_loc;
3250
3251 const cpp_token *tok_b = test.get_token ();
3252 ASSERT_EQ (tok_b->type, CPP_STRING);
3253 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3254 input_strings.safe_push (tok_b->val.str);
3255 input_locs[1] = tok_b->src_loc;
3256
3257 /* Verify that cpp_interpret_string works. */
3258 cpp_string dst_string;
3259 const enum cpp_ttype type = CPP_STRING;
3260 bool result = cpp_interpret_string (test.m_parser,
3261 input_strings.address (), 2,
3262 &dst_string, type);
3263 ASSERT_TRUE (result);
3264 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3265 free (const_cast <unsigned char *> (dst_string.text));
3266
3267 /* Simulate c-lex.cc's lex_string in order to record concatenation. */
3268 test.m_concats.record_string_concatenation (2, input_locs);
3269
3270 location_t initial_loc = input_locs[0];
3271
3272 /* "01234" on line 1. */
3273 for (int i = 0; i <= 4; i++)
3274 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3275 /* "56789" in line 2, plus its closing quote for the nul terminator. */
3276 for (int i = 5; i <= 10; i++)
3277 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3278
3279 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3280 }
3281
3282 /* Another test of string literal concatenation. */
3283
3284 static void
test_lexer_string_locations_concatenation_2(const line_table_case & case_)3285 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3286 {
3287 /* Digits 0-9.
3288 .....................000000000.111.11111112222222
3289 .....................123456789.012.34567890123456. */
3290 const char *content = (" \"01\" /* non-str */\n"
3291 " \"23\" /* non-str */\n"
3292 " \"45\" /* non-str */\n"
3293 " \"67\" /* non-str */\n"
3294 " \"89\" /* non-str */\n");
3295 lexer_test test (case_, content, NULL);
3296
3297 auto_vec <cpp_string> input_strings;
3298 location_t input_locs[5];
3299
3300 /* Verify that we get the expected tokens back. */
3301 for (int i = 0; i < 5; i++)
3302 {
3303 const cpp_token *tok = test.get_token ();
3304 ASSERT_EQ (tok->type, CPP_STRING);
3305 input_strings.safe_push (tok->val.str);
3306 input_locs[i] = tok->src_loc;
3307 }
3308
3309 /* Verify that cpp_interpret_string works. */
3310 cpp_string dst_string;
3311 const enum cpp_ttype type = CPP_STRING;
3312 bool result = cpp_interpret_string (test.m_parser,
3313 input_strings.address (), 5,
3314 &dst_string, type);
3315 ASSERT_TRUE (result);
3316 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3317 free (const_cast <unsigned char *> (dst_string.text));
3318
3319 /* Simulate c-lex.cc's lex_string in order to record concatenation. */
3320 test.m_concats.record_string_concatenation (5, input_locs);
3321
3322 location_t initial_loc = input_locs[0];
3323
3324 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3325 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3326 and expect get_source_range_for_substring to fail.
3327 However, for a string concatenation test, we can have a case
3328 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3329 but subsequent strings can be after it.
3330 Attempting to detect this within assert_char_at_range
3331 would overcomplicate the logic for the common test cases, so
3332 we detect it here. */
3333 if (should_have_column_data_p (input_locs[0])
3334 && !should_have_column_data_p (input_locs[4]))
3335 {
3336 /* Verify that get_source_range_for_substring gracefully rejects
3337 this case. */
3338 source_range actual_range;
3339 const char *err
3340 = get_source_range_for_char (test.m_parser, &test.m_concats,
3341 initial_loc, type, 0, &actual_range);
3342 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3343 return;
3344 }
3345
3346 for (int i = 0; i < 5; i++)
3347 for (int j = 0; j < 2; j++)
3348 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3349 i + 1, 10 + j, 10 + j);
3350
3351 /* NUL-terminator should use the final closing quote at line 5 column 12. */
3352 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3353
3354 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3355 }
3356
3357 /* Another test of string literal concatenation, this time combined with
3358 various kinds of escaped characters. */
3359
3360 static void
test_lexer_string_locations_concatenation_3(const line_table_case & case_)3361 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3362 {
3363 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3364 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
3365 const char *content
3366 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3367 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3368 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
3369 lexer_test test (case_, content, NULL);
3370
3371 auto_vec <cpp_string> input_strings;
3372 location_t input_locs[4];
3373
3374 /* Verify that we get the expected tokens back. */
3375 for (int i = 0; i < 4; i++)
3376 {
3377 const cpp_token *tok = test.get_token ();
3378 ASSERT_EQ (tok->type, CPP_STRING);
3379 input_strings.safe_push (tok->val.str);
3380 input_locs[i] = tok->src_loc;
3381 }
3382
3383 /* Verify that cpp_interpret_string works. */
3384 cpp_string dst_string;
3385 const enum cpp_ttype type = CPP_STRING;
3386 bool result = cpp_interpret_string (test.m_parser,
3387 input_strings.address (), 4,
3388 &dst_string, type);
3389 ASSERT_TRUE (result);
3390 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3391 free (const_cast <unsigned char *> (dst_string.text));
3392
3393 /* Simulate c-lex.cc's lex_string in order to record concatenation. */
3394 test.m_concats.record_string_concatenation (4, input_locs);
3395
3396 location_t initial_loc = input_locs[0];
3397
3398 for (int i = 0; i <= 4; i++)
3399 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3400 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3401 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3402 for (int i = 7; i <= 9; i++)
3403 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3404
3405 /* NUL-terminator should use the location of the final closing quote. */
3406 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3407
3408 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3409 }
3410
3411 /* Test of string literal in a macro. */
3412
3413 static void
test_lexer_string_locations_macro(const line_table_case & case_)3414 test_lexer_string_locations_macro (const line_table_case &case_)
3415 {
3416 /* Digits 0-9.
3417 .....................0000000001111111111.22222222223.
3418 .....................1234567890123456789.01234567890. */
3419 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
3420 " MACRO");
3421 lexer_test test (case_, content, NULL);
3422
3423 /* Verify that we get the expected tokens back. */
3424 const cpp_token *tok = test.get_token ();
3425 ASSERT_EQ (tok->type, CPP_PADDING);
3426
3427 tok = test.get_token ();
3428 ASSERT_EQ (tok->type, CPP_STRING);
3429 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3430
3431 /* Verify ranges of individual characters. We ought to
3432 see columns within the macro definition. */
3433 for (int i = 0; i <= 10; i++)
3434 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3435 i, 1, 20 + i, 20 + i);
3436
3437 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3438
3439 tok = test.get_token ();
3440 ASSERT_EQ (tok->type, CPP_PADDING);
3441 }
3442
3443 /* Test of stringification of a macro argument. */
3444
3445 static void
test_lexer_string_locations_stringified_macro_argument(const line_table_case & case_)3446 test_lexer_string_locations_stringified_macro_argument
3447 (const line_table_case &case_)
3448 {
3449 /* .....................000000000111111111122222222223.
3450 .....................123456789012345678901234567890. */
3451 const char *content = ("#define MACRO(X) #X /* non-str */\n"
3452 "MACRO(foo)\n");
3453 lexer_test test (case_, content, NULL);
3454
3455 /* Verify that we get the expected token back. */
3456 const cpp_token *tok = test.get_token ();
3457 ASSERT_EQ (tok->type, CPP_PADDING);
3458
3459 tok = test.get_token ();
3460 ASSERT_EQ (tok->type, CPP_STRING);
3461 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3462
3463 /* We don't support getting the location of a stringified macro
3464 argument. Verify that it fails gracefully. */
3465 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3466 "cpp_interpret_string_1 failed");
3467
3468 tok = test.get_token ();
3469 ASSERT_EQ (tok->type, CPP_PADDING);
3470
3471 tok = test.get_token ();
3472 ASSERT_EQ (tok->type, CPP_PADDING);
3473 }
3474
3475 /* Ensure that we are fail gracefully if something attempts to pass
3476 in a location that isn't a string literal token. Seen on this code:
3477
3478 const char a[] = " %d ";
3479 __builtin_printf (a, 0.5);
3480 ^
3481
3482 when c-format.cc erroneously used the indicated one-character
3483 location as the format string location, leading to a read past the
3484 end of a string buffer in cpp_interpret_string_1. */
3485
3486 static void
test_lexer_string_locations_non_string(const line_table_case & case_)3487 test_lexer_string_locations_non_string (const line_table_case &case_)
3488 {
3489 /* .....................000000000111111111122222222223.
3490 .....................123456789012345678901234567890. */
3491 const char *content = (" a\n");
3492 lexer_test test (case_, content, NULL);
3493
3494 /* Verify that we get the expected token back. */
3495 const cpp_token *tok = test.get_token ();
3496 ASSERT_EQ (tok->type, CPP_NAME);
3497 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3498
3499 /* At this point, libcpp is attempting to interpret the name as a
3500 string literal, despite it not starting with a quote. We don't detect
3501 that, but we should at least fail gracefully. */
3502 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3503 "cpp_interpret_string_1 failed");
3504 }
3505
3506 /* Ensure that we can read substring information for a token which
3507 starts in one linemap and ends in another . Adapted from
3508 gcc.dg/cpp/pr69985.c. */
3509
3510 static void
test_lexer_string_locations_long_line(const line_table_case & case_)3511 test_lexer_string_locations_long_line (const line_table_case &case_)
3512 {
3513 /* .....................000000.000111111111
3514 .....................123456.789012346789. */
3515 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3516 " \"0123456789012345678901234567890123456789"
3517 "0123456789012345678901234567890123456789"
3518 "0123456789012345678901234567890123456789"
3519 "0123456789\"\n");
3520
3521 lexer_test test (case_, content, NULL);
3522
3523 /* Verify that we get the expected token back. */
3524 const cpp_token *tok = test.get_token ();
3525 ASSERT_EQ (tok->type, CPP_STRING);
3526
3527 if (!should_have_column_data_p (line_table->highest_location))
3528 return;
3529
3530 /* Verify ranges of individual characters. */
3531 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3532 for (int i = 0; i < 131; i++)
3533 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3534 i, 2, 7 + i, 7 + i);
3535 }
3536
3537 /* Test of locations within a raw string that doesn't contain a newline. */
3538
3539 static void
test_lexer_string_locations_raw_string_one_line(const line_table_case & case_)3540 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3541 {
3542 /* .....................00.0000000111111111122.
3543 .....................12.3456789012345678901. */
3544 const char *content = ("R\"foo(0123456789)foo\"\n");
3545 lexer_test test (case_, content, NULL);
3546
3547 /* Verify that we get the expected token back. */
3548 const cpp_token *tok = test.get_token ();
3549 ASSERT_EQ (tok->type, CPP_STRING);
3550
3551 /* Verify that cpp_interpret_string works. */
3552 cpp_string dst_string;
3553 const enum cpp_ttype type = CPP_STRING;
3554 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3555 &dst_string, type);
3556 ASSERT_TRUE (result);
3557 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3558 free (const_cast <unsigned char *> (dst_string.text));
3559
3560 if (!should_have_column_data_p (line_table->highest_location))
3561 return;
3562
3563 /* 0-9, plus the nil terminator. */
3564 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3565 for (int i = 0; i < 11; i++)
3566 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3567 i, 1, 7 + i, 7 + i);
3568 }
3569
3570 /* Test of locations within a raw string that contains a newline. */
3571
3572 static void
test_lexer_string_locations_raw_string_multiline(const line_table_case & case_)3573 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3574 {
3575 /* .....................00.0000.
3576 .....................12.3456. */
3577 const char *content = ("R\"foo(\n"
3578 /* .....................00000.
3579 .....................12345. */
3580 "hello\n"
3581 "world\n"
3582 /* .....................00000.
3583 .....................12345. */
3584 ")foo\"\n");
3585 lexer_test test (case_, content, NULL);
3586
3587 /* Verify that we get the expected token back. */
3588 const cpp_token *tok = test.get_token ();
3589 ASSERT_EQ (tok->type, CPP_STRING);
3590
3591 /* Verify that cpp_interpret_string works. */
3592 cpp_string dst_string;
3593 const enum cpp_ttype type = CPP_STRING;
3594 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3595 &dst_string, type);
3596 ASSERT_TRUE (result);
3597 ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3598 free (const_cast <unsigned char *> (dst_string.text));
3599
3600 if (!should_have_column_data_p (line_table->highest_location))
3601 return;
3602
3603 /* Currently we don't support locations within raw strings that
3604 contain newlines. */
3605 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3606 "range endpoints are on different lines");
3607 }
3608
3609 /* Test of parsing an unterminated raw string. */
3610
3611 static void
test_lexer_string_locations_raw_string_unterminated(const line_table_case & case_)3612 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3613 {
3614 const char *content = "R\"ouch()ouCh\" /* etc */";
3615
3616 lexer_diagnostic_sink diagnostics;
3617 lexer_test test (case_, content, &diagnostics);
3618 test.m_implicitly_expect_EOF = false;
3619
3620 /* Attempt to parse the raw string. */
3621 const cpp_token *tok = test.get_token ();
3622 ASSERT_EQ (tok->type, CPP_EOF);
3623
3624 ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
3625 /* We expect the message "unterminated raw string"
3626 in the "cpplib" translation domain.
3627 It's not clear that dgettext is available on all supported hosts,
3628 so this assertion is commented-out for now.
3629 ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3630 diagnostics.m_diagnostics[0]);
3631 */
3632 }
3633
3634 /* Test of lexing char constants. */
3635
3636 static void
test_lexer_char_constants(const line_table_case & case_)3637 test_lexer_char_constants (const line_table_case &case_)
3638 {
3639 /* Various char constants.
3640 .....................0000000001111111111.22222222223.
3641 .....................1234567890123456789.01234567890. */
3642 const char *content = (" 'a'\n"
3643 " u'a'\n"
3644 " U'a'\n"
3645 " L'a'\n"
3646 " 'abc'\n");
3647 lexer_test test (case_, content, NULL);
3648
3649 /* Verify that we get the expected tokens back. */
3650 /* 'a'. */
3651 const cpp_token *tok = test.get_token ();
3652 ASSERT_EQ (tok->type, CPP_CHAR);
3653 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3654
3655 unsigned int chars_seen;
3656 int unsignedp;
3657 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3658 &chars_seen, &unsignedp);
3659 ASSERT_EQ (cc, 'a');
3660 ASSERT_EQ (chars_seen, 1);
3661
3662 /* u'a'. */
3663 tok = test.get_token ();
3664 ASSERT_EQ (tok->type, CPP_CHAR16);
3665 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3666
3667 /* U'a'. */
3668 tok = test.get_token ();
3669 ASSERT_EQ (tok->type, CPP_CHAR32);
3670 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3671
3672 /* L'a'. */
3673 tok = test.get_token ();
3674 ASSERT_EQ (tok->type, CPP_WCHAR);
3675 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3676
3677 /* 'abc' (c-char-sequence). */
3678 tok = test.get_token ();
3679 ASSERT_EQ (tok->type, CPP_CHAR);
3680 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3681 }
3682 /* A table of interesting location_t values, giving one axis of our test
3683 matrix. */
3684
3685 static const location_t boundary_locations[] = {
3686 /* Zero means "don't override the default values for a new line_table". */
3687 0,
3688
3689 /* An arbitrary non-zero value that isn't close to one of
3690 the boundary values below. */
3691 0x10000,
3692
3693 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
3694 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3695 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3696 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3697 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3698 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3699
3700 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
3701 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3702 LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3703 LINE_MAP_MAX_LOCATION_WITH_COLS,
3704 LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3705 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3706 };
3707
3708 /* Run TESTCASE multiple times, once for each case in our test matrix. */
3709
3710 void
for_each_line_table_case(void (* testcase)(const line_table_case &))3711 for_each_line_table_case (void (*testcase) (const line_table_case &))
3712 {
3713 /* As noted above in the description of struct line_table_case,
3714 we want to explore a test matrix of interesting line_table
3715 situations, running various selftests for each case within the
3716 matrix. */
3717
3718 /* Run all tests with:
3719 (a) line_table->default_range_bits == 0, and
3720 (b) line_table->default_range_bits == 5. */
3721 int num_cases_tested = 0;
3722 for (int default_range_bits = 0; default_range_bits <= 5;
3723 default_range_bits += 5)
3724 {
3725 /* ...and use each of the "interesting" location values as
3726 the starting location within line_table. */
3727 const int num_boundary_locations
3728 = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3729 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3730 {
3731 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3732
3733 testcase (c);
3734
3735 num_cases_tested++;
3736 }
3737 }
3738
3739 /* Verify that we fully covered the test matrix. */
3740 ASSERT_EQ (num_cases_tested, 2 * 12);
3741 }
3742
3743 /* Verify that when presented with a consecutive pair of locations with
3744 a very large line offset, we don't attempt to consolidate them into
3745 a single ordinary linemap where the line offsets within the line map
3746 would lead to overflow (PR lto/88147). */
3747
3748 static void
test_line_offset_overflow()3749 test_line_offset_overflow ()
3750 {
3751 line_table_test ltt (line_table_case (5, 0));
3752
3753 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
3754 linemap_line_start (line_table, 1, 100);
3755 location_t loc_a = linemap_line_start (line_table, 2578, 255);
3756 assert_loceq ("foo.c", 2578, 0, loc_a);
3757
3758 const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3759 ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
3760 ASSERT_EQ (ordmap_a->m_range_bits, 5);
3761
3762 location_t loc_b = linemap_line_start (line_table, 404198, 512);
3763 assert_loceq ("foo.c", 404198, 0, loc_b);
3764
3765 /* We should have started a new linemap, rather than attempting to store
3766 a very large line offset. */
3767 const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3768 ASSERT_NE (ordmap_a, ordmap_b);
3769 }
3770
test_cpp_utf8()3771 void test_cpp_utf8 ()
3772 {
3773 const int def_tabstop = 8;
3774 cpp_char_column_policy policy (def_tabstop, cpp_wcwidth);
3775
3776 /* Verify that wcwidth of invalid UTF-8 or control bytes is 1. */
3777 {
3778 int w_bad = cpp_display_width ("\xf0!\x9f!\x98!\x82!", 8, policy);
3779 ASSERT_EQ (8, w_bad);
3780 int w_ctrl = cpp_display_width ("\r\n\v\0\1", 5, policy);
3781 ASSERT_EQ (5, w_ctrl);
3782 }
3783
3784 /* Verify that wcwidth of valid UTF-8 is as expected. */
3785 {
3786 const int w_pi = cpp_display_width ("\xcf\x80", 2, policy);
3787 ASSERT_EQ (1, w_pi);
3788 const int w_emoji = cpp_display_width ("\xf0\x9f\x98\x82", 4, policy);
3789 ASSERT_EQ (2, w_emoji);
3790 const int w_umlaut_precomposed = cpp_display_width ("\xc3\xbf", 2,
3791 policy);
3792 ASSERT_EQ (1, w_umlaut_precomposed);
3793 const int w_umlaut_combining = cpp_display_width ("y\xcc\x88", 3,
3794 policy);
3795 ASSERT_EQ (1, w_umlaut_combining);
3796 const int w_han = cpp_display_width ("\xe4\xb8\xba", 3, policy);
3797 ASSERT_EQ (2, w_han);
3798 const int w_ascii = cpp_display_width ("GCC", 3, policy);
3799 ASSERT_EQ (3, w_ascii);
3800 const int w_mixed = cpp_display_width ("\xcf\x80 = 3.14 \xf0\x9f\x98\x82"
3801 "\x9f! \xe4\xb8\xba y\xcc\x88",
3802 24, policy);
3803 ASSERT_EQ (18, w_mixed);
3804 }
3805
3806 /* Verify that display width properly expands tabs. */
3807 {
3808 const char *tstr = "\tabc\td";
3809 ASSERT_EQ (6, cpp_display_width (tstr, 6,
3810 cpp_char_column_policy (1, cpp_wcwidth)));
3811 ASSERT_EQ (10, cpp_display_width (tstr, 6,
3812 cpp_char_column_policy (3, cpp_wcwidth)));
3813 ASSERT_EQ (17, cpp_display_width (tstr, 6,
3814 cpp_char_column_policy (8, cpp_wcwidth)));
3815 ASSERT_EQ (1,
3816 cpp_display_column_to_byte_column
3817 (tstr, 6, 7, cpp_char_column_policy (8, cpp_wcwidth)));
3818 }
3819
3820 /* Verify that cpp_byte_column_to_display_column can go past the end,
3821 and similar edge cases. */
3822 {
3823 const char *str
3824 /* Display columns.
3825 111111112345 */
3826 = "\xcf\x80 abc";
3827 /* 111122223456
3828 Byte columns. */
3829
3830 ASSERT_EQ (5, cpp_display_width (str, 6, policy));
3831 ASSERT_EQ (105,
3832 cpp_byte_column_to_display_column (str, 6, 106, policy));
3833 ASSERT_EQ (10000,
3834 cpp_byte_column_to_display_column (NULL, 0, 10000, policy));
3835 ASSERT_EQ (0,
3836 cpp_byte_column_to_display_column (NULL, 10000, 0, policy));
3837 }
3838
3839 /* Verify that cpp_display_column_to_byte_column can go past the end,
3840 and similar edge cases, and check invertibility. */
3841 {
3842 const char *str
3843 /* Display columns.
3844 000000000000000000000000000000000000011
3845 111111112222222234444444455555555678901 */
3846 = "\xf0\x9f\x98\x82 \xf0\x9f\x98\x82 hello";
3847 /* 000000000000000000000000000000000111111
3848 111122223333444456666777788889999012345
3849 Byte columns. */
3850 ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 2, policy));
3851 ASSERT_EQ (15,
3852 cpp_display_column_to_byte_column (str, 15, 11, policy));
3853 ASSERT_EQ (115,
3854 cpp_display_column_to_byte_column (str, 15, 111, policy));
3855 ASSERT_EQ (10000,
3856 cpp_display_column_to_byte_column (NULL, 0, 10000, policy));
3857 ASSERT_EQ (0,
3858 cpp_display_column_to_byte_column (NULL, 10000, 0, policy));
3859
3860 /* Verify that we do not interrupt a UTF-8 sequence. */
3861 ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 1, policy));
3862
3863 for (int byte_col = 1; byte_col <= 15; ++byte_col)
3864 {
3865 const int disp_col
3866 = cpp_byte_column_to_display_column (str, 15, byte_col, policy);
3867 const int byte_col2
3868 = cpp_display_column_to_byte_column (str, 15, disp_col, policy);
3869
3870 /* If we ask for the display column in the middle of a UTF-8
3871 sequence, it will return the length of the partial sequence,
3872 matching the behavior of GCC before display column support.
3873 Otherwise check the round trip was successful. */
3874 if (byte_col < 4)
3875 ASSERT_EQ (byte_col, disp_col);
3876 else if (byte_col >= 6 && byte_col < 9)
3877 ASSERT_EQ (3 + (byte_col - 5), disp_col);
3878 else
3879 ASSERT_EQ (byte_col2, byte_col);
3880 }
3881 }
3882
3883 }
3884
3885 /* Run all of the selftests within this file. */
3886
3887 void
input_cc_tests()3888 input_cc_tests ()
3889 {
3890 test_linenum_comparisons ();
3891 test_should_have_column_data_p ();
3892 test_unknown_location ();
3893 test_builtins ();
3894 for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3895
3896 for_each_line_table_case (test_accessing_ordinary_linemaps);
3897 for_each_line_table_case (test_lexer);
3898 for_each_line_table_case (test_lexer_string_locations_simple);
3899 for_each_line_table_case (test_lexer_string_locations_ebcdic);
3900 for_each_line_table_case (test_lexer_string_locations_hex);
3901 for_each_line_table_case (test_lexer_string_locations_oct);
3902 for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3903 for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3904 for_each_line_table_case (test_lexer_string_locations_ucn4);
3905 for_each_line_table_case (test_lexer_string_locations_ucn8);
3906 for_each_line_table_case (test_lexer_string_locations_wide_string);
3907 for_each_line_table_case (test_lexer_string_locations_string16);
3908 for_each_line_table_case (test_lexer_string_locations_string32);
3909 for_each_line_table_case (test_lexer_string_locations_u8);
3910 for_each_line_table_case (test_lexer_string_locations_utf8_source);
3911 for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3912 for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3913 for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3914 for_each_line_table_case (test_lexer_string_locations_macro);
3915 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3916 for_each_line_table_case (test_lexer_string_locations_non_string);
3917 for_each_line_table_case (test_lexer_string_locations_long_line);
3918 for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3919 for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
3920 for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
3921 for_each_line_table_case (test_lexer_char_constants);
3922
3923 test_reading_source_line ();
3924
3925 test_line_offset_overflow ();
3926
3927 test_cpp_utf8 ();
3928 }
3929
3930 } // namespace selftest
3931
3932 #endif /* CHECKING_P */
3933