xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/input.c (revision e6c7e151de239c49d2e38720a061ed9d1fa99309)
1 /* Data and functions related to line maps and input files.
2    Copyright (C) 2004-2017 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "intl.h"
24 #include "diagnostic-core.h"
25 #include "selftest.h"
26 #include "cpplib.h"
27 
28 #ifndef HAVE_ICONV
29 #define HAVE_ICONV 0
30 #endif
31 
32 /* This is a cache used by get_next_line to store the content of a
33    file to be searched for file lines.  */
34 struct fcache
35 {
36   /* These are information used to store a line boundary.  */
37   struct line_info
38   {
39     /* The line number.  It starts from 1.  */
40     size_t line_num;
41 
42     /* The position (byte count) of the beginning of the line,
43        relative to the file data pointer.  This starts at zero.  */
44     size_t start_pos;
45 
46     /* The position (byte count) of the last byte of the line.  This
47        normally points to the '\n' character, or to one byte after the
48        last byte of the file, if the file doesn't contain a '\n'
49        character.  */
50     size_t end_pos;
51 
52     line_info (size_t l, size_t s, size_t e)
53       : line_num (l), start_pos (s), end_pos (e)
54     {}
55 
56     line_info ()
57       :line_num (0), start_pos (0), end_pos (0)
58     {}
59   };
60 
61   /* The number of time this file has been accessed.  This is used
62      to designate which file cache to evict from the cache
63      array.  */
64   unsigned use_count;
65 
66   /* The file_path is the key for identifying a particular file in
67      the cache.
68      For libcpp-using code, the underlying buffer for this field is
69      owned by the corresponding _cpp_file within the cpp_reader.  */
70   const char *file_path;
71 
72   FILE *fp;
73 
74   /* This points to the content of the file that we've read so
75      far.  */
76   char *data;
77 
78   /*  The size of the DATA array above.*/
79   size_t size;
80 
81   /* The number of bytes read from the underlying file so far.  This
82      must be less (or equal) than SIZE above.  */
83   size_t nb_read;
84 
85   /* The index of the beginning of the current line.  */
86   size_t line_start_idx;
87 
88   /* The number of the previous line read.  This starts at 1.  Zero
89      means we've read no line so far.  */
90   size_t line_num;
91 
92   /* This is the total number of lines of the current file.  At the
93      moment, we try to get this information from the line map
94      subsystem.  Note that this is just a hint.  When using the C++
95      front-end, this hint is correct because the input file is then
96      completely tokenized before parsing starts; so the line map knows
97      the number of lines before compilation really starts.  For e.g,
98      the C front-end, it can happen that we start emitting diagnostics
99      before the line map has seen the end of the file.  */
100   size_t total_lines;
101 
102   /* Could this file be missing a trailing newline on its final line?
103      Initially true (to cope with empty files), set to true/false
104      as each line is read.  */
105   bool missing_trailing_newline;
106 
107   /* This is a record of the beginning and end of the lines we've seen
108      while reading the file.  This is useful to avoid walking the data
109      from the beginning when we are asked to read a line that is
110      before LINE_START_IDX above.  Note that the maximum size of this
111      record is fcache_line_record_size, so that the memory consumption
112      doesn't explode.  We thus scale total_lines down to
113      fcache_line_record_size.  */
114   vec<line_info, va_heap> line_record;
115 
116   fcache ();
117   ~fcache ();
118 };
119 
120 /* Current position in real source file.  */
121 
122 location_t input_location = UNKNOWN_LOCATION;
123 
124 struct line_maps *line_table;
125 
126 /* A stashed copy of "line_table" for use by selftest::line_table_test.
127    This needs to be a global so that it can be a GC root, and thus
128    prevent the stashed copy from being garbage-collected if the GC runs
129    during a line_table_test.  */
130 
131 struct line_maps *saved_line_table;
132 
133 static fcache *fcache_tab;
134 static const size_t fcache_tab_size = 16;
135 static const size_t fcache_buffer_size = 4 * 1024;
136 static const size_t fcache_line_record_size = 100;
137 
138 /* Expand the source location LOC into a human readable location.  If
139    LOC resolves to a builtin location, the file name of the readable
140    location is set to the string "<built-in>". If EXPANSION_POINT_P is
141    TRUE and LOC is virtual, then it is resolved to the expansion
142    point of the involved macro.  Otherwise, it is resolved to the
143    spelling location of the token.
144 
145    When resolving to the spelling location of the token, if the
146    resulting location is for a built-in location (that is, it has no
147    associated line/column) in the context of a macro expansion, the
148    returned location is the first one (while unwinding the macro
149    location towards its expansion point) that is in real source
150    code.  */
151 
152 static expanded_location
153 expand_location_1 (source_location loc,
154 		   bool expansion_point_p)
155 {
156   expanded_location xloc;
157   const line_map_ordinary *map;
158   enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
159   tree block = NULL;
160 
161   if (IS_ADHOC_LOC (loc))
162     {
163       block = LOCATION_BLOCK (loc);
164       loc = LOCATION_LOCUS (loc);
165     }
166 
167   memset (&xloc, 0, sizeof (xloc));
168 
169   if (loc >= RESERVED_LOCATION_COUNT)
170     {
171       if (!expansion_point_p)
172 	{
173 	  /* We want to resolve LOC to its spelling location.
174 
175 	     But if that spelling location is a reserved location that
176 	     appears in the context of a macro expansion (like for a
177 	     location for a built-in token), let's consider the first
178 	     location (toward the expansion point) that is not reserved;
179 	     that is, the first location that is in real source code.  */
180 	  loc = linemap_unwind_to_first_non_reserved_loc (line_table,
181 							  loc, NULL);
182 	  lrk = LRK_SPELLING_LOCATION;
183 	}
184       loc = linemap_resolve_location (line_table, loc,
185 				      lrk, &map);
186       xloc = linemap_expand_location (line_table, map, loc);
187     }
188 
189   xloc.data = block;
190   if (loc <= BUILTINS_LOCATION)
191     xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
192 
193   return xloc;
194 }
195 
196 /* Initialize the set of cache used for files accessed by caret
197    diagnostic.  */
198 
199 static void
200 diagnostic_file_cache_init (void)
201 {
202   if (fcache_tab == NULL)
203     fcache_tab = new fcache[fcache_tab_size];
204 }
205 
206 /* Free the resources used by the set of cache used for files accessed
207    by caret diagnostic.  */
208 
209 void
210 diagnostic_file_cache_fini (void)
211 {
212   if (fcache_tab)
213     {
214       delete [] (fcache_tab);
215       fcache_tab = NULL;
216     }
217 }
218 
219 /* Return the total lines number that have been read so far by the
220    line map (in the preprocessor) so far.  For languages like C++ that
221    entirely preprocess the input file before starting to parse, this
222    equals the actual number of lines of the file.  */
223 
224 static size_t
225 total_lines_num (const char *file_path)
226 {
227   size_t r = 0;
228   source_location l = 0;
229   if (linemap_get_file_highest_location (line_table, file_path, &l))
230     {
231       gcc_assert (l >= RESERVED_LOCATION_COUNT);
232       expanded_location xloc = expand_location (l);
233       r = xloc.line;
234     }
235   return r;
236 }
237 
238 /* Lookup the cache used for the content of a given file accessed by
239    caret diagnostic.  Return the found cached file, or NULL if no
240    cached file was found.  */
241 
242 static fcache*
243 lookup_file_in_cache_tab (const char *file_path)
244 {
245   if (file_path == NULL)
246     return NULL;
247 
248   diagnostic_file_cache_init ();
249 
250   /* This will contain the found cached file.  */
251   fcache *r = NULL;
252   for (unsigned i = 0; i < fcache_tab_size; ++i)
253     {
254       fcache *c = &fcache_tab[i];
255       if (c->file_path && !strcmp (c->file_path, file_path))
256 	{
257 	  ++c->use_count;
258 	  r = c;
259 	}
260     }
261 
262   if (r)
263     ++r->use_count;
264 
265   return r;
266 }
267 
268 /* Purge any mention of FILENAME from the cache of files used for
269    printing source code.  For use in selftests when working
270    with tempfiles.  */
271 
272 void
273 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
274 {
275   gcc_assert (file_path);
276 
277   fcache *r = lookup_file_in_cache_tab (file_path);
278   if (!r)
279     /* Not found.  */
280     return;
281 
282   r->file_path = NULL;
283   if (r->fp)
284     fclose (r->fp);
285   r->fp = NULL;
286   r->nb_read = 0;
287   r->line_start_idx = 0;
288   r->line_num = 0;
289   r->line_record.truncate (0);
290   r->use_count = 0;
291   r->total_lines = 0;
292   r->missing_trailing_newline = true;
293 }
294 
295 /* Return the file cache that has been less used, recently, or the
296    first empty one.  If HIGHEST_USE_COUNT is non-null,
297    *HIGHEST_USE_COUNT is set to the highest use count of the entries
298    in the cache table.  */
299 
300 static fcache*
301 evicted_cache_tab_entry (unsigned *highest_use_count)
302 {
303   diagnostic_file_cache_init ();
304 
305   fcache *to_evict = &fcache_tab[0];
306   unsigned huc = to_evict->use_count;
307   for (unsigned i = 1; i < fcache_tab_size; ++i)
308     {
309       fcache *c = &fcache_tab[i];
310       bool c_is_empty = (c->file_path == NULL);
311 
312       if (c->use_count < to_evict->use_count
313 	  || (to_evict->file_path && c_is_empty))
314 	/* We evict C because it's either an entry with a lower use
315 	   count or one that is empty.  */
316 	to_evict = c;
317 
318       if (huc < c->use_count)
319 	huc = c->use_count;
320 
321       if (c_is_empty)
322 	/* We've reached the end of the cache; subsequent elements are
323 	   all empty.  */
324 	break;
325     }
326 
327   if (highest_use_count)
328     *highest_use_count = huc;
329 
330   return to_evict;
331 }
332 
333 /* Create the cache used for the content of a given file to be
334    accessed by caret diagnostic.  This cache is added to an array of
335    cache and can be retrieved by lookup_file_in_cache_tab.  This
336    function returns the created cache.  Note that only the last
337    fcache_tab_size files are cached.  */
338 
339 static fcache*
340 add_file_to_cache_tab (const char *file_path)
341 {
342 
343   FILE *fp = fopen (file_path, "r");
344   if (fp == NULL)
345     return NULL;
346 
347   unsigned highest_use_count = 0;
348   fcache *r = evicted_cache_tab_entry (&highest_use_count);
349   r->file_path = file_path;
350   if (r->fp)
351     fclose (r->fp);
352   r->fp = fp;
353   r->nb_read = 0;
354   r->line_start_idx = 0;
355   r->line_num = 0;
356   r->line_record.truncate (0);
357   /* Ensure that this cache entry doesn't get evicted next time
358      add_file_to_cache_tab is called.  */
359   r->use_count = ++highest_use_count;
360   r->total_lines = total_lines_num (file_path);
361   r->missing_trailing_newline = true;
362 
363   return r;
364 }
365 
366 /* Lookup the cache used for the content of a given file accessed by
367    caret diagnostic.  If no cached file was found, create a new cache
368    for this file, add it to the array of cached file and return
369    it.  */
370 
371 static fcache*
372 lookup_or_add_file_to_cache_tab (const char *file_path)
373 {
374   fcache *r = lookup_file_in_cache_tab (file_path);
375   if (r == NULL)
376     r = add_file_to_cache_tab (file_path);
377   return r;
378 }
379 
380 /* Default constructor for a cache of file used by caret
381    diagnostic.  */
382 
383 fcache::fcache ()
384 : use_count (0), file_path (NULL), fp (NULL), data (0),
385   size (0), nb_read (0), line_start_idx (0), line_num (0),
386   total_lines (0), missing_trailing_newline (true)
387 {
388   line_record.create (0);
389 }
390 
391 /* Destructor for a cache of file used by caret diagnostic.  */
392 
393 fcache::~fcache ()
394 {
395   if (fp)
396     {
397       fclose (fp);
398       fp = NULL;
399     }
400   if (data)
401     {
402       XDELETEVEC (data);
403       data = 0;
404     }
405   line_record.release ();
406 }
407 
408 /* Returns TRUE iff the cache would need to be filled with data coming
409    from the file.  That is, either the cache is empty or full or the
410    current line is empty.  Note that if the cache is full, it would
411    need to be extended and filled again.  */
412 
413 static bool
414 needs_read (fcache *c)
415 {
416   return (c->nb_read == 0
417 	  || c->nb_read == c->size
418 	  || (c->line_start_idx >= c->nb_read - 1));
419 }
420 
421 /*  Return TRUE iff the cache is full and thus needs to be
422     extended.  */
423 
424 static bool
425 needs_grow (fcache *c)
426 {
427   return c->nb_read == c->size;
428 }
429 
430 /* Grow the cache if it needs to be extended.  */
431 
432 static void
433 maybe_grow (fcache *c)
434 {
435   if (!needs_grow (c))
436     return;
437 
438   size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
439   c->data = XRESIZEVEC (char, c->data, size);
440   c->size = size;
441 }
442 
443 /*  Read more data into the cache.  Extends the cache if need be.
444     Returns TRUE iff new data could be read.  */
445 
446 static bool
447 read_data (fcache *c)
448 {
449   if (feof (c->fp) || ferror (c->fp))
450     return false;
451 
452   maybe_grow (c);
453 
454   char * from = c->data + c->nb_read;
455   size_t to_read = c->size - c->nb_read;
456   size_t nb_read = fread (from, 1, to_read, c->fp);
457 
458   if (ferror (c->fp))
459     return false;
460 
461   c->nb_read += nb_read;
462   return !!nb_read;
463 }
464 
465 /* Read new data iff the cache needs to be filled with more data
466    coming from the file FP.  Return TRUE iff the cache was filled with
467    mode data.  */
468 
469 static bool
470 maybe_read_data (fcache *c)
471 {
472   if (!needs_read (c))
473     return false;
474   return read_data (c);
475 }
476 
477 /* Read a new line from file FP, using C as a cache for the data
478    coming from the file.  Upon successful completion, *LINE is set to
479    the beginning of the line found.  *LINE points directly in the
480    line cache and is only valid until the next call of get_next_line.
481    *LINE_LEN is set to the length of the line.  Note that the line
482    does not contain any terminal delimiter.  This function returns
483    true if some data was read or process from the cache, false
484    otherwise.  Note that subsequent calls to get_next_line might
485    make the content of *LINE invalid.  */
486 
487 static bool
488 get_next_line (fcache *c, char **line, ssize_t *line_len)
489 {
490   /* Fill the cache with data to process.  */
491   maybe_read_data (c);
492 
493   size_t remaining_size = c->nb_read - c->line_start_idx;
494   if (remaining_size == 0)
495     /* There is no more data to process.  */
496     return false;
497 
498   char *line_start = c->data + c->line_start_idx;
499 
500   char *next_line_start = NULL;
501   size_t len = 0;
502   char *line_end = (char *) memchr (line_start, '\n', remaining_size);
503   if (line_end == NULL)
504     {
505       /* We haven't found the end-of-line delimiter in the cache.
506 	 Fill the cache with more data from the file and look for the
507 	 '\n'.  */
508       while (maybe_read_data (c))
509 	{
510 	  line_start = c->data + c->line_start_idx;
511 	  remaining_size = c->nb_read - c->line_start_idx;
512 	  line_end = (char *) memchr (line_start, '\n', remaining_size);
513 	  if (line_end != NULL)
514 	    {
515 	      next_line_start = line_end + 1;
516 	      break;
517 	    }
518 	}
519       if (line_end == NULL)
520 	{
521 	  /* We've loadded all the file into the cache and still no
522 	     '\n'.  Let's say the line ends up at one byte passed the
523 	     end of the file.  This is to stay consistent with the case
524 	     of when the line ends up with a '\n' and line_end points to
525 	     that terminal '\n'.  That consistency is useful below in
526 	     the len calculation.  */
527 	  line_end = c->data + c->nb_read ;
528 	  c->missing_trailing_newline = true;
529 	}
530       else
531 	c->missing_trailing_newline = false;
532     }
533   else
534     {
535       next_line_start = line_end + 1;
536       c->missing_trailing_newline = false;
537     }
538 
539   if (ferror (c->fp))
540     return false;
541 
542   /* At this point, we've found the end of the of line.  It either
543      points to the '\n' or to one byte after the last byte of the
544      file.  */
545   gcc_assert (line_end != NULL);
546 
547   len = line_end - line_start;
548 
549   if (c->line_start_idx < c->nb_read)
550     *line = line_start;
551 
552   ++c->line_num;
553 
554   /* Before we update our line record, make sure the hint about the
555      total number of lines of the file is correct.  If it's not, then
556      we give up recording line boundaries from now on.  */
557   bool update_line_record = true;
558   if (c->line_num > c->total_lines)
559     update_line_record = false;
560 
561     /* Now update our line record so that re-reading lines from the
562      before c->line_start_idx is faster.  */
563   if (update_line_record
564       && c->line_record.length () < fcache_line_record_size)
565     {
566       /* If the file lines fits in the line record, we just record all
567 	 its lines ...*/
568       if (c->total_lines <= fcache_line_record_size
569 	  && c->line_num > c->line_record.length ())
570 	c->line_record.safe_push (fcache::line_info (c->line_num,
571 						 c->line_start_idx,
572 						 line_end - c->data));
573       else if (c->total_lines > fcache_line_record_size)
574 	{
575 	  /* ... otherwise, we just scale total_lines down to
576 	     (fcache_line_record_size lines.  */
577 	  size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
578 	  if (c->line_record.length () == 0
579 	      || n >= c->line_record.length ())
580 	    c->line_record.safe_push (fcache::line_info (c->line_num,
581 						     c->line_start_idx,
582 						     line_end - c->data));
583 	}
584     }
585 
586   /* Update c->line_start_idx so that it points to the next line to be
587      read.  */
588   if (next_line_start)
589     c->line_start_idx = next_line_start - c->data;
590   else
591     /* We didn't find any terminal '\n'.  Let's consider that the end
592        of line is the end of the data in the cache.  The next
593        invocation of get_next_line will either read more data from the
594        underlying file or return false early because we've reached the
595        end of the file.  */
596     c->line_start_idx = c->nb_read;
597 
598   *line_len = len;
599 
600   return true;
601 }
602 
603 /* Consume the next bytes coming from the cache (or from its
604    underlying file if there are remaining unread bytes in the file)
605    until we reach the next end-of-line (or end-of-file).  There is no
606    copying from the cache involved.  Return TRUE upon successful
607    completion.  */
608 
609 static bool
610 goto_next_line (fcache *cache)
611 {
612   char *l;
613   ssize_t len;
614 
615   return get_next_line (cache, &l, &len);
616 }
617 
618 /* Read an arbitrary line number LINE_NUM from the file cached in C.
619    If the line was read successfully, *LINE points to the beginning
620    of the line in the file cache and *LINE_LEN is the length of the
621    line.  *LINE is not nul-terminated, but may contain zero bytes.
622    *LINE is only valid until the next call of read_line_num.
623    This function returns bool if a line was read.  */
624 
625 static bool
626 read_line_num (fcache *c, size_t line_num,
627 	       char **line, ssize_t *line_len)
628 {
629   gcc_assert (line_num > 0);
630 
631   if (line_num <= c->line_num)
632     {
633       /* We've been asked to read lines that are before c->line_num.
634 	 So lets use our line record (if it's not empty) to try to
635 	 avoid re-reading the file from the beginning again.  */
636 
637       if (c->line_record.is_empty ())
638 	{
639 	  c->line_start_idx = 0;
640 	  c->line_num = 0;
641 	}
642       else
643 	{
644 	  fcache::line_info *i = NULL;
645 	  if (c->total_lines <= fcache_line_record_size)
646 	    {
647 	      /* In languages where the input file is not totally
648 		 preprocessed up front, the c->total_lines hint
649 		 can be smaller than the number of lines of the
650 		 file.  In that case, only the first
651 		 c->total_lines have been recorded.
652 
653 		 Otherwise, the first c->total_lines we've read have
654 		 their start/end recorded here.  */
655 	      i = (line_num <= c->total_lines)
656 		? &c->line_record[line_num - 1]
657 		: &c->line_record[c->total_lines - 1];
658 	      gcc_assert (i->line_num <= line_num);
659 	    }
660 	  else
661 	    {
662 	      /*  So the file had more lines than our line record
663 		  size.  Thus the number of lines we've recorded has
664 		  been scaled down to fcache_line_reacord_size.  Let's
665 		  pick the start/end of the recorded line that is
666 		  closest to line_num.  */
667 	      size_t n = (line_num <= c->total_lines)
668 		? line_num * fcache_line_record_size / c->total_lines
669 		: c ->line_record.length () - 1;
670 	      if (n < c->line_record.length ())
671 		{
672 		  i = &c->line_record[n];
673 		  gcc_assert (i->line_num <= line_num);
674 		}
675 	    }
676 
677 	  if (i && i->line_num == line_num)
678 	    {
679 	      /* We have the start/end of the line.  */
680 	      *line = c->data + i->start_pos;
681 	      *line_len = i->end_pos - i->start_pos;
682 	      return true;
683 	    }
684 
685 	  if (i)
686 	    {
687 	      c->line_start_idx = i->start_pos;
688 	      c->line_num = i->line_num - 1;
689 	    }
690 	  else
691 	    {
692 	      c->line_start_idx = 0;
693 	      c->line_num = 0;
694 	    }
695 	}
696     }
697 
698   /*  Let's walk from line c->line_num up to line_num - 1, without
699       copying any line.  */
700   while (c->line_num < line_num - 1)
701     if (!goto_next_line (c))
702       return false;
703 
704   /* The line we want is the next one.  Let's read and copy it back to
705      the caller.  */
706   return get_next_line (c, line, line_len);
707 }
708 
709 /* Return the physical source line that corresponds to FILE_PATH/LINE.
710    The line is not nul-terminated.  The returned pointer is only
711    valid until the next call of location_get_source_line.
712    Note that the line can contain several null characters,
713    so LINE_LEN, if non-null, points to the actual length of the line.
714    If the function fails, NULL is returned.  */
715 
716 const char *
717 location_get_source_line (const char *file_path, int line,
718 			  int *line_len)
719 {
720   char *buffer = NULL;
721   ssize_t len;
722 
723   if (line == 0)
724     return NULL;
725 
726   fcache *c = lookup_or_add_file_to_cache_tab (file_path);
727   if (c == NULL)
728     return NULL;
729 
730   bool read = read_line_num (c, line, &buffer, &len);
731 
732   if (read && line_len)
733     *line_len = len;
734 
735   return read ? buffer : NULL;
736 }
737 
738 /* Determine if FILE_PATH missing a trailing newline on its final line.
739    Only valid to call once all of the file has been loaded, by
740    requesting a line number beyond the end of the file.  */
741 
742 bool
743 location_missing_trailing_newline (const char *file_path)
744 {
745   fcache *c = lookup_or_add_file_to_cache_tab (file_path);
746   if (c == NULL)
747     return false;
748 
749   return c->missing_trailing_newline;
750 }
751 
752 /* Test if the location originates from the spelling location of a
753    builtin-tokens.  That is, return TRUE if LOC is a (possibly
754    virtual) location of a built-in token that appears in the expansion
755    list of a macro.  Please note that this function also works on
756    tokens that result from built-in tokens.  For instance, the
757    function would return true if passed a token "4" that is the result
758    of the expansion of the built-in __LINE__ macro.  */
759 bool
760 is_location_from_builtin_token (source_location loc)
761 {
762   const line_map_ordinary *map = NULL;
763   loc = linemap_resolve_location (line_table, loc,
764 				  LRK_SPELLING_LOCATION, &map);
765   return loc == BUILTINS_LOCATION;
766 }
767 
768 /* Expand the source location LOC into a human readable location.  If
769    LOC is virtual, it resolves to the expansion point of the involved
770    macro.  If LOC resolves to a builtin location, the file name of the
771    readable location is set to the string "<built-in>".  */
772 
773 expanded_location
774 expand_location (source_location loc)
775 {
776   return expand_location_1 (loc, /*expansion_point_p=*/true);
777 }
778 
779 /* Expand the source location LOC into a human readable location.  If
780    LOC is virtual, it resolves to the expansion location of the
781    relevant macro.  If LOC resolves to a builtin location, the file
782    name of the readable location is set to the string
783    "<built-in>".  */
784 
785 expanded_location
786 expand_location_to_spelling_point (source_location loc)
787 {
788   return expand_location_1 (loc, /*expansion_point_p=*/false);
789 }
790 
791 /* The rich_location class within libcpp requires a way to expand
792    source_location instances, and relies on the client code
793    providing a symbol named
794      linemap_client_expand_location_to_spelling_point
795    to do this.
796 
797    This is the implementation for libcommon.a (all host binaries),
798    which simply calls into expand_location_to_spelling_point.  */
799 
800 expanded_location
801 linemap_client_expand_location_to_spelling_point (source_location loc)
802 {
803   return expand_location_to_spelling_point (loc);
804 }
805 
806 
807 /* If LOCATION is in a system header and if it is a virtual location for
808    a token coming from the expansion of a macro, unwind it to the
809    location of the expansion point of the macro.  Otherwise, just return
810    LOCATION.
811 
812    This is used for instance when we want to emit diagnostics about a
813    token that may be located in a macro that is itself defined in a
814    system header, for example, for the NULL macro.  In such a case, if
815    LOCATION were passed directly to diagnostic functions such as
816    warning_at, the diagnostic would be suppressed (unless
817    -Wsystem-headers).  */
818 
819 source_location
820 expansion_point_location_if_in_system_header (source_location location)
821 {
822   if (in_system_header_at (location))
823     location = linemap_resolve_location (line_table, location,
824 					 LRK_MACRO_EXPANSION_POINT,
825 					 NULL);
826   return location;
827 }
828 
829 /* If LOCATION is a virtual location for a token coming from the expansion
830    of a macro, unwind to the location of the expansion point of the macro.  */
831 
832 source_location
833 expansion_point_location (source_location location)
834 {
835   return linemap_resolve_location (line_table, location,
836 				   LRK_MACRO_EXPANSION_POINT, NULL);
837 }
838 
839 /* Construct a location with caret at CARET, ranging from START to
840    finish e.g.
841 
842                  11111111112
843         12345678901234567890
844      522
845      523   return foo + bar;
846                   ~~~~^~~~~
847      524
848 
849    The location's caret is at the "+", line 523 column 15, but starts
850    earlier, at the "f" of "foo" at column 11.  The finish is at the "r"
851    of "bar" at column 19.  */
852 
853 location_t
854 make_location (location_t caret, location_t start, location_t finish)
855 {
856   location_t pure_loc = get_pure_location (caret);
857   source_range src_range;
858   src_range.m_start = get_start (start);
859   src_range.m_finish = get_finish (finish);
860   location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
861 						   pure_loc,
862 						   src_range,
863 						   NULL);
864   return combined_loc;
865 }
866 
#define ONE_K 1024
#define ONE_M (ONE_K * ONE_K)

/* Scale the amount X for display.  The result is:
   - X divided by 1024 * 1024, if X is >= 10 M (in base 2)
   - X divided by 1024, if X is >= 10 K (in base 2) but below 10 M
   - X itself otherwise.
   X is evaluated more than once.  */
#define SCALE(x) \
  ((unsigned long) ((x) >= 10 * ONE_M \
                    ? (x) / ONE_M \
                    : ((x) >= 10 * ONE_K \
                       ? (x) / ONE_K \
                       : (x))))

/* The unit that goes with SCALE (X):
   - the character 'M' if X is >= 10 M (in base 2)
   - the character 'k' if X is >= 10 K (in base 2) but strictly
     lower than 10 M
   - the character ' ' if X is strictly lower than 10 K.
   X is evaluated more than once.  */
#define STAT_LABEL(x) \
  ((x) >= 10 * ONE_M ? 'M' : ((x) >= 10 * ONE_K ? 'k' : ' '))

/* Expand to two printf-style arguments for the amount SIZE: the
   amount scaled as a multiple of 1K or 1M (in base 2), followed by
   the matching unit character (k, M, or ' ').  */
#define FORMAT_AMOUNT(size) SCALE (size), STAT_LABEL (size)
891 
892 /* Dump statistics to stderr about the memory usage of the line_table
893    set of line maps.  This also displays some statistics about macro
894    expansion.  */
895 
896 void
897 dump_line_table_statistics (void)
898 {
899   struct linemap_stats s;
900   long total_used_map_size,
901     macro_maps_size,
902     total_allocated_map_size;
903 
904   memset (&s, 0, sizeof (s));
905 
906   linemap_get_statistics (line_table, &s);
907 
908   macro_maps_size = s.macro_maps_used_size
909     + s.macro_maps_locations_size;
910 
911   total_allocated_map_size = s.ordinary_maps_allocated_size
912     + s.macro_maps_allocated_size
913     + s.macro_maps_locations_size;
914 
915   total_used_map_size = s.ordinary_maps_used_size
916     + s.macro_maps_used_size
917     + s.macro_maps_locations_size;
918 
919   fprintf (stderr, "Number of expanded macros:                     %5ld\n",
920            s.num_expanded_macros);
921   if (s.num_expanded_macros != 0)
922     fprintf (stderr, "Average number of tokens per macro expansion:  %5ld\n",
923              s.num_macro_tokens / s.num_expanded_macros);
924   fprintf (stderr,
925            "\nLine Table allocations during the "
926            "compilation process\n");
927   fprintf (stderr, "Number of ordinary maps used:        %5ld%c\n",
928            SCALE (s.num_ordinary_maps_used),
929            STAT_LABEL (s.num_ordinary_maps_used));
930   fprintf (stderr, "Ordinary map used size:              %5ld%c\n",
931            SCALE (s.ordinary_maps_used_size),
932            STAT_LABEL (s.ordinary_maps_used_size));
933   fprintf (stderr, "Number of ordinary maps allocated:   %5ld%c\n",
934            SCALE (s.num_ordinary_maps_allocated),
935            STAT_LABEL (s.num_ordinary_maps_allocated));
936   fprintf (stderr, "Ordinary maps allocated size:        %5ld%c\n",
937            SCALE (s.ordinary_maps_allocated_size),
938            STAT_LABEL (s.ordinary_maps_allocated_size));
939   fprintf (stderr, "Number of macro maps used:           %5ld%c\n",
940            SCALE (s.num_macro_maps_used),
941            STAT_LABEL (s.num_macro_maps_used));
942   fprintf (stderr, "Macro maps used size:                %5ld%c\n",
943            SCALE (s.macro_maps_used_size),
944            STAT_LABEL (s.macro_maps_used_size));
945   fprintf (stderr, "Macro maps locations size:           %5ld%c\n",
946            SCALE (s.macro_maps_locations_size),
947            STAT_LABEL (s.macro_maps_locations_size));
948   fprintf (stderr, "Macro maps size:                     %5ld%c\n",
949            SCALE (macro_maps_size),
950            STAT_LABEL (macro_maps_size));
951   fprintf (stderr, "Duplicated maps locations size:      %5ld%c\n",
952            SCALE (s.duplicated_macro_maps_locations_size),
953            STAT_LABEL (s.duplicated_macro_maps_locations_size));
954   fprintf (stderr, "Total allocated maps size:           %5ld%c\n",
955            SCALE (total_allocated_map_size),
956            STAT_LABEL (total_allocated_map_size));
957   fprintf (stderr, "Total used maps size:                %5ld%c\n",
958            SCALE (total_used_map_size),
959            STAT_LABEL (total_used_map_size));
960   fprintf (stderr, "Ad-hoc table size:                   %5ld%c\n",
961 	   SCALE (s.adhoc_table_size),
962 	   STAT_LABEL (s.adhoc_table_size));
963   fprintf (stderr, "Ad-hoc table entries used:           %5ld\n",
964 	   s.adhoc_table_entries_used);
965   fprintf (stderr, "optimized_ranges: %i\n",
966 	   line_table->num_optimized_ranges);
967   fprintf (stderr, "unoptimized_ranges: %i\n",
968 	   line_table->num_unoptimized_ranges);
969 
970   fprintf (stderr, "\n");
971 }
972 
973 /* Get location one beyond the final location in ordinary map IDX.  */
974 
975 static source_location
976 get_end_location (struct line_maps *set, unsigned int idx)
977 {
978   if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
979     return set->highest_location;
980 
981   struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
982   return MAP_START_LOCATION (next_map);
983 }
984 
/* Helper function for write_digit_row.
   Write the units digit of DIGIT to STREAM as one character.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
992 
993 /* Helper function for dump_location_info.
994    Write a row of numbers to STREAM, numbering a source line,
995    giving the units, tens, hundreds etc of the column number.  */
996 
997 static void
998 write_digit_row (FILE *stream, int indent,
999 		 const line_map_ordinary *map,
1000 		 source_location loc, int max_col, int divisor)
1001 {
1002   fprintf (stream, "%*c", indent, ' ');
1003   fprintf (stream, "|");
1004   for (int column = 1; column < max_col; column++)
1005     {
1006       source_location column_loc = loc + (column << map->m_range_bits);
1007       write_digit (stream, column_loc / divisor);
1008     }
1009   fprintf (stream, "\n");
1010 }
1011 
1012 /* Write a half-closed (START) / half-open (END) interval of
1013    source_location to STREAM.  */
1014 
1015 static void
1016 dump_location_range (FILE *stream,
1017 		     source_location start, source_location end)
1018 {
1019   fprintf (stream,
1020 	   "  source_location interval: %u <= loc < %u\n",
1021 	   start, end);
1022 }
1023 
1024 /* Write a labelled description of a half-closed (START) / half-open (END)
1025    interval of source_location to STREAM.  */
1026 
1027 static void
1028 dump_labelled_location_range (FILE *stream,
1029 			      const char *name,
1030 			      source_location start, source_location end)
1031 {
1032   fprintf (stream, "%s\n", name);
1033   dump_location_range (stream, start, end);
1034   fprintf (stream, "\n");
1035 }
1036 
1037 /* Write a visualization of the locations in the line_table to STREAM.  */
1038 
1039 void
1040 dump_location_info (FILE *stream)
1041 {
1042   /* Visualize the reserved locations.  */
1043   dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1044 				0, RESERVED_LOCATION_COUNT);
1045 
1046   /* Visualize the ordinary line_map instances, rendering the sources. */
1047   for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1048     {
1049       source_location end_location = get_end_location (line_table, idx);
1050       /* half-closed: doesn't include this one. */
1051 
1052       const line_map_ordinary *map
1053 	= LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1054       fprintf (stream, "ORDINARY MAP: %i\n", idx);
1055       dump_location_range (stream,
1056 			   MAP_START_LOCATION (map), end_location);
1057       fprintf (stream, "  file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1058       fprintf (stream, "  starting at line: %i\n",
1059 	       ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1060       fprintf (stream, "  column and range bits: %i\n",
1061 	       map->m_column_and_range_bits);
1062       fprintf (stream, "  column bits: %i\n",
1063 	       map->m_column_and_range_bits - map->m_range_bits);
1064       fprintf (stream, "  range bits: %i\n",
1065 	       map->m_range_bits);
1066 
1067       /* Render the span of source lines that this "map" covers.  */
1068       for (source_location loc = MAP_START_LOCATION (map);
1069 	   loc < end_location;
1070 	   loc += (1 << map->m_range_bits) )
1071 	{
1072 	  gcc_assert (pure_location_p (line_table, loc) );
1073 
1074 	  expanded_location exploc
1075 	    = linemap_expand_location (line_table, map, loc);
1076 
1077 	  if (0 == exploc.column)
1078 	    {
1079 	      /* Beginning of a new source line: draw the line.  */
1080 
1081 	      int line_size;
1082 	      const char *line_text = location_get_source_line (exploc.file,
1083 								exploc.line,
1084 								&line_size);
1085 	      if (!line_text)
1086 		break;
1087 	      fprintf (stream,
1088 		       "%s:%3i|loc:%5i|%.*s\n",
1089 		       exploc.file, exploc.line,
1090 		       loc,
1091 		       line_size, line_text);
1092 
1093 	      /* "loc" is at column 0, which means "the whole line".
1094 		 Render the locations *within* the line, by underlining
1095 		 it, showing the source_location numeric values
1096 		 at each column.  */
1097 	      int max_col = (1 << map->m_column_and_range_bits) - 1;
1098 	      if (max_col > line_size)
1099 		max_col = line_size + 1;
1100 
1101 	      int indent = 14 + strlen (exploc.file);
1102 
1103 	      /* Thousands.  */
1104 	      if (end_location > 999)
1105 		write_digit_row (stream, indent, map, loc, max_col, 1000);
1106 
1107 	      /* Hundreds.  */
1108 	      if (end_location > 99)
1109 		write_digit_row (stream, indent, map, loc, max_col, 100);
1110 
1111 	      /* Tens.  */
1112 	      write_digit_row (stream, indent, map, loc, max_col, 10);
1113 
1114 	      /* Units.  */
1115 	      write_digit_row (stream, indent, map, loc, max_col, 1);
1116 	    }
1117 	}
1118       fprintf (stream, "\n");
1119     }
1120 
1121   /* Visualize unallocated values.  */
1122   dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1123 				line_table->highest_location,
1124 				LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1125 
1126   /* Visualize the macro line_map instances, rendering the sources. */
1127   for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1128     {
1129       /* Each macro map that is allocated owns source_location values
1130 	 that are *lower* that the one before them.
1131 	 Hence it's meaningful to view them either in order of ascending
1132 	 source locations, or in order of ascending macro map index.  */
1133       const bool ascending_source_locations = true;
1134       unsigned int idx = (ascending_source_locations
1135 			  ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1136 			  : i);
1137       const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1138       fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1139 	       idx,
1140 	       linemap_map_get_macro_name (map),
1141 	       MACRO_MAP_NUM_MACRO_TOKENS (map));
1142       dump_location_range (stream,
1143 			   map->start_location,
1144 			   (map->start_location
1145 			    + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1146       inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1147 	      "expansion point is location %i",
1148 	      MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1149       fprintf (stream, "  map->start_location: %u\n",
1150 	       map->start_location);
1151 
1152       fprintf (stream, "  macro_locations:\n");
1153       for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1154 	{
1155 	  source_location x = MACRO_MAP_LOCATIONS (map)[2 * i];
1156 	  source_location y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1157 
1158 	  /* linemap_add_macro_token encodes token numbers in an expansion
1159 	     by putting them after MAP_START_LOCATION. */
1160 
1161 	  /* I'm typically seeing 4 uninitialized entries at the end of
1162 	     0xafafafaf.
1163 	     This appears to be due to macro.c:replace_args
1164 	     adding 2 extra args for padding tokens; presumably there may
1165 	     be a leading and/or trailing padding token injected,
1166 	     each for 2 more location slots.
1167 	     This would explain there being up to 4 source_locations slots
1168 	     that may be uninitialized.  */
1169 
1170 	  fprintf (stream, "    %u: %u, %u\n",
1171 		   i,
1172 		   x,
1173 		   y);
1174 	  if (x == y)
1175 	    {
1176 	      if (x < MAP_START_LOCATION (map))
1177 		inform (x, "token %u has x-location == y-location == %u", i, x);
1178 	      else
1179 		fprintf (stream,
1180 			 "x-location == y-location == %u encodes token # %u\n",
1181 			 x, x - MAP_START_LOCATION (map));
1182 		}
1183 	  else
1184 	    {
1185 	      inform (x, "token %u has x-location == %u", i, x);
1186 	      inform (x, "token %u has y-location == %u", i, y);
1187 	    }
1188 	}
1189       fprintf (stream, "\n");
1190     }
1191 
1192   /* It appears that MAX_SOURCE_LOCATION itself is never assigned to a
1193      macro map, presumably due to an off-by-one error somewhere
1194      between the logic in linemap_enter_macro and
1195      LINEMAPS_MACRO_LOWEST_LOCATION.  */
1196   dump_labelled_location_range (stream, "MAX_SOURCE_LOCATION",
1197 				MAX_SOURCE_LOCATION,
1198 				MAX_SOURCE_LOCATION + 1);
1199 
1200   /* Visualize ad-hoc values.  */
1201   dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1202 				MAX_SOURCE_LOCATION + 1, UINT_MAX);
1203 }
1204 
1205 /* string_concat's constructor.  */
1206 
1207 string_concat::string_concat (int num, location_t *locs)
1208   : m_num (num)
1209 {
1210   m_locs = ggc_vec_alloc <location_t> (num);
1211   for (int i = 0; i < num; i++)
1212     m_locs[i] = locs[i];
1213 }
1214 
1215 /* string_concat_db's constructor.  */
1216 
1217 string_concat_db::string_concat_db ()
1218 {
1219   m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1220 }
1221 
1222 /* Record that a string concatenation occurred, covering NUM
1223    string literal tokens.  LOCS is an array of size NUM, containing the
1224    locations of the tokens.  A copy of LOCS is taken.  */
1225 
1226 void
1227 string_concat_db::record_string_concatenation (int num, location_t *locs)
1228 {
1229   gcc_assert (num > 1);
1230   gcc_assert (locs);
1231 
1232   location_t key_loc = get_key_loc (locs[0]);
1233 
1234   string_concat *concat
1235     = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1236   m_table->put (key_loc, concat);
1237 }
1238 
1239 /* Determine if LOC was the location of the the initial token of a
1240    concatenation of string literal tokens.
1241    If so, *OUT_NUM is written to with the number of tokens, and
1242    *OUT_LOCS with the location of an array of locations of the
1243    tokens, and return true.  *OUT_LOCS is a borrowed pointer to
1244    storage owned by the string_concat_db.
1245    Otherwise, return false.  */
1246 
1247 bool
1248 string_concat_db::get_string_concatenation (location_t loc,
1249 					    int *out_num,
1250 					    location_t **out_locs)
1251 {
1252   gcc_assert (out_num);
1253   gcc_assert (out_locs);
1254 
1255   location_t key_loc = get_key_loc (loc);
1256 
1257   string_concat **concat = m_table->get (key_loc);
1258   if (!concat)
1259     return false;
1260 
1261   *out_num = (*concat)->m_num;
1262   *out_locs =(*concat)->m_locs;
1263   return true;
1264 }
1265 
1266 /* Internal function.  Canonicalize LOC into a form suitable for
1267    use as a key within the database, stripping away macro expansion,
1268    ad-hoc information, and range information, using the location of
1269    the start of LOC within an ordinary linemap.  */
1270 
1271 location_t
1272 string_concat_db::get_key_loc (location_t loc)
1273 {
1274   loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1275 				  NULL);
1276 
1277   loc = get_range_from_loc (line_table, loc).m_start;
1278 
1279   return loc;
1280 }
1281 
1282 /* Helper class for use within get_substring_ranges_for_loc.
1283    An vec of cpp_string with responsibility for releasing all of the
1284    str->text for each str in the vector.  */
1285 
1286 class auto_cpp_string_vec :  public auto_vec <cpp_string>
1287 {
1288  public:
1289   auto_cpp_string_vec (int alloc)
1290     : auto_vec <cpp_string> (alloc) {}
1291 
1292   ~auto_cpp_string_vec ()
1293   {
1294     /* Clean up the copies within this vec.  */
1295     int i;
1296     cpp_string *str;
1297     FOR_EACH_VEC_ELT (*this, i, str)
1298       free (const_cast <unsigned char *> (str->text));
1299   }
1300 };
1301 
1302 /* Attempt to populate RANGES with source location information on the
1303    individual characters within the string literal found at STRLOC.
1304    If CONCATS is non-NULL, then any string literals that the token at
1305    STRLOC  was concatenated with are also added to RANGES.
1306 
1307    Return NULL if successful, or an error message if any errors occurred (in
1308    which case RANGES may be only partially populated and should not
1309    be used).
1310 
1311    This is implemented by re-parsing the relevant source line(s).  */
1312 
1313 static const char *
1314 get_substring_ranges_for_loc (cpp_reader *pfile,
1315 			      string_concat_db *concats,
1316 			      location_t strloc,
1317 			      enum cpp_ttype type,
1318 			      cpp_substring_ranges &ranges)
1319 {
1320   gcc_assert (pfile);
1321 
1322   if (strloc == UNKNOWN_LOCATION)
1323     return "unknown location";
1324 
1325   /* Reparsing the strings requires accurate location information.
1326      If -ftrack-macro-expansion has been overridden from its default
1327      of 2, then we might have a location of a macro expansion point,
1328      rather than the location of the literal itself.
1329      Avoid this by requiring that we have full macro expansion tracking
1330      for substring locations to be available.  */
1331   if (cpp_get_options (pfile)->track_macro_expansion != 2)
1332     return "track_macro_expansion != 2";
1333 
1334   /* If #line or # 44 "file"-style directives are present, then there's
1335      no guarantee that the line numbers we have can be used to locate
1336      the strings.  For example, we might have a .i file with # directives
1337      pointing back to lines within a .c file, but the .c file might
1338      have been edited since the .i file was created.
1339      In such a case, the safest course is to disable on-demand substring
1340      locations.  */
1341   if (line_table->seen_line_directive)
1342     return "seen line directive";
1343 
1344   /* If string concatenation has occurred at STRLOC, get the locations
1345      of all of the literal tokens making up the compound string.
1346      Otherwise, just use STRLOC.  */
1347   int num_locs = 1;
1348   location_t *strlocs = &strloc;
1349   if (concats)
1350     concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1351 
1352   auto_cpp_string_vec strs (num_locs);
1353   auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1354   for (int i = 0; i < num_locs; i++)
1355     {
1356       /* Get range of strloc.  We will use it to locate the start and finish
1357 	 of the literal token within the line.  */
1358       source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1359 
1360       if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1361 	/* If the string is within a macro expansion, we can't get at the
1362 	   end location.  */
1363 	return "macro expansion";
1364 
1365       if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1366 	/* If so, we can't reliably determine where the token started within
1367 	   its line.  */
1368 	return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1369 
1370       if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1371 	/* If so, we can't reliably determine where the token finished within
1372 	   its line.  */
1373 	return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1374 
1375       expanded_location start
1376 	= expand_location_to_spelling_point (src_range.m_start);
1377       expanded_location finish
1378 	= expand_location_to_spelling_point (src_range.m_finish);
1379       if (start.file != finish.file)
1380 	return "range endpoints are in different files";
1381       if (start.line != finish.line)
1382 	return "range endpoints are on different lines";
1383       if (start.column > finish.column)
1384 	return "range endpoints are reversed";
1385 
1386       int line_width;
1387       const char *line = location_get_source_line (start.file, start.line,
1388 						   &line_width);
1389       if (line == NULL)
1390 	return "unable to read source line";
1391 
1392       /* Determine the location of the literal (including quotes
1393 	 and leading prefix chars, such as the 'u' in a u""
1394 	 token).  */
1395       const char *literal = line + start.column - 1;
1396       int literal_length = finish.column - start.column + 1;
1397 
1398       /* Ensure that we don't crash if we got the wrong location.  */
1399       if (line_width < (start.column - 1 + literal_length))
1400 	return "line is not wide enough";
1401 
1402       cpp_string from;
1403       from.len = literal_length;
1404       /* Make a copy of the literal, to avoid having to rely on
1405 	 the lifetime of the copy of the line within the cache.
1406 	 This will be released by the auto_cpp_string_vec dtor.  */
1407       from.text = XDUPVEC (unsigned char, literal, literal_length);
1408       strs.safe_push (from);
1409 
1410       /* For very long lines, a new linemap could have started
1411 	 halfway through the token.
1412 	 Ensure that the loc_reader uses the linemap of the
1413 	 *end* of the token for its start location.  */
1414       const line_map_ordinary *final_ord_map;
1415       linemap_resolve_location (line_table, src_range.m_finish,
1416 				LRK_MACRO_EXPANSION_POINT, &final_ord_map);
1417       location_t start_loc
1418 	= linemap_position_for_line_and_column (line_table, final_ord_map,
1419 						start.line, start.column);
1420 
1421       cpp_string_location_reader loc_reader (start_loc, line_table);
1422       loc_readers.safe_push (loc_reader);
1423     }
1424 
1425   /* Rerun cpp_interpret_string, or rather, a modified version of it.  */
1426   const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1427 						 loc_readers.address (),
1428 						 num_locs, &ranges, type);
1429   if (err)
1430     return err;
1431 
1432   /* Success: "ranges" should now contain information on the string.  */
1433   return NULL;
1434 }
1435 
1436 /* Attempt to populate *OUT_LOC with source location information on the
1437    given characters within the string literal found at STRLOC.
1438    CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1439    character set.
1440 
1441    For example, given CARET_IDX = 4, START_IDX = 3, END_IDX  = 7
1442    and string literal "012345\n789"
1443    *OUT_LOC is written to with:
1444      "012345\n789"
1445          ~^~~~~
1446 
1447    If CONCATS is non-NULL, then any string literals that the token at
1448    STRLOC was concatenated with are also considered.
1449 
1450    This is implemented by re-parsing the relevant source line(s).
1451 
1452    Return NULL if successful, or an error message if any errors occurred.
1453    Error messages are intended for GCC developers (to help debugging) rather
1454    than for end-users.  */
1455 
1456 const char *
1457 get_source_location_for_substring (cpp_reader *pfile,
1458 				   string_concat_db *concats,
1459 				   location_t strloc,
1460 				   enum cpp_ttype type,
1461 				   int caret_idx, int start_idx, int end_idx,
1462 				   source_location *out_loc)
1463 {
1464   gcc_checking_assert (caret_idx >= 0);
1465   gcc_checking_assert (start_idx >= 0);
1466   gcc_checking_assert (end_idx >= 0);
1467   gcc_assert (out_loc);
1468 
1469   cpp_substring_ranges ranges;
1470   const char *err
1471     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1472   if (err)
1473     return err;
1474 
1475   if (caret_idx >= ranges.get_num_ranges ())
1476     return "caret_idx out of range";
1477   if (start_idx >= ranges.get_num_ranges ())
1478     return "start_idx out of range";
1479   if (end_idx >= ranges.get_num_ranges ())
1480     return "end_idx out of range";
1481 
1482   *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1483 			    ranges.get_range (start_idx).m_start,
1484 			    ranges.get_range (end_idx).m_finish);
1485   return NULL;
1486 }
1487 
1488 #if CHECKING_P
1489 
1490 namespace selftest {
1491 
1492 /* Selftests of location handling.  */
1493 
1494 /* Attempt to populate *OUT_RANGE with source location information on the
1495    given character within the string literal found at STRLOC.
1496    CHAR_IDX refers to an offset within the execution character set.
1497    If CONCATS is non-NULL, then any string literals that the token at
1498    STRLOC was concatenated with are also considered.
1499 
1500    This is implemented by re-parsing the relevant source line(s).
1501 
1502    Return NULL if successful, or an error message if any errors occurred.
1503    Error messages are intended for GCC developers (to help debugging) rather
1504    than for end-users.  */
1505 
1506 static const char *
1507 get_source_range_for_char (cpp_reader *pfile,
1508 			   string_concat_db *concats,
1509 			   location_t strloc,
1510 			   enum cpp_ttype type,
1511 			   int char_idx,
1512 			   source_range *out_range)
1513 {
1514   gcc_checking_assert (char_idx >= 0);
1515   gcc_assert (out_range);
1516 
1517   cpp_substring_ranges ranges;
1518   const char *err
1519     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1520   if (err)
1521     return err;
1522 
1523   if (char_idx >= ranges.get_num_ranges ())
1524     return "char_idx out of range";
1525 
1526   *out_range = ranges.get_range (char_idx);
1527   return NULL;
1528 }
1529 
1530 /* As get_source_range_for_char, but write to *OUT the number
1531    of ranges that are available.  */
1532 
1533 static const char *
1534 get_num_source_ranges_for_substring (cpp_reader *pfile,
1535 				     string_concat_db *concats,
1536 				     location_t strloc,
1537 				     enum cpp_ttype type,
1538 				     int *out)
1539 {
1540   gcc_assert (out);
1541 
1542   cpp_substring_ranges ranges;
1543   const char *err
1544     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1545 
1546   if (err)
1547     return err;
1548 
1549   *out = ranges.get_num_ranges ();
1550   return NULL;
1551 }
1552 
1553 /* Selftests of location handling.  */
1554 
1555 /* Helper function for verifying location data: when location_t
1556    values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1557    as having column 0.  */
1558 
1559 static bool
1560 should_have_column_data_p (location_t loc)
1561 {
1562   if (IS_ADHOC_LOC (loc))
1563     loc = get_location_from_adhoc_loc (line_table, loc);
1564   if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1565     return false;
1566   return true;
1567 }
1568 
1569 /* Selftest for should_have_column_data_p.  */
1570 
1571 static void
1572 test_should_have_column_data_p ()
1573 {
1574   ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1575   ASSERT_TRUE
1576     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1577   ASSERT_FALSE
1578     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1579 }
1580 
1581 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1582    on LOC.  */
1583 
1584 static void
1585 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1586 	      location_t loc)
1587 {
1588   ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1589   ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1590   /* If location_t values are sufficiently high, then column numbers
1591      will be unavailable and LOCATION_COLUMN (loc) will be 0.
1592      When close to the threshold, column numbers *may* be present: if
1593      the final linemap before the threshold contains a line that straddles
1594      the threshold, locations in that line have column information.  */
1595   if (should_have_column_data_p (loc))
1596     ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1597 }
1598 
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests with a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
   and others use zero
   - the fallback modes within line-map.c: there are various threshold
   values for source_location/location_t beyond which line-map.c
   changes behavior (disabling of the range-packing optimization,
   disabling of column-tracking).  We can exercise these by starting
   the line_table at interesting values at or near these thresholds.

   The following struct describes a particular case within our test
   matrix.  */

struct line_table_case
{
  /* Value to give to line_table->default_range_bits.  */
  int m_default_range_bits;

  /* Location at which to start the line_table; zero leaves the
     starting point of a freshly initialized table alone.  */
  int m_base_location;

  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {
  }
};
1625 
1626 /* Constructor.  Store the old value of line_table, and create a new
1627    one, using sane defaults.  */
1628 
1629 line_table_test::line_table_test ()
1630 {
1631   gcc_assert (saved_line_table == NULL);
1632   saved_line_table = line_table;
1633   line_table = ggc_alloc<line_maps> ();
1634   linemap_init (line_table, BUILTINS_LOCATION);
1635   gcc_assert (saved_line_table->reallocator);
1636   line_table->reallocator = saved_line_table->reallocator;
1637   gcc_assert (saved_line_table->round_alloc_size);
1638   line_table->round_alloc_size = saved_line_table->round_alloc_size;
1639   line_table->default_range_bits = 0;
1640 }
1641 
1642 /* Constructor.  Store the old value of line_table, and create a new
1643    one, using the sitation described in CASE_.  */
1644 
1645 line_table_test::line_table_test (const line_table_case &case_)
1646 {
1647   gcc_assert (saved_line_table == NULL);
1648   saved_line_table = line_table;
1649   line_table = ggc_alloc<line_maps> ();
1650   linemap_init (line_table, BUILTINS_LOCATION);
1651   gcc_assert (saved_line_table->reallocator);
1652   line_table->reallocator = saved_line_table->reallocator;
1653   gcc_assert (saved_line_table->round_alloc_size);
1654   line_table->round_alloc_size = saved_line_table->round_alloc_size;
1655   line_table->default_range_bits = case_.m_default_range_bits;
1656   if (case_.m_base_location)
1657     {
1658       line_table->highest_location = case_.m_base_location;
1659       line_table->highest_line = case_.m_base_location;
1660     }
1661 }
1662 
1663 /* Destructor.  Restore the old value of line_table.  */
1664 
1665 line_table_test::~line_table_test ()
1666 {
1667   gcc_assert (saved_line_table != NULL);
1668   line_table = saved_line_table;
1669   saved_line_table = NULL;
1670 }
1671 
1672 /* Verify basic operation of ordinary linemaps.  */
1673 
1674 static void
1675 test_accessing_ordinary_linemaps (const line_table_case &case_)
1676 {
1677   line_table_test ltt (case_);
1678 
1679   /* Build a simple linemap describing some locations. */
1680   linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1681 
1682   linemap_line_start (line_table, 1, 100);
1683   location_t loc_a = linemap_position_for_column (line_table, 1);
1684   location_t loc_b = linemap_position_for_column (line_table, 23);
1685 
1686   linemap_line_start (line_table, 2, 100);
1687   location_t loc_c = linemap_position_for_column (line_table, 1);
1688   location_t loc_d = linemap_position_for_column (line_table, 17);
1689 
1690   /* Example of a very long line.  */
1691   linemap_line_start (line_table, 3, 2000);
1692   location_t loc_e = linemap_position_for_column (line_table, 700);
1693 
1694   /* Transitioning back to a short line.  */
1695   linemap_line_start (line_table, 4, 0);
1696   location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1697 
1698   if (should_have_column_data_p (loc_back_to_short))
1699     {
1700       /* Verify that we switched to short lines in the linemap.  */
1701       line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1702       ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1703     }
1704 
1705   /* Example of a line that will eventually be seen to be longer
1706      than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1707      below that.  */
1708   linemap_line_start (line_table, 5, 2000);
1709 
1710   location_t loc_start_of_very_long_line
1711     = linemap_position_for_column (line_table, 2000);
1712   location_t loc_too_wide
1713     = linemap_position_for_column (line_table, 4097);
1714   location_t loc_too_wide_2
1715     = linemap_position_for_column (line_table, 4098);
1716 
1717   /* ...and back to a sane line length.  */
1718   linemap_line_start (line_table, 6, 100);
1719   location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1720 
1721   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1722 
1723   /* Multiple files.  */
1724   linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1725   linemap_line_start (line_table, 1, 200);
1726   location_t loc_f = linemap_position_for_column (line_table, 150);
1727   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1728 
1729   /* Verify that we can recover the location info.  */
1730   assert_loceq ("foo.c", 1, 1, loc_a);
1731   assert_loceq ("foo.c", 1, 23, loc_b);
1732   assert_loceq ("foo.c", 2, 1, loc_c);
1733   assert_loceq ("foo.c", 2, 17, loc_d);
1734   assert_loceq ("foo.c", 3, 700, loc_e);
1735   assert_loceq ("foo.c", 4, 100, loc_back_to_short);
1736 
1737   /* In the very wide line, the initial location should be fully tracked.  */
1738   assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1739   /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
1740      be disabled.  */
1741   assert_loceq ("foo.c", 5, 0, loc_too_wide);
1742   assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
1743   /*...and column-tracking should be re-enabled for subsequent lines.  */
1744   assert_loceq ("foo.c", 6, 10, loc_sane_again);
1745 
1746   assert_loceq ("bar.c", 1, 150, loc_f);
1747 
1748   ASSERT_FALSE (is_location_from_builtin_token (loc_a));
1749   ASSERT_TRUE (pure_location_p (line_table, loc_a));
1750 
1751   /* Verify using make_location to build a range, and extracting data
1752      back from it.  */
1753   location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1754   ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1755   ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1756   source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1757   ASSERT_EQ (loc_b, src_range.m_start);
1758   ASSERT_EQ (loc_d, src_range.m_finish);
1759 }
1760 
1761 /* Verify various properties of UNKNOWN_LOCATION.  */
1762 
1763 static void
1764 test_unknown_location ()
1765 {
1766   ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1767   ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1768   ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1769 }
1770 
1771 /* Verify various properties of BUILTINS_LOCATION.  */
1772 
1773 static void
1774 test_builtins ()
1775 {
1776   assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
1777   ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1778 }
1779 
1780 /* Regression test for make_location.
1781    Ensure that we use pure locations for the start/finish of the range,
1782    rather than storing a packed or ad-hoc range as the start/finish.  */
1783 
1784 static void
1785 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1786 {
1787   /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1788      with C++ frontend.
1789      ....................0000000001111111111222.
1790      ....................1234567890123456789012.  */
1791   const char *content = "     r += !aaa == bbb;\n";
1792   temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1793   line_table_test ltt (case_);
1794   linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1795 
1796   const location_t c11 = linemap_position_for_column (line_table, 11);
1797   const location_t c12 = linemap_position_for_column (line_table, 12);
1798   const location_t c13 = linemap_position_for_column (line_table, 13);
1799   const location_t c14 = linemap_position_for_column (line_table, 14);
1800   const location_t c21 = linemap_position_for_column (line_table, 21);
1801 
1802   if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1803     return;
1804 
1805   /* Use column 13 for the caret location, arbitrarily, to verify that we
1806      handle start != caret.  */
1807   const location_t aaa = make_location (c13, c12, c14);
1808   ASSERT_EQ (c13, get_pure_location (aaa));
1809   ASSERT_EQ (c12, get_start (aaa));
1810   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1811   ASSERT_EQ (c14, get_finish (aaa));
1812   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1813 
1814   /* Make a location using a location with a range as the start-point.  */
1815   const location_t not_aaa = make_location (c11, aaa, c14);
1816   ASSERT_EQ (c11, get_pure_location (not_aaa));
1817   /* It should use the start location of the range, not store the range
1818      itself.  */
1819   ASSERT_EQ (c12, get_start (not_aaa));
1820   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1821   ASSERT_EQ (c14, get_finish (not_aaa));
1822   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1823 
1824   /* Similarly, make a location with a range as the end-point.  */
1825   const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1826   ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1827   ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1828   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
1829   ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
1830   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
1831   const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
1832   /* It should use the finish location of the range, not store the range
1833      itself.  */
1834   ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
1835   ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
1836   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
1837   ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
1838   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
1839 }
1840 
1841 /* Verify reading of input files (e.g. for caret-based diagnostics).  */
1842 
1843 static void
1844 test_reading_source_line ()
1845 {
1846   /* Create a tempfile and write some text to it.  */
1847   temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1848 			"01234567890123456789\n"
1849 			"This is the test text\n"
1850 			"This is the 3rd line");
1851 
1852   /* Read back a specific line from the tempfile.  */
1853   int line_size;
1854   const char *source_line = location_get_source_line (tmp.get_filename (),
1855 						      3, &line_size);
1856   ASSERT_TRUE (source_line != NULL);
1857   ASSERT_EQ (20, line_size);
1858   ASSERT_TRUE (!strncmp ("This is the 3rd line",
1859 			 source_line, line_size));
1860 
1861   source_line = location_get_source_line (tmp.get_filename (),
1862 					  2, &line_size);
1863   ASSERT_TRUE (source_line != NULL);
1864   ASSERT_EQ (21, line_size);
1865   ASSERT_TRUE (!strncmp ("This is the test text",
1866 			 source_line, line_size));
1867 
1868   source_line = location_get_source_line (tmp.get_filename (),
1869 					  4, &line_size);
1870   ASSERT_TRUE (source_line == NULL);
1871 }
1872 
1873 /* Tests of lexing.  */
1874 
/* Assert that cpp_token_as_text, applied to token TOK of PARSER,
   gives a string equal to EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)		\
  SELFTEST_BEGIN_STMT							\
    const char *tok_txt_						\
      = (const char *) cpp_token_as_text ((PARSER), (TOK));		\
    ASSERT_STREQ ((EXPECTED_TEXT), tok_txt_);				\
  SELFTEST_END_STMT
1883 
1884 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1885    and ranges from EXP_START_COL to EXP_FINISH_COL.
1886    Use LOC as the effective location of the selftest.  */
1887 
1888 static void
1889 assert_token_loc_eq (const location &loc,
1890 		     const cpp_token *tok,
1891 		     const char *exp_filename, int exp_linenum,
1892 		     int exp_start_col, int exp_finish_col)
1893 {
1894   location_t tok_loc = tok->src_loc;
1895   ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1896   ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1897 
1898   /* If location_t values are sufficiently high, then column numbers
1899      will be unavailable.  */
1900   if (!should_have_column_data_p (tok_loc))
1901     return;
1902 
1903   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1904   source_range tok_range = get_range_from_loc (line_table, tok_loc);
1905   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1906   ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
1907 }
1908 
/* Convenience wrapper around assert_token_loc_eq for checking
   TOK->src_loc; any failure is reported at the point where the macro
   is used (via SELFTEST_LOCATION).  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,		\
			    EXP_START_COL, EXP_FINISH_COL)		\
  assert_token_loc_eq (SELFTEST_LOCATION,				\
		       (TOK),						\
		       (EXP_FILENAME), (EXP_LINENUM),			\
		       (EXP_START_COL), (EXP_FINISH_COL))
1916 
1917 /* Test of lexing a file using libcpp, verifying tokens and their
1918    location information.  */
1919 
1920 static void
1921 test_lexer (const line_table_case &case_)
1922 {
1923   /* Create a tempfile and write some text to it.  */
1924   const char *content =
1925     /*00000000011111111112222222222333333.3333444444444.455555555556
1926       12345678901234567890123456789012345.6789012345678.901234567890.  */
1927     ("test_name /* c-style comment */\n"
1928      "                                  \"test literal\"\n"
1929      " // test c++-style comment\n"
1930      "   42\n");
1931   temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
1932 
1933   line_table_test ltt (case_);
1934 
1935   cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
1936 
1937   const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
1938   ASSERT_NE (fname, NULL);
1939 
1940   /* Verify that we get the expected tokens back, with the correct
1941      location information.  */
1942 
1943   location_t loc;
1944   const cpp_token *tok;
1945   tok = cpp_get_token_with_location (parser, &loc);
1946   ASSERT_NE (tok, NULL);
1947   ASSERT_EQ (tok->type, CPP_NAME);
1948   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
1949   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
1950 
1951   tok = cpp_get_token_with_location (parser, &loc);
1952   ASSERT_NE (tok, NULL);
1953   ASSERT_EQ (tok->type, CPP_STRING);
1954   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
1955   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
1956 
1957   tok = cpp_get_token_with_location (parser, &loc);
1958   ASSERT_NE (tok, NULL);
1959   ASSERT_EQ (tok->type, CPP_NUMBER);
1960   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
1961   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
1962 
1963   tok = cpp_get_token_with_location (parser, &loc);
1964   ASSERT_NE (tok, NULL);
1965   ASSERT_EQ (tok->type, CPP_EOF);
1966 
1967   cpp_finish (parser, NULL);
1968   cpp_destroy (parser);
1969 }
1970 
/* Forward declarations: lexer_test_options::apply takes a lexer_test,
   and lexer_test's constructor takes a lexer_test_options.  */

class lexer_test_options;
struct lexer_test;
1975 
1976 /* A class for specifying options of a lexer_test.
1977    The "apply" vfunc is called during the lexer_test constructor.  */
1978 
1979 class lexer_test_options
1980 {
1981  public:
1982   virtual void apply (lexer_test &) = 0;
1983 };
1984 
1985 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
1986    in its dtor.
1987 
1988    This is needed by struct lexer_test to ensure that the cleanup of the
1989    cpp_reader happens *after* the cleanup of the temp_source_file.  */
1990 
1991 class cpp_reader_ptr
1992 {
1993  public:
1994   cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
1995 
1996   ~cpp_reader_ptr ()
1997   {
1998     cpp_finish (m_ptr, NULL);
1999     cpp_destroy (m_ptr);
2000   }
2001 
2002   operator cpp_reader * () const { return m_ptr; }
2003 
2004  private:
2005   cpp_reader *m_ptr;
2006 };
2007 
2008 /* A struct for writing lexer tests.  */
2009 
2010 struct lexer_test
2011 {
2012   lexer_test (const line_table_case &case_, const char *content,
2013 	      lexer_test_options *options);
2014   ~lexer_test ();
2015 
2016   const cpp_token *get_token ();
2017 
2018   /* The ordering of these fields matters.
2019      The line_table_test must be first, since the cpp_reader_ptr
2020      uses it.
2021      The cpp_reader must be cleaned up *after* the temp_source_file
2022      since the filenames in input.c's input cache are owned by the
2023      cpp_reader; in particular, when ~temp_source_file evicts the
2024      filename the filenames must still be alive.  */
2025   line_table_test m_ltt;
2026   cpp_reader_ptr m_parser;
2027   temp_source_file m_tempfile;
2028   string_concat_db m_concats;
2029   bool m_implicitly_expect_EOF;
2030 };
2031 
2032 /* Use an EBCDIC encoding for the execution charset, specifically
2033    IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2034 
2035    This exercises iconv integration within libcpp.
2036    Not every build of iconv supports the given charset,
2037    so we need to flag this error and handle it gracefully.  */
2038 
2039 class ebcdic_execution_charset : public lexer_test_options
2040 {
2041  public:
2042   ebcdic_execution_charset () : m_num_iconv_errors (0)
2043     {
2044       gcc_assert (s_singleton == NULL);
2045       s_singleton = this;
2046     }
2047   ~ebcdic_execution_charset ()
2048     {
2049       gcc_assert (s_singleton == this);
2050       s_singleton = NULL;
2051     }
2052 
2053   void apply (lexer_test &test) FINAL OVERRIDE
2054   {
2055     cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2056     cpp_opts->narrow_charset = "IBM1047";
2057 
2058     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2059     callbacks->error = on_error;
2060   }
2061 
2062   static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
2063 			int level ATTRIBUTE_UNUSED,
2064 			int reason ATTRIBUTE_UNUSED,
2065 			rich_location *richloc ATTRIBUTE_UNUSED,
2066 			const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2067     ATTRIBUTE_FPTR_PRINTF(5,0)
2068   {
2069     gcc_assert (s_singleton);
2070     /* Avoid exgettext from picking this up, it is translated in libcpp.  */
2071     const char *msg = "conversion from %s to %s not supported by iconv";
2072 #ifdef ENABLE_NLS
2073     msg = dgettext ("cpplib", msg);
2074 #endif
2075     /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2076        when the local iconv build doesn't support the conversion.  */
2077     if (strcmp (msgid, msg) == 0)
2078       {
2079 	s_singleton->m_num_iconv_errors++;
2080 	return true;
2081       }
2082 
2083     /* Otherwise, we have an unexpected error.  */
2084     abort ();
2085   }
2086 
2087   bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2088 
2089  private:
2090   static ebcdic_execution_charset *s_singleton;
2091   int m_num_iconv_errors;
2092 };
2093 
2094 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2095 
2096 /* A lexer_test_options subclass that records a list of error
2097    messages emitted by the lexer.  */
2098 
2099 class lexer_error_sink : public lexer_test_options
2100 {
2101  public:
2102   lexer_error_sink ()
2103   {
2104     gcc_assert (s_singleton == NULL);
2105     s_singleton = this;
2106   }
2107   ~lexer_error_sink ()
2108   {
2109     gcc_assert (s_singleton == this);
2110     s_singleton = NULL;
2111 
2112     int i;
2113     char *str;
2114     FOR_EACH_VEC_ELT (m_errors, i, str)
2115       free (str);
2116   }
2117 
2118   void apply (lexer_test &test) FINAL OVERRIDE
2119   {
2120     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2121     callbacks->error = on_error;
2122   }
2123 
2124   static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
2125 			int level ATTRIBUTE_UNUSED,
2126 			int reason ATTRIBUTE_UNUSED,
2127 			rich_location *richloc ATTRIBUTE_UNUSED,
2128 			const char *msgid, va_list *ap)
2129     ATTRIBUTE_FPTR_PRINTF(5,0)
2130   {
2131     char *msg = xvasprintf (msgid, *ap);
2132     s_singleton->m_errors.safe_push (msg);
2133     return true;
2134   }
2135 
2136   auto_vec<char *> m_errors;
2137 
2138  private:
2139   static lexer_error_sink *s_singleton;
2140 };
2141 
2142 lexer_error_sink *lexer_error_sink::s_singleton;
2143 
2144 /* Constructor.  Override line_table with a new instance based on CASE_,
2145    and write CONTENT to a tempfile.  Create a cpp_reader, and use it to
2146    start parsing the tempfile.  */
2147 
2148 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2149 			lexer_test_options *options)
2150 : m_ltt (case_),
2151   m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2152   /* Create a tempfile and write the text to it.  */
2153   m_tempfile (SELFTEST_LOCATION, ".c", content),
2154   m_concats (),
2155   m_implicitly_expect_EOF (true)
2156 {
2157   if (options)
2158     options->apply (*this);
2159 
2160   cpp_init_iconv (m_parser);
2161 
2162   /* Parse the file.  */
2163   const char *fname = cpp_read_main_file (m_parser,
2164 					  m_tempfile.get_filename ());
2165   ASSERT_NE (fname, NULL);
2166 }
2167 
2168 /* Destructor.  By default, verify that the next token in m_parser is EOF.  */
2169 
2170 lexer_test::~lexer_test ()
2171 {
2172   location_t loc;
2173   const cpp_token *tok;
2174 
2175   if (m_implicitly_expect_EOF)
2176     {
2177       tok = cpp_get_token_with_location (m_parser, &loc);
2178       ASSERT_NE (tok, NULL);
2179       ASSERT_EQ (tok->type, CPP_EOF);
2180     }
2181 }
2182 
2183 /* Get the next token from m_parser.  */
2184 
2185 const cpp_token *
2186 lexer_test::get_token ()
2187 {
2188   location_t loc;
2189   const cpp_token *tok;
2190 
2191   tok = cpp_get_token_with_location (m_parser, &loc);
2192   ASSERT_NE (tok, NULL);
2193   return tok;
2194 }
2195 
2196 /* Verify that locations within string literals are correctly handled.  */
2197 
2198 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2199    using the string concatenation database for TEST.
2200 
2201    Assert that the character at index IDX is on EXPECTED_LINE,
2202    and that it begins at column EXPECTED_START_COL and ends at
2203    EXPECTED_FINISH_COL (unless the locations are beyond
2204    LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2205    columns).  */
2206 
2207 static void
2208 assert_char_at_range (const location &loc,
2209 		      lexer_test& test,
2210 		      location_t strloc, enum cpp_ttype type, int idx,
2211 		      int expected_line, int expected_start_col,
2212 		      int expected_finish_col)
2213 {
2214   cpp_reader *pfile = test.m_parser;
2215   string_concat_db *concats = &test.m_concats;
2216 
2217   source_range actual_range = source_range();
2218   const char *err
2219     = get_source_range_for_char (pfile, concats, strloc, type, idx,
2220 				 &actual_range);
2221   if (should_have_column_data_p (strloc))
2222     ASSERT_EQ_AT (loc, NULL, err);
2223   else
2224     {
2225       ASSERT_STREQ_AT (loc,
2226 		       "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2227 		       err);
2228       return;
2229     }
2230 
2231   int actual_start_line = LOCATION_LINE (actual_range.m_start);
2232   ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2233   int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2234   ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2235 
2236   if (should_have_column_data_p (actual_range.m_start))
2237     {
2238       int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2239       ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2240     }
2241   if (should_have_column_data_p (actual_range.m_finish))
2242     {
2243       int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2244       ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2245     }
2246 }
2247 
/* Convenience wrapper around assert_char_at_range; any failure is
   reported at the point where the macro is used (via
   SELFTEST_LOCATION).  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
			     EXPECTED_START_COL, EXPECTED_FINISH_COL)	\
  assert_char_at_range (SELFTEST_LOCATION,				\
			(LEXER_TEST), (STRLOC), (TYPE), (IDX),		\
			(EXPECTED_LINE),				\
			(EXPECTED_START_COL), (EXPECTED_FINISH_COL))
2256 
2257 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2258    using the string concatenation database for TEST.
2259 
2260    Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES.  */
2261 
2262 static void
2263 assert_num_substring_ranges (const location &loc,
2264 			     lexer_test& test,
2265 			     location_t strloc,
2266 			     enum cpp_ttype type,
2267 			     int expected_num_ranges)
2268 {
2269   cpp_reader *pfile = test.m_parser;
2270   string_concat_db *concats = &test.m_concats;
2271 
2272   int actual_num_ranges = -1;
2273   const char *err
2274     = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2275 					   &actual_num_ranges);
2276   if (should_have_column_data_p (strloc))
2277     ASSERT_EQ_AT (loc, NULL, err);
2278   else
2279     {
2280       ASSERT_STREQ_AT (loc,
2281 		       "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2282 		       err);
2283       return;
2284     }
2285   ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2286 }
2287 
/* Convenience wrapper around assert_num_substring_ranges; any failure
   is reported at the point where the macro is used (via
   SELFTEST_LOCATION).  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE,		\
				    EXPECTED_NUM_RANGES)		\
  assert_num_substring_ranges (SELFTEST_LOCATION,			\
			       (LEXER_TEST), (STRLOC), (TYPE),		\
			       (EXPECTED_NUM_RANGES))
2295 
2296 
2297 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2298    returns an error (using the string concatenation database for TEST).  */
2299 
2300 static void
2301 assert_has_no_substring_ranges (const location &loc,
2302 				lexer_test& test,
2303 				location_t strloc,
2304 				enum cpp_ttype type,
2305 				const char *expected_err)
2306 {
2307   cpp_reader *pfile = test.m_parser;
2308   string_concat_db *concats = &test.m_concats;
2309   cpp_substring_ranges ranges;
2310   const char *actual_err
2311     = get_substring_ranges_for_loc (pfile, concats, strloc,
2312 				    type, ranges);
2313   if (should_have_column_data_p (strloc))
2314     ASSERT_STREQ_AT (loc, expected_err, actual_err);
2315   else
2316     ASSERT_STREQ_AT (loc,
2317 		     "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2318 		     actual_err);
2319 }
2320 
/* Convenience wrapper around assert_has_no_substring_ranges; any
   failure is reported at the point where the macro is used (via
   SELFTEST_LOCATION).  */
#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)	\
  assert_has_no_substring_ranges (SELFTEST_LOCATION,			\
				  (LEXER_TEST), (STRLOC), (TYPE),	\
				  (ERR))
2324 
2325 /* Lex a simple string literal.  Verify the substring location data, before
2326    and after running cpp_interpret_string on it.  */
2327 
2328 static void
2329 test_lexer_string_locations_simple (const line_table_case &case_)
2330 {
2331   /* Digits 0-9 (with 0 at column 10), the simple way.
2332      ....................000000000.11111111112.2222222223333333333
2333      ....................123456789.01234567890.1234567890123456789
2334      We add a trailing comment to ensure that we correctly locate
2335      the end of the string literal token.  */
2336   const char *content = "        \"0123456789\" /* not a string */\n";
2337   lexer_test test (case_, content, NULL);
2338 
2339   /* Verify that we get the expected token back, with the correct
2340      location information.  */
2341   const cpp_token *tok = test.get_token ();
2342   ASSERT_EQ (tok->type, CPP_STRING);
2343   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2344   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2345 
2346   /* At this point in lexing, the quote characters are treated as part of
2347      the string (they are stripped off by cpp_interpret_string).  */
2348 
2349   ASSERT_EQ (tok->val.str.len, 12);
2350 
2351   /* Verify that cpp_interpret_string works.  */
2352   cpp_string dst_string;
2353   const enum cpp_ttype type = CPP_STRING;
2354   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2355 				      &dst_string, type);
2356   ASSERT_TRUE (result);
2357   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2358   free (const_cast <unsigned char *> (dst_string.text));
2359 
2360   /* Verify ranges of individual characters.  This no longer includes the
2361      opening quote, but does include the closing quote.  */
2362   for (int i = 0; i <= 10; i++)
2363     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2364 			  10 + i, 10 + i);
2365 
2366   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2367 }
2368 
2369 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2370    encoding.  */
2371 
2372 static void
2373 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2374 {
2375   /* EBCDIC support requires iconv.  */
2376   if (!HAVE_ICONV)
2377     return;
2378 
2379   /* Digits 0-9 (with 0 at column 10), the simple way.
2380      ....................000000000.11111111112.2222222223333333333
2381      ....................123456789.01234567890.1234567890123456789
2382      We add a trailing comment to ensure that we correctly locate
2383      the end of the string literal token.  */
2384   const char *content = "        \"0123456789\" /* not a string */\n";
2385   ebcdic_execution_charset use_ebcdic;
2386   lexer_test test (case_, content, &use_ebcdic);
2387 
2388   /* Verify that we get the expected token back, with the correct
2389      location information.  */
2390   const cpp_token *tok = test.get_token ();
2391   ASSERT_EQ (tok->type, CPP_STRING);
2392   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2393   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2394 
2395   /* At this point in lexing, the quote characters are treated as part of
2396      the string (they are stripped off by cpp_interpret_string).  */
2397 
2398   ASSERT_EQ (tok->val.str.len, 12);
2399 
2400   /* The remainder of the test requires an iconv implementation that
2401      can convert from UTF-8 to the EBCDIC encoding requested above.  */
2402   if (use_ebcdic.iconv_errors_occurred_p ())
2403     return;
2404 
2405   /* Verify that cpp_interpret_string works.  */
2406   cpp_string dst_string;
2407   const enum cpp_ttype type = CPP_STRING;
2408   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2409 				      &dst_string, type);
2410   ASSERT_TRUE (result);
2411   /* We should now have EBCDIC-encoded text, specifically
2412      IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2413      The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9.  */
2414   ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2415 		(const char *)dst_string.text);
2416   free (const_cast <unsigned char *> (dst_string.text));
2417 
2418   /* Verify that we don't attempt to record substring location information
2419      for such cases.  */
2420   ASSERT_HAS_NO_SUBSTRING_RANGES
2421     (test, tok->src_loc, type,
2422      "execution character set != source character set");
2423 }
2424 
2425 /* Lex a string literal containing a hex-escaped character.
2426    Verify the substring location data, before and after running
2427    cpp_interpret_string on it.  */
2428 
2429 static void
2430 test_lexer_string_locations_hex (const line_table_case &case_)
2431 {
2432   /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2433      and with a space in place of digit 6, to terminate the escaped
2434      hex code.
2435      ....................000000000.111111.11112222.
2436      ....................123456789.012345.67890123.  */
2437   const char *content = "        \"01234\\x35 789\"\n";
2438   lexer_test test (case_, content, NULL);
2439 
2440   /* Verify that we get the expected token back, with the correct
2441      location information.  */
2442   const cpp_token *tok = test.get_token ();
2443   ASSERT_EQ (tok->type, CPP_STRING);
2444   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2445   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2446 
2447   /* At this point in lexing, the quote characters are treated as part of
2448      the string (they are stripped off by cpp_interpret_string).  */
2449   ASSERT_EQ (tok->val.str.len, 15);
2450 
2451   /* Verify that cpp_interpret_string works.  */
2452   cpp_string dst_string;
2453   const enum cpp_ttype type = CPP_STRING;
2454   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2455 				      &dst_string, type);
2456   ASSERT_TRUE (result);
2457   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2458   free (const_cast <unsigned char *> (dst_string.text));
2459 
2460   /* Verify ranges of individual characters.  This no longer includes the
2461      opening quote, but does include the closing quote.  */
2462   for (int i = 0; i <= 4; i++)
2463     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2464   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2465   for (int i = 6; i <= 10; i++)
2466     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2467 
2468   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2469 }
2470 
2471 /* Lex a string literal containing an octal-escaped character.
2472    Verify the substring location data after running cpp_interpret_string
2473    on it.  */
2474 
2475 static void
2476 test_lexer_string_locations_oct (const line_table_case &case_)
2477 {
2478   /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2479      and with a space in place of digit 6, to terminate the escaped
2480      octal code.
2481      ....................000000000.111111.11112222.2222223333333333444
2482      ....................123456789.012345.67890123.4567890123456789012  */
2483   const char *content = "        \"01234\\065 789\" /* not a string */\n";
2484   lexer_test test (case_, content, NULL);
2485 
2486   /* Verify that we get the expected token back, with the correct
2487      location information.  */
2488   const cpp_token *tok = test.get_token ();
2489   ASSERT_EQ (tok->type, CPP_STRING);
2490   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2491 
2492   /* Verify that cpp_interpret_string works.  */
2493   cpp_string dst_string;
2494   const enum cpp_ttype type = CPP_STRING;
2495   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2496 				      &dst_string, type);
2497   ASSERT_TRUE (result);
2498   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2499   free (const_cast <unsigned char *> (dst_string.text));
2500 
2501   /* Verify ranges of individual characters.  This no longer includes the
2502      opening quote, but does include the closing quote.  */
2503   for (int i = 0; i < 5; i++)
2504     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2505   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2506   for (int i = 6; i <= 10; i++)
2507     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2508 
2509   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2510 }
2511 
2512 /* Test of string literal containing letter escapes.  */
2513 
2514 static void
2515 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2516 {
2517   /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2518      .....................000000000.1.11111.1.1.11222.22222223333333
2519      .....................123456789.0.12345.6.7.89012.34567890123456.  */
2520   const char *content = ("        \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2521   lexer_test test (case_, content, NULL);
2522 
2523   /* Verify that we get the expected tokens back.  */
2524   const cpp_token *tok = test.get_token ();
2525   ASSERT_EQ (tok->type, CPP_STRING);
2526   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2527 
2528   /* Verify ranges of individual characters. */
2529   /* "\t".  */
2530   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2531 			0, 1, 10, 11);
2532   /* "foo". */
2533   for (int i = 1; i <= 3; i++)
2534     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2535 			  i, 1, 11 + i, 11 + i);
2536   /* "\\" and "\n".  */
2537   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2538 			4, 1, 15, 16);
2539   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2540 			5, 1, 17, 18);
2541 
2542   /* "bar" and closing quote for nul-terminator.  */
2543   for (int i = 6; i <= 9; i++)
2544     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2545 			  i, 1, 13 + i, 13 + i);
2546 
2547   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2548 }
2549 
2550 /* Another test of a string literal containing a letter escape.
2551    Based on string seen in
2552      printf ("%-%\n");
2553    in gcc.dg/format/c90-printf-1.c.  */
2554 
2555 static void
2556 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2557 {
2558   /* .....................000000000.1111.11.1111.22222222223.
2559      .....................123456789.0123.45.6789.01234567890.  */
2560   const char *content = ("        \"%-%\\n\" /* non-str */\n");
2561   lexer_test test (case_, content, NULL);
2562 
2563   /* Verify that we get the expected tokens back.  */
2564   const cpp_token *tok = test.get_token ();
2565   ASSERT_EQ (tok->type, CPP_STRING);
2566   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2567 
2568   /* Verify ranges of individual characters. */
2569   /* "%-%".  */
2570   for (int i = 0; i < 3; i++)
2571     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2572 			  i, 1, 10 + i, 10 + i);
2573   /* "\n".  */
2574   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2575 			3, 1, 13, 14);
2576 
2577   /* Closing quote for nul-terminator.  */
2578   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2579 			4, 1, 15, 15);
2580 
2581   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2582 }
2583 
2584 /* Lex a string literal containing UCN 4 characters.
2585    Verify the substring location data after running cpp_interpret_string
2586    on it.  */
2587 
2588 static void
2589 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2590 {
2591   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2592      as UCN 4.
2593      ....................000000000.111111.111122.222222223.33333333344444
2594      ....................123456789.012345.678901.234567890.12345678901234  */
2595   const char *content = "        \"01234\\u2174\\u2175789\" /* non-str */\n";
2596   lexer_test test (case_, content, NULL);
2597 
2598   /* Verify that we get the expected token back, with the correct
2599      location information.  */
2600   const cpp_token *tok = test.get_token ();
2601   ASSERT_EQ (tok->type, CPP_STRING);
2602   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2603 
2604   /* Verify that cpp_interpret_string works.
2605      The string should be encoded in the execution character
2606      set.  Assuming that that is UTF-8, we should have the following:
2607      -----------  ----  -----  -------  ----------------
2608      Byte offset  Byte  Octal  Unicode  Source Column(s)
2609      -----------  ----  -----  -------  ----------------
2610      0            0x30         '0'      10
2611      1            0x31         '1'      11
2612      2            0x32         '2'      12
2613      3            0x33         '3'      13
2614      4            0x34         '4'      14
2615      5            0xE2  \342   U+2174   15-20
2616      6            0x85  \205    (cont)  15-20
2617      7            0xB4  \264    (cont)  15-20
2618      8            0xE2  \342   U+2175   21-26
2619      9            0x85  \205    (cont)  21-26
2620      10           0xB5  \265    (cont)  21-26
2621      11           0x37         '7'      27
2622      12           0x38         '8'      28
2623      13           0x39         '9'      29
2624      14           0x00                  30 (closing quote)
2625      -----------  ----  -----  -------  ---------------.  */
2626 
2627   cpp_string dst_string;
2628   const enum cpp_ttype type = CPP_STRING;
2629   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2630 				      &dst_string, type);
2631   ASSERT_TRUE (result);
2632   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2633 		(const char *)dst_string.text);
2634   free (const_cast <unsigned char *> (dst_string.text));
2635 
2636   /* Verify ranges of individual characters.  This no longer includes the
2637      opening quote, but does include the closing quote.
2638      '01234'.  */
2639   for (int i = 0; i <= 4; i++)
2640     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2641   /* U+2174.  */
2642   for (int i = 5; i <= 7; i++)
2643     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2644   /* U+2175.  */
2645   for (int i = 8; i <= 10; i++)
2646     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2647   /* '789' and nul terminator  */
2648   for (int i = 11; i <= 14; i++)
2649     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2650 
2651   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2652 }
2653 
2654 /* Lex a string literal containing UCN 8 characters.
2655    Verify the substring location data after running cpp_interpret_string
2656    on it.  */
2657 
2658 static void
2659 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2660 {
2661   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2662      ....................000000000.111111.1111222222.2222333333333.344444
2663      ....................123456789.012345.6789012345.6789012345678.901234  */
2664   const char *content = "        \"01234\\U00002174\\U00002175789\" /* */\n";
2665   lexer_test test (case_, content, NULL);
2666 
2667   /* Verify that we get the expected token back, with the correct
2668      location information.  */
2669   const cpp_token *tok = test.get_token ();
2670   ASSERT_EQ (tok->type, CPP_STRING);
2671   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2672 			   "\"01234\\U00002174\\U00002175789\"");
2673 
2674   /* Verify that cpp_interpret_string works.
2675      The UTF-8 encoding of the string is identical to that from
2676      the ucn4 testcase above; the only difference is the column
2677      locations.  */
2678   cpp_string dst_string;
2679   const enum cpp_ttype type = CPP_STRING;
2680   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2681 				      &dst_string, type);
2682   ASSERT_TRUE (result);
2683   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2684 		(const char *)dst_string.text);
2685   free (const_cast <unsigned char *> (dst_string.text));
2686 
2687   /* Verify ranges of individual characters.  This no longer includes the
2688      opening quote, but does include the closing quote.
2689      '01234'.  */
2690   for (int i = 0; i <= 4; i++)
2691     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2692   /* U+2174.  */
2693   for (int i = 5; i <= 7; i++)
2694     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2695   /* U+2175.  */
2696   for (int i = 8; i <= 10; i++)
2697     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2698   /* '789' at columns 35-37  */
2699   for (int i = 11; i <= 13; i++)
2700     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2701   /* Closing quote/nul-terminator at column 38.  */
2702   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
2703 
2704   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2705 }
2706 
/* Read the four bytes at PTR_BE_VALUE as a big-endian 32-bit quantity
   (most significant byte first) and return it as a host-order value.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  /* Go through unsigned char so the result does not depend on the
     host's own byte order.  */
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  uint32_t host_value = 0;
  for (int k = 0; k < 4; k++)
    host_value = (host_value << 8) | bytes[k];
  return host_value;
}
2718 
2719 /* Lex a wide string literal and verify that attempts to read substring
2720    location data from it fail gracefully.  */
2721 
2722 static void
2723 test_lexer_string_locations_wide_string (const line_table_case &case_)
2724 {
2725   /* Digits 0-9.
2726      ....................000000000.11111111112.22222222233333
2727      ....................123456789.01234567890.12345678901234  */
2728   const char *content = "       L\"0123456789\" /* non-str */\n";
2729   lexer_test test (case_, content, NULL);
2730 
2731   /* Verify that we get the expected token back, with the correct
2732      location information.  */
2733   const cpp_token *tok = test.get_token ();
2734   ASSERT_EQ (tok->type, CPP_WSTRING);
2735   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2736 
2737   /* Verify that cpp_interpret_string works, using CPP_WSTRING.  */
2738   cpp_string dst_string;
2739   const enum cpp_ttype type = CPP_WSTRING;
2740   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2741 				      &dst_string, type);
2742   ASSERT_TRUE (result);
2743   /* The cpp_reader defaults to big-endian with
2744      CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2745      now be encoded as UTF-32BE.  */
2746   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2747   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2748   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2749   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2750   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2751   free (const_cast <unsigned char *> (dst_string.text));
2752 
2753   /* We don't yet support generating substring location information
2754      for L"" strings.  */
2755   ASSERT_HAS_NO_SUBSTRING_RANGES
2756     (test, tok->src_loc, type,
2757      "execution character set != source character set");
2758 }
2759 
/* Read the two bytes at PTR_BE_VALUE as a big-endian 16-bit quantity
   (most significant byte first) and return it as a host-order value.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  /* Go through unsigned char so the result does not depend on the
     host's own byte order.  */
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  const unsigned int high_byte = bytes[0];
  const unsigned int low_byte = bytes[1];
  return (uint16_t) ((high_byte << 8) | low_byte);
}
2768 
2769 /* Lex a u"" string literal and verify that attempts to read substring
2770    location data from it fail gracefully.  */
2771 
2772 static void
2773 test_lexer_string_locations_string16 (const line_table_case &case_)
2774 {
2775   /* Digits 0-9.
2776      ....................000000000.11111111112.22222222233333
2777      ....................123456789.01234567890.12345678901234  */
2778   const char *content = "       u\"0123456789\" /* non-str */\n";
2779   lexer_test test (case_, content, NULL);
2780 
2781   /* Verify that we get the expected token back, with the correct
2782      location information.  */
2783   const cpp_token *tok = test.get_token ();
2784   ASSERT_EQ (tok->type, CPP_STRING16);
2785   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2786 
2787   /* Verify that cpp_interpret_string works, using CPP_STRING16.  */
2788   cpp_string dst_string;
2789   const enum cpp_ttype type = CPP_STRING16;
2790   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2791 				      &dst_string, type);
2792   ASSERT_TRUE (result);
2793 
2794   /* The cpp_reader defaults to big-endian, so dst_string should
2795      now be encoded as UTF-16BE.  */
2796   const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2797   ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2798   ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2799   ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2800   ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2801   free (const_cast <unsigned char *> (dst_string.text));
2802 
2803   /* We don't yet support generating substring location information
2804      for L"" strings.  */
2805   ASSERT_HAS_NO_SUBSTRING_RANGES
2806     (test, tok->src_loc, type,
2807      "execution character set != source character set");
2808 }
2809 
2810 /* Lex a U"" string literal and verify that attempts to read substring
2811    location data from it fail gracefully.  */
2812 
2813 static void
2814 test_lexer_string_locations_string32 (const line_table_case &case_)
2815 {
2816   /* Digits 0-9.
2817      ....................000000000.11111111112.22222222233333
2818      ....................123456789.01234567890.12345678901234  */
2819   const char *content = "       U\"0123456789\" /* non-str */\n";
2820   lexer_test test (case_, content, NULL);
2821 
2822   /* Verify that we get the expected token back, with the correct
2823      location information.  */
2824   const cpp_token *tok = test.get_token ();
2825   ASSERT_EQ (tok->type, CPP_STRING32);
2826   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2827 
2828   /* Verify that cpp_interpret_string works, using CPP_STRING32.  */
2829   cpp_string dst_string;
2830   const enum cpp_ttype type = CPP_STRING32;
2831   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2832 				      &dst_string, type);
2833   ASSERT_TRUE (result);
2834 
2835   /* The cpp_reader defaults to big-endian, so dst_string should
2836      now be encoded as UTF-32BE.  */
2837   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2838   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2839   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2840   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2841   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2842   free (const_cast <unsigned char *> (dst_string.text));
2843 
2844   /* We don't yet support generating substring location information
2845      for L"" strings.  */
2846   ASSERT_HAS_NO_SUBSTRING_RANGES
2847     (test, tok->src_loc, type,
2848      "execution character set != source character set");
2849 }
2850 
2851 /* Lex a u8-string literal.
2852    Verify the substring location data after running cpp_interpret_string
2853    on it.  */
2854 
2855 static void
2856 test_lexer_string_locations_u8 (const line_table_case &case_)
2857 {
2858   /* Digits 0-9.
2859      ....................000000000.11111111112.22222222233333
2860      ....................123456789.01234567890.12345678901234  */
2861   const char *content = "      u8\"0123456789\" /* non-str */\n";
2862   lexer_test test (case_, content, NULL);
2863 
2864   /* Verify that we get the expected token back, with the correct
2865      location information.  */
2866   const cpp_token *tok = test.get_token ();
2867   ASSERT_EQ (tok->type, CPP_UTF8STRING);
2868   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2869 
2870   /* Verify that cpp_interpret_string works.  */
2871   cpp_string dst_string;
2872   const enum cpp_ttype type = CPP_STRING;
2873   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2874 				      &dst_string, type);
2875   ASSERT_TRUE (result);
2876   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2877   free (const_cast <unsigned char *> (dst_string.text));
2878 
2879   /* Verify ranges of individual characters.  This no longer includes the
2880      opening quote, but does include the closing quote.  */
2881   for (int i = 0; i <= 10; i++)
2882     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2883 }
2884 
2885 /* Lex a string literal containing UTF-8 source characters.
2886    Verify the substring location data after running cpp_interpret_string
2887    on it.  */
2888 
2889 static void
2890 test_lexer_string_locations_utf8_source (const line_table_case &case_)
2891 {
2892  /* This string literal is written out to the source file as UTF-8,
2893     and is of the form "before mojibake after", where "mojibake"
2894     is written as the following four unicode code points:
2895        U+6587 CJK UNIFIED IDEOGRAPH-6587
2896        U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2897        U+5316 CJK UNIFIED IDEOGRAPH-5316
2898        U+3051 HIRAGANA LETTER KE.
2899      Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2900      "before" and "after" are 1 byte per unicode character.
2901 
2902      The numbering shown are "columns", which are *byte* numbers within
2903      the line, rather than unicode character numbers.
2904 
2905      .................... 000000000.1111111.
2906      .................... 123456789.0123456.  */
2907   const char *content = ("        \"before "
2908 			 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2909 			      UTF-8: 0xE6 0x96 0x87
2910 			      C octal escaped UTF-8: \346\226\207
2911 			    "column" numbers: 17-19.  */
2912 			 "\346\226\207"
2913 
2914 			 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2915 			      UTF-8: 0xE5 0xAD 0x97
2916 			      C octal escaped UTF-8: \345\255\227
2917 			    "column" numbers: 20-22.  */
2918 			 "\345\255\227"
2919 
2920 			 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2921 			      UTF-8: 0xE5 0x8C 0x96
2922 			      C octal escaped UTF-8: \345\214\226
2923 			    "column" numbers: 23-25.  */
2924 			 "\345\214\226"
2925 
2926 			 /* U+3051 HIRAGANA LETTER KE
2927 			      UTF-8: 0xE3 0x81 0x91
2928 			      C octal escaped UTF-8: \343\201\221
2929 			    "column" numbers: 26-28.  */
2930 			 "\343\201\221"
2931 
2932 			 /* column numbers 29 onwards
2933 			  2333333.33334444444444
2934 			  9012345.67890123456789. */
2935 			 " after\" /* non-str */\n");
2936   lexer_test test (case_, content, NULL);
2937 
2938   /* Verify that we get the expected token back, with the correct
2939      location information.  */
2940   const cpp_token *tok = test.get_token ();
2941   ASSERT_EQ (tok->type, CPP_STRING);
2942   ASSERT_TOKEN_AS_TEXT_EQ
2943     (test.m_parser, tok,
2944      "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
2945 
2946   /* Verify that cpp_interpret_string works.  */
2947   cpp_string dst_string;
2948   const enum cpp_ttype type = CPP_STRING;
2949   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2950 				      &dst_string, type);
2951   ASSERT_TRUE (result);
2952   ASSERT_STREQ
2953     ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
2954      (const char *)dst_string.text);
2955   free (const_cast <unsigned char *> (dst_string.text));
2956 
2957   /* Verify ranges of individual characters.  This no longer includes the
2958      opening quote, but does include the closing quote.
2959      Assuming that both source and execution encodings are UTF-8, we have
2960      a run of 25 octets in each, plus the NUL terminator.  */
2961   for (int i = 0; i < 25; i++)
2962     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2963   /* NUL-terminator should use the closing quote at column 35.  */
2964   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
2965 
2966   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
2967 }
2968 
2969 /* Test of string literal concatenation.  */
2970 
2971 static void
2972 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
2973 {
2974   /* Digits 0-9.
2975      .....................000000000.111111.11112222222222
2976      .....................123456789.012345.67890123456789.  */
2977   const char *content = ("        \"01234\" /* non-str */\n"
2978 			 "        \"56789\" /* non-str */\n");
2979   lexer_test test (case_, content, NULL);
2980 
2981   location_t input_locs[2];
2982 
2983   /* Verify that we get the expected tokens back.  */
2984   auto_vec <cpp_string> input_strings;
2985   const cpp_token *tok_a = test.get_token ();
2986   ASSERT_EQ (tok_a->type, CPP_STRING);
2987   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
2988   input_strings.safe_push (tok_a->val.str);
2989   input_locs[0] = tok_a->src_loc;
2990 
2991   const cpp_token *tok_b = test.get_token ();
2992   ASSERT_EQ (tok_b->type, CPP_STRING);
2993   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
2994   input_strings.safe_push (tok_b->val.str);
2995   input_locs[1] = tok_b->src_loc;
2996 
2997   /* Verify that cpp_interpret_string works.  */
2998   cpp_string dst_string;
2999   const enum cpp_ttype type = CPP_STRING;
3000   bool result = cpp_interpret_string (test.m_parser,
3001 				      input_strings.address (), 2,
3002 				      &dst_string, type);
3003   ASSERT_TRUE (result);
3004   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3005   free (const_cast <unsigned char *> (dst_string.text));
3006 
3007   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
3008   test.m_concats.record_string_concatenation (2, input_locs);
3009 
3010   location_t initial_loc = input_locs[0];
3011 
3012   /* "01234" on line 1.  */
3013   for (int i = 0; i <= 4; i++)
3014     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3015   /* "56789" in line 2, plus its closing quote for the nul terminator.  */
3016   for (int i = 5; i <= 10; i++)
3017     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3018 
3019   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3020 }
3021 
3022 /* Another test of string literal concatenation.  */
3023 
3024 static void
3025 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3026 {
3027   /* Digits 0-9.
3028      .....................000000000.111.11111112222222
3029      .....................123456789.012.34567890123456.  */
3030   const char *content = ("        \"01\" /* non-str */\n"
3031 			 "        \"23\" /* non-str */\n"
3032 			 "        \"45\" /* non-str */\n"
3033 			 "        \"67\" /* non-str */\n"
3034 			 "        \"89\" /* non-str */\n");
3035   lexer_test test (case_, content, NULL);
3036 
3037   auto_vec <cpp_string> input_strings;
3038   location_t input_locs[5];
3039 
3040   /* Verify that we get the expected tokens back.  */
3041   for (int i = 0; i < 5; i++)
3042     {
3043       const cpp_token *tok = test.get_token ();
3044       ASSERT_EQ (tok->type, CPP_STRING);
3045       input_strings.safe_push (tok->val.str);
3046       input_locs[i] = tok->src_loc;
3047     }
3048 
3049   /* Verify that cpp_interpret_string works.  */
3050   cpp_string dst_string;
3051   const enum cpp_ttype type = CPP_STRING;
3052   bool result = cpp_interpret_string (test.m_parser,
3053 				      input_strings.address (), 5,
3054 				      &dst_string, type);
3055   ASSERT_TRUE (result);
3056   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3057   free (const_cast <unsigned char *> (dst_string.text));
3058 
3059   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
3060   test.m_concats.record_string_concatenation (5, input_locs);
3061 
3062   location_t initial_loc = input_locs[0];
3063 
3064   /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3065      detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3066      and expect get_source_range_for_substring to fail.
3067      However, for a string concatenation test, we can have a case
3068      where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3069      but subsequent strings can be after it.
3070      Attempting to detect this within assert_char_at_range
3071      would overcomplicate the logic for the common test cases, so
3072      we detect it here.  */
3073   if (should_have_column_data_p (input_locs[0])
3074       && !should_have_column_data_p (input_locs[4]))
3075     {
3076       /* Verify that get_source_range_for_substring gracefully rejects
3077 	 this case.  */
3078       source_range actual_range;
3079       const char *err
3080 	= get_source_range_for_char (test.m_parser, &test.m_concats,
3081 				     initial_loc, type, 0, &actual_range);
3082       ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3083       return;
3084     }
3085 
3086   for (int i = 0; i < 5; i++)
3087     for (int j = 0; j < 2; j++)
3088       ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3089 			    i + 1, 10 + j, 10 + j);
3090 
3091   /* NUL-terminator should use the final closing quote at line 5 column 12.  */
3092   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3093 
3094   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3095 }
3096 
3097 /* Another test of string literal concatenation, this time combined with
3098    various kinds of escaped characters.  */
3099 
3100 static void
3101 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3102 {
3103   /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3104      digit 6 in ASCII as octal "\066", concatenating multiple strings.  */
3105   const char *content
3106     /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3107        .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3108     = ("        \"01234\"  \"\\x35\"  \"\\066\"  \"789\" /* non-str */\n");
3109   lexer_test test (case_, content, NULL);
3110 
3111   auto_vec <cpp_string> input_strings;
3112   location_t input_locs[4];
3113 
3114   /* Verify that we get the expected tokens back.  */
3115   for (int i = 0; i < 4; i++)
3116     {
3117       const cpp_token *tok = test.get_token ();
3118       ASSERT_EQ (tok->type, CPP_STRING);
3119       input_strings.safe_push (tok->val.str);
3120       input_locs[i] = tok->src_loc;
3121     }
3122 
3123   /* Verify that cpp_interpret_string works.  */
3124   cpp_string dst_string;
3125   const enum cpp_ttype type = CPP_STRING;
3126   bool result = cpp_interpret_string (test.m_parser,
3127 				      input_strings.address (), 4,
3128 				      &dst_string, type);
3129   ASSERT_TRUE (result);
3130   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3131   free (const_cast <unsigned char *> (dst_string.text));
3132 
3133   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
3134   test.m_concats.record_string_concatenation (4, input_locs);
3135 
3136   location_t initial_loc = input_locs[0];
3137 
3138   for (int i = 0; i <= 4; i++)
3139     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3140   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3141   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3142   for (int i = 7; i <= 9; i++)
3143     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3144 
3145   /* NUL-terminator should use the location of the final closing quote.  */
3146   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3147 
3148   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3149 }
3150 
3151 /* Test of string literal in a macro.  */
3152 
3153 static void
3154 test_lexer_string_locations_macro (const line_table_case &case_)
3155 {
3156   /* Digits 0-9.
3157      .....................0000000001111111111.22222222223.
3158      .....................1234567890123456789.01234567890.  */
3159   const char *content = ("#define MACRO     \"0123456789\" /* non-str */\n"
3160 			 "  MACRO");
3161   lexer_test test (case_, content, NULL);
3162 
3163   /* Verify that we get the expected tokens back.  */
3164   const cpp_token *tok = test.get_token ();
3165   ASSERT_EQ (tok->type, CPP_PADDING);
3166 
3167   tok = test.get_token ();
3168   ASSERT_EQ (tok->type, CPP_STRING);
3169   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3170 
3171   /* Verify ranges of individual characters.  We ought to
3172      see columns within the macro definition.  */
3173   for (int i = 0; i <= 10; i++)
3174     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3175 			  i, 1, 20 + i, 20 + i);
3176 
3177   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3178 
3179   tok = test.get_token ();
3180   ASSERT_EQ (tok->type, CPP_PADDING);
3181 }
3182 
3183 /* Test of stringification of a macro argument.  */
3184 
3185 static void
3186 test_lexer_string_locations_stringified_macro_argument
3187   (const line_table_case &case_)
3188 {
3189   /* .....................000000000111111111122222222223.
3190      .....................123456789012345678901234567890.  */
3191   const char *content = ("#define MACRO(X) #X /* non-str */\n"
3192 			 "MACRO(foo)\n");
3193   lexer_test test (case_, content, NULL);
3194 
3195   /* Verify that we get the expected token back.  */
3196   const cpp_token *tok = test.get_token ();
3197   ASSERT_EQ (tok->type, CPP_PADDING);
3198 
3199   tok = test.get_token ();
3200   ASSERT_EQ (tok->type, CPP_STRING);
3201   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3202 
3203   /* We don't support getting the location of a stringified macro
3204      argument.  Verify that it fails gracefully.  */
3205   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3206 				  "cpp_interpret_string_1 failed");
3207 
3208   tok = test.get_token ();
3209   ASSERT_EQ (tok->type, CPP_PADDING);
3210 
3211   tok = test.get_token ();
3212   ASSERT_EQ (tok->type, CPP_PADDING);
3213 }
3214 
3215 /* Ensure that we are fail gracefully if something attempts to pass
3216    in a location that isn't a string literal token.  Seen on this code:
3217 
3218      const char a[] = " %d ";
3219      __builtin_printf (a, 0.5);
3220                        ^
3221 
3222    when c-format.c erroneously used the indicated one-character
3223    location as the format string location, leading to a read past the
3224    end of a string buffer in cpp_interpret_string_1.  */
3225 
3226 static void
3227 test_lexer_string_locations_non_string (const line_table_case &case_)
3228 {
3229   /* .....................000000000111111111122222222223.
3230      .....................123456789012345678901234567890.  */
3231   const char *content = ("         a\n");
3232   lexer_test test (case_, content, NULL);
3233 
3234   /* Verify that we get the expected token back.  */
3235   const cpp_token *tok = test.get_token ();
3236   ASSERT_EQ (tok->type, CPP_NAME);
3237   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3238 
3239   /* At this point, libcpp is attempting to interpret the name as a
3240      string literal, despite it not starting with a quote.  We don't detect
3241      that, but we should at least fail gracefully.  */
3242   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3243 				  "cpp_interpret_string_1 failed");
3244 }
3245 
3246 /* Ensure that we can read substring information for a token which
3247    starts in one linemap and ends in another .  Adapted from
3248    gcc.dg/cpp/pr69985.c.  */
3249 
3250 static void
3251 test_lexer_string_locations_long_line (const line_table_case &case_)
3252 {
3253   /* .....................000000.000111111111
3254      .....................123456.789012346789.  */
3255   const char *content = ("/* A very long line, so that we start a new line map.  */\n"
3256 			 "     \"0123456789012345678901234567890123456789"
3257 			 "0123456789012345678901234567890123456789"
3258 			 "0123456789012345678901234567890123456789"
3259 			 "0123456789\"\n");
3260 
3261   lexer_test test (case_, content, NULL);
3262 
3263   /* Verify that we get the expected token back.  */
3264   const cpp_token *tok = test.get_token ();
3265   ASSERT_EQ (tok->type, CPP_STRING);
3266 
3267   if (!should_have_column_data_p (line_table->highest_location))
3268     return;
3269 
3270   /* Verify ranges of individual characters.  */
3271   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3272   for (int i = 0; i < 131; i++)
3273     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3274 			  i, 2, 7 + i, 7 + i);
3275 }
3276 
3277 /* Test of locations within a raw string that doesn't contain a newline.  */
3278 
3279 static void
3280 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3281 {
3282   /* .....................00.0000000111111111122.
3283      .....................12.3456789012345678901.  */
3284   const char *content = ("R\"foo(0123456789)foo\"\n");
3285   lexer_test test (case_, content, NULL);
3286 
3287   /* Verify that we get the expected token back.  */
3288   const cpp_token *tok = test.get_token ();
3289   ASSERT_EQ (tok->type, CPP_STRING);
3290 
3291   /* Verify that cpp_interpret_string works.  */
3292   cpp_string dst_string;
3293   const enum cpp_ttype type = CPP_STRING;
3294   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3295 				      &dst_string, type);
3296   ASSERT_TRUE (result);
3297   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3298   free (const_cast <unsigned char *> (dst_string.text));
3299 
3300   if (!should_have_column_data_p (line_table->highest_location))
3301     return;
3302 
3303   /* 0-9, plus the nil terminator.  */
3304   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3305   for (int i = 0; i < 11; i++)
3306     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3307 			  i, 1, 7 + i, 7 + i);
3308 }
3309 
3310 /* Test of locations within a raw string that contains a newline.
     Lexes a raw string literal with delimiter "foo" whose payload spans
     several source lines ("\nhello\nworld\n") for the line_table_case CASE_.
     The literal must still lex as one CPP_STRING token and be interpreted
     correctly, but substring ranges are expected to be unavailable.  */
3311 
3312 static void
3313 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3314 {
3315   /* .....................00.0000.
3316      .....................12.3456.  */
3317   const char *content = ("R\"foo(\n"
3318   /* .....................00000.
3319      .....................12345.  */
3320 			 "hello\n"
3321 			 "world\n"
3322   /* .....................00000.
3323      .....................12345.  */
3324 			 ")foo\"\n");
3325   lexer_test test (case_, content, NULL);
3326 
3327   /* Verify that we get the expected token back.  */
3328   const cpp_token *tok = test.get_token ();
3329   ASSERT_EQ (tok->type, CPP_STRING);
3330 
3331   /* Verify that cpp_interpret_string works.  The embedded newlines must
       survive in the interpreted text, which this test frees afterwards.  */
3332   cpp_string dst_string;
3333   const enum cpp_ttype type = CPP_STRING;
3334   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3335 				      &dst_string, type);
3336   ASSERT_TRUE (result);
3337   ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3338   free (const_cast <unsigned char *> (dst_string.text));
3339 
       /* The remaining check concerns column-level ranges, so it is skipped
          when should_have_column_data_p rejects the highest location.  */
3340   if (!should_have_column_data_p (line_table->highest_location))
3341     return;
3342 
3343   /* Currently we don't support locations within raw strings that
3344      contain newlines.  The string below is the failure reason that the
       substring-range lookup is expected to report.  */
3345   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3346 				  "range endpoints are on different lines");
3347 }
3348 
3349 /* Test of parsing an unterminated raw string.
     The input opens a raw string with the delimiter "ouch" but only ever
     offers ")ouCh" (different case) as a closer, so the literal never ends.
     Diagnostics are collected in a lexer_error_sink handed to lexer_test;
     the test expects the lexer to return CPP_EOF and to have recorded
     exactly one error.  */
3350 
3351 static void
3352 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3353 {
3354   const char *content = "R\"ouch()ouCh\" /* etc */";
3355 
3356   lexer_error_sink errors;
3357   lexer_test test (case_, content, &errors);
       /* NOTE(review): presumably this stops lexer_test from doing its own
          end-of-input check, since the EOF token is consumed explicitly
          below -- confirm against the lexer_test definition.  */
3358   test.m_implicitly_expect_EOF = false;
3359 
3360   /* Attempt to parse the raw string.  */
3361   const cpp_token *tok = test.get_token ();
3362   ASSERT_EQ (tok->type, CPP_EOF);
3363 
       /* Only the number of recorded errors is verified; see below for why
          the message text itself is not compared.  */
3364   ASSERT_EQ (1, errors.m_errors.length ());
3365   /* We expect the message "unterminated raw string"
3366      in the "cpplib" translation domain.
3367      It's not clear that dgettext is available on all supported hosts,
3368      so this assertion is commented-out for now.
3369        ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3370                      errors.m_errors[0]);
3371   */
3372 }
3373 
3374 /* Test of lexing char constants.
     Lexes five character constants, one per source line, for the
     line_table_case CASE_: 'a' with no prefix and with the u, U and L
     prefixes, followed by the multi-character constant 'abc'.  Each token's
     type and spelling are checked; only the first token is additionally
     passed through cpp_interpret_charconst.  */
3375 
3376 static void
3377 test_lexer_char_constants (const line_table_case &case_)
3378 {
3379   /* Various char constants.
3380      .....................0000000001111111111.22222222223.
3381      .....................1234567890123456789.01234567890.  */
3382   const char *content = ("         'a'\n"
3383 			 "        u'a'\n"
3384 			 "        U'a'\n"
3385 			 "        L'a'\n"
3386 			 "         'abc'\n");
3387   lexer_test test (case_, content, NULL);
3388 
3389   /* Verify that we get the expected tokens back.  */
3390   /* 'a'.  */
3391   const cpp_token *tok = test.get_token ();
3392   ASSERT_EQ (tok->type, CPP_CHAR);
3393   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3394 
       /* Interpret the plain 'a' token.  unsignedp is supplied only because
          the call takes it as an out-parameter; its value is not checked.  */
3395   unsigned int chars_seen;
3396   int unsignedp;
3397   cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3398 					  &chars_seen, &unsignedp);
3399   ASSERT_EQ (cc, 'a');
3400   ASSERT_EQ (chars_seen, 1);
3401 
3402   /* u'a'.  */
3403   tok = test.get_token ();
3404   ASSERT_EQ (tok->type, CPP_CHAR16);
3405   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3406 
3407   /* U'a'.  */
3408   tok = test.get_token ();
3409   ASSERT_EQ (tok->type, CPP_CHAR32);
3410   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3411 
3412   /* L'a'.  */
3413   tok = test.get_token ();
3414   ASSERT_EQ (tok->type, CPP_WCHAR);
3415   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3416 
3417   /* 'abc' (c-char-sequence).  It is expected to lex as a single CPP_CHAR
       token, the same type as the one-character constant above.  */
3418   tok = test.get_token ();
3419   ASSERT_EQ (tok->type, CPP_CHAR);
3420   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3421 }
3422 /* A table of interesting location_t values, giving one axis of our test
3423    matrix.  for_each_line_table_case walks every entry of this table and
     asserts a total of 2 * 12 cases, so the table is expected to hold
     exactly 12 entries; keep that assertion in step when editing it.  */
3424 
3425 static const location_t boundary_locations[] = {
3426   /* Zero means "don't override the default values for a new line_table".  */
3427   0,
3428 
3429   /* An arbitrary non-zero value that isn't close to one of
3430      the boundary values below.  */
3431   0x10000,
3432 
3433   /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES: well below,
       just below, exactly at, just above and well above the limit.  */
3434   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3435   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3436   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3437   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3438   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3439 
3440   /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS, using the same five
       offsets as above.  */
3441   LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3442   LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3443   LINE_MAP_MAX_LOCATION_WITH_COLS,
3444   LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3445   LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3446 };
3447 
3448 /* Run TESTCASE multiple times, once for each case in our test matrix.
     The matrix is the two default_range_bits values 0 and 5 crossed with
     every entry of boundary_locations.  For each combination a
     line_table_case is constructed and passed to TESTCASE; afterwards the
     number of invocations is checked against the expected matrix size.  */
3449 
3450 void
3451 for_each_line_table_case (void (*testcase) (const line_table_case &))
3452 {
3453   /* As noted above in the description of struct line_table_case,
3454      we want to explore a test matrix of interesting line_table
3455      situations, running various selftests for each case within the
3456      matrix.  */
3457 
3458   /* Run all tests with:
3459      (a) line_table->default_range_bits == 0, and
3460      (b) line_table->default_range_bits == 5.
       The loop steps by 5, so 0 and 5 are the only values visited.  */
3461   int num_cases_tested = 0;
3462   for (int default_range_bits = 0; default_range_bits <= 5;
3463        default_range_bits += 5)
3464     {
3465       /* ...and use each of the "interesting" location values as
3466 	 the starting location within line_table.  */
3467       const int num_boundary_locations
3468 	= sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3469       for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3470 	{
3471 	  line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3472 
3473 	  testcase (c);
3474 
3475 	  num_cases_tested++;
3476 	}
3477     }
3478 
3479   /* Verify that we fully covered the test matrix: 2 default_range_bits
       values times the 12 entries of boundary_locations.  The 12 is spelled
       out rather than derived, so it must be updated by hand if that table
       gains or loses entries.  */
3480   ASSERT_EQ (num_cases_tested, 2 * 12);
3481 }
3482 
3483 /* Run all of the selftests within this file.
     Tests that take a line_table_case are driven through
     for_each_line_table_case, so each of them runs once per entry of the
     test matrix; the remaining tests take no arguments and are called
     directly, once each.  */
3484 
3485 void
3486 input_c_tests ()
3487 {
       /* Tests that take no line_table_case.  */
3488   test_should_have_column_data_p ();
3489   test_unknown_location ();
3490   test_builtins ();
3491   for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3492 
       /* Line-map and lexer tests, repeated for every line_table_case.  */
3493   for_each_line_table_case (test_accessing_ordinary_linemaps);
3494   for_each_line_table_case (test_lexer);
3495   for_each_line_table_case (test_lexer_string_locations_simple);
3496   for_each_line_table_case (test_lexer_string_locations_ebcdic);
3497   for_each_line_table_case (test_lexer_string_locations_hex);
3498   for_each_line_table_case (test_lexer_string_locations_oct);
3499   for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3500   for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3501   for_each_line_table_case (test_lexer_string_locations_ucn4);
3502   for_each_line_table_case (test_lexer_string_locations_ucn8);
3503   for_each_line_table_case (test_lexer_string_locations_wide_string);
3504   for_each_line_table_case (test_lexer_string_locations_string16);
3505   for_each_line_table_case (test_lexer_string_locations_string32);
3506   for_each_line_table_case (test_lexer_string_locations_u8);
3507   for_each_line_table_case (test_lexer_string_locations_utf8_source);
3508   for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3509   for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3510   for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3511   for_each_line_table_case (test_lexer_string_locations_macro);
3512   for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3513   for_each_line_table_case (test_lexer_string_locations_non_string);
3514   for_each_line_table_case (test_lexer_string_locations_long_line);
3515   for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3516   for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
3517   for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
3518   for_each_line_table_case (test_lexer_char_constants);
3519 
       /* Called once, outside the line_table_case matrix.  */
3520   test_reading_source_line ();
3521 }
3522 
3523 } // namespace selftest
3524 
3525 #endif /* CHECKING_P */
3526