xref: /dflybsd-src/contrib/gcc-8.0/gcc/input.c (revision 95059079af47f9a66a175f374f2da1a5020e3255)
138fd1498Szrj /* Data and functions related to line maps and input files.
238fd1498Szrj    Copyright (C) 2004-2018 Free Software Foundation, Inc.
338fd1498Szrj 
438fd1498Szrj This file is part of GCC.
538fd1498Szrj 
638fd1498Szrj GCC is free software; you can redistribute it and/or modify it under
738fd1498Szrj the terms of the GNU General Public License as published by the Free
838fd1498Szrj Software Foundation; either version 3, or (at your option) any later
938fd1498Szrj version.
1038fd1498Szrj 
1138fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT ANY
1238fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or
1338fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
1438fd1498Szrj for more details.
1538fd1498Szrj 
1638fd1498Szrj You should have received a copy of the GNU General Public License
1738fd1498Szrj along with GCC; see the file COPYING3.  If not see
1838fd1498Szrj <http://www.gnu.org/licenses/>.  */
1938fd1498Szrj 
2038fd1498Szrj #include "config.h"
2138fd1498Szrj #include "system.h"
2238fd1498Szrj #include "coretypes.h"
2338fd1498Szrj #include "intl.h"
2438fd1498Szrj #include "diagnostic-core.h"
2538fd1498Szrj #include "selftest.h"
2638fd1498Szrj #include "cpplib.h"
2738fd1498Szrj 
2838fd1498Szrj #ifndef HAVE_ICONV
2938fd1498Szrj #define HAVE_ICONV 0
3038fd1498Szrj #endif
3138fd1498Szrj 
3238fd1498Szrj /* This is a cache used by get_next_line to store the content of a
3338fd1498Szrj    file to be searched for file lines.  */
3438fd1498Szrj struct fcache
3538fd1498Szrj {
3638fd1498Szrj   /* These are information used to store a line boundary.  */
3738fd1498Szrj   struct line_info
3838fd1498Szrj   {
3938fd1498Szrj     /* The line number.  It starts from 1.  */
4038fd1498Szrj     size_t line_num;
4138fd1498Szrj 
4238fd1498Szrj     /* The position (byte count) of the beginning of the line,
4338fd1498Szrj        relative to the file data pointer.  This starts at zero.  */
4438fd1498Szrj     size_t start_pos;
4538fd1498Szrj 
4638fd1498Szrj     /* The position (byte count) of the last byte of the line.  This
4738fd1498Szrj        normally points to the '\n' character, or to one byte after the
4838fd1498Szrj        last byte of the file, if the file doesn't contain a '\n'
4938fd1498Szrj        character.  */
5038fd1498Szrj     size_t end_pos;
5138fd1498Szrj 
line_infofcache::line_info5238fd1498Szrj     line_info (size_t l, size_t s, size_t e)
5338fd1498Szrj       : line_num (l), start_pos (s), end_pos (e)
5438fd1498Szrj     {}
5538fd1498Szrj 
line_infofcache::line_info5638fd1498Szrj     line_info ()
5738fd1498Szrj       :line_num (0), start_pos (0), end_pos (0)
5838fd1498Szrj     {}
5938fd1498Szrj   };
6038fd1498Szrj 
6138fd1498Szrj   /* The number of time this file has been accessed.  This is used
6238fd1498Szrj      to designate which file cache to evict from the cache
6338fd1498Szrj      array.  */
6438fd1498Szrj   unsigned use_count;
6538fd1498Szrj 
6638fd1498Szrj   /* The file_path is the key for identifying a particular file in
6738fd1498Szrj      the cache.
6838fd1498Szrj      For libcpp-using code, the underlying buffer for this field is
6938fd1498Szrj      owned by the corresponding _cpp_file within the cpp_reader.  */
7038fd1498Szrj   const char *file_path;
7138fd1498Szrj 
7238fd1498Szrj   FILE *fp;
7338fd1498Szrj 
7438fd1498Szrj   /* This points to the content of the file that we've read so
7538fd1498Szrj      far.  */
7638fd1498Szrj   char *data;
7738fd1498Szrj 
7838fd1498Szrj   /*  The size of the DATA array above.*/
7938fd1498Szrj   size_t size;
8038fd1498Szrj 
8138fd1498Szrj   /* The number of bytes read from the underlying file so far.  This
8238fd1498Szrj      must be less (or equal) than SIZE above.  */
8338fd1498Szrj   size_t nb_read;
8438fd1498Szrj 
8538fd1498Szrj   /* The index of the beginning of the current line.  */
8638fd1498Szrj   size_t line_start_idx;
8738fd1498Szrj 
8838fd1498Szrj   /* The number of the previous line read.  This starts at 1.  Zero
8938fd1498Szrj      means we've read no line so far.  */
9038fd1498Szrj   size_t line_num;
9138fd1498Szrj 
9238fd1498Szrj   /* This is the total number of lines of the current file.  At the
9338fd1498Szrj      moment, we try to get this information from the line map
9438fd1498Szrj      subsystem.  Note that this is just a hint.  When using the C++
9538fd1498Szrj      front-end, this hint is correct because the input file is then
9638fd1498Szrj      completely tokenized before parsing starts; so the line map knows
9738fd1498Szrj      the number of lines before compilation really starts.  For e.g,
9838fd1498Szrj      the C front-end, it can happen that we start emitting diagnostics
9938fd1498Szrj      before the line map has seen the end of the file.  */
10038fd1498Szrj   size_t total_lines;
10138fd1498Szrj 
10238fd1498Szrj   /* Could this file be missing a trailing newline on its final line?
10338fd1498Szrj      Initially true (to cope with empty files), set to true/false
10438fd1498Szrj      as each line is read.  */
10538fd1498Szrj   bool missing_trailing_newline;
10638fd1498Szrj 
10738fd1498Szrj   /* This is a record of the beginning and end of the lines we've seen
10838fd1498Szrj      while reading the file.  This is useful to avoid walking the data
10938fd1498Szrj      from the beginning when we are asked to read a line that is
11038fd1498Szrj      before LINE_START_IDX above.  Note that the maximum size of this
11138fd1498Szrj      record is fcache_line_record_size, so that the memory consumption
11238fd1498Szrj      doesn't explode.  We thus scale total_lines down to
11338fd1498Szrj      fcache_line_record_size.  */
11438fd1498Szrj   vec<line_info, va_heap> line_record;
11538fd1498Szrj 
11638fd1498Szrj   fcache ();
11738fd1498Szrj   ~fcache ();
11838fd1498Szrj };
11938fd1498Szrj 
12038fd1498Szrj /* Current position in real source file.  */
12138fd1498Szrj 
12238fd1498Szrj location_t input_location = UNKNOWN_LOCATION;
12338fd1498Szrj 
12438fd1498Szrj struct line_maps *line_table;
12538fd1498Szrj 
12638fd1498Szrj /* A stashed copy of "line_table" for use by selftest::line_table_test.
12738fd1498Szrj    This needs to be a global so that it can be a GC root, and thus
12838fd1498Szrj    prevent the stashed copy from being garbage-collected if the GC runs
12938fd1498Szrj    during a line_table_test.  */
13038fd1498Szrj 
13138fd1498Szrj struct line_maps *saved_line_table;
13238fd1498Szrj 
13338fd1498Szrj static fcache *fcache_tab;
13438fd1498Szrj static const size_t fcache_tab_size = 16;
13538fd1498Szrj static const size_t fcache_buffer_size = 4 * 1024;
13638fd1498Szrj static const size_t fcache_line_record_size = 100;
13738fd1498Szrj 
13838fd1498Szrj /* Expand the source location LOC into a human readable location.  If
13938fd1498Szrj    LOC resolves to a builtin location, the file name of the readable
14038fd1498Szrj    location is set to the string "<built-in>". If EXPANSION_POINT_P is
14138fd1498Szrj    TRUE and LOC is virtual, then it is resolved to the expansion
14238fd1498Szrj    point of the involved macro.  Otherwise, it is resolved to the
14338fd1498Szrj    spelling location of the token.
14438fd1498Szrj 
14538fd1498Szrj    When resolving to the spelling location of the token, if the
14638fd1498Szrj    resulting location is for a built-in location (that is, it has no
14738fd1498Szrj    associated line/column) in the context of a macro expansion, the
14838fd1498Szrj    returned location is the first one (while unwinding the macro
14938fd1498Szrj    location towards its expansion point) that is in real source
15038fd1498Szrj    code.
15138fd1498Szrj 
15238fd1498Szrj    ASPECT controls which part of the location to use.  */
15338fd1498Szrj 
15438fd1498Szrj static expanded_location
expand_location_1(source_location loc,bool expansion_point_p,enum location_aspect aspect)15538fd1498Szrj expand_location_1 (source_location loc,
15638fd1498Szrj 		   bool expansion_point_p,
15738fd1498Szrj 		   enum location_aspect aspect)
15838fd1498Szrj {
15938fd1498Szrj   expanded_location xloc;
16038fd1498Szrj   const line_map_ordinary *map;
16138fd1498Szrj   enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
16238fd1498Szrj   tree block = NULL;
16338fd1498Szrj 
16438fd1498Szrj   if (IS_ADHOC_LOC (loc))
16538fd1498Szrj     {
16638fd1498Szrj       block = LOCATION_BLOCK (loc);
16738fd1498Szrj       loc = LOCATION_LOCUS (loc);
16838fd1498Szrj     }
16938fd1498Szrj 
17038fd1498Szrj   memset (&xloc, 0, sizeof (xloc));
17138fd1498Szrj 
17238fd1498Szrj   if (loc >= RESERVED_LOCATION_COUNT)
17338fd1498Szrj     {
17438fd1498Szrj       if (!expansion_point_p)
17538fd1498Szrj 	{
17638fd1498Szrj 	  /* We want to resolve LOC to its spelling location.
17738fd1498Szrj 
17838fd1498Szrj 	     But if that spelling location is a reserved location that
17938fd1498Szrj 	     appears in the context of a macro expansion (like for a
18038fd1498Szrj 	     location for a built-in token), let's consider the first
18138fd1498Szrj 	     location (toward the expansion point) that is not reserved;
18238fd1498Szrj 	     that is, the first location that is in real source code.  */
18338fd1498Szrj 	  loc = linemap_unwind_to_first_non_reserved_loc (line_table,
18438fd1498Szrj 							  loc, NULL);
18538fd1498Szrj 	  lrk = LRK_SPELLING_LOCATION;
18638fd1498Szrj 	}
18738fd1498Szrj       loc = linemap_resolve_location (line_table, loc, lrk, &map);
18838fd1498Szrj 
18938fd1498Szrj       /* loc is now either in an ordinary map, or is a reserved location.
19038fd1498Szrj 	 If it is a compound location, the caret is in a spelling location,
19138fd1498Szrj 	 but the start/finish might still be a virtual location.
19238fd1498Szrj 	 Depending of what the caller asked for, we may need to recurse
19338fd1498Szrj 	 one level in order to resolve any virtual locations in the
19438fd1498Szrj 	 end-points.  */
19538fd1498Szrj       switch (aspect)
19638fd1498Szrj 	{
19738fd1498Szrj 	default:
19838fd1498Szrj 	  gcc_unreachable ();
19938fd1498Szrj 	  /* Fall through.  */
20038fd1498Szrj 	case LOCATION_ASPECT_CARET:
20138fd1498Szrj 	  break;
20238fd1498Szrj 	case LOCATION_ASPECT_START:
20338fd1498Szrj 	  {
20438fd1498Szrj 	    source_location start = get_start (loc);
20538fd1498Szrj 	    if (start != loc)
20638fd1498Szrj 	      return expand_location_1 (start, expansion_point_p, aspect);
20738fd1498Szrj 	  }
20838fd1498Szrj 	  break;
20938fd1498Szrj 	case LOCATION_ASPECT_FINISH:
21038fd1498Szrj 	  {
21138fd1498Szrj 	    source_location finish = get_finish (loc);
21238fd1498Szrj 	    if (finish != loc)
21338fd1498Szrj 	      return expand_location_1 (finish, expansion_point_p, aspect);
21438fd1498Szrj 	  }
21538fd1498Szrj 	  break;
21638fd1498Szrj 	}
21738fd1498Szrj       xloc = linemap_expand_location (line_table, map, loc);
21838fd1498Szrj     }
21938fd1498Szrj 
22038fd1498Szrj   xloc.data = block;
22138fd1498Szrj   if (loc <= BUILTINS_LOCATION)
22238fd1498Szrj     xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
22338fd1498Szrj 
22438fd1498Szrj   return xloc;
22538fd1498Szrj }
22638fd1498Szrj 
22738fd1498Szrj /* Initialize the set of cache used for files accessed by caret
22838fd1498Szrj    diagnostic.  */
22938fd1498Szrj 
23038fd1498Szrj static void
diagnostic_file_cache_init(void)23138fd1498Szrj diagnostic_file_cache_init (void)
23238fd1498Szrj {
23338fd1498Szrj   if (fcache_tab == NULL)
23438fd1498Szrj     fcache_tab = new fcache[fcache_tab_size];
23538fd1498Szrj }
23638fd1498Szrj 
23738fd1498Szrj /* Free the resources used by the set of cache used for files accessed
23838fd1498Szrj    by caret diagnostic.  */
23938fd1498Szrj 
24038fd1498Szrj void
diagnostic_file_cache_fini(void)24138fd1498Szrj diagnostic_file_cache_fini (void)
24238fd1498Szrj {
24338fd1498Szrj   if (fcache_tab)
24438fd1498Szrj     {
24538fd1498Szrj       delete [] (fcache_tab);
24638fd1498Szrj       fcache_tab = NULL;
24738fd1498Szrj     }
24838fd1498Szrj }
24938fd1498Szrj 
25038fd1498Szrj /* Return the total lines number that have been read so far by the
25138fd1498Szrj    line map (in the preprocessor) so far.  For languages like C++ that
25238fd1498Szrj    entirely preprocess the input file before starting to parse, this
25338fd1498Szrj    equals the actual number of lines of the file.  */
25438fd1498Szrj 
25538fd1498Szrj static size_t
total_lines_num(const char * file_path)25638fd1498Szrj total_lines_num (const char *file_path)
25738fd1498Szrj {
25838fd1498Szrj   size_t r = 0;
25938fd1498Szrj   source_location l = 0;
26038fd1498Szrj   if (linemap_get_file_highest_location (line_table, file_path, &l))
26138fd1498Szrj     {
26238fd1498Szrj       gcc_assert (l >= RESERVED_LOCATION_COUNT);
26338fd1498Szrj       expanded_location xloc = expand_location (l);
26438fd1498Szrj       r = xloc.line;
26538fd1498Szrj     }
26638fd1498Szrj   return r;
26738fd1498Szrj }
26838fd1498Szrj 
26938fd1498Szrj /* Lookup the cache used for the content of a given file accessed by
27038fd1498Szrj    caret diagnostic.  Return the found cached file, or NULL if no
27138fd1498Szrj    cached file was found.  */
27238fd1498Szrj 
27338fd1498Szrj static fcache*
lookup_file_in_cache_tab(const char * file_path)27438fd1498Szrj lookup_file_in_cache_tab (const char *file_path)
27538fd1498Szrj {
27638fd1498Szrj   if (file_path == NULL)
27738fd1498Szrj     return NULL;
27838fd1498Szrj 
27938fd1498Szrj   diagnostic_file_cache_init ();
28038fd1498Szrj 
28138fd1498Szrj   /* This will contain the found cached file.  */
28238fd1498Szrj   fcache *r = NULL;
28338fd1498Szrj   for (unsigned i = 0; i < fcache_tab_size; ++i)
28438fd1498Szrj     {
28538fd1498Szrj       fcache *c = &fcache_tab[i];
28638fd1498Szrj       if (c->file_path && !strcmp (c->file_path, file_path))
28738fd1498Szrj 	{
28838fd1498Szrj 	  ++c->use_count;
28938fd1498Szrj 	  r = c;
29038fd1498Szrj 	}
29138fd1498Szrj     }
29238fd1498Szrj 
29338fd1498Szrj   if (r)
29438fd1498Szrj     ++r->use_count;
29538fd1498Szrj 
29638fd1498Szrj   return r;
29738fd1498Szrj }
29838fd1498Szrj 
29938fd1498Szrj /* Purge any mention of FILENAME from the cache of files used for
30038fd1498Szrj    printing source code.  For use in selftests when working
30138fd1498Szrj    with tempfiles.  */
30238fd1498Szrj 
30338fd1498Szrj void
diagnostics_file_cache_forcibly_evict_file(const char * file_path)30438fd1498Szrj diagnostics_file_cache_forcibly_evict_file (const char *file_path)
30538fd1498Szrj {
30638fd1498Szrj   gcc_assert (file_path);
30738fd1498Szrj 
30838fd1498Szrj   fcache *r = lookup_file_in_cache_tab (file_path);
30938fd1498Szrj   if (!r)
31038fd1498Szrj     /* Not found.  */
31138fd1498Szrj     return;
31238fd1498Szrj 
31338fd1498Szrj   r->file_path = NULL;
31438fd1498Szrj   if (r->fp)
31538fd1498Szrj     fclose (r->fp);
31638fd1498Szrj   r->fp = NULL;
31738fd1498Szrj   r->nb_read = 0;
31838fd1498Szrj   r->line_start_idx = 0;
31938fd1498Szrj   r->line_num = 0;
32038fd1498Szrj   r->line_record.truncate (0);
32138fd1498Szrj   r->use_count = 0;
32238fd1498Szrj   r->total_lines = 0;
32338fd1498Szrj   r->missing_trailing_newline = true;
32438fd1498Szrj }
32538fd1498Szrj 
32638fd1498Szrj /* Return the file cache that has been less used, recently, or the
32738fd1498Szrj    first empty one.  If HIGHEST_USE_COUNT is non-null,
32838fd1498Szrj    *HIGHEST_USE_COUNT is set to the highest use count of the entries
32938fd1498Szrj    in the cache table.  */
33038fd1498Szrj 
33138fd1498Szrj static fcache*
evicted_cache_tab_entry(unsigned * highest_use_count)33238fd1498Szrj evicted_cache_tab_entry (unsigned *highest_use_count)
33338fd1498Szrj {
33438fd1498Szrj   diagnostic_file_cache_init ();
33538fd1498Szrj 
33638fd1498Szrj   fcache *to_evict = &fcache_tab[0];
33738fd1498Szrj   unsigned huc = to_evict->use_count;
33838fd1498Szrj   for (unsigned i = 1; i < fcache_tab_size; ++i)
33938fd1498Szrj     {
34038fd1498Szrj       fcache *c = &fcache_tab[i];
34138fd1498Szrj       bool c_is_empty = (c->file_path == NULL);
34238fd1498Szrj 
34338fd1498Szrj       if (c->use_count < to_evict->use_count
34438fd1498Szrj 	  || (to_evict->file_path && c_is_empty))
34538fd1498Szrj 	/* We evict C because it's either an entry with a lower use
34638fd1498Szrj 	   count or one that is empty.  */
34738fd1498Szrj 	to_evict = c;
34838fd1498Szrj 
34938fd1498Szrj       if (huc < c->use_count)
35038fd1498Szrj 	huc = c->use_count;
35138fd1498Szrj 
35238fd1498Szrj       if (c_is_empty)
35338fd1498Szrj 	/* We've reached the end of the cache; subsequent elements are
35438fd1498Szrj 	   all empty.  */
35538fd1498Szrj 	break;
35638fd1498Szrj     }
35738fd1498Szrj 
35838fd1498Szrj   if (highest_use_count)
35938fd1498Szrj     *highest_use_count = huc;
36038fd1498Szrj 
36138fd1498Szrj   return to_evict;
36238fd1498Szrj }
36338fd1498Szrj 
36438fd1498Szrj /* Create the cache used for the content of a given file to be
36538fd1498Szrj    accessed by caret diagnostic.  This cache is added to an array of
36638fd1498Szrj    cache and can be retrieved by lookup_file_in_cache_tab.  This
36738fd1498Szrj    function returns the created cache.  Note that only the last
36838fd1498Szrj    fcache_tab_size files are cached.  */
36938fd1498Szrj 
37038fd1498Szrj static fcache*
add_file_to_cache_tab(const char * file_path)37138fd1498Szrj add_file_to_cache_tab (const char *file_path)
37238fd1498Szrj {
37338fd1498Szrj 
37438fd1498Szrj   FILE *fp = fopen (file_path, "r");
37538fd1498Szrj   if (fp == NULL)
37638fd1498Szrj     return NULL;
37738fd1498Szrj 
37838fd1498Szrj   unsigned highest_use_count = 0;
37938fd1498Szrj   fcache *r = evicted_cache_tab_entry (&highest_use_count);
38038fd1498Szrj   r->file_path = file_path;
38138fd1498Szrj   if (r->fp)
38238fd1498Szrj     fclose (r->fp);
38338fd1498Szrj   r->fp = fp;
38438fd1498Szrj   r->nb_read = 0;
38538fd1498Szrj   r->line_start_idx = 0;
38638fd1498Szrj   r->line_num = 0;
38738fd1498Szrj   r->line_record.truncate (0);
38838fd1498Szrj   /* Ensure that this cache entry doesn't get evicted next time
38938fd1498Szrj      add_file_to_cache_tab is called.  */
39038fd1498Szrj   r->use_count = ++highest_use_count;
39138fd1498Szrj   r->total_lines = total_lines_num (file_path);
39238fd1498Szrj   r->missing_trailing_newline = true;
39338fd1498Szrj 
39438fd1498Szrj   return r;
39538fd1498Szrj }
39638fd1498Szrj 
39738fd1498Szrj /* Lookup the cache used for the content of a given file accessed by
39838fd1498Szrj    caret diagnostic.  If no cached file was found, create a new cache
39938fd1498Szrj    for this file, add it to the array of cached file and return
40038fd1498Szrj    it.  */
40138fd1498Szrj 
40238fd1498Szrj static fcache*
lookup_or_add_file_to_cache_tab(const char * file_path)40338fd1498Szrj lookup_or_add_file_to_cache_tab (const char *file_path)
40438fd1498Szrj {
40538fd1498Szrj   fcache *r = lookup_file_in_cache_tab (file_path);
40638fd1498Szrj   if (r == NULL)
40738fd1498Szrj     r = add_file_to_cache_tab (file_path);
40838fd1498Szrj   return r;
40938fd1498Szrj }
41038fd1498Szrj 
41138fd1498Szrj /* Default constructor for a cache of file used by caret
41238fd1498Szrj    diagnostic.  */
41338fd1498Szrj 
fcache()41438fd1498Szrj fcache::fcache ()
41538fd1498Szrj : use_count (0), file_path (NULL), fp (NULL), data (0),
41638fd1498Szrj   size (0), nb_read (0), line_start_idx (0), line_num (0),
41738fd1498Szrj   total_lines (0), missing_trailing_newline (true)
41838fd1498Szrj {
41938fd1498Szrj   line_record.create (0);
42038fd1498Szrj }
42138fd1498Szrj 
42238fd1498Szrj /* Destructor for a cache of file used by caret diagnostic.  */
42338fd1498Szrj 
~fcache()42438fd1498Szrj fcache::~fcache ()
42538fd1498Szrj {
42638fd1498Szrj   if (fp)
42738fd1498Szrj     {
42838fd1498Szrj       fclose (fp);
42938fd1498Szrj       fp = NULL;
43038fd1498Szrj     }
43138fd1498Szrj   if (data)
43238fd1498Szrj     {
43338fd1498Szrj       XDELETEVEC (data);
43438fd1498Szrj       data = 0;
43538fd1498Szrj     }
43638fd1498Szrj   line_record.release ();
43738fd1498Szrj }
43838fd1498Szrj 
43938fd1498Szrj /* Returns TRUE iff the cache would need to be filled with data coming
44038fd1498Szrj    from the file.  That is, either the cache is empty or full or the
44138fd1498Szrj    current line is empty.  Note that if the cache is full, it would
44238fd1498Szrj    need to be extended and filled again.  */
44338fd1498Szrj 
44438fd1498Szrj static bool
needs_read(fcache * c)44538fd1498Szrj needs_read (fcache *c)
44638fd1498Szrj {
44738fd1498Szrj   return (c->nb_read == 0
44838fd1498Szrj 	  || c->nb_read == c->size
44938fd1498Szrj 	  || (c->line_start_idx >= c->nb_read - 1));
45038fd1498Szrj }
45138fd1498Szrj 
45238fd1498Szrj /*  Return TRUE iff the cache is full and thus needs to be
45338fd1498Szrj     extended.  */
45438fd1498Szrj 
45538fd1498Szrj static bool
needs_grow(fcache * c)45638fd1498Szrj needs_grow (fcache *c)
45738fd1498Szrj {
45838fd1498Szrj   return c->nb_read == c->size;
45938fd1498Szrj }
46038fd1498Szrj 
46138fd1498Szrj /* Grow the cache if it needs to be extended.  */
46238fd1498Szrj 
46338fd1498Szrj static void
maybe_grow(fcache * c)46438fd1498Szrj maybe_grow (fcache *c)
46538fd1498Szrj {
46638fd1498Szrj   if (!needs_grow (c))
46738fd1498Szrj     return;
46838fd1498Szrj 
46938fd1498Szrj   size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
47038fd1498Szrj   c->data = XRESIZEVEC (char, c->data, size);
47138fd1498Szrj   c->size = size;
47238fd1498Szrj }
47338fd1498Szrj 
47438fd1498Szrj /*  Read more data into the cache.  Extends the cache if need be.
47538fd1498Szrj     Returns TRUE iff new data could be read.  */
47638fd1498Szrj 
47738fd1498Szrj static bool
read_data(fcache * c)47838fd1498Szrj read_data (fcache *c)
47938fd1498Szrj {
48038fd1498Szrj   if (feof (c->fp) || ferror (c->fp))
48138fd1498Szrj     return false;
48238fd1498Szrj 
48338fd1498Szrj   maybe_grow (c);
48438fd1498Szrj 
48538fd1498Szrj   char * from = c->data + c->nb_read;
48638fd1498Szrj   size_t to_read = c->size - c->nb_read;
48738fd1498Szrj   size_t nb_read = fread (from, 1, to_read, c->fp);
48838fd1498Szrj 
48938fd1498Szrj   if (ferror (c->fp))
49038fd1498Szrj     return false;
49138fd1498Szrj 
49238fd1498Szrj   c->nb_read += nb_read;
49338fd1498Szrj   return !!nb_read;
49438fd1498Szrj }
49538fd1498Szrj 
49638fd1498Szrj /* Read new data iff the cache needs to be filled with more data
49738fd1498Szrj    coming from the file FP.  Return TRUE iff the cache was filled with
49838fd1498Szrj    mode data.  */
49938fd1498Szrj 
50038fd1498Szrj static bool
maybe_read_data(fcache * c)50138fd1498Szrj maybe_read_data (fcache *c)
50238fd1498Szrj {
50338fd1498Szrj   if (!needs_read (c))
50438fd1498Szrj     return false;
50538fd1498Szrj   return read_data (c);
50638fd1498Szrj }
50738fd1498Szrj 
50838fd1498Szrj /* Read a new line from file FP, using C as a cache for the data
50938fd1498Szrj    coming from the file.  Upon successful completion, *LINE is set to
51038fd1498Szrj    the beginning of the line found.  *LINE points directly in the
51138fd1498Szrj    line cache and is only valid until the next call of get_next_line.
51238fd1498Szrj    *LINE_LEN is set to the length of the line.  Note that the line
51338fd1498Szrj    does not contain any terminal delimiter.  This function returns
51438fd1498Szrj    true if some data was read or process from the cache, false
51538fd1498Szrj    otherwise.  Note that subsequent calls to get_next_line might
51638fd1498Szrj    make the content of *LINE invalid.  */
51738fd1498Szrj 
51838fd1498Szrj static bool
get_next_line(fcache * c,char ** line,ssize_t * line_len)51938fd1498Szrj get_next_line (fcache *c, char **line, ssize_t *line_len)
52038fd1498Szrj {
52138fd1498Szrj   /* Fill the cache with data to process.  */
52238fd1498Szrj   maybe_read_data (c);
52338fd1498Szrj 
52438fd1498Szrj   size_t remaining_size = c->nb_read - c->line_start_idx;
52538fd1498Szrj   if (remaining_size == 0)
52638fd1498Szrj     /* There is no more data to process.  */
52738fd1498Szrj     return false;
52838fd1498Szrj 
52938fd1498Szrj   char *line_start = c->data + c->line_start_idx;
53038fd1498Szrj 
53138fd1498Szrj   char *next_line_start = NULL;
53238fd1498Szrj   size_t len = 0;
53338fd1498Szrj   char *line_end = (char *) memchr (line_start, '\n', remaining_size);
53438fd1498Szrj   if (line_end == NULL)
53538fd1498Szrj     {
53638fd1498Szrj       /* We haven't found the end-of-line delimiter in the cache.
53738fd1498Szrj 	 Fill the cache with more data from the file and look for the
53838fd1498Szrj 	 '\n'.  */
53938fd1498Szrj       while (maybe_read_data (c))
54038fd1498Szrj 	{
54138fd1498Szrj 	  line_start = c->data + c->line_start_idx;
54238fd1498Szrj 	  remaining_size = c->nb_read - c->line_start_idx;
54338fd1498Szrj 	  line_end = (char *) memchr (line_start, '\n', remaining_size);
54438fd1498Szrj 	  if (line_end != NULL)
54538fd1498Szrj 	    {
54638fd1498Szrj 	      next_line_start = line_end + 1;
54738fd1498Szrj 	      break;
54838fd1498Szrj 	    }
54938fd1498Szrj 	}
55038fd1498Szrj       if (line_end == NULL)
55138fd1498Szrj 	{
55238fd1498Szrj 	  /* We've loadded all the file into the cache and still no
55338fd1498Szrj 	     '\n'.  Let's say the line ends up at one byte passed the
55438fd1498Szrj 	     end of the file.  This is to stay consistent with the case
55538fd1498Szrj 	     of when the line ends up with a '\n' and line_end points to
55638fd1498Szrj 	     that terminal '\n'.  That consistency is useful below in
55738fd1498Szrj 	     the len calculation.  */
55838fd1498Szrj 	  line_end = c->data + c->nb_read ;
55938fd1498Szrj 	  c->missing_trailing_newline = true;
56038fd1498Szrj 	}
56138fd1498Szrj       else
56238fd1498Szrj 	c->missing_trailing_newline = false;
56338fd1498Szrj     }
56438fd1498Szrj   else
56538fd1498Szrj     {
56638fd1498Szrj       next_line_start = line_end + 1;
56738fd1498Szrj       c->missing_trailing_newline = false;
56838fd1498Szrj     }
56938fd1498Szrj 
57038fd1498Szrj   if (ferror (c->fp))
57138fd1498Szrj     return false;
57238fd1498Szrj 
57338fd1498Szrj   /* At this point, we've found the end of the of line.  It either
57438fd1498Szrj      points to the '\n' or to one byte after the last byte of the
57538fd1498Szrj      file.  */
57638fd1498Szrj   gcc_assert (line_end != NULL);
57738fd1498Szrj 
57838fd1498Szrj   len = line_end - line_start;
57938fd1498Szrj 
58038fd1498Szrj   if (c->line_start_idx < c->nb_read)
58138fd1498Szrj     *line = line_start;
58238fd1498Szrj 
58338fd1498Szrj   ++c->line_num;
58438fd1498Szrj 
58538fd1498Szrj   /* Before we update our line record, make sure the hint about the
58638fd1498Szrj      total number of lines of the file is correct.  If it's not, then
58738fd1498Szrj      we give up recording line boundaries from now on.  */
58838fd1498Szrj   bool update_line_record = true;
58938fd1498Szrj   if (c->line_num > c->total_lines)
59038fd1498Szrj     update_line_record = false;
59138fd1498Szrj 
59238fd1498Szrj     /* Now update our line record so that re-reading lines from the
59338fd1498Szrj      before c->line_start_idx is faster.  */
59438fd1498Szrj   if (update_line_record
59538fd1498Szrj       && c->line_record.length () < fcache_line_record_size)
59638fd1498Szrj     {
59738fd1498Szrj       /* If the file lines fits in the line record, we just record all
59838fd1498Szrj 	 its lines ...*/
59938fd1498Szrj       if (c->total_lines <= fcache_line_record_size
60038fd1498Szrj 	  && c->line_num > c->line_record.length ())
60138fd1498Szrj 	c->line_record.safe_push (fcache::line_info (c->line_num,
60238fd1498Szrj 						 c->line_start_idx,
60338fd1498Szrj 						 line_end - c->data));
60438fd1498Szrj       else if (c->total_lines > fcache_line_record_size)
60538fd1498Szrj 	{
60638fd1498Szrj 	  /* ... otherwise, we just scale total_lines down to
60738fd1498Szrj 	     (fcache_line_record_size lines.  */
60838fd1498Szrj 	  size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
60938fd1498Szrj 	  if (c->line_record.length () == 0
61038fd1498Szrj 	      || n >= c->line_record.length ())
61138fd1498Szrj 	    c->line_record.safe_push (fcache::line_info (c->line_num,
61238fd1498Szrj 						     c->line_start_idx,
61338fd1498Szrj 						     line_end - c->data));
61438fd1498Szrj 	}
61538fd1498Szrj     }
61638fd1498Szrj 
61738fd1498Szrj   /* Update c->line_start_idx so that it points to the next line to be
61838fd1498Szrj      read.  */
61938fd1498Szrj   if (next_line_start)
62038fd1498Szrj     c->line_start_idx = next_line_start - c->data;
62138fd1498Szrj   else
62238fd1498Szrj     /* We didn't find any terminal '\n'.  Let's consider that the end
62338fd1498Szrj        of line is the end of the data in the cache.  The next
62438fd1498Szrj        invocation of get_next_line will either read more data from the
62538fd1498Szrj        underlying file or return false early because we've reached the
62638fd1498Szrj        end of the file.  */
62738fd1498Szrj     c->line_start_idx = c->nb_read;
62838fd1498Szrj 
62938fd1498Szrj   *line_len = len;
63038fd1498Szrj 
63138fd1498Szrj   return true;
63238fd1498Szrj }
63338fd1498Szrj 
63438fd1498Szrj /* Consume the next bytes coming from the cache (or from its
63538fd1498Szrj    underlying file if there are remaining unread bytes in the file)
63638fd1498Szrj    until we reach the next end-of-line (or end-of-file).  There is no
63738fd1498Szrj    copying from the cache involved.  Return TRUE upon successful
63838fd1498Szrj    completion.  */
63938fd1498Szrj 
64038fd1498Szrj static bool
goto_next_line(fcache * cache)64138fd1498Szrj goto_next_line (fcache *cache)
64238fd1498Szrj {
64338fd1498Szrj   char *l;
64438fd1498Szrj   ssize_t len;
64538fd1498Szrj 
64638fd1498Szrj   return get_next_line (cache, &l, &len);
64738fd1498Szrj }
64838fd1498Szrj 
64938fd1498Szrj /* Read an arbitrary line number LINE_NUM from the file cached in C.
65038fd1498Szrj    If the line was read successfully, *LINE points to the beginning
65138fd1498Szrj    of the line in the file cache and *LINE_LEN is the length of the
65238fd1498Szrj    line.  *LINE is not nul-terminated, but may contain zero bytes.
65338fd1498Szrj    *LINE is only valid until the next call of read_line_num.
65438fd1498Szrj    This function returns bool if a line was read.  */
65538fd1498Szrj 
65638fd1498Szrj static bool
read_line_num(fcache * c,size_t line_num,char ** line,ssize_t * line_len)65738fd1498Szrj read_line_num (fcache *c, size_t line_num,
65838fd1498Szrj 	       char **line, ssize_t *line_len)
65938fd1498Szrj {
66038fd1498Szrj   gcc_assert (line_num > 0);
66138fd1498Szrj 
66238fd1498Szrj   if (line_num <= c->line_num)
66338fd1498Szrj     {
66438fd1498Szrj       /* We've been asked to read lines that are before c->line_num.
66538fd1498Szrj 	 So lets use our line record (if it's not empty) to try to
66638fd1498Szrj 	 avoid re-reading the file from the beginning again.  */
66738fd1498Szrj 
66838fd1498Szrj       if (c->line_record.is_empty ())
66938fd1498Szrj 	{
67038fd1498Szrj 	  c->line_start_idx = 0;
67138fd1498Szrj 	  c->line_num = 0;
67238fd1498Szrj 	}
67338fd1498Szrj       else
67438fd1498Szrj 	{
67538fd1498Szrj 	  fcache::line_info *i = NULL;
67638fd1498Szrj 	  if (c->total_lines <= fcache_line_record_size)
67738fd1498Szrj 	    {
67838fd1498Szrj 	      /* In languages where the input file is not totally
67938fd1498Szrj 		 preprocessed up front, the c->total_lines hint
68038fd1498Szrj 		 can be smaller than the number of lines of the
68138fd1498Szrj 		 file.  In that case, only the first
68238fd1498Szrj 		 c->total_lines have been recorded.
68338fd1498Szrj 
68438fd1498Szrj 		 Otherwise, the first c->total_lines we've read have
68538fd1498Szrj 		 their start/end recorded here.  */
68638fd1498Szrj 	      i = (line_num <= c->total_lines)
68738fd1498Szrj 		? &c->line_record[line_num - 1]
68838fd1498Szrj 		: &c->line_record[c->total_lines - 1];
68938fd1498Szrj 	      gcc_assert (i->line_num <= line_num);
69038fd1498Szrj 	    }
69138fd1498Szrj 	  else
69238fd1498Szrj 	    {
69338fd1498Szrj 	      /*  So the file had more lines than our line record
69438fd1498Szrj 		  size.  Thus the number of lines we've recorded has
69538fd1498Szrj 		  been scaled down to fcache_line_reacord_size.  Let's
69638fd1498Szrj 		  pick the start/end of the recorded line that is
69738fd1498Szrj 		  closest to line_num.  */
69838fd1498Szrj 	      size_t n = (line_num <= c->total_lines)
69938fd1498Szrj 		? line_num * fcache_line_record_size / c->total_lines
70038fd1498Szrj 		: c ->line_record.length () - 1;
70138fd1498Szrj 	      if (n < c->line_record.length ())
70238fd1498Szrj 		{
70338fd1498Szrj 		  i = &c->line_record[n];
70438fd1498Szrj 		  gcc_assert (i->line_num <= line_num);
70538fd1498Szrj 		}
70638fd1498Szrj 	    }
70738fd1498Szrj 
70838fd1498Szrj 	  if (i && i->line_num == line_num)
70938fd1498Szrj 	    {
71038fd1498Szrj 	      /* We have the start/end of the line.  */
71138fd1498Szrj 	      *line = c->data + i->start_pos;
71238fd1498Szrj 	      *line_len = i->end_pos - i->start_pos;
71338fd1498Szrj 	      return true;
71438fd1498Szrj 	    }
71538fd1498Szrj 
71638fd1498Szrj 	  if (i)
71738fd1498Szrj 	    {
71838fd1498Szrj 	      c->line_start_idx = i->start_pos;
71938fd1498Szrj 	      c->line_num = i->line_num - 1;
72038fd1498Szrj 	    }
72138fd1498Szrj 	  else
72238fd1498Szrj 	    {
72338fd1498Szrj 	      c->line_start_idx = 0;
72438fd1498Szrj 	      c->line_num = 0;
72538fd1498Szrj 	    }
72638fd1498Szrj 	}
72738fd1498Szrj     }
72838fd1498Szrj 
72938fd1498Szrj   /*  Let's walk from line c->line_num up to line_num - 1, without
73038fd1498Szrj       copying any line.  */
73138fd1498Szrj   while (c->line_num < line_num - 1)
73238fd1498Szrj     if (!goto_next_line (c))
73338fd1498Szrj       return false;
73438fd1498Szrj 
73538fd1498Szrj   /* The line we want is the next one.  Let's read and copy it back to
73638fd1498Szrj      the caller.  */
73738fd1498Szrj   return get_next_line (c, line, line_len);
73838fd1498Szrj }
73938fd1498Szrj 
74038fd1498Szrj /* Return the physical source line that corresponds to FILE_PATH/LINE.
74138fd1498Szrj    The line is not nul-terminated.  The returned pointer is only
74238fd1498Szrj    valid until the next call of location_get_source_line.
74338fd1498Szrj    Note that the line can contain several null characters,
74438fd1498Szrj    so LINE_LEN, if non-null, points to the actual length of the line.
74538fd1498Szrj    If the function fails, NULL is returned.  */
74638fd1498Szrj 
74738fd1498Szrj const char *
location_get_source_line(const char * file_path,int line,int * line_len)74838fd1498Szrj location_get_source_line (const char *file_path, int line,
74938fd1498Szrj 			  int *line_len)
75038fd1498Szrj {
75138fd1498Szrj   char *buffer = NULL;
75238fd1498Szrj   ssize_t len;
75338fd1498Szrj 
75438fd1498Szrj   if (line == 0)
75538fd1498Szrj     return NULL;
75638fd1498Szrj 
75738fd1498Szrj   fcache *c = lookup_or_add_file_to_cache_tab (file_path);
75838fd1498Szrj   if (c == NULL)
75938fd1498Szrj     return NULL;
76038fd1498Szrj 
76138fd1498Szrj   bool read = read_line_num (c, line, &buffer, &len);
76238fd1498Szrj 
76338fd1498Szrj   if (read && line_len)
76438fd1498Szrj     *line_len = len;
76538fd1498Szrj 
76638fd1498Szrj   return read ? buffer : NULL;
76738fd1498Szrj }
76838fd1498Szrj 
76938fd1498Szrj /* Determine if FILE_PATH missing a trailing newline on its final line.
77038fd1498Szrj    Only valid to call once all of the file has been loaded, by
77138fd1498Szrj    requesting a line number beyond the end of the file.  */
77238fd1498Szrj 
77338fd1498Szrj bool
location_missing_trailing_newline(const char * file_path)77438fd1498Szrj location_missing_trailing_newline (const char *file_path)
77538fd1498Szrj {
77638fd1498Szrj   fcache *c = lookup_or_add_file_to_cache_tab (file_path);
77738fd1498Szrj   if (c == NULL)
77838fd1498Szrj     return false;
77938fd1498Szrj 
78038fd1498Szrj   return c->missing_trailing_newline;
78138fd1498Szrj }
78238fd1498Szrj 
78338fd1498Szrj /* Test if the location originates from the spelling location of a
78438fd1498Szrj    builtin-tokens.  That is, return TRUE if LOC is a (possibly
78538fd1498Szrj    virtual) location of a built-in token that appears in the expansion
78638fd1498Szrj    list of a macro.  Please note that this function also works on
78738fd1498Szrj    tokens that result from built-in tokens.  For instance, the
78838fd1498Szrj    function would return true if passed a token "4" that is the result
78938fd1498Szrj    of the expansion of the built-in __LINE__ macro.  */
79038fd1498Szrj bool
is_location_from_builtin_token(source_location loc)79138fd1498Szrj is_location_from_builtin_token (source_location loc)
79238fd1498Szrj {
79338fd1498Szrj   const line_map_ordinary *map = NULL;
79438fd1498Szrj   loc = linemap_resolve_location (line_table, loc,
79538fd1498Szrj 				  LRK_SPELLING_LOCATION, &map);
79638fd1498Szrj   return loc == BUILTINS_LOCATION;
79738fd1498Szrj }
79838fd1498Szrj 
79938fd1498Szrj /* Expand the source location LOC into a human readable location.  If
80038fd1498Szrj    LOC is virtual, it resolves to the expansion point of the involved
80138fd1498Szrj    macro.  If LOC resolves to a builtin location, the file name of the
80238fd1498Szrj    readable location is set to the string "<built-in>".  */
80338fd1498Szrj 
80438fd1498Szrj expanded_location
expand_location(source_location loc)80538fd1498Szrj expand_location (source_location loc)
80638fd1498Szrj {
80738fd1498Szrj   return expand_location_1 (loc, /*expansion_point_p=*/true,
80838fd1498Szrj 			    LOCATION_ASPECT_CARET);
80938fd1498Szrj }
81038fd1498Szrj 
81138fd1498Szrj /* Expand the source location LOC into a human readable location.  If
81238fd1498Szrj    LOC is virtual, it resolves to the expansion location of the
81338fd1498Szrj    relevant macro.  If LOC resolves to a builtin location, the file
81438fd1498Szrj    name of the readable location is set to the string
81538fd1498Szrj    "<built-in>".  */
81638fd1498Szrj 
81738fd1498Szrj expanded_location
expand_location_to_spelling_point(source_location loc)81838fd1498Szrj expand_location_to_spelling_point (source_location loc)
81938fd1498Szrj {
82038fd1498Szrj   return expand_location_1 (loc, /*expansion_point_p=*/false,
82138fd1498Szrj 			    LOCATION_ASPECT_CARET);
82238fd1498Szrj }
82338fd1498Szrj 
82438fd1498Szrj /* The rich_location class within libcpp requires a way to expand
82538fd1498Szrj    source_location instances, and relies on the client code
82638fd1498Szrj    providing a symbol named
82738fd1498Szrj      linemap_client_expand_location_to_spelling_point
82838fd1498Szrj    to do this.
82938fd1498Szrj 
83038fd1498Szrj    This is the implementation for libcommon.a (all host binaries),
83138fd1498Szrj    which simply calls into expand_location_1.  */
83238fd1498Szrj 
83338fd1498Szrj expanded_location
linemap_client_expand_location_to_spelling_point(source_location loc,enum location_aspect aspect)83438fd1498Szrj linemap_client_expand_location_to_spelling_point (source_location loc,
83538fd1498Szrj 						  enum location_aspect aspect)
83638fd1498Szrj {
83738fd1498Szrj   return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
83838fd1498Szrj }
83938fd1498Szrj 
84038fd1498Szrj 
84138fd1498Szrj /* If LOCATION is in a system header and if it is a virtual location for
84238fd1498Szrj    a token coming from the expansion of a macro, unwind it to the
84338fd1498Szrj    location of the expansion point of the macro.  Otherwise, just return
84438fd1498Szrj    LOCATION.
84538fd1498Szrj 
84638fd1498Szrj    This is used for instance when we want to emit diagnostics about a
84738fd1498Szrj    token that may be located in a macro that is itself defined in a
84838fd1498Szrj    system header, for example, for the NULL macro.  In such a case, if
84938fd1498Szrj    LOCATION were passed directly to diagnostic functions such as
85038fd1498Szrj    warning_at, the diagnostic would be suppressed (unless
85138fd1498Szrj    -Wsystem-headers).  */
85238fd1498Szrj 
85338fd1498Szrj source_location
expansion_point_location_if_in_system_header(source_location location)85438fd1498Szrj expansion_point_location_if_in_system_header (source_location location)
85538fd1498Szrj {
85638fd1498Szrj   if (in_system_header_at (location))
85738fd1498Szrj     location = linemap_resolve_location (line_table, location,
85838fd1498Szrj 					 LRK_MACRO_EXPANSION_POINT,
85938fd1498Szrj 					 NULL);
86038fd1498Szrj   return location;
86138fd1498Szrj }
86238fd1498Szrj 
86338fd1498Szrj /* If LOCATION is a virtual location for a token coming from the expansion
86438fd1498Szrj    of a macro, unwind to the location of the expansion point of the macro.  */
86538fd1498Szrj 
86638fd1498Szrj source_location
expansion_point_location(source_location location)86738fd1498Szrj expansion_point_location (source_location location)
86838fd1498Szrj {
86938fd1498Szrj   return linemap_resolve_location (line_table, location,
87038fd1498Szrj 				   LRK_MACRO_EXPANSION_POINT, NULL);
87138fd1498Szrj }
87238fd1498Szrj 
87338fd1498Szrj /* Construct a location with caret at CARET, ranging from START to
87438fd1498Szrj    finish e.g.
87538fd1498Szrj 
87638fd1498Szrj                  11111111112
87738fd1498Szrj         12345678901234567890
87838fd1498Szrj      522
87938fd1498Szrj      523   return foo + bar;
88038fd1498Szrj                   ~~~~^~~~~
88138fd1498Szrj      524
88238fd1498Szrj 
88338fd1498Szrj    The location's caret is at the "+", line 523 column 15, but starts
88438fd1498Szrj    earlier, at the "f" of "foo" at column 11.  The finish is at the "r"
88538fd1498Szrj    of "bar" at column 19.  */
88638fd1498Szrj 
88738fd1498Szrj location_t
make_location(location_t caret,location_t start,location_t finish)88838fd1498Szrj make_location (location_t caret, location_t start, location_t finish)
88938fd1498Szrj {
89038fd1498Szrj   location_t pure_loc = get_pure_location (caret);
89138fd1498Szrj   source_range src_range;
89238fd1498Szrj   src_range.m_start = get_start (start);
89338fd1498Szrj   src_range.m_finish = get_finish (finish);
89438fd1498Szrj   location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
89538fd1498Szrj 						   pure_loc,
89638fd1498Szrj 						   src_range,
89738fd1498Szrj 						   NULL);
89838fd1498Szrj   return combined_loc;
89938fd1498Szrj }
90038fd1498Szrj 
90138fd1498Szrj /* Same as above, but taking a source range rather than two locations.  */
90238fd1498Szrj 
90338fd1498Szrj location_t
make_location(location_t caret,source_range src_range)90438fd1498Szrj make_location (location_t caret, source_range src_range)
90538fd1498Szrj {
90638fd1498Szrj   location_t pure_loc = get_pure_location (caret);
90738fd1498Szrj   return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
90838fd1498Szrj }
90938fd1498Szrj 
/* Binary size units.  */
#define ONE_K 1024
#define ONE_M (ONE_K * ONE_K)

/* Scale X for display, yielding an unsigned long that is:
   - X / (1024 * 1024), if X is >= 10 M (in base 2)
   - X / 1024, if X is >= 10 K (in base 2) but below 10 M
   - X itself otherwise.
   Note that X is evaluated more than once.  */
#define SCALE(x) ((unsigned long) ((x) >= 10 * ONE_M \
                  ? (x) / ONE_M \
                  : ((x) >= 10 * ONE_K \
                     ? (x) / ONE_K \
                     : (x))))

/* Unit suffix matching SCALE (X):
   - the character 'M' if X is >= 10 M (in base 2)
   - the character 'k' if X is >= 10 K (in base 2) but strictly
     lower than 10 M
   - the character ' ' if X is strictly lower than 10 K.
   Note that X is evaluated more than once.  */
#define STAT_LABEL(x) ((x) >= 10 * ONE_M ? 'M' : ((x) >= 10 * ONE_K ? 'k' : ' '))

/* Expand to two comma-separated arguments: SIZE scaled to a multiple
   of 1K or 1M (in base 2), followed by the matching unit character
   (either k, M, or ' ').  */
#define FORMAT_AMOUNT(size) SCALE (size), STAT_LABEL (size)
93438fd1498Szrj 
93538fd1498Szrj /* Dump statistics to stderr about the memory usage of the line_table
93638fd1498Szrj    set of line maps.  This also displays some statistics about macro
93738fd1498Szrj    expansion.  */
93838fd1498Szrj 
93938fd1498Szrj void
dump_line_table_statistics(void)94038fd1498Szrj dump_line_table_statistics (void)
94138fd1498Szrj {
94238fd1498Szrj   struct linemap_stats s;
94338fd1498Szrj   long total_used_map_size,
94438fd1498Szrj     macro_maps_size,
94538fd1498Szrj     total_allocated_map_size;
94638fd1498Szrj 
94738fd1498Szrj   memset (&s, 0, sizeof (s));
94838fd1498Szrj 
94938fd1498Szrj   linemap_get_statistics (line_table, &s);
95038fd1498Szrj 
95138fd1498Szrj   macro_maps_size = s.macro_maps_used_size
95238fd1498Szrj     + s.macro_maps_locations_size;
95338fd1498Szrj 
95438fd1498Szrj   total_allocated_map_size = s.ordinary_maps_allocated_size
95538fd1498Szrj     + s.macro_maps_allocated_size
95638fd1498Szrj     + s.macro_maps_locations_size;
95738fd1498Szrj 
95838fd1498Szrj   total_used_map_size = s.ordinary_maps_used_size
95938fd1498Szrj     + s.macro_maps_used_size
96038fd1498Szrj     + s.macro_maps_locations_size;
96138fd1498Szrj 
96238fd1498Szrj   fprintf (stderr, "Number of expanded macros:                     %5ld\n",
96338fd1498Szrj            s.num_expanded_macros);
96438fd1498Szrj   if (s.num_expanded_macros != 0)
96538fd1498Szrj     fprintf (stderr, "Average number of tokens per macro expansion:  %5ld\n",
96638fd1498Szrj              s.num_macro_tokens / s.num_expanded_macros);
96738fd1498Szrj   fprintf (stderr,
96838fd1498Szrj            "\nLine Table allocations during the "
96938fd1498Szrj            "compilation process\n");
97038fd1498Szrj   fprintf (stderr, "Number of ordinary maps used:        %5ld%c\n",
97138fd1498Szrj            SCALE (s.num_ordinary_maps_used),
97238fd1498Szrj            STAT_LABEL (s.num_ordinary_maps_used));
97338fd1498Szrj   fprintf (stderr, "Ordinary map used size:              %5ld%c\n",
97438fd1498Szrj            SCALE (s.ordinary_maps_used_size),
97538fd1498Szrj            STAT_LABEL (s.ordinary_maps_used_size));
97638fd1498Szrj   fprintf (stderr, "Number of ordinary maps allocated:   %5ld%c\n",
97738fd1498Szrj            SCALE (s.num_ordinary_maps_allocated),
97838fd1498Szrj            STAT_LABEL (s.num_ordinary_maps_allocated));
97938fd1498Szrj   fprintf (stderr, "Ordinary maps allocated size:        %5ld%c\n",
98038fd1498Szrj            SCALE (s.ordinary_maps_allocated_size),
98138fd1498Szrj            STAT_LABEL (s.ordinary_maps_allocated_size));
98238fd1498Szrj   fprintf (stderr, "Number of macro maps used:           %5ld%c\n",
98338fd1498Szrj            SCALE (s.num_macro_maps_used),
98438fd1498Szrj            STAT_LABEL (s.num_macro_maps_used));
98538fd1498Szrj   fprintf (stderr, "Macro maps used size:                %5ld%c\n",
98638fd1498Szrj            SCALE (s.macro_maps_used_size),
98738fd1498Szrj            STAT_LABEL (s.macro_maps_used_size));
98838fd1498Szrj   fprintf (stderr, "Macro maps locations size:           %5ld%c\n",
98938fd1498Szrj            SCALE (s.macro_maps_locations_size),
99038fd1498Szrj            STAT_LABEL (s.macro_maps_locations_size));
99138fd1498Szrj   fprintf (stderr, "Macro maps size:                     %5ld%c\n",
99238fd1498Szrj            SCALE (macro_maps_size),
99338fd1498Szrj            STAT_LABEL (macro_maps_size));
99438fd1498Szrj   fprintf (stderr, "Duplicated maps locations size:      %5ld%c\n",
99538fd1498Szrj            SCALE (s.duplicated_macro_maps_locations_size),
99638fd1498Szrj            STAT_LABEL (s.duplicated_macro_maps_locations_size));
99738fd1498Szrj   fprintf (stderr, "Total allocated maps size:           %5ld%c\n",
99838fd1498Szrj            SCALE (total_allocated_map_size),
99938fd1498Szrj            STAT_LABEL (total_allocated_map_size));
100038fd1498Szrj   fprintf (stderr, "Total used maps size:                %5ld%c\n",
100138fd1498Szrj            SCALE (total_used_map_size),
100238fd1498Szrj            STAT_LABEL (total_used_map_size));
100338fd1498Szrj   fprintf (stderr, "Ad-hoc table size:                   %5ld%c\n",
100438fd1498Szrj 	   SCALE (s.adhoc_table_size),
100538fd1498Szrj 	   STAT_LABEL (s.adhoc_table_size));
100638fd1498Szrj   fprintf (stderr, "Ad-hoc table entries used:           %5ld\n",
100738fd1498Szrj 	   s.adhoc_table_entries_used);
100838fd1498Szrj   fprintf (stderr, "optimized_ranges: %i\n",
100938fd1498Szrj 	   line_table->num_optimized_ranges);
101038fd1498Szrj   fprintf (stderr, "unoptimized_ranges: %i\n",
101138fd1498Szrj 	   line_table->num_unoptimized_ranges);
101238fd1498Szrj 
101338fd1498Szrj   fprintf (stderr, "\n");
101438fd1498Szrj }
101538fd1498Szrj 
101638fd1498Szrj /* Get location one beyond the final location in ordinary map IDX.  */
101738fd1498Szrj 
101838fd1498Szrj static source_location
get_end_location(struct line_maps * set,unsigned int idx)101938fd1498Szrj get_end_location (struct line_maps *set, unsigned int idx)
102038fd1498Szrj {
102138fd1498Szrj   if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
102238fd1498Szrj     return set->highest_location;
102338fd1498Szrj 
102438fd1498Szrj   struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
102538fd1498Szrj   return MAP_START_LOCATION (next_map);
102638fd1498Szrj }
102738fd1498Szrj 
/* Helper function for write_digit_row.
   Write the units digit of DIGIT (i.e. DIGIT modulo 10) to STREAM as
   a single character.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
103538fd1498Szrj 
103638fd1498Szrj /* Helper function for dump_location_info.
103738fd1498Szrj    Write a row of numbers to STREAM, numbering a source line,
103838fd1498Szrj    giving the units, tens, hundreds etc of the column number.  */
103938fd1498Szrj 
104038fd1498Szrj static void
write_digit_row(FILE * stream,int indent,const line_map_ordinary * map,source_location loc,int max_col,int divisor)104138fd1498Szrj write_digit_row (FILE *stream, int indent,
104238fd1498Szrj 		 const line_map_ordinary *map,
104338fd1498Szrj 		 source_location loc, int max_col, int divisor)
104438fd1498Szrj {
104538fd1498Szrj   fprintf (stream, "%*c", indent, ' ');
104638fd1498Szrj   fprintf (stream, "|");
104738fd1498Szrj   for (int column = 1; column < max_col; column++)
104838fd1498Szrj     {
104938fd1498Szrj       source_location column_loc = loc + (column << map->m_range_bits);
105038fd1498Szrj       write_digit (stream, column_loc / divisor);
105138fd1498Szrj     }
105238fd1498Szrj   fprintf (stream, "\n");
105338fd1498Szrj }
105438fd1498Szrj 
105538fd1498Szrj /* Write a half-closed (START) / half-open (END) interval of
105638fd1498Szrj    source_location to STREAM.  */
105738fd1498Szrj 
105838fd1498Szrj static void
dump_location_range(FILE * stream,source_location start,source_location end)105938fd1498Szrj dump_location_range (FILE *stream,
106038fd1498Szrj 		     source_location start, source_location end)
106138fd1498Szrj {
106238fd1498Szrj   fprintf (stream,
106338fd1498Szrj 	   "  source_location interval: %u <= loc < %u\n",
106438fd1498Szrj 	   start, end);
106538fd1498Szrj }
106638fd1498Szrj 
106738fd1498Szrj /* Write a labelled description of a half-closed (START) / half-open (END)
106838fd1498Szrj    interval of source_location to STREAM.  */
106938fd1498Szrj 
107038fd1498Szrj static void
dump_labelled_location_range(FILE * stream,const char * name,source_location start,source_location end)107138fd1498Szrj dump_labelled_location_range (FILE *stream,
107238fd1498Szrj 			      const char *name,
107338fd1498Szrj 			      source_location start, source_location end)
107438fd1498Szrj {
107538fd1498Szrj   fprintf (stream, "%s\n", name);
107638fd1498Szrj   dump_location_range (stream, start, end);
107738fd1498Szrj   fprintf (stream, "\n");
107838fd1498Szrj }
107938fd1498Szrj 
108038fd1498Szrj /* Write a visualization of the locations in the line_table to STREAM.  */
108138fd1498Szrj 
108238fd1498Szrj void
dump_location_info(FILE * stream)108338fd1498Szrj dump_location_info (FILE *stream)
108438fd1498Szrj {
108538fd1498Szrj   /* Visualize the reserved locations.  */
108638fd1498Szrj   dump_labelled_location_range (stream, "RESERVED LOCATIONS",
108738fd1498Szrj 				0, RESERVED_LOCATION_COUNT);
108838fd1498Szrj 
108938fd1498Szrj   /* Visualize the ordinary line_map instances, rendering the sources. */
109038fd1498Szrj   for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
109138fd1498Szrj     {
109238fd1498Szrj       source_location end_location = get_end_location (line_table, idx);
109338fd1498Szrj       /* half-closed: doesn't include this one. */
109438fd1498Szrj 
109538fd1498Szrj       const line_map_ordinary *map
109638fd1498Szrj 	= LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
109738fd1498Szrj       fprintf (stream, "ORDINARY MAP: %i\n", idx);
109838fd1498Szrj       dump_location_range (stream,
109938fd1498Szrj 			   MAP_START_LOCATION (map), end_location);
110038fd1498Szrj       fprintf (stream, "  file: %s\n", ORDINARY_MAP_FILE_NAME (map));
110138fd1498Szrj       fprintf (stream, "  starting at line: %i\n",
110238fd1498Szrj 	       ORDINARY_MAP_STARTING_LINE_NUMBER (map));
110338fd1498Szrj       fprintf (stream, "  column and range bits: %i\n",
110438fd1498Szrj 	       map->m_column_and_range_bits);
110538fd1498Szrj       fprintf (stream, "  column bits: %i\n",
110638fd1498Szrj 	       map->m_column_and_range_bits - map->m_range_bits);
110738fd1498Szrj       fprintf (stream, "  range bits: %i\n",
110838fd1498Szrj 	       map->m_range_bits);
110938fd1498Szrj 
111038fd1498Szrj       /* Render the span of source lines that this "map" covers.  */
111138fd1498Szrj       for (source_location loc = MAP_START_LOCATION (map);
111238fd1498Szrj 	   loc < end_location;
111338fd1498Szrj 	   loc += (1 << map->m_range_bits) )
111438fd1498Szrj 	{
111538fd1498Szrj 	  gcc_assert (pure_location_p (line_table, loc) );
111638fd1498Szrj 
111738fd1498Szrj 	  expanded_location exploc
111838fd1498Szrj 	    = linemap_expand_location (line_table, map, loc);
111938fd1498Szrj 
112038fd1498Szrj 	  if (exploc.column == 0)
112138fd1498Szrj 	    {
112238fd1498Szrj 	      /* Beginning of a new source line: draw the line.  */
112338fd1498Szrj 
112438fd1498Szrj 	      int line_size;
112538fd1498Szrj 	      const char *line_text = location_get_source_line (exploc.file,
112638fd1498Szrj 								exploc.line,
112738fd1498Szrj 								&line_size);
112838fd1498Szrj 	      if (!line_text)
112938fd1498Szrj 		break;
113038fd1498Szrj 	      fprintf (stream,
113138fd1498Szrj 		       "%s:%3i|loc:%5i|%.*s\n",
113238fd1498Szrj 		       exploc.file, exploc.line,
113338fd1498Szrj 		       loc,
113438fd1498Szrj 		       line_size, line_text);
113538fd1498Szrj 
113638fd1498Szrj 	      /* "loc" is at column 0, which means "the whole line".
113738fd1498Szrj 		 Render the locations *within* the line, by underlining
113838fd1498Szrj 		 it, showing the source_location numeric values
113938fd1498Szrj 		 at each column.  */
114038fd1498Szrj 	      int max_col = (1 << map->m_column_and_range_bits) - 1;
114138fd1498Szrj 	      if (max_col > line_size)
114238fd1498Szrj 		max_col = line_size + 1;
114338fd1498Szrj 
114438fd1498Szrj 	      int indent = 14 + strlen (exploc.file);
114538fd1498Szrj 
114638fd1498Szrj 	      /* Thousands.  */
114738fd1498Szrj 	      if (end_location > 999)
114838fd1498Szrj 		write_digit_row (stream, indent, map, loc, max_col, 1000);
114938fd1498Szrj 
115038fd1498Szrj 	      /* Hundreds.  */
115138fd1498Szrj 	      if (end_location > 99)
115238fd1498Szrj 		write_digit_row (stream, indent, map, loc, max_col, 100);
115338fd1498Szrj 
115438fd1498Szrj 	      /* Tens.  */
115538fd1498Szrj 	      write_digit_row (stream, indent, map, loc, max_col, 10);
115638fd1498Szrj 
115738fd1498Szrj 	      /* Units.  */
115838fd1498Szrj 	      write_digit_row (stream, indent, map, loc, max_col, 1);
115938fd1498Szrj 	    }
116038fd1498Szrj 	}
116138fd1498Szrj       fprintf (stream, "\n");
116238fd1498Szrj     }
116338fd1498Szrj 
116438fd1498Szrj   /* Visualize unallocated values.  */
116538fd1498Szrj   dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
116638fd1498Szrj 				line_table->highest_location,
116738fd1498Szrj 				LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
116838fd1498Szrj 
116938fd1498Szrj   /* Visualize the macro line_map instances, rendering the sources. */
117038fd1498Szrj   for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
117138fd1498Szrj     {
117238fd1498Szrj       /* Each macro map that is allocated owns source_location values
117338fd1498Szrj 	 that are *lower* that the one before them.
117438fd1498Szrj 	 Hence it's meaningful to view them either in order of ascending
117538fd1498Szrj 	 source locations, or in order of ascending macro map index.  */
117638fd1498Szrj       const bool ascending_source_locations = true;
117738fd1498Szrj       unsigned int idx = (ascending_source_locations
117838fd1498Szrj 			  ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
117938fd1498Szrj 			  : i);
118038fd1498Szrj       const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
118138fd1498Szrj       fprintf (stream, "MACRO %i: %s (%u tokens)\n",
118238fd1498Szrj 	       idx,
118338fd1498Szrj 	       linemap_map_get_macro_name (map),
118438fd1498Szrj 	       MACRO_MAP_NUM_MACRO_TOKENS (map));
118538fd1498Szrj       dump_location_range (stream,
118638fd1498Szrj 			   map->start_location,
118738fd1498Szrj 			   (map->start_location
118838fd1498Szrj 			    + MACRO_MAP_NUM_MACRO_TOKENS (map)));
118938fd1498Szrj       inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
119038fd1498Szrj 	      "expansion point is location %i",
119138fd1498Szrj 	      MACRO_MAP_EXPANSION_POINT_LOCATION (map));
119238fd1498Szrj       fprintf (stream, "  map->start_location: %u\n",
119338fd1498Szrj 	       map->start_location);
119438fd1498Szrj 
119538fd1498Szrj       fprintf (stream, "  macro_locations:\n");
119638fd1498Szrj       for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
119738fd1498Szrj 	{
119838fd1498Szrj 	  source_location x = MACRO_MAP_LOCATIONS (map)[2 * i];
119938fd1498Szrj 	  source_location y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
120038fd1498Szrj 
120138fd1498Szrj 	  /* linemap_add_macro_token encodes token numbers in an expansion
120238fd1498Szrj 	     by putting them after MAP_START_LOCATION. */
120338fd1498Szrj 
120438fd1498Szrj 	  /* I'm typically seeing 4 uninitialized entries at the end of
120538fd1498Szrj 	     0xafafafaf.
120638fd1498Szrj 	     This appears to be due to macro.c:replace_args
120738fd1498Szrj 	     adding 2 extra args for padding tokens; presumably there may
120838fd1498Szrj 	     be a leading and/or trailing padding token injected,
120938fd1498Szrj 	     each for 2 more location slots.
121038fd1498Szrj 	     This would explain there being up to 4 source_locations slots
121138fd1498Szrj 	     that may be uninitialized.  */
121238fd1498Szrj 
121338fd1498Szrj 	  fprintf (stream, "    %u: %u, %u\n",
121438fd1498Szrj 		   i,
121538fd1498Szrj 		   x,
121638fd1498Szrj 		   y);
121738fd1498Szrj 	  if (x == y)
121838fd1498Szrj 	    {
121938fd1498Szrj 	      if (x < MAP_START_LOCATION (map))
122038fd1498Szrj 		inform (x, "token %u has x-location == y-location == %u", i, x);
122138fd1498Szrj 	      else
122238fd1498Szrj 		fprintf (stream,
122338fd1498Szrj 			 "x-location == y-location == %u encodes token # %u\n",
122438fd1498Szrj 			 x, x - MAP_START_LOCATION (map));
122538fd1498Szrj 		}
122638fd1498Szrj 	  else
122738fd1498Szrj 	    {
122838fd1498Szrj 	      inform (x, "token %u has x-location == %u", i, x);
122938fd1498Szrj 	      inform (x, "token %u has y-location == %u", i, y);
123038fd1498Szrj 	    }
123138fd1498Szrj 	}
123238fd1498Szrj       fprintf (stream, "\n");
123338fd1498Szrj     }
123438fd1498Szrj 
123538fd1498Szrj   /* It appears that MAX_SOURCE_LOCATION itself is never assigned to a
123638fd1498Szrj      macro map, presumably due to an off-by-one error somewhere
123738fd1498Szrj      between the logic in linemap_enter_macro and
123838fd1498Szrj      LINEMAPS_MACRO_LOWEST_LOCATION.  */
123938fd1498Szrj   dump_labelled_location_range (stream, "MAX_SOURCE_LOCATION",
124038fd1498Szrj 				MAX_SOURCE_LOCATION,
124138fd1498Szrj 				MAX_SOURCE_LOCATION + 1);
124238fd1498Szrj 
124338fd1498Szrj   /* Visualize ad-hoc values.  */
124438fd1498Szrj   dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
124538fd1498Szrj 				MAX_SOURCE_LOCATION + 1, UINT_MAX);
124638fd1498Szrj }
124738fd1498Szrj 
124838fd1498Szrj /* string_concat's constructor.  */
124938fd1498Szrj 
string_concat(int num,location_t * locs)125038fd1498Szrj string_concat::string_concat (int num, location_t *locs)
125138fd1498Szrj   : m_num (num)
125238fd1498Szrj {
125338fd1498Szrj   m_locs = ggc_vec_alloc <location_t> (num);
125438fd1498Szrj   for (int i = 0; i < num; i++)
125538fd1498Szrj     m_locs[i] = locs[i];
125638fd1498Szrj }
125738fd1498Szrj 
125838fd1498Szrj /* string_concat_db's constructor.  */
125938fd1498Szrj 
string_concat_db()126038fd1498Szrj string_concat_db::string_concat_db ()
126138fd1498Szrj {
126238fd1498Szrj   m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
126338fd1498Szrj }
126438fd1498Szrj 
126538fd1498Szrj /* Record that a string concatenation occurred, covering NUM
126638fd1498Szrj    string literal tokens.  LOCS is an array of size NUM, containing the
126738fd1498Szrj    locations of the tokens.  A copy of LOCS is taken.  */
126838fd1498Szrj 
126938fd1498Szrj void
record_string_concatenation(int num,location_t * locs)127038fd1498Szrj string_concat_db::record_string_concatenation (int num, location_t *locs)
127138fd1498Szrj {
127238fd1498Szrj   gcc_assert (num > 1);
127338fd1498Szrj   gcc_assert (locs);
127438fd1498Szrj 
127538fd1498Szrj   location_t key_loc = get_key_loc (locs[0]);
127638fd1498Szrj 
127738fd1498Szrj   string_concat *concat
127838fd1498Szrj     = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
127938fd1498Szrj   m_table->put (key_loc, concat);
128038fd1498Szrj }
128138fd1498Szrj 
128238fd1498Szrj /* Determine if LOC was the location of the the initial token of a
128338fd1498Szrj    concatenation of string literal tokens.
128438fd1498Szrj    If so, *OUT_NUM is written to with the number of tokens, and
128538fd1498Szrj    *OUT_LOCS with the location of an array of locations of the
128638fd1498Szrj    tokens, and return true.  *OUT_LOCS is a borrowed pointer to
128738fd1498Szrj    storage owned by the string_concat_db.
128838fd1498Szrj    Otherwise, return false.  */
128938fd1498Szrj 
129038fd1498Szrj bool
get_string_concatenation(location_t loc,int * out_num,location_t ** out_locs)129138fd1498Szrj string_concat_db::get_string_concatenation (location_t loc,
129238fd1498Szrj 					    int *out_num,
129338fd1498Szrj 					    location_t **out_locs)
129438fd1498Szrj {
129538fd1498Szrj   gcc_assert (out_num);
129638fd1498Szrj   gcc_assert (out_locs);
129738fd1498Szrj 
129838fd1498Szrj   location_t key_loc = get_key_loc (loc);
129938fd1498Szrj 
130038fd1498Szrj   string_concat **concat = m_table->get (key_loc);
130138fd1498Szrj   if (!concat)
130238fd1498Szrj     return false;
130338fd1498Szrj 
130438fd1498Szrj   *out_num = (*concat)->m_num;
130538fd1498Szrj   *out_locs =(*concat)->m_locs;
130638fd1498Szrj   return true;
130738fd1498Szrj }
130838fd1498Szrj 
130938fd1498Szrj /* Internal function.  Canonicalize LOC into a form suitable for
131038fd1498Szrj    use as a key within the database, stripping away macro expansion,
131138fd1498Szrj    ad-hoc information, and range information, using the location of
131238fd1498Szrj    the start of LOC within an ordinary linemap.  */
131338fd1498Szrj 
131438fd1498Szrj location_t
get_key_loc(location_t loc)131538fd1498Szrj string_concat_db::get_key_loc (location_t loc)
131638fd1498Szrj {
131738fd1498Szrj   loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
131838fd1498Szrj 				  NULL);
131938fd1498Szrj 
132038fd1498Szrj   loc = get_range_from_loc (line_table, loc).m_start;
132138fd1498Szrj 
132238fd1498Szrj   return loc;
132338fd1498Szrj }
132438fd1498Szrj 
132538fd1498Szrj /* Helper class for use within get_substring_ranges_for_loc.
   A vec of cpp_string with responsibility for releasing all of the
132738fd1498Szrj    str->text for each str in the vector.  */
132838fd1498Szrj 
132938fd1498Szrj class auto_cpp_string_vec :  public auto_vec <cpp_string>
133038fd1498Szrj {
133138fd1498Szrj  public:
auto_cpp_string_vec(int alloc)133238fd1498Szrj   auto_cpp_string_vec (int alloc)
133338fd1498Szrj     : auto_vec <cpp_string> (alloc) {}
133438fd1498Szrj 
~auto_cpp_string_vec()133538fd1498Szrj   ~auto_cpp_string_vec ()
133638fd1498Szrj   {
133738fd1498Szrj     /* Clean up the copies within this vec.  */
133838fd1498Szrj     int i;
133938fd1498Szrj     cpp_string *str;
134038fd1498Szrj     FOR_EACH_VEC_ELT (*this, i, str)
134138fd1498Szrj       free (const_cast <unsigned char *> (str->text));
134238fd1498Szrj   }
134338fd1498Szrj };
134438fd1498Szrj 
134538fd1498Szrj /* Attempt to populate RANGES with source location information on the
134638fd1498Szrj    individual characters within the string literal found at STRLOC.
134738fd1498Szrj    If CONCATS is non-NULL, then any string literals that the token at
134838fd1498Szrj    STRLOC  was concatenated with are also added to RANGES.
134938fd1498Szrj 
135038fd1498Szrj    Return NULL if successful, or an error message if any errors occurred (in
135138fd1498Szrj    which case RANGES may be only partially populated and should not
135238fd1498Szrj    be used).
135338fd1498Szrj 
135438fd1498Szrj    This is implemented by re-parsing the relevant source line(s).  */
135538fd1498Szrj 
135638fd1498Szrj static const char *
get_substring_ranges_for_loc(cpp_reader * pfile,string_concat_db * concats,location_t strloc,enum cpp_ttype type,cpp_substring_ranges & ranges)135738fd1498Szrj get_substring_ranges_for_loc (cpp_reader *pfile,
135838fd1498Szrj 			      string_concat_db *concats,
135938fd1498Szrj 			      location_t strloc,
136038fd1498Szrj 			      enum cpp_ttype type,
136138fd1498Szrj 			      cpp_substring_ranges &ranges)
136238fd1498Szrj {
136338fd1498Szrj   gcc_assert (pfile);
136438fd1498Szrj 
136538fd1498Szrj   if (strloc == UNKNOWN_LOCATION)
136638fd1498Szrj     return "unknown location";
136738fd1498Szrj 
136838fd1498Szrj   /* Reparsing the strings requires accurate location information.
136938fd1498Szrj      If -ftrack-macro-expansion has been overridden from its default
137038fd1498Szrj      of 2, then we might have a location of a macro expansion point,
137138fd1498Szrj      rather than the location of the literal itself.
137238fd1498Szrj      Avoid this by requiring that we have full macro expansion tracking
137338fd1498Szrj      for substring locations to be available.  */
137438fd1498Szrj   if (cpp_get_options (pfile)->track_macro_expansion != 2)
137538fd1498Szrj     return "track_macro_expansion != 2";
137638fd1498Szrj 
137738fd1498Szrj   /* If #line or # 44 "file"-style directives are present, then there's
137838fd1498Szrj      no guarantee that the line numbers we have can be used to locate
137938fd1498Szrj      the strings.  For example, we might have a .i file with # directives
138038fd1498Szrj      pointing back to lines within a .c file, but the .c file might
138138fd1498Szrj      have been edited since the .i file was created.
138238fd1498Szrj      In such a case, the safest course is to disable on-demand substring
138338fd1498Szrj      locations.  */
138438fd1498Szrj   if (line_table->seen_line_directive)
138538fd1498Szrj     return "seen line directive";
138638fd1498Szrj 
138738fd1498Szrj   /* If string concatenation has occurred at STRLOC, get the locations
138838fd1498Szrj      of all of the literal tokens making up the compound string.
138938fd1498Szrj      Otherwise, just use STRLOC.  */
139038fd1498Szrj   int num_locs = 1;
139138fd1498Szrj   location_t *strlocs = &strloc;
139238fd1498Szrj   if (concats)
139338fd1498Szrj     concats->get_string_concatenation (strloc, &num_locs, &strlocs);
139438fd1498Szrj 
139538fd1498Szrj   auto_cpp_string_vec strs (num_locs);
139638fd1498Szrj   auto_vec <cpp_string_location_reader> loc_readers (num_locs);
139738fd1498Szrj   for (int i = 0; i < num_locs; i++)
139838fd1498Szrj     {
139938fd1498Szrj       /* Get range of strloc.  We will use it to locate the start and finish
140038fd1498Szrj 	 of the literal token within the line.  */
140138fd1498Szrj       source_range src_range = get_range_from_loc (line_table, strlocs[i]);
140238fd1498Szrj 
140338fd1498Szrj       if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
140438fd1498Szrj 	/* If the string is within a macro expansion, we can't get at the
140538fd1498Szrj 	   end location.  */
140638fd1498Szrj 	return "macro expansion";
140738fd1498Szrj 
140838fd1498Szrj       if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
140938fd1498Szrj 	/* If so, we can't reliably determine where the token started within
141038fd1498Szrj 	   its line.  */
141138fd1498Szrj 	return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
141238fd1498Szrj 
141338fd1498Szrj       if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
141438fd1498Szrj 	/* If so, we can't reliably determine where the token finished within
141538fd1498Szrj 	   its line.  */
141638fd1498Szrj 	return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
141738fd1498Szrj 
141838fd1498Szrj       expanded_location start
141938fd1498Szrj 	= expand_location_to_spelling_point (src_range.m_start);
142038fd1498Szrj       expanded_location finish
142138fd1498Szrj 	= expand_location_to_spelling_point (src_range.m_finish);
142238fd1498Szrj       if (start.file != finish.file)
142338fd1498Szrj 	return "range endpoints are in different files";
142438fd1498Szrj       if (start.line != finish.line)
142538fd1498Szrj 	return "range endpoints are on different lines";
142638fd1498Szrj       if (start.column > finish.column)
142738fd1498Szrj 	return "range endpoints are reversed";
142838fd1498Szrj 
142938fd1498Szrj       int line_width;
143038fd1498Szrj       const char *line = location_get_source_line (start.file, start.line,
143138fd1498Szrj 						   &line_width);
143238fd1498Szrj       if (line == NULL)
143338fd1498Szrj 	return "unable to read source line";
143438fd1498Szrj 
143538fd1498Szrj       /* Determine the location of the literal (including quotes
143638fd1498Szrj 	 and leading prefix chars, such as the 'u' in a u""
143738fd1498Szrj 	 token).  */
143838fd1498Szrj       const char *literal = line + start.column - 1;
143938fd1498Szrj       int literal_length = finish.column - start.column + 1;
144038fd1498Szrj 
144138fd1498Szrj       /* Ensure that we don't crash if we got the wrong location.  */
144238fd1498Szrj       if (line_width < (start.column - 1 + literal_length))
144338fd1498Szrj 	return "line is not wide enough";
144438fd1498Szrj 
144538fd1498Szrj       cpp_string from;
144638fd1498Szrj       from.len = literal_length;
144738fd1498Szrj       /* Make a copy of the literal, to avoid having to rely on
144838fd1498Szrj 	 the lifetime of the copy of the line within the cache.
144938fd1498Szrj 	 This will be released by the auto_cpp_string_vec dtor.  */
145038fd1498Szrj       from.text = XDUPVEC (unsigned char, literal, literal_length);
145138fd1498Szrj       strs.safe_push (from);
145238fd1498Szrj 
145338fd1498Szrj       /* For very long lines, a new linemap could have started
145438fd1498Szrj 	 halfway through the token.
145538fd1498Szrj 	 Ensure that the loc_reader uses the linemap of the
145638fd1498Szrj 	 *end* of the token for its start location.  */
145738fd1498Szrj       const line_map_ordinary *final_ord_map;
145838fd1498Szrj       linemap_resolve_location (line_table, src_range.m_finish,
145938fd1498Szrj 				LRK_MACRO_EXPANSION_POINT, &final_ord_map);
146038fd1498Szrj       location_t start_loc
146138fd1498Szrj 	= linemap_position_for_line_and_column (line_table, final_ord_map,
146238fd1498Szrj 						start.line, start.column);
146338fd1498Szrj 
146438fd1498Szrj       cpp_string_location_reader loc_reader (start_loc, line_table);
146538fd1498Szrj       loc_readers.safe_push (loc_reader);
146638fd1498Szrj     }
146738fd1498Szrj 
146838fd1498Szrj   /* Rerun cpp_interpret_string, or rather, a modified version of it.  */
146938fd1498Szrj   const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
147038fd1498Szrj 						 loc_readers.address (),
147138fd1498Szrj 						 num_locs, &ranges, type);
147238fd1498Szrj   if (err)
147338fd1498Szrj     return err;
147438fd1498Szrj 
147538fd1498Szrj   /* Success: "ranges" should now contain information on the string.  */
147638fd1498Szrj   return NULL;
147738fd1498Szrj }
147838fd1498Szrj 
147938fd1498Szrj /* Attempt to populate *OUT_LOC with source location information on the
148038fd1498Szrj    given characters within the string literal found at STRLOC.
148138fd1498Szrj    CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
148238fd1498Szrj    character set.
148338fd1498Szrj 
148438fd1498Szrj    For example, given CARET_IDX = 4, START_IDX = 3, END_IDX  = 7
148538fd1498Szrj    and string literal "012345\n789"
148638fd1498Szrj    *OUT_LOC is written to with:
148738fd1498Szrj      "012345\n789"
148838fd1498Szrj          ~^~~~~
148938fd1498Szrj 
149038fd1498Szrj    If CONCATS is non-NULL, then any string literals that the token at
149138fd1498Szrj    STRLOC was concatenated with are also considered.
149238fd1498Szrj 
149338fd1498Szrj    This is implemented by re-parsing the relevant source line(s).
149438fd1498Szrj 
149538fd1498Szrj    Return NULL if successful, or an error message if any errors occurred.
149638fd1498Szrj    Error messages are intended for GCC developers (to help debugging) rather
149738fd1498Szrj    than for end-users.  */
149838fd1498Szrj 
149938fd1498Szrj const char *
get_source_location_for_substring(cpp_reader * pfile,string_concat_db * concats,location_t strloc,enum cpp_ttype type,int caret_idx,int start_idx,int end_idx,source_location * out_loc)150038fd1498Szrj get_source_location_for_substring (cpp_reader *pfile,
150138fd1498Szrj 				   string_concat_db *concats,
150238fd1498Szrj 				   location_t strloc,
150338fd1498Szrj 				   enum cpp_ttype type,
150438fd1498Szrj 				   int caret_idx, int start_idx, int end_idx,
150538fd1498Szrj 				   source_location *out_loc)
150638fd1498Szrj {
150738fd1498Szrj   gcc_checking_assert (caret_idx >= 0);
150838fd1498Szrj   gcc_checking_assert (start_idx >= 0);
150938fd1498Szrj   gcc_checking_assert (end_idx >= 0);
151038fd1498Szrj   gcc_assert (out_loc);
151138fd1498Szrj 
151238fd1498Szrj   cpp_substring_ranges ranges;
151338fd1498Szrj   const char *err
151438fd1498Szrj     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
151538fd1498Szrj   if (err)
151638fd1498Szrj     return err;
151738fd1498Szrj 
151838fd1498Szrj   if (caret_idx >= ranges.get_num_ranges ())
151938fd1498Szrj     return "caret_idx out of range";
152038fd1498Szrj   if (start_idx >= ranges.get_num_ranges ())
152138fd1498Szrj     return "start_idx out of range";
152238fd1498Szrj   if (end_idx >= ranges.get_num_ranges ())
152338fd1498Szrj     return "end_idx out of range";
152438fd1498Szrj 
152538fd1498Szrj   *out_loc = make_location (ranges.get_range (caret_idx).m_start,
152638fd1498Szrj 			    ranges.get_range (start_idx).m_start,
152738fd1498Szrj 			    ranges.get_range (end_idx).m_finish);
152838fd1498Szrj   return NULL;
152938fd1498Szrj }
153038fd1498Szrj 
153138fd1498Szrj #if CHECKING_P
153238fd1498Szrj 
153338fd1498Szrj namespace selftest {
153438fd1498Szrj 
153538fd1498Szrj /* Selftests of location handling.  */
153638fd1498Szrj 
153738fd1498Szrj /* Attempt to populate *OUT_RANGE with source location information on the
153838fd1498Szrj    given character within the string literal found at STRLOC.
153938fd1498Szrj    CHAR_IDX refers to an offset within the execution character set.
154038fd1498Szrj    If CONCATS is non-NULL, then any string literals that the token at
154138fd1498Szrj    STRLOC was concatenated with are also considered.
154238fd1498Szrj 
154338fd1498Szrj    This is implemented by re-parsing the relevant source line(s).
154438fd1498Szrj 
154538fd1498Szrj    Return NULL if successful, or an error message if any errors occurred.
154638fd1498Szrj    Error messages are intended for GCC developers (to help debugging) rather
154738fd1498Szrj    than for end-users.  */
154838fd1498Szrj 
154938fd1498Szrj static const char *
get_source_range_for_char(cpp_reader * pfile,string_concat_db * concats,location_t strloc,enum cpp_ttype type,int char_idx,source_range * out_range)155038fd1498Szrj get_source_range_for_char (cpp_reader *pfile,
155138fd1498Szrj 			   string_concat_db *concats,
155238fd1498Szrj 			   location_t strloc,
155338fd1498Szrj 			   enum cpp_ttype type,
155438fd1498Szrj 			   int char_idx,
155538fd1498Szrj 			   source_range *out_range)
155638fd1498Szrj {
155738fd1498Szrj   gcc_checking_assert (char_idx >= 0);
155838fd1498Szrj   gcc_assert (out_range);
155938fd1498Szrj 
156038fd1498Szrj   cpp_substring_ranges ranges;
156138fd1498Szrj   const char *err
156238fd1498Szrj     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
156338fd1498Szrj   if (err)
156438fd1498Szrj     return err;
156538fd1498Szrj 
156638fd1498Szrj   if (char_idx >= ranges.get_num_ranges ())
156738fd1498Szrj     return "char_idx out of range";
156838fd1498Szrj 
156938fd1498Szrj   *out_range = ranges.get_range (char_idx);
157038fd1498Szrj   return NULL;
157138fd1498Szrj }
157238fd1498Szrj 
157338fd1498Szrj /* As get_source_range_for_char, but write to *OUT the number
157438fd1498Szrj    of ranges that are available.  */
157538fd1498Szrj 
157638fd1498Szrj static const char *
get_num_source_ranges_for_substring(cpp_reader * pfile,string_concat_db * concats,location_t strloc,enum cpp_ttype type,int * out)157738fd1498Szrj get_num_source_ranges_for_substring (cpp_reader *pfile,
157838fd1498Szrj 				     string_concat_db *concats,
157938fd1498Szrj 				     location_t strloc,
158038fd1498Szrj 				     enum cpp_ttype type,
158138fd1498Szrj 				     int *out)
158238fd1498Szrj {
158338fd1498Szrj   gcc_assert (out);
158438fd1498Szrj 
158538fd1498Szrj   cpp_substring_ranges ranges;
158638fd1498Szrj   const char *err
158738fd1498Szrj     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
158838fd1498Szrj 
158938fd1498Szrj   if (err)
159038fd1498Szrj     return err;
159138fd1498Szrj 
159238fd1498Szrj   *out = ranges.get_num_ranges ();
159338fd1498Szrj   return NULL;
159438fd1498Szrj }
159538fd1498Szrj 
159638fd1498Szrj /* Selftests of location handling.  */
159738fd1498Szrj 
159838fd1498Szrj /* Verify that compare() on linenum_type handles comparisons over the full
159938fd1498Szrj    range of the type.  */
160038fd1498Szrj 
160138fd1498Szrj static void
test_linenum_comparisons()160238fd1498Szrj test_linenum_comparisons ()
160338fd1498Szrj {
160438fd1498Szrj   linenum_type min_line (0);
160538fd1498Szrj   linenum_type max_line (0xffffffff);
160638fd1498Szrj   ASSERT_EQ (0, compare (min_line, min_line));
160738fd1498Szrj   ASSERT_EQ (0, compare (max_line, max_line));
160838fd1498Szrj 
160938fd1498Szrj   ASSERT_GT (compare (max_line, min_line), 0);
161038fd1498Szrj   ASSERT_LT (compare (min_line, max_line), 0);
161138fd1498Szrj }
161238fd1498Szrj 
161338fd1498Szrj /* Helper function for verifying location data: when location_t
161438fd1498Szrj    values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
161538fd1498Szrj    as having column 0.  */
161638fd1498Szrj 
161738fd1498Szrj static bool
should_have_column_data_p(location_t loc)161838fd1498Szrj should_have_column_data_p (location_t loc)
161938fd1498Szrj {
162038fd1498Szrj   if (IS_ADHOC_LOC (loc))
162138fd1498Szrj     loc = get_location_from_adhoc_loc (line_table, loc);
162238fd1498Szrj   if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
162338fd1498Szrj     return false;
162438fd1498Szrj   return true;
162538fd1498Szrj }
162638fd1498Szrj 
162738fd1498Szrj /* Selftest for should_have_column_data_p.  */
162838fd1498Szrj 
162938fd1498Szrj static void
test_should_have_column_data_p()163038fd1498Szrj test_should_have_column_data_p ()
163138fd1498Szrj {
163238fd1498Szrj   ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
163338fd1498Szrj   ASSERT_TRUE
163438fd1498Szrj     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
163538fd1498Szrj   ASSERT_FALSE
163638fd1498Szrj     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
163738fd1498Szrj }
163838fd1498Szrj 
163938fd1498Szrj /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
164038fd1498Szrj    on LOC.  */
164138fd1498Szrj 
164238fd1498Szrj static void
assert_loceq(const char * exp_filename,int exp_linenum,int exp_colnum,location_t loc)164338fd1498Szrj assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
164438fd1498Szrj 	      location_t loc)
164538fd1498Szrj {
164638fd1498Szrj   ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
164738fd1498Szrj   ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
164838fd1498Szrj   /* If location_t values are sufficiently high, then column numbers
164938fd1498Szrj      will be unavailable and LOCATION_COLUMN (loc) will be 0.
165038fd1498Szrj      When close to the threshold, column numbers *may* be present: if
165138fd1498Szrj      the final linemap before the threshold contains a line that straddles
165238fd1498Szrj      the threshold, locations in that line have column information.  */
165338fd1498Szrj   if (should_have_column_data_p (loc))
165438fd1498Szrj     ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
165538fd1498Szrj }
165638fd1498Szrj 
165738fd1498Szrj /* Various selftests involve constructing a line table and one or more
165838fd1498Szrj    line maps within it.
165938fd1498Szrj 
166038fd1498Szrj    For maximum test coverage we want to run these tests with a variety
166138fd1498Szrj    of situations:
166238fd1498Szrj    - line_table->default_range_bits: some frontends use a non-zero value
166338fd1498Szrj    and others use zero
166438fd1498Szrj    - the fallback modes within line-map.c: there are various threshold
   values for source_location/location_t beyond which line-map.c changes
166638fd1498Szrj    behavior (disabling of the range-packing optimization, disabling
166738fd1498Szrj    of column-tracking).  We can exercise these by starting the line_table
166838fd1498Szrj    at interesting values at or near these thresholds.
166938fd1498Szrj 
167038fd1498Szrj    The following struct describes a particular case within our test
167138fd1498Szrj    matrix.  */
167238fd1498Szrj 
struct line_table_case
{
  /* Record the given DEFAULT_RANGE_BITS and BASE_LOCATION.  */
  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {
  }

  /* The value to use for line_table->default_range_bits.  */
  int m_default_range_bits;

  /* If non-zero, the value at which the line_table's highest_location
     and highest_line are to start.  */
  int m_base_location;
};
168338fd1498Szrj 
168438fd1498Szrj /* Constructor.  Store the old value of line_table, and create a new
168538fd1498Szrj    one, using sane defaults.  */
168638fd1498Szrj 
line_table_test()168738fd1498Szrj line_table_test::line_table_test ()
168838fd1498Szrj {
168938fd1498Szrj   gcc_assert (saved_line_table == NULL);
169038fd1498Szrj   saved_line_table = line_table;
169138fd1498Szrj   line_table = ggc_alloc<line_maps> ();
169238fd1498Szrj   linemap_init (line_table, BUILTINS_LOCATION);
169338fd1498Szrj   gcc_assert (saved_line_table->reallocator);
169438fd1498Szrj   line_table->reallocator = saved_line_table->reallocator;
169538fd1498Szrj   gcc_assert (saved_line_table->round_alloc_size);
169638fd1498Szrj   line_table->round_alloc_size = saved_line_table->round_alloc_size;
169738fd1498Szrj   line_table->default_range_bits = 0;
169838fd1498Szrj }
169938fd1498Szrj 
170038fd1498Szrj /* Constructor.  Store the old value of line_table, and create a new
   one, using the situation described in CASE_.  */
170238fd1498Szrj 
line_table_test(const line_table_case & case_)170338fd1498Szrj line_table_test::line_table_test (const line_table_case &case_)
170438fd1498Szrj {
170538fd1498Szrj   gcc_assert (saved_line_table == NULL);
170638fd1498Szrj   saved_line_table = line_table;
170738fd1498Szrj   line_table = ggc_alloc<line_maps> ();
170838fd1498Szrj   linemap_init (line_table, BUILTINS_LOCATION);
170938fd1498Szrj   gcc_assert (saved_line_table->reallocator);
171038fd1498Szrj   line_table->reallocator = saved_line_table->reallocator;
171138fd1498Szrj   gcc_assert (saved_line_table->round_alloc_size);
171238fd1498Szrj   line_table->round_alloc_size = saved_line_table->round_alloc_size;
171338fd1498Szrj   line_table->default_range_bits = case_.m_default_range_bits;
171438fd1498Szrj   if (case_.m_base_location)
171538fd1498Szrj     {
171638fd1498Szrj       line_table->highest_location = case_.m_base_location;
171738fd1498Szrj       line_table->highest_line = case_.m_base_location;
171838fd1498Szrj     }
171938fd1498Szrj }
172038fd1498Szrj 
172138fd1498Szrj /* Destructor.  Restore the old value of line_table.  */
172238fd1498Szrj 
~line_table_test()172338fd1498Szrj line_table_test::~line_table_test ()
172438fd1498Szrj {
172538fd1498Szrj   gcc_assert (saved_line_table != NULL);
172638fd1498Szrj   line_table = saved_line_table;
172738fd1498Szrj   saved_line_table = NULL;
172838fd1498Szrj }
172938fd1498Szrj 
173038fd1498Szrj /* Verify basic operation of ordinary linemaps.  */
173138fd1498Szrj 
173238fd1498Szrj static void
test_accessing_ordinary_linemaps(const line_table_case & case_)173338fd1498Szrj test_accessing_ordinary_linemaps (const line_table_case &case_)
173438fd1498Szrj {
173538fd1498Szrj   line_table_test ltt (case_);
173638fd1498Szrj 
173738fd1498Szrj   /* Build a simple linemap describing some locations. */
173838fd1498Szrj   linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
173938fd1498Szrj 
174038fd1498Szrj   linemap_line_start (line_table, 1, 100);
174138fd1498Szrj   location_t loc_a = linemap_position_for_column (line_table, 1);
174238fd1498Szrj   location_t loc_b = linemap_position_for_column (line_table, 23);
174338fd1498Szrj 
174438fd1498Szrj   linemap_line_start (line_table, 2, 100);
174538fd1498Szrj   location_t loc_c = linemap_position_for_column (line_table, 1);
174638fd1498Szrj   location_t loc_d = linemap_position_for_column (line_table, 17);
174738fd1498Szrj 
174838fd1498Szrj   /* Example of a very long line.  */
174938fd1498Szrj   linemap_line_start (line_table, 3, 2000);
175038fd1498Szrj   location_t loc_e = linemap_position_for_column (line_table, 700);
175138fd1498Szrj 
175238fd1498Szrj   /* Transitioning back to a short line.  */
175338fd1498Szrj   linemap_line_start (line_table, 4, 0);
175438fd1498Szrj   location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
175538fd1498Szrj 
175638fd1498Szrj   if (should_have_column_data_p (loc_back_to_short))
175738fd1498Szrj     {
175838fd1498Szrj       /* Verify that we switched to short lines in the linemap.  */
175938fd1498Szrj       line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
176038fd1498Szrj       ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
176138fd1498Szrj     }
176238fd1498Szrj 
176338fd1498Szrj   /* Example of a line that will eventually be seen to be longer
176438fd1498Szrj      than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
176538fd1498Szrj      below that.  */
176638fd1498Szrj   linemap_line_start (line_table, 5, 2000);
176738fd1498Szrj 
176838fd1498Szrj   location_t loc_start_of_very_long_line
176938fd1498Szrj     = linemap_position_for_column (line_table, 2000);
177038fd1498Szrj   location_t loc_too_wide
177138fd1498Szrj     = linemap_position_for_column (line_table, 4097);
177238fd1498Szrj   location_t loc_too_wide_2
177338fd1498Szrj     = linemap_position_for_column (line_table, 4098);
177438fd1498Szrj 
177538fd1498Szrj   /* ...and back to a sane line length.  */
177638fd1498Szrj   linemap_line_start (line_table, 6, 100);
177738fd1498Szrj   location_t loc_sane_again = linemap_position_for_column (line_table, 10);
177838fd1498Szrj 
177938fd1498Szrj   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
178038fd1498Szrj 
178138fd1498Szrj   /* Multiple files.  */
178238fd1498Szrj   linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
178338fd1498Szrj   linemap_line_start (line_table, 1, 200);
178438fd1498Szrj   location_t loc_f = linemap_position_for_column (line_table, 150);
178538fd1498Szrj   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
178638fd1498Szrj 
178738fd1498Szrj   /* Verify that we can recover the location info.  */
178838fd1498Szrj   assert_loceq ("foo.c", 1, 1, loc_a);
178938fd1498Szrj   assert_loceq ("foo.c", 1, 23, loc_b);
179038fd1498Szrj   assert_loceq ("foo.c", 2, 1, loc_c);
179138fd1498Szrj   assert_loceq ("foo.c", 2, 17, loc_d);
179238fd1498Szrj   assert_loceq ("foo.c", 3, 700, loc_e);
179338fd1498Szrj   assert_loceq ("foo.c", 4, 100, loc_back_to_short);
179438fd1498Szrj 
179538fd1498Szrj   /* In the very wide line, the initial location should be fully tracked.  */
179638fd1498Szrj   assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
179738fd1498Szrj   /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
179838fd1498Szrj      be disabled.  */
179938fd1498Szrj   assert_loceq ("foo.c", 5, 0, loc_too_wide);
180038fd1498Szrj   assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
180138fd1498Szrj   /*...and column-tracking should be re-enabled for subsequent lines.  */
180238fd1498Szrj   assert_loceq ("foo.c", 6, 10, loc_sane_again);
180338fd1498Szrj 
180438fd1498Szrj   assert_loceq ("bar.c", 1, 150, loc_f);
180538fd1498Szrj 
180638fd1498Szrj   ASSERT_FALSE (is_location_from_builtin_token (loc_a));
180738fd1498Szrj   ASSERT_TRUE (pure_location_p (line_table, loc_a));
180838fd1498Szrj 
180938fd1498Szrj   /* Verify using make_location to build a range, and extracting data
181038fd1498Szrj      back from it.  */
181138fd1498Szrj   location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
181238fd1498Szrj   ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
181338fd1498Szrj   ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
181438fd1498Szrj   source_range src_range = get_range_from_loc (line_table, range_c_b_d);
181538fd1498Szrj   ASSERT_EQ (loc_b, src_range.m_start);
181638fd1498Szrj   ASSERT_EQ (loc_d, src_range.m_finish);
181738fd1498Szrj }
181838fd1498Szrj 
181938fd1498Szrj /* Verify various properties of UNKNOWN_LOCATION.  */
182038fd1498Szrj 
182138fd1498Szrj static void
test_unknown_location()182238fd1498Szrj test_unknown_location ()
182338fd1498Szrj {
182438fd1498Szrj   ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
182538fd1498Szrj   ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
182638fd1498Szrj   ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
182738fd1498Szrj }
182838fd1498Szrj 
182938fd1498Szrj /* Verify various properties of BUILTINS_LOCATION.  */
183038fd1498Szrj 
183138fd1498Szrj static void
test_builtins()183238fd1498Szrj test_builtins ()
183338fd1498Szrj {
183438fd1498Szrj   assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
183538fd1498Szrj   ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
183638fd1498Szrj }
183738fd1498Szrj 
183838fd1498Szrj /* Regression test for make_location.
183938fd1498Szrj    Ensure that we use pure locations for the start/finish of the range,
184038fd1498Szrj    rather than storing a packed or ad-hoc range as the start/finish.  */
184138fd1498Szrj 
184238fd1498Szrj static void
test_make_location_nonpure_range_endpoints(const line_table_case & case_)184338fd1498Szrj test_make_location_nonpure_range_endpoints (const line_table_case &case_)
184438fd1498Szrj {
184538fd1498Szrj   /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
184638fd1498Szrj      with C++ frontend.
184738fd1498Szrj      ....................0000000001111111111222.
184838fd1498Szrj      ....................1234567890123456789012.  */
184938fd1498Szrj   const char *content = "     r += !aaa == bbb;\n";
185038fd1498Szrj   temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
185138fd1498Szrj   line_table_test ltt (case_);
185238fd1498Szrj   linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
185338fd1498Szrj 
185438fd1498Szrj   const location_t c11 = linemap_position_for_column (line_table, 11);
185538fd1498Szrj   const location_t c12 = linemap_position_for_column (line_table, 12);
185638fd1498Szrj   const location_t c13 = linemap_position_for_column (line_table, 13);
185738fd1498Szrj   const location_t c14 = linemap_position_for_column (line_table, 14);
185838fd1498Szrj   const location_t c21 = linemap_position_for_column (line_table, 21);
185938fd1498Szrj 
186038fd1498Szrj   if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
186138fd1498Szrj     return;
186238fd1498Szrj 
186338fd1498Szrj   /* Use column 13 for the caret location, arbitrarily, to verify that we
186438fd1498Szrj      handle start != caret.  */
186538fd1498Szrj   const location_t aaa = make_location (c13, c12, c14);
186638fd1498Szrj   ASSERT_EQ (c13, get_pure_location (aaa));
186738fd1498Szrj   ASSERT_EQ (c12, get_start (aaa));
186838fd1498Szrj   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
186938fd1498Szrj   ASSERT_EQ (c14, get_finish (aaa));
187038fd1498Szrj   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
187138fd1498Szrj 
187238fd1498Szrj   /* Make a location using a location with a range as the start-point.  */
187338fd1498Szrj   const location_t not_aaa = make_location (c11, aaa, c14);
187438fd1498Szrj   ASSERT_EQ (c11, get_pure_location (not_aaa));
187538fd1498Szrj   /* It should use the start location of the range, not store the range
187638fd1498Szrj      itself.  */
187738fd1498Szrj   ASSERT_EQ (c12, get_start (not_aaa));
187838fd1498Szrj   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
187938fd1498Szrj   ASSERT_EQ (c14, get_finish (not_aaa));
188038fd1498Szrj   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
188138fd1498Szrj 
188238fd1498Szrj   /* Similarly, make a location with a range as the end-point.  */
188338fd1498Szrj   const location_t aaa_eq_bbb = make_location (c12, c12, c21);
188438fd1498Szrj   ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
188538fd1498Szrj   ASSERT_EQ (c12, get_start (aaa_eq_bbb));
188638fd1498Szrj   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
188738fd1498Szrj   ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
188838fd1498Szrj   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
188938fd1498Szrj   const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
189038fd1498Szrj   /* It should use the finish location of the range, not store the range
189138fd1498Szrj      itself.  */
189238fd1498Szrj   ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
189338fd1498Szrj   ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
189438fd1498Szrj   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
189538fd1498Szrj   ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
189638fd1498Szrj   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
189738fd1498Szrj }
189838fd1498Szrj 
189938fd1498Szrj /* Verify reading of input files (e.g. for caret-based diagnostics).  */
190038fd1498Szrj 
190138fd1498Szrj static void
test_reading_source_line()190238fd1498Szrj test_reading_source_line ()
190338fd1498Szrj {
190438fd1498Szrj   /* Create a tempfile and write some text to it.  */
190538fd1498Szrj   temp_source_file tmp (SELFTEST_LOCATION, ".txt",
190638fd1498Szrj 			"01234567890123456789\n"
190738fd1498Szrj 			"This is the test text\n"
190838fd1498Szrj 			"This is the 3rd line");
190938fd1498Szrj 
191038fd1498Szrj   /* Read back a specific line from the tempfile.  */
191138fd1498Szrj   int line_size;
191238fd1498Szrj   const char *source_line = location_get_source_line (tmp.get_filename (),
191338fd1498Szrj 						      3, &line_size);
191438fd1498Szrj   ASSERT_TRUE (source_line != NULL);
191538fd1498Szrj   ASSERT_EQ (20, line_size);
191638fd1498Szrj   ASSERT_TRUE (!strncmp ("This is the 3rd line",
191738fd1498Szrj 			 source_line, line_size));
191838fd1498Szrj 
191938fd1498Szrj   source_line = location_get_source_line (tmp.get_filename (),
192038fd1498Szrj 					  2, &line_size);
192138fd1498Szrj   ASSERT_TRUE (source_line != NULL);
192238fd1498Szrj   ASSERT_EQ (21, line_size);
192338fd1498Szrj   ASSERT_TRUE (!strncmp ("This is the test text",
192438fd1498Szrj 			 source_line, line_size));
192538fd1498Szrj 
192638fd1498Szrj   source_line = location_get_source_line (tmp.get_filename (),
192738fd1498Szrj 					  4, &line_size);
192838fd1498Szrj   ASSERT_TRUE (source_line == NULL);
192938fd1498Szrj }
193038fd1498Szrj 
193138fd1498Szrj /* Tests of lexing.  */
193238fd1498Szrj 
/* Assert that the spelling which cpp_token_as_text gives for token TOK
   (as lexed by PARSER) is the string EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    const unsigned char *actual_txt                                     \
      = cpp_token_as_text ((PARSER), (TOK));                            \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt);           \
  SELFTEST_END_STMT
194138fd1498Szrj 
194238fd1498Szrj /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
194338fd1498Szrj    and ranges from EXP_START_COL to EXP_FINISH_COL.
194438fd1498Szrj    Use LOC as the effective location of the selftest.  */
194538fd1498Szrj 
194638fd1498Szrj static void
assert_token_loc_eq(const location & loc,const cpp_token * tok,const char * exp_filename,int exp_linenum,int exp_start_col,int exp_finish_col)194738fd1498Szrj assert_token_loc_eq (const location &loc,
194838fd1498Szrj 		     const cpp_token *tok,
194938fd1498Szrj 		     const char *exp_filename, int exp_linenum,
195038fd1498Szrj 		     int exp_start_col, int exp_finish_col)
195138fd1498Szrj {
195238fd1498Szrj   location_t tok_loc = tok->src_loc;
195338fd1498Szrj   ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
195438fd1498Szrj   ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
195538fd1498Szrj 
195638fd1498Szrj   /* If location_t values are sufficiently high, then column numbers
195738fd1498Szrj      will be unavailable.  */
195838fd1498Szrj   if (!should_have_column_data_p (tok_loc))
195938fd1498Szrj     return;
196038fd1498Szrj 
196138fd1498Szrj   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
196238fd1498Szrj   source_range tok_range = get_range_from_loc (line_table, tok_loc);
196338fd1498Szrj   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
196438fd1498Szrj   ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
196538fd1498Szrj }
196638fd1498Szrj 
196738fd1498Szrj /* Use assert_token_loc_eq to verify the TOK->src_loc, using
196838fd1498Szrj    SELFTEST_LOCATION as the effective location of the selftest.  */
196938fd1498Szrj 
197038fd1498Szrj #define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \
197138fd1498Szrj 			    EXP_START_COL, EXP_FINISH_COL) \
197238fd1498Szrj   assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME), \
197338fd1498Szrj 		       (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
197438fd1498Szrj 
197538fd1498Szrj /* Test of lexing a file using libcpp, verifying tokens and their
197638fd1498Szrj    location information.  */
197738fd1498Szrj 
197838fd1498Szrj static void
test_lexer(const line_table_case & case_)197938fd1498Szrj test_lexer (const line_table_case &case_)
198038fd1498Szrj {
198138fd1498Szrj   /* Create a tempfile and write some text to it.  */
198238fd1498Szrj   const char *content =
198338fd1498Szrj     /*00000000011111111112222222222333333.3333444444444.455555555556
198438fd1498Szrj       12345678901234567890123456789012345.6789012345678.901234567890.  */
198538fd1498Szrj     ("test_name /* c-style comment */\n"
198638fd1498Szrj      "                                  \"test literal\"\n"
198738fd1498Szrj      " // test c++-style comment\n"
198838fd1498Szrj      "   42\n");
198938fd1498Szrj   temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
199038fd1498Szrj 
199138fd1498Szrj   line_table_test ltt (case_);
199238fd1498Szrj 
199338fd1498Szrj   cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
199438fd1498Szrj 
199538fd1498Szrj   const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
199638fd1498Szrj   ASSERT_NE (fname, NULL);
199738fd1498Szrj 
199838fd1498Szrj   /* Verify that we get the expected tokens back, with the correct
199938fd1498Szrj      location information.  */
200038fd1498Szrj 
200138fd1498Szrj   location_t loc;
200238fd1498Szrj   const cpp_token *tok;
200338fd1498Szrj   tok = cpp_get_token_with_location (parser, &loc);
200438fd1498Szrj   ASSERT_NE (tok, NULL);
200538fd1498Szrj   ASSERT_EQ (tok->type, CPP_NAME);
200638fd1498Szrj   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
200738fd1498Szrj   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
200838fd1498Szrj 
200938fd1498Szrj   tok = cpp_get_token_with_location (parser, &loc);
201038fd1498Szrj   ASSERT_NE (tok, NULL);
201138fd1498Szrj   ASSERT_EQ (tok->type, CPP_STRING);
201238fd1498Szrj   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
201338fd1498Szrj   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
201438fd1498Szrj 
201538fd1498Szrj   tok = cpp_get_token_with_location (parser, &loc);
201638fd1498Szrj   ASSERT_NE (tok, NULL);
201738fd1498Szrj   ASSERT_EQ (tok->type, CPP_NUMBER);
201838fd1498Szrj   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
201938fd1498Szrj   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
202038fd1498Szrj 
202138fd1498Szrj   tok = cpp_get_token_with_location (parser, &loc);
202238fd1498Szrj   ASSERT_NE (tok, NULL);
202338fd1498Szrj   ASSERT_EQ (tok->type, CPP_EOF);
202438fd1498Szrj 
202538fd1498Szrj   cpp_finish (parser, NULL);
202638fd1498Szrj   cpp_destroy (parser);
202738fd1498Szrj }
202838fd1498Szrj 
/* Forward decls.  */

struct lexer_test;
class lexer_test_options;

/* Abstract base class for specifying options of a lexer_test.

   When a lexer_test is constructed with a non-NULL options object, its
   constructor calls the "apply" vfunc once the cpp_reader has been
   created and before the main file is read, so that implementations
   can adjust the reader's options and callbacks.  */

class lexer_test_options
{
 public:
  /* This is a polymorphic base class whose subclasses have non-trivial
     destructors, so make destruction through a base-class pointer
     well-defined.  */
  virtual ~lexer_test_options () {}

  /* Customize TEST; see the class comment for when this is called.  */
  virtual void apply (lexer_test &) = 0;
};
204238fd1498Szrj 
204338fd1498Szrj /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
204438fd1498Szrj    in its dtor.
204538fd1498Szrj 
204638fd1498Szrj    This is needed by struct lexer_test to ensure that the cleanup of the
204738fd1498Szrj    cpp_reader happens *after* the cleanup of the temp_source_file.  */
204838fd1498Szrj 
204938fd1498Szrj class cpp_reader_ptr
205038fd1498Szrj {
205138fd1498Szrj  public:
cpp_reader_ptr(cpp_reader * ptr)205238fd1498Szrj   cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
205338fd1498Szrj 
~cpp_reader_ptr()205438fd1498Szrj   ~cpp_reader_ptr ()
205538fd1498Szrj   {
205638fd1498Szrj     cpp_finish (m_ptr, NULL);
205738fd1498Szrj     cpp_destroy (m_ptr);
205838fd1498Szrj   }
205938fd1498Szrj 
206038fd1498Szrj   operator cpp_reader * () const { return m_ptr; }
206138fd1498Szrj 
206238fd1498Szrj  private:
206338fd1498Szrj   cpp_reader *m_ptr;
206438fd1498Szrj };
206538fd1498Szrj 
206638fd1498Szrj /* A struct for writing lexer tests.  */
206738fd1498Szrj 
206838fd1498Szrj struct lexer_test
206938fd1498Szrj {
207038fd1498Szrj   lexer_test (const line_table_case &case_, const char *content,
207138fd1498Szrj 	      lexer_test_options *options);
207238fd1498Szrj   ~lexer_test ();
207338fd1498Szrj 
207438fd1498Szrj   const cpp_token *get_token ();
207538fd1498Szrj 
207638fd1498Szrj   /* The ordering of these fields matters.
207738fd1498Szrj      The line_table_test must be first, since the cpp_reader_ptr
207838fd1498Szrj      uses it.
207938fd1498Szrj      The cpp_reader must be cleaned up *after* the temp_source_file
208038fd1498Szrj      since the filenames in input.c's input cache are owned by the
208138fd1498Szrj      cpp_reader; in particular, when ~temp_source_file evicts the
208238fd1498Szrj      filename the filenames must still be alive.  */
208338fd1498Szrj   line_table_test m_ltt;
208438fd1498Szrj   cpp_reader_ptr m_parser;
208538fd1498Szrj   temp_source_file m_tempfile;
208638fd1498Szrj   string_concat_db m_concats;
208738fd1498Szrj   bool m_implicitly_expect_EOF;
208838fd1498Szrj };
208938fd1498Szrj 
209038fd1498Szrj /* Use an EBCDIC encoding for the execution charset, specifically
209138fd1498Szrj    IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
209238fd1498Szrj 
209338fd1498Szrj    This exercises iconv integration within libcpp.
209438fd1498Szrj    Not every build of iconv supports the given charset,
209538fd1498Szrj    so we need to flag this error and handle it gracefully.  */
209638fd1498Szrj 
209738fd1498Szrj class ebcdic_execution_charset : public lexer_test_options
209838fd1498Szrj {
209938fd1498Szrj  public:
ebcdic_execution_charset()210038fd1498Szrj   ebcdic_execution_charset () : m_num_iconv_errors (0)
210138fd1498Szrj     {
210238fd1498Szrj       gcc_assert (s_singleton == NULL);
210338fd1498Szrj       s_singleton = this;
210438fd1498Szrj     }
~ebcdic_execution_charset()210538fd1498Szrj   ~ebcdic_execution_charset ()
210638fd1498Szrj     {
210738fd1498Szrj       gcc_assert (s_singleton == this);
210838fd1498Szrj       s_singleton = NULL;
210938fd1498Szrj     }
211038fd1498Szrj 
apply(lexer_test & test)211138fd1498Szrj   void apply (lexer_test &test) FINAL OVERRIDE
211238fd1498Szrj   {
211338fd1498Szrj     cpp_options *cpp_opts = cpp_get_options (test.m_parser);
211438fd1498Szrj     cpp_opts->narrow_charset = "IBM1047";
211538fd1498Szrj 
211638fd1498Szrj     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
211738fd1498Szrj     callbacks->error = on_error;
211838fd1498Szrj   }
211938fd1498Szrj 
on_error(cpp_reader * pfile ATTRIBUTE_UNUSED,int level ATTRIBUTE_UNUSED,int reason ATTRIBUTE_UNUSED,rich_location * richloc ATTRIBUTE_UNUSED,const char * msgid,va_list * ap ATTRIBUTE_UNUSED)212038fd1498Szrj   static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
212138fd1498Szrj 			int level ATTRIBUTE_UNUSED,
212238fd1498Szrj 			int reason ATTRIBUTE_UNUSED,
212338fd1498Szrj 			rich_location *richloc ATTRIBUTE_UNUSED,
212438fd1498Szrj 			const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
212538fd1498Szrj     ATTRIBUTE_FPTR_PRINTF(5,0)
212638fd1498Szrj   {
212738fd1498Szrj     gcc_assert (s_singleton);
212838fd1498Szrj     /* Avoid exgettext from picking this up, it is translated in libcpp.  */
212938fd1498Szrj     const char *msg = "conversion from %s to %s not supported by iconv";
213038fd1498Szrj #ifdef ENABLE_NLS
213138fd1498Szrj     msg = dgettext ("cpplib", msg);
213238fd1498Szrj #endif
213338fd1498Szrj     /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
213438fd1498Szrj        when the local iconv build doesn't support the conversion.  */
213538fd1498Szrj     if (strcmp (msgid, msg) == 0)
213638fd1498Szrj       {
213738fd1498Szrj 	s_singleton->m_num_iconv_errors++;
213838fd1498Szrj 	return true;
213938fd1498Szrj       }
214038fd1498Szrj 
214138fd1498Szrj     /* Otherwise, we have an unexpected error.  */
214238fd1498Szrj     abort ();
214338fd1498Szrj   }
214438fd1498Szrj 
iconv_errors_occurred_p()214538fd1498Szrj   bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
214638fd1498Szrj 
214738fd1498Szrj  private:
214838fd1498Szrj   static ebcdic_execution_charset *s_singleton;
214938fd1498Szrj   int m_num_iconv_errors;
215038fd1498Szrj };
215138fd1498Szrj 
215238fd1498Szrj ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
215338fd1498Szrj 
215438fd1498Szrj /* A lexer_test_options subclass that records a list of error
215538fd1498Szrj    messages emitted by the lexer.  */
215638fd1498Szrj 
215738fd1498Szrj class lexer_error_sink : public lexer_test_options
215838fd1498Szrj {
215938fd1498Szrj  public:
lexer_error_sink()216038fd1498Szrj   lexer_error_sink ()
216138fd1498Szrj   {
216238fd1498Szrj     gcc_assert (s_singleton == NULL);
216338fd1498Szrj     s_singleton = this;
216438fd1498Szrj   }
~lexer_error_sink()216538fd1498Szrj   ~lexer_error_sink ()
216638fd1498Szrj   {
216738fd1498Szrj     gcc_assert (s_singleton == this);
216838fd1498Szrj     s_singleton = NULL;
216938fd1498Szrj 
217038fd1498Szrj     int i;
217138fd1498Szrj     char *str;
217238fd1498Szrj     FOR_EACH_VEC_ELT (m_errors, i, str)
217338fd1498Szrj       free (str);
217438fd1498Szrj   }
217538fd1498Szrj 
apply(lexer_test & test)217638fd1498Szrj   void apply (lexer_test &test) FINAL OVERRIDE
217738fd1498Szrj   {
217838fd1498Szrj     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
217938fd1498Szrj     callbacks->error = on_error;
218038fd1498Szrj   }
218138fd1498Szrj 
on_error(cpp_reader * pfile ATTRIBUTE_UNUSED,int level ATTRIBUTE_UNUSED,int reason ATTRIBUTE_UNUSED,rich_location * richloc ATTRIBUTE_UNUSED,const char * msgid,va_list * ap)218238fd1498Szrj   static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
218338fd1498Szrj 			int level ATTRIBUTE_UNUSED,
218438fd1498Szrj 			int reason ATTRIBUTE_UNUSED,
218538fd1498Szrj 			rich_location *richloc ATTRIBUTE_UNUSED,
218638fd1498Szrj 			const char *msgid, va_list *ap)
218738fd1498Szrj     ATTRIBUTE_FPTR_PRINTF(5,0)
218838fd1498Szrj   {
218938fd1498Szrj     char *msg = xvasprintf (msgid, *ap);
219038fd1498Szrj     s_singleton->m_errors.safe_push (msg);
219138fd1498Szrj     return true;
219238fd1498Szrj   }
219338fd1498Szrj 
219438fd1498Szrj   auto_vec<char *> m_errors;
219538fd1498Szrj 
219638fd1498Szrj  private:
219738fd1498Szrj   static lexer_error_sink *s_singleton;
219838fd1498Szrj };
219938fd1498Szrj 
220038fd1498Szrj lexer_error_sink *lexer_error_sink::s_singleton;
220138fd1498Szrj 
220238fd1498Szrj /* Constructor.  Override line_table with a new instance based on CASE_,
220338fd1498Szrj    and write CONTENT to a tempfile.  Create a cpp_reader, and use it to
220438fd1498Szrj    start parsing the tempfile.  */
220538fd1498Szrj 
lexer_test(const line_table_case & case_,const char * content,lexer_test_options * options)220638fd1498Szrj lexer_test::lexer_test (const line_table_case &case_, const char *content,
220738fd1498Szrj 			lexer_test_options *options)
220838fd1498Szrj : m_ltt (case_),
220938fd1498Szrj   m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
221038fd1498Szrj   /* Create a tempfile and write the text to it.  */
221138fd1498Szrj   m_tempfile (SELFTEST_LOCATION, ".c", content),
221238fd1498Szrj   m_concats (),
221338fd1498Szrj   m_implicitly_expect_EOF (true)
221438fd1498Szrj {
221538fd1498Szrj   if (options)
221638fd1498Szrj     options->apply (*this);
221738fd1498Szrj 
221838fd1498Szrj   cpp_init_iconv (m_parser);
221938fd1498Szrj 
222038fd1498Szrj   /* Parse the file.  */
222138fd1498Szrj   const char *fname = cpp_read_main_file (m_parser,
222238fd1498Szrj 					  m_tempfile.get_filename ());
222338fd1498Szrj   ASSERT_NE (fname, NULL);
222438fd1498Szrj }
222538fd1498Szrj 
222638fd1498Szrj /* Destructor.  By default, verify that the next token in m_parser is EOF.  */
222738fd1498Szrj 
~lexer_test()222838fd1498Szrj lexer_test::~lexer_test ()
222938fd1498Szrj {
223038fd1498Szrj   location_t loc;
223138fd1498Szrj   const cpp_token *tok;
223238fd1498Szrj 
223338fd1498Szrj   if (m_implicitly_expect_EOF)
223438fd1498Szrj     {
223538fd1498Szrj       tok = cpp_get_token_with_location (m_parser, &loc);
223638fd1498Szrj       ASSERT_NE (tok, NULL);
223738fd1498Szrj       ASSERT_EQ (tok->type, CPP_EOF);
223838fd1498Szrj     }
223938fd1498Szrj }
224038fd1498Szrj 
224138fd1498Szrj /* Get the next token from m_parser.  */
224238fd1498Szrj 
224338fd1498Szrj const cpp_token *
get_token()224438fd1498Szrj lexer_test::get_token ()
224538fd1498Szrj {
224638fd1498Szrj   location_t loc;
224738fd1498Szrj   const cpp_token *tok;
224838fd1498Szrj 
224938fd1498Szrj   tok = cpp_get_token_with_location (m_parser, &loc);
225038fd1498Szrj   ASSERT_NE (tok, NULL);
225138fd1498Szrj   return tok;
225238fd1498Szrj }
225338fd1498Szrj 
225438fd1498Szrj /* Verify that locations within string literals are correctly handled.  */
225538fd1498Szrj 
225638fd1498Szrj /* Verify get_source_range_for_substring for token(s) at STRLOC,
225738fd1498Szrj    using the string concatenation database for TEST.
225838fd1498Szrj 
225938fd1498Szrj    Assert that the character at index IDX is on EXPECTED_LINE,
226038fd1498Szrj    and that it begins at column EXPECTED_START_COL and ends at
226138fd1498Szrj    EXPECTED_FINISH_COL (unless the locations are beyond
226238fd1498Szrj    LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
226338fd1498Szrj    columns).  */
226438fd1498Szrj 
226538fd1498Szrj static void
assert_char_at_range(const location & loc,lexer_test & test,location_t strloc,enum cpp_ttype type,int idx,int expected_line,int expected_start_col,int expected_finish_col)226638fd1498Szrj assert_char_at_range (const location &loc,
226738fd1498Szrj 		      lexer_test& test,
226838fd1498Szrj 		      location_t strloc, enum cpp_ttype type, int idx,
226938fd1498Szrj 		      int expected_line, int expected_start_col,
227038fd1498Szrj 		      int expected_finish_col)
227138fd1498Szrj {
227238fd1498Szrj   cpp_reader *pfile = test.m_parser;
227338fd1498Szrj   string_concat_db *concats = &test.m_concats;
227438fd1498Szrj 
227538fd1498Szrj   source_range actual_range = source_range();
227638fd1498Szrj   const char *err
227738fd1498Szrj     = get_source_range_for_char (pfile, concats, strloc, type, idx,
227838fd1498Szrj 				 &actual_range);
227938fd1498Szrj   if (should_have_column_data_p (strloc))
228038fd1498Szrj     ASSERT_EQ_AT (loc, NULL, err);
228138fd1498Szrj   else
228238fd1498Szrj     {
228338fd1498Szrj       ASSERT_STREQ_AT (loc,
228438fd1498Szrj 		       "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
228538fd1498Szrj 		       err);
228638fd1498Szrj       return;
228738fd1498Szrj     }
228838fd1498Szrj 
228938fd1498Szrj   int actual_start_line = LOCATION_LINE (actual_range.m_start);
229038fd1498Szrj   ASSERT_EQ_AT (loc, expected_line, actual_start_line);
229138fd1498Szrj   int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
229238fd1498Szrj   ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
229338fd1498Szrj 
229438fd1498Szrj   if (should_have_column_data_p (actual_range.m_start))
229538fd1498Szrj     {
229638fd1498Szrj       int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
229738fd1498Szrj       ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
229838fd1498Szrj     }
229938fd1498Szrj   if (should_have_column_data_p (actual_range.m_finish))
230038fd1498Szrj     {
230138fd1498Szrj       int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
230238fd1498Szrj       ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
230338fd1498Szrj     }
230438fd1498Szrj }
230538fd1498Szrj 
230638fd1498Szrj /* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
230738fd1498Szrj    the effective location of any errors.  */
230838fd1498Szrj 
230938fd1498Szrj #define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
231038fd1498Szrj 			     EXPECTED_START_COL, EXPECTED_FINISH_COL)	\
231138fd1498Szrj   assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
231238fd1498Szrj 			(IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
231338fd1498Szrj 			(EXPECTED_FINISH_COL))
231438fd1498Szrj 
231538fd1498Szrj /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
231638fd1498Szrj    using the string concatenation database for TEST.
231738fd1498Szrj 
231838fd1498Szrj    Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES.  */
231938fd1498Szrj 
232038fd1498Szrj static void
assert_num_substring_ranges(const location & loc,lexer_test & test,location_t strloc,enum cpp_ttype type,int expected_num_ranges)232138fd1498Szrj assert_num_substring_ranges (const location &loc,
232238fd1498Szrj 			     lexer_test& test,
232338fd1498Szrj 			     location_t strloc,
232438fd1498Szrj 			     enum cpp_ttype type,
232538fd1498Szrj 			     int expected_num_ranges)
232638fd1498Szrj {
232738fd1498Szrj   cpp_reader *pfile = test.m_parser;
232838fd1498Szrj   string_concat_db *concats = &test.m_concats;
232938fd1498Szrj 
233038fd1498Szrj   int actual_num_ranges = -1;
233138fd1498Szrj   const char *err
233238fd1498Szrj     = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
233338fd1498Szrj 					   &actual_num_ranges);
233438fd1498Szrj   if (should_have_column_data_p (strloc))
233538fd1498Szrj     ASSERT_EQ_AT (loc, NULL, err);
233638fd1498Szrj   else
233738fd1498Szrj     {
233838fd1498Szrj       ASSERT_STREQ_AT (loc,
233938fd1498Szrj 		       "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
234038fd1498Szrj 		       err);
234138fd1498Szrj       return;
234238fd1498Szrj     }
234338fd1498Szrj   ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
234438fd1498Szrj }
234538fd1498Szrj 
234638fd1498Szrj /* Macro for calling assert_num_substring_ranges, supplying
234738fd1498Szrj    SELFTEST_LOCATION for the effective location of any errors.  */
234838fd1498Szrj 
234938fd1498Szrj #define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
235038fd1498Szrj 				    EXPECTED_NUM_RANGES)		\
235138fd1498Szrj   assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
235238fd1498Szrj 			       (TYPE), (EXPECTED_NUM_RANGES))
235338fd1498Szrj 
235438fd1498Szrj 
235538fd1498Szrj /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
235638fd1498Szrj    returns an error (using the string concatenation database for TEST).  */
235738fd1498Szrj 
235838fd1498Szrj static void
assert_has_no_substring_ranges(const location & loc,lexer_test & test,location_t strloc,enum cpp_ttype type,const char * expected_err)235938fd1498Szrj assert_has_no_substring_ranges (const location &loc,
236038fd1498Szrj 				lexer_test& test,
236138fd1498Szrj 				location_t strloc,
236238fd1498Szrj 				enum cpp_ttype type,
236338fd1498Szrj 				const char *expected_err)
236438fd1498Szrj {
236538fd1498Szrj   cpp_reader *pfile = test.m_parser;
236638fd1498Szrj   string_concat_db *concats = &test.m_concats;
236738fd1498Szrj   cpp_substring_ranges ranges;
236838fd1498Szrj   const char *actual_err
236938fd1498Szrj     = get_substring_ranges_for_loc (pfile, concats, strloc,
237038fd1498Szrj 				    type, ranges);
237138fd1498Szrj   if (should_have_column_data_p (strloc))
237238fd1498Szrj     ASSERT_STREQ_AT (loc, expected_err, actual_err);
237338fd1498Szrj   else
237438fd1498Szrj     ASSERT_STREQ_AT (loc,
237538fd1498Szrj 		     "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
237638fd1498Szrj 		     actual_err);
237738fd1498Szrj }
237838fd1498Szrj 
237938fd1498Szrj #define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)    \
238038fd1498Szrj     assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
238138fd1498Szrj 				    (STRLOC), (TYPE), (ERR))
238238fd1498Szrj 
238338fd1498Szrj /* Lex a simple string literal.  Verify the substring location data, before
238438fd1498Szrj    and after running cpp_interpret_string on it.  */
238538fd1498Szrj 
238638fd1498Szrj static void
test_lexer_string_locations_simple(const line_table_case & case_)238738fd1498Szrj test_lexer_string_locations_simple (const line_table_case &case_)
238838fd1498Szrj {
238938fd1498Szrj   /* Digits 0-9 (with 0 at column 10), the simple way.
239038fd1498Szrj      ....................000000000.11111111112.2222222223333333333
239138fd1498Szrj      ....................123456789.01234567890.1234567890123456789
239238fd1498Szrj      We add a trailing comment to ensure that we correctly locate
239338fd1498Szrj      the end of the string literal token.  */
239438fd1498Szrj   const char *content = "        \"0123456789\" /* not a string */\n";
239538fd1498Szrj   lexer_test test (case_, content, NULL);
239638fd1498Szrj 
239738fd1498Szrj   /* Verify that we get the expected token back, with the correct
239838fd1498Szrj      location information.  */
239938fd1498Szrj   const cpp_token *tok = test.get_token ();
240038fd1498Szrj   ASSERT_EQ (tok->type, CPP_STRING);
240138fd1498Szrj   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
240238fd1498Szrj   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
240338fd1498Szrj 
240438fd1498Szrj   /* At this point in lexing, the quote characters are treated as part of
240538fd1498Szrj      the string (they are stripped off by cpp_interpret_string).  */
240638fd1498Szrj 
240738fd1498Szrj   ASSERT_EQ (tok->val.str.len, 12);
240838fd1498Szrj 
240938fd1498Szrj   /* Verify that cpp_interpret_string works.  */
241038fd1498Szrj   cpp_string dst_string;
241138fd1498Szrj   const enum cpp_ttype type = CPP_STRING;
241238fd1498Szrj   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
241338fd1498Szrj 				      &dst_string, type);
241438fd1498Szrj   ASSERT_TRUE (result);
241538fd1498Szrj   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
241638fd1498Szrj   free (const_cast <unsigned char *> (dst_string.text));
241738fd1498Szrj 
241838fd1498Szrj   /* Verify ranges of individual characters.  This no longer includes the
241938fd1498Szrj      opening quote, but does include the closing quote.  */
242038fd1498Szrj   for (int i = 0; i <= 10; i++)
242138fd1498Szrj     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
242238fd1498Szrj 			  10 + i, 10 + i);
242338fd1498Szrj 
242438fd1498Szrj   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
242538fd1498Szrj }
242638fd1498Szrj 
242738fd1498Szrj /* As test_lexer_string_locations_simple, but use an EBCDIC execution
242838fd1498Szrj    encoding.  */
242938fd1498Szrj 
243038fd1498Szrj static void
test_lexer_string_locations_ebcdic(const line_table_case & case_)243138fd1498Szrj test_lexer_string_locations_ebcdic (const line_table_case &case_)
243238fd1498Szrj {
243338fd1498Szrj   /* EBCDIC support requires iconv.  */
243438fd1498Szrj   if (!HAVE_ICONV)
243538fd1498Szrj     return;
243638fd1498Szrj 
243738fd1498Szrj   /* Digits 0-9 (with 0 at column 10), the simple way.
243838fd1498Szrj      ....................000000000.11111111112.2222222223333333333
243938fd1498Szrj      ....................123456789.01234567890.1234567890123456789
244038fd1498Szrj      We add a trailing comment to ensure that we correctly locate
244138fd1498Szrj      the end of the string literal token.  */
244238fd1498Szrj   const char *content = "        \"0123456789\" /* not a string */\n";
244338fd1498Szrj   ebcdic_execution_charset use_ebcdic;
244438fd1498Szrj   lexer_test test (case_, content, &use_ebcdic);
244538fd1498Szrj 
244638fd1498Szrj   /* Verify that we get the expected token back, with the correct
244738fd1498Szrj      location information.  */
244838fd1498Szrj   const cpp_token *tok = test.get_token ();
244938fd1498Szrj   ASSERT_EQ (tok->type, CPP_STRING);
245038fd1498Szrj   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
245138fd1498Szrj   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
245238fd1498Szrj 
245338fd1498Szrj   /* At this point in lexing, the quote characters are treated as part of
245438fd1498Szrj      the string (they are stripped off by cpp_interpret_string).  */
245538fd1498Szrj 
245638fd1498Szrj   ASSERT_EQ (tok->val.str.len, 12);
245738fd1498Szrj 
245838fd1498Szrj   /* The remainder of the test requires an iconv implementation that
245938fd1498Szrj      can convert from UTF-8 to the EBCDIC encoding requested above.  */
246038fd1498Szrj   if (use_ebcdic.iconv_errors_occurred_p ())
246138fd1498Szrj     return;
246238fd1498Szrj 
246338fd1498Szrj   /* Verify that cpp_interpret_string works.  */
246438fd1498Szrj   cpp_string dst_string;
246538fd1498Szrj   const enum cpp_ttype type = CPP_STRING;
246638fd1498Szrj   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
246738fd1498Szrj 				      &dst_string, type);
246838fd1498Szrj   ASSERT_TRUE (result);
246938fd1498Szrj   /* We should now have EBCDIC-encoded text, specifically
247038fd1498Szrj      IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
247138fd1498Szrj      The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9.  */
247238fd1498Szrj   ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
247338fd1498Szrj 		(const char *)dst_string.text);
247438fd1498Szrj   free (const_cast <unsigned char *> (dst_string.text));
247538fd1498Szrj 
247638fd1498Szrj   /* Verify that we don't attempt to record substring location information
247738fd1498Szrj      for such cases.  */
247838fd1498Szrj   ASSERT_HAS_NO_SUBSTRING_RANGES
247938fd1498Szrj     (test, tok->src_loc, type,
248038fd1498Szrj      "execution character set != source character set");
248138fd1498Szrj }
248238fd1498Szrj 
248338fd1498Szrj /* Lex a string literal containing a hex-escaped character.
248438fd1498Szrj    Verify the substring location data, before and after running
248538fd1498Szrj    cpp_interpret_string on it.  */
248638fd1498Szrj 
248738fd1498Szrj static void
test_lexer_string_locations_hex(const line_table_case & case_)248838fd1498Szrj test_lexer_string_locations_hex (const line_table_case &case_)
248938fd1498Szrj {
249038fd1498Szrj   /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
249138fd1498Szrj      and with a space in place of digit 6, to terminate the escaped
249238fd1498Szrj      hex code.
249338fd1498Szrj      ....................000000000.111111.11112222.
249438fd1498Szrj      ....................123456789.012345.67890123.  */
249538fd1498Szrj   const char *content = "        \"01234\\x35 789\"\n";
249638fd1498Szrj   lexer_test test (case_, content, NULL);
249738fd1498Szrj 
249838fd1498Szrj   /* Verify that we get the expected token back, with the correct
249938fd1498Szrj      location information.  */
250038fd1498Szrj   const cpp_token *tok = test.get_token ();
250138fd1498Szrj   ASSERT_EQ (tok->type, CPP_STRING);
250238fd1498Szrj   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
250338fd1498Szrj   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
250438fd1498Szrj 
250538fd1498Szrj   /* At this point in lexing, the quote characters are treated as part of
250638fd1498Szrj      the string (they are stripped off by cpp_interpret_string).  */
250738fd1498Szrj   ASSERT_EQ (tok->val.str.len, 15);
250838fd1498Szrj 
250938fd1498Szrj   /* Verify that cpp_interpret_string works.  */
251038fd1498Szrj   cpp_string dst_string;
251138fd1498Szrj   const enum cpp_ttype type = CPP_STRING;
251238fd1498Szrj   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
251338fd1498Szrj 				      &dst_string, type);
251438fd1498Szrj   ASSERT_TRUE (result);
251538fd1498Szrj   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
251638fd1498Szrj   free (const_cast <unsigned char *> (dst_string.text));
251738fd1498Szrj 
251838fd1498Szrj   /* Verify ranges of individual characters.  This no longer includes the
251938fd1498Szrj      opening quote, but does include the closing quote.  */
252038fd1498Szrj   for (int i = 0; i <= 4; i++)
252138fd1498Szrj     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
252238fd1498Szrj   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
252338fd1498Szrj   for (int i = 6; i <= 10; i++)
252438fd1498Szrj     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
252538fd1498Szrj 
252638fd1498Szrj   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
252738fd1498Szrj }
252838fd1498Szrj 
252938fd1498Szrj /* Lex a string literal containing an octal-escaped character.
253038fd1498Szrj    Verify the substring location data after running cpp_interpret_string
253138fd1498Szrj    on it.  */
253238fd1498Szrj 
253338fd1498Szrj static void
test_lexer_string_locations_oct(const line_table_case & case_)253438fd1498Szrj test_lexer_string_locations_oct (const line_table_case &case_)
253538fd1498Szrj {
253638fd1498Szrj   /* Digits 0-9, expressing digit 5 in ASCII as "\065"
253738fd1498Szrj      and with a space in place of digit 6, to terminate the escaped
253838fd1498Szrj      octal code.
253938fd1498Szrj      ....................000000000.111111.11112222.2222223333333333444
254038fd1498Szrj      ....................123456789.012345.67890123.4567890123456789012  */
254138fd1498Szrj   const char *content = "        \"01234\\065 789\" /* not a string */\n";
254238fd1498Szrj   lexer_test test (case_, content, NULL);
254338fd1498Szrj 
254438fd1498Szrj   /* Verify that we get the expected token back, with the correct
254538fd1498Szrj      location information.  */
254638fd1498Szrj   const cpp_token *tok = test.get_token ();
254738fd1498Szrj   ASSERT_EQ (tok->type, CPP_STRING);
254838fd1498Szrj   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
254938fd1498Szrj 
255038fd1498Szrj   /* Verify that cpp_interpret_string works.  */
255138fd1498Szrj   cpp_string dst_string;
255238fd1498Szrj   const enum cpp_ttype type = CPP_STRING;
255338fd1498Szrj   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
255438fd1498Szrj 				      &dst_string, type);
255538fd1498Szrj   ASSERT_TRUE (result);
255638fd1498Szrj   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
255738fd1498Szrj   free (const_cast <unsigned char *> (dst_string.text));
255838fd1498Szrj 
255938fd1498Szrj   /* Verify ranges of individual characters.  This no longer includes the
256038fd1498Szrj      opening quote, but does include the closing quote.  */
256138fd1498Szrj   for (int i = 0; i < 5; i++)
256238fd1498Szrj     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
256338fd1498Szrj   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
256438fd1498Szrj   for (int i = 6; i <= 10; i++)
256538fd1498Szrj     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
256638fd1498Szrj 
256738fd1498Szrj   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
256838fd1498Szrj }
256938fd1498Szrj 
257038fd1498Szrj /* Test of string literal containing letter escapes.  */
257138fd1498Szrj 
257238fd1498Szrj static void
test_lexer_string_locations_letter_escape_1(const line_table_case & case_)257338fd1498Szrj test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
257438fd1498Szrj {
257538fd1498Szrj   /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
257638fd1498Szrj      .....................000000000.1.11111.1.1.11222.22222223333333
257738fd1498Szrj      .....................123456789.0.12345.6.7.89012.34567890123456.  */
257838fd1498Szrj   const char *content = ("        \"\\tfoo\\\\\\nbar\" /* non-str */\n");
257938fd1498Szrj   lexer_test test (case_, content, NULL);
258038fd1498Szrj 
258138fd1498Szrj   /* Verify that we get the expected tokens back.  */
258238fd1498Szrj   const cpp_token *tok = test.get_token ();
258338fd1498Szrj   ASSERT_EQ (tok->type, CPP_STRING);
258438fd1498Szrj   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
258538fd1498Szrj 
258638fd1498Szrj   /* Verify ranges of individual characters. */
258738fd1498Szrj   /* "\t".  */
258838fd1498Szrj   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
258938fd1498Szrj 			0, 1, 10, 11);
259038fd1498Szrj   /* "foo". */
259138fd1498Szrj   for (int i = 1; i <= 3; i++)
259238fd1498Szrj     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
259338fd1498Szrj 			  i, 1, 11 + i, 11 + i);
259438fd1498Szrj   /* "\\" and "\n".  */
259538fd1498Szrj   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
259638fd1498Szrj 			4, 1, 15, 16);
259738fd1498Szrj   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
259838fd1498Szrj 			5, 1, 17, 18);
259938fd1498Szrj 
260038fd1498Szrj   /* "bar" and closing quote for nul-terminator.  */
260138fd1498Szrj   for (int i = 6; i <= 9; i++)
260238fd1498Szrj     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
260338fd1498Szrj 			  i, 1, 13 + i, 13 + i);
260438fd1498Szrj 
260538fd1498Szrj   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
260638fd1498Szrj }
260738fd1498Szrj 
260838fd1498Szrj /* Another test of a string literal containing a letter escape.
260938fd1498Szrj    Based on string seen in
261038fd1498Szrj      printf ("%-%\n");
261138fd1498Szrj    in gcc.dg/format/c90-printf-1.c.  */
261238fd1498Szrj 
261338fd1498Szrj static void
test_lexer_string_locations_letter_escape_2(const line_table_case & case_)261438fd1498Szrj test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
261538fd1498Szrj {
261638fd1498Szrj   /* .....................000000000.1111.11.1111.22222222223.
261738fd1498Szrj      .....................123456789.0123.45.6789.01234567890.  */
261838fd1498Szrj   const char *content = ("        \"%-%\\n\" /* non-str */\n");
261938fd1498Szrj   lexer_test test (case_, content, NULL);
262038fd1498Szrj 
262138fd1498Szrj   /* Verify that we get the expected tokens back.  */
262238fd1498Szrj   const cpp_token *tok = test.get_token ();
262338fd1498Szrj   ASSERT_EQ (tok->type, CPP_STRING);
262438fd1498Szrj   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
262538fd1498Szrj 
262638fd1498Szrj   /* Verify ranges of individual characters. */
262738fd1498Szrj   /* "%-%".  */
262838fd1498Szrj   for (int i = 0; i < 3; i++)
262938fd1498Szrj     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
263038fd1498Szrj 			  i, 1, 10 + i, 10 + i);
263138fd1498Szrj   /* "\n".  */
263238fd1498Szrj   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
263338fd1498Szrj 			3, 1, 13, 14);
263438fd1498Szrj 
263538fd1498Szrj   /* Closing quote for nul-terminator.  */
263638fd1498Szrj   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
263738fd1498Szrj 			4, 1, 15, 15);
263838fd1498Szrj 
263938fd1498Szrj   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
264038fd1498Szrj }
264138fd1498Szrj 
264238fd1498Szrj /* Lex a string literal containing UCN 4 characters.
264338fd1498Szrj    Verify the substring location data after running cpp_interpret_string
264438fd1498Szrj    on it.  */
264538fd1498Szrj 
264638fd1498Szrj static void
test_lexer_string_locations_ucn4(const line_table_case & case_)264738fd1498Szrj test_lexer_string_locations_ucn4 (const line_table_case &case_)
264838fd1498Szrj {
264938fd1498Szrj   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
265038fd1498Szrj      as UCN 4.
265138fd1498Szrj      ....................000000000.111111.111122.222222223.33333333344444
265238fd1498Szrj      ....................123456789.012345.678901.234567890.12345678901234  */
265338fd1498Szrj   const char *content = "        \"01234\\u2174\\u2175789\" /* non-str */\n";
265438fd1498Szrj   lexer_test test (case_, content, NULL);
265538fd1498Szrj 
265638fd1498Szrj   /* Verify that we get the expected token back, with the correct
265738fd1498Szrj      location information.  */
265838fd1498Szrj   const cpp_token *tok = test.get_token ();
265938fd1498Szrj   ASSERT_EQ (tok->type, CPP_STRING);
266038fd1498Szrj   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
266138fd1498Szrj 
266238fd1498Szrj   /* Verify that cpp_interpret_string works.
266338fd1498Szrj      The string should be encoded in the execution character
266438fd1498Szrj      set.  Assuming that that is UTF-8, we should have the following:
266538fd1498Szrj      -----------  ----  -----  -------  ----------------
266638fd1498Szrj      Byte offset  Byte  Octal  Unicode  Source Column(s)
266738fd1498Szrj      -----------  ----  -----  -------  ----------------
266838fd1498Szrj      0            0x30         '0'      10
266938fd1498Szrj      1            0x31         '1'      11
267038fd1498Szrj      2            0x32         '2'      12
267138fd1498Szrj      3            0x33         '3'      13
267238fd1498Szrj      4            0x34         '4'      14
267338fd1498Szrj      5            0xE2  \342   U+2174   15-20
267438fd1498Szrj      6            0x85  \205    (cont)  15-20
267538fd1498Szrj      7            0xB4  \264    (cont)  15-20
267638fd1498Szrj      8            0xE2  \342   U+2175   21-26
267738fd1498Szrj      9            0x85  \205    (cont)  21-26
267838fd1498Szrj      10           0xB5  \265    (cont)  21-26
267938fd1498Szrj      11           0x37         '7'      27
268038fd1498Szrj      12           0x38         '8'      28
268138fd1498Szrj      13           0x39         '9'      29
268238fd1498Szrj      14           0x00                  30 (closing quote)
268338fd1498Szrj      -----------  ----  -----  -------  ---------------.  */
268438fd1498Szrj 
268538fd1498Szrj   cpp_string dst_string;
268638fd1498Szrj   const enum cpp_ttype type = CPP_STRING;
268738fd1498Szrj   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
268838fd1498Szrj 				      &dst_string, type);
268938fd1498Szrj   ASSERT_TRUE (result);
269038fd1498Szrj   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
269138fd1498Szrj 		(const char *)dst_string.text);
269238fd1498Szrj   free (const_cast <unsigned char *> (dst_string.text));
269338fd1498Szrj 
269438fd1498Szrj   /* Verify ranges of individual characters.  This no longer includes the
269538fd1498Szrj      opening quote, but does include the closing quote.
269638fd1498Szrj      '01234'.  */
269738fd1498Szrj   for (int i = 0; i <= 4; i++)
269838fd1498Szrj     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
269938fd1498Szrj   /* U+2174.  */
270038fd1498Szrj   for (int i = 5; i <= 7; i++)
270138fd1498Szrj     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
270238fd1498Szrj   /* U+2175.  */
270338fd1498Szrj   for (int i = 8; i <= 10; i++)
270438fd1498Szrj     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
270538fd1498Szrj   /* '789' and nul terminator  */
270638fd1498Szrj   for (int i = 11; i <= 14; i++)
270738fd1498Szrj     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
270838fd1498Szrj 
270938fd1498Szrj   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
271038fd1498Szrj }
271138fd1498Szrj 
271238fd1498Szrj /* Lex a string literal containing UCN 8 characters.
271338fd1498Szrj    Verify the substring location data after running cpp_interpret_string
271438fd1498Szrj    on it.  */
271538fd1498Szrj 
271638fd1498Szrj static void
test_lexer_string_locations_ucn8(const line_table_case & case_)271738fd1498Szrj test_lexer_string_locations_ucn8 (const line_table_case &case_)
271838fd1498Szrj {
271938fd1498Szrj   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
272038fd1498Szrj      ....................000000000.111111.1111222222.2222333333333.344444
272138fd1498Szrj      ....................123456789.012345.6789012345.6789012345678.901234  */
272238fd1498Szrj   const char *content = "        \"01234\\U00002174\\U00002175789\" /* */\n";
272338fd1498Szrj   lexer_test test (case_, content, NULL);
272438fd1498Szrj 
272538fd1498Szrj   /* Verify that we get the expected token back, with the correct
272638fd1498Szrj      location information.  */
272738fd1498Szrj   const cpp_token *tok = test.get_token ();
272838fd1498Szrj   ASSERT_EQ (tok->type, CPP_STRING);
272938fd1498Szrj   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
273038fd1498Szrj 			   "\"01234\\U00002174\\U00002175789\"");
273138fd1498Szrj 
273238fd1498Szrj   /* Verify that cpp_interpret_string works.
273338fd1498Szrj      The UTF-8 encoding of the string is identical to that from
273438fd1498Szrj      the ucn4 testcase above; the only difference is the column
273538fd1498Szrj      locations.  */
273638fd1498Szrj   cpp_string dst_string;
273738fd1498Szrj   const enum cpp_ttype type = CPP_STRING;
273838fd1498Szrj   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
273938fd1498Szrj 				      &dst_string, type);
274038fd1498Szrj   ASSERT_TRUE (result);
274138fd1498Szrj   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
274238fd1498Szrj 		(const char *)dst_string.text);
274338fd1498Szrj   free (const_cast <unsigned char *> (dst_string.text));
274438fd1498Szrj 
274538fd1498Szrj   /* Verify ranges of individual characters.  This no longer includes the
274638fd1498Szrj      opening quote, but does include the closing quote.
274738fd1498Szrj      '01234'.  */
274838fd1498Szrj   for (int i = 0; i <= 4; i++)
274938fd1498Szrj     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
275038fd1498Szrj   /* U+2174.  */
275138fd1498Szrj   for (int i = 5; i <= 7; i++)
275238fd1498Szrj     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
275338fd1498Szrj   /* U+2175.  */
275438fd1498Szrj   for (int i = 8; i <= 10; i++)
275538fd1498Szrj     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
275638fd1498Szrj   /* '789' at columns 35-37  */
275738fd1498Szrj   for (int i = 11; i <= 13; i++)
275838fd1498Szrj     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
275938fd1498Szrj   /* Closing quote/nul-terminator at column 38.  */
276038fd1498Szrj   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
276138fd1498Szrj 
276238fd1498Szrj   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
276338fd1498Szrj }
276438fd1498Szrj 
/* Read the four bytes at PTR_BE_VALUE as a big-endian 32-bit quantity
   and return it as a host-endian value.  The storage is accessed one
   byte at a time; the uint32_t object itself is never loaded.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  uint32_t value = 0;

  /* Most significant byte first: shift in one byte per iteration.  */
  for (int idx = 0; idx < 4; idx++)
    value = (value << 8) | (uint32_t) bytes[idx];

  return value;
}
277638fd1498Szrj 
277738fd1498Szrj /* Lex a wide string literal and verify that attempts to read substring
277838fd1498Szrj    location data from it fail gracefully.  */
277938fd1498Szrj 
278038fd1498Szrj static void
test_lexer_string_locations_wide_string(const line_table_case & case_)278138fd1498Szrj test_lexer_string_locations_wide_string (const line_table_case &case_)
278238fd1498Szrj {
278338fd1498Szrj   /* Digits 0-9.
278438fd1498Szrj      ....................000000000.11111111112.22222222233333
278538fd1498Szrj      ....................123456789.01234567890.12345678901234  */
278638fd1498Szrj   const char *content = "       L\"0123456789\" /* non-str */\n";
278738fd1498Szrj   lexer_test test (case_, content, NULL);
278838fd1498Szrj 
278938fd1498Szrj   /* Verify that we get the expected token back, with the correct
279038fd1498Szrj      location information.  */
279138fd1498Szrj   const cpp_token *tok = test.get_token ();
279238fd1498Szrj   ASSERT_EQ (tok->type, CPP_WSTRING);
279338fd1498Szrj   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
279438fd1498Szrj 
279538fd1498Szrj   /* Verify that cpp_interpret_string works, using CPP_WSTRING.  */
279638fd1498Szrj   cpp_string dst_string;
279738fd1498Szrj   const enum cpp_ttype type = CPP_WSTRING;
279838fd1498Szrj   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
279938fd1498Szrj 				      &dst_string, type);
280038fd1498Szrj   ASSERT_TRUE (result);
280138fd1498Szrj   /* The cpp_reader defaults to big-endian with
280238fd1498Szrj      CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
280338fd1498Szrj      now be encoded as UTF-32BE.  */
280438fd1498Szrj   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
280538fd1498Szrj   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
280638fd1498Szrj   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
280738fd1498Szrj   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
280838fd1498Szrj   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
280938fd1498Szrj   free (const_cast <unsigned char *> (dst_string.text));
281038fd1498Szrj 
281138fd1498Szrj   /* We don't yet support generating substring location information
281238fd1498Szrj      for L"" strings.  */
281338fd1498Szrj   ASSERT_HAS_NO_SUBSTRING_RANGES
281438fd1498Szrj     (test, tok->src_loc, type,
281538fd1498Szrj      "execution character set != source character set");
281638fd1498Szrj }
281738fd1498Szrj 
/* Read the two bytes at PTR_BE_VALUE as a big-endian 16-bit quantity
   and return it as a host-endian value.  The storage is accessed one
   byte at a time; the uint16_t object itself is never loaded.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  const unsigned int high = bytes[0];
  const unsigned int low = bytes[1];

  return (uint16_t) ((high << 8) | low);
}
282638fd1498Szrj 
282738fd1498Szrj /* Lex a u"" string literal and verify that attempts to read substring
282838fd1498Szrj    location data from it fail gracefully.  */
282938fd1498Szrj 
283038fd1498Szrj static void
test_lexer_string_locations_string16(const line_table_case & case_)283138fd1498Szrj test_lexer_string_locations_string16 (const line_table_case &case_)
283238fd1498Szrj {
283338fd1498Szrj   /* Digits 0-9.
283438fd1498Szrj      ....................000000000.11111111112.22222222233333
283538fd1498Szrj      ....................123456789.01234567890.12345678901234  */
283638fd1498Szrj   const char *content = "       u\"0123456789\" /* non-str */\n";
283738fd1498Szrj   lexer_test test (case_, content, NULL);
283838fd1498Szrj 
283938fd1498Szrj   /* Verify that we get the expected token back, with the correct
284038fd1498Szrj      location information.  */
284138fd1498Szrj   const cpp_token *tok = test.get_token ();
284238fd1498Szrj   ASSERT_EQ (tok->type, CPP_STRING16);
284338fd1498Szrj   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
284438fd1498Szrj 
284538fd1498Szrj   /* Verify that cpp_interpret_string works, using CPP_STRING16.  */
284638fd1498Szrj   cpp_string dst_string;
284738fd1498Szrj   const enum cpp_ttype type = CPP_STRING16;
284838fd1498Szrj   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
284938fd1498Szrj 				      &dst_string, type);
285038fd1498Szrj   ASSERT_TRUE (result);
285138fd1498Szrj 
285238fd1498Szrj   /* The cpp_reader defaults to big-endian, so dst_string should
285338fd1498Szrj      now be encoded as UTF-16BE.  */
285438fd1498Szrj   const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
285538fd1498Szrj   ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
285638fd1498Szrj   ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
285738fd1498Szrj   ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
285838fd1498Szrj   ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
285938fd1498Szrj   free (const_cast <unsigned char *> (dst_string.text));
286038fd1498Szrj 
286138fd1498Szrj   /* We don't yet support generating substring location information
286238fd1498Szrj      for L"" strings.  */
286338fd1498Szrj   ASSERT_HAS_NO_SUBSTRING_RANGES
286438fd1498Szrj     (test, tok->src_loc, type,
286538fd1498Szrj      "execution character set != source character set");
286638fd1498Szrj }
286738fd1498Szrj 
286838fd1498Szrj /* Lex a U"" string literal and verify that attempts to read substring
286938fd1498Szrj    location data from it fail gracefully.  */
287038fd1498Szrj 
287138fd1498Szrj static void
test_lexer_string_locations_string32(const line_table_case & case_)287238fd1498Szrj test_lexer_string_locations_string32 (const line_table_case &case_)
287338fd1498Szrj {
287438fd1498Szrj   /* Digits 0-9.
287538fd1498Szrj      ....................000000000.11111111112.22222222233333
287638fd1498Szrj      ....................123456789.01234567890.12345678901234  */
287738fd1498Szrj   const char *content = "       U\"0123456789\" /* non-str */\n";
287838fd1498Szrj   lexer_test test (case_, content, NULL);
287938fd1498Szrj 
288038fd1498Szrj   /* Verify that we get the expected token back, with the correct
288138fd1498Szrj      location information.  */
288238fd1498Szrj   const cpp_token *tok = test.get_token ();
288338fd1498Szrj   ASSERT_EQ (tok->type, CPP_STRING32);
288438fd1498Szrj   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
288538fd1498Szrj 
288638fd1498Szrj   /* Verify that cpp_interpret_string works, using CPP_STRING32.  */
288738fd1498Szrj   cpp_string dst_string;
288838fd1498Szrj   const enum cpp_ttype type = CPP_STRING32;
288938fd1498Szrj   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
289038fd1498Szrj 				      &dst_string, type);
289138fd1498Szrj   ASSERT_TRUE (result);
289238fd1498Szrj 
289338fd1498Szrj   /* The cpp_reader defaults to big-endian, so dst_string should
289438fd1498Szrj      now be encoded as UTF-32BE.  */
289538fd1498Szrj   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
289638fd1498Szrj   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
289738fd1498Szrj   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
289838fd1498Szrj   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
289938fd1498Szrj   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
290038fd1498Szrj   free (const_cast <unsigned char *> (dst_string.text));
290138fd1498Szrj 
290238fd1498Szrj   /* We don't yet support generating substring location information
290338fd1498Szrj      for L"" strings.  */
290438fd1498Szrj   ASSERT_HAS_NO_SUBSTRING_RANGES
290538fd1498Szrj     (test, tok->src_loc, type,
290638fd1498Szrj      "execution character set != source character set");
290738fd1498Szrj }
290838fd1498Szrj 
290938fd1498Szrj /* Lex a u8-string literal.
291038fd1498Szrj    Verify the substring location data after running cpp_interpret_string
291138fd1498Szrj    on it.  */
291238fd1498Szrj 
291338fd1498Szrj static void
test_lexer_string_locations_u8(const line_table_case & case_)291438fd1498Szrj test_lexer_string_locations_u8 (const line_table_case &case_)
291538fd1498Szrj {
291638fd1498Szrj   /* Digits 0-9.
291738fd1498Szrj      ....................000000000.11111111112.22222222233333
291838fd1498Szrj      ....................123456789.01234567890.12345678901234  */
291938fd1498Szrj   const char *content = "      u8\"0123456789\" /* non-str */\n";
292038fd1498Szrj   lexer_test test (case_, content, NULL);
292138fd1498Szrj 
292238fd1498Szrj   /* Verify that we get the expected token back, with the correct
292338fd1498Szrj      location information.  */
292438fd1498Szrj   const cpp_token *tok = test.get_token ();
292538fd1498Szrj   ASSERT_EQ (tok->type, CPP_UTF8STRING);
292638fd1498Szrj   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
292738fd1498Szrj 
292838fd1498Szrj   /* Verify that cpp_interpret_string works.  */
292938fd1498Szrj   cpp_string dst_string;
293038fd1498Szrj   const enum cpp_ttype type = CPP_STRING;
293138fd1498Szrj   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
293238fd1498Szrj 				      &dst_string, type);
293338fd1498Szrj   ASSERT_TRUE (result);
293438fd1498Szrj   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
293538fd1498Szrj   free (const_cast <unsigned char *> (dst_string.text));
293638fd1498Szrj 
293738fd1498Szrj   /* Verify ranges of individual characters.  This no longer includes the
293838fd1498Szrj      opening quote, but does include the closing quote.  */
293938fd1498Szrj   for (int i = 0; i <= 10; i++)
294038fd1498Szrj     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
294138fd1498Szrj }
294238fd1498Szrj 
294338fd1498Szrj /* Lex a string literal containing UTF-8 source characters.
294438fd1498Szrj    Verify the substring location data after running cpp_interpret_string
294538fd1498Szrj    on it.  */
294638fd1498Szrj 
294738fd1498Szrj static void
test_lexer_string_locations_utf8_source(const line_table_case & case_)294838fd1498Szrj test_lexer_string_locations_utf8_source (const line_table_case &case_)
294938fd1498Szrj {
295038fd1498Szrj  /* This string literal is written out to the source file as UTF-8,
295138fd1498Szrj     and is of the form "before mojibake after", where "mojibake"
295238fd1498Szrj     is written as the following four unicode code points:
295338fd1498Szrj        U+6587 CJK UNIFIED IDEOGRAPH-6587
295438fd1498Szrj        U+5B57 CJK UNIFIED IDEOGRAPH-5B57
295538fd1498Szrj        U+5316 CJK UNIFIED IDEOGRAPH-5316
295638fd1498Szrj        U+3051 HIRAGANA LETTER KE.
295738fd1498Szrj      Each of these is 3 bytes wide when encoded in UTF-8, whereas the
295838fd1498Szrj      "before" and "after" are 1 byte per unicode character.
295938fd1498Szrj 
296038fd1498Szrj      The numbering shown are "columns", which are *byte* numbers within
296138fd1498Szrj      the line, rather than unicode character numbers.
296238fd1498Szrj 
296338fd1498Szrj      .................... 000000000.1111111.
296438fd1498Szrj      .................... 123456789.0123456.  */
296538fd1498Szrj   const char *content = ("        \"before "
296638fd1498Szrj 			 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
296738fd1498Szrj 			      UTF-8: 0xE6 0x96 0x87
296838fd1498Szrj 			      C octal escaped UTF-8: \346\226\207
296938fd1498Szrj 			    "column" numbers: 17-19.  */
297038fd1498Szrj 			 "\346\226\207"
297138fd1498Szrj 
297238fd1498Szrj 			 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
297338fd1498Szrj 			      UTF-8: 0xE5 0xAD 0x97
297438fd1498Szrj 			      C octal escaped UTF-8: \345\255\227
297538fd1498Szrj 			    "column" numbers: 20-22.  */
297638fd1498Szrj 			 "\345\255\227"
297738fd1498Szrj 
297838fd1498Szrj 			 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
297938fd1498Szrj 			      UTF-8: 0xE5 0x8C 0x96
298038fd1498Szrj 			      C octal escaped UTF-8: \345\214\226
298138fd1498Szrj 			    "column" numbers: 23-25.  */
298238fd1498Szrj 			 "\345\214\226"
298338fd1498Szrj 
298438fd1498Szrj 			 /* U+3051 HIRAGANA LETTER KE
298538fd1498Szrj 			      UTF-8: 0xE3 0x81 0x91
298638fd1498Szrj 			      C octal escaped UTF-8: \343\201\221
298738fd1498Szrj 			    "column" numbers: 26-28.  */
298838fd1498Szrj 			 "\343\201\221"
298938fd1498Szrj 
299038fd1498Szrj 			 /* column numbers 29 onwards
299138fd1498Szrj 			  2333333.33334444444444
299238fd1498Szrj 			  9012345.67890123456789. */
299338fd1498Szrj 			 " after\" /* non-str */\n");
299438fd1498Szrj   lexer_test test (case_, content, NULL);
299538fd1498Szrj 
299638fd1498Szrj   /* Verify that we get the expected token back, with the correct
299738fd1498Szrj      location information.  */
299838fd1498Szrj   const cpp_token *tok = test.get_token ();
299938fd1498Szrj   ASSERT_EQ (tok->type, CPP_STRING);
300038fd1498Szrj   ASSERT_TOKEN_AS_TEXT_EQ
300138fd1498Szrj     (test.m_parser, tok,
300238fd1498Szrj      "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
300338fd1498Szrj 
300438fd1498Szrj   /* Verify that cpp_interpret_string works.  */
300538fd1498Szrj   cpp_string dst_string;
300638fd1498Szrj   const enum cpp_ttype type = CPP_STRING;
300738fd1498Szrj   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
300838fd1498Szrj 				      &dst_string, type);
300938fd1498Szrj   ASSERT_TRUE (result);
301038fd1498Szrj   ASSERT_STREQ
301138fd1498Szrj     ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
301238fd1498Szrj      (const char *)dst_string.text);
301338fd1498Szrj   free (const_cast <unsigned char *> (dst_string.text));
301438fd1498Szrj 
301538fd1498Szrj   /* Verify ranges of individual characters.  This no longer includes the
301638fd1498Szrj      opening quote, but does include the closing quote.
301738fd1498Szrj      Assuming that both source and execution encodings are UTF-8, we have
301838fd1498Szrj      a run of 25 octets in each, plus the NUL terminator.  */
301938fd1498Szrj   for (int i = 0; i < 25; i++)
302038fd1498Szrj     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
302138fd1498Szrj   /* NUL-terminator should use the closing quote at column 35.  */
302238fd1498Szrj   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
302338fd1498Szrj 
302438fd1498Szrj   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
302538fd1498Szrj }
302638fd1498Szrj 
302738fd1498Szrj /* Test of string literal concatenation.  */
302838fd1498Szrj 
302938fd1498Szrj static void
test_lexer_string_locations_concatenation_1(const line_table_case & case_)303038fd1498Szrj test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
303138fd1498Szrj {
303238fd1498Szrj   /* Digits 0-9.
303338fd1498Szrj      .....................000000000.111111.11112222222222
303438fd1498Szrj      .....................123456789.012345.67890123456789.  */
303538fd1498Szrj   const char *content = ("        \"01234\" /* non-str */\n"
303638fd1498Szrj 			 "        \"56789\" /* non-str */\n");
303738fd1498Szrj   lexer_test test (case_, content, NULL);
303838fd1498Szrj 
303938fd1498Szrj   location_t input_locs[2];
304038fd1498Szrj 
304138fd1498Szrj   /* Verify that we get the expected tokens back.  */
304238fd1498Szrj   auto_vec <cpp_string> input_strings;
304338fd1498Szrj   const cpp_token *tok_a = test.get_token ();
304438fd1498Szrj   ASSERT_EQ (tok_a->type, CPP_STRING);
304538fd1498Szrj   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
304638fd1498Szrj   input_strings.safe_push (tok_a->val.str);
304738fd1498Szrj   input_locs[0] = tok_a->src_loc;
304838fd1498Szrj 
304938fd1498Szrj   const cpp_token *tok_b = test.get_token ();
305038fd1498Szrj   ASSERT_EQ (tok_b->type, CPP_STRING);
305138fd1498Szrj   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
305238fd1498Szrj   input_strings.safe_push (tok_b->val.str);
305338fd1498Szrj   input_locs[1] = tok_b->src_loc;
305438fd1498Szrj 
305538fd1498Szrj   /* Verify that cpp_interpret_string works.  */
305638fd1498Szrj   cpp_string dst_string;
305738fd1498Szrj   const enum cpp_ttype type = CPP_STRING;
305838fd1498Szrj   bool result = cpp_interpret_string (test.m_parser,
305938fd1498Szrj 				      input_strings.address (), 2,
306038fd1498Szrj 				      &dst_string, type);
306138fd1498Szrj   ASSERT_TRUE (result);
306238fd1498Szrj   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
306338fd1498Szrj   free (const_cast <unsigned char *> (dst_string.text));
306438fd1498Szrj 
306538fd1498Szrj   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
306638fd1498Szrj   test.m_concats.record_string_concatenation (2, input_locs);
306738fd1498Szrj 
306838fd1498Szrj   location_t initial_loc = input_locs[0];
306938fd1498Szrj 
307038fd1498Szrj   /* "01234" on line 1.  */
307138fd1498Szrj   for (int i = 0; i <= 4; i++)
307238fd1498Szrj     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
307338fd1498Szrj   /* "56789" in line 2, plus its closing quote for the nul terminator.  */
307438fd1498Szrj   for (int i = 5; i <= 10; i++)
307538fd1498Szrj     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
307638fd1498Szrj 
307738fd1498Szrj   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
307838fd1498Szrj }
307938fd1498Szrj 
308038fd1498Szrj /* Another test of string literal concatenation.  */
308138fd1498Szrj 
308238fd1498Szrj static void
test_lexer_string_locations_concatenation_2(const line_table_case & case_)308338fd1498Szrj test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
308438fd1498Szrj {
308538fd1498Szrj   /* Digits 0-9.
308638fd1498Szrj      .....................000000000.111.11111112222222
308738fd1498Szrj      .....................123456789.012.34567890123456.  */
308838fd1498Szrj   const char *content = ("        \"01\" /* non-str */\n"
308938fd1498Szrj 			 "        \"23\" /* non-str */\n"
309038fd1498Szrj 			 "        \"45\" /* non-str */\n"
309138fd1498Szrj 			 "        \"67\" /* non-str */\n"
309238fd1498Szrj 			 "        \"89\" /* non-str */\n");
309338fd1498Szrj   lexer_test test (case_, content, NULL);
309438fd1498Szrj 
309538fd1498Szrj   auto_vec <cpp_string> input_strings;
309638fd1498Szrj   location_t input_locs[5];
309738fd1498Szrj 
309838fd1498Szrj   /* Verify that we get the expected tokens back.  */
309938fd1498Szrj   for (int i = 0; i < 5; i++)
310038fd1498Szrj     {
310138fd1498Szrj       const cpp_token *tok = test.get_token ();
310238fd1498Szrj       ASSERT_EQ (tok->type, CPP_STRING);
310338fd1498Szrj       input_strings.safe_push (tok->val.str);
310438fd1498Szrj       input_locs[i] = tok->src_loc;
310538fd1498Szrj     }
310638fd1498Szrj 
310738fd1498Szrj   /* Verify that cpp_interpret_string works.  */
310838fd1498Szrj   cpp_string dst_string;
310938fd1498Szrj   const enum cpp_ttype type = CPP_STRING;
311038fd1498Szrj   bool result = cpp_interpret_string (test.m_parser,
311138fd1498Szrj 				      input_strings.address (), 5,
311238fd1498Szrj 				      &dst_string, type);
311338fd1498Szrj   ASSERT_TRUE (result);
311438fd1498Szrj   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
311538fd1498Szrj   free (const_cast <unsigned char *> (dst_string.text));
311638fd1498Szrj 
311738fd1498Szrj   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
311838fd1498Szrj   test.m_concats.record_string_concatenation (5, input_locs);
311938fd1498Szrj 
312038fd1498Szrj   location_t initial_loc = input_locs[0];
312138fd1498Szrj 
312238fd1498Szrj   /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
312338fd1498Szrj      detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
312438fd1498Szrj      and expect get_source_range_for_substring to fail.
312538fd1498Szrj      However, for a string concatenation test, we can have a case
312638fd1498Szrj      where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
312738fd1498Szrj      but subsequent strings can be after it.
312838fd1498Szrj      Attempting to detect this within assert_char_at_range
312938fd1498Szrj      would overcomplicate the logic for the common test cases, so
313038fd1498Szrj      we detect it here.  */
313138fd1498Szrj   if (should_have_column_data_p (input_locs[0])
313238fd1498Szrj       && !should_have_column_data_p (input_locs[4]))
313338fd1498Szrj     {
313438fd1498Szrj       /* Verify that get_source_range_for_substring gracefully rejects
313538fd1498Szrj 	 this case.  */
313638fd1498Szrj       source_range actual_range;
313738fd1498Szrj       const char *err
313838fd1498Szrj 	= get_source_range_for_char (test.m_parser, &test.m_concats,
313938fd1498Szrj 				     initial_loc, type, 0, &actual_range);
314038fd1498Szrj       ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
314138fd1498Szrj       return;
314238fd1498Szrj     }
314338fd1498Szrj 
314438fd1498Szrj   for (int i = 0; i < 5; i++)
314538fd1498Szrj     for (int j = 0; j < 2; j++)
314638fd1498Szrj       ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
314738fd1498Szrj 			    i + 1, 10 + j, 10 + j);
314838fd1498Szrj 
314938fd1498Szrj   /* NUL-terminator should use the final closing quote at line 5 column 12.  */
315038fd1498Szrj   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
315138fd1498Szrj 
315238fd1498Szrj   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
315338fd1498Szrj }
315438fd1498Szrj 
315538fd1498Szrj /* Another test of string literal concatenation, this time combined with
315638fd1498Szrj    various kinds of escaped characters.  */
315738fd1498Szrj 
315838fd1498Szrj static void
test_lexer_string_locations_concatenation_3(const line_table_case & case_)315938fd1498Szrj test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
316038fd1498Szrj {
316138fd1498Szrj   /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
316238fd1498Szrj      digit 6 in ASCII as octal "\066", concatenating multiple strings.  */
316338fd1498Szrj   const char *content
316438fd1498Szrj     /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
316538fd1498Szrj        .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
316638fd1498Szrj     = ("        \"01234\"  \"\\x35\"  \"\\066\"  \"789\" /* non-str */\n");
316738fd1498Szrj   lexer_test test (case_, content, NULL);
316838fd1498Szrj 
316938fd1498Szrj   auto_vec <cpp_string> input_strings;
317038fd1498Szrj   location_t input_locs[4];
317138fd1498Szrj 
317238fd1498Szrj   /* Verify that we get the expected tokens back.  */
317338fd1498Szrj   for (int i = 0; i < 4; i++)
317438fd1498Szrj     {
317538fd1498Szrj       const cpp_token *tok = test.get_token ();
317638fd1498Szrj       ASSERT_EQ (tok->type, CPP_STRING);
317738fd1498Szrj       input_strings.safe_push (tok->val.str);
317838fd1498Szrj       input_locs[i] = tok->src_loc;
317938fd1498Szrj     }
318038fd1498Szrj 
318138fd1498Szrj   /* Verify that cpp_interpret_string works.  */
318238fd1498Szrj   cpp_string dst_string;
318338fd1498Szrj   const enum cpp_ttype type = CPP_STRING;
318438fd1498Szrj   bool result = cpp_interpret_string (test.m_parser,
318538fd1498Szrj 				      input_strings.address (), 4,
318638fd1498Szrj 				      &dst_string, type);
318738fd1498Szrj   ASSERT_TRUE (result);
318838fd1498Szrj   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
318938fd1498Szrj   free (const_cast <unsigned char *> (dst_string.text));
319038fd1498Szrj 
319138fd1498Szrj   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
319238fd1498Szrj   test.m_concats.record_string_concatenation (4, input_locs);
319338fd1498Szrj 
319438fd1498Szrj   location_t initial_loc = input_locs[0];
319538fd1498Szrj 
319638fd1498Szrj   for (int i = 0; i <= 4; i++)
319738fd1498Szrj     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
319838fd1498Szrj   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
319938fd1498Szrj   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
320038fd1498Szrj   for (int i = 7; i <= 9; i++)
320138fd1498Szrj     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
320238fd1498Szrj 
320338fd1498Szrj   /* NUL-terminator should use the location of the final closing quote.  */
320438fd1498Szrj   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
320538fd1498Szrj 
320638fd1498Szrj   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
320738fd1498Szrj }
320838fd1498Szrj 
320938fd1498Szrj /* Test of string literal in a macro.  */
321038fd1498Szrj 
321138fd1498Szrj static void
test_lexer_string_locations_macro(const line_table_case & case_)321238fd1498Szrj test_lexer_string_locations_macro (const line_table_case &case_)
321338fd1498Szrj {
321438fd1498Szrj   /* Digits 0-9.
321538fd1498Szrj      .....................0000000001111111111.22222222223.
321638fd1498Szrj      .....................1234567890123456789.01234567890.  */
321738fd1498Szrj   const char *content = ("#define MACRO     \"0123456789\" /* non-str */\n"
321838fd1498Szrj 			 "  MACRO");
321938fd1498Szrj   lexer_test test (case_, content, NULL);
322038fd1498Szrj 
322138fd1498Szrj   /* Verify that we get the expected tokens back.  */
322238fd1498Szrj   const cpp_token *tok = test.get_token ();
322338fd1498Szrj   ASSERT_EQ (tok->type, CPP_PADDING);
322438fd1498Szrj 
322538fd1498Szrj   tok = test.get_token ();
322638fd1498Szrj   ASSERT_EQ (tok->type, CPP_STRING);
322738fd1498Szrj   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
322838fd1498Szrj 
322938fd1498Szrj   /* Verify ranges of individual characters.  We ought to
323038fd1498Szrj      see columns within the macro definition.  */
323138fd1498Szrj   for (int i = 0; i <= 10; i++)
323238fd1498Szrj     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
323338fd1498Szrj 			  i, 1, 20 + i, 20 + i);
323438fd1498Szrj 
323538fd1498Szrj   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
323638fd1498Szrj 
323738fd1498Szrj   tok = test.get_token ();
323838fd1498Szrj   ASSERT_EQ (tok->type, CPP_PADDING);
323938fd1498Szrj }
324038fd1498Szrj 
324138fd1498Szrj /* Test of stringification of a macro argument.  */
324238fd1498Szrj 
324338fd1498Szrj static void
test_lexer_string_locations_stringified_macro_argument(const line_table_case & case_)324438fd1498Szrj test_lexer_string_locations_stringified_macro_argument
324538fd1498Szrj   (const line_table_case &case_)
324638fd1498Szrj {
324738fd1498Szrj   /* .....................000000000111111111122222222223.
324838fd1498Szrj      .....................123456789012345678901234567890.  */
324938fd1498Szrj   const char *content = ("#define MACRO(X) #X /* non-str */\n"
325038fd1498Szrj 			 "MACRO(foo)\n");
325138fd1498Szrj   lexer_test test (case_, content, NULL);
325238fd1498Szrj 
325338fd1498Szrj   /* Verify that we get the expected token back.  */
325438fd1498Szrj   const cpp_token *tok = test.get_token ();
325538fd1498Szrj   ASSERT_EQ (tok->type, CPP_PADDING);
325638fd1498Szrj 
325738fd1498Szrj   tok = test.get_token ();
325838fd1498Szrj   ASSERT_EQ (tok->type, CPP_STRING);
325938fd1498Szrj   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
326038fd1498Szrj 
326138fd1498Szrj   /* We don't support getting the location of a stringified macro
326238fd1498Szrj      argument.  Verify that it fails gracefully.  */
326338fd1498Szrj   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
326438fd1498Szrj 				  "cpp_interpret_string_1 failed");
326538fd1498Szrj 
326638fd1498Szrj   tok = test.get_token ();
326738fd1498Szrj   ASSERT_EQ (tok->type, CPP_PADDING);
326838fd1498Szrj 
326938fd1498Szrj   tok = test.get_token ();
327038fd1498Szrj   ASSERT_EQ (tok->type, CPP_PADDING);
327138fd1498Szrj }
327238fd1498Szrj 
327338fd1498Szrj /* Ensure that we are fail gracefully if something attempts to pass
327438fd1498Szrj    in a location that isn't a string literal token.  Seen on this code:
327538fd1498Szrj 
327638fd1498Szrj      const char a[] = " %d ";
327738fd1498Szrj      __builtin_printf (a, 0.5);
327838fd1498Szrj                        ^
327938fd1498Szrj 
328038fd1498Szrj    when c-format.c erroneously used the indicated one-character
328138fd1498Szrj    location as the format string location, leading to a read past the
328238fd1498Szrj    end of a string buffer in cpp_interpret_string_1.  */
328338fd1498Szrj 
328438fd1498Szrj static void
test_lexer_string_locations_non_string(const line_table_case & case_)328538fd1498Szrj test_lexer_string_locations_non_string (const line_table_case &case_)
328638fd1498Szrj {
328738fd1498Szrj   /* .....................000000000111111111122222222223.
328838fd1498Szrj      .....................123456789012345678901234567890.  */
328938fd1498Szrj   const char *content = ("         a\n");
329038fd1498Szrj   lexer_test test (case_, content, NULL);
329138fd1498Szrj 
329238fd1498Szrj   /* Verify that we get the expected token back.  */
329338fd1498Szrj   const cpp_token *tok = test.get_token ();
329438fd1498Szrj   ASSERT_EQ (tok->type, CPP_NAME);
329538fd1498Szrj   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
329638fd1498Szrj 
329738fd1498Szrj   /* At this point, libcpp is attempting to interpret the name as a
329838fd1498Szrj      string literal, despite it not starting with a quote.  We don't detect
329938fd1498Szrj      that, but we should at least fail gracefully.  */
330038fd1498Szrj   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
330138fd1498Szrj 				  "cpp_interpret_string_1 failed");
330238fd1498Szrj }
330338fd1498Szrj 
330438fd1498Szrj /* Ensure that we can read substring information for a token which
330538fd1498Szrj    starts in one linemap and ends in another .  Adapted from
330638fd1498Szrj    gcc.dg/cpp/pr69985.c.  */
330738fd1498Szrj 
330838fd1498Szrj static void
test_lexer_string_locations_long_line(const line_table_case & case_)330938fd1498Szrj test_lexer_string_locations_long_line (const line_table_case &case_)
331038fd1498Szrj {
331138fd1498Szrj   /* .....................000000.000111111111
331238fd1498Szrj      .....................123456.789012346789.  */
331338fd1498Szrj   const char *content = ("/* A very long line, so that we start a new line map.  */\n"
331438fd1498Szrj 			 "     \"0123456789012345678901234567890123456789"
331538fd1498Szrj 			 "0123456789012345678901234567890123456789"
331638fd1498Szrj 			 "0123456789012345678901234567890123456789"
331738fd1498Szrj 			 "0123456789\"\n");
331838fd1498Szrj 
331938fd1498Szrj   lexer_test test (case_, content, NULL);
332038fd1498Szrj 
332138fd1498Szrj   /* Verify that we get the expected token back.  */
332238fd1498Szrj   const cpp_token *tok = test.get_token ();
332338fd1498Szrj   ASSERT_EQ (tok->type, CPP_STRING);
332438fd1498Szrj 
332538fd1498Szrj   if (!should_have_column_data_p (line_table->highest_location))
332638fd1498Szrj     return;
332738fd1498Szrj 
332838fd1498Szrj   /* Verify ranges of individual characters.  */
332938fd1498Szrj   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
333038fd1498Szrj   for (int i = 0; i < 131; i++)
333138fd1498Szrj     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
333238fd1498Szrj 			  i, 2, 7 + i, 7 + i);
333338fd1498Szrj }
333438fd1498Szrj 
333538fd1498Szrj /* Test of locations within a raw string that doesn't contain a newline.  */
333638fd1498Szrj 
333738fd1498Szrj static void
test_lexer_string_locations_raw_string_one_line(const line_table_case & case_)333838fd1498Szrj test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
333938fd1498Szrj {
334038fd1498Szrj   /* .....................00.0000000111111111122.
334138fd1498Szrj      .....................12.3456789012345678901.  */
334238fd1498Szrj   const char *content = ("R\"foo(0123456789)foo\"\n");
334338fd1498Szrj   lexer_test test (case_, content, NULL);
334438fd1498Szrj 
334538fd1498Szrj   /* Verify that we get the expected token back.  */
334638fd1498Szrj   const cpp_token *tok = test.get_token ();
334738fd1498Szrj   ASSERT_EQ (tok->type, CPP_STRING);
334838fd1498Szrj 
334938fd1498Szrj   /* Verify that cpp_interpret_string works.  */
335038fd1498Szrj   cpp_string dst_string;
335138fd1498Szrj   const enum cpp_ttype type = CPP_STRING;
335238fd1498Szrj   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
335338fd1498Szrj 				      &dst_string, type);
335438fd1498Szrj   ASSERT_TRUE (result);
335538fd1498Szrj   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
335638fd1498Szrj   free (const_cast <unsigned char *> (dst_string.text));
335738fd1498Szrj 
335838fd1498Szrj   if (!should_have_column_data_p (line_table->highest_location))
335938fd1498Szrj     return;
336038fd1498Szrj 
336138fd1498Szrj   /* 0-9, plus the nil terminator.  */
336238fd1498Szrj   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
336338fd1498Szrj   for (int i = 0; i < 11; i++)
336438fd1498Szrj     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
336538fd1498Szrj 			  i, 1, 7 + i, 7 + i);
336638fd1498Szrj }
336738fd1498Szrj 
336838fd1498Szrj /* Test of locations within a raw string that contains a newline.  */
336938fd1498Szrj 
337038fd1498Szrj static void
test_lexer_string_locations_raw_string_multiline(const line_table_case & case_)337138fd1498Szrj test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
337238fd1498Szrj {
337338fd1498Szrj   /* .....................00.0000.
337438fd1498Szrj      .....................12.3456.  */
337538fd1498Szrj   const char *content = ("R\"foo(\n"
337638fd1498Szrj   /* .....................00000.
337738fd1498Szrj      .....................12345.  */
337838fd1498Szrj 			 "hello\n"
337938fd1498Szrj 			 "world\n"
338038fd1498Szrj   /* .....................00000.
338138fd1498Szrj      .....................12345.  */
338238fd1498Szrj 			 ")foo\"\n");
338338fd1498Szrj   lexer_test test (case_, content, NULL);
338438fd1498Szrj 
338538fd1498Szrj   /* Verify that we get the expected token back.  */
338638fd1498Szrj   const cpp_token *tok = test.get_token ();
338738fd1498Szrj   ASSERT_EQ (tok->type, CPP_STRING);
338838fd1498Szrj 
338938fd1498Szrj   /* Verify that cpp_interpret_string works.  */
339038fd1498Szrj   cpp_string dst_string;
339138fd1498Szrj   const enum cpp_ttype type = CPP_STRING;
339238fd1498Szrj   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
339338fd1498Szrj 				      &dst_string, type);
339438fd1498Szrj   ASSERT_TRUE (result);
339538fd1498Szrj   ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
339638fd1498Szrj   free (const_cast <unsigned char *> (dst_string.text));
339738fd1498Szrj 
339838fd1498Szrj   if (!should_have_column_data_p (line_table->highest_location))
339938fd1498Szrj     return;
340038fd1498Szrj 
340138fd1498Szrj   /* Currently we don't support locations within raw strings that
340238fd1498Szrj      contain newlines.  */
340338fd1498Szrj   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
340438fd1498Szrj 				  "range endpoints are on different lines");
340538fd1498Szrj }
340638fd1498Szrj 
340738fd1498Szrj /* Test of parsing an unterminated raw string.  */
340838fd1498Szrj 
340938fd1498Szrj static void
test_lexer_string_locations_raw_string_unterminated(const line_table_case & case_)341038fd1498Szrj test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
341138fd1498Szrj {
341238fd1498Szrj   const char *content = "R\"ouch()ouCh\" /* etc */";
341338fd1498Szrj 
341438fd1498Szrj   lexer_error_sink errors;
341538fd1498Szrj   lexer_test test (case_, content, &errors);
341638fd1498Szrj   test.m_implicitly_expect_EOF = false;
341738fd1498Szrj 
341838fd1498Szrj   /* Attempt to parse the raw string.  */
341938fd1498Szrj   const cpp_token *tok = test.get_token ();
342038fd1498Szrj   ASSERT_EQ (tok->type, CPP_EOF);
342138fd1498Szrj 
342238fd1498Szrj   ASSERT_EQ (1, errors.m_errors.length ());
342338fd1498Szrj   /* We expect the message "unterminated raw string"
342438fd1498Szrj      in the "cpplib" translation domain.
342538fd1498Szrj      It's not clear that dgettext is available on all supported hosts,
342638fd1498Szrj      so this assertion is commented-out for now.
342738fd1498Szrj        ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
342838fd1498Szrj                      errors.m_errors[0]);
342938fd1498Szrj   */
343038fd1498Szrj }
343138fd1498Szrj 
343238fd1498Szrj /* Test of lexing char constants.  */
343338fd1498Szrj 
343438fd1498Szrj static void
test_lexer_char_constants(const line_table_case & case_)343538fd1498Szrj test_lexer_char_constants (const line_table_case &case_)
343638fd1498Szrj {
343738fd1498Szrj   /* Various char constants.
343838fd1498Szrj      .....................0000000001111111111.22222222223.
343938fd1498Szrj      .....................1234567890123456789.01234567890.  */
344038fd1498Szrj   const char *content = ("         'a'\n"
344138fd1498Szrj 			 "        u'a'\n"
344238fd1498Szrj 			 "        U'a'\n"
344338fd1498Szrj 			 "        L'a'\n"
344438fd1498Szrj 			 "         'abc'\n");
344538fd1498Szrj   lexer_test test (case_, content, NULL);
344638fd1498Szrj 
344738fd1498Szrj   /* Verify that we get the expected tokens back.  */
344838fd1498Szrj   /* 'a'.  */
344938fd1498Szrj   const cpp_token *tok = test.get_token ();
345038fd1498Szrj   ASSERT_EQ (tok->type, CPP_CHAR);
345138fd1498Szrj   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
345238fd1498Szrj 
345338fd1498Szrj   unsigned int chars_seen;
345438fd1498Szrj   int unsignedp;
345538fd1498Szrj   cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
345638fd1498Szrj 					  &chars_seen, &unsignedp);
345738fd1498Szrj   ASSERT_EQ (cc, 'a');
345838fd1498Szrj   ASSERT_EQ (chars_seen, 1);
345938fd1498Szrj 
346038fd1498Szrj   /* u'a'.  */
346138fd1498Szrj   tok = test.get_token ();
346238fd1498Szrj   ASSERT_EQ (tok->type, CPP_CHAR16);
346338fd1498Szrj   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
346438fd1498Szrj 
346538fd1498Szrj   /* U'a'.  */
346638fd1498Szrj   tok = test.get_token ();
346738fd1498Szrj   ASSERT_EQ (tok->type, CPP_CHAR32);
346838fd1498Szrj   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
346938fd1498Szrj 
347038fd1498Szrj   /* L'a'.  */
347138fd1498Szrj   tok = test.get_token ();
347238fd1498Szrj   ASSERT_EQ (tok->type, CPP_WCHAR);
347338fd1498Szrj   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
347438fd1498Szrj 
347538fd1498Szrj   /* 'abc' (c-char-sequence).  */
347638fd1498Szrj   tok = test.get_token ();
347738fd1498Szrj   ASSERT_EQ (tok->type, CPP_CHAR);
347838fd1498Szrj   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
347938fd1498Szrj }
348038fd1498Szrj /* A table of interesting location_t values, giving one axis of our test
348138fd1498Szrj    matrix.  */
348238fd1498Szrj 
348338fd1498Szrj static const location_t boundary_locations[] = {
348438fd1498Szrj   /* Zero means "don't override the default values for a new line_table".  */
348538fd1498Szrj   0,
348638fd1498Szrj 
348738fd1498Szrj   /* An arbitrary non-zero value that isn't close to one of
348838fd1498Szrj      the boundary values below.  */
348938fd1498Szrj   0x10000,
349038fd1498Szrj 
349138fd1498Szrj   /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES.  */
349238fd1498Szrj   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
349338fd1498Szrj   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
349438fd1498Szrj   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
349538fd1498Szrj   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
349638fd1498Szrj   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
349738fd1498Szrj 
349838fd1498Szrj   /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS.  */
349938fd1498Szrj   LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
350038fd1498Szrj   LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
350138fd1498Szrj   LINE_MAP_MAX_LOCATION_WITH_COLS,
350238fd1498Szrj   LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
350338fd1498Szrj   LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
350438fd1498Szrj };
350538fd1498Szrj 
350638fd1498Szrj /* Run TESTCASE multiple times, once for each case in our test matrix.  */
350738fd1498Szrj 
350838fd1498Szrj void
for_each_line_table_case(void (* testcase)(const line_table_case &))350938fd1498Szrj for_each_line_table_case (void (*testcase) (const line_table_case &))
351038fd1498Szrj {
351138fd1498Szrj   /* As noted above in the description of struct line_table_case,
351238fd1498Szrj      we want to explore a test matrix of interesting line_table
351338fd1498Szrj      situations, running various selftests for each case within the
351438fd1498Szrj      matrix.  */
351538fd1498Szrj 
351638fd1498Szrj   /* Run all tests with:
351738fd1498Szrj      (a) line_table->default_range_bits == 0, and
351838fd1498Szrj      (b) line_table->default_range_bits == 5.  */
351938fd1498Szrj   int num_cases_tested = 0;
352038fd1498Szrj   for (int default_range_bits = 0; default_range_bits <= 5;
352138fd1498Szrj        default_range_bits += 5)
352238fd1498Szrj     {
352338fd1498Szrj       /* ...and use each of the "interesting" location values as
352438fd1498Szrj 	 the starting location within line_table.  */
352538fd1498Szrj       const int num_boundary_locations
352638fd1498Szrj 	= sizeof (boundary_locations) / sizeof (boundary_locations[0]);
352738fd1498Szrj       for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
352838fd1498Szrj 	{
352938fd1498Szrj 	  line_table_case c (default_range_bits, boundary_locations[loc_idx]);
353038fd1498Szrj 
353138fd1498Szrj 	  testcase (c);
353238fd1498Szrj 
353338fd1498Szrj 	  num_cases_tested++;
353438fd1498Szrj 	}
353538fd1498Szrj     }
353638fd1498Szrj 
353738fd1498Szrj   /* Verify that we fully covered the test matrix.  */
353838fd1498Szrj   ASSERT_EQ (num_cases_tested, 2 * 12);
353938fd1498Szrj }
354038fd1498Szrj 
3541*e215fc28Szrj /* Verify that when presented with a consecutive pair of locations with
3542*e215fc28Szrj    a very large line offset, we don't attempt to consolidate them into
3543*e215fc28Szrj    a single ordinary linemap where the line offsets within the line map
3544*e215fc28Szrj    would lead to overflow (PR lto/88147).  */
3545*e215fc28Szrj 
3546*e215fc28Szrj static void
test_line_offset_overflow()3547*e215fc28Szrj test_line_offset_overflow ()
3548*e215fc28Szrj {
3549*e215fc28Szrj   line_table_test ltt (line_table_case (5, 0));
3550*e215fc28Szrj 
3551*e215fc28Szrj   linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
3552*e215fc28Szrj   linemap_line_start (line_table, 1, 100);
3553*e215fc28Szrj   location_t loc_a = linemap_line_start (line_table, 2578, 255);
3554*e215fc28Szrj   assert_loceq ("foo.c", 2578, 0, loc_a);
3555*e215fc28Szrj 
3556*e215fc28Szrj   const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3557*e215fc28Szrj   ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
3558*e215fc28Szrj   ASSERT_EQ (ordmap_a->m_range_bits, 5);
3559*e215fc28Szrj 
3560*e215fc28Szrj   location_t loc_b = linemap_line_start (line_table, 404198, 512);
3561*e215fc28Szrj   assert_loceq ("foo.c", 404198, 0, loc_b);
3562*e215fc28Szrj 
3563*e215fc28Szrj   /* We should have started a new linemap, rather than attempting to store
3564*e215fc28Szrj      a very large line offset.  */
3565*e215fc28Szrj   const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3566*e215fc28Szrj   ASSERT_NE (ordmap_a, ordmap_b);
3567*e215fc28Szrj }
3568*e215fc28Szrj 
356938fd1498Szrj /* Run all of the selftests within this file.  */
357038fd1498Szrj 
357138fd1498Szrj void
input_c_tests()357238fd1498Szrj input_c_tests ()
357338fd1498Szrj {
357438fd1498Szrj   test_linenum_comparisons ();
357538fd1498Szrj   test_should_have_column_data_p ();
357638fd1498Szrj   test_unknown_location ();
357738fd1498Szrj   test_builtins ();
357838fd1498Szrj   for_each_line_table_case (test_make_location_nonpure_range_endpoints);
357938fd1498Szrj 
358038fd1498Szrj   for_each_line_table_case (test_accessing_ordinary_linemaps);
358138fd1498Szrj   for_each_line_table_case (test_lexer);
358238fd1498Szrj   for_each_line_table_case (test_lexer_string_locations_simple);
358338fd1498Szrj   for_each_line_table_case (test_lexer_string_locations_ebcdic);
358438fd1498Szrj   for_each_line_table_case (test_lexer_string_locations_hex);
358538fd1498Szrj   for_each_line_table_case (test_lexer_string_locations_oct);
358638fd1498Szrj   for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
358738fd1498Szrj   for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
358838fd1498Szrj   for_each_line_table_case (test_lexer_string_locations_ucn4);
358938fd1498Szrj   for_each_line_table_case (test_lexer_string_locations_ucn8);
359038fd1498Szrj   for_each_line_table_case (test_lexer_string_locations_wide_string);
359138fd1498Szrj   for_each_line_table_case (test_lexer_string_locations_string16);
359238fd1498Szrj   for_each_line_table_case (test_lexer_string_locations_string32);
359338fd1498Szrj   for_each_line_table_case (test_lexer_string_locations_u8);
359438fd1498Szrj   for_each_line_table_case (test_lexer_string_locations_utf8_source);
359538fd1498Szrj   for_each_line_table_case (test_lexer_string_locations_concatenation_1);
359638fd1498Szrj   for_each_line_table_case (test_lexer_string_locations_concatenation_2);
359738fd1498Szrj   for_each_line_table_case (test_lexer_string_locations_concatenation_3);
359838fd1498Szrj   for_each_line_table_case (test_lexer_string_locations_macro);
359938fd1498Szrj   for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
360038fd1498Szrj   for_each_line_table_case (test_lexer_string_locations_non_string);
360138fd1498Szrj   for_each_line_table_case (test_lexer_string_locations_long_line);
360238fd1498Szrj   for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
360338fd1498Szrj   for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
360438fd1498Szrj   for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
360538fd1498Szrj   for_each_line_table_case (test_lexer_char_constants);
360638fd1498Szrj 
360738fd1498Szrj   test_reading_source_line ();
3608*e215fc28Szrj 
3609*e215fc28Szrj   test_line_offset_overflow ();
361038fd1498Szrj }
361138fd1498Szrj 
361238fd1498Szrj } // namespace selftest
361338fd1498Szrj 
361438fd1498Szrj #endif /* CHECKING_P */
3615