138fd1498Szrj /* Data and functions related to line maps and input files.
238fd1498Szrj Copyright (C) 2004-2018 Free Software Foundation, Inc.
338fd1498Szrj
438fd1498Szrj This file is part of GCC.
538fd1498Szrj
638fd1498Szrj GCC is free software; you can redistribute it and/or modify it under
738fd1498Szrj the terms of the GNU General Public License as published by the Free
838fd1498Szrj Software Foundation; either version 3, or (at your option) any later
938fd1498Szrj version.
1038fd1498Szrj
1138fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT ANY
1238fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or
1338fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
1438fd1498Szrj for more details.
1538fd1498Szrj
1638fd1498Szrj You should have received a copy of the GNU General Public License
1738fd1498Szrj along with GCC; see the file COPYING3. If not see
1838fd1498Szrj <http://www.gnu.org/licenses/>. */
1938fd1498Szrj
2038fd1498Szrj #include "config.h"
2138fd1498Szrj #include "system.h"
2238fd1498Szrj #include "coretypes.h"
2338fd1498Szrj #include "intl.h"
2438fd1498Szrj #include "diagnostic-core.h"
2538fd1498Szrj #include "selftest.h"
2638fd1498Szrj #include "cpplib.h"
2738fd1498Szrj
2838fd1498Szrj #ifndef HAVE_ICONV
2938fd1498Szrj #define HAVE_ICONV 0
3038fd1498Szrj #endif
3138fd1498Szrj
3238fd1498Szrj /* This is a cache used by get_next_line to store the content of a
3338fd1498Szrj file to be searched for file lines. */
3438fd1498Szrj struct fcache
3538fd1498Szrj {
3638fd1498Szrj /* These are information used to store a line boundary. */
3738fd1498Szrj struct line_info
3838fd1498Szrj {
3938fd1498Szrj /* The line number. It starts from 1. */
4038fd1498Szrj size_t line_num;
4138fd1498Szrj
4238fd1498Szrj /* The position (byte count) of the beginning of the line,
4338fd1498Szrj relative to the file data pointer. This starts at zero. */
4438fd1498Szrj size_t start_pos;
4538fd1498Szrj
4638fd1498Szrj /* The position (byte count) of the last byte of the line. This
4738fd1498Szrj normally points to the '\n' character, or to one byte after the
4838fd1498Szrj last byte of the file, if the file doesn't contain a '\n'
4938fd1498Szrj character. */
5038fd1498Szrj size_t end_pos;
5138fd1498Szrj
line_infofcache::line_info5238fd1498Szrj line_info (size_t l, size_t s, size_t e)
5338fd1498Szrj : line_num (l), start_pos (s), end_pos (e)
5438fd1498Szrj {}
5538fd1498Szrj
line_infofcache::line_info5638fd1498Szrj line_info ()
5738fd1498Szrj :line_num (0), start_pos (0), end_pos (0)
5838fd1498Szrj {}
5938fd1498Szrj };
6038fd1498Szrj
6138fd1498Szrj /* The number of time this file has been accessed. This is used
6238fd1498Szrj to designate which file cache to evict from the cache
6338fd1498Szrj array. */
6438fd1498Szrj unsigned use_count;
6538fd1498Szrj
6638fd1498Szrj /* The file_path is the key for identifying a particular file in
6738fd1498Szrj the cache.
6838fd1498Szrj For libcpp-using code, the underlying buffer for this field is
6938fd1498Szrj owned by the corresponding _cpp_file within the cpp_reader. */
7038fd1498Szrj const char *file_path;
7138fd1498Szrj
7238fd1498Szrj FILE *fp;
7338fd1498Szrj
7438fd1498Szrj /* This points to the content of the file that we've read so
7538fd1498Szrj far. */
7638fd1498Szrj char *data;
7738fd1498Szrj
7838fd1498Szrj /* The size of the DATA array above.*/
7938fd1498Szrj size_t size;
8038fd1498Szrj
8138fd1498Szrj /* The number of bytes read from the underlying file so far. This
8238fd1498Szrj must be less (or equal) than SIZE above. */
8338fd1498Szrj size_t nb_read;
8438fd1498Szrj
8538fd1498Szrj /* The index of the beginning of the current line. */
8638fd1498Szrj size_t line_start_idx;
8738fd1498Szrj
8838fd1498Szrj /* The number of the previous line read. This starts at 1. Zero
8938fd1498Szrj means we've read no line so far. */
9038fd1498Szrj size_t line_num;
9138fd1498Szrj
9238fd1498Szrj /* This is the total number of lines of the current file. At the
9338fd1498Szrj moment, we try to get this information from the line map
9438fd1498Szrj subsystem. Note that this is just a hint. When using the C++
9538fd1498Szrj front-end, this hint is correct because the input file is then
9638fd1498Szrj completely tokenized before parsing starts; so the line map knows
9738fd1498Szrj the number of lines before compilation really starts. For e.g,
9838fd1498Szrj the C front-end, it can happen that we start emitting diagnostics
9938fd1498Szrj before the line map has seen the end of the file. */
10038fd1498Szrj size_t total_lines;
10138fd1498Szrj
10238fd1498Szrj /* Could this file be missing a trailing newline on its final line?
10338fd1498Szrj Initially true (to cope with empty files), set to true/false
10438fd1498Szrj as each line is read. */
10538fd1498Szrj bool missing_trailing_newline;
10638fd1498Szrj
10738fd1498Szrj /* This is a record of the beginning and end of the lines we've seen
10838fd1498Szrj while reading the file. This is useful to avoid walking the data
10938fd1498Szrj from the beginning when we are asked to read a line that is
11038fd1498Szrj before LINE_START_IDX above. Note that the maximum size of this
11138fd1498Szrj record is fcache_line_record_size, so that the memory consumption
11238fd1498Szrj doesn't explode. We thus scale total_lines down to
11338fd1498Szrj fcache_line_record_size. */
11438fd1498Szrj vec<line_info, va_heap> line_record;
11538fd1498Szrj
11638fd1498Szrj fcache ();
11738fd1498Szrj ~fcache ();
11838fd1498Szrj };
11938fd1498Szrj
12038fd1498Szrj /* Current position in real source file. */
12138fd1498Szrj
12238fd1498Szrj location_t input_location = UNKNOWN_LOCATION;
12338fd1498Szrj
12438fd1498Szrj struct line_maps *line_table;
12538fd1498Szrj
12638fd1498Szrj /* A stashed copy of "line_table" for use by selftest::line_table_test.
12738fd1498Szrj This needs to be a global so that it can be a GC root, and thus
12838fd1498Szrj prevent the stashed copy from being garbage-collected if the GC runs
12938fd1498Szrj during a line_table_test. */
13038fd1498Szrj
13138fd1498Szrj struct line_maps *saved_line_table;
13238fd1498Szrj
13338fd1498Szrj static fcache *fcache_tab;
13438fd1498Szrj static const size_t fcache_tab_size = 16;
13538fd1498Szrj static const size_t fcache_buffer_size = 4 * 1024;
13638fd1498Szrj static const size_t fcache_line_record_size = 100;
13738fd1498Szrj
13838fd1498Szrj /* Expand the source location LOC into a human readable location. If
13938fd1498Szrj LOC resolves to a builtin location, the file name of the readable
14038fd1498Szrj location is set to the string "<built-in>". If EXPANSION_POINT_P is
14138fd1498Szrj TRUE and LOC is virtual, then it is resolved to the expansion
14238fd1498Szrj point of the involved macro. Otherwise, it is resolved to the
14338fd1498Szrj spelling location of the token.
14438fd1498Szrj
14538fd1498Szrj When resolving to the spelling location of the token, if the
14638fd1498Szrj resulting location is for a built-in location (that is, it has no
14738fd1498Szrj associated line/column) in the context of a macro expansion, the
14838fd1498Szrj returned location is the first one (while unwinding the macro
14938fd1498Szrj location towards its expansion point) that is in real source
15038fd1498Szrj code.
15138fd1498Szrj
15238fd1498Szrj ASPECT controls which part of the location to use. */
15338fd1498Szrj
15438fd1498Szrj static expanded_location
expand_location_1(source_location loc,bool expansion_point_p,enum location_aspect aspect)15538fd1498Szrj expand_location_1 (source_location loc,
15638fd1498Szrj bool expansion_point_p,
15738fd1498Szrj enum location_aspect aspect)
15838fd1498Szrj {
15938fd1498Szrj expanded_location xloc;
16038fd1498Szrj const line_map_ordinary *map;
16138fd1498Szrj enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
16238fd1498Szrj tree block = NULL;
16338fd1498Szrj
16438fd1498Szrj if (IS_ADHOC_LOC (loc))
16538fd1498Szrj {
16638fd1498Szrj block = LOCATION_BLOCK (loc);
16738fd1498Szrj loc = LOCATION_LOCUS (loc);
16838fd1498Szrj }
16938fd1498Szrj
17038fd1498Szrj memset (&xloc, 0, sizeof (xloc));
17138fd1498Szrj
17238fd1498Szrj if (loc >= RESERVED_LOCATION_COUNT)
17338fd1498Szrj {
17438fd1498Szrj if (!expansion_point_p)
17538fd1498Szrj {
17638fd1498Szrj /* We want to resolve LOC to its spelling location.
17738fd1498Szrj
17838fd1498Szrj But if that spelling location is a reserved location that
17938fd1498Szrj appears in the context of a macro expansion (like for a
18038fd1498Szrj location for a built-in token), let's consider the first
18138fd1498Szrj location (toward the expansion point) that is not reserved;
18238fd1498Szrj that is, the first location that is in real source code. */
18338fd1498Szrj loc = linemap_unwind_to_first_non_reserved_loc (line_table,
18438fd1498Szrj loc, NULL);
18538fd1498Szrj lrk = LRK_SPELLING_LOCATION;
18638fd1498Szrj }
18738fd1498Szrj loc = linemap_resolve_location (line_table, loc, lrk, &map);
18838fd1498Szrj
18938fd1498Szrj /* loc is now either in an ordinary map, or is a reserved location.
19038fd1498Szrj If it is a compound location, the caret is in a spelling location,
19138fd1498Szrj but the start/finish might still be a virtual location.
19238fd1498Szrj Depending of what the caller asked for, we may need to recurse
19338fd1498Szrj one level in order to resolve any virtual locations in the
19438fd1498Szrj end-points. */
19538fd1498Szrj switch (aspect)
19638fd1498Szrj {
19738fd1498Szrj default:
19838fd1498Szrj gcc_unreachable ();
19938fd1498Szrj /* Fall through. */
20038fd1498Szrj case LOCATION_ASPECT_CARET:
20138fd1498Szrj break;
20238fd1498Szrj case LOCATION_ASPECT_START:
20338fd1498Szrj {
20438fd1498Szrj source_location start = get_start (loc);
20538fd1498Szrj if (start != loc)
20638fd1498Szrj return expand_location_1 (start, expansion_point_p, aspect);
20738fd1498Szrj }
20838fd1498Szrj break;
20938fd1498Szrj case LOCATION_ASPECT_FINISH:
21038fd1498Szrj {
21138fd1498Szrj source_location finish = get_finish (loc);
21238fd1498Szrj if (finish != loc)
21338fd1498Szrj return expand_location_1 (finish, expansion_point_p, aspect);
21438fd1498Szrj }
21538fd1498Szrj break;
21638fd1498Szrj }
21738fd1498Szrj xloc = linemap_expand_location (line_table, map, loc);
21838fd1498Szrj }
21938fd1498Szrj
22038fd1498Szrj xloc.data = block;
22138fd1498Szrj if (loc <= BUILTINS_LOCATION)
22238fd1498Szrj xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
22338fd1498Szrj
22438fd1498Szrj return xloc;
22538fd1498Szrj }
22638fd1498Szrj
22738fd1498Szrj /* Initialize the set of cache used for files accessed by caret
22838fd1498Szrj diagnostic. */
22938fd1498Szrj
23038fd1498Szrj static void
diagnostic_file_cache_init(void)23138fd1498Szrj diagnostic_file_cache_init (void)
23238fd1498Szrj {
23338fd1498Szrj if (fcache_tab == NULL)
23438fd1498Szrj fcache_tab = new fcache[fcache_tab_size];
23538fd1498Szrj }
23638fd1498Szrj
23738fd1498Szrj /* Free the resources used by the set of cache used for files accessed
23838fd1498Szrj by caret diagnostic. */
23938fd1498Szrj
24038fd1498Szrj void
diagnostic_file_cache_fini(void)24138fd1498Szrj diagnostic_file_cache_fini (void)
24238fd1498Szrj {
24338fd1498Szrj if (fcache_tab)
24438fd1498Szrj {
24538fd1498Szrj delete [] (fcache_tab);
24638fd1498Szrj fcache_tab = NULL;
24738fd1498Szrj }
24838fd1498Szrj }
24938fd1498Szrj
25038fd1498Szrj /* Return the total lines number that have been read so far by the
25138fd1498Szrj line map (in the preprocessor) so far. For languages like C++ that
25238fd1498Szrj entirely preprocess the input file before starting to parse, this
25338fd1498Szrj equals the actual number of lines of the file. */
25438fd1498Szrj
25538fd1498Szrj static size_t
total_lines_num(const char * file_path)25638fd1498Szrj total_lines_num (const char *file_path)
25738fd1498Szrj {
25838fd1498Szrj size_t r = 0;
25938fd1498Szrj source_location l = 0;
26038fd1498Szrj if (linemap_get_file_highest_location (line_table, file_path, &l))
26138fd1498Szrj {
26238fd1498Szrj gcc_assert (l >= RESERVED_LOCATION_COUNT);
26338fd1498Szrj expanded_location xloc = expand_location (l);
26438fd1498Szrj r = xloc.line;
26538fd1498Szrj }
26638fd1498Szrj return r;
26738fd1498Szrj }
26838fd1498Szrj
26938fd1498Szrj /* Lookup the cache used for the content of a given file accessed by
27038fd1498Szrj caret diagnostic. Return the found cached file, or NULL if no
27138fd1498Szrj cached file was found. */
27238fd1498Szrj
27338fd1498Szrj static fcache*
lookup_file_in_cache_tab(const char * file_path)27438fd1498Szrj lookup_file_in_cache_tab (const char *file_path)
27538fd1498Szrj {
27638fd1498Szrj if (file_path == NULL)
27738fd1498Szrj return NULL;
27838fd1498Szrj
27938fd1498Szrj diagnostic_file_cache_init ();
28038fd1498Szrj
28138fd1498Szrj /* This will contain the found cached file. */
28238fd1498Szrj fcache *r = NULL;
28338fd1498Szrj for (unsigned i = 0; i < fcache_tab_size; ++i)
28438fd1498Szrj {
28538fd1498Szrj fcache *c = &fcache_tab[i];
28638fd1498Szrj if (c->file_path && !strcmp (c->file_path, file_path))
28738fd1498Szrj {
28838fd1498Szrj ++c->use_count;
28938fd1498Szrj r = c;
29038fd1498Szrj }
29138fd1498Szrj }
29238fd1498Szrj
29338fd1498Szrj if (r)
29438fd1498Szrj ++r->use_count;
29538fd1498Szrj
29638fd1498Szrj return r;
29738fd1498Szrj }
29838fd1498Szrj
29938fd1498Szrj /* Purge any mention of FILENAME from the cache of files used for
30038fd1498Szrj printing source code. For use in selftests when working
30138fd1498Szrj with tempfiles. */
30238fd1498Szrj
30338fd1498Szrj void
diagnostics_file_cache_forcibly_evict_file(const char * file_path)30438fd1498Szrj diagnostics_file_cache_forcibly_evict_file (const char *file_path)
30538fd1498Szrj {
30638fd1498Szrj gcc_assert (file_path);
30738fd1498Szrj
30838fd1498Szrj fcache *r = lookup_file_in_cache_tab (file_path);
30938fd1498Szrj if (!r)
31038fd1498Szrj /* Not found. */
31138fd1498Szrj return;
31238fd1498Szrj
31338fd1498Szrj r->file_path = NULL;
31438fd1498Szrj if (r->fp)
31538fd1498Szrj fclose (r->fp);
31638fd1498Szrj r->fp = NULL;
31738fd1498Szrj r->nb_read = 0;
31838fd1498Szrj r->line_start_idx = 0;
31938fd1498Szrj r->line_num = 0;
32038fd1498Szrj r->line_record.truncate (0);
32138fd1498Szrj r->use_count = 0;
32238fd1498Szrj r->total_lines = 0;
32338fd1498Szrj r->missing_trailing_newline = true;
32438fd1498Szrj }
32538fd1498Szrj
32638fd1498Szrj /* Return the file cache that has been less used, recently, or the
32738fd1498Szrj first empty one. If HIGHEST_USE_COUNT is non-null,
32838fd1498Szrj *HIGHEST_USE_COUNT is set to the highest use count of the entries
32938fd1498Szrj in the cache table. */
33038fd1498Szrj
33138fd1498Szrj static fcache*
evicted_cache_tab_entry(unsigned * highest_use_count)33238fd1498Szrj evicted_cache_tab_entry (unsigned *highest_use_count)
33338fd1498Szrj {
33438fd1498Szrj diagnostic_file_cache_init ();
33538fd1498Szrj
33638fd1498Szrj fcache *to_evict = &fcache_tab[0];
33738fd1498Szrj unsigned huc = to_evict->use_count;
33838fd1498Szrj for (unsigned i = 1; i < fcache_tab_size; ++i)
33938fd1498Szrj {
34038fd1498Szrj fcache *c = &fcache_tab[i];
34138fd1498Szrj bool c_is_empty = (c->file_path == NULL);
34238fd1498Szrj
34338fd1498Szrj if (c->use_count < to_evict->use_count
34438fd1498Szrj || (to_evict->file_path && c_is_empty))
34538fd1498Szrj /* We evict C because it's either an entry with a lower use
34638fd1498Szrj count or one that is empty. */
34738fd1498Szrj to_evict = c;
34838fd1498Szrj
34938fd1498Szrj if (huc < c->use_count)
35038fd1498Szrj huc = c->use_count;
35138fd1498Szrj
35238fd1498Szrj if (c_is_empty)
35338fd1498Szrj /* We've reached the end of the cache; subsequent elements are
35438fd1498Szrj all empty. */
35538fd1498Szrj break;
35638fd1498Szrj }
35738fd1498Szrj
35838fd1498Szrj if (highest_use_count)
35938fd1498Szrj *highest_use_count = huc;
36038fd1498Szrj
36138fd1498Szrj return to_evict;
36238fd1498Szrj }
36338fd1498Szrj
36438fd1498Szrj /* Create the cache used for the content of a given file to be
36538fd1498Szrj accessed by caret diagnostic. This cache is added to an array of
36638fd1498Szrj cache and can be retrieved by lookup_file_in_cache_tab. This
36738fd1498Szrj function returns the created cache. Note that only the last
36838fd1498Szrj fcache_tab_size files are cached. */
36938fd1498Szrj
37038fd1498Szrj static fcache*
add_file_to_cache_tab(const char * file_path)37138fd1498Szrj add_file_to_cache_tab (const char *file_path)
37238fd1498Szrj {
37338fd1498Szrj
37438fd1498Szrj FILE *fp = fopen (file_path, "r");
37538fd1498Szrj if (fp == NULL)
37638fd1498Szrj return NULL;
37738fd1498Szrj
37838fd1498Szrj unsigned highest_use_count = 0;
37938fd1498Szrj fcache *r = evicted_cache_tab_entry (&highest_use_count);
38038fd1498Szrj r->file_path = file_path;
38138fd1498Szrj if (r->fp)
38238fd1498Szrj fclose (r->fp);
38338fd1498Szrj r->fp = fp;
38438fd1498Szrj r->nb_read = 0;
38538fd1498Szrj r->line_start_idx = 0;
38638fd1498Szrj r->line_num = 0;
38738fd1498Szrj r->line_record.truncate (0);
38838fd1498Szrj /* Ensure that this cache entry doesn't get evicted next time
38938fd1498Szrj add_file_to_cache_tab is called. */
39038fd1498Szrj r->use_count = ++highest_use_count;
39138fd1498Szrj r->total_lines = total_lines_num (file_path);
39238fd1498Szrj r->missing_trailing_newline = true;
39338fd1498Szrj
39438fd1498Szrj return r;
39538fd1498Szrj }
39638fd1498Szrj
39738fd1498Szrj /* Lookup the cache used for the content of a given file accessed by
39838fd1498Szrj caret diagnostic. If no cached file was found, create a new cache
39938fd1498Szrj for this file, add it to the array of cached file and return
40038fd1498Szrj it. */
40138fd1498Szrj
40238fd1498Szrj static fcache*
lookup_or_add_file_to_cache_tab(const char * file_path)40338fd1498Szrj lookup_or_add_file_to_cache_tab (const char *file_path)
40438fd1498Szrj {
40538fd1498Szrj fcache *r = lookup_file_in_cache_tab (file_path);
40638fd1498Szrj if (r == NULL)
40738fd1498Szrj r = add_file_to_cache_tab (file_path);
40838fd1498Szrj return r;
40938fd1498Szrj }
41038fd1498Szrj
41138fd1498Szrj /* Default constructor for a cache of file used by caret
41238fd1498Szrj diagnostic. */
41338fd1498Szrj
fcache()41438fd1498Szrj fcache::fcache ()
41538fd1498Szrj : use_count (0), file_path (NULL), fp (NULL), data (0),
41638fd1498Szrj size (0), nb_read (0), line_start_idx (0), line_num (0),
41738fd1498Szrj total_lines (0), missing_trailing_newline (true)
41838fd1498Szrj {
41938fd1498Szrj line_record.create (0);
42038fd1498Szrj }
42138fd1498Szrj
42238fd1498Szrj /* Destructor for a cache of file used by caret diagnostic. */
42338fd1498Szrj
~fcache()42438fd1498Szrj fcache::~fcache ()
42538fd1498Szrj {
42638fd1498Szrj if (fp)
42738fd1498Szrj {
42838fd1498Szrj fclose (fp);
42938fd1498Szrj fp = NULL;
43038fd1498Szrj }
43138fd1498Szrj if (data)
43238fd1498Szrj {
43338fd1498Szrj XDELETEVEC (data);
43438fd1498Szrj data = 0;
43538fd1498Szrj }
43638fd1498Szrj line_record.release ();
43738fd1498Szrj }
43838fd1498Szrj
43938fd1498Szrj /* Returns TRUE iff the cache would need to be filled with data coming
44038fd1498Szrj from the file. That is, either the cache is empty or full or the
44138fd1498Szrj current line is empty. Note that if the cache is full, it would
44238fd1498Szrj need to be extended and filled again. */
44338fd1498Szrj
44438fd1498Szrj static bool
needs_read(fcache * c)44538fd1498Szrj needs_read (fcache *c)
44638fd1498Szrj {
44738fd1498Szrj return (c->nb_read == 0
44838fd1498Szrj || c->nb_read == c->size
44938fd1498Szrj || (c->line_start_idx >= c->nb_read - 1));
45038fd1498Szrj }
45138fd1498Szrj
45238fd1498Szrj /* Return TRUE iff the cache is full and thus needs to be
45338fd1498Szrj extended. */
45438fd1498Szrj
45538fd1498Szrj static bool
needs_grow(fcache * c)45638fd1498Szrj needs_grow (fcache *c)
45738fd1498Szrj {
45838fd1498Szrj return c->nb_read == c->size;
45938fd1498Szrj }
46038fd1498Szrj
46138fd1498Szrj /* Grow the cache if it needs to be extended. */
46238fd1498Szrj
46338fd1498Szrj static void
maybe_grow(fcache * c)46438fd1498Szrj maybe_grow (fcache *c)
46538fd1498Szrj {
46638fd1498Szrj if (!needs_grow (c))
46738fd1498Szrj return;
46838fd1498Szrj
46938fd1498Szrj size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
47038fd1498Szrj c->data = XRESIZEVEC (char, c->data, size);
47138fd1498Szrj c->size = size;
47238fd1498Szrj }
47338fd1498Szrj
47438fd1498Szrj /* Read more data into the cache. Extends the cache if need be.
47538fd1498Szrj Returns TRUE iff new data could be read. */
47638fd1498Szrj
47738fd1498Szrj static bool
read_data(fcache * c)47838fd1498Szrj read_data (fcache *c)
47938fd1498Szrj {
48038fd1498Szrj if (feof (c->fp) || ferror (c->fp))
48138fd1498Szrj return false;
48238fd1498Szrj
48338fd1498Szrj maybe_grow (c);
48438fd1498Szrj
48538fd1498Szrj char * from = c->data + c->nb_read;
48638fd1498Szrj size_t to_read = c->size - c->nb_read;
48738fd1498Szrj size_t nb_read = fread (from, 1, to_read, c->fp);
48838fd1498Szrj
48938fd1498Szrj if (ferror (c->fp))
49038fd1498Szrj return false;
49138fd1498Szrj
49238fd1498Szrj c->nb_read += nb_read;
49338fd1498Szrj return !!nb_read;
49438fd1498Szrj }
49538fd1498Szrj
49638fd1498Szrj /* Read new data iff the cache needs to be filled with more data
49738fd1498Szrj coming from the file FP. Return TRUE iff the cache was filled with
49838fd1498Szrj mode data. */
49938fd1498Szrj
50038fd1498Szrj static bool
maybe_read_data(fcache * c)50138fd1498Szrj maybe_read_data (fcache *c)
50238fd1498Szrj {
50338fd1498Szrj if (!needs_read (c))
50438fd1498Szrj return false;
50538fd1498Szrj return read_data (c);
50638fd1498Szrj }
50738fd1498Szrj
50838fd1498Szrj /* Read a new line from file FP, using C as a cache for the data
50938fd1498Szrj coming from the file. Upon successful completion, *LINE is set to
51038fd1498Szrj the beginning of the line found. *LINE points directly in the
51138fd1498Szrj line cache and is only valid until the next call of get_next_line.
51238fd1498Szrj *LINE_LEN is set to the length of the line. Note that the line
51338fd1498Szrj does not contain any terminal delimiter. This function returns
51438fd1498Szrj true if some data was read or process from the cache, false
51538fd1498Szrj otherwise. Note that subsequent calls to get_next_line might
51638fd1498Szrj make the content of *LINE invalid. */
51738fd1498Szrj
51838fd1498Szrj static bool
get_next_line(fcache * c,char ** line,ssize_t * line_len)51938fd1498Szrj get_next_line (fcache *c, char **line, ssize_t *line_len)
52038fd1498Szrj {
52138fd1498Szrj /* Fill the cache with data to process. */
52238fd1498Szrj maybe_read_data (c);
52338fd1498Szrj
52438fd1498Szrj size_t remaining_size = c->nb_read - c->line_start_idx;
52538fd1498Szrj if (remaining_size == 0)
52638fd1498Szrj /* There is no more data to process. */
52738fd1498Szrj return false;
52838fd1498Szrj
52938fd1498Szrj char *line_start = c->data + c->line_start_idx;
53038fd1498Szrj
53138fd1498Szrj char *next_line_start = NULL;
53238fd1498Szrj size_t len = 0;
53338fd1498Szrj char *line_end = (char *) memchr (line_start, '\n', remaining_size);
53438fd1498Szrj if (line_end == NULL)
53538fd1498Szrj {
53638fd1498Szrj /* We haven't found the end-of-line delimiter in the cache.
53738fd1498Szrj Fill the cache with more data from the file and look for the
53838fd1498Szrj '\n'. */
53938fd1498Szrj while (maybe_read_data (c))
54038fd1498Szrj {
54138fd1498Szrj line_start = c->data + c->line_start_idx;
54238fd1498Szrj remaining_size = c->nb_read - c->line_start_idx;
54338fd1498Szrj line_end = (char *) memchr (line_start, '\n', remaining_size);
54438fd1498Szrj if (line_end != NULL)
54538fd1498Szrj {
54638fd1498Szrj next_line_start = line_end + 1;
54738fd1498Szrj break;
54838fd1498Szrj }
54938fd1498Szrj }
55038fd1498Szrj if (line_end == NULL)
55138fd1498Szrj {
55238fd1498Szrj /* We've loadded all the file into the cache and still no
55338fd1498Szrj '\n'. Let's say the line ends up at one byte passed the
55438fd1498Szrj end of the file. This is to stay consistent with the case
55538fd1498Szrj of when the line ends up with a '\n' and line_end points to
55638fd1498Szrj that terminal '\n'. That consistency is useful below in
55738fd1498Szrj the len calculation. */
55838fd1498Szrj line_end = c->data + c->nb_read ;
55938fd1498Szrj c->missing_trailing_newline = true;
56038fd1498Szrj }
56138fd1498Szrj else
56238fd1498Szrj c->missing_trailing_newline = false;
56338fd1498Szrj }
56438fd1498Szrj else
56538fd1498Szrj {
56638fd1498Szrj next_line_start = line_end + 1;
56738fd1498Szrj c->missing_trailing_newline = false;
56838fd1498Szrj }
56938fd1498Szrj
57038fd1498Szrj if (ferror (c->fp))
57138fd1498Szrj return false;
57238fd1498Szrj
57338fd1498Szrj /* At this point, we've found the end of the of line. It either
57438fd1498Szrj points to the '\n' or to one byte after the last byte of the
57538fd1498Szrj file. */
57638fd1498Szrj gcc_assert (line_end != NULL);
57738fd1498Szrj
57838fd1498Szrj len = line_end - line_start;
57938fd1498Szrj
58038fd1498Szrj if (c->line_start_idx < c->nb_read)
58138fd1498Szrj *line = line_start;
58238fd1498Szrj
58338fd1498Szrj ++c->line_num;
58438fd1498Szrj
58538fd1498Szrj /* Before we update our line record, make sure the hint about the
58638fd1498Szrj total number of lines of the file is correct. If it's not, then
58738fd1498Szrj we give up recording line boundaries from now on. */
58838fd1498Szrj bool update_line_record = true;
58938fd1498Szrj if (c->line_num > c->total_lines)
59038fd1498Szrj update_line_record = false;
59138fd1498Szrj
59238fd1498Szrj /* Now update our line record so that re-reading lines from the
59338fd1498Szrj before c->line_start_idx is faster. */
59438fd1498Szrj if (update_line_record
59538fd1498Szrj && c->line_record.length () < fcache_line_record_size)
59638fd1498Szrj {
59738fd1498Szrj /* If the file lines fits in the line record, we just record all
59838fd1498Szrj its lines ...*/
59938fd1498Szrj if (c->total_lines <= fcache_line_record_size
60038fd1498Szrj && c->line_num > c->line_record.length ())
60138fd1498Szrj c->line_record.safe_push (fcache::line_info (c->line_num,
60238fd1498Szrj c->line_start_idx,
60338fd1498Szrj line_end - c->data));
60438fd1498Szrj else if (c->total_lines > fcache_line_record_size)
60538fd1498Szrj {
60638fd1498Szrj /* ... otherwise, we just scale total_lines down to
60738fd1498Szrj (fcache_line_record_size lines. */
60838fd1498Szrj size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
60938fd1498Szrj if (c->line_record.length () == 0
61038fd1498Szrj || n >= c->line_record.length ())
61138fd1498Szrj c->line_record.safe_push (fcache::line_info (c->line_num,
61238fd1498Szrj c->line_start_idx,
61338fd1498Szrj line_end - c->data));
61438fd1498Szrj }
61538fd1498Szrj }
61638fd1498Szrj
61738fd1498Szrj /* Update c->line_start_idx so that it points to the next line to be
61838fd1498Szrj read. */
61938fd1498Szrj if (next_line_start)
62038fd1498Szrj c->line_start_idx = next_line_start - c->data;
62138fd1498Szrj else
62238fd1498Szrj /* We didn't find any terminal '\n'. Let's consider that the end
62338fd1498Szrj of line is the end of the data in the cache. The next
62438fd1498Szrj invocation of get_next_line will either read more data from the
62538fd1498Szrj underlying file or return false early because we've reached the
62638fd1498Szrj end of the file. */
62738fd1498Szrj c->line_start_idx = c->nb_read;
62838fd1498Szrj
62938fd1498Szrj *line_len = len;
63038fd1498Szrj
63138fd1498Szrj return true;
63238fd1498Szrj }
63338fd1498Szrj
63438fd1498Szrj /* Consume the next bytes coming from the cache (or from its
63538fd1498Szrj underlying file if there are remaining unread bytes in the file)
63638fd1498Szrj until we reach the next end-of-line (or end-of-file). There is no
63738fd1498Szrj copying from the cache involved. Return TRUE upon successful
63838fd1498Szrj completion. */
63938fd1498Szrj
64038fd1498Szrj static bool
goto_next_line(fcache * cache)64138fd1498Szrj goto_next_line (fcache *cache)
64238fd1498Szrj {
64338fd1498Szrj char *l;
64438fd1498Szrj ssize_t len;
64538fd1498Szrj
64638fd1498Szrj return get_next_line (cache, &l, &len);
64738fd1498Szrj }
64838fd1498Szrj
64938fd1498Szrj /* Read an arbitrary line number LINE_NUM from the file cached in C.
65038fd1498Szrj If the line was read successfully, *LINE points to the beginning
65138fd1498Szrj of the line in the file cache and *LINE_LEN is the length of the
65238fd1498Szrj line. *LINE is not nul-terminated, but may contain zero bytes.
65338fd1498Szrj *LINE is only valid until the next call of read_line_num.
65438fd1498Szrj This function returns bool if a line was read. */
65538fd1498Szrj
65638fd1498Szrj static bool
read_line_num(fcache * c,size_t line_num,char ** line,ssize_t * line_len)65738fd1498Szrj read_line_num (fcache *c, size_t line_num,
65838fd1498Szrj char **line, ssize_t *line_len)
65938fd1498Szrj {
66038fd1498Szrj gcc_assert (line_num > 0);
66138fd1498Szrj
66238fd1498Szrj if (line_num <= c->line_num)
66338fd1498Szrj {
66438fd1498Szrj /* We've been asked to read lines that are before c->line_num.
66538fd1498Szrj So lets use our line record (if it's not empty) to try to
66638fd1498Szrj avoid re-reading the file from the beginning again. */
66738fd1498Szrj
66838fd1498Szrj if (c->line_record.is_empty ())
66938fd1498Szrj {
67038fd1498Szrj c->line_start_idx = 0;
67138fd1498Szrj c->line_num = 0;
67238fd1498Szrj }
67338fd1498Szrj else
67438fd1498Szrj {
67538fd1498Szrj fcache::line_info *i = NULL;
67638fd1498Szrj if (c->total_lines <= fcache_line_record_size)
67738fd1498Szrj {
67838fd1498Szrj /* In languages where the input file is not totally
67938fd1498Szrj preprocessed up front, the c->total_lines hint
68038fd1498Szrj can be smaller than the number of lines of the
68138fd1498Szrj file. In that case, only the first
68238fd1498Szrj c->total_lines have been recorded.
68338fd1498Szrj
68438fd1498Szrj Otherwise, the first c->total_lines we've read have
68538fd1498Szrj their start/end recorded here. */
68638fd1498Szrj i = (line_num <= c->total_lines)
68738fd1498Szrj ? &c->line_record[line_num - 1]
68838fd1498Szrj : &c->line_record[c->total_lines - 1];
68938fd1498Szrj gcc_assert (i->line_num <= line_num);
69038fd1498Szrj }
69138fd1498Szrj else
69238fd1498Szrj {
69338fd1498Szrj /* So the file had more lines than our line record
69438fd1498Szrj size. Thus the number of lines we've recorded has
69538fd1498Szrj been scaled down to fcache_line_reacord_size. Let's
69638fd1498Szrj pick the start/end of the recorded line that is
69738fd1498Szrj closest to line_num. */
69838fd1498Szrj size_t n = (line_num <= c->total_lines)
69938fd1498Szrj ? line_num * fcache_line_record_size / c->total_lines
70038fd1498Szrj : c ->line_record.length () - 1;
70138fd1498Szrj if (n < c->line_record.length ())
70238fd1498Szrj {
70338fd1498Szrj i = &c->line_record[n];
70438fd1498Szrj gcc_assert (i->line_num <= line_num);
70538fd1498Szrj }
70638fd1498Szrj }
70738fd1498Szrj
70838fd1498Szrj if (i && i->line_num == line_num)
70938fd1498Szrj {
71038fd1498Szrj /* We have the start/end of the line. */
71138fd1498Szrj *line = c->data + i->start_pos;
71238fd1498Szrj *line_len = i->end_pos - i->start_pos;
71338fd1498Szrj return true;
71438fd1498Szrj }
71538fd1498Szrj
71638fd1498Szrj if (i)
71738fd1498Szrj {
71838fd1498Szrj c->line_start_idx = i->start_pos;
71938fd1498Szrj c->line_num = i->line_num - 1;
72038fd1498Szrj }
72138fd1498Szrj else
72238fd1498Szrj {
72338fd1498Szrj c->line_start_idx = 0;
72438fd1498Szrj c->line_num = 0;
72538fd1498Szrj }
72638fd1498Szrj }
72738fd1498Szrj }
72838fd1498Szrj
72938fd1498Szrj /* Let's walk from line c->line_num up to line_num - 1, without
73038fd1498Szrj copying any line. */
73138fd1498Szrj while (c->line_num < line_num - 1)
73238fd1498Szrj if (!goto_next_line (c))
73338fd1498Szrj return false;
73438fd1498Szrj
73538fd1498Szrj /* The line we want is the next one. Let's read and copy it back to
73638fd1498Szrj the caller. */
73738fd1498Szrj return get_next_line (c, line, line_len);
73838fd1498Szrj }
73938fd1498Szrj
74038fd1498Szrj /* Return the physical source line that corresponds to FILE_PATH/LINE.
74138fd1498Szrj The line is not nul-terminated. The returned pointer is only
74238fd1498Szrj valid until the next call of location_get_source_line.
74338fd1498Szrj Note that the line can contain several null characters,
74438fd1498Szrj so LINE_LEN, if non-null, points to the actual length of the line.
74538fd1498Szrj If the function fails, NULL is returned. */
74638fd1498Szrj
74738fd1498Szrj const char *
location_get_source_line(const char * file_path,int line,int * line_len)74838fd1498Szrj location_get_source_line (const char *file_path, int line,
74938fd1498Szrj int *line_len)
75038fd1498Szrj {
75138fd1498Szrj char *buffer = NULL;
75238fd1498Szrj ssize_t len;
75338fd1498Szrj
75438fd1498Szrj if (line == 0)
75538fd1498Szrj return NULL;
75638fd1498Szrj
75738fd1498Szrj fcache *c = lookup_or_add_file_to_cache_tab (file_path);
75838fd1498Szrj if (c == NULL)
75938fd1498Szrj return NULL;
76038fd1498Szrj
76138fd1498Szrj bool read = read_line_num (c, line, &buffer, &len);
76238fd1498Szrj
76338fd1498Szrj if (read && line_len)
76438fd1498Szrj *line_len = len;
76538fd1498Szrj
76638fd1498Szrj return read ? buffer : NULL;
76738fd1498Szrj }
76838fd1498Szrj
76938fd1498Szrj /* Determine if FILE_PATH missing a trailing newline on its final line.
77038fd1498Szrj Only valid to call once all of the file has been loaded, by
77138fd1498Szrj requesting a line number beyond the end of the file. */
77238fd1498Szrj
77338fd1498Szrj bool
location_missing_trailing_newline(const char * file_path)77438fd1498Szrj location_missing_trailing_newline (const char *file_path)
77538fd1498Szrj {
77638fd1498Szrj fcache *c = lookup_or_add_file_to_cache_tab (file_path);
77738fd1498Szrj if (c == NULL)
77838fd1498Szrj return false;
77938fd1498Szrj
78038fd1498Szrj return c->missing_trailing_newline;
78138fd1498Szrj }
78238fd1498Szrj
78338fd1498Szrj /* Test if the location originates from the spelling location of a
78438fd1498Szrj builtin-tokens. That is, return TRUE if LOC is a (possibly
78538fd1498Szrj virtual) location of a built-in token that appears in the expansion
78638fd1498Szrj list of a macro. Please note that this function also works on
78738fd1498Szrj tokens that result from built-in tokens. For instance, the
78838fd1498Szrj function would return true if passed a token "4" that is the result
78938fd1498Szrj of the expansion of the built-in __LINE__ macro. */
79038fd1498Szrj bool
is_location_from_builtin_token(source_location loc)79138fd1498Szrj is_location_from_builtin_token (source_location loc)
79238fd1498Szrj {
79338fd1498Szrj const line_map_ordinary *map = NULL;
79438fd1498Szrj loc = linemap_resolve_location (line_table, loc,
79538fd1498Szrj LRK_SPELLING_LOCATION, &map);
79638fd1498Szrj return loc == BUILTINS_LOCATION;
79738fd1498Szrj }
79838fd1498Szrj
79938fd1498Szrj /* Expand the source location LOC into a human readable location. If
80038fd1498Szrj LOC is virtual, it resolves to the expansion point of the involved
80138fd1498Szrj macro. If LOC resolves to a builtin location, the file name of the
80238fd1498Szrj readable location is set to the string "<built-in>". */
80338fd1498Szrj
80438fd1498Szrj expanded_location
expand_location(source_location loc)80538fd1498Szrj expand_location (source_location loc)
80638fd1498Szrj {
80738fd1498Szrj return expand_location_1 (loc, /*expansion_point_p=*/true,
80838fd1498Szrj LOCATION_ASPECT_CARET);
80938fd1498Szrj }
81038fd1498Szrj
81138fd1498Szrj /* Expand the source location LOC into a human readable location. If
81238fd1498Szrj LOC is virtual, it resolves to the expansion location of the
81338fd1498Szrj relevant macro. If LOC resolves to a builtin location, the file
81438fd1498Szrj name of the readable location is set to the string
81538fd1498Szrj "<built-in>". */
81638fd1498Szrj
81738fd1498Szrj expanded_location
expand_location_to_spelling_point(source_location loc)81838fd1498Szrj expand_location_to_spelling_point (source_location loc)
81938fd1498Szrj {
82038fd1498Szrj return expand_location_1 (loc, /*expansion_point_p=*/false,
82138fd1498Szrj LOCATION_ASPECT_CARET);
82238fd1498Szrj }
82338fd1498Szrj
82438fd1498Szrj /* The rich_location class within libcpp requires a way to expand
82538fd1498Szrj source_location instances, and relies on the client code
82638fd1498Szrj providing a symbol named
82738fd1498Szrj linemap_client_expand_location_to_spelling_point
82838fd1498Szrj to do this.
82938fd1498Szrj
83038fd1498Szrj This is the implementation for libcommon.a (all host binaries),
83138fd1498Szrj which simply calls into expand_location_1. */
83238fd1498Szrj
83338fd1498Szrj expanded_location
linemap_client_expand_location_to_spelling_point(source_location loc,enum location_aspect aspect)83438fd1498Szrj linemap_client_expand_location_to_spelling_point (source_location loc,
83538fd1498Szrj enum location_aspect aspect)
83638fd1498Szrj {
83738fd1498Szrj return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
83838fd1498Szrj }
83938fd1498Szrj
84038fd1498Szrj
84138fd1498Szrj /* If LOCATION is in a system header and if it is a virtual location for
84238fd1498Szrj a token coming from the expansion of a macro, unwind it to the
84338fd1498Szrj location of the expansion point of the macro. Otherwise, just return
84438fd1498Szrj LOCATION.
84538fd1498Szrj
84638fd1498Szrj This is used for instance when we want to emit diagnostics about a
84738fd1498Szrj token that may be located in a macro that is itself defined in a
84838fd1498Szrj system header, for example, for the NULL macro. In such a case, if
84938fd1498Szrj LOCATION were passed directly to diagnostic functions such as
85038fd1498Szrj warning_at, the diagnostic would be suppressed (unless
85138fd1498Szrj -Wsystem-headers). */
85238fd1498Szrj
85338fd1498Szrj source_location
expansion_point_location_if_in_system_header(source_location location)85438fd1498Szrj expansion_point_location_if_in_system_header (source_location location)
85538fd1498Szrj {
85638fd1498Szrj if (in_system_header_at (location))
85738fd1498Szrj location = linemap_resolve_location (line_table, location,
85838fd1498Szrj LRK_MACRO_EXPANSION_POINT,
85938fd1498Szrj NULL);
86038fd1498Szrj return location;
86138fd1498Szrj }
86238fd1498Szrj
86338fd1498Szrj /* If LOCATION is a virtual location for a token coming from the expansion
86438fd1498Szrj of a macro, unwind to the location of the expansion point of the macro. */
86538fd1498Szrj
86638fd1498Szrj source_location
expansion_point_location(source_location location)86738fd1498Szrj expansion_point_location (source_location location)
86838fd1498Szrj {
86938fd1498Szrj return linemap_resolve_location (line_table, location,
87038fd1498Szrj LRK_MACRO_EXPANSION_POINT, NULL);
87138fd1498Szrj }
87238fd1498Szrj
87338fd1498Szrj /* Construct a location with caret at CARET, ranging from START to
87438fd1498Szrj finish e.g.
87538fd1498Szrj
87638fd1498Szrj 11111111112
87738fd1498Szrj 12345678901234567890
87838fd1498Szrj 522
87938fd1498Szrj 523 return foo + bar;
88038fd1498Szrj ~~~~^~~~~
88138fd1498Szrj 524
88238fd1498Szrj
88338fd1498Szrj The location's caret is at the "+", line 523 column 15, but starts
88438fd1498Szrj earlier, at the "f" of "foo" at column 11. The finish is at the "r"
88538fd1498Szrj of "bar" at column 19. */
88638fd1498Szrj
88738fd1498Szrj location_t
make_location(location_t caret,location_t start,location_t finish)88838fd1498Szrj make_location (location_t caret, location_t start, location_t finish)
88938fd1498Szrj {
89038fd1498Szrj location_t pure_loc = get_pure_location (caret);
89138fd1498Szrj source_range src_range;
89238fd1498Szrj src_range.m_start = get_start (start);
89338fd1498Szrj src_range.m_finish = get_finish (finish);
89438fd1498Szrj location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
89538fd1498Szrj pure_loc,
89638fd1498Szrj src_range,
89738fd1498Szrj NULL);
89838fd1498Szrj return combined_loc;
89938fd1498Szrj }
90038fd1498Szrj
90138fd1498Szrj /* Same as above, but taking a source range rather than two locations. */
90238fd1498Szrj
90338fd1498Szrj location_t
make_location(location_t caret,source_range src_range)90438fd1498Szrj make_location (location_t caret, source_range src_range)
90538fd1498Szrj {
90638fd1498Szrj location_t pure_loc = get_pure_location (caret);
90738fd1498Szrj return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
90838fd1498Szrj }
90938fd1498Szrj
/* Units used when reporting amounts (in base 2).  */
#define ONE_K 1024
#define ONE_M (ONE_K * ONE_K)

/* Scale the amount X for display, yielding an unsigned long:
   - X / (1024 * 1024), if X is >= 10 M (in base 2)
   - X / 1024, if X is >= 10 K (in base 2) but lower than 10 M
   - X itself otherwise.
   Note that X is evaluated more than once.  */
#define SCALE(x) ((unsigned long) ((x) >= 10 * ONE_M \
                                   ? (x) / ONE_M \
                                   : ((x) >= 10 * ONE_K \
                                      ? (x) / ONE_K \
                                      : (x))))

/* Yield the unit character matching SCALE (X):
   - 'M' if X is >= 10 M (in base 2)
   - 'k' if X is >= 10 K (in base 2) but strictly lower than 10 M
   - ' ' if X is strictly lower than 10 K.  */
#define STAT_LABEL(x) ((x) >= 10 * ONE_M ? 'M' : ((x) >= 10 * ONE_K ? 'k' : ' '))

/* Expand to two comma-separated arguments describing SIZE: the amount
   scaled to a multiple of 1K or 1M (in base 2), followed by the
   matching unit character (either k, M, or ' ').  */
#define FORMAT_AMOUNT(size) SCALE (size), STAT_LABEL (size)
93438fd1498Szrj
93538fd1498Szrj /* Dump statistics to stderr about the memory usage of the line_table
93638fd1498Szrj set of line maps. This also displays some statistics about macro
93738fd1498Szrj expansion. */
93838fd1498Szrj
93938fd1498Szrj void
dump_line_table_statistics(void)94038fd1498Szrj dump_line_table_statistics (void)
94138fd1498Szrj {
94238fd1498Szrj struct linemap_stats s;
94338fd1498Szrj long total_used_map_size,
94438fd1498Szrj macro_maps_size,
94538fd1498Szrj total_allocated_map_size;
94638fd1498Szrj
94738fd1498Szrj memset (&s, 0, sizeof (s));
94838fd1498Szrj
94938fd1498Szrj linemap_get_statistics (line_table, &s);
95038fd1498Szrj
95138fd1498Szrj macro_maps_size = s.macro_maps_used_size
95238fd1498Szrj + s.macro_maps_locations_size;
95338fd1498Szrj
95438fd1498Szrj total_allocated_map_size = s.ordinary_maps_allocated_size
95538fd1498Szrj + s.macro_maps_allocated_size
95638fd1498Szrj + s.macro_maps_locations_size;
95738fd1498Szrj
95838fd1498Szrj total_used_map_size = s.ordinary_maps_used_size
95938fd1498Szrj + s.macro_maps_used_size
96038fd1498Szrj + s.macro_maps_locations_size;
96138fd1498Szrj
96238fd1498Szrj fprintf (stderr, "Number of expanded macros: %5ld\n",
96338fd1498Szrj s.num_expanded_macros);
96438fd1498Szrj if (s.num_expanded_macros != 0)
96538fd1498Szrj fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
96638fd1498Szrj s.num_macro_tokens / s.num_expanded_macros);
96738fd1498Szrj fprintf (stderr,
96838fd1498Szrj "\nLine Table allocations during the "
96938fd1498Szrj "compilation process\n");
97038fd1498Szrj fprintf (stderr, "Number of ordinary maps used: %5ld%c\n",
97138fd1498Szrj SCALE (s.num_ordinary_maps_used),
97238fd1498Szrj STAT_LABEL (s.num_ordinary_maps_used));
97338fd1498Szrj fprintf (stderr, "Ordinary map used size: %5ld%c\n",
97438fd1498Szrj SCALE (s.ordinary_maps_used_size),
97538fd1498Szrj STAT_LABEL (s.ordinary_maps_used_size));
97638fd1498Szrj fprintf (stderr, "Number of ordinary maps allocated: %5ld%c\n",
97738fd1498Szrj SCALE (s.num_ordinary_maps_allocated),
97838fd1498Szrj STAT_LABEL (s.num_ordinary_maps_allocated));
97938fd1498Szrj fprintf (stderr, "Ordinary maps allocated size: %5ld%c\n",
98038fd1498Szrj SCALE (s.ordinary_maps_allocated_size),
98138fd1498Szrj STAT_LABEL (s.ordinary_maps_allocated_size));
98238fd1498Szrj fprintf (stderr, "Number of macro maps used: %5ld%c\n",
98338fd1498Szrj SCALE (s.num_macro_maps_used),
98438fd1498Szrj STAT_LABEL (s.num_macro_maps_used));
98538fd1498Szrj fprintf (stderr, "Macro maps used size: %5ld%c\n",
98638fd1498Szrj SCALE (s.macro_maps_used_size),
98738fd1498Szrj STAT_LABEL (s.macro_maps_used_size));
98838fd1498Szrj fprintf (stderr, "Macro maps locations size: %5ld%c\n",
98938fd1498Szrj SCALE (s.macro_maps_locations_size),
99038fd1498Szrj STAT_LABEL (s.macro_maps_locations_size));
99138fd1498Szrj fprintf (stderr, "Macro maps size: %5ld%c\n",
99238fd1498Szrj SCALE (macro_maps_size),
99338fd1498Szrj STAT_LABEL (macro_maps_size));
99438fd1498Szrj fprintf (stderr, "Duplicated maps locations size: %5ld%c\n",
99538fd1498Szrj SCALE (s.duplicated_macro_maps_locations_size),
99638fd1498Szrj STAT_LABEL (s.duplicated_macro_maps_locations_size));
99738fd1498Szrj fprintf (stderr, "Total allocated maps size: %5ld%c\n",
99838fd1498Szrj SCALE (total_allocated_map_size),
99938fd1498Szrj STAT_LABEL (total_allocated_map_size));
100038fd1498Szrj fprintf (stderr, "Total used maps size: %5ld%c\n",
100138fd1498Szrj SCALE (total_used_map_size),
100238fd1498Szrj STAT_LABEL (total_used_map_size));
100338fd1498Szrj fprintf (stderr, "Ad-hoc table size: %5ld%c\n",
100438fd1498Szrj SCALE (s.adhoc_table_size),
100538fd1498Szrj STAT_LABEL (s.adhoc_table_size));
100638fd1498Szrj fprintf (stderr, "Ad-hoc table entries used: %5ld\n",
100738fd1498Szrj s.adhoc_table_entries_used);
100838fd1498Szrj fprintf (stderr, "optimized_ranges: %i\n",
100938fd1498Szrj line_table->num_optimized_ranges);
101038fd1498Szrj fprintf (stderr, "unoptimized_ranges: %i\n",
101138fd1498Szrj line_table->num_unoptimized_ranges);
101238fd1498Szrj
101338fd1498Szrj fprintf (stderr, "\n");
101438fd1498Szrj }
101538fd1498Szrj
101638fd1498Szrj /* Get location one beyond the final location in ordinary map IDX. */
101738fd1498Szrj
101838fd1498Szrj static source_location
get_end_location(struct line_maps * set,unsigned int idx)101938fd1498Szrj get_end_location (struct line_maps *set, unsigned int idx)
102038fd1498Szrj {
102138fd1498Szrj if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
102238fd1498Szrj return set->highest_location;
102338fd1498Szrj
102438fd1498Szrj struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
102538fd1498Szrj return MAP_START_LOCATION (next_map);
102638fd1498Szrj }
102738fd1498Szrj
/* Helper function for write_digit_row.
   Write the least significant decimal digit of DIGIT to STREAM as a
   single character.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
103538fd1498Szrj
103638fd1498Szrj /* Helper function for dump_location_info.
103738fd1498Szrj Write a row of numbers to STREAM, numbering a source line,
103838fd1498Szrj giving the units, tens, hundreds etc of the column number. */
103938fd1498Szrj
104038fd1498Szrj static void
write_digit_row(FILE * stream,int indent,const line_map_ordinary * map,source_location loc,int max_col,int divisor)104138fd1498Szrj write_digit_row (FILE *stream, int indent,
104238fd1498Szrj const line_map_ordinary *map,
104338fd1498Szrj source_location loc, int max_col, int divisor)
104438fd1498Szrj {
104538fd1498Szrj fprintf (stream, "%*c", indent, ' ');
104638fd1498Szrj fprintf (stream, "|");
104738fd1498Szrj for (int column = 1; column < max_col; column++)
104838fd1498Szrj {
104938fd1498Szrj source_location column_loc = loc + (column << map->m_range_bits);
105038fd1498Szrj write_digit (stream, column_loc / divisor);
105138fd1498Szrj }
105238fd1498Szrj fprintf (stream, "\n");
105338fd1498Szrj }
105438fd1498Szrj
105538fd1498Szrj /* Write a half-closed (START) / half-open (END) interval of
105638fd1498Szrj source_location to STREAM. */
105738fd1498Szrj
105838fd1498Szrj static void
dump_location_range(FILE * stream,source_location start,source_location end)105938fd1498Szrj dump_location_range (FILE *stream,
106038fd1498Szrj source_location start, source_location end)
106138fd1498Szrj {
106238fd1498Szrj fprintf (stream,
106338fd1498Szrj " source_location interval: %u <= loc < %u\n",
106438fd1498Szrj start, end);
106538fd1498Szrj }
106638fd1498Szrj
106738fd1498Szrj /* Write a labelled description of a half-closed (START) / half-open (END)
106838fd1498Szrj interval of source_location to STREAM. */
106938fd1498Szrj
107038fd1498Szrj static void
dump_labelled_location_range(FILE * stream,const char * name,source_location start,source_location end)107138fd1498Szrj dump_labelled_location_range (FILE *stream,
107238fd1498Szrj const char *name,
107338fd1498Szrj source_location start, source_location end)
107438fd1498Szrj {
107538fd1498Szrj fprintf (stream, "%s\n", name);
107638fd1498Szrj dump_location_range (stream, start, end);
107738fd1498Szrj fprintf (stream, "\n");
107838fd1498Szrj }
107938fd1498Szrj
108038fd1498Szrj /* Write a visualization of the locations in the line_table to STREAM. */
108138fd1498Szrj
108238fd1498Szrj void
dump_location_info(FILE * stream)108338fd1498Szrj dump_location_info (FILE *stream)
108438fd1498Szrj {
108538fd1498Szrj /* Visualize the reserved locations. */
108638fd1498Szrj dump_labelled_location_range (stream, "RESERVED LOCATIONS",
108738fd1498Szrj 0, RESERVED_LOCATION_COUNT);
108838fd1498Szrj
108938fd1498Szrj /* Visualize the ordinary line_map instances, rendering the sources. */
109038fd1498Szrj for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
109138fd1498Szrj {
109238fd1498Szrj source_location end_location = get_end_location (line_table, idx);
109338fd1498Szrj /* half-closed: doesn't include this one. */
109438fd1498Szrj
109538fd1498Szrj const line_map_ordinary *map
109638fd1498Szrj = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
109738fd1498Szrj fprintf (stream, "ORDINARY MAP: %i\n", idx);
109838fd1498Szrj dump_location_range (stream,
109938fd1498Szrj MAP_START_LOCATION (map), end_location);
110038fd1498Szrj fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
110138fd1498Szrj fprintf (stream, " starting at line: %i\n",
110238fd1498Szrj ORDINARY_MAP_STARTING_LINE_NUMBER (map));
110338fd1498Szrj fprintf (stream, " column and range bits: %i\n",
110438fd1498Szrj map->m_column_and_range_bits);
110538fd1498Szrj fprintf (stream, " column bits: %i\n",
110638fd1498Szrj map->m_column_and_range_bits - map->m_range_bits);
110738fd1498Szrj fprintf (stream, " range bits: %i\n",
110838fd1498Szrj map->m_range_bits);
110938fd1498Szrj
111038fd1498Szrj /* Render the span of source lines that this "map" covers. */
111138fd1498Szrj for (source_location loc = MAP_START_LOCATION (map);
111238fd1498Szrj loc < end_location;
111338fd1498Szrj loc += (1 << map->m_range_bits) )
111438fd1498Szrj {
111538fd1498Szrj gcc_assert (pure_location_p (line_table, loc) );
111638fd1498Szrj
111738fd1498Szrj expanded_location exploc
111838fd1498Szrj = linemap_expand_location (line_table, map, loc);
111938fd1498Szrj
112038fd1498Szrj if (exploc.column == 0)
112138fd1498Szrj {
112238fd1498Szrj /* Beginning of a new source line: draw the line. */
112338fd1498Szrj
112438fd1498Szrj int line_size;
112538fd1498Szrj const char *line_text = location_get_source_line (exploc.file,
112638fd1498Szrj exploc.line,
112738fd1498Szrj &line_size);
112838fd1498Szrj if (!line_text)
112938fd1498Szrj break;
113038fd1498Szrj fprintf (stream,
113138fd1498Szrj "%s:%3i|loc:%5i|%.*s\n",
113238fd1498Szrj exploc.file, exploc.line,
113338fd1498Szrj loc,
113438fd1498Szrj line_size, line_text);
113538fd1498Szrj
113638fd1498Szrj /* "loc" is at column 0, which means "the whole line".
113738fd1498Szrj Render the locations *within* the line, by underlining
113838fd1498Szrj it, showing the source_location numeric values
113938fd1498Szrj at each column. */
114038fd1498Szrj int max_col = (1 << map->m_column_and_range_bits) - 1;
114138fd1498Szrj if (max_col > line_size)
114238fd1498Szrj max_col = line_size + 1;
114338fd1498Szrj
114438fd1498Szrj int indent = 14 + strlen (exploc.file);
114538fd1498Szrj
114638fd1498Szrj /* Thousands. */
114738fd1498Szrj if (end_location > 999)
114838fd1498Szrj write_digit_row (stream, indent, map, loc, max_col, 1000);
114938fd1498Szrj
115038fd1498Szrj /* Hundreds. */
115138fd1498Szrj if (end_location > 99)
115238fd1498Szrj write_digit_row (stream, indent, map, loc, max_col, 100);
115338fd1498Szrj
115438fd1498Szrj /* Tens. */
115538fd1498Szrj write_digit_row (stream, indent, map, loc, max_col, 10);
115638fd1498Szrj
115738fd1498Szrj /* Units. */
115838fd1498Szrj write_digit_row (stream, indent, map, loc, max_col, 1);
115938fd1498Szrj }
116038fd1498Szrj }
116138fd1498Szrj fprintf (stream, "\n");
116238fd1498Szrj }
116338fd1498Szrj
116438fd1498Szrj /* Visualize unallocated values. */
116538fd1498Szrj dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
116638fd1498Szrj line_table->highest_location,
116738fd1498Szrj LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
116838fd1498Szrj
116938fd1498Szrj /* Visualize the macro line_map instances, rendering the sources. */
117038fd1498Szrj for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
117138fd1498Szrj {
117238fd1498Szrj /* Each macro map that is allocated owns source_location values
117338fd1498Szrj that are *lower* that the one before them.
117438fd1498Szrj Hence it's meaningful to view them either in order of ascending
117538fd1498Szrj source locations, or in order of ascending macro map index. */
117638fd1498Szrj const bool ascending_source_locations = true;
117738fd1498Szrj unsigned int idx = (ascending_source_locations
117838fd1498Szrj ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
117938fd1498Szrj : i);
118038fd1498Szrj const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
118138fd1498Szrj fprintf (stream, "MACRO %i: %s (%u tokens)\n",
118238fd1498Szrj idx,
118338fd1498Szrj linemap_map_get_macro_name (map),
118438fd1498Szrj MACRO_MAP_NUM_MACRO_TOKENS (map));
118538fd1498Szrj dump_location_range (stream,
118638fd1498Szrj map->start_location,
118738fd1498Szrj (map->start_location
118838fd1498Szrj + MACRO_MAP_NUM_MACRO_TOKENS (map)));
118938fd1498Szrj inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
119038fd1498Szrj "expansion point is location %i",
119138fd1498Szrj MACRO_MAP_EXPANSION_POINT_LOCATION (map));
119238fd1498Szrj fprintf (stream, " map->start_location: %u\n",
119338fd1498Szrj map->start_location);
119438fd1498Szrj
119538fd1498Szrj fprintf (stream, " macro_locations:\n");
119638fd1498Szrj for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
119738fd1498Szrj {
119838fd1498Szrj source_location x = MACRO_MAP_LOCATIONS (map)[2 * i];
119938fd1498Szrj source_location y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
120038fd1498Szrj
120138fd1498Szrj /* linemap_add_macro_token encodes token numbers in an expansion
120238fd1498Szrj by putting them after MAP_START_LOCATION. */
120338fd1498Szrj
120438fd1498Szrj /* I'm typically seeing 4 uninitialized entries at the end of
120538fd1498Szrj 0xafafafaf.
120638fd1498Szrj This appears to be due to macro.c:replace_args
120738fd1498Szrj adding 2 extra args for padding tokens; presumably there may
120838fd1498Szrj be a leading and/or trailing padding token injected,
120938fd1498Szrj each for 2 more location slots.
121038fd1498Szrj This would explain there being up to 4 source_locations slots
121138fd1498Szrj that may be uninitialized. */
121238fd1498Szrj
121338fd1498Szrj fprintf (stream, " %u: %u, %u\n",
121438fd1498Szrj i,
121538fd1498Szrj x,
121638fd1498Szrj y);
121738fd1498Szrj if (x == y)
121838fd1498Szrj {
121938fd1498Szrj if (x < MAP_START_LOCATION (map))
122038fd1498Szrj inform (x, "token %u has x-location == y-location == %u", i, x);
122138fd1498Szrj else
122238fd1498Szrj fprintf (stream,
122338fd1498Szrj "x-location == y-location == %u encodes token # %u\n",
122438fd1498Szrj x, x - MAP_START_LOCATION (map));
122538fd1498Szrj }
122638fd1498Szrj else
122738fd1498Szrj {
122838fd1498Szrj inform (x, "token %u has x-location == %u", i, x);
122938fd1498Szrj inform (x, "token %u has y-location == %u", i, y);
123038fd1498Szrj }
123138fd1498Szrj }
123238fd1498Szrj fprintf (stream, "\n");
123338fd1498Szrj }
123438fd1498Szrj
123538fd1498Szrj /* It appears that MAX_SOURCE_LOCATION itself is never assigned to a
123638fd1498Szrj macro map, presumably due to an off-by-one error somewhere
123738fd1498Szrj between the logic in linemap_enter_macro and
123838fd1498Szrj LINEMAPS_MACRO_LOWEST_LOCATION. */
123938fd1498Szrj dump_labelled_location_range (stream, "MAX_SOURCE_LOCATION",
124038fd1498Szrj MAX_SOURCE_LOCATION,
124138fd1498Szrj MAX_SOURCE_LOCATION + 1);
124238fd1498Szrj
124338fd1498Szrj /* Visualize ad-hoc values. */
124438fd1498Szrj dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
124538fd1498Szrj MAX_SOURCE_LOCATION + 1, UINT_MAX);
124638fd1498Szrj }
124738fd1498Szrj
124838fd1498Szrj /* string_concat's constructor. */
124938fd1498Szrj
string_concat(int num,location_t * locs)125038fd1498Szrj string_concat::string_concat (int num, location_t *locs)
125138fd1498Szrj : m_num (num)
125238fd1498Szrj {
125338fd1498Szrj m_locs = ggc_vec_alloc <location_t> (num);
125438fd1498Szrj for (int i = 0; i < num; i++)
125538fd1498Szrj m_locs[i] = locs[i];
125638fd1498Szrj }
125738fd1498Szrj
125838fd1498Szrj /* string_concat_db's constructor. */
125938fd1498Szrj
string_concat_db()126038fd1498Szrj string_concat_db::string_concat_db ()
126138fd1498Szrj {
126238fd1498Szrj m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
126338fd1498Szrj }
126438fd1498Szrj
126538fd1498Szrj /* Record that a string concatenation occurred, covering NUM
126638fd1498Szrj string literal tokens. LOCS is an array of size NUM, containing the
126738fd1498Szrj locations of the tokens. A copy of LOCS is taken. */
126838fd1498Szrj
126938fd1498Szrj void
record_string_concatenation(int num,location_t * locs)127038fd1498Szrj string_concat_db::record_string_concatenation (int num, location_t *locs)
127138fd1498Szrj {
127238fd1498Szrj gcc_assert (num > 1);
127338fd1498Szrj gcc_assert (locs);
127438fd1498Szrj
127538fd1498Szrj location_t key_loc = get_key_loc (locs[0]);
127638fd1498Szrj
127738fd1498Szrj string_concat *concat
127838fd1498Szrj = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
127938fd1498Szrj m_table->put (key_loc, concat);
128038fd1498Szrj }
128138fd1498Szrj
128238fd1498Szrj /* Determine if LOC was the location of the the initial token of a
128338fd1498Szrj concatenation of string literal tokens.
128438fd1498Szrj If so, *OUT_NUM is written to with the number of tokens, and
128538fd1498Szrj *OUT_LOCS with the location of an array of locations of the
128638fd1498Szrj tokens, and return true. *OUT_LOCS is a borrowed pointer to
128738fd1498Szrj storage owned by the string_concat_db.
128838fd1498Szrj Otherwise, return false. */
128938fd1498Szrj
129038fd1498Szrj bool
get_string_concatenation(location_t loc,int * out_num,location_t ** out_locs)129138fd1498Szrj string_concat_db::get_string_concatenation (location_t loc,
129238fd1498Szrj int *out_num,
129338fd1498Szrj location_t **out_locs)
129438fd1498Szrj {
129538fd1498Szrj gcc_assert (out_num);
129638fd1498Szrj gcc_assert (out_locs);
129738fd1498Szrj
129838fd1498Szrj location_t key_loc = get_key_loc (loc);
129938fd1498Szrj
130038fd1498Szrj string_concat **concat = m_table->get (key_loc);
130138fd1498Szrj if (!concat)
130238fd1498Szrj return false;
130338fd1498Szrj
130438fd1498Szrj *out_num = (*concat)->m_num;
130538fd1498Szrj *out_locs =(*concat)->m_locs;
130638fd1498Szrj return true;
130738fd1498Szrj }
130838fd1498Szrj
130938fd1498Szrj /* Internal function. Canonicalize LOC into a form suitable for
131038fd1498Szrj use as a key within the database, stripping away macro expansion,
131138fd1498Szrj ad-hoc information, and range information, using the location of
131238fd1498Szrj the start of LOC within an ordinary linemap. */
131338fd1498Szrj
131438fd1498Szrj location_t
get_key_loc(location_t loc)131538fd1498Szrj string_concat_db::get_key_loc (location_t loc)
131638fd1498Szrj {
131738fd1498Szrj loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
131838fd1498Szrj NULL);
131938fd1498Szrj
132038fd1498Szrj loc = get_range_from_loc (line_table, loc).m_start;
132138fd1498Szrj
132238fd1498Szrj return loc;
132338fd1498Szrj }
132438fd1498Szrj
132538fd1498Szrj /* Helper class for use within get_substring_ranges_for_loc.
132638fd1498Szrj An vec of cpp_string with responsibility for releasing all of the
132738fd1498Szrj str->text for each str in the vector. */
132838fd1498Szrj
132938fd1498Szrj class auto_cpp_string_vec : public auto_vec <cpp_string>
133038fd1498Szrj {
133138fd1498Szrj public:
auto_cpp_string_vec(int alloc)133238fd1498Szrj auto_cpp_string_vec (int alloc)
133338fd1498Szrj : auto_vec <cpp_string> (alloc) {}
133438fd1498Szrj
~auto_cpp_string_vec()133538fd1498Szrj ~auto_cpp_string_vec ()
133638fd1498Szrj {
133738fd1498Szrj /* Clean up the copies within this vec. */
133838fd1498Szrj int i;
133938fd1498Szrj cpp_string *str;
134038fd1498Szrj FOR_EACH_VEC_ELT (*this, i, str)
134138fd1498Szrj free (const_cast <unsigned char *> (str->text));
134238fd1498Szrj }
134338fd1498Szrj };
134438fd1498Szrj
134538fd1498Szrj /* Attempt to populate RANGES with source location information on the
134638fd1498Szrj individual characters within the string literal found at STRLOC.
134738fd1498Szrj If CONCATS is non-NULL, then any string literals that the token at
134838fd1498Szrj STRLOC was concatenated with are also added to RANGES.
134938fd1498Szrj
135038fd1498Szrj Return NULL if successful, or an error message if any errors occurred (in
135138fd1498Szrj which case RANGES may be only partially populated and should not
135238fd1498Szrj be used).
135338fd1498Szrj
135438fd1498Szrj This is implemented by re-parsing the relevant source line(s). */
135538fd1498Szrj
135638fd1498Szrj static const char *
get_substring_ranges_for_loc(cpp_reader * pfile,string_concat_db * concats,location_t strloc,enum cpp_ttype type,cpp_substring_ranges & ranges)135738fd1498Szrj get_substring_ranges_for_loc (cpp_reader *pfile,
135838fd1498Szrj string_concat_db *concats,
135938fd1498Szrj location_t strloc,
136038fd1498Szrj enum cpp_ttype type,
136138fd1498Szrj cpp_substring_ranges &ranges)
136238fd1498Szrj {
136338fd1498Szrj gcc_assert (pfile);
136438fd1498Szrj
136538fd1498Szrj if (strloc == UNKNOWN_LOCATION)
136638fd1498Szrj return "unknown location";
136738fd1498Szrj
136838fd1498Szrj /* Reparsing the strings requires accurate location information.
136938fd1498Szrj If -ftrack-macro-expansion has been overridden from its default
137038fd1498Szrj of 2, then we might have a location of a macro expansion point,
137138fd1498Szrj rather than the location of the literal itself.
137238fd1498Szrj Avoid this by requiring that we have full macro expansion tracking
137338fd1498Szrj for substring locations to be available. */
137438fd1498Szrj if (cpp_get_options (pfile)->track_macro_expansion != 2)
137538fd1498Szrj return "track_macro_expansion != 2";
137638fd1498Szrj
137738fd1498Szrj /* If #line or # 44 "file"-style directives are present, then there's
137838fd1498Szrj no guarantee that the line numbers we have can be used to locate
137938fd1498Szrj the strings. For example, we might have a .i file with # directives
138038fd1498Szrj pointing back to lines within a .c file, but the .c file might
138138fd1498Szrj have been edited since the .i file was created.
138238fd1498Szrj In such a case, the safest course is to disable on-demand substring
138338fd1498Szrj locations. */
138438fd1498Szrj if (line_table->seen_line_directive)
138538fd1498Szrj return "seen line directive";
138638fd1498Szrj
138738fd1498Szrj /* If string concatenation has occurred at STRLOC, get the locations
138838fd1498Szrj of all of the literal tokens making up the compound string.
138938fd1498Szrj Otherwise, just use STRLOC. */
139038fd1498Szrj int num_locs = 1;
139138fd1498Szrj location_t *strlocs = &strloc;
139238fd1498Szrj if (concats)
139338fd1498Szrj concats->get_string_concatenation (strloc, &num_locs, &strlocs);
139438fd1498Szrj
139538fd1498Szrj auto_cpp_string_vec strs (num_locs);
139638fd1498Szrj auto_vec <cpp_string_location_reader> loc_readers (num_locs);
139738fd1498Szrj for (int i = 0; i < num_locs; i++)
139838fd1498Szrj {
139938fd1498Szrj /* Get range of strloc. We will use it to locate the start and finish
140038fd1498Szrj of the literal token within the line. */
140138fd1498Szrj source_range src_range = get_range_from_loc (line_table, strlocs[i]);
140238fd1498Szrj
140338fd1498Szrj if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
140438fd1498Szrj /* If the string is within a macro expansion, we can't get at the
140538fd1498Szrj end location. */
140638fd1498Szrj return "macro expansion";
140738fd1498Szrj
140838fd1498Szrj if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
140938fd1498Szrj /* If so, we can't reliably determine where the token started within
141038fd1498Szrj its line. */
141138fd1498Szrj return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
141238fd1498Szrj
141338fd1498Szrj if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
141438fd1498Szrj /* If so, we can't reliably determine where the token finished within
141538fd1498Szrj its line. */
141638fd1498Szrj return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
141738fd1498Szrj
141838fd1498Szrj expanded_location start
141938fd1498Szrj = expand_location_to_spelling_point (src_range.m_start);
142038fd1498Szrj expanded_location finish
142138fd1498Szrj = expand_location_to_spelling_point (src_range.m_finish);
142238fd1498Szrj if (start.file != finish.file)
142338fd1498Szrj return "range endpoints are in different files";
142438fd1498Szrj if (start.line != finish.line)
142538fd1498Szrj return "range endpoints are on different lines";
142638fd1498Szrj if (start.column > finish.column)
142738fd1498Szrj return "range endpoints are reversed";
142838fd1498Szrj
142938fd1498Szrj int line_width;
143038fd1498Szrj const char *line = location_get_source_line (start.file, start.line,
143138fd1498Szrj &line_width);
143238fd1498Szrj if (line == NULL)
143338fd1498Szrj return "unable to read source line";
143438fd1498Szrj
143538fd1498Szrj /* Determine the location of the literal (including quotes
143638fd1498Szrj and leading prefix chars, such as the 'u' in a u""
143738fd1498Szrj token). */
143838fd1498Szrj const char *literal = line + start.column - 1;
143938fd1498Szrj int literal_length = finish.column - start.column + 1;
144038fd1498Szrj
144138fd1498Szrj /* Ensure that we don't crash if we got the wrong location. */
144238fd1498Szrj if (line_width < (start.column - 1 + literal_length))
144338fd1498Szrj return "line is not wide enough";
144438fd1498Szrj
144538fd1498Szrj cpp_string from;
144638fd1498Szrj from.len = literal_length;
144738fd1498Szrj /* Make a copy of the literal, to avoid having to rely on
144838fd1498Szrj the lifetime of the copy of the line within the cache.
144938fd1498Szrj This will be released by the auto_cpp_string_vec dtor. */
145038fd1498Szrj from.text = XDUPVEC (unsigned char, literal, literal_length);
145138fd1498Szrj strs.safe_push (from);
145238fd1498Szrj
145338fd1498Szrj /* For very long lines, a new linemap could have started
145438fd1498Szrj halfway through the token.
145538fd1498Szrj Ensure that the loc_reader uses the linemap of the
145638fd1498Szrj *end* of the token for its start location. */
145738fd1498Szrj const line_map_ordinary *final_ord_map;
145838fd1498Szrj linemap_resolve_location (line_table, src_range.m_finish,
145938fd1498Szrj LRK_MACRO_EXPANSION_POINT, &final_ord_map);
146038fd1498Szrj location_t start_loc
146138fd1498Szrj = linemap_position_for_line_and_column (line_table, final_ord_map,
146238fd1498Szrj start.line, start.column);
146338fd1498Szrj
146438fd1498Szrj cpp_string_location_reader loc_reader (start_loc, line_table);
146538fd1498Szrj loc_readers.safe_push (loc_reader);
146638fd1498Szrj }
146738fd1498Szrj
146838fd1498Szrj /* Rerun cpp_interpret_string, or rather, a modified version of it. */
146938fd1498Szrj const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
147038fd1498Szrj loc_readers.address (),
147138fd1498Szrj num_locs, &ranges, type);
147238fd1498Szrj if (err)
147338fd1498Szrj return err;
147438fd1498Szrj
147538fd1498Szrj /* Success: "ranges" should now contain information on the string. */
147638fd1498Szrj return NULL;
147738fd1498Szrj }
147838fd1498Szrj
147938fd1498Szrj /* Attempt to populate *OUT_LOC with source location information on the
148038fd1498Szrj given characters within the string literal found at STRLOC.
148138fd1498Szrj CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
148238fd1498Szrj character set.
148338fd1498Szrj
148438fd1498Szrj For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
148538fd1498Szrj and string literal "012345\n789"
148638fd1498Szrj *OUT_LOC is written to with:
148738fd1498Szrj "012345\n789"
148838fd1498Szrj ~^~~~~
148938fd1498Szrj
149038fd1498Szrj If CONCATS is non-NULL, then any string literals that the token at
149138fd1498Szrj STRLOC was concatenated with are also considered.
149238fd1498Szrj
149338fd1498Szrj This is implemented by re-parsing the relevant source line(s).
149438fd1498Szrj
149538fd1498Szrj Return NULL if successful, or an error message if any errors occurred.
149638fd1498Szrj Error messages are intended for GCC developers (to help debugging) rather
149738fd1498Szrj than for end-users. */
149838fd1498Szrj
149938fd1498Szrj const char *
get_source_location_for_substring(cpp_reader * pfile,string_concat_db * concats,location_t strloc,enum cpp_ttype type,int caret_idx,int start_idx,int end_idx,source_location * out_loc)150038fd1498Szrj get_source_location_for_substring (cpp_reader *pfile,
150138fd1498Szrj string_concat_db *concats,
150238fd1498Szrj location_t strloc,
150338fd1498Szrj enum cpp_ttype type,
150438fd1498Szrj int caret_idx, int start_idx, int end_idx,
150538fd1498Szrj source_location *out_loc)
150638fd1498Szrj {
150738fd1498Szrj gcc_checking_assert (caret_idx >= 0);
150838fd1498Szrj gcc_checking_assert (start_idx >= 0);
150938fd1498Szrj gcc_checking_assert (end_idx >= 0);
151038fd1498Szrj gcc_assert (out_loc);
151138fd1498Szrj
151238fd1498Szrj cpp_substring_ranges ranges;
151338fd1498Szrj const char *err
151438fd1498Szrj = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
151538fd1498Szrj if (err)
151638fd1498Szrj return err;
151738fd1498Szrj
151838fd1498Szrj if (caret_idx >= ranges.get_num_ranges ())
151938fd1498Szrj return "caret_idx out of range";
152038fd1498Szrj if (start_idx >= ranges.get_num_ranges ())
152138fd1498Szrj return "start_idx out of range";
152238fd1498Szrj if (end_idx >= ranges.get_num_ranges ())
152338fd1498Szrj return "end_idx out of range";
152438fd1498Szrj
152538fd1498Szrj *out_loc = make_location (ranges.get_range (caret_idx).m_start,
152638fd1498Szrj ranges.get_range (start_idx).m_start,
152738fd1498Szrj ranges.get_range (end_idx).m_finish);
152838fd1498Szrj return NULL;
152938fd1498Szrj }
153038fd1498Szrj
153138fd1498Szrj #if CHECKING_P
153238fd1498Szrj
153338fd1498Szrj namespace selftest {
153438fd1498Szrj
153538fd1498Szrj /* Selftests of location handling. */
153638fd1498Szrj
153738fd1498Szrj /* Attempt to populate *OUT_RANGE with source location information on the
153838fd1498Szrj given character within the string literal found at STRLOC.
153938fd1498Szrj CHAR_IDX refers to an offset within the execution character set.
154038fd1498Szrj If CONCATS is non-NULL, then any string literals that the token at
154138fd1498Szrj STRLOC was concatenated with are also considered.
154238fd1498Szrj
154338fd1498Szrj This is implemented by re-parsing the relevant source line(s).
154438fd1498Szrj
154538fd1498Szrj Return NULL if successful, or an error message if any errors occurred.
154638fd1498Szrj Error messages are intended for GCC developers (to help debugging) rather
154738fd1498Szrj than for end-users. */
154838fd1498Szrj
154938fd1498Szrj static const char *
get_source_range_for_char(cpp_reader * pfile,string_concat_db * concats,location_t strloc,enum cpp_ttype type,int char_idx,source_range * out_range)155038fd1498Szrj get_source_range_for_char (cpp_reader *pfile,
155138fd1498Szrj string_concat_db *concats,
155238fd1498Szrj location_t strloc,
155338fd1498Szrj enum cpp_ttype type,
155438fd1498Szrj int char_idx,
155538fd1498Szrj source_range *out_range)
155638fd1498Szrj {
155738fd1498Szrj gcc_checking_assert (char_idx >= 0);
155838fd1498Szrj gcc_assert (out_range);
155938fd1498Szrj
156038fd1498Szrj cpp_substring_ranges ranges;
156138fd1498Szrj const char *err
156238fd1498Szrj = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
156338fd1498Szrj if (err)
156438fd1498Szrj return err;
156538fd1498Szrj
156638fd1498Szrj if (char_idx >= ranges.get_num_ranges ())
156738fd1498Szrj return "char_idx out of range";
156838fd1498Szrj
156938fd1498Szrj *out_range = ranges.get_range (char_idx);
157038fd1498Szrj return NULL;
157138fd1498Szrj }
157238fd1498Szrj
157338fd1498Szrj /* As get_source_range_for_char, but write to *OUT the number
157438fd1498Szrj of ranges that are available. */
157538fd1498Szrj
157638fd1498Szrj static const char *
get_num_source_ranges_for_substring(cpp_reader * pfile,string_concat_db * concats,location_t strloc,enum cpp_ttype type,int * out)157738fd1498Szrj get_num_source_ranges_for_substring (cpp_reader *pfile,
157838fd1498Szrj string_concat_db *concats,
157938fd1498Szrj location_t strloc,
158038fd1498Szrj enum cpp_ttype type,
158138fd1498Szrj int *out)
158238fd1498Szrj {
158338fd1498Szrj gcc_assert (out);
158438fd1498Szrj
158538fd1498Szrj cpp_substring_ranges ranges;
158638fd1498Szrj const char *err
158738fd1498Szrj = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
158838fd1498Szrj
158938fd1498Szrj if (err)
159038fd1498Szrj return err;
159138fd1498Szrj
159238fd1498Szrj *out = ranges.get_num_ranges ();
159338fd1498Szrj return NULL;
159438fd1498Szrj }
159538fd1498Szrj
159638fd1498Szrj /* Selftests of location handling. */
159738fd1498Szrj
159838fd1498Szrj /* Verify that compare() on linenum_type handles comparisons over the full
159938fd1498Szrj range of the type. */
160038fd1498Szrj
160138fd1498Szrj static void
test_linenum_comparisons()160238fd1498Szrj test_linenum_comparisons ()
160338fd1498Szrj {
160438fd1498Szrj linenum_type min_line (0);
160538fd1498Szrj linenum_type max_line (0xffffffff);
160638fd1498Szrj ASSERT_EQ (0, compare (min_line, min_line));
160738fd1498Szrj ASSERT_EQ (0, compare (max_line, max_line));
160838fd1498Szrj
160938fd1498Szrj ASSERT_GT (compare (max_line, min_line), 0);
161038fd1498Szrj ASSERT_LT (compare (min_line, max_line), 0);
161138fd1498Szrj }
161238fd1498Szrj
161338fd1498Szrj /* Helper function for verifying location data: when location_t
161438fd1498Szrj values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
161538fd1498Szrj as having column 0. */
161638fd1498Szrj
161738fd1498Szrj static bool
should_have_column_data_p(location_t loc)161838fd1498Szrj should_have_column_data_p (location_t loc)
161938fd1498Szrj {
162038fd1498Szrj if (IS_ADHOC_LOC (loc))
162138fd1498Szrj loc = get_location_from_adhoc_loc (line_table, loc);
162238fd1498Szrj if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
162338fd1498Szrj return false;
162438fd1498Szrj return true;
162538fd1498Szrj }
162638fd1498Szrj
162738fd1498Szrj /* Selftest for should_have_column_data_p. */
162838fd1498Szrj
162938fd1498Szrj static void
test_should_have_column_data_p()163038fd1498Szrj test_should_have_column_data_p ()
163138fd1498Szrj {
163238fd1498Szrj ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
163338fd1498Szrj ASSERT_TRUE
163438fd1498Szrj (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
163538fd1498Szrj ASSERT_FALSE
163638fd1498Szrj (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
163738fd1498Szrj }
163838fd1498Szrj
163938fd1498Szrj /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
164038fd1498Szrj on LOC. */
164138fd1498Szrj
164238fd1498Szrj static void
assert_loceq(const char * exp_filename,int exp_linenum,int exp_colnum,location_t loc)164338fd1498Szrj assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
164438fd1498Szrj location_t loc)
164538fd1498Szrj {
164638fd1498Szrj ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
164738fd1498Szrj ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
164838fd1498Szrj /* If location_t values are sufficiently high, then column numbers
164938fd1498Szrj will be unavailable and LOCATION_COLUMN (loc) will be 0.
165038fd1498Szrj When close to the threshold, column numbers *may* be present: if
165138fd1498Szrj the final linemap before the threshold contains a line that straddles
165238fd1498Szrj the threshold, locations in that line have column information. */
165338fd1498Szrj if (should_have_column_data_p (loc))
165438fd1498Szrj ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
165538fd1498Szrj }
165638fd1498Szrj
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests with a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.c: there are various threshold
     values for source_location/location_t beyond which line-map.c changes
     behavior (disabling of the range-packing optimization, disabling
     of column-tracking).  We can exercise these by starting the line_table
     at interesting values at or near these thresholds.

   The following struct describes a particular case within our test
   matrix.  */
167238fd1498Szrj
struct line_table_case
{
  /* Value to install as line_table->default_range_bits.  */
  int m_default_range_bits;

  /* Starting value for the line table's highest location/line; zero
     means the freshly initialized values are left alone.  */
  int m_base_location;

  /* Build a case from its two parameters.  */
  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {
  }
};
168338fd1498Szrj
168438fd1498Szrj /* Constructor. Store the old value of line_table, and create a new
168538fd1498Szrj one, using sane defaults. */
168638fd1498Szrj
line_table_test()168738fd1498Szrj line_table_test::line_table_test ()
168838fd1498Szrj {
168938fd1498Szrj gcc_assert (saved_line_table == NULL);
169038fd1498Szrj saved_line_table = line_table;
169138fd1498Szrj line_table = ggc_alloc<line_maps> ();
169238fd1498Szrj linemap_init (line_table, BUILTINS_LOCATION);
169338fd1498Szrj gcc_assert (saved_line_table->reallocator);
169438fd1498Szrj line_table->reallocator = saved_line_table->reallocator;
169538fd1498Szrj gcc_assert (saved_line_table->round_alloc_size);
169638fd1498Szrj line_table->round_alloc_size = saved_line_table->round_alloc_size;
169738fd1498Szrj line_table->default_range_bits = 0;
169838fd1498Szrj }
169938fd1498Szrj
/* Constructor.  Store the old value of line_table, and create a new
   one, using the situation described in CASE_.  */
170238fd1498Szrj
line_table_test(const line_table_case & case_)170338fd1498Szrj line_table_test::line_table_test (const line_table_case &case_)
170438fd1498Szrj {
170538fd1498Szrj gcc_assert (saved_line_table == NULL);
170638fd1498Szrj saved_line_table = line_table;
170738fd1498Szrj line_table = ggc_alloc<line_maps> ();
170838fd1498Szrj linemap_init (line_table, BUILTINS_LOCATION);
170938fd1498Szrj gcc_assert (saved_line_table->reallocator);
171038fd1498Szrj line_table->reallocator = saved_line_table->reallocator;
171138fd1498Szrj gcc_assert (saved_line_table->round_alloc_size);
171238fd1498Szrj line_table->round_alloc_size = saved_line_table->round_alloc_size;
171338fd1498Szrj line_table->default_range_bits = case_.m_default_range_bits;
171438fd1498Szrj if (case_.m_base_location)
171538fd1498Szrj {
171638fd1498Szrj line_table->highest_location = case_.m_base_location;
171738fd1498Szrj line_table->highest_line = case_.m_base_location;
171838fd1498Szrj }
171938fd1498Szrj }
172038fd1498Szrj
172138fd1498Szrj /* Destructor. Restore the old value of line_table. */
172238fd1498Szrj
~line_table_test()172338fd1498Szrj line_table_test::~line_table_test ()
172438fd1498Szrj {
172538fd1498Szrj gcc_assert (saved_line_table != NULL);
172638fd1498Szrj line_table = saved_line_table;
172738fd1498Szrj saved_line_table = NULL;
172838fd1498Szrj }
172938fd1498Szrj
173038fd1498Szrj /* Verify basic operation of ordinary linemaps. */
173138fd1498Szrj
173238fd1498Szrj static void
test_accessing_ordinary_linemaps(const line_table_case & case_)173338fd1498Szrj test_accessing_ordinary_linemaps (const line_table_case &case_)
173438fd1498Szrj {
173538fd1498Szrj line_table_test ltt (case_);
173638fd1498Szrj
173738fd1498Szrj /* Build a simple linemap describing some locations. */
173838fd1498Szrj linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
173938fd1498Szrj
174038fd1498Szrj linemap_line_start (line_table, 1, 100);
174138fd1498Szrj location_t loc_a = linemap_position_for_column (line_table, 1);
174238fd1498Szrj location_t loc_b = linemap_position_for_column (line_table, 23);
174338fd1498Szrj
174438fd1498Szrj linemap_line_start (line_table, 2, 100);
174538fd1498Szrj location_t loc_c = linemap_position_for_column (line_table, 1);
174638fd1498Szrj location_t loc_d = linemap_position_for_column (line_table, 17);
174738fd1498Szrj
174838fd1498Szrj /* Example of a very long line. */
174938fd1498Szrj linemap_line_start (line_table, 3, 2000);
175038fd1498Szrj location_t loc_e = linemap_position_for_column (line_table, 700);
175138fd1498Szrj
175238fd1498Szrj /* Transitioning back to a short line. */
175338fd1498Szrj linemap_line_start (line_table, 4, 0);
175438fd1498Szrj location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
175538fd1498Szrj
175638fd1498Szrj if (should_have_column_data_p (loc_back_to_short))
175738fd1498Szrj {
175838fd1498Szrj /* Verify that we switched to short lines in the linemap. */
175938fd1498Szrj line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
176038fd1498Szrj ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
176138fd1498Szrj }
176238fd1498Szrj
176338fd1498Szrj /* Example of a line that will eventually be seen to be longer
176438fd1498Szrj than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
176538fd1498Szrj below that. */
176638fd1498Szrj linemap_line_start (line_table, 5, 2000);
176738fd1498Szrj
176838fd1498Szrj location_t loc_start_of_very_long_line
176938fd1498Szrj = linemap_position_for_column (line_table, 2000);
177038fd1498Szrj location_t loc_too_wide
177138fd1498Szrj = linemap_position_for_column (line_table, 4097);
177238fd1498Szrj location_t loc_too_wide_2
177338fd1498Szrj = linemap_position_for_column (line_table, 4098);
177438fd1498Szrj
177538fd1498Szrj /* ...and back to a sane line length. */
177638fd1498Szrj linemap_line_start (line_table, 6, 100);
177738fd1498Szrj location_t loc_sane_again = linemap_position_for_column (line_table, 10);
177838fd1498Szrj
177938fd1498Szrj linemap_add (line_table, LC_LEAVE, false, NULL, 0);
178038fd1498Szrj
178138fd1498Szrj /* Multiple files. */
178238fd1498Szrj linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
178338fd1498Szrj linemap_line_start (line_table, 1, 200);
178438fd1498Szrj location_t loc_f = linemap_position_for_column (line_table, 150);
178538fd1498Szrj linemap_add (line_table, LC_LEAVE, false, NULL, 0);
178638fd1498Szrj
178738fd1498Szrj /* Verify that we can recover the location info. */
178838fd1498Szrj assert_loceq ("foo.c", 1, 1, loc_a);
178938fd1498Szrj assert_loceq ("foo.c", 1, 23, loc_b);
179038fd1498Szrj assert_loceq ("foo.c", 2, 1, loc_c);
179138fd1498Szrj assert_loceq ("foo.c", 2, 17, loc_d);
179238fd1498Szrj assert_loceq ("foo.c", 3, 700, loc_e);
179338fd1498Szrj assert_loceq ("foo.c", 4, 100, loc_back_to_short);
179438fd1498Szrj
179538fd1498Szrj /* In the very wide line, the initial location should be fully tracked. */
179638fd1498Szrj assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
179738fd1498Szrj /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
179838fd1498Szrj be disabled. */
179938fd1498Szrj assert_loceq ("foo.c", 5, 0, loc_too_wide);
180038fd1498Szrj assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
180138fd1498Szrj /*...and column-tracking should be re-enabled for subsequent lines. */
180238fd1498Szrj assert_loceq ("foo.c", 6, 10, loc_sane_again);
180338fd1498Szrj
180438fd1498Szrj assert_loceq ("bar.c", 1, 150, loc_f);
180538fd1498Szrj
180638fd1498Szrj ASSERT_FALSE (is_location_from_builtin_token (loc_a));
180738fd1498Szrj ASSERT_TRUE (pure_location_p (line_table, loc_a));
180838fd1498Szrj
180938fd1498Szrj /* Verify using make_location to build a range, and extracting data
181038fd1498Szrj back from it. */
181138fd1498Szrj location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
181238fd1498Szrj ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
181338fd1498Szrj ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
181438fd1498Szrj source_range src_range = get_range_from_loc (line_table, range_c_b_d);
181538fd1498Szrj ASSERT_EQ (loc_b, src_range.m_start);
181638fd1498Szrj ASSERT_EQ (loc_d, src_range.m_finish);
181738fd1498Szrj }
181838fd1498Szrj
181938fd1498Szrj /* Verify various properties of UNKNOWN_LOCATION. */
182038fd1498Szrj
182138fd1498Szrj static void
test_unknown_location()182238fd1498Szrj test_unknown_location ()
182338fd1498Szrj {
182438fd1498Szrj ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
182538fd1498Szrj ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
182638fd1498Szrj ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
182738fd1498Szrj }
182838fd1498Szrj
182938fd1498Szrj /* Verify various properties of BUILTINS_LOCATION. */
183038fd1498Szrj
183138fd1498Szrj static void
test_builtins()183238fd1498Szrj test_builtins ()
183338fd1498Szrj {
183438fd1498Szrj assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
183538fd1498Szrj ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
183638fd1498Szrj }
183738fd1498Szrj
183838fd1498Szrj /* Regression test for make_location.
183938fd1498Szrj Ensure that we use pure locations for the start/finish of the range,
184038fd1498Szrj rather than storing a packed or ad-hoc range as the start/finish. */
184138fd1498Szrj
184238fd1498Szrj static void
test_make_location_nonpure_range_endpoints(const line_table_case & case_)184338fd1498Szrj test_make_location_nonpure_range_endpoints (const line_table_case &case_)
184438fd1498Szrj {
184538fd1498Szrj /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
184638fd1498Szrj with C++ frontend.
184738fd1498Szrj ....................0000000001111111111222.
184838fd1498Szrj ....................1234567890123456789012. */
184938fd1498Szrj const char *content = " r += !aaa == bbb;\n";
185038fd1498Szrj temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
185138fd1498Szrj line_table_test ltt (case_);
185238fd1498Szrj linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
185338fd1498Szrj
185438fd1498Szrj const location_t c11 = linemap_position_for_column (line_table, 11);
185538fd1498Szrj const location_t c12 = linemap_position_for_column (line_table, 12);
185638fd1498Szrj const location_t c13 = linemap_position_for_column (line_table, 13);
185738fd1498Szrj const location_t c14 = linemap_position_for_column (line_table, 14);
185838fd1498Szrj const location_t c21 = linemap_position_for_column (line_table, 21);
185938fd1498Szrj
186038fd1498Szrj if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
186138fd1498Szrj return;
186238fd1498Szrj
186338fd1498Szrj /* Use column 13 for the caret location, arbitrarily, to verify that we
186438fd1498Szrj handle start != caret. */
186538fd1498Szrj const location_t aaa = make_location (c13, c12, c14);
186638fd1498Szrj ASSERT_EQ (c13, get_pure_location (aaa));
186738fd1498Szrj ASSERT_EQ (c12, get_start (aaa));
186838fd1498Szrj ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
186938fd1498Szrj ASSERT_EQ (c14, get_finish (aaa));
187038fd1498Szrj ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
187138fd1498Szrj
187238fd1498Szrj /* Make a location using a location with a range as the start-point. */
187338fd1498Szrj const location_t not_aaa = make_location (c11, aaa, c14);
187438fd1498Szrj ASSERT_EQ (c11, get_pure_location (not_aaa));
187538fd1498Szrj /* It should use the start location of the range, not store the range
187638fd1498Szrj itself. */
187738fd1498Szrj ASSERT_EQ (c12, get_start (not_aaa));
187838fd1498Szrj ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
187938fd1498Szrj ASSERT_EQ (c14, get_finish (not_aaa));
188038fd1498Szrj ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
188138fd1498Szrj
188238fd1498Szrj /* Similarly, make a location with a range as the end-point. */
188338fd1498Szrj const location_t aaa_eq_bbb = make_location (c12, c12, c21);
188438fd1498Szrj ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
188538fd1498Szrj ASSERT_EQ (c12, get_start (aaa_eq_bbb));
188638fd1498Szrj ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
188738fd1498Szrj ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
188838fd1498Szrj ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
188938fd1498Szrj const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
189038fd1498Szrj /* It should use the finish location of the range, not store the range
189138fd1498Szrj itself. */
189238fd1498Szrj ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
189338fd1498Szrj ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
189438fd1498Szrj ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
189538fd1498Szrj ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
189638fd1498Szrj ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
189738fd1498Szrj }
189838fd1498Szrj
189938fd1498Szrj /* Verify reading of input files (e.g. for caret-based diagnostics). */
190038fd1498Szrj
190138fd1498Szrj static void
test_reading_source_line()190238fd1498Szrj test_reading_source_line ()
190338fd1498Szrj {
190438fd1498Szrj /* Create a tempfile and write some text to it. */
190538fd1498Szrj temp_source_file tmp (SELFTEST_LOCATION, ".txt",
190638fd1498Szrj "01234567890123456789\n"
190738fd1498Szrj "This is the test text\n"
190838fd1498Szrj "This is the 3rd line");
190938fd1498Szrj
191038fd1498Szrj /* Read back a specific line from the tempfile. */
191138fd1498Szrj int line_size;
191238fd1498Szrj const char *source_line = location_get_source_line (tmp.get_filename (),
191338fd1498Szrj 3, &line_size);
191438fd1498Szrj ASSERT_TRUE (source_line != NULL);
191538fd1498Szrj ASSERT_EQ (20, line_size);
191638fd1498Szrj ASSERT_TRUE (!strncmp ("This is the 3rd line",
191738fd1498Szrj source_line, line_size));
191838fd1498Szrj
191938fd1498Szrj source_line = location_get_source_line (tmp.get_filename (),
192038fd1498Szrj 2, &line_size);
192138fd1498Szrj ASSERT_TRUE (source_line != NULL);
192238fd1498Szrj ASSERT_EQ (21, line_size);
192338fd1498Szrj ASSERT_TRUE (!strncmp ("This is the test text",
192438fd1498Szrj source_line, line_size));
192538fd1498Szrj
192638fd1498Szrj source_line = location_get_source_line (tmp.get_filename (),
192738fd1498Szrj 4, &line_size);
192838fd1498Szrj ASSERT_TRUE (source_line == NULL);
192938fd1498Szrj }
193038fd1498Szrj
193138fd1498Szrj /* Tests of lexing. */
193238fd1498Szrj
/* Check that cpp_token_as_text, applied to token TOK of PARSER, yields
   a string equal to EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT) \
  SELFTEST_BEGIN_STMT \
    unsigned char *token_text = cpp_token_as_text ((PARSER), (TOK)); \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)token_text); \
  SELFTEST_END_STMT
194138fd1498Szrj
194238fd1498Szrj /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
194338fd1498Szrj and ranges from EXP_START_COL to EXP_FINISH_COL.
194438fd1498Szrj Use LOC as the effective location of the selftest. */
194538fd1498Szrj
194638fd1498Szrj static void
assert_token_loc_eq(const location & loc,const cpp_token * tok,const char * exp_filename,int exp_linenum,int exp_start_col,int exp_finish_col)194738fd1498Szrj assert_token_loc_eq (const location &loc,
194838fd1498Szrj const cpp_token *tok,
194938fd1498Szrj const char *exp_filename, int exp_linenum,
195038fd1498Szrj int exp_start_col, int exp_finish_col)
195138fd1498Szrj {
195238fd1498Szrj location_t tok_loc = tok->src_loc;
195338fd1498Szrj ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
195438fd1498Szrj ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
195538fd1498Szrj
195638fd1498Szrj /* If location_t values are sufficiently high, then column numbers
195738fd1498Szrj will be unavailable. */
195838fd1498Szrj if (!should_have_column_data_p (tok_loc))
195938fd1498Szrj return;
196038fd1498Szrj
196138fd1498Szrj ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
196238fd1498Szrj source_range tok_range = get_range_from_loc (line_table, tok_loc);
196338fd1498Szrj ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
196438fd1498Szrj ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
196538fd1498Szrj }
196638fd1498Szrj
/* Convenience wrapper around assert_token_loc_eq for checking
   TOK->src_loc: the location of the macro's use (SELFTEST_LOCATION) is
   passed as the effective location of the selftest.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \
                            EXP_START_COL, EXP_FINISH_COL) \
  assert_token_loc_eq (SELFTEST_LOCATION, \
                       (TOK), \
                       (EXP_FILENAME), (EXP_LINENUM), \
                       (EXP_START_COL), (EXP_FINISH_COL))
197438fd1498Szrj
197538fd1498Szrj /* Test of lexing a file using libcpp, verifying tokens and their
197638fd1498Szrj location information. */
197738fd1498Szrj
197838fd1498Szrj static void
test_lexer(const line_table_case & case_)197938fd1498Szrj test_lexer (const line_table_case &case_)
198038fd1498Szrj {
198138fd1498Szrj /* Create a tempfile and write some text to it. */
198238fd1498Szrj const char *content =
198338fd1498Szrj /*00000000011111111112222222222333333.3333444444444.455555555556
198438fd1498Szrj 12345678901234567890123456789012345.6789012345678.901234567890. */
198538fd1498Szrj ("test_name /* c-style comment */\n"
198638fd1498Szrj " \"test literal\"\n"
198738fd1498Szrj " // test c++-style comment\n"
198838fd1498Szrj " 42\n");
198938fd1498Szrj temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
199038fd1498Szrj
199138fd1498Szrj line_table_test ltt (case_);
199238fd1498Szrj
199338fd1498Szrj cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
199438fd1498Szrj
199538fd1498Szrj const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
199638fd1498Szrj ASSERT_NE (fname, NULL);
199738fd1498Szrj
199838fd1498Szrj /* Verify that we get the expected tokens back, with the correct
199938fd1498Szrj location information. */
200038fd1498Szrj
200138fd1498Szrj location_t loc;
200238fd1498Szrj const cpp_token *tok;
200338fd1498Szrj tok = cpp_get_token_with_location (parser, &loc);
200438fd1498Szrj ASSERT_NE (tok, NULL);
200538fd1498Szrj ASSERT_EQ (tok->type, CPP_NAME);
200638fd1498Szrj ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
200738fd1498Szrj ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
200838fd1498Szrj
200938fd1498Szrj tok = cpp_get_token_with_location (parser, &loc);
201038fd1498Szrj ASSERT_NE (tok, NULL);
201138fd1498Szrj ASSERT_EQ (tok->type, CPP_STRING);
201238fd1498Szrj ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
201338fd1498Szrj ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
201438fd1498Szrj
201538fd1498Szrj tok = cpp_get_token_with_location (parser, &loc);
201638fd1498Szrj ASSERT_NE (tok, NULL);
201738fd1498Szrj ASSERT_EQ (tok->type, CPP_NUMBER);
201838fd1498Szrj ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
201938fd1498Szrj ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
202038fd1498Szrj
202138fd1498Szrj tok = cpp_get_token_with_location (parser, &loc);
202238fd1498Szrj ASSERT_NE (tok, NULL);
202338fd1498Szrj ASSERT_EQ (tok->type, CPP_EOF);
202438fd1498Szrj
202538fd1498Szrj cpp_finish (parser, NULL);
202638fd1498Szrj cpp_destroy (parser);
202738fd1498Szrj }
202838fd1498Szrj
202938fd1498Szrj /* Forward decls. */
203038fd1498Szrj
203138fd1498Szrj struct lexer_test;
203238fd1498Szrj class lexer_test_options;
203338fd1498Szrj
203438fd1498Szrj /* A class for specifying options of a lexer_test.
203538fd1498Szrj The "apply" vfunc is called during the lexer_test constructor. */
203638fd1498Szrj
203738fd1498Szrj class lexer_test_options
203838fd1498Szrj {
203938fd1498Szrj public:
204038fd1498Szrj virtual void apply (lexer_test &) = 0;
204138fd1498Szrj };
204238fd1498Szrj
204338fd1498Szrj /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
204438fd1498Szrj in its dtor.
204538fd1498Szrj
204638fd1498Szrj This is needed by struct lexer_test to ensure that the cleanup of the
204738fd1498Szrj cpp_reader happens *after* the cleanup of the temp_source_file. */
204838fd1498Szrj
204938fd1498Szrj class cpp_reader_ptr
205038fd1498Szrj {
205138fd1498Szrj public:
cpp_reader_ptr(cpp_reader * ptr)205238fd1498Szrj cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
205338fd1498Szrj
~cpp_reader_ptr()205438fd1498Szrj ~cpp_reader_ptr ()
205538fd1498Szrj {
205638fd1498Szrj cpp_finish (m_ptr, NULL);
205738fd1498Szrj cpp_destroy (m_ptr);
205838fd1498Szrj }
205938fd1498Szrj
206038fd1498Szrj operator cpp_reader * () const { return m_ptr; }
206138fd1498Szrj
206238fd1498Szrj private:
206338fd1498Szrj cpp_reader *m_ptr;
206438fd1498Szrj };
206538fd1498Szrj
206638fd1498Szrj /* A struct for writing lexer tests. */
206738fd1498Szrj
206838fd1498Szrj struct lexer_test
206938fd1498Szrj {
207038fd1498Szrj lexer_test (const line_table_case &case_, const char *content,
207138fd1498Szrj lexer_test_options *options);
207238fd1498Szrj ~lexer_test ();
207338fd1498Szrj
207438fd1498Szrj const cpp_token *get_token ();
207538fd1498Szrj
207638fd1498Szrj /* The ordering of these fields matters.
207738fd1498Szrj The line_table_test must be first, since the cpp_reader_ptr
207838fd1498Szrj uses it.
207938fd1498Szrj The cpp_reader must be cleaned up *after* the temp_source_file
208038fd1498Szrj since the filenames in input.c's input cache are owned by the
208138fd1498Szrj cpp_reader; in particular, when ~temp_source_file evicts the
208238fd1498Szrj filename the filenames must still be alive. */
208338fd1498Szrj line_table_test m_ltt;
208438fd1498Szrj cpp_reader_ptr m_parser;
208538fd1498Szrj temp_source_file m_tempfile;
208638fd1498Szrj string_concat_db m_concats;
208738fd1498Szrj bool m_implicitly_expect_EOF;
208838fd1498Szrj };
208938fd1498Szrj
209038fd1498Szrj /* Use an EBCDIC encoding for the execution charset, specifically
209138fd1498Szrj IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
209238fd1498Szrj
209338fd1498Szrj This exercises iconv integration within libcpp.
209438fd1498Szrj Not every build of iconv supports the given charset,
209538fd1498Szrj so we need to flag this error and handle it gracefully. */
209638fd1498Szrj
209738fd1498Szrj class ebcdic_execution_charset : public lexer_test_options
209838fd1498Szrj {
209938fd1498Szrj public:
ebcdic_execution_charset()210038fd1498Szrj ebcdic_execution_charset () : m_num_iconv_errors (0)
210138fd1498Szrj {
210238fd1498Szrj gcc_assert (s_singleton == NULL);
210338fd1498Szrj s_singleton = this;
210438fd1498Szrj }
~ebcdic_execution_charset()210538fd1498Szrj ~ebcdic_execution_charset ()
210638fd1498Szrj {
210738fd1498Szrj gcc_assert (s_singleton == this);
210838fd1498Szrj s_singleton = NULL;
210938fd1498Szrj }
211038fd1498Szrj
apply(lexer_test & test)211138fd1498Szrj void apply (lexer_test &test) FINAL OVERRIDE
211238fd1498Szrj {
211338fd1498Szrj cpp_options *cpp_opts = cpp_get_options (test.m_parser);
211438fd1498Szrj cpp_opts->narrow_charset = "IBM1047";
211538fd1498Szrj
211638fd1498Szrj cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
211738fd1498Szrj callbacks->error = on_error;
211838fd1498Szrj }
211938fd1498Szrj
on_error(cpp_reader * pfile ATTRIBUTE_UNUSED,int level ATTRIBUTE_UNUSED,int reason ATTRIBUTE_UNUSED,rich_location * richloc ATTRIBUTE_UNUSED,const char * msgid,va_list * ap ATTRIBUTE_UNUSED)212038fd1498Szrj static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
212138fd1498Szrj int level ATTRIBUTE_UNUSED,
212238fd1498Szrj int reason ATTRIBUTE_UNUSED,
212338fd1498Szrj rich_location *richloc ATTRIBUTE_UNUSED,
212438fd1498Szrj const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
212538fd1498Szrj ATTRIBUTE_FPTR_PRINTF(5,0)
212638fd1498Szrj {
212738fd1498Szrj gcc_assert (s_singleton);
212838fd1498Szrj /* Avoid exgettext from picking this up, it is translated in libcpp. */
212938fd1498Szrj const char *msg = "conversion from %s to %s not supported by iconv";
213038fd1498Szrj #ifdef ENABLE_NLS
213138fd1498Szrj msg = dgettext ("cpplib", msg);
213238fd1498Szrj #endif
213338fd1498Szrj /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
213438fd1498Szrj when the local iconv build doesn't support the conversion. */
213538fd1498Szrj if (strcmp (msgid, msg) == 0)
213638fd1498Szrj {
213738fd1498Szrj s_singleton->m_num_iconv_errors++;
213838fd1498Szrj return true;
213938fd1498Szrj }
214038fd1498Szrj
214138fd1498Szrj /* Otherwise, we have an unexpected error. */
214238fd1498Szrj abort ();
214338fd1498Szrj }
214438fd1498Szrj
iconv_errors_occurred_p()214538fd1498Szrj bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
214638fd1498Szrj
214738fd1498Szrj private:
214838fd1498Szrj static ebcdic_execution_charset *s_singleton;
214938fd1498Szrj int m_num_iconv_errors;
215038fd1498Szrj };
215138fd1498Szrj
215238fd1498Szrj ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
215338fd1498Szrj
215438fd1498Szrj /* A lexer_test_options subclass that records a list of error
215538fd1498Szrj messages emitted by the lexer. */
215638fd1498Szrj
215738fd1498Szrj class lexer_error_sink : public lexer_test_options
215838fd1498Szrj {
215938fd1498Szrj public:
lexer_error_sink()216038fd1498Szrj lexer_error_sink ()
216138fd1498Szrj {
216238fd1498Szrj gcc_assert (s_singleton == NULL);
216338fd1498Szrj s_singleton = this;
216438fd1498Szrj }
~lexer_error_sink()216538fd1498Szrj ~lexer_error_sink ()
216638fd1498Szrj {
216738fd1498Szrj gcc_assert (s_singleton == this);
216838fd1498Szrj s_singleton = NULL;
216938fd1498Szrj
217038fd1498Szrj int i;
217138fd1498Szrj char *str;
217238fd1498Szrj FOR_EACH_VEC_ELT (m_errors, i, str)
217338fd1498Szrj free (str);
217438fd1498Szrj }
217538fd1498Szrj
apply(lexer_test & test)217638fd1498Szrj void apply (lexer_test &test) FINAL OVERRIDE
217738fd1498Szrj {
217838fd1498Szrj cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
217938fd1498Szrj callbacks->error = on_error;
218038fd1498Szrj }
218138fd1498Szrj
on_error(cpp_reader * pfile ATTRIBUTE_UNUSED,int level ATTRIBUTE_UNUSED,int reason ATTRIBUTE_UNUSED,rich_location * richloc ATTRIBUTE_UNUSED,const char * msgid,va_list * ap)218238fd1498Szrj static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
218338fd1498Szrj int level ATTRIBUTE_UNUSED,
218438fd1498Szrj int reason ATTRIBUTE_UNUSED,
218538fd1498Szrj rich_location *richloc ATTRIBUTE_UNUSED,
218638fd1498Szrj const char *msgid, va_list *ap)
218738fd1498Szrj ATTRIBUTE_FPTR_PRINTF(5,0)
218838fd1498Szrj {
218938fd1498Szrj char *msg = xvasprintf (msgid, *ap);
219038fd1498Szrj s_singleton->m_errors.safe_push (msg);
219138fd1498Szrj return true;
219238fd1498Szrj }
219338fd1498Szrj
219438fd1498Szrj auto_vec<char *> m_errors;
219538fd1498Szrj
219638fd1498Szrj private:
219738fd1498Szrj static lexer_error_sink *s_singleton;
219838fd1498Szrj };
219938fd1498Szrj
220038fd1498Szrj lexer_error_sink *lexer_error_sink::s_singleton;
220138fd1498Szrj
220238fd1498Szrj /* Constructor. Override line_table with a new instance based on CASE_,
220338fd1498Szrj and write CONTENT to a tempfile. Create a cpp_reader, and use it to
220438fd1498Szrj start parsing the tempfile. */
220538fd1498Szrj
lexer_test(const line_table_case & case_,const char * content,lexer_test_options * options)220638fd1498Szrj lexer_test::lexer_test (const line_table_case &case_, const char *content,
220738fd1498Szrj lexer_test_options *options)
220838fd1498Szrj : m_ltt (case_),
220938fd1498Szrj m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
221038fd1498Szrj /* Create a tempfile and write the text to it. */
221138fd1498Szrj m_tempfile (SELFTEST_LOCATION, ".c", content),
221238fd1498Szrj m_concats (),
221338fd1498Szrj m_implicitly_expect_EOF (true)
221438fd1498Szrj {
221538fd1498Szrj if (options)
221638fd1498Szrj options->apply (*this);
221738fd1498Szrj
221838fd1498Szrj cpp_init_iconv (m_parser);
221938fd1498Szrj
222038fd1498Szrj /* Parse the file. */
222138fd1498Szrj const char *fname = cpp_read_main_file (m_parser,
222238fd1498Szrj m_tempfile.get_filename ());
222338fd1498Szrj ASSERT_NE (fname, NULL);
222438fd1498Szrj }
222538fd1498Szrj
222638fd1498Szrj /* Destructor. By default, verify that the next token in m_parser is EOF. */
222738fd1498Szrj
~lexer_test()222838fd1498Szrj lexer_test::~lexer_test ()
222938fd1498Szrj {
223038fd1498Szrj location_t loc;
223138fd1498Szrj const cpp_token *tok;
223238fd1498Szrj
223338fd1498Szrj if (m_implicitly_expect_EOF)
223438fd1498Szrj {
223538fd1498Szrj tok = cpp_get_token_with_location (m_parser, &loc);
223638fd1498Szrj ASSERT_NE (tok, NULL);
223738fd1498Szrj ASSERT_EQ (tok->type, CPP_EOF);
223838fd1498Szrj }
223938fd1498Szrj }
224038fd1498Szrj
224138fd1498Szrj /* Get the next token from m_parser. */
224238fd1498Szrj
224338fd1498Szrj const cpp_token *
get_token()224438fd1498Szrj lexer_test::get_token ()
224538fd1498Szrj {
224638fd1498Szrj location_t loc;
224738fd1498Szrj const cpp_token *tok;
224838fd1498Szrj
224938fd1498Szrj tok = cpp_get_token_with_location (m_parser, &loc);
225038fd1498Szrj ASSERT_NE (tok, NULL);
225138fd1498Szrj return tok;
225238fd1498Szrj }
225338fd1498Szrj
225438fd1498Szrj /* Verify that locations within string literals are correctly handled. */
225538fd1498Szrj
225638fd1498Szrj /* Verify get_source_range_for_substring for token(s) at STRLOC,
225738fd1498Szrj using the string concatenation database for TEST.
225838fd1498Szrj
225938fd1498Szrj Assert that the character at index IDX is on EXPECTED_LINE,
226038fd1498Szrj and that it begins at column EXPECTED_START_COL and ends at
226138fd1498Szrj EXPECTED_FINISH_COL (unless the locations are beyond
226238fd1498Szrj LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
226338fd1498Szrj columns). */
226438fd1498Szrj
226538fd1498Szrj static void
assert_char_at_range(const location & loc,lexer_test & test,location_t strloc,enum cpp_ttype type,int idx,int expected_line,int expected_start_col,int expected_finish_col)226638fd1498Szrj assert_char_at_range (const location &loc,
226738fd1498Szrj lexer_test& test,
226838fd1498Szrj location_t strloc, enum cpp_ttype type, int idx,
226938fd1498Szrj int expected_line, int expected_start_col,
227038fd1498Szrj int expected_finish_col)
227138fd1498Szrj {
227238fd1498Szrj cpp_reader *pfile = test.m_parser;
227338fd1498Szrj string_concat_db *concats = &test.m_concats;
227438fd1498Szrj
227538fd1498Szrj source_range actual_range = source_range();
227638fd1498Szrj const char *err
227738fd1498Szrj = get_source_range_for_char (pfile, concats, strloc, type, idx,
227838fd1498Szrj &actual_range);
227938fd1498Szrj if (should_have_column_data_p (strloc))
228038fd1498Szrj ASSERT_EQ_AT (loc, NULL, err);
228138fd1498Szrj else
228238fd1498Szrj {
228338fd1498Szrj ASSERT_STREQ_AT (loc,
228438fd1498Szrj "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
228538fd1498Szrj err);
228638fd1498Szrj return;
228738fd1498Szrj }
228838fd1498Szrj
228938fd1498Szrj int actual_start_line = LOCATION_LINE (actual_range.m_start);
229038fd1498Szrj ASSERT_EQ_AT (loc, expected_line, actual_start_line);
229138fd1498Szrj int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
229238fd1498Szrj ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
229338fd1498Szrj
229438fd1498Szrj if (should_have_column_data_p (actual_range.m_start))
229538fd1498Szrj {
229638fd1498Szrj int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
229738fd1498Szrj ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
229838fd1498Szrj }
229938fd1498Szrj if (should_have_column_data_p (actual_range.m_finish))
230038fd1498Szrj {
230138fd1498Szrj int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
230238fd1498Szrj ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
230338fd1498Szrj }
230438fd1498Szrj }
230538fd1498Szrj
230638fd1498Szrj /* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
230738fd1498Szrj the effective location of any errors. */
230838fd1498Szrj
230938fd1498Szrj #define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
231038fd1498Szrj EXPECTED_START_COL, EXPECTED_FINISH_COL) \
231138fd1498Szrj assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
231238fd1498Szrj (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
231338fd1498Szrj (EXPECTED_FINISH_COL))
231438fd1498Szrj
231538fd1498Szrj /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
231638fd1498Szrj using the string concatenation database for TEST.
231738fd1498Szrj
231838fd1498Szrj Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
231938fd1498Szrj
232038fd1498Szrj static void
assert_num_substring_ranges(const location & loc,lexer_test & test,location_t strloc,enum cpp_ttype type,int expected_num_ranges)232138fd1498Szrj assert_num_substring_ranges (const location &loc,
232238fd1498Szrj lexer_test& test,
232338fd1498Szrj location_t strloc,
232438fd1498Szrj enum cpp_ttype type,
232538fd1498Szrj int expected_num_ranges)
232638fd1498Szrj {
232738fd1498Szrj cpp_reader *pfile = test.m_parser;
232838fd1498Szrj string_concat_db *concats = &test.m_concats;
232938fd1498Szrj
233038fd1498Szrj int actual_num_ranges = -1;
233138fd1498Szrj const char *err
233238fd1498Szrj = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
233338fd1498Szrj &actual_num_ranges);
233438fd1498Szrj if (should_have_column_data_p (strloc))
233538fd1498Szrj ASSERT_EQ_AT (loc, NULL, err);
233638fd1498Szrj else
233738fd1498Szrj {
233838fd1498Szrj ASSERT_STREQ_AT (loc,
233938fd1498Szrj "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
234038fd1498Szrj err);
234138fd1498Szrj return;
234238fd1498Szrj }
234338fd1498Szrj ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
234438fd1498Szrj }
234538fd1498Szrj
234638fd1498Szrj /* Macro for calling assert_num_substring_ranges, supplying
234738fd1498Szrj SELFTEST_LOCATION for the effective location of any errors. */
234838fd1498Szrj
234938fd1498Szrj #define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
235038fd1498Szrj EXPECTED_NUM_RANGES) \
235138fd1498Szrj assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
235238fd1498Szrj (TYPE), (EXPECTED_NUM_RANGES))
235338fd1498Szrj
235438fd1498Szrj
235538fd1498Szrj /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
235638fd1498Szrj returns an error (using the string concatenation database for TEST). */
235738fd1498Szrj
235838fd1498Szrj static void
assert_has_no_substring_ranges(const location & loc,lexer_test & test,location_t strloc,enum cpp_ttype type,const char * expected_err)235938fd1498Szrj assert_has_no_substring_ranges (const location &loc,
236038fd1498Szrj lexer_test& test,
236138fd1498Szrj location_t strloc,
236238fd1498Szrj enum cpp_ttype type,
236338fd1498Szrj const char *expected_err)
236438fd1498Szrj {
236538fd1498Szrj cpp_reader *pfile = test.m_parser;
236638fd1498Szrj string_concat_db *concats = &test.m_concats;
236738fd1498Szrj cpp_substring_ranges ranges;
236838fd1498Szrj const char *actual_err
236938fd1498Szrj = get_substring_ranges_for_loc (pfile, concats, strloc,
237038fd1498Szrj type, ranges);
237138fd1498Szrj if (should_have_column_data_p (strloc))
237238fd1498Szrj ASSERT_STREQ_AT (loc, expected_err, actual_err);
237338fd1498Szrj else
237438fd1498Szrj ASSERT_STREQ_AT (loc,
237538fd1498Szrj "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
237638fd1498Szrj actual_err);
237738fd1498Szrj }
237838fd1498Szrj
237938fd1498Szrj #define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR) \
238038fd1498Szrj assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
238138fd1498Szrj (STRLOC), (TYPE), (ERR))
238238fd1498Szrj
238338fd1498Szrj /* Lex a simple string literal. Verify the substring location data, before
238438fd1498Szrj and after running cpp_interpret_string on it. */
238538fd1498Szrj
238638fd1498Szrj static void
test_lexer_string_locations_simple(const line_table_case & case_)238738fd1498Szrj test_lexer_string_locations_simple (const line_table_case &case_)
238838fd1498Szrj {
238938fd1498Szrj /* Digits 0-9 (with 0 at column 10), the simple way.
239038fd1498Szrj ....................000000000.11111111112.2222222223333333333
239138fd1498Szrj ....................123456789.01234567890.1234567890123456789
239238fd1498Szrj We add a trailing comment to ensure that we correctly locate
239338fd1498Szrj the end of the string literal token. */
239438fd1498Szrj const char *content = " \"0123456789\" /* not a string */\n";
239538fd1498Szrj lexer_test test (case_, content, NULL);
239638fd1498Szrj
239738fd1498Szrj /* Verify that we get the expected token back, with the correct
239838fd1498Szrj location information. */
239938fd1498Szrj const cpp_token *tok = test.get_token ();
240038fd1498Szrj ASSERT_EQ (tok->type, CPP_STRING);
240138fd1498Szrj ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
240238fd1498Szrj ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
240338fd1498Szrj
240438fd1498Szrj /* At this point in lexing, the quote characters are treated as part of
240538fd1498Szrj the string (they are stripped off by cpp_interpret_string). */
240638fd1498Szrj
240738fd1498Szrj ASSERT_EQ (tok->val.str.len, 12);
240838fd1498Szrj
240938fd1498Szrj /* Verify that cpp_interpret_string works. */
241038fd1498Szrj cpp_string dst_string;
241138fd1498Szrj const enum cpp_ttype type = CPP_STRING;
241238fd1498Szrj bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
241338fd1498Szrj &dst_string, type);
241438fd1498Szrj ASSERT_TRUE (result);
241538fd1498Szrj ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
241638fd1498Szrj free (const_cast <unsigned char *> (dst_string.text));
241738fd1498Szrj
241838fd1498Szrj /* Verify ranges of individual characters. This no longer includes the
241938fd1498Szrj opening quote, but does include the closing quote. */
242038fd1498Szrj for (int i = 0; i <= 10; i++)
242138fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
242238fd1498Szrj 10 + i, 10 + i);
242338fd1498Szrj
242438fd1498Szrj ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
242538fd1498Szrj }
242638fd1498Szrj
242738fd1498Szrj /* As test_lexer_string_locations_simple, but use an EBCDIC execution
242838fd1498Szrj encoding. */
242938fd1498Szrj
243038fd1498Szrj static void
test_lexer_string_locations_ebcdic(const line_table_case & case_)243138fd1498Szrj test_lexer_string_locations_ebcdic (const line_table_case &case_)
243238fd1498Szrj {
243338fd1498Szrj /* EBCDIC support requires iconv. */
243438fd1498Szrj if (!HAVE_ICONV)
243538fd1498Szrj return;
243638fd1498Szrj
243738fd1498Szrj /* Digits 0-9 (with 0 at column 10), the simple way.
243838fd1498Szrj ....................000000000.11111111112.2222222223333333333
243938fd1498Szrj ....................123456789.01234567890.1234567890123456789
244038fd1498Szrj We add a trailing comment to ensure that we correctly locate
244138fd1498Szrj the end of the string literal token. */
244238fd1498Szrj const char *content = " \"0123456789\" /* not a string */\n";
244338fd1498Szrj ebcdic_execution_charset use_ebcdic;
244438fd1498Szrj lexer_test test (case_, content, &use_ebcdic);
244538fd1498Szrj
244638fd1498Szrj /* Verify that we get the expected token back, with the correct
244738fd1498Szrj location information. */
244838fd1498Szrj const cpp_token *tok = test.get_token ();
244938fd1498Szrj ASSERT_EQ (tok->type, CPP_STRING);
245038fd1498Szrj ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
245138fd1498Szrj ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
245238fd1498Szrj
245338fd1498Szrj /* At this point in lexing, the quote characters are treated as part of
245438fd1498Szrj the string (they are stripped off by cpp_interpret_string). */
245538fd1498Szrj
245638fd1498Szrj ASSERT_EQ (tok->val.str.len, 12);
245738fd1498Szrj
245838fd1498Szrj /* The remainder of the test requires an iconv implementation that
245938fd1498Szrj can convert from UTF-8 to the EBCDIC encoding requested above. */
246038fd1498Szrj if (use_ebcdic.iconv_errors_occurred_p ())
246138fd1498Szrj return;
246238fd1498Szrj
246338fd1498Szrj /* Verify that cpp_interpret_string works. */
246438fd1498Szrj cpp_string dst_string;
246538fd1498Szrj const enum cpp_ttype type = CPP_STRING;
246638fd1498Szrj bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
246738fd1498Szrj &dst_string, type);
246838fd1498Szrj ASSERT_TRUE (result);
246938fd1498Szrj /* We should now have EBCDIC-encoded text, specifically
247038fd1498Szrj IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
247138fd1498Szrj The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
247238fd1498Szrj ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
247338fd1498Szrj (const char *)dst_string.text);
247438fd1498Szrj free (const_cast <unsigned char *> (dst_string.text));
247538fd1498Szrj
247638fd1498Szrj /* Verify that we don't attempt to record substring location information
247738fd1498Szrj for such cases. */
247838fd1498Szrj ASSERT_HAS_NO_SUBSTRING_RANGES
247938fd1498Szrj (test, tok->src_loc, type,
248038fd1498Szrj "execution character set != source character set");
248138fd1498Szrj }
248238fd1498Szrj
248338fd1498Szrj /* Lex a string literal containing a hex-escaped character.
248438fd1498Szrj Verify the substring location data, before and after running
248538fd1498Szrj cpp_interpret_string on it. */
248638fd1498Szrj
248738fd1498Szrj static void
test_lexer_string_locations_hex(const line_table_case & case_)248838fd1498Szrj test_lexer_string_locations_hex (const line_table_case &case_)
248938fd1498Szrj {
249038fd1498Szrj /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
249138fd1498Szrj and with a space in place of digit 6, to terminate the escaped
249238fd1498Szrj hex code.
249338fd1498Szrj ....................000000000.111111.11112222.
249438fd1498Szrj ....................123456789.012345.67890123. */
249538fd1498Szrj const char *content = " \"01234\\x35 789\"\n";
249638fd1498Szrj lexer_test test (case_, content, NULL);
249738fd1498Szrj
249838fd1498Szrj /* Verify that we get the expected token back, with the correct
249938fd1498Szrj location information. */
250038fd1498Szrj const cpp_token *tok = test.get_token ();
250138fd1498Szrj ASSERT_EQ (tok->type, CPP_STRING);
250238fd1498Szrj ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
250338fd1498Szrj ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
250438fd1498Szrj
250538fd1498Szrj /* At this point in lexing, the quote characters are treated as part of
250638fd1498Szrj the string (they are stripped off by cpp_interpret_string). */
250738fd1498Szrj ASSERT_EQ (tok->val.str.len, 15);
250838fd1498Szrj
250938fd1498Szrj /* Verify that cpp_interpret_string works. */
251038fd1498Szrj cpp_string dst_string;
251138fd1498Szrj const enum cpp_ttype type = CPP_STRING;
251238fd1498Szrj bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
251338fd1498Szrj &dst_string, type);
251438fd1498Szrj ASSERT_TRUE (result);
251538fd1498Szrj ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
251638fd1498Szrj free (const_cast <unsigned char *> (dst_string.text));
251738fd1498Szrj
251838fd1498Szrj /* Verify ranges of individual characters. This no longer includes the
251938fd1498Szrj opening quote, but does include the closing quote. */
252038fd1498Szrj for (int i = 0; i <= 4; i++)
252138fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
252238fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
252338fd1498Szrj for (int i = 6; i <= 10; i++)
252438fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
252538fd1498Szrj
252638fd1498Szrj ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
252738fd1498Szrj }
252838fd1498Szrj
252938fd1498Szrj /* Lex a string literal containing an octal-escaped character.
253038fd1498Szrj Verify the substring location data after running cpp_interpret_string
253138fd1498Szrj on it. */
253238fd1498Szrj
253338fd1498Szrj static void
test_lexer_string_locations_oct(const line_table_case & case_)253438fd1498Szrj test_lexer_string_locations_oct (const line_table_case &case_)
253538fd1498Szrj {
253638fd1498Szrj /* Digits 0-9, expressing digit 5 in ASCII as "\065"
253738fd1498Szrj and with a space in place of digit 6, to terminate the escaped
253838fd1498Szrj octal code.
253938fd1498Szrj ....................000000000.111111.11112222.2222223333333333444
254038fd1498Szrj ....................123456789.012345.67890123.4567890123456789012 */
254138fd1498Szrj const char *content = " \"01234\\065 789\" /* not a string */\n";
254238fd1498Szrj lexer_test test (case_, content, NULL);
254338fd1498Szrj
254438fd1498Szrj /* Verify that we get the expected token back, with the correct
254538fd1498Szrj location information. */
254638fd1498Szrj const cpp_token *tok = test.get_token ();
254738fd1498Szrj ASSERT_EQ (tok->type, CPP_STRING);
254838fd1498Szrj ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
254938fd1498Szrj
255038fd1498Szrj /* Verify that cpp_interpret_string works. */
255138fd1498Szrj cpp_string dst_string;
255238fd1498Szrj const enum cpp_ttype type = CPP_STRING;
255338fd1498Szrj bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
255438fd1498Szrj &dst_string, type);
255538fd1498Szrj ASSERT_TRUE (result);
255638fd1498Szrj ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
255738fd1498Szrj free (const_cast <unsigned char *> (dst_string.text));
255838fd1498Szrj
255938fd1498Szrj /* Verify ranges of individual characters. This no longer includes the
256038fd1498Szrj opening quote, but does include the closing quote. */
256138fd1498Szrj for (int i = 0; i < 5; i++)
256238fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
256338fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
256438fd1498Szrj for (int i = 6; i <= 10; i++)
256538fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
256638fd1498Szrj
256738fd1498Szrj ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
256838fd1498Szrj }
256938fd1498Szrj
257038fd1498Szrj /* Test of string literal containing letter escapes. */
257138fd1498Szrj
257238fd1498Szrj static void
test_lexer_string_locations_letter_escape_1(const line_table_case & case_)257338fd1498Szrj test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
257438fd1498Szrj {
257538fd1498Szrj /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
257638fd1498Szrj .....................000000000.1.11111.1.1.11222.22222223333333
257738fd1498Szrj .....................123456789.0.12345.6.7.89012.34567890123456. */
257838fd1498Szrj const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
257938fd1498Szrj lexer_test test (case_, content, NULL);
258038fd1498Szrj
258138fd1498Szrj /* Verify that we get the expected tokens back. */
258238fd1498Szrj const cpp_token *tok = test.get_token ();
258338fd1498Szrj ASSERT_EQ (tok->type, CPP_STRING);
258438fd1498Szrj ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
258538fd1498Szrj
258638fd1498Szrj /* Verify ranges of individual characters. */
258738fd1498Szrj /* "\t". */
258838fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
258938fd1498Szrj 0, 1, 10, 11);
259038fd1498Szrj /* "foo". */
259138fd1498Szrj for (int i = 1; i <= 3; i++)
259238fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
259338fd1498Szrj i, 1, 11 + i, 11 + i);
259438fd1498Szrj /* "\\" and "\n". */
259538fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
259638fd1498Szrj 4, 1, 15, 16);
259738fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
259838fd1498Szrj 5, 1, 17, 18);
259938fd1498Szrj
260038fd1498Szrj /* "bar" and closing quote for nul-terminator. */
260138fd1498Szrj for (int i = 6; i <= 9; i++)
260238fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
260338fd1498Szrj i, 1, 13 + i, 13 + i);
260438fd1498Szrj
260538fd1498Szrj ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
260638fd1498Szrj }
260738fd1498Szrj
260838fd1498Szrj /* Another test of a string literal containing a letter escape.
260938fd1498Szrj Based on string seen in
261038fd1498Szrj printf ("%-%\n");
261138fd1498Szrj in gcc.dg/format/c90-printf-1.c. */
261238fd1498Szrj
261338fd1498Szrj static void
test_lexer_string_locations_letter_escape_2(const line_table_case & case_)261438fd1498Szrj test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
261538fd1498Szrj {
261638fd1498Szrj /* .....................000000000.1111.11.1111.22222222223.
261738fd1498Szrj .....................123456789.0123.45.6789.01234567890. */
261838fd1498Szrj const char *content = (" \"%-%\\n\" /* non-str */\n");
261938fd1498Szrj lexer_test test (case_, content, NULL);
262038fd1498Szrj
262138fd1498Szrj /* Verify that we get the expected tokens back. */
262238fd1498Szrj const cpp_token *tok = test.get_token ();
262338fd1498Szrj ASSERT_EQ (tok->type, CPP_STRING);
262438fd1498Szrj ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
262538fd1498Szrj
262638fd1498Szrj /* Verify ranges of individual characters. */
262738fd1498Szrj /* "%-%". */
262838fd1498Szrj for (int i = 0; i < 3; i++)
262938fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
263038fd1498Szrj i, 1, 10 + i, 10 + i);
263138fd1498Szrj /* "\n". */
263238fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
263338fd1498Szrj 3, 1, 13, 14);
263438fd1498Szrj
263538fd1498Szrj /* Closing quote for nul-terminator. */
263638fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
263738fd1498Szrj 4, 1, 15, 15);
263838fd1498Szrj
263938fd1498Szrj ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
264038fd1498Szrj }
264138fd1498Szrj
264238fd1498Szrj /* Lex a string literal containing UCN 4 characters.
264338fd1498Szrj Verify the substring location data after running cpp_interpret_string
264438fd1498Szrj on it. */
264538fd1498Szrj
264638fd1498Szrj static void
test_lexer_string_locations_ucn4(const line_table_case & case_)264738fd1498Szrj test_lexer_string_locations_ucn4 (const line_table_case &case_)
264838fd1498Szrj {
264938fd1498Szrj /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
265038fd1498Szrj as UCN 4.
265138fd1498Szrj ....................000000000.111111.111122.222222223.33333333344444
265238fd1498Szrj ....................123456789.012345.678901.234567890.12345678901234 */
265338fd1498Szrj const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
265438fd1498Szrj lexer_test test (case_, content, NULL);
265538fd1498Szrj
265638fd1498Szrj /* Verify that we get the expected token back, with the correct
265738fd1498Szrj location information. */
265838fd1498Szrj const cpp_token *tok = test.get_token ();
265938fd1498Szrj ASSERT_EQ (tok->type, CPP_STRING);
266038fd1498Szrj ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
266138fd1498Szrj
266238fd1498Szrj /* Verify that cpp_interpret_string works.
266338fd1498Szrj The string should be encoded in the execution character
266438fd1498Szrj set. Assuming that that is UTF-8, we should have the following:
266538fd1498Szrj ----------- ---- ----- ------- ----------------
266638fd1498Szrj Byte offset Byte Octal Unicode Source Column(s)
266738fd1498Szrj ----------- ---- ----- ------- ----------------
266838fd1498Szrj 0 0x30 '0' 10
266938fd1498Szrj 1 0x31 '1' 11
267038fd1498Szrj 2 0x32 '2' 12
267138fd1498Szrj 3 0x33 '3' 13
267238fd1498Szrj 4 0x34 '4' 14
267338fd1498Szrj 5 0xE2 \342 U+2174 15-20
267438fd1498Szrj 6 0x85 \205 (cont) 15-20
267538fd1498Szrj 7 0xB4 \264 (cont) 15-20
267638fd1498Szrj 8 0xE2 \342 U+2175 21-26
267738fd1498Szrj 9 0x85 \205 (cont) 21-26
267838fd1498Szrj 10 0xB5 \265 (cont) 21-26
267938fd1498Szrj 11 0x37 '7' 27
268038fd1498Szrj 12 0x38 '8' 28
268138fd1498Szrj 13 0x39 '9' 29
268238fd1498Szrj 14 0x00 30 (closing quote)
268338fd1498Szrj ----------- ---- ----- ------- ---------------. */
268438fd1498Szrj
268538fd1498Szrj cpp_string dst_string;
268638fd1498Szrj const enum cpp_ttype type = CPP_STRING;
268738fd1498Szrj bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
268838fd1498Szrj &dst_string, type);
268938fd1498Szrj ASSERT_TRUE (result);
269038fd1498Szrj ASSERT_STREQ ("01234\342\205\264\342\205\265789",
269138fd1498Szrj (const char *)dst_string.text);
269238fd1498Szrj free (const_cast <unsigned char *> (dst_string.text));
269338fd1498Szrj
269438fd1498Szrj /* Verify ranges of individual characters. This no longer includes the
269538fd1498Szrj opening quote, but does include the closing quote.
269638fd1498Szrj '01234'. */
269738fd1498Szrj for (int i = 0; i <= 4; i++)
269838fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
269938fd1498Szrj /* U+2174. */
270038fd1498Szrj for (int i = 5; i <= 7; i++)
270138fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
270238fd1498Szrj /* U+2175. */
270338fd1498Szrj for (int i = 8; i <= 10; i++)
270438fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
270538fd1498Szrj /* '789' and nul terminator */
270638fd1498Szrj for (int i = 11; i <= 14; i++)
270738fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
270838fd1498Szrj
270938fd1498Szrj ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
271038fd1498Szrj }
271138fd1498Szrj
271238fd1498Szrj /* Lex a string literal containing UCN 8 characters.
271338fd1498Szrj Verify the substring location data after running cpp_interpret_string
271438fd1498Szrj on it. */
271538fd1498Szrj
271638fd1498Szrj static void
test_lexer_string_locations_ucn8(const line_table_case & case_)271738fd1498Szrj test_lexer_string_locations_ucn8 (const line_table_case &case_)
271838fd1498Szrj {
271938fd1498Szrj /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
272038fd1498Szrj ....................000000000.111111.1111222222.2222333333333.344444
272138fd1498Szrj ....................123456789.012345.6789012345.6789012345678.901234 */
272238fd1498Szrj const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
272338fd1498Szrj lexer_test test (case_, content, NULL);
272438fd1498Szrj
272538fd1498Szrj /* Verify that we get the expected token back, with the correct
272638fd1498Szrj location information. */
272738fd1498Szrj const cpp_token *tok = test.get_token ();
272838fd1498Szrj ASSERT_EQ (tok->type, CPP_STRING);
272938fd1498Szrj ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
273038fd1498Szrj "\"01234\\U00002174\\U00002175789\"");
273138fd1498Szrj
273238fd1498Szrj /* Verify that cpp_interpret_string works.
273338fd1498Szrj The UTF-8 encoding of the string is identical to that from
273438fd1498Szrj the ucn4 testcase above; the only difference is the column
273538fd1498Szrj locations. */
273638fd1498Szrj cpp_string dst_string;
273738fd1498Szrj const enum cpp_ttype type = CPP_STRING;
273838fd1498Szrj bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
273938fd1498Szrj &dst_string, type);
274038fd1498Szrj ASSERT_TRUE (result);
274138fd1498Szrj ASSERT_STREQ ("01234\342\205\264\342\205\265789",
274238fd1498Szrj (const char *)dst_string.text);
274338fd1498Szrj free (const_cast <unsigned char *> (dst_string.text));
274438fd1498Szrj
274538fd1498Szrj /* Verify ranges of individual characters. This no longer includes the
274638fd1498Szrj opening quote, but does include the closing quote.
274738fd1498Szrj '01234'. */
274838fd1498Szrj for (int i = 0; i <= 4; i++)
274938fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
275038fd1498Szrj /* U+2174. */
275138fd1498Szrj for (int i = 5; i <= 7; i++)
275238fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
275338fd1498Szrj /* U+2175. */
275438fd1498Szrj for (int i = 8; i <= 10; i++)
275538fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
275638fd1498Szrj /* '789' at columns 35-37 */
275738fd1498Szrj for (int i = 11; i <= 13; i++)
275838fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
275938fd1498Szrj /* Closing quote/nul-terminator at column 38. */
276038fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
276138fd1498Szrj
276238fd1498Szrj ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
276338fd1498Szrj }
276438fd1498Szrj
/* Read the four bytes stored at PTR_BE_VALUE as a big-endian 32-bit
   quantity and return the value in host byte order.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  uint32_t host_value = 0;

  /* Most significant byte first: shift in one byte at a time.  */
  for (int i = 0; i < 4; i++)
    host_value = (host_value << 8) | (uint32_t) bytes[i];

  return host_value;
}
277638fd1498Szrj
277738fd1498Szrj /* Lex a wide string literal and verify that attempts to read substring
277838fd1498Szrj location data from it fail gracefully. */
277938fd1498Szrj
278038fd1498Szrj static void
test_lexer_string_locations_wide_string(const line_table_case & case_)278138fd1498Szrj test_lexer_string_locations_wide_string (const line_table_case &case_)
278238fd1498Szrj {
278338fd1498Szrj /* Digits 0-9.
278438fd1498Szrj ....................000000000.11111111112.22222222233333
278538fd1498Szrj ....................123456789.01234567890.12345678901234 */
278638fd1498Szrj const char *content = " L\"0123456789\" /* non-str */\n";
278738fd1498Szrj lexer_test test (case_, content, NULL);
278838fd1498Szrj
278938fd1498Szrj /* Verify that we get the expected token back, with the correct
279038fd1498Szrj location information. */
279138fd1498Szrj const cpp_token *tok = test.get_token ();
279238fd1498Szrj ASSERT_EQ (tok->type, CPP_WSTRING);
279338fd1498Szrj ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
279438fd1498Szrj
279538fd1498Szrj /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
279638fd1498Szrj cpp_string dst_string;
279738fd1498Szrj const enum cpp_ttype type = CPP_WSTRING;
279838fd1498Szrj bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
279938fd1498Szrj &dst_string, type);
280038fd1498Szrj ASSERT_TRUE (result);
280138fd1498Szrj /* The cpp_reader defaults to big-endian with
280238fd1498Szrj CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
280338fd1498Szrj now be encoded as UTF-32BE. */
280438fd1498Szrj const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
280538fd1498Szrj ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
280638fd1498Szrj ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
280738fd1498Szrj ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
280838fd1498Szrj ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
280938fd1498Szrj free (const_cast <unsigned char *> (dst_string.text));
281038fd1498Szrj
281138fd1498Szrj /* We don't yet support generating substring location information
281238fd1498Szrj for L"" strings. */
281338fd1498Szrj ASSERT_HAS_NO_SUBSTRING_RANGES
281438fd1498Szrj (test, tok->src_loc, type,
281538fd1498Szrj "execution character set != source character set");
281638fd1498Szrj }
281738fd1498Szrj
/* Read the two bytes stored at PTR_BE_VALUE as a big-endian 16-bit
   quantity and return the value in host byte order.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  const unsigned int high = bytes[0];
  const unsigned int low = bytes[1];

  return (uint16_t) ((high << 8) | low);
}
282638fd1498Szrj
282738fd1498Szrj /* Lex a u"" string literal and verify that attempts to read substring
282838fd1498Szrj location data from it fail gracefully. */
282938fd1498Szrj
283038fd1498Szrj static void
test_lexer_string_locations_string16(const line_table_case & case_)283138fd1498Szrj test_lexer_string_locations_string16 (const line_table_case &case_)
283238fd1498Szrj {
283338fd1498Szrj /* Digits 0-9.
283438fd1498Szrj ....................000000000.11111111112.22222222233333
283538fd1498Szrj ....................123456789.01234567890.12345678901234 */
283638fd1498Szrj const char *content = " u\"0123456789\" /* non-str */\n";
283738fd1498Szrj lexer_test test (case_, content, NULL);
283838fd1498Szrj
283938fd1498Szrj /* Verify that we get the expected token back, with the correct
284038fd1498Szrj location information. */
284138fd1498Szrj const cpp_token *tok = test.get_token ();
284238fd1498Szrj ASSERT_EQ (tok->type, CPP_STRING16);
284338fd1498Szrj ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
284438fd1498Szrj
284538fd1498Szrj /* Verify that cpp_interpret_string works, using CPP_STRING16. */
284638fd1498Szrj cpp_string dst_string;
284738fd1498Szrj const enum cpp_ttype type = CPP_STRING16;
284838fd1498Szrj bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
284938fd1498Szrj &dst_string, type);
285038fd1498Szrj ASSERT_TRUE (result);
285138fd1498Szrj
285238fd1498Szrj /* The cpp_reader defaults to big-endian, so dst_string should
285338fd1498Szrj now be encoded as UTF-16BE. */
285438fd1498Szrj const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
285538fd1498Szrj ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
285638fd1498Szrj ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
285738fd1498Szrj ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
285838fd1498Szrj ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
285938fd1498Szrj free (const_cast <unsigned char *> (dst_string.text));
286038fd1498Szrj
286138fd1498Szrj /* We don't yet support generating substring location information
286238fd1498Szrj for L"" strings. */
286338fd1498Szrj ASSERT_HAS_NO_SUBSTRING_RANGES
286438fd1498Szrj (test, tok->src_loc, type,
286538fd1498Szrj "execution character set != source character set");
286638fd1498Szrj }
286738fd1498Szrj
286838fd1498Szrj /* Lex a U"" string literal and verify that attempts to read substring
286938fd1498Szrj location data from it fail gracefully. */
287038fd1498Szrj
287138fd1498Szrj static void
test_lexer_string_locations_string32(const line_table_case & case_)287238fd1498Szrj test_lexer_string_locations_string32 (const line_table_case &case_)
287338fd1498Szrj {
287438fd1498Szrj /* Digits 0-9.
287538fd1498Szrj ....................000000000.11111111112.22222222233333
287638fd1498Szrj ....................123456789.01234567890.12345678901234 */
287738fd1498Szrj const char *content = " U\"0123456789\" /* non-str */\n";
287838fd1498Szrj lexer_test test (case_, content, NULL);
287938fd1498Szrj
288038fd1498Szrj /* Verify that we get the expected token back, with the correct
288138fd1498Szrj location information. */
288238fd1498Szrj const cpp_token *tok = test.get_token ();
288338fd1498Szrj ASSERT_EQ (tok->type, CPP_STRING32);
288438fd1498Szrj ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
288538fd1498Szrj
288638fd1498Szrj /* Verify that cpp_interpret_string works, using CPP_STRING32. */
288738fd1498Szrj cpp_string dst_string;
288838fd1498Szrj const enum cpp_ttype type = CPP_STRING32;
288938fd1498Szrj bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
289038fd1498Szrj &dst_string, type);
289138fd1498Szrj ASSERT_TRUE (result);
289238fd1498Szrj
289338fd1498Szrj /* The cpp_reader defaults to big-endian, so dst_string should
289438fd1498Szrj now be encoded as UTF-32BE. */
289538fd1498Szrj const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
289638fd1498Szrj ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
289738fd1498Szrj ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
289838fd1498Szrj ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
289938fd1498Szrj ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
290038fd1498Szrj free (const_cast <unsigned char *> (dst_string.text));
290138fd1498Szrj
290238fd1498Szrj /* We don't yet support generating substring location information
290338fd1498Szrj for L"" strings. */
290438fd1498Szrj ASSERT_HAS_NO_SUBSTRING_RANGES
290538fd1498Szrj (test, tok->src_loc, type,
290638fd1498Szrj "execution character set != source character set");
290738fd1498Szrj }
290838fd1498Szrj
290938fd1498Szrj /* Lex a u8-string literal.
291038fd1498Szrj Verify the substring location data after running cpp_interpret_string
291138fd1498Szrj on it. */
291238fd1498Szrj
291338fd1498Szrj static void
test_lexer_string_locations_u8(const line_table_case & case_)291438fd1498Szrj test_lexer_string_locations_u8 (const line_table_case &case_)
291538fd1498Szrj {
291638fd1498Szrj /* Digits 0-9.
291738fd1498Szrj ....................000000000.11111111112.22222222233333
291838fd1498Szrj ....................123456789.01234567890.12345678901234 */
291938fd1498Szrj const char *content = " u8\"0123456789\" /* non-str */\n";
292038fd1498Szrj lexer_test test (case_, content, NULL);
292138fd1498Szrj
292238fd1498Szrj /* Verify that we get the expected token back, with the correct
292338fd1498Szrj location information. */
292438fd1498Szrj const cpp_token *tok = test.get_token ();
292538fd1498Szrj ASSERT_EQ (tok->type, CPP_UTF8STRING);
292638fd1498Szrj ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
292738fd1498Szrj
292838fd1498Szrj /* Verify that cpp_interpret_string works. */
292938fd1498Szrj cpp_string dst_string;
293038fd1498Szrj const enum cpp_ttype type = CPP_STRING;
293138fd1498Szrj bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
293238fd1498Szrj &dst_string, type);
293338fd1498Szrj ASSERT_TRUE (result);
293438fd1498Szrj ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
293538fd1498Szrj free (const_cast <unsigned char *> (dst_string.text));
293638fd1498Szrj
293738fd1498Szrj /* Verify ranges of individual characters. This no longer includes the
293838fd1498Szrj opening quote, but does include the closing quote. */
293938fd1498Szrj for (int i = 0; i <= 10; i++)
294038fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
294138fd1498Szrj }
294238fd1498Szrj
294338fd1498Szrj /* Lex a string literal containing UTF-8 source characters.
294438fd1498Szrj Verify the substring location data after running cpp_interpret_string
294538fd1498Szrj on it. */
294638fd1498Szrj
294738fd1498Szrj static void
test_lexer_string_locations_utf8_source(const line_table_case & case_)294838fd1498Szrj test_lexer_string_locations_utf8_source (const line_table_case &case_)
294938fd1498Szrj {
295038fd1498Szrj /* This string literal is written out to the source file as UTF-8,
295138fd1498Szrj and is of the form "before mojibake after", where "mojibake"
295238fd1498Szrj is written as the following four unicode code points:
295338fd1498Szrj U+6587 CJK UNIFIED IDEOGRAPH-6587
295438fd1498Szrj U+5B57 CJK UNIFIED IDEOGRAPH-5B57
295538fd1498Szrj U+5316 CJK UNIFIED IDEOGRAPH-5316
295638fd1498Szrj U+3051 HIRAGANA LETTER KE.
295738fd1498Szrj Each of these is 3 bytes wide when encoded in UTF-8, whereas the
295838fd1498Szrj "before" and "after" are 1 byte per unicode character.
295938fd1498Szrj
296038fd1498Szrj The numbering shown are "columns", which are *byte* numbers within
296138fd1498Szrj the line, rather than unicode character numbers.
296238fd1498Szrj
296338fd1498Szrj .................... 000000000.1111111.
296438fd1498Szrj .................... 123456789.0123456. */
296538fd1498Szrj const char *content = (" \"before "
296638fd1498Szrj /* U+6587 CJK UNIFIED IDEOGRAPH-6587
296738fd1498Szrj UTF-8: 0xE6 0x96 0x87
296838fd1498Szrj C octal escaped UTF-8: \346\226\207
296938fd1498Szrj "column" numbers: 17-19. */
297038fd1498Szrj "\346\226\207"
297138fd1498Szrj
297238fd1498Szrj /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
297338fd1498Szrj UTF-8: 0xE5 0xAD 0x97
297438fd1498Szrj C octal escaped UTF-8: \345\255\227
297538fd1498Szrj "column" numbers: 20-22. */
297638fd1498Szrj "\345\255\227"
297738fd1498Szrj
297838fd1498Szrj /* U+5316 CJK UNIFIED IDEOGRAPH-5316
297938fd1498Szrj UTF-8: 0xE5 0x8C 0x96
298038fd1498Szrj C octal escaped UTF-8: \345\214\226
298138fd1498Szrj "column" numbers: 23-25. */
298238fd1498Szrj "\345\214\226"
298338fd1498Szrj
298438fd1498Szrj /* U+3051 HIRAGANA LETTER KE
298538fd1498Szrj UTF-8: 0xE3 0x81 0x91
298638fd1498Szrj C octal escaped UTF-8: \343\201\221
298738fd1498Szrj "column" numbers: 26-28. */
298838fd1498Szrj "\343\201\221"
298938fd1498Szrj
299038fd1498Szrj /* column numbers 29 onwards
299138fd1498Szrj 2333333.33334444444444
299238fd1498Szrj 9012345.67890123456789. */
299338fd1498Szrj " after\" /* non-str */\n");
299438fd1498Szrj lexer_test test (case_, content, NULL);
299538fd1498Szrj
299638fd1498Szrj /* Verify that we get the expected token back, with the correct
299738fd1498Szrj location information. */
299838fd1498Szrj const cpp_token *tok = test.get_token ();
299938fd1498Szrj ASSERT_EQ (tok->type, CPP_STRING);
300038fd1498Szrj ASSERT_TOKEN_AS_TEXT_EQ
300138fd1498Szrj (test.m_parser, tok,
300238fd1498Szrj "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
300338fd1498Szrj
300438fd1498Szrj /* Verify that cpp_interpret_string works. */
300538fd1498Szrj cpp_string dst_string;
300638fd1498Szrj const enum cpp_ttype type = CPP_STRING;
300738fd1498Szrj bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
300838fd1498Szrj &dst_string, type);
300938fd1498Szrj ASSERT_TRUE (result);
301038fd1498Szrj ASSERT_STREQ
301138fd1498Szrj ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
301238fd1498Szrj (const char *)dst_string.text);
301338fd1498Szrj free (const_cast <unsigned char *> (dst_string.text));
301438fd1498Szrj
301538fd1498Szrj /* Verify ranges of individual characters. This no longer includes the
301638fd1498Szrj opening quote, but does include the closing quote.
301738fd1498Szrj Assuming that both source and execution encodings are UTF-8, we have
301838fd1498Szrj a run of 25 octets in each, plus the NUL terminator. */
301938fd1498Szrj for (int i = 0; i < 25; i++)
302038fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
302138fd1498Szrj /* NUL-terminator should use the closing quote at column 35. */
302238fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
302338fd1498Szrj
302438fd1498Szrj ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
302538fd1498Szrj }
302638fd1498Szrj
302738fd1498Szrj /* Test of string literal concatenation. */
302838fd1498Szrj
302938fd1498Szrj static void
test_lexer_string_locations_concatenation_1(const line_table_case & case_)303038fd1498Szrj test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
303138fd1498Szrj {
303238fd1498Szrj /* Digits 0-9.
303338fd1498Szrj .....................000000000.111111.11112222222222
303438fd1498Szrj .....................123456789.012345.67890123456789. */
303538fd1498Szrj const char *content = (" \"01234\" /* non-str */\n"
303638fd1498Szrj " \"56789\" /* non-str */\n");
303738fd1498Szrj lexer_test test (case_, content, NULL);
303838fd1498Szrj
303938fd1498Szrj location_t input_locs[2];
304038fd1498Szrj
304138fd1498Szrj /* Verify that we get the expected tokens back. */
304238fd1498Szrj auto_vec <cpp_string> input_strings;
304338fd1498Szrj const cpp_token *tok_a = test.get_token ();
304438fd1498Szrj ASSERT_EQ (tok_a->type, CPP_STRING);
304538fd1498Szrj ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
304638fd1498Szrj input_strings.safe_push (tok_a->val.str);
304738fd1498Szrj input_locs[0] = tok_a->src_loc;
304838fd1498Szrj
304938fd1498Szrj const cpp_token *tok_b = test.get_token ();
305038fd1498Szrj ASSERT_EQ (tok_b->type, CPP_STRING);
305138fd1498Szrj ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
305238fd1498Szrj input_strings.safe_push (tok_b->val.str);
305338fd1498Szrj input_locs[1] = tok_b->src_loc;
305438fd1498Szrj
305538fd1498Szrj /* Verify that cpp_interpret_string works. */
305638fd1498Szrj cpp_string dst_string;
305738fd1498Szrj const enum cpp_ttype type = CPP_STRING;
305838fd1498Szrj bool result = cpp_interpret_string (test.m_parser,
305938fd1498Szrj input_strings.address (), 2,
306038fd1498Szrj &dst_string, type);
306138fd1498Szrj ASSERT_TRUE (result);
306238fd1498Szrj ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
306338fd1498Szrj free (const_cast <unsigned char *> (dst_string.text));
306438fd1498Szrj
306538fd1498Szrj /* Simulate c-lex.c's lex_string in order to record concatenation. */
306638fd1498Szrj test.m_concats.record_string_concatenation (2, input_locs);
306738fd1498Szrj
306838fd1498Szrj location_t initial_loc = input_locs[0];
306938fd1498Szrj
307038fd1498Szrj /* "01234" on line 1. */
307138fd1498Szrj for (int i = 0; i <= 4; i++)
307238fd1498Szrj ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
307338fd1498Szrj /* "56789" in line 2, plus its closing quote for the nul terminator. */
307438fd1498Szrj for (int i = 5; i <= 10; i++)
307538fd1498Szrj ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
307638fd1498Szrj
307738fd1498Szrj ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
307838fd1498Szrj }
307938fd1498Szrj
308038fd1498Szrj /* Another test of string literal concatenation. */
308138fd1498Szrj
308238fd1498Szrj static void
test_lexer_string_locations_concatenation_2(const line_table_case & case_)308338fd1498Szrj test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
308438fd1498Szrj {
308538fd1498Szrj /* Digits 0-9.
308638fd1498Szrj .....................000000000.111.11111112222222
308738fd1498Szrj .....................123456789.012.34567890123456. */
308838fd1498Szrj const char *content = (" \"01\" /* non-str */\n"
308938fd1498Szrj " \"23\" /* non-str */\n"
309038fd1498Szrj " \"45\" /* non-str */\n"
309138fd1498Szrj " \"67\" /* non-str */\n"
309238fd1498Szrj " \"89\" /* non-str */\n");
309338fd1498Szrj lexer_test test (case_, content, NULL);
309438fd1498Szrj
309538fd1498Szrj auto_vec <cpp_string> input_strings;
309638fd1498Szrj location_t input_locs[5];
309738fd1498Szrj
309838fd1498Szrj /* Verify that we get the expected tokens back. */
309938fd1498Szrj for (int i = 0; i < 5; i++)
310038fd1498Szrj {
310138fd1498Szrj const cpp_token *tok = test.get_token ();
310238fd1498Szrj ASSERT_EQ (tok->type, CPP_STRING);
310338fd1498Szrj input_strings.safe_push (tok->val.str);
310438fd1498Szrj input_locs[i] = tok->src_loc;
310538fd1498Szrj }
310638fd1498Szrj
310738fd1498Szrj /* Verify that cpp_interpret_string works. */
310838fd1498Szrj cpp_string dst_string;
310938fd1498Szrj const enum cpp_ttype type = CPP_STRING;
311038fd1498Szrj bool result = cpp_interpret_string (test.m_parser,
311138fd1498Szrj input_strings.address (), 5,
311238fd1498Szrj &dst_string, type);
311338fd1498Szrj ASSERT_TRUE (result);
311438fd1498Szrj ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
311538fd1498Szrj free (const_cast <unsigned char *> (dst_string.text));
311638fd1498Szrj
311738fd1498Szrj /* Simulate c-lex.c's lex_string in order to record concatenation. */
311838fd1498Szrj test.m_concats.record_string_concatenation (5, input_locs);
311938fd1498Szrj
312038fd1498Szrj location_t initial_loc = input_locs[0];
312138fd1498Szrj
312238fd1498Szrj /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
312338fd1498Szrj detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
312438fd1498Szrj and expect get_source_range_for_substring to fail.
312538fd1498Szrj However, for a string concatenation test, we can have a case
312638fd1498Szrj where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
312738fd1498Szrj but subsequent strings can be after it.
312838fd1498Szrj Attempting to detect this within assert_char_at_range
312938fd1498Szrj would overcomplicate the logic for the common test cases, so
313038fd1498Szrj we detect it here. */
313138fd1498Szrj if (should_have_column_data_p (input_locs[0])
313238fd1498Szrj && !should_have_column_data_p (input_locs[4]))
313338fd1498Szrj {
313438fd1498Szrj /* Verify that get_source_range_for_substring gracefully rejects
313538fd1498Szrj this case. */
313638fd1498Szrj source_range actual_range;
313738fd1498Szrj const char *err
313838fd1498Szrj = get_source_range_for_char (test.m_parser, &test.m_concats,
313938fd1498Szrj initial_loc, type, 0, &actual_range);
314038fd1498Szrj ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
314138fd1498Szrj return;
314238fd1498Szrj }
314338fd1498Szrj
314438fd1498Szrj for (int i = 0; i < 5; i++)
314538fd1498Szrj for (int j = 0; j < 2; j++)
314638fd1498Szrj ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
314738fd1498Szrj i + 1, 10 + j, 10 + j);
314838fd1498Szrj
314938fd1498Szrj /* NUL-terminator should use the final closing quote at line 5 column 12. */
315038fd1498Szrj ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
315138fd1498Szrj
315238fd1498Szrj ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
315338fd1498Szrj }
315438fd1498Szrj
315538fd1498Szrj /* Another test of string literal concatenation, this time combined with
315638fd1498Szrj various kinds of escaped characters. */
315738fd1498Szrj
315838fd1498Szrj static void
test_lexer_string_locations_concatenation_3(const line_table_case & case_)315938fd1498Szrj test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
316038fd1498Szrj {
316138fd1498Szrj /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
316238fd1498Szrj digit 6 in ASCII as octal "\066", concatenating multiple strings. */
316338fd1498Szrj const char *content
316438fd1498Szrj /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
316538fd1498Szrj .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
316638fd1498Szrj = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
316738fd1498Szrj lexer_test test (case_, content, NULL);
316838fd1498Szrj
316938fd1498Szrj auto_vec <cpp_string> input_strings;
317038fd1498Szrj location_t input_locs[4];
317138fd1498Szrj
317238fd1498Szrj /* Verify that we get the expected tokens back. */
317338fd1498Szrj for (int i = 0; i < 4; i++)
317438fd1498Szrj {
317538fd1498Szrj const cpp_token *tok = test.get_token ();
317638fd1498Szrj ASSERT_EQ (tok->type, CPP_STRING);
317738fd1498Szrj input_strings.safe_push (tok->val.str);
317838fd1498Szrj input_locs[i] = tok->src_loc;
317938fd1498Szrj }
318038fd1498Szrj
318138fd1498Szrj /* Verify that cpp_interpret_string works. */
318238fd1498Szrj cpp_string dst_string;
318338fd1498Szrj const enum cpp_ttype type = CPP_STRING;
318438fd1498Szrj bool result = cpp_interpret_string (test.m_parser,
318538fd1498Szrj input_strings.address (), 4,
318638fd1498Szrj &dst_string, type);
318738fd1498Szrj ASSERT_TRUE (result);
318838fd1498Szrj ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
318938fd1498Szrj free (const_cast <unsigned char *> (dst_string.text));
319038fd1498Szrj
319138fd1498Szrj /* Simulate c-lex.c's lex_string in order to record concatenation. */
319238fd1498Szrj test.m_concats.record_string_concatenation (4, input_locs);
319338fd1498Szrj
319438fd1498Szrj location_t initial_loc = input_locs[0];
319538fd1498Szrj
319638fd1498Szrj for (int i = 0; i <= 4; i++)
319738fd1498Szrj ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
319838fd1498Szrj ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
319938fd1498Szrj ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
320038fd1498Szrj for (int i = 7; i <= 9; i++)
320138fd1498Szrj ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
320238fd1498Szrj
320338fd1498Szrj /* NUL-terminator should use the location of the final closing quote. */
320438fd1498Szrj ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
320538fd1498Szrj
320638fd1498Szrj ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
320738fd1498Szrj }
320838fd1498Szrj
320938fd1498Szrj /* Test of string literal in a macro. */
321038fd1498Szrj
321138fd1498Szrj static void
test_lexer_string_locations_macro(const line_table_case & case_)321238fd1498Szrj test_lexer_string_locations_macro (const line_table_case &case_)
321338fd1498Szrj {
321438fd1498Szrj /* Digits 0-9.
321538fd1498Szrj .....................0000000001111111111.22222222223.
321638fd1498Szrj .....................1234567890123456789.01234567890. */
321738fd1498Szrj const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
321838fd1498Szrj " MACRO");
321938fd1498Szrj lexer_test test (case_, content, NULL);
322038fd1498Szrj
322138fd1498Szrj /* Verify that we get the expected tokens back. */
322238fd1498Szrj const cpp_token *tok = test.get_token ();
322338fd1498Szrj ASSERT_EQ (tok->type, CPP_PADDING);
322438fd1498Szrj
322538fd1498Szrj tok = test.get_token ();
322638fd1498Szrj ASSERT_EQ (tok->type, CPP_STRING);
322738fd1498Szrj ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
322838fd1498Szrj
322938fd1498Szrj /* Verify ranges of individual characters. We ought to
323038fd1498Szrj see columns within the macro definition. */
323138fd1498Szrj for (int i = 0; i <= 10; i++)
323238fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
323338fd1498Szrj i, 1, 20 + i, 20 + i);
323438fd1498Szrj
323538fd1498Szrj ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
323638fd1498Szrj
323738fd1498Szrj tok = test.get_token ();
323838fd1498Szrj ASSERT_EQ (tok->type, CPP_PADDING);
323938fd1498Szrj }
324038fd1498Szrj
324138fd1498Szrj /* Test of stringification of a macro argument. */
324238fd1498Szrj
324338fd1498Szrj static void
test_lexer_string_locations_stringified_macro_argument(const line_table_case & case_)324438fd1498Szrj test_lexer_string_locations_stringified_macro_argument
324538fd1498Szrj (const line_table_case &case_)
324638fd1498Szrj {
324738fd1498Szrj /* .....................000000000111111111122222222223.
324838fd1498Szrj .....................123456789012345678901234567890. */
324938fd1498Szrj const char *content = ("#define MACRO(X) #X /* non-str */\n"
325038fd1498Szrj "MACRO(foo)\n");
325138fd1498Szrj lexer_test test (case_, content, NULL);
325238fd1498Szrj
325338fd1498Szrj /* Verify that we get the expected token back. */
325438fd1498Szrj const cpp_token *tok = test.get_token ();
325538fd1498Szrj ASSERT_EQ (tok->type, CPP_PADDING);
325638fd1498Szrj
325738fd1498Szrj tok = test.get_token ();
325838fd1498Szrj ASSERT_EQ (tok->type, CPP_STRING);
325938fd1498Szrj ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
326038fd1498Szrj
326138fd1498Szrj /* We don't support getting the location of a stringified macro
326238fd1498Szrj argument. Verify that it fails gracefully. */
326338fd1498Szrj ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
326438fd1498Szrj "cpp_interpret_string_1 failed");
326538fd1498Szrj
326638fd1498Szrj tok = test.get_token ();
326738fd1498Szrj ASSERT_EQ (tok->type, CPP_PADDING);
326838fd1498Szrj
326938fd1498Szrj tok = test.get_token ();
327038fd1498Szrj ASSERT_EQ (tok->type, CPP_PADDING);
327138fd1498Szrj }
327238fd1498Szrj
327338fd1498Szrj /* Ensure that we are fail gracefully if something attempts to pass
327438fd1498Szrj in a location that isn't a string literal token. Seen on this code:
327538fd1498Szrj
327638fd1498Szrj const char a[] = " %d ";
327738fd1498Szrj __builtin_printf (a, 0.5);
327838fd1498Szrj ^
327938fd1498Szrj
328038fd1498Szrj when c-format.c erroneously used the indicated one-character
328138fd1498Szrj location as the format string location, leading to a read past the
328238fd1498Szrj end of a string buffer in cpp_interpret_string_1. */
328338fd1498Szrj
328438fd1498Szrj static void
test_lexer_string_locations_non_string(const line_table_case & case_)328538fd1498Szrj test_lexer_string_locations_non_string (const line_table_case &case_)
328638fd1498Szrj {
328738fd1498Szrj /* .....................000000000111111111122222222223.
328838fd1498Szrj .....................123456789012345678901234567890. */
328938fd1498Szrj const char *content = (" a\n");
329038fd1498Szrj lexer_test test (case_, content, NULL);
329138fd1498Szrj
329238fd1498Szrj /* Verify that we get the expected token back. */
329338fd1498Szrj const cpp_token *tok = test.get_token ();
329438fd1498Szrj ASSERT_EQ (tok->type, CPP_NAME);
329538fd1498Szrj ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
329638fd1498Szrj
329738fd1498Szrj /* At this point, libcpp is attempting to interpret the name as a
329838fd1498Szrj string literal, despite it not starting with a quote. We don't detect
329938fd1498Szrj that, but we should at least fail gracefully. */
330038fd1498Szrj ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
330138fd1498Szrj "cpp_interpret_string_1 failed");
330238fd1498Szrj }
330338fd1498Szrj
330438fd1498Szrj /* Ensure that we can read substring information for a token which
330538fd1498Szrj starts in one linemap and ends in another . Adapted from
330638fd1498Szrj gcc.dg/cpp/pr69985.c. */
330738fd1498Szrj
330838fd1498Szrj static void
test_lexer_string_locations_long_line(const line_table_case & case_)330938fd1498Szrj test_lexer_string_locations_long_line (const line_table_case &case_)
331038fd1498Szrj {
331138fd1498Szrj /* .....................000000.000111111111
331238fd1498Szrj .....................123456.789012346789. */
331338fd1498Szrj const char *content = ("/* A very long line, so that we start a new line map. */\n"
331438fd1498Szrj " \"0123456789012345678901234567890123456789"
331538fd1498Szrj "0123456789012345678901234567890123456789"
331638fd1498Szrj "0123456789012345678901234567890123456789"
331738fd1498Szrj "0123456789\"\n");
331838fd1498Szrj
331938fd1498Szrj lexer_test test (case_, content, NULL);
332038fd1498Szrj
332138fd1498Szrj /* Verify that we get the expected token back. */
332238fd1498Szrj const cpp_token *tok = test.get_token ();
332338fd1498Szrj ASSERT_EQ (tok->type, CPP_STRING);
332438fd1498Szrj
332538fd1498Szrj if (!should_have_column_data_p (line_table->highest_location))
332638fd1498Szrj return;
332738fd1498Szrj
332838fd1498Szrj /* Verify ranges of individual characters. */
332938fd1498Szrj ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
333038fd1498Szrj for (int i = 0; i < 131; i++)
333138fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
333238fd1498Szrj i, 2, 7 + i, 7 + i);
333338fd1498Szrj }
333438fd1498Szrj
333538fd1498Szrj /* Test of locations within a raw string that doesn't contain a newline. */
333638fd1498Szrj
333738fd1498Szrj static void
test_lexer_string_locations_raw_string_one_line(const line_table_case & case_)333838fd1498Szrj test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
333938fd1498Szrj {
334038fd1498Szrj /* .....................00.0000000111111111122.
334138fd1498Szrj .....................12.3456789012345678901. */
334238fd1498Szrj const char *content = ("R\"foo(0123456789)foo\"\n");
334338fd1498Szrj lexer_test test (case_, content, NULL);
334438fd1498Szrj
334538fd1498Szrj /* Verify that we get the expected token back. */
334638fd1498Szrj const cpp_token *tok = test.get_token ();
334738fd1498Szrj ASSERT_EQ (tok->type, CPP_STRING);
334838fd1498Szrj
334938fd1498Szrj /* Verify that cpp_interpret_string works. */
335038fd1498Szrj cpp_string dst_string;
335138fd1498Szrj const enum cpp_ttype type = CPP_STRING;
335238fd1498Szrj bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
335338fd1498Szrj &dst_string, type);
335438fd1498Szrj ASSERT_TRUE (result);
335538fd1498Szrj ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
335638fd1498Szrj free (const_cast <unsigned char *> (dst_string.text));
335738fd1498Szrj
335838fd1498Szrj if (!should_have_column_data_p (line_table->highest_location))
335938fd1498Szrj return;
336038fd1498Szrj
336138fd1498Szrj /* 0-9, plus the nil terminator. */
336238fd1498Szrj ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
336338fd1498Szrj for (int i = 0; i < 11; i++)
336438fd1498Szrj ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
336538fd1498Szrj i, 1, 7 + i, 7 + i);
336638fd1498Szrj }
336738fd1498Szrj
336838fd1498Szrj /* Test of locations within a raw string that contains a newline. */
336938fd1498Szrj
337038fd1498Szrj static void
test_lexer_string_locations_raw_string_multiline(const line_table_case & case_)337138fd1498Szrj test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
337238fd1498Szrj {
337338fd1498Szrj /* .....................00.0000.
337438fd1498Szrj .....................12.3456. */
337538fd1498Szrj const char *content = ("R\"foo(\n"
337638fd1498Szrj /* .....................00000.
337738fd1498Szrj .....................12345. */
337838fd1498Szrj "hello\n"
337938fd1498Szrj "world\n"
338038fd1498Szrj /* .....................00000.
338138fd1498Szrj .....................12345. */
338238fd1498Szrj ")foo\"\n");
338338fd1498Szrj lexer_test test (case_, content, NULL);
338438fd1498Szrj
338538fd1498Szrj /* Verify that we get the expected token back. */
338638fd1498Szrj const cpp_token *tok = test.get_token ();
338738fd1498Szrj ASSERT_EQ (tok->type, CPP_STRING);
338838fd1498Szrj
338938fd1498Szrj /* Verify that cpp_interpret_string works. */
339038fd1498Szrj cpp_string dst_string;
339138fd1498Szrj const enum cpp_ttype type = CPP_STRING;
339238fd1498Szrj bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
339338fd1498Szrj &dst_string, type);
339438fd1498Szrj ASSERT_TRUE (result);
339538fd1498Szrj ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
339638fd1498Szrj free (const_cast <unsigned char *> (dst_string.text));
339738fd1498Szrj
339838fd1498Szrj if (!should_have_column_data_p (line_table->highest_location))
339938fd1498Szrj return;
340038fd1498Szrj
340138fd1498Szrj /* Currently we don't support locations within raw strings that
340238fd1498Szrj contain newlines. */
340338fd1498Szrj ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
340438fd1498Szrj "range endpoints are on different lines");
340538fd1498Szrj }
340638fd1498Szrj
340738fd1498Szrj /* Test of parsing an unterminated raw string. */
340838fd1498Szrj
340938fd1498Szrj static void
test_lexer_string_locations_raw_string_unterminated(const line_table_case & case_)341038fd1498Szrj test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
341138fd1498Szrj {
341238fd1498Szrj const char *content = "R\"ouch()ouCh\" /* etc */";
341338fd1498Szrj
341438fd1498Szrj lexer_error_sink errors;
341538fd1498Szrj lexer_test test (case_, content, &errors);
341638fd1498Szrj test.m_implicitly_expect_EOF = false;
341738fd1498Szrj
341838fd1498Szrj /* Attempt to parse the raw string. */
341938fd1498Szrj const cpp_token *tok = test.get_token ();
342038fd1498Szrj ASSERT_EQ (tok->type, CPP_EOF);
342138fd1498Szrj
342238fd1498Szrj ASSERT_EQ (1, errors.m_errors.length ());
342338fd1498Szrj /* We expect the message "unterminated raw string"
342438fd1498Szrj in the "cpplib" translation domain.
342538fd1498Szrj It's not clear that dgettext is available on all supported hosts,
342638fd1498Szrj so this assertion is commented-out for now.
342738fd1498Szrj ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
342838fd1498Szrj errors.m_errors[0]);
342938fd1498Szrj */
343038fd1498Szrj }
343138fd1498Szrj
343238fd1498Szrj /* Test of lexing char constants. */
343338fd1498Szrj
343438fd1498Szrj static void
test_lexer_char_constants(const line_table_case & case_)343538fd1498Szrj test_lexer_char_constants (const line_table_case &case_)
343638fd1498Szrj {
343738fd1498Szrj /* Various char constants.
343838fd1498Szrj .....................0000000001111111111.22222222223.
343938fd1498Szrj .....................1234567890123456789.01234567890. */
344038fd1498Szrj const char *content = (" 'a'\n"
344138fd1498Szrj " u'a'\n"
344238fd1498Szrj " U'a'\n"
344338fd1498Szrj " L'a'\n"
344438fd1498Szrj " 'abc'\n");
344538fd1498Szrj lexer_test test (case_, content, NULL);
344638fd1498Szrj
344738fd1498Szrj /* Verify that we get the expected tokens back. */
344838fd1498Szrj /* 'a'. */
344938fd1498Szrj const cpp_token *tok = test.get_token ();
345038fd1498Szrj ASSERT_EQ (tok->type, CPP_CHAR);
345138fd1498Szrj ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
345238fd1498Szrj
345338fd1498Szrj unsigned int chars_seen;
345438fd1498Szrj int unsignedp;
345538fd1498Szrj cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
345638fd1498Szrj &chars_seen, &unsignedp);
345738fd1498Szrj ASSERT_EQ (cc, 'a');
345838fd1498Szrj ASSERT_EQ (chars_seen, 1);
345938fd1498Szrj
346038fd1498Szrj /* u'a'. */
346138fd1498Szrj tok = test.get_token ();
346238fd1498Szrj ASSERT_EQ (tok->type, CPP_CHAR16);
346338fd1498Szrj ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
346438fd1498Szrj
346538fd1498Szrj /* U'a'. */
346638fd1498Szrj tok = test.get_token ();
346738fd1498Szrj ASSERT_EQ (tok->type, CPP_CHAR32);
346838fd1498Szrj ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
346938fd1498Szrj
347038fd1498Szrj /* L'a'. */
347138fd1498Szrj tok = test.get_token ();
347238fd1498Szrj ASSERT_EQ (tok->type, CPP_WCHAR);
347338fd1498Szrj ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
347438fd1498Szrj
347538fd1498Szrj /* 'abc' (c-char-sequence). */
347638fd1498Szrj tok = test.get_token ();
347738fd1498Szrj ASSERT_EQ (tok->type, CPP_CHAR);
347838fd1498Szrj ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
347938fd1498Szrj }
348038fd1498Szrj /* A table of interesting location_t values, giving one axis of our test
348138fd1498Szrj matrix. */
348238fd1498Szrj
348338fd1498Szrj static const location_t boundary_locations[] = {
348438fd1498Szrj /* Zero means "don't override the default values for a new line_table". */
348538fd1498Szrj 0,
348638fd1498Szrj
348738fd1498Szrj /* An arbitrary non-zero value that isn't close to one of
348838fd1498Szrj the boundary values below. */
348938fd1498Szrj 0x10000,
349038fd1498Szrj
349138fd1498Szrj /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
349238fd1498Szrj LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
349338fd1498Szrj LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
349438fd1498Szrj LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
349538fd1498Szrj LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
349638fd1498Szrj LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
349738fd1498Szrj
349838fd1498Szrj /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
349938fd1498Szrj LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
350038fd1498Szrj LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
350138fd1498Szrj LINE_MAP_MAX_LOCATION_WITH_COLS,
350238fd1498Szrj LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
350338fd1498Szrj LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
350438fd1498Szrj };
350538fd1498Szrj
350638fd1498Szrj /* Run TESTCASE multiple times, once for each case in our test matrix. */
350738fd1498Szrj
350838fd1498Szrj void
for_each_line_table_case(void (* testcase)(const line_table_case &))350938fd1498Szrj for_each_line_table_case (void (*testcase) (const line_table_case &))
351038fd1498Szrj {
351138fd1498Szrj /* As noted above in the description of struct line_table_case,
351238fd1498Szrj we want to explore a test matrix of interesting line_table
351338fd1498Szrj situations, running various selftests for each case within the
351438fd1498Szrj matrix. */
351538fd1498Szrj
351638fd1498Szrj /* Run all tests with:
351738fd1498Szrj (a) line_table->default_range_bits == 0, and
351838fd1498Szrj (b) line_table->default_range_bits == 5. */
351938fd1498Szrj int num_cases_tested = 0;
352038fd1498Szrj for (int default_range_bits = 0; default_range_bits <= 5;
352138fd1498Szrj default_range_bits += 5)
352238fd1498Szrj {
352338fd1498Szrj /* ...and use each of the "interesting" location values as
352438fd1498Szrj the starting location within line_table. */
352538fd1498Szrj const int num_boundary_locations
352638fd1498Szrj = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
352738fd1498Szrj for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
352838fd1498Szrj {
352938fd1498Szrj line_table_case c (default_range_bits, boundary_locations[loc_idx]);
353038fd1498Szrj
353138fd1498Szrj testcase (c);
353238fd1498Szrj
353338fd1498Szrj num_cases_tested++;
353438fd1498Szrj }
353538fd1498Szrj }
353638fd1498Szrj
353738fd1498Szrj /* Verify that we fully covered the test matrix. */
353838fd1498Szrj ASSERT_EQ (num_cases_tested, 2 * 12);
353938fd1498Szrj }
354038fd1498Szrj
3541*e215fc28Szrj /* Verify that when presented with a consecutive pair of locations with
3542*e215fc28Szrj a very large line offset, we don't attempt to consolidate them into
3543*e215fc28Szrj a single ordinary linemap where the line offsets within the line map
3544*e215fc28Szrj would lead to overflow (PR lto/88147). */
3545*e215fc28Szrj
3546*e215fc28Szrj static void
test_line_offset_overflow()3547*e215fc28Szrj test_line_offset_overflow ()
3548*e215fc28Szrj {
3549*e215fc28Szrj line_table_test ltt (line_table_case (5, 0));
3550*e215fc28Szrj
3551*e215fc28Szrj linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
3552*e215fc28Szrj linemap_line_start (line_table, 1, 100);
3553*e215fc28Szrj location_t loc_a = linemap_line_start (line_table, 2578, 255);
3554*e215fc28Szrj assert_loceq ("foo.c", 2578, 0, loc_a);
3555*e215fc28Szrj
3556*e215fc28Szrj const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3557*e215fc28Szrj ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
3558*e215fc28Szrj ASSERT_EQ (ordmap_a->m_range_bits, 5);
3559*e215fc28Szrj
3560*e215fc28Szrj location_t loc_b = linemap_line_start (line_table, 404198, 512);
3561*e215fc28Szrj assert_loceq ("foo.c", 404198, 0, loc_b);
3562*e215fc28Szrj
3563*e215fc28Szrj /* We should have started a new linemap, rather than attempting to store
3564*e215fc28Szrj a very large line offset. */
3565*e215fc28Szrj const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3566*e215fc28Szrj ASSERT_NE (ordmap_a, ordmap_b);
3567*e215fc28Szrj }
3568*e215fc28Szrj
356938fd1498Szrj /* Run all of the selftests within this file. */
357038fd1498Szrj
357138fd1498Szrj void
input_c_tests()357238fd1498Szrj input_c_tests ()
357338fd1498Szrj {
357438fd1498Szrj test_linenum_comparisons ();
357538fd1498Szrj test_should_have_column_data_p ();
357638fd1498Szrj test_unknown_location ();
357738fd1498Szrj test_builtins ();
357838fd1498Szrj for_each_line_table_case (test_make_location_nonpure_range_endpoints);
357938fd1498Szrj
358038fd1498Szrj for_each_line_table_case (test_accessing_ordinary_linemaps);
358138fd1498Szrj for_each_line_table_case (test_lexer);
358238fd1498Szrj for_each_line_table_case (test_lexer_string_locations_simple);
358338fd1498Szrj for_each_line_table_case (test_lexer_string_locations_ebcdic);
358438fd1498Szrj for_each_line_table_case (test_lexer_string_locations_hex);
358538fd1498Szrj for_each_line_table_case (test_lexer_string_locations_oct);
358638fd1498Szrj for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
358738fd1498Szrj for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
358838fd1498Szrj for_each_line_table_case (test_lexer_string_locations_ucn4);
358938fd1498Szrj for_each_line_table_case (test_lexer_string_locations_ucn8);
359038fd1498Szrj for_each_line_table_case (test_lexer_string_locations_wide_string);
359138fd1498Szrj for_each_line_table_case (test_lexer_string_locations_string16);
359238fd1498Szrj for_each_line_table_case (test_lexer_string_locations_string32);
359338fd1498Szrj for_each_line_table_case (test_lexer_string_locations_u8);
359438fd1498Szrj for_each_line_table_case (test_lexer_string_locations_utf8_source);
359538fd1498Szrj for_each_line_table_case (test_lexer_string_locations_concatenation_1);
359638fd1498Szrj for_each_line_table_case (test_lexer_string_locations_concatenation_2);
359738fd1498Szrj for_each_line_table_case (test_lexer_string_locations_concatenation_3);
359838fd1498Szrj for_each_line_table_case (test_lexer_string_locations_macro);
359938fd1498Szrj for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
360038fd1498Szrj for_each_line_table_case (test_lexer_string_locations_non_string);
360138fd1498Szrj for_each_line_table_case (test_lexer_string_locations_long_line);
360238fd1498Szrj for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
360338fd1498Szrj for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
360438fd1498Szrj for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
360538fd1498Szrj for_each_line_table_case (test_lexer_char_constants);
360638fd1498Szrj
360738fd1498Szrj test_reading_source_line ();
3608*e215fc28Szrj
3609*e215fc28Szrj test_line_offset_overflow ();
361038fd1498Szrj }
361138fd1498Szrj
361238fd1498Szrj } // namespace selftest
361338fd1498Szrj
361438fd1498Szrj #endif /* CHECKING_P */
3615