1*e4b17023SJohn Marino /* CPP Library - lexical analysis.
2*e4b17023SJohn Marino Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010,
3*e4b17023SJohn Marino 2011 Free Software Foundation, Inc.
4*e4b17023SJohn Marino Contributed by Per Bothner, 1994-95.
5*e4b17023SJohn Marino Based on CCCP program by Paul Rubin, June 1986
6*e4b17023SJohn Marino Adapted to ANSI C, Richard Stallman, Jan 1987
7*e4b17023SJohn Marino Broken out to separate file, Zack Weinberg, Mar 2000
8*e4b17023SJohn Marino
9*e4b17023SJohn Marino This program is free software; you can redistribute it and/or modify it
10*e4b17023SJohn Marino under the terms of the GNU General Public License as published by the
11*e4b17023SJohn Marino Free Software Foundation; either version 3, or (at your option) any
12*e4b17023SJohn Marino later version.
13*e4b17023SJohn Marino
14*e4b17023SJohn Marino This program is distributed in the hope that it will be useful,
15*e4b17023SJohn Marino but WITHOUT ANY WARRANTY; without even the implied warranty of
16*e4b17023SJohn Marino MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17*e4b17023SJohn Marino GNU General Public License for more details.
18*e4b17023SJohn Marino
19*e4b17023SJohn Marino You should have received a copy of the GNU General Public License
20*e4b17023SJohn Marino along with this program; see the file COPYING3. If not see
21*e4b17023SJohn Marino <http://www.gnu.org/licenses/>. */
22*e4b17023SJohn Marino
23*e4b17023SJohn Marino #include "config.h"
24*e4b17023SJohn Marino #include "system.h"
25*e4b17023SJohn Marino #include "cpplib.h"
26*e4b17023SJohn Marino #include "internal.h"
27*e4b17023SJohn Marino
/* Spelling category of a token type, as recorded in the token
   spelling table built from TTYPE_TABLE.  */
enum spell_type
{
  SPELL_OPERATOR = 0,   /* Category given to every OP table entry.  */
  SPELL_IDENT    = 1,
  SPELL_LITERAL  = 2,
  SPELL_NONE     = 3
};
35*e4b17023SJohn Marino
36*e4b17023SJohn Marino struct token_spelling
37*e4b17023SJohn Marino {
38*e4b17023SJohn Marino enum spell_type category;
39*e4b17023SJohn Marino const unsigned char *name;
40*e4b17023SJohn Marino };
41*e4b17023SJohn Marino
42*e4b17023SJohn Marino static const unsigned char *const digraph_spellings[] =
43*e4b17023SJohn Marino { UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
44*e4b17023SJohn Marino
45*e4b17023SJohn Marino #define OP(e, s) { SPELL_OPERATOR, UC s },
46*e4b17023SJohn Marino #define TK(e, s) { SPELL_ ## s, UC #e },
47*e4b17023SJohn Marino static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
48*e4b17023SJohn Marino #undef OP
49*e4b17023SJohn Marino #undef TK
50*e4b17023SJohn Marino
51*e4b17023SJohn Marino #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
52*e4b17023SJohn Marino #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
53*e4b17023SJohn Marino
54*e4b17023SJohn Marino static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
55*e4b17023SJohn Marino static int skip_line_comment (cpp_reader *);
56*e4b17023SJohn Marino static void skip_whitespace (cpp_reader *, cppchar_t);
57*e4b17023SJohn Marino static void lex_string (cpp_reader *, cpp_token *, const uchar *);
58*e4b17023SJohn Marino static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
59*e4b17023SJohn Marino static void store_comment (cpp_reader *, cpp_token *);
60*e4b17023SJohn Marino static void create_literal (cpp_reader *, cpp_token *, const uchar *,
61*e4b17023SJohn Marino unsigned int, enum cpp_ttype);
62*e4b17023SJohn Marino static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
63*e4b17023SJohn Marino static int name_p (cpp_reader *, const cpp_string *);
64*e4b17023SJohn Marino static tokenrun *next_tokenrun (tokenrun *);
65*e4b17023SJohn Marino
66*e4b17023SJohn Marino static _cpp_buff *new_buff (size_t);
67*e4b17023SJohn Marino
68*e4b17023SJohn Marino
69*e4b17023SJohn Marino /* Utility routine:
70*e4b17023SJohn Marino
71*e4b17023SJohn Marino Compares, the token TOKEN to the NUL-terminated string STRING.
72*e4b17023SJohn Marino TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
73*e4b17023SJohn Marino int
cpp_ideq(const cpp_token * token,const char * string)74*e4b17023SJohn Marino cpp_ideq (const cpp_token *token, const char *string)
75*e4b17023SJohn Marino {
76*e4b17023SJohn Marino if (token->type != CPP_NAME)
77*e4b17023SJohn Marino return 0;
78*e4b17023SJohn Marino
79*e4b17023SJohn Marino return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
80*e4b17023SJohn Marino }
81*e4b17023SJohn Marino
82*e4b17023SJohn Marino /* Record a note TYPE at byte POS into the current cleaned logical
83*e4b17023SJohn Marino line. */
84*e4b17023SJohn Marino static void
add_line_note(cpp_buffer * buffer,const uchar * pos,unsigned int type)85*e4b17023SJohn Marino add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
86*e4b17023SJohn Marino {
87*e4b17023SJohn Marino if (buffer->notes_used == buffer->notes_cap)
88*e4b17023SJohn Marino {
89*e4b17023SJohn Marino buffer->notes_cap = buffer->notes_cap * 2 + 200;
90*e4b17023SJohn Marino buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
91*e4b17023SJohn Marino buffer->notes_cap);
92*e4b17023SJohn Marino }
93*e4b17023SJohn Marino
94*e4b17023SJohn Marino buffer->notes[buffer->notes_used].pos = pos;
95*e4b17023SJohn Marino buffer->notes[buffer->notes_used].type = type;
96*e4b17023SJohn Marino buffer->notes_used++;
97*e4b17023SJohn Marino }
98*e4b17023SJohn Marino
99*e4b17023SJohn Marino
100*e4b17023SJohn Marino /* Fast path to find line special characters using optimized character
101*e4b17023SJohn Marino scanning algorithms. Anything complicated falls back to the slow
102*e4b17023SJohn Marino path below. Since this loop is very hot it's worth doing these kinds
103*e4b17023SJohn Marino of optimizations.
104*e4b17023SJohn Marino
105*e4b17023SJohn Marino One of the paths through the ifdefs should provide
106*e4b17023SJohn Marino
107*e4b17023SJohn Marino const uchar *search_line_fast (const uchar *s, const uchar *end);
108*e4b17023SJohn Marino
109*e4b17023SJohn Marino Between S and END, search for \n, \r, \\, ?. Return a pointer to
110*e4b17023SJohn Marino the found character.
111*e4b17023SJohn Marino
112*e4b17023SJohn Marino Note that the last character of the buffer is *always* a newline,
113*e4b17023SJohn Marino as forced by _cpp_convert_input. This fact can be used to avoid
114*e4b17023SJohn Marino explicitly looking for the end of the buffer. */
115*e4b17023SJohn Marino
/* Configure only gives us an ifdef test.  Default the macro to 0 so it
   can be used in ordinary C conditions below.  */
#if !defined (WORDS_BIGENDIAN)
# define WORDS_BIGENDIAN 0
#endif
120*e4b17023SJohn Marino
/* WORD_TYPE should be the largest integer that fits into a register.
   <stdint.h> has no such type.  On most hosts unsigned long does the
   job, but MS chose an LLP64 model; when building with GCC we can ask
   for the "real" word size through the __word__ mode.  */
#if defined (__GNUC__)
typedef unsigned int word_type __attribute__((__mode__(__word__)));
#else
typedef unsigned long word_type;
#endif
130*e4b17023SJohn Marino
131*e4b17023SJohn Marino /* The code below is only expecting sizes 4 or 8.
132*e4b17023SJohn Marino Die at compile-time if this expectation is violated. */
133*e4b17023SJohn Marino typedef char check_word_type_size
134*e4b17023SJohn Marino [(sizeof(word_type) == 8 || sizeof(word_type) == 4) * 2 - 1];
135*e4b17023SJohn Marino
136*e4b17023SJohn Marino /* Return X with the first N bytes forced to values that won't match one
137*e4b17023SJohn Marino of the interesting characters. Note that NUL is not interesting. */
138*e4b17023SJohn Marino
139*e4b17023SJohn Marino static inline word_type
acc_char_mask_misalign(word_type val,unsigned int n)140*e4b17023SJohn Marino acc_char_mask_misalign (word_type val, unsigned int n)
141*e4b17023SJohn Marino {
142*e4b17023SJohn Marino word_type mask = -1;
143*e4b17023SJohn Marino if (WORDS_BIGENDIAN)
144*e4b17023SJohn Marino mask >>= n * 8;
145*e4b17023SJohn Marino else
146*e4b17023SJohn Marino mask <<= n * 8;
147*e4b17023SJohn Marino return val & mask;
148*e4b17023SJohn Marino }
149*e4b17023SJohn Marino
150*e4b17023SJohn Marino /* Return X replicated to all byte positions within WORD_TYPE. */
151*e4b17023SJohn Marino
152*e4b17023SJohn Marino static inline word_type
acc_char_replicate(uchar x)153*e4b17023SJohn Marino acc_char_replicate (uchar x)
154*e4b17023SJohn Marino {
155*e4b17023SJohn Marino word_type ret;
156*e4b17023SJohn Marino
157*e4b17023SJohn Marino ret = (x << 24) | (x << 16) | (x << 8) | x;
158*e4b17023SJohn Marino if (sizeof(word_type) == 8)
159*e4b17023SJohn Marino ret = (ret << 16 << 16) | ret;
160*e4b17023SJohn Marino return ret;
161*e4b17023SJohn Marino }
162*e4b17023SJohn Marino
163*e4b17023SJohn Marino /* Return non-zero if some byte of VAL is (probably) C. */
164*e4b17023SJohn Marino
165*e4b17023SJohn Marino static inline word_type
acc_char_cmp(word_type val,word_type c)166*e4b17023SJohn Marino acc_char_cmp (word_type val, word_type c)
167*e4b17023SJohn Marino {
168*e4b17023SJohn Marino #if defined(__GNUC__) && defined(__alpha__)
169*e4b17023SJohn Marino /* We can get exact results using a compare-bytes instruction.
170*e4b17023SJohn Marino Get (val == c) via (0 >= (val ^ c)). */
171*e4b17023SJohn Marino return __builtin_alpha_cmpbge (0, val ^ c);
172*e4b17023SJohn Marino #else
173*e4b17023SJohn Marino word_type magic = 0x7efefefeU;
174*e4b17023SJohn Marino if (sizeof(word_type) == 8)
175*e4b17023SJohn Marino magic = (magic << 16 << 16) | 0xfefefefeU;
176*e4b17023SJohn Marino magic |= 1;
177*e4b17023SJohn Marino
178*e4b17023SJohn Marino val ^= c;
179*e4b17023SJohn Marino return ((val + magic) ^ ~val) & ~magic;
180*e4b17023SJohn Marino #endif
181*e4b17023SJohn Marino }
182*e4b17023SJohn Marino
183*e4b17023SJohn Marino /* Given the result of acc_char_cmp is non-zero, return the index of
184*e4b17023SJohn Marino the found character. If this was a false positive, return -1. */
185*e4b17023SJohn Marino
186*e4b17023SJohn Marino static inline int
acc_char_index(word_type cmp ATTRIBUTE_UNUSED,word_type val ATTRIBUTE_UNUSED)187*e4b17023SJohn Marino acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
188*e4b17023SJohn Marino word_type val ATTRIBUTE_UNUSED)
189*e4b17023SJohn Marino {
190*e4b17023SJohn Marino #if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
191*e4b17023SJohn Marino /* The cmpbge instruction sets *bits* of the result corresponding to
192*e4b17023SJohn Marino matches in the bytes with no false positives. */
193*e4b17023SJohn Marino return __builtin_ctzl (cmp);
194*e4b17023SJohn Marino #else
195*e4b17023SJohn Marino unsigned int i;
196*e4b17023SJohn Marino
197*e4b17023SJohn Marino /* ??? It would be nice to force unrolling here,
198*e4b17023SJohn Marino and have all of these constants folded. */
199*e4b17023SJohn Marino for (i = 0; i < sizeof(word_type); ++i)
200*e4b17023SJohn Marino {
201*e4b17023SJohn Marino uchar c;
202*e4b17023SJohn Marino if (WORDS_BIGENDIAN)
203*e4b17023SJohn Marino c = (val >> (sizeof(word_type) - i - 1) * 8) & 0xff;
204*e4b17023SJohn Marino else
205*e4b17023SJohn Marino c = (val >> i * 8) & 0xff;
206*e4b17023SJohn Marino
207*e4b17023SJohn Marino if (c == '\n' || c == '\r' || c == '\\' || c == '?')
208*e4b17023SJohn Marino return i;
209*e4b17023SJohn Marino }
210*e4b17023SJohn Marino
211*e4b17023SJohn Marino return -1;
212*e4b17023SJohn Marino #endif
213*e4b17023SJohn Marino }
214*e4b17023SJohn Marino
215*e4b17023SJohn Marino /* A version of the fast scanner using bit fiddling techniques.
216*e4b17023SJohn Marino
217*e4b17023SJohn Marino For 32-bit words, one would normally perform 16 comparisons and
218*e4b17023SJohn Marino 16 branches. With this algorithm one performs 24 arithmetic
219*e4b17023SJohn Marino operations and one branch. Whether this is faster with a 32-bit
220*e4b17023SJohn Marino word size is going to be somewhat system dependent.
221*e4b17023SJohn Marino
222*e4b17023SJohn Marino For 64-bit words, we eliminate twice the number of comparisons
223*e4b17023SJohn Marino and branches without increasing the number of arithmetic operations.
224*e4b17023SJohn Marino It's almost certainly going to be a win with 64-bit word size. */
225*e4b17023SJohn Marino
226*e4b17023SJohn Marino static const uchar * search_line_acc_char (const uchar *, const uchar *)
227*e4b17023SJohn Marino ATTRIBUTE_UNUSED;
228*e4b17023SJohn Marino
229*e4b17023SJohn Marino static const uchar *
search_line_acc_char(const uchar * s,const uchar * end ATTRIBUTE_UNUSED)230*e4b17023SJohn Marino search_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
231*e4b17023SJohn Marino {
232*e4b17023SJohn Marino const word_type repl_nl = acc_char_replicate ('\n');
233*e4b17023SJohn Marino const word_type repl_cr = acc_char_replicate ('\r');
234*e4b17023SJohn Marino const word_type repl_bs = acc_char_replicate ('\\');
235*e4b17023SJohn Marino const word_type repl_qm = acc_char_replicate ('?');
236*e4b17023SJohn Marino
237*e4b17023SJohn Marino unsigned int misalign;
238*e4b17023SJohn Marino const word_type *p;
239*e4b17023SJohn Marino word_type val, t;
240*e4b17023SJohn Marino
241*e4b17023SJohn Marino /* Align the buffer. Mask out any bytes from before the beginning. */
242*e4b17023SJohn Marino p = (word_type *)((uintptr_t)s & -sizeof(word_type));
243*e4b17023SJohn Marino val = *p;
244*e4b17023SJohn Marino misalign = (uintptr_t)s & (sizeof(word_type) - 1);
245*e4b17023SJohn Marino if (misalign)
246*e4b17023SJohn Marino val = acc_char_mask_misalign (val, misalign);
247*e4b17023SJohn Marino
248*e4b17023SJohn Marino /* Main loop. */
249*e4b17023SJohn Marino while (1)
250*e4b17023SJohn Marino {
251*e4b17023SJohn Marino t = acc_char_cmp (val, repl_nl);
252*e4b17023SJohn Marino t |= acc_char_cmp (val, repl_cr);
253*e4b17023SJohn Marino t |= acc_char_cmp (val, repl_bs);
254*e4b17023SJohn Marino t |= acc_char_cmp (val, repl_qm);
255*e4b17023SJohn Marino
256*e4b17023SJohn Marino if (__builtin_expect (t != 0, 0))
257*e4b17023SJohn Marino {
258*e4b17023SJohn Marino int i = acc_char_index (t, val);
259*e4b17023SJohn Marino if (i >= 0)
260*e4b17023SJohn Marino return (const uchar *)p + i;
261*e4b17023SJohn Marino }
262*e4b17023SJohn Marino
263*e4b17023SJohn Marino val = *++p;
264*e4b17023SJohn Marino }
265*e4b17023SJohn Marino }
266*e4b17023SJohn Marino
267*e4b17023SJohn Marino /* Disable on Solaris 2/x86 until the following problems can be properly
268*e4b17023SJohn Marino autoconfed:
269*e4b17023SJohn Marino
270*e4b17023SJohn Marino The Solaris 8 assembler cannot assemble SSE2/SSE4.2 insns.
271*e4b17023SJohn Marino The Solaris 9 assembler cannot assemble SSE4.2 insns.
272*e4b17023SJohn Marino Before Solaris 9 Update 6, SSE insns cannot be executed.
273*e4b17023SJohn Marino The Solaris 10+ assembler tags objects with the instruction set
274*e4b17023SJohn Marino extensions used, so SSE4.2 executables cannot run on machines that
275*e4b17023SJohn Marino don't support that extension. */
276*e4b17023SJohn Marino
277*e4b17023SJohn Marino #if (GCC_VERSION >= 4005) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
278*e4b17023SJohn Marino
/* Replicated character data to be shared between implementations.
   Recall that outside of a context with vector support we can't
   define compatible vector types, therefore these are all defined
   in terms of raw characters.  Row order: \n, \r, \\, ?.  */
#define REPL16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
static const char repl_chars[4][16] __attribute__((aligned(16))) = {
  REPL16 ('\n'),
  REPL16 ('\r'),
  REPL16 ('\\'),
  REPL16 ('?'),
};
#undef REPL16
293*e4b17023SJohn Marino
294*e4b17023SJohn Marino /* A version of the fast scanner using MMX vectorized byte compare insns.
295*e4b17023SJohn Marino
296*e4b17023SJohn Marino This uses the PMOVMSKB instruction which was introduced with "MMX2",
297*e4b17023SJohn Marino which was packaged into SSE1; it is also present in the AMD MMX
298*e4b17023SJohn Marino extension. Mark the function as using "sse" so that we emit a real
299*e4b17023SJohn Marino "emms" instruction, rather than the 3dNOW "femms" instruction. */
300*e4b17023SJohn Marino
301*e4b17023SJohn Marino static const uchar *
302*e4b17023SJohn Marino #ifndef __SSE__
303*e4b17023SJohn Marino __attribute__((__target__("sse")))
304*e4b17023SJohn Marino #endif
search_line_mmx(const uchar * s,const uchar * end ATTRIBUTE_UNUSED)305*e4b17023SJohn Marino search_line_mmx (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
306*e4b17023SJohn Marino {
307*e4b17023SJohn Marino typedef char v8qi __attribute__ ((__vector_size__ (8)));
308*e4b17023SJohn Marino typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
309*e4b17023SJohn Marino
310*e4b17023SJohn Marino const v8qi repl_nl = *(const v8qi *)repl_chars[0];
311*e4b17023SJohn Marino const v8qi repl_cr = *(const v8qi *)repl_chars[1];
312*e4b17023SJohn Marino const v8qi repl_bs = *(const v8qi *)repl_chars[2];
313*e4b17023SJohn Marino const v8qi repl_qm = *(const v8qi *)repl_chars[3];
314*e4b17023SJohn Marino
315*e4b17023SJohn Marino unsigned int misalign, found, mask;
316*e4b17023SJohn Marino const v8qi *p;
317*e4b17023SJohn Marino v8qi data, t, c;
318*e4b17023SJohn Marino
319*e4b17023SJohn Marino /* Align the source pointer. While MMX doesn't generate unaligned data
320*e4b17023SJohn Marino faults, this allows us to safely scan to the end of the buffer without
321*e4b17023SJohn Marino reading beyond the end of the last page. */
322*e4b17023SJohn Marino misalign = (uintptr_t)s & 7;
323*e4b17023SJohn Marino p = (const v8qi *)((uintptr_t)s & -8);
324*e4b17023SJohn Marino data = *p;
325*e4b17023SJohn Marino
326*e4b17023SJohn Marino /* Create a mask for the bytes that are valid within the first
327*e4b17023SJohn Marino 16-byte block. The Idea here is that the AND with the mask
328*e4b17023SJohn Marino within the loop is "free", since we need some AND or TEST
329*e4b17023SJohn Marino insn in order to set the flags for the branch anyway. */
330*e4b17023SJohn Marino mask = -1u << misalign;
331*e4b17023SJohn Marino
332*e4b17023SJohn Marino /* Main loop processing 8 bytes at a time. */
333*e4b17023SJohn Marino goto start;
334*e4b17023SJohn Marino do
335*e4b17023SJohn Marino {
336*e4b17023SJohn Marino data = *++p;
337*e4b17023SJohn Marino mask = -1;
338*e4b17023SJohn Marino
339*e4b17023SJohn Marino start:
340*e4b17023SJohn Marino t = __builtin_ia32_pcmpeqb(data, repl_nl);
341*e4b17023SJohn Marino c = __builtin_ia32_pcmpeqb(data, repl_cr);
342*e4b17023SJohn Marino t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
343*e4b17023SJohn Marino c = __builtin_ia32_pcmpeqb(data, repl_bs);
344*e4b17023SJohn Marino t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
345*e4b17023SJohn Marino c = __builtin_ia32_pcmpeqb(data, repl_qm);
346*e4b17023SJohn Marino t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
347*e4b17023SJohn Marino found = __builtin_ia32_pmovmskb (t);
348*e4b17023SJohn Marino found &= mask;
349*e4b17023SJohn Marino }
350*e4b17023SJohn Marino while (!found);
351*e4b17023SJohn Marino
352*e4b17023SJohn Marino __builtin_ia32_emms ();
353*e4b17023SJohn Marino
354*e4b17023SJohn Marino /* FOUND contains 1 in bits for which we matched a relevant
355*e4b17023SJohn Marino character. Conversion to the byte index is trivial. */
356*e4b17023SJohn Marino found = __builtin_ctz(found);
357*e4b17023SJohn Marino return (const uchar *)p + found;
358*e4b17023SJohn Marino }
359*e4b17023SJohn Marino
360*e4b17023SJohn Marino /* A version of the fast scanner using SSE2 vectorized byte compare insns. */
361*e4b17023SJohn Marino
362*e4b17023SJohn Marino static const uchar *
363*e4b17023SJohn Marino #ifndef __SSE2__
364*e4b17023SJohn Marino __attribute__((__target__("sse2")))
365*e4b17023SJohn Marino #endif
search_line_sse2(const uchar * s,const uchar * end ATTRIBUTE_UNUSED)366*e4b17023SJohn Marino search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
367*e4b17023SJohn Marino {
368*e4b17023SJohn Marino typedef char v16qi __attribute__ ((__vector_size__ (16)));
369*e4b17023SJohn Marino
370*e4b17023SJohn Marino const v16qi repl_nl = *(const v16qi *)repl_chars[0];
371*e4b17023SJohn Marino const v16qi repl_cr = *(const v16qi *)repl_chars[1];
372*e4b17023SJohn Marino const v16qi repl_bs = *(const v16qi *)repl_chars[2];
373*e4b17023SJohn Marino const v16qi repl_qm = *(const v16qi *)repl_chars[3];
374*e4b17023SJohn Marino
375*e4b17023SJohn Marino unsigned int misalign, found, mask;
376*e4b17023SJohn Marino const v16qi *p;
377*e4b17023SJohn Marino v16qi data, t;
378*e4b17023SJohn Marino
379*e4b17023SJohn Marino /* Align the source pointer. */
380*e4b17023SJohn Marino misalign = (uintptr_t)s & 15;
381*e4b17023SJohn Marino p = (const v16qi *)((uintptr_t)s & -16);
382*e4b17023SJohn Marino data = *p;
383*e4b17023SJohn Marino
384*e4b17023SJohn Marino /* Create a mask for the bytes that are valid within the first
385*e4b17023SJohn Marino 16-byte block. The Idea here is that the AND with the mask
386*e4b17023SJohn Marino within the loop is "free", since we need some AND or TEST
387*e4b17023SJohn Marino insn in order to set the flags for the branch anyway. */
388*e4b17023SJohn Marino mask = -1u << misalign;
389*e4b17023SJohn Marino
390*e4b17023SJohn Marino /* Main loop processing 16 bytes at a time. */
391*e4b17023SJohn Marino goto start;
392*e4b17023SJohn Marino do
393*e4b17023SJohn Marino {
394*e4b17023SJohn Marino data = *++p;
395*e4b17023SJohn Marino mask = -1;
396*e4b17023SJohn Marino
397*e4b17023SJohn Marino start:
398*e4b17023SJohn Marino t = __builtin_ia32_pcmpeqb128(data, repl_nl);
399*e4b17023SJohn Marino t |= __builtin_ia32_pcmpeqb128(data, repl_cr);
400*e4b17023SJohn Marino t |= __builtin_ia32_pcmpeqb128(data, repl_bs);
401*e4b17023SJohn Marino t |= __builtin_ia32_pcmpeqb128(data, repl_qm);
402*e4b17023SJohn Marino found = __builtin_ia32_pmovmskb128 (t);
403*e4b17023SJohn Marino found &= mask;
404*e4b17023SJohn Marino }
405*e4b17023SJohn Marino while (!found);
406*e4b17023SJohn Marino
407*e4b17023SJohn Marino /* FOUND contains 1 in bits for which we matched a relevant
408*e4b17023SJohn Marino character. Conversion to the byte index is trivial. */
409*e4b17023SJohn Marino found = __builtin_ctz(found);
410*e4b17023SJohn Marino return (const uchar *)p + found;
411*e4b17023SJohn Marino }
412*e4b17023SJohn Marino
#ifdef HAVE_SSE4
/* A version of the fast scanner using SSE 4.2 vectorized string insns.

   Returns a pointer to the first \n, \r, ? or \\ at or after S.  END is
   used only to decide whether an unaligned 16-byte read at S is safe.  */

static const uchar *
#ifndef __SSE4_2__
__attribute__((__target__("sse4.2")))
#endif
search_line_sse42 (const uchar *s, const uchar *end)
{
  typedef char v16qi __attribute__ ((__vector_size__ (16)));
  /* The four characters to look for; the remaining lanes are zero and
     are excluded by the explicit length of 4 passed in EAX below.  */
  static const v16qi search = { '\n', '\r', '?', '\\' };

  const uintptr_t addr = (uintptr_t)s;
  uintptr_t index;

  /* Check for unaligned input.  */
  if ((addr & 15) != 0)
    {
      /* There are less than 16 bytes left in the buffer, and less
         than 16 bytes left on the page.  Reading 16 bytes at this
         point might generate a spurious page fault.  Defer to the
         SSE2 implementation, which already handles alignment.  */
      if (__builtin_expect (end - s < 16, 0)
          && __builtin_expect ((addr & 0xfff) > 0xff0, 0))
        return search_line_sse2 (s, end);

      /* ??? The builtin doesn't understand that the PCMPESTRI read from
         memory need not be aligned.  The insn leaves its result in ECX;
         a value below 16 is the offset of a match within the block.  */
      __asm ("%vpcmpestri $0, (%1), %2"
             : "=c"(index) : "r"(s), "x"(search), "a"(4), "d"(16));
      if (__builtin_expect (index < 16, 0))
        return s + index;

      /* Advance the pointer to an aligned address.  We will re-scan a
         few bytes, but we no longer need care for reading past the
         end of a page, since we're guaranteed a match.  */
      s = (const uchar *)((addr + 16) & -16);
    }

  /* Main loop, processing 16 bytes at a time.  By doing the whole loop
     in inline assembly, we can make proper use of the flags set: the
     loop repeats for as long as the carry flag stays clear.  */
  __asm ( "sub $16, %1\n"
          " .balign 16\n"
          "0: add $16, %1\n"
          " %vpcmpestri $0, (%1), %2\n"
          " jnc 0b"
          : "=&c"(index), "+r"(s)
          : "x"(search), "a"(4), "d"(16));

  return s + index;
}

#else
/* Work around out-dated assemblers without sse4 support.  */
#define search_line_sse42 search_line_sse2
#endif
472*e4b17023SJohn Marino
473*e4b17023SJohn Marino /* Check the CPU capabilities. */
474*e4b17023SJohn Marino
475*e4b17023SJohn Marino #include "../gcc/config/i386/cpuid.h"
476*e4b17023SJohn Marino
477*e4b17023SJohn Marino typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *);
478*e4b17023SJohn Marino static search_line_fast_type search_line_fast;
479*e4b17023SJohn Marino
480*e4b17023SJohn Marino #define HAVE_init_vectorized_lexer 1
481*e4b17023SJohn Marino static inline void
init_vectorized_lexer(void)482*e4b17023SJohn Marino init_vectorized_lexer (void)
483*e4b17023SJohn Marino {
484*e4b17023SJohn Marino unsigned dummy, ecx = 0, edx = 0;
485*e4b17023SJohn Marino search_line_fast_type impl = search_line_acc_char;
486*e4b17023SJohn Marino int minimum = 0;
487*e4b17023SJohn Marino
488*e4b17023SJohn Marino #if defined(__SSE4_2__)
489*e4b17023SJohn Marino minimum = 3;
490*e4b17023SJohn Marino #elif defined(__SSE2__)
491*e4b17023SJohn Marino minimum = 2;
492*e4b17023SJohn Marino #elif defined(__SSE__)
493*e4b17023SJohn Marino minimum = 1;
494*e4b17023SJohn Marino #endif
495*e4b17023SJohn Marino
496*e4b17023SJohn Marino if (minimum == 3)
497*e4b17023SJohn Marino impl = search_line_sse42;
498*e4b17023SJohn Marino else if (__get_cpuid (1, &dummy, &dummy, &ecx, &edx) || minimum == 2)
499*e4b17023SJohn Marino {
500*e4b17023SJohn Marino if (minimum == 3 || (ecx & bit_SSE4_2))
501*e4b17023SJohn Marino impl = search_line_sse42;
502*e4b17023SJohn Marino else if (minimum == 2 || (edx & bit_SSE2))
503*e4b17023SJohn Marino impl = search_line_sse2;
504*e4b17023SJohn Marino else if (minimum == 1 || (edx & bit_SSE))
505*e4b17023SJohn Marino impl = search_line_mmx;
506*e4b17023SJohn Marino }
507*e4b17023SJohn Marino else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
508*e4b17023SJohn Marino {
509*e4b17023SJohn Marino if (minimum == 1
510*e4b17023SJohn Marino || (edx & (bit_MMXEXT | bit_CMOV)) == (bit_MMXEXT | bit_CMOV))
511*e4b17023SJohn Marino impl = search_line_mmx;
512*e4b17023SJohn Marino }
513*e4b17023SJohn Marino
514*e4b17023SJohn Marino search_line_fast = impl;
515*e4b17023SJohn Marino }
516*e4b17023SJohn Marino
517*e4b17023SJohn Marino #elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__)
518*e4b17023SJohn Marino
/* A version of the fast scanner using AltiVec vectorized byte compares.  */
520*e4b17023SJohn Marino /* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
521*e4b17023SJohn Marino so we can't compile this function without -maltivec on the command line
522*e4b17023SJohn Marino (or implied by some other switch). */
523*e4b17023SJohn Marino
524*e4b17023SJohn Marino static const uchar *
search_line_fast(const uchar * s,const uchar * end ATTRIBUTE_UNUSED)525*e4b17023SJohn Marino search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
526*e4b17023SJohn Marino {
527*e4b17023SJohn Marino typedef __attribute__((altivec(vector))) unsigned char vc;
528*e4b17023SJohn Marino
529*e4b17023SJohn Marino const vc repl_nl = {
530*e4b17023SJohn Marino '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
531*e4b17023SJohn Marino '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
532*e4b17023SJohn Marino };
533*e4b17023SJohn Marino const vc repl_cr = {
534*e4b17023SJohn Marino '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
535*e4b17023SJohn Marino '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
536*e4b17023SJohn Marino };
537*e4b17023SJohn Marino const vc repl_bs = {
538*e4b17023SJohn Marino '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
539*e4b17023SJohn Marino '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
540*e4b17023SJohn Marino };
541*e4b17023SJohn Marino const vc repl_qm = {
542*e4b17023SJohn Marino '?', '?', '?', '?', '?', '?', '?', '?',
543*e4b17023SJohn Marino '?', '?', '?', '?', '?', '?', '?', '?',
544*e4b17023SJohn Marino };
545*e4b17023SJohn Marino const vc ones = {
546*e4b17023SJohn Marino -1, -1, -1, -1, -1, -1, -1, -1,
547*e4b17023SJohn Marino -1, -1, -1, -1, -1, -1, -1, -1,
548*e4b17023SJohn Marino };
549*e4b17023SJohn Marino const vc zero = { 0 };
550*e4b17023SJohn Marino
551*e4b17023SJohn Marino vc data, mask, t;
552*e4b17023SJohn Marino
553*e4b17023SJohn Marino /* Altivec loads automatically mask addresses with -16. This lets us
554*e4b17023SJohn Marino issue the first load as early as possible. */
555*e4b17023SJohn Marino data = __builtin_vec_ld(0, (const vc *)s);
556*e4b17023SJohn Marino
557*e4b17023SJohn Marino /* Discard bytes before the beginning of the buffer. Do this by
558*e4b17023SJohn Marino beginning with all ones and shifting in zeros according to the
559*e4b17023SJohn Marino mis-alignment. The LVSR instruction pulls the exact shift we
560*e4b17023SJohn Marino want from the address. */
561*e4b17023SJohn Marino mask = __builtin_vec_lvsr(0, s);
562*e4b17023SJohn Marino mask = __builtin_vec_perm(zero, ones, mask);
563*e4b17023SJohn Marino data &= mask;
564*e4b17023SJohn Marino
565*e4b17023SJohn Marino /* While altivec loads mask addresses, we still need to align S so
566*e4b17023SJohn Marino that the offset we compute at the end is correct. */
567*e4b17023SJohn Marino s = (const uchar *)((uintptr_t)s & -16);
568*e4b17023SJohn Marino
569*e4b17023SJohn Marino /* Main loop processing 16 bytes at a time. */
570*e4b17023SJohn Marino goto start;
571*e4b17023SJohn Marino do
572*e4b17023SJohn Marino {
573*e4b17023SJohn Marino vc m_nl, m_cr, m_bs, m_qm;
574*e4b17023SJohn Marino
575*e4b17023SJohn Marino s += 16;
576*e4b17023SJohn Marino data = __builtin_vec_ld(0, (const vc *)s);
577*e4b17023SJohn Marino
578*e4b17023SJohn Marino start:
579*e4b17023SJohn Marino m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
580*e4b17023SJohn Marino m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
581*e4b17023SJohn Marino m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
582*e4b17023SJohn Marino m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
583*e4b17023SJohn Marino t = (m_nl | m_cr) | (m_bs | m_qm);
584*e4b17023SJohn Marino
585*e4b17023SJohn Marino /* T now contains 0xff in bytes for which we matched one of the relevant
586*e4b17023SJohn Marino characters. We want to exit the loop if any byte in T is non-zero.
587*e4b17023SJohn Marino Below is the expansion of vec_any_ne(t, zero). */
588*e4b17023SJohn Marino }
589*e4b17023SJohn Marino while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
590*e4b17023SJohn Marino
591*e4b17023SJohn Marino {
592*e4b17023SJohn Marino #define N (sizeof(vc) / sizeof(long))
593*e4b17023SJohn Marino
594*e4b17023SJohn Marino typedef char check_count[(N == 2 || N == 4) * 2 - 1];
595*e4b17023SJohn Marino union {
596*e4b17023SJohn Marino vc v;
597*e4b17023SJohn Marino unsigned long l[N];
598*e4b17023SJohn Marino } u;
599*e4b17023SJohn Marino unsigned long l, i = 0;
600*e4b17023SJohn Marino
601*e4b17023SJohn Marino u.v = t;
602*e4b17023SJohn Marino
603*e4b17023SJohn Marino /* Find the first word of T that is non-zero. */
604*e4b17023SJohn Marino switch (N)
605*e4b17023SJohn Marino {
606*e4b17023SJohn Marino case 4:
607*e4b17023SJohn Marino l = u.l[i++];
608*e4b17023SJohn Marino if (l != 0)
609*e4b17023SJohn Marino break;
610*e4b17023SJohn Marino s += sizeof(unsigned long);
611*e4b17023SJohn Marino l = u.l[i++];
612*e4b17023SJohn Marino if (l != 0)
613*e4b17023SJohn Marino break;
614*e4b17023SJohn Marino s += sizeof(unsigned long);
615*e4b17023SJohn Marino case 2:
616*e4b17023SJohn Marino l = u.l[i++];
617*e4b17023SJohn Marino if (l != 0)
618*e4b17023SJohn Marino break;
619*e4b17023SJohn Marino s += sizeof(unsigned long);
620*e4b17023SJohn Marino l = u.l[i];
621*e4b17023SJohn Marino }
622*e4b17023SJohn Marino
623*e4b17023SJohn Marino /* L now contains 0xff in bytes for which we matched one of the
624*e4b17023SJohn Marino relevant characters. We can find the byte index by finding
625*e4b17023SJohn Marino its bit index and dividing by 8. */
626*e4b17023SJohn Marino l = __builtin_clzl(l) >> 3;
627*e4b17023SJohn Marino return s + l;
628*e4b17023SJohn Marino
629*e4b17023SJohn Marino #undef N
630*e4b17023SJohn Marino }
631*e4b17023SJohn Marino }
632*e4b17023SJohn Marino
633*e4b17023SJohn Marino #else
634*e4b17023SJohn Marino
/* We only have one accelerated alternative.  Use a direct call so that
   we encourage inlining.  */
637*e4b17023SJohn Marino
638*e4b17023SJohn Marino #define search_line_fast search_line_acc_char
639*e4b17023SJohn Marino
640*e4b17023SJohn Marino #endif
641*e4b17023SJohn Marino
/* One-time lexer set-up.  When this build provides a vectorized lexer
   initializer (HAVE_init_vectorized_lexer is defined) call it;
   otherwise there is nothing to do.  */

void
_cpp_init_lexer (void)
{
#if defined (HAVE_init_vectorized_lexer)
  init_vectorized_lexer ();
#endif
}
651*e4b17023SJohn Marino
652*e4b17023SJohn Marino /* Returns with a logical line that contains no escaped newlines or
653*e4b17023SJohn Marino trigraphs. This is a time-critical inner loop. */
654*e4b17023SJohn Marino void
_cpp_clean_line(cpp_reader * pfile)655*e4b17023SJohn Marino _cpp_clean_line (cpp_reader *pfile)
656*e4b17023SJohn Marino {
657*e4b17023SJohn Marino cpp_buffer *buffer;
658*e4b17023SJohn Marino const uchar *s;
659*e4b17023SJohn Marino uchar c, *d, *p;
660*e4b17023SJohn Marino
661*e4b17023SJohn Marino buffer = pfile->buffer;
662*e4b17023SJohn Marino buffer->cur_note = buffer->notes_used = 0;
663*e4b17023SJohn Marino buffer->cur = buffer->line_base = buffer->next_line;
664*e4b17023SJohn Marino buffer->need_line = false;
665*e4b17023SJohn Marino s = buffer->next_line;
666*e4b17023SJohn Marino
667*e4b17023SJohn Marino if (!buffer->from_stage3)
668*e4b17023SJohn Marino {
669*e4b17023SJohn Marino const uchar *pbackslash = NULL;
670*e4b17023SJohn Marino
671*e4b17023SJohn Marino /* Fast path. This is the common case of an un-escaped line with
672*e4b17023SJohn Marino no trigraphs. The primary win here is by not writing any
673*e4b17023SJohn Marino data back to memory until we have to. */
674*e4b17023SJohn Marino while (1)
675*e4b17023SJohn Marino {
676*e4b17023SJohn Marino /* Perform an optimized search for \n, \r, \\, ?. */
677*e4b17023SJohn Marino s = search_line_fast (s, buffer->rlimit);
678*e4b17023SJohn Marino
679*e4b17023SJohn Marino c = *s;
680*e4b17023SJohn Marino if (c == '\\')
681*e4b17023SJohn Marino {
682*e4b17023SJohn Marino /* Record the location of the backslash and continue. */
683*e4b17023SJohn Marino pbackslash = s++;
684*e4b17023SJohn Marino }
685*e4b17023SJohn Marino else if (__builtin_expect (c == '?', 0))
686*e4b17023SJohn Marino {
687*e4b17023SJohn Marino if (__builtin_expect (s[1] == '?', false)
688*e4b17023SJohn Marino && _cpp_trigraph_map[s[2]])
689*e4b17023SJohn Marino {
690*e4b17023SJohn Marino /* Have a trigraph. We may or may not have to convert
691*e4b17023SJohn Marino it. Add a line note regardless, for -Wtrigraphs. */
692*e4b17023SJohn Marino add_line_note (buffer, s, s[2]);
693*e4b17023SJohn Marino if (CPP_OPTION (pfile, trigraphs))
694*e4b17023SJohn Marino {
695*e4b17023SJohn Marino /* We do, and that means we have to switch to the
696*e4b17023SJohn Marino slow path. */
697*e4b17023SJohn Marino d = (uchar *) s;
698*e4b17023SJohn Marino *d = _cpp_trigraph_map[s[2]];
699*e4b17023SJohn Marino s += 2;
700*e4b17023SJohn Marino goto slow_path;
701*e4b17023SJohn Marino }
702*e4b17023SJohn Marino }
703*e4b17023SJohn Marino /* Not a trigraph. Continue on fast-path. */
704*e4b17023SJohn Marino s++;
705*e4b17023SJohn Marino }
706*e4b17023SJohn Marino else
707*e4b17023SJohn Marino break;
708*e4b17023SJohn Marino }
709*e4b17023SJohn Marino
710*e4b17023SJohn Marino /* This must be \r or \n. We're either done, or we'll be forced
711*e4b17023SJohn Marino to write back to the buffer and continue on the slow path. */
712*e4b17023SJohn Marino d = (uchar *) s;
713*e4b17023SJohn Marino
714*e4b17023SJohn Marino if (__builtin_expect (s == buffer->rlimit, false))
715*e4b17023SJohn Marino goto done;
716*e4b17023SJohn Marino
717*e4b17023SJohn Marino /* DOS line ending? */
718*e4b17023SJohn Marino if (__builtin_expect (c == '\r', false) && s[1] == '\n')
719*e4b17023SJohn Marino {
720*e4b17023SJohn Marino s++;
721*e4b17023SJohn Marino if (s == buffer->rlimit)
722*e4b17023SJohn Marino goto done;
723*e4b17023SJohn Marino }
724*e4b17023SJohn Marino
725*e4b17023SJohn Marino if (__builtin_expect (pbackslash == NULL, true))
726*e4b17023SJohn Marino goto done;
727*e4b17023SJohn Marino
728*e4b17023SJohn Marino /* Check for escaped newline. */
729*e4b17023SJohn Marino p = d;
730*e4b17023SJohn Marino while (is_nvspace (p[-1]))
731*e4b17023SJohn Marino p--;
732*e4b17023SJohn Marino if (p - 1 != pbackslash)
733*e4b17023SJohn Marino goto done;
734*e4b17023SJohn Marino
735*e4b17023SJohn Marino /* Have an escaped newline; process it and proceed to
736*e4b17023SJohn Marino the slow path. */
737*e4b17023SJohn Marino add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
738*e4b17023SJohn Marino d = p - 2;
739*e4b17023SJohn Marino buffer->next_line = p - 1;
740*e4b17023SJohn Marino
741*e4b17023SJohn Marino slow_path:
742*e4b17023SJohn Marino while (1)
743*e4b17023SJohn Marino {
744*e4b17023SJohn Marino c = *++s;
745*e4b17023SJohn Marino *++d = c;
746*e4b17023SJohn Marino
747*e4b17023SJohn Marino if (c == '\n' || c == '\r')
748*e4b17023SJohn Marino {
749*e4b17023SJohn Marino /* Handle DOS line endings. */
750*e4b17023SJohn Marino if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
751*e4b17023SJohn Marino s++;
752*e4b17023SJohn Marino if (s == buffer->rlimit)
753*e4b17023SJohn Marino break;
754*e4b17023SJohn Marino
755*e4b17023SJohn Marino /* Escaped? */
756*e4b17023SJohn Marino p = d;
757*e4b17023SJohn Marino while (p != buffer->next_line && is_nvspace (p[-1]))
758*e4b17023SJohn Marino p--;
759*e4b17023SJohn Marino if (p == buffer->next_line || p[-1] != '\\')
760*e4b17023SJohn Marino break;
761*e4b17023SJohn Marino
762*e4b17023SJohn Marino add_line_note (buffer, p - 1, p != d ? ' ': '\\');
763*e4b17023SJohn Marino d = p - 2;
764*e4b17023SJohn Marino buffer->next_line = p - 1;
765*e4b17023SJohn Marino }
766*e4b17023SJohn Marino else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
767*e4b17023SJohn Marino {
768*e4b17023SJohn Marino /* Add a note regardless, for the benefit of -Wtrigraphs. */
769*e4b17023SJohn Marino add_line_note (buffer, d, s[2]);
770*e4b17023SJohn Marino if (CPP_OPTION (pfile, trigraphs))
771*e4b17023SJohn Marino {
772*e4b17023SJohn Marino *d = _cpp_trigraph_map[s[2]];
773*e4b17023SJohn Marino s += 2;
774*e4b17023SJohn Marino }
775*e4b17023SJohn Marino }
776*e4b17023SJohn Marino }
777*e4b17023SJohn Marino }
778*e4b17023SJohn Marino else
779*e4b17023SJohn Marino {
780*e4b17023SJohn Marino while (*s != '\n' && *s != '\r')
781*e4b17023SJohn Marino s++;
782*e4b17023SJohn Marino d = (uchar *) s;
783*e4b17023SJohn Marino
784*e4b17023SJohn Marino /* Handle DOS line endings. */
785*e4b17023SJohn Marino if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
786*e4b17023SJohn Marino s++;
787*e4b17023SJohn Marino }
788*e4b17023SJohn Marino
789*e4b17023SJohn Marino done:
790*e4b17023SJohn Marino *d = '\n';
791*e4b17023SJohn Marino /* A sentinel note that should never be processed. */
792*e4b17023SJohn Marino add_line_note (buffer, d + 1, '\n');
793*e4b17023SJohn Marino buffer->next_line = s + 1;
794*e4b17023SJohn Marino }
795*e4b17023SJohn Marino
796*e4b17023SJohn Marino /* Return true if the trigraph indicated by NOTE should be warned
797*e4b17023SJohn Marino about in a comment. */
798*e4b17023SJohn Marino static bool
warn_in_comment(cpp_reader * pfile,_cpp_line_note * note)799*e4b17023SJohn Marino warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
800*e4b17023SJohn Marino {
801*e4b17023SJohn Marino const uchar *p;
802*e4b17023SJohn Marino
803*e4b17023SJohn Marino /* Within comments we don't warn about trigraphs, unless the
804*e4b17023SJohn Marino trigraph forms an escaped newline, as that may change
805*e4b17023SJohn Marino behavior. */
806*e4b17023SJohn Marino if (note->type != '/')
807*e4b17023SJohn Marino return false;
808*e4b17023SJohn Marino
809*e4b17023SJohn Marino /* If -trigraphs, then this was an escaped newline iff the next note
810*e4b17023SJohn Marino is coincident. */
811*e4b17023SJohn Marino if (CPP_OPTION (pfile, trigraphs))
812*e4b17023SJohn Marino return note[1].pos == note->pos;
813*e4b17023SJohn Marino
814*e4b17023SJohn Marino /* Otherwise, see if this forms an escaped newline. */
815*e4b17023SJohn Marino p = note->pos + 3;
816*e4b17023SJohn Marino while (is_nvspace (*p))
817*e4b17023SJohn Marino p++;
818*e4b17023SJohn Marino
819*e4b17023SJohn Marino /* There might have been escaped newlines between the trigraph and the
820*e4b17023SJohn Marino newline we found. Hence the position test. */
821*e4b17023SJohn Marino return (*p == '\n' && p < note[1].pos);
822*e4b17023SJohn Marino }
823*e4b17023SJohn Marino
824*e4b17023SJohn Marino /* Process the notes created by add_line_note as far as the current
825*e4b17023SJohn Marino location. */
826*e4b17023SJohn Marino void
_cpp_process_line_notes(cpp_reader * pfile,int in_comment)827*e4b17023SJohn Marino _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
828*e4b17023SJohn Marino {
829*e4b17023SJohn Marino cpp_buffer *buffer = pfile->buffer;
830*e4b17023SJohn Marino
831*e4b17023SJohn Marino for (;;)
832*e4b17023SJohn Marino {
833*e4b17023SJohn Marino _cpp_line_note *note = &buffer->notes[buffer->cur_note];
834*e4b17023SJohn Marino unsigned int col;
835*e4b17023SJohn Marino
836*e4b17023SJohn Marino if (note->pos > buffer->cur)
837*e4b17023SJohn Marino break;
838*e4b17023SJohn Marino
839*e4b17023SJohn Marino buffer->cur_note++;
840*e4b17023SJohn Marino col = CPP_BUF_COLUMN (buffer, note->pos + 1);
841*e4b17023SJohn Marino
842*e4b17023SJohn Marino if (note->type == '\\' || note->type == ' ')
843*e4b17023SJohn Marino {
844*e4b17023SJohn Marino if (note->type == ' ' && !in_comment)
845*e4b17023SJohn Marino cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
846*e4b17023SJohn Marino "backslash and newline separated by space");
847*e4b17023SJohn Marino
848*e4b17023SJohn Marino if (buffer->next_line > buffer->rlimit)
849*e4b17023SJohn Marino {
850*e4b17023SJohn Marino cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
851*e4b17023SJohn Marino "backslash-newline at end of file");
852*e4b17023SJohn Marino /* Prevent "no newline at end of file" warning. */
853*e4b17023SJohn Marino buffer->next_line = buffer->rlimit;
854*e4b17023SJohn Marino }
855*e4b17023SJohn Marino
856*e4b17023SJohn Marino buffer->line_base = note->pos;
857*e4b17023SJohn Marino CPP_INCREMENT_LINE (pfile, 0);
858*e4b17023SJohn Marino }
859*e4b17023SJohn Marino else if (_cpp_trigraph_map[note->type])
860*e4b17023SJohn Marino {
861*e4b17023SJohn Marino if (CPP_OPTION (pfile, warn_trigraphs)
862*e4b17023SJohn Marino && (!in_comment || warn_in_comment (pfile, note)))
863*e4b17023SJohn Marino {
864*e4b17023SJohn Marino if (CPP_OPTION (pfile, trigraphs))
865*e4b17023SJohn Marino cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
866*e4b17023SJohn Marino pfile->line_table->highest_line, col,
867*e4b17023SJohn Marino "trigraph ??%c converted to %c",
868*e4b17023SJohn Marino note->type,
869*e4b17023SJohn Marino (int) _cpp_trigraph_map[note->type]);
870*e4b17023SJohn Marino else
871*e4b17023SJohn Marino {
872*e4b17023SJohn Marino cpp_warning_with_line
873*e4b17023SJohn Marino (pfile, CPP_W_TRIGRAPHS,
874*e4b17023SJohn Marino pfile->line_table->highest_line, col,
875*e4b17023SJohn Marino "trigraph ??%c ignored, use -trigraphs to enable",
876*e4b17023SJohn Marino note->type);
877*e4b17023SJohn Marino }
878*e4b17023SJohn Marino }
879*e4b17023SJohn Marino }
880*e4b17023SJohn Marino else if (note->type == 0)
881*e4b17023SJohn Marino /* Already processed in lex_raw_string. */;
882*e4b17023SJohn Marino else
883*e4b17023SJohn Marino abort ();
884*e4b17023SJohn Marino }
885*e4b17023SJohn Marino }
886*e4b17023SJohn Marino
887*e4b17023SJohn Marino /* Skip a C-style block comment. We find the end of the comment by
888*e4b17023SJohn Marino seeing if an asterisk is before every '/' we encounter. Returns
889*e4b17023SJohn Marino nonzero if comment terminated by EOF, zero otherwise.
890*e4b17023SJohn Marino
891*e4b17023SJohn Marino Buffer->cur points to the initial asterisk of the comment. */
892*e4b17023SJohn Marino bool
_cpp_skip_block_comment(cpp_reader * pfile)893*e4b17023SJohn Marino _cpp_skip_block_comment (cpp_reader *pfile)
894*e4b17023SJohn Marino {
895*e4b17023SJohn Marino cpp_buffer *buffer = pfile->buffer;
896*e4b17023SJohn Marino const uchar *cur = buffer->cur;
897*e4b17023SJohn Marino uchar c;
898*e4b17023SJohn Marino
899*e4b17023SJohn Marino cur++;
900*e4b17023SJohn Marino if (*cur == '/')
901*e4b17023SJohn Marino cur++;
902*e4b17023SJohn Marino
903*e4b17023SJohn Marino for (;;)
904*e4b17023SJohn Marino {
905*e4b17023SJohn Marino /* People like decorating comments with '*', so check for '/'
906*e4b17023SJohn Marino instead for efficiency. */
907*e4b17023SJohn Marino c = *cur++;
908*e4b17023SJohn Marino
909*e4b17023SJohn Marino if (c == '/')
910*e4b17023SJohn Marino {
911*e4b17023SJohn Marino if (cur[-2] == '*')
912*e4b17023SJohn Marino break;
913*e4b17023SJohn Marino
914*e4b17023SJohn Marino /* Warn about potential nested comments, but not if the '/'
915*e4b17023SJohn Marino comes immediately before the true comment delimiter.
916*e4b17023SJohn Marino Don't bother to get it right across escaped newlines. */
917*e4b17023SJohn Marino if (CPP_OPTION (pfile, warn_comments)
918*e4b17023SJohn Marino && cur[0] == '*' && cur[1] != '/')
919*e4b17023SJohn Marino {
920*e4b17023SJohn Marino buffer->cur = cur;
921*e4b17023SJohn Marino cpp_warning_with_line (pfile, CPP_W_COMMENTS,
922*e4b17023SJohn Marino pfile->line_table->highest_line,
923*e4b17023SJohn Marino CPP_BUF_COL (buffer),
924*e4b17023SJohn Marino "\"/*\" within comment");
925*e4b17023SJohn Marino }
926*e4b17023SJohn Marino }
927*e4b17023SJohn Marino else if (c == '\n')
928*e4b17023SJohn Marino {
929*e4b17023SJohn Marino unsigned int cols;
930*e4b17023SJohn Marino buffer->cur = cur - 1;
931*e4b17023SJohn Marino _cpp_process_line_notes (pfile, true);
932*e4b17023SJohn Marino if (buffer->next_line >= buffer->rlimit)
933*e4b17023SJohn Marino return true;
934*e4b17023SJohn Marino _cpp_clean_line (pfile);
935*e4b17023SJohn Marino
936*e4b17023SJohn Marino cols = buffer->next_line - buffer->line_base;
937*e4b17023SJohn Marino CPP_INCREMENT_LINE (pfile, cols);
938*e4b17023SJohn Marino
939*e4b17023SJohn Marino cur = buffer->cur;
940*e4b17023SJohn Marino }
941*e4b17023SJohn Marino }
942*e4b17023SJohn Marino
943*e4b17023SJohn Marino buffer->cur = cur;
944*e4b17023SJohn Marino _cpp_process_line_notes (pfile, true);
945*e4b17023SJohn Marino return false;
946*e4b17023SJohn Marino }
947*e4b17023SJohn Marino
948*e4b17023SJohn Marino /* Skip a C++ line comment, leaving buffer->cur pointing to the
949*e4b17023SJohn Marino terminating newline. Handles escaped newlines. Returns nonzero
950*e4b17023SJohn Marino if a multiline comment. */
951*e4b17023SJohn Marino static int
skip_line_comment(cpp_reader * pfile)952*e4b17023SJohn Marino skip_line_comment (cpp_reader *pfile)
953*e4b17023SJohn Marino {
954*e4b17023SJohn Marino cpp_buffer *buffer = pfile->buffer;
955*e4b17023SJohn Marino source_location orig_line = pfile->line_table->highest_line;
956*e4b17023SJohn Marino
957*e4b17023SJohn Marino while (*buffer->cur != '\n')
958*e4b17023SJohn Marino buffer->cur++;
959*e4b17023SJohn Marino
960*e4b17023SJohn Marino _cpp_process_line_notes (pfile, true);
961*e4b17023SJohn Marino return orig_line != pfile->line_table->highest_line;
962*e4b17023SJohn Marino }
963*e4b17023SJohn Marino
964*e4b17023SJohn Marino /* Skips whitespace, saving the next non-whitespace character. */
965*e4b17023SJohn Marino static void
skip_whitespace(cpp_reader * pfile,cppchar_t c)966*e4b17023SJohn Marino skip_whitespace (cpp_reader *pfile, cppchar_t c)
967*e4b17023SJohn Marino {
968*e4b17023SJohn Marino cpp_buffer *buffer = pfile->buffer;
969*e4b17023SJohn Marino bool saw_NUL = false;
970*e4b17023SJohn Marino
971*e4b17023SJohn Marino do
972*e4b17023SJohn Marino {
973*e4b17023SJohn Marino /* Horizontal space always OK. */
974*e4b17023SJohn Marino if (c == ' ' || c == '\t')
975*e4b17023SJohn Marino ;
976*e4b17023SJohn Marino /* Just \f \v or \0 left. */
977*e4b17023SJohn Marino else if (c == '\0')
978*e4b17023SJohn Marino saw_NUL = true;
979*e4b17023SJohn Marino else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
980*e4b17023SJohn Marino cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
981*e4b17023SJohn Marino CPP_BUF_COL (buffer),
982*e4b17023SJohn Marino "%s in preprocessing directive",
983*e4b17023SJohn Marino c == '\f' ? "form feed" : "vertical tab");
984*e4b17023SJohn Marino
985*e4b17023SJohn Marino c = *buffer->cur++;
986*e4b17023SJohn Marino }
987*e4b17023SJohn Marino /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
988*e4b17023SJohn Marino while (is_nvspace (c));
989*e4b17023SJohn Marino
990*e4b17023SJohn Marino if (saw_NUL)
991*e4b17023SJohn Marino cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
992*e4b17023SJohn Marino
993*e4b17023SJohn Marino buffer->cur--;
994*e4b17023SJohn Marino }
995*e4b17023SJohn Marino
996*e4b17023SJohn Marino /* See if the characters of a number token are valid in a name (no
997*e4b17023SJohn Marino '.', '+' or '-'). */
998*e4b17023SJohn Marino static int
name_p(cpp_reader * pfile,const cpp_string * string)999*e4b17023SJohn Marino name_p (cpp_reader *pfile, const cpp_string *string)
1000*e4b17023SJohn Marino {
1001*e4b17023SJohn Marino unsigned int i;
1002*e4b17023SJohn Marino
1003*e4b17023SJohn Marino for (i = 0; i < string->len; i++)
1004*e4b17023SJohn Marino if (!is_idchar (string->text[i]))
1005*e4b17023SJohn Marino return 0;
1006*e4b17023SJohn Marino
1007*e4b17023SJohn Marino return 1;
1008*e4b17023SJohn Marino }
1009*e4b17023SJohn Marino
1010*e4b17023SJohn Marino /* After parsing an identifier or other sequence, produce a warning about
1011*e4b17023SJohn Marino sequences not in NFC/NFKC. */
1012*e4b17023SJohn Marino static void
warn_about_normalization(cpp_reader * pfile,const cpp_token * token,const struct normalize_state * s)1013*e4b17023SJohn Marino warn_about_normalization (cpp_reader *pfile,
1014*e4b17023SJohn Marino const cpp_token *token,
1015*e4b17023SJohn Marino const struct normalize_state *s)
1016*e4b17023SJohn Marino {
1017*e4b17023SJohn Marino if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
1018*e4b17023SJohn Marino && !pfile->state.skipping)
1019*e4b17023SJohn Marino {
1020*e4b17023SJohn Marino /* Make sure that the token is printed using UCNs, even
1021*e4b17023SJohn Marino if we'd otherwise happily print UTF-8. */
1022*e4b17023SJohn Marino unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
1023*e4b17023SJohn Marino size_t sz;
1024*e4b17023SJohn Marino
1025*e4b17023SJohn Marino sz = cpp_spell_token (pfile, token, buf, false) - buf;
1026*e4b17023SJohn Marino if (NORMALIZE_STATE_RESULT (s) == normalized_C)
1027*e4b17023SJohn Marino cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1028*e4b17023SJohn Marino "`%.*s' is not in NFKC", (int) sz, buf);
1029*e4b17023SJohn Marino else
1030*e4b17023SJohn Marino cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1031*e4b17023SJohn Marino "`%.*s' is not in NFC", (int) sz, buf);
1032*e4b17023SJohn Marino }
1033*e4b17023SJohn Marino }
1034*e4b17023SJohn Marino
1035*e4b17023SJohn Marino /* Returns TRUE if the sequence starting at buffer->cur is invalid in
1036*e4b17023SJohn Marino an identifier. FIRST is TRUE if this starts an identifier. */
1037*e4b17023SJohn Marino static bool
forms_identifier_p(cpp_reader * pfile,int first,struct normalize_state * state)1038*e4b17023SJohn Marino forms_identifier_p (cpp_reader *pfile, int first,
1039*e4b17023SJohn Marino struct normalize_state *state)
1040*e4b17023SJohn Marino {
1041*e4b17023SJohn Marino cpp_buffer *buffer = pfile->buffer;
1042*e4b17023SJohn Marino
1043*e4b17023SJohn Marino if (*buffer->cur == '$')
1044*e4b17023SJohn Marino {
1045*e4b17023SJohn Marino if (!CPP_OPTION (pfile, dollars_in_ident))
1046*e4b17023SJohn Marino return false;
1047*e4b17023SJohn Marino
1048*e4b17023SJohn Marino buffer->cur++;
1049*e4b17023SJohn Marino if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
1050*e4b17023SJohn Marino {
1051*e4b17023SJohn Marino CPP_OPTION (pfile, warn_dollars) = 0;
1052*e4b17023SJohn Marino cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
1053*e4b17023SJohn Marino }
1054*e4b17023SJohn Marino
1055*e4b17023SJohn Marino return true;
1056*e4b17023SJohn Marino }
1057*e4b17023SJohn Marino
1058*e4b17023SJohn Marino /* Is this a syntactically valid UCN? */
1059*e4b17023SJohn Marino if (CPP_OPTION (pfile, extended_identifiers)
1060*e4b17023SJohn Marino && *buffer->cur == '\\'
1061*e4b17023SJohn Marino && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
1062*e4b17023SJohn Marino {
1063*e4b17023SJohn Marino buffer->cur += 2;
1064*e4b17023SJohn Marino if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
1065*e4b17023SJohn Marino state))
1066*e4b17023SJohn Marino return true;
1067*e4b17023SJohn Marino buffer->cur -= 2;
1068*e4b17023SJohn Marino }
1069*e4b17023SJohn Marino
1070*e4b17023SJohn Marino return false;
1071*e4b17023SJohn Marino }
1072*e4b17023SJohn Marino
1073*e4b17023SJohn Marino /* Helper function to get the cpp_hashnode of the identifier BASE. */
1074*e4b17023SJohn Marino static cpp_hashnode *
lex_identifier_intern(cpp_reader * pfile,const uchar * base)1075*e4b17023SJohn Marino lex_identifier_intern (cpp_reader *pfile, const uchar *base)
1076*e4b17023SJohn Marino {
1077*e4b17023SJohn Marino cpp_hashnode *result;
1078*e4b17023SJohn Marino const uchar *cur;
1079*e4b17023SJohn Marino unsigned int len;
1080*e4b17023SJohn Marino unsigned int hash = HT_HASHSTEP (0, *base);
1081*e4b17023SJohn Marino
1082*e4b17023SJohn Marino cur = base + 1;
1083*e4b17023SJohn Marino while (ISIDNUM (*cur))
1084*e4b17023SJohn Marino {
1085*e4b17023SJohn Marino hash = HT_HASHSTEP (hash, *cur);
1086*e4b17023SJohn Marino cur++;
1087*e4b17023SJohn Marino }
1088*e4b17023SJohn Marino len = cur - base;
1089*e4b17023SJohn Marino hash = HT_HASHFINISH (hash, len);
1090*e4b17023SJohn Marino result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1091*e4b17023SJohn Marino base, len, hash, HT_ALLOC));
1092*e4b17023SJohn Marino
1093*e4b17023SJohn Marino /* Rarely, identifiers require diagnostics when lexed. */
1094*e4b17023SJohn Marino if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1095*e4b17023SJohn Marino && !pfile->state.skipping, 0))
1096*e4b17023SJohn Marino {
1097*e4b17023SJohn Marino /* It is allowed to poison the same identifier twice. */
1098*e4b17023SJohn Marino if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1099*e4b17023SJohn Marino cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1100*e4b17023SJohn Marino NODE_NAME (result));
1101*e4b17023SJohn Marino
1102*e4b17023SJohn Marino /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1103*e4b17023SJohn Marino replacement list of a variadic macro. */
1104*e4b17023SJohn Marino if (result == pfile->spec_nodes.n__VA_ARGS__
1105*e4b17023SJohn Marino && !pfile->state.va_args_ok)
1106*e4b17023SJohn Marino cpp_error (pfile, CPP_DL_PEDWARN,
1107*e4b17023SJohn Marino "__VA_ARGS__ can only appear in the expansion"
1108*e4b17023SJohn Marino " of a C99 variadic macro");
1109*e4b17023SJohn Marino
1110*e4b17023SJohn Marino /* For -Wc++-compat, warn about use of C++ named operators. */
1111*e4b17023SJohn Marino if (result->flags & NODE_WARN_OPERATOR)
1112*e4b17023SJohn Marino cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1113*e4b17023SJohn Marino "identifier \"%s\" is a special operator name in C++",
1114*e4b17023SJohn Marino NODE_NAME (result));
1115*e4b17023SJohn Marino }
1116*e4b17023SJohn Marino
1117*e4b17023SJohn Marino return result;
1118*e4b17023SJohn Marino }
1119*e4b17023SJohn Marino
1120*e4b17023SJohn Marino /* Get the cpp_hashnode of an identifier specified by NAME in
1121*e4b17023SJohn Marino the current cpp_reader object. If none is found, NULL is returned. */
1122*e4b17023SJohn Marino cpp_hashnode *
_cpp_lex_identifier(cpp_reader * pfile,const char * name)1123*e4b17023SJohn Marino _cpp_lex_identifier (cpp_reader *pfile, const char *name)
1124*e4b17023SJohn Marino {
1125*e4b17023SJohn Marino cpp_hashnode *result;
1126*e4b17023SJohn Marino result = lex_identifier_intern (pfile, (uchar *) name);
1127*e4b17023SJohn Marino return result;
1128*e4b17023SJohn Marino }
1129*e4b17023SJohn Marino
1130*e4b17023SJohn Marino /* Lex an identifier starting at BUFFER->CUR - 1. */
1131*e4b17023SJohn Marino static cpp_hashnode *
lex_identifier(cpp_reader * pfile,const uchar * base,bool starts_ucn,struct normalize_state * nst)1132*e4b17023SJohn Marino lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
1133*e4b17023SJohn Marino struct normalize_state *nst)
1134*e4b17023SJohn Marino {
1135*e4b17023SJohn Marino cpp_hashnode *result;
1136*e4b17023SJohn Marino const uchar *cur;
1137*e4b17023SJohn Marino unsigned int len;
1138*e4b17023SJohn Marino unsigned int hash = HT_HASHSTEP (0, *base);
1139*e4b17023SJohn Marino
1140*e4b17023SJohn Marino cur = pfile->buffer->cur;
1141*e4b17023SJohn Marino if (! starts_ucn)
1142*e4b17023SJohn Marino while (ISIDNUM (*cur))
1143*e4b17023SJohn Marino {
1144*e4b17023SJohn Marino hash = HT_HASHSTEP (hash, *cur);
1145*e4b17023SJohn Marino cur++;
1146*e4b17023SJohn Marino }
1147*e4b17023SJohn Marino pfile->buffer->cur = cur;
1148*e4b17023SJohn Marino if (starts_ucn || forms_identifier_p (pfile, false, nst))
1149*e4b17023SJohn Marino {
1150*e4b17023SJohn Marino /* Slower version for identifiers containing UCNs (or $). */
1151*e4b17023SJohn Marino do {
1152*e4b17023SJohn Marino while (ISIDNUM (*pfile->buffer->cur))
1153*e4b17023SJohn Marino {
1154*e4b17023SJohn Marino pfile->buffer->cur++;
1155*e4b17023SJohn Marino NORMALIZE_STATE_UPDATE_IDNUM (nst);
1156*e4b17023SJohn Marino }
1157*e4b17023SJohn Marino } while (forms_identifier_p (pfile, false, nst));
1158*e4b17023SJohn Marino result = _cpp_interpret_identifier (pfile, base,
1159*e4b17023SJohn Marino pfile->buffer->cur - base);
1160*e4b17023SJohn Marino }
1161*e4b17023SJohn Marino else
1162*e4b17023SJohn Marino {
1163*e4b17023SJohn Marino len = cur - base;
1164*e4b17023SJohn Marino hash = HT_HASHFINISH (hash, len);
1165*e4b17023SJohn Marino
1166*e4b17023SJohn Marino result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1167*e4b17023SJohn Marino base, len, hash, HT_ALLOC));
1168*e4b17023SJohn Marino }
1169*e4b17023SJohn Marino
1170*e4b17023SJohn Marino /* Rarely, identifiers require diagnostics when lexed. */
1171*e4b17023SJohn Marino if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1172*e4b17023SJohn Marino && !pfile->state.skipping, 0))
1173*e4b17023SJohn Marino {
1174*e4b17023SJohn Marino /* It is allowed to poison the same identifier twice. */
1175*e4b17023SJohn Marino if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1176*e4b17023SJohn Marino cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1177*e4b17023SJohn Marino NODE_NAME (result));
1178*e4b17023SJohn Marino
1179*e4b17023SJohn Marino /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1180*e4b17023SJohn Marino replacement list of a variadic macro. */
1181*e4b17023SJohn Marino if (result == pfile->spec_nodes.n__VA_ARGS__
1182*e4b17023SJohn Marino && !pfile->state.va_args_ok)
1183*e4b17023SJohn Marino cpp_error (pfile, CPP_DL_PEDWARN,
1184*e4b17023SJohn Marino "__VA_ARGS__ can only appear in the expansion"
1185*e4b17023SJohn Marino " of a C99 variadic macro");
1186*e4b17023SJohn Marino
1187*e4b17023SJohn Marino /* For -Wc++-compat, warn about use of C++ named operators. */
1188*e4b17023SJohn Marino if (result->flags & NODE_WARN_OPERATOR)
1189*e4b17023SJohn Marino cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1190*e4b17023SJohn Marino "identifier \"%s\" is a special operator name in C++",
1191*e4b17023SJohn Marino NODE_NAME (result));
1192*e4b17023SJohn Marino }
1193*e4b17023SJohn Marino
1194*e4b17023SJohn Marino return result;
1195*e4b17023SJohn Marino }
1196*e4b17023SJohn Marino
1197*e4b17023SJohn Marino /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
1198*e4b17023SJohn Marino static void
lex_number(cpp_reader * pfile,cpp_string * number,struct normalize_state * nst)1199*e4b17023SJohn Marino lex_number (cpp_reader *pfile, cpp_string *number,
1200*e4b17023SJohn Marino struct normalize_state *nst)
1201*e4b17023SJohn Marino {
1202*e4b17023SJohn Marino const uchar *cur;
1203*e4b17023SJohn Marino const uchar *base;
1204*e4b17023SJohn Marino uchar *dest;
1205*e4b17023SJohn Marino
1206*e4b17023SJohn Marino base = pfile->buffer->cur - 1;
1207*e4b17023SJohn Marino do
1208*e4b17023SJohn Marino {
1209*e4b17023SJohn Marino cur = pfile->buffer->cur;
1210*e4b17023SJohn Marino
1211*e4b17023SJohn Marino /* N.B. ISIDNUM does not include $. */
1212*e4b17023SJohn Marino while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
1213*e4b17023SJohn Marino {
1214*e4b17023SJohn Marino cur++;
1215*e4b17023SJohn Marino NORMALIZE_STATE_UPDATE_IDNUM (nst);
1216*e4b17023SJohn Marino }
1217*e4b17023SJohn Marino
1218*e4b17023SJohn Marino pfile->buffer->cur = cur;
1219*e4b17023SJohn Marino }
1220*e4b17023SJohn Marino while (forms_identifier_p (pfile, false, nst));
1221*e4b17023SJohn Marino
1222*e4b17023SJohn Marino number->len = cur - base;
1223*e4b17023SJohn Marino dest = _cpp_unaligned_alloc (pfile, number->len + 1);
1224*e4b17023SJohn Marino memcpy (dest, base, number->len);
1225*e4b17023SJohn Marino dest[number->len] = '\0';
1226*e4b17023SJohn Marino number->text = dest;
1227*e4b17023SJohn Marino }
1228*e4b17023SJohn Marino
1229*e4b17023SJohn Marino /* Create a token of type TYPE with a literal spelling. */
1230*e4b17023SJohn Marino static void
create_literal(cpp_reader * pfile,cpp_token * token,const uchar * base,unsigned int len,enum cpp_ttype type)1231*e4b17023SJohn Marino create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
1232*e4b17023SJohn Marino unsigned int len, enum cpp_ttype type)
1233*e4b17023SJohn Marino {
1234*e4b17023SJohn Marino uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
1235*e4b17023SJohn Marino
1236*e4b17023SJohn Marino memcpy (dest, base, len);
1237*e4b17023SJohn Marino dest[len] = '\0';
1238*e4b17023SJohn Marino token->type = type;
1239*e4b17023SJohn Marino token->val.str.len = len;
1240*e4b17023SJohn Marino token->val.str.text = dest;
1241*e4b17023SJohn Marino }
1242*e4b17023SJohn Marino
1243*e4b17023SJohn Marino /* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
1244*e4b17023SJohn Marino sequence from *FIRST_BUFF_P to LAST_BUFF_P. */
1245*e4b17023SJohn Marino
1246*e4b17023SJohn Marino static void
bufring_append(cpp_reader * pfile,const uchar * base,size_t len,_cpp_buff ** first_buff_p,_cpp_buff ** last_buff_p)1247*e4b17023SJohn Marino bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
1248*e4b17023SJohn Marino _cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
1249*e4b17023SJohn Marino {
1250*e4b17023SJohn Marino _cpp_buff *first_buff = *first_buff_p;
1251*e4b17023SJohn Marino _cpp_buff *last_buff = *last_buff_p;
1252*e4b17023SJohn Marino
1253*e4b17023SJohn Marino if (first_buff == NULL)
1254*e4b17023SJohn Marino first_buff = last_buff = _cpp_get_buff (pfile, len);
1255*e4b17023SJohn Marino else if (len > BUFF_ROOM (last_buff))
1256*e4b17023SJohn Marino {
1257*e4b17023SJohn Marino size_t room = BUFF_ROOM (last_buff);
1258*e4b17023SJohn Marino memcpy (BUFF_FRONT (last_buff), base, room);
1259*e4b17023SJohn Marino BUFF_FRONT (last_buff) += room;
1260*e4b17023SJohn Marino base += room;
1261*e4b17023SJohn Marino len -= room;
1262*e4b17023SJohn Marino last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
1263*e4b17023SJohn Marino }
1264*e4b17023SJohn Marino
1265*e4b17023SJohn Marino memcpy (BUFF_FRONT (last_buff), base, len);
1266*e4b17023SJohn Marino BUFF_FRONT (last_buff) += len;
1267*e4b17023SJohn Marino
1268*e4b17023SJohn Marino *first_buff_p = first_buff;
1269*e4b17023SJohn Marino *last_buff_p = last_buff;
1270*e4b17023SJohn Marino }
1271*e4b17023SJohn Marino
1272*e4b17023SJohn Marino /* Lexes a raw string. The stored string contains the spelling, including
1273*e4b17023SJohn Marino double quotes, delimiter string, '(' and ')', any leading
1274*e4b17023SJohn Marino 'L', 'u', 'U' or 'u8' and 'R' modifier. It returns the type of the
1275*e4b17023SJohn Marino literal, or CPP_OTHER if it was not properly terminated.
1276*e4b17023SJohn Marino
1277*e4b17023SJohn Marino The spelling is NUL-terminated, but it is not guaranteed that this
1278*e4b17023SJohn Marino is the first NUL since embedded NULs are preserved. */
1279*e4b17023SJohn Marino
1280*e4b17023SJohn Marino static void
lex_raw_string(cpp_reader * pfile,cpp_token * token,const uchar * base,const uchar * cur)1281*e4b17023SJohn Marino lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
1282*e4b17023SJohn Marino const uchar *cur)
1283*e4b17023SJohn Marino {
1284*e4b17023SJohn Marino const uchar *raw_prefix;
1285*e4b17023SJohn Marino unsigned int raw_prefix_len = 0;
1286*e4b17023SJohn Marino enum cpp_ttype type;
1287*e4b17023SJohn Marino size_t total_len = 0;
1288*e4b17023SJohn Marino _cpp_buff *first_buff = NULL, *last_buff = NULL;
1289*e4b17023SJohn Marino _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
1290*e4b17023SJohn Marino
1291*e4b17023SJohn Marino type = (*base == 'L' ? CPP_WSTRING :
1292*e4b17023SJohn Marino *base == 'U' ? CPP_STRING32 :
1293*e4b17023SJohn Marino *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1294*e4b17023SJohn Marino : CPP_STRING);
1295*e4b17023SJohn Marino
1296*e4b17023SJohn Marino raw_prefix = cur + 1;
1297*e4b17023SJohn Marino while (raw_prefix_len < 16)
1298*e4b17023SJohn Marino {
1299*e4b17023SJohn Marino switch (raw_prefix[raw_prefix_len])
1300*e4b17023SJohn Marino {
1301*e4b17023SJohn Marino case ' ': case '(': case ')': case '\\': case '\t':
1302*e4b17023SJohn Marino case '\v': case '\f': case '\n': default:
1303*e4b17023SJohn Marino break;
1304*e4b17023SJohn Marino /* Basic source charset except the above chars. */
1305*e4b17023SJohn Marino case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1306*e4b17023SJohn Marino case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1307*e4b17023SJohn Marino case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1308*e4b17023SJohn Marino case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1309*e4b17023SJohn Marino case 'y': case 'z':
1310*e4b17023SJohn Marino case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1311*e4b17023SJohn Marino case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1312*e4b17023SJohn Marino case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1313*e4b17023SJohn Marino case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1314*e4b17023SJohn Marino case 'Y': case 'Z':
1315*e4b17023SJohn Marino case '0': case '1': case '2': case '3': case '4': case '5':
1316*e4b17023SJohn Marino case '6': case '7': case '8': case '9':
1317*e4b17023SJohn Marino case '_': case '{': case '}': case '#': case '[': case ']':
1318*e4b17023SJohn Marino case '<': case '>': case '%': case ':': case ';': case '.':
1319*e4b17023SJohn Marino case '?': case '*': case '+': case '-': case '/': case '^':
1320*e4b17023SJohn Marino case '&': case '|': case '~': case '!': case '=': case ',':
1321*e4b17023SJohn Marino case '"': case '\'':
1322*e4b17023SJohn Marino raw_prefix_len++;
1323*e4b17023SJohn Marino continue;
1324*e4b17023SJohn Marino }
1325*e4b17023SJohn Marino break;
1326*e4b17023SJohn Marino }
1327*e4b17023SJohn Marino
1328*e4b17023SJohn Marino if (raw_prefix[raw_prefix_len] != '(')
1329*e4b17023SJohn Marino {
1330*e4b17023SJohn Marino int col = CPP_BUF_COLUMN (pfile->buffer, raw_prefix + raw_prefix_len)
1331*e4b17023SJohn Marino + 1;
1332*e4b17023SJohn Marino if (raw_prefix_len == 16)
1333*e4b17023SJohn Marino cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
1334*e4b17023SJohn Marino "raw string delimiter longer than 16 characters");
1335*e4b17023SJohn Marino else
1336*e4b17023SJohn Marino cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
1337*e4b17023SJohn Marino "invalid character '%c' in raw string delimiter",
1338*e4b17023SJohn Marino (int) raw_prefix[raw_prefix_len]);
1339*e4b17023SJohn Marino pfile->buffer->cur = raw_prefix - 1;
1340*e4b17023SJohn Marino create_literal (pfile, token, base, raw_prefix - 1 - base, CPP_OTHER);
1341*e4b17023SJohn Marino return;
1342*e4b17023SJohn Marino }
1343*e4b17023SJohn Marino
1344*e4b17023SJohn Marino cur = raw_prefix + raw_prefix_len + 1;
1345*e4b17023SJohn Marino for (;;)
1346*e4b17023SJohn Marino {
1347*e4b17023SJohn Marino #define BUF_APPEND(STR,LEN) \
1348*e4b17023SJohn Marino do { \
1349*e4b17023SJohn Marino bufring_append (pfile, (const uchar *)(STR), (LEN), \
1350*e4b17023SJohn Marino &first_buff, &last_buff); \
1351*e4b17023SJohn Marino total_len += (LEN); \
1352*e4b17023SJohn Marino } while (0);
1353*e4b17023SJohn Marino
1354*e4b17023SJohn Marino cppchar_t c;
1355*e4b17023SJohn Marino
1356*e4b17023SJohn Marino /* If we previously performed any trigraph or line splicing
1357*e4b17023SJohn Marino transformations, undo them within the body of the raw string. */
1358*e4b17023SJohn Marino while (note->pos < cur)
1359*e4b17023SJohn Marino ++note;
1360*e4b17023SJohn Marino for (; note->pos == cur; ++note)
1361*e4b17023SJohn Marino {
1362*e4b17023SJohn Marino switch (note->type)
1363*e4b17023SJohn Marino {
1364*e4b17023SJohn Marino case '\\':
1365*e4b17023SJohn Marino case ' ':
1366*e4b17023SJohn Marino /* Restore backslash followed by newline. */
1367*e4b17023SJohn Marino BUF_APPEND (base, cur - base);
1368*e4b17023SJohn Marino base = cur;
1369*e4b17023SJohn Marino BUF_APPEND ("\\", 1);
1370*e4b17023SJohn Marino after_backslash:
1371*e4b17023SJohn Marino if (note->type == ' ')
1372*e4b17023SJohn Marino {
1373*e4b17023SJohn Marino /* GNU backslash whitespace newline extension. FIXME
1374*e4b17023SJohn Marino could be any sequence of non-vertical space. When we
1375*e4b17023SJohn Marino can properly restore any such sequence, we should mark
1376*e4b17023SJohn Marino this note as handled so _cpp_process_line_notes
1377*e4b17023SJohn Marino doesn't warn. */
1378*e4b17023SJohn Marino BUF_APPEND (" ", 1);
1379*e4b17023SJohn Marino }
1380*e4b17023SJohn Marino
1381*e4b17023SJohn Marino BUF_APPEND ("\n", 1);
1382*e4b17023SJohn Marino break;
1383*e4b17023SJohn Marino
1384*e4b17023SJohn Marino case 0:
1385*e4b17023SJohn Marino /* Already handled. */
1386*e4b17023SJohn Marino break;
1387*e4b17023SJohn Marino
1388*e4b17023SJohn Marino default:
1389*e4b17023SJohn Marino if (_cpp_trigraph_map[note->type])
1390*e4b17023SJohn Marino {
1391*e4b17023SJohn Marino /* Don't warn about this trigraph in
1392*e4b17023SJohn Marino _cpp_process_line_notes, since trigraphs show up as
1393*e4b17023SJohn Marino trigraphs in raw strings. */
1394*e4b17023SJohn Marino uchar type = note->type;
1395*e4b17023SJohn Marino note->type = 0;
1396*e4b17023SJohn Marino
1397*e4b17023SJohn Marino if (!CPP_OPTION (pfile, trigraphs))
1398*e4b17023SJohn Marino /* If we didn't convert the trigraph in the first
1399*e4b17023SJohn Marino place, don't do anything now either. */
1400*e4b17023SJohn Marino break;
1401*e4b17023SJohn Marino
1402*e4b17023SJohn Marino BUF_APPEND (base, cur - base);
1403*e4b17023SJohn Marino base = cur;
1404*e4b17023SJohn Marino BUF_APPEND ("??", 2);
1405*e4b17023SJohn Marino
1406*e4b17023SJohn Marino /* ??/ followed by newline gets two line notes, one for
1407*e4b17023SJohn Marino the trigraph and one for the backslash/newline. */
1408*e4b17023SJohn Marino if (type == '/' && note[1].pos == cur)
1409*e4b17023SJohn Marino {
1410*e4b17023SJohn Marino if (note[1].type != '\\'
1411*e4b17023SJohn Marino && note[1].type != ' ')
1412*e4b17023SJohn Marino abort ();
1413*e4b17023SJohn Marino BUF_APPEND ("/", 1);
1414*e4b17023SJohn Marino ++note;
1415*e4b17023SJohn Marino goto after_backslash;
1416*e4b17023SJohn Marino }
1417*e4b17023SJohn Marino /* The ) from ??) could be part of the suffix. */
1418*e4b17023SJohn Marino else if (type == ')'
1419*e4b17023SJohn Marino && strncmp ((const char *) cur+1,
1420*e4b17023SJohn Marino (const char *) raw_prefix,
1421*e4b17023SJohn Marino raw_prefix_len) == 0
1422*e4b17023SJohn Marino && cur[raw_prefix_len+1] == '"')
1423*e4b17023SJohn Marino {
1424*e4b17023SJohn Marino BUF_APPEND (")", 1);
1425*e4b17023SJohn Marino base++;
1426*e4b17023SJohn Marino cur += raw_prefix_len + 2;
1427*e4b17023SJohn Marino goto break_outer_loop;
1428*e4b17023SJohn Marino }
1429*e4b17023SJohn Marino else
1430*e4b17023SJohn Marino {
1431*e4b17023SJohn Marino /* Skip the replacement character. */
1432*e4b17023SJohn Marino base = ++cur;
1433*e4b17023SJohn Marino BUF_APPEND (&type, 1);
1434*e4b17023SJohn Marino }
1435*e4b17023SJohn Marino }
1436*e4b17023SJohn Marino else
1437*e4b17023SJohn Marino abort ();
1438*e4b17023SJohn Marino break;
1439*e4b17023SJohn Marino }
1440*e4b17023SJohn Marino }
1441*e4b17023SJohn Marino c = *cur++;
1442*e4b17023SJohn Marino
1443*e4b17023SJohn Marino if (c == ')'
1444*e4b17023SJohn Marino && strncmp ((const char *) cur, (const char *) raw_prefix,
1445*e4b17023SJohn Marino raw_prefix_len) == 0
1446*e4b17023SJohn Marino && cur[raw_prefix_len] == '"')
1447*e4b17023SJohn Marino {
1448*e4b17023SJohn Marino cur += raw_prefix_len + 1;
1449*e4b17023SJohn Marino break;
1450*e4b17023SJohn Marino }
1451*e4b17023SJohn Marino else if (c == '\n')
1452*e4b17023SJohn Marino {
1453*e4b17023SJohn Marino if (pfile->state.in_directive
1454*e4b17023SJohn Marino || pfile->state.parsing_args
1455*e4b17023SJohn Marino || pfile->state.in_deferred_pragma)
1456*e4b17023SJohn Marino {
1457*e4b17023SJohn Marino cur--;
1458*e4b17023SJohn Marino type = CPP_OTHER;
1459*e4b17023SJohn Marino cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
1460*e4b17023SJohn Marino "unterminated raw string");
1461*e4b17023SJohn Marino break;
1462*e4b17023SJohn Marino }
1463*e4b17023SJohn Marino
1464*e4b17023SJohn Marino BUF_APPEND (base, cur - base);
1465*e4b17023SJohn Marino
1466*e4b17023SJohn Marino if (pfile->buffer->cur < pfile->buffer->rlimit)
1467*e4b17023SJohn Marino CPP_INCREMENT_LINE (pfile, 0);
1468*e4b17023SJohn Marino pfile->buffer->need_line = true;
1469*e4b17023SJohn Marino
1470*e4b17023SJohn Marino pfile->buffer->cur = cur-1;
1471*e4b17023SJohn Marino _cpp_process_line_notes (pfile, false);
1472*e4b17023SJohn Marino if (!_cpp_get_fresh_line (pfile))
1473*e4b17023SJohn Marino {
1474*e4b17023SJohn Marino source_location src_loc = token->src_loc;
1475*e4b17023SJohn Marino token->type = CPP_EOF;
1476*e4b17023SJohn Marino /* Tell the compiler the line number of the EOF token. */
1477*e4b17023SJohn Marino token->src_loc = pfile->line_table->highest_line;
1478*e4b17023SJohn Marino token->flags = BOL;
1479*e4b17023SJohn Marino if (first_buff != NULL)
1480*e4b17023SJohn Marino _cpp_release_buff (pfile, first_buff);
1481*e4b17023SJohn Marino cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
1482*e4b17023SJohn Marino "unterminated raw string");
1483*e4b17023SJohn Marino return;
1484*e4b17023SJohn Marino }
1485*e4b17023SJohn Marino
1486*e4b17023SJohn Marino cur = base = pfile->buffer->cur;
1487*e4b17023SJohn Marino note = &pfile->buffer->notes[pfile->buffer->cur_note];
1488*e4b17023SJohn Marino }
1489*e4b17023SJohn Marino }
1490*e4b17023SJohn Marino break_outer_loop:
1491*e4b17023SJohn Marino
1492*e4b17023SJohn Marino if (CPP_OPTION (pfile, user_literals))
1493*e4b17023SJohn Marino {
1494*e4b17023SJohn Marino /* Grab user defined literal suffix. */
1495*e4b17023SJohn Marino if (ISIDST (*cur))
1496*e4b17023SJohn Marino {
1497*e4b17023SJohn Marino type = cpp_userdef_string_add_type (type);
1498*e4b17023SJohn Marino ++cur;
1499*e4b17023SJohn Marino }
1500*e4b17023SJohn Marino while (ISIDNUM (*cur))
1501*e4b17023SJohn Marino ++cur;
1502*e4b17023SJohn Marino }
1503*e4b17023SJohn Marino
1504*e4b17023SJohn Marino pfile->buffer->cur = cur;
1505*e4b17023SJohn Marino if (first_buff == NULL)
1506*e4b17023SJohn Marino create_literal (pfile, token, base, cur - base, type);
1507*e4b17023SJohn Marino else
1508*e4b17023SJohn Marino {
1509*e4b17023SJohn Marino uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
1510*e4b17023SJohn Marino
1511*e4b17023SJohn Marino token->type = type;
1512*e4b17023SJohn Marino token->val.str.len = total_len + (cur - base);
1513*e4b17023SJohn Marino token->val.str.text = dest;
1514*e4b17023SJohn Marino last_buff = first_buff;
1515*e4b17023SJohn Marino while (last_buff != NULL)
1516*e4b17023SJohn Marino {
1517*e4b17023SJohn Marino memcpy (dest, last_buff->base,
1518*e4b17023SJohn Marino BUFF_FRONT (last_buff) - last_buff->base);
1519*e4b17023SJohn Marino dest += BUFF_FRONT (last_buff) - last_buff->base;
1520*e4b17023SJohn Marino last_buff = last_buff->next;
1521*e4b17023SJohn Marino }
1522*e4b17023SJohn Marino _cpp_release_buff (pfile, first_buff);
1523*e4b17023SJohn Marino memcpy (dest, base, cur - base);
1524*e4b17023SJohn Marino dest[cur - base] = '\0';
1525*e4b17023SJohn Marino }
1526*e4b17023SJohn Marino }
1527*e4b17023SJohn Marino
1528*e4b17023SJohn Marino /* Lexes a string, character constant, or angle-bracketed header file
1529*e4b17023SJohn Marino name. The stored string contains the spelling, including opening
1530*e4b17023SJohn Marino quote and any leading 'L', 'u', 'U' or 'u8' and optional
1531*e4b17023SJohn Marino 'R' modifier. It returns the type of the literal, or CPP_OTHER
1532*e4b17023SJohn Marino if it was not properly terminated, or CPP_LESS for an unterminated
1533*e4b17023SJohn Marino header name which must be relexed as normal tokens.
1534*e4b17023SJohn Marino
1535*e4b17023SJohn Marino The spelling is NUL-terminated, but it is not guaranteed that this
1536*e4b17023SJohn Marino is the first NUL since embedded NULs are preserved. */
1537*e4b17023SJohn Marino static void
lex_string(cpp_reader * pfile,cpp_token * token,const uchar * base)1538*e4b17023SJohn Marino lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
1539*e4b17023SJohn Marino {
1540*e4b17023SJohn Marino bool saw_NUL = false;
1541*e4b17023SJohn Marino const uchar *cur;
1542*e4b17023SJohn Marino cppchar_t terminator;
1543*e4b17023SJohn Marino enum cpp_ttype type;
1544*e4b17023SJohn Marino
1545*e4b17023SJohn Marino cur = base;
1546*e4b17023SJohn Marino terminator = *cur++;
1547*e4b17023SJohn Marino if (terminator == 'L' || terminator == 'U')
1548*e4b17023SJohn Marino terminator = *cur++;
1549*e4b17023SJohn Marino else if (terminator == 'u')
1550*e4b17023SJohn Marino {
1551*e4b17023SJohn Marino terminator = *cur++;
1552*e4b17023SJohn Marino if (terminator == '8')
1553*e4b17023SJohn Marino terminator = *cur++;
1554*e4b17023SJohn Marino }
1555*e4b17023SJohn Marino if (terminator == 'R')
1556*e4b17023SJohn Marino {
1557*e4b17023SJohn Marino lex_raw_string (pfile, token, base, cur);
1558*e4b17023SJohn Marino return;
1559*e4b17023SJohn Marino }
1560*e4b17023SJohn Marino if (terminator == '"')
1561*e4b17023SJohn Marino type = (*base == 'L' ? CPP_WSTRING :
1562*e4b17023SJohn Marino *base == 'U' ? CPP_STRING32 :
1563*e4b17023SJohn Marino *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1564*e4b17023SJohn Marino : CPP_STRING);
1565*e4b17023SJohn Marino else if (terminator == '\'')
1566*e4b17023SJohn Marino type = (*base == 'L' ? CPP_WCHAR :
1567*e4b17023SJohn Marino *base == 'U' ? CPP_CHAR32 :
1568*e4b17023SJohn Marino *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
1569*e4b17023SJohn Marino else
1570*e4b17023SJohn Marino terminator = '>', type = CPP_HEADER_NAME;
1571*e4b17023SJohn Marino
1572*e4b17023SJohn Marino for (;;)
1573*e4b17023SJohn Marino {
1574*e4b17023SJohn Marino cppchar_t c = *cur++;
1575*e4b17023SJohn Marino
1576*e4b17023SJohn Marino /* In #include-style directives, terminators are not escapable. */
1577*e4b17023SJohn Marino if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
1578*e4b17023SJohn Marino cur++;
1579*e4b17023SJohn Marino else if (c == terminator)
1580*e4b17023SJohn Marino break;
1581*e4b17023SJohn Marino else if (c == '\n')
1582*e4b17023SJohn Marino {
1583*e4b17023SJohn Marino cur--;
1584*e4b17023SJohn Marino /* Unmatched quotes always yield undefined behavior, but
1585*e4b17023SJohn Marino greedy lexing means that what appears to be an unterminated
1586*e4b17023SJohn Marino header name may actually be a legitimate sequence of tokens. */
1587*e4b17023SJohn Marino if (terminator == '>')
1588*e4b17023SJohn Marino {
1589*e4b17023SJohn Marino token->type = CPP_LESS;
1590*e4b17023SJohn Marino return;
1591*e4b17023SJohn Marino }
1592*e4b17023SJohn Marino type = CPP_OTHER;
1593*e4b17023SJohn Marino break;
1594*e4b17023SJohn Marino }
1595*e4b17023SJohn Marino else if (c == '\0')
1596*e4b17023SJohn Marino saw_NUL = true;
1597*e4b17023SJohn Marino }
1598*e4b17023SJohn Marino
1599*e4b17023SJohn Marino if (saw_NUL && !pfile->state.skipping)
1600*e4b17023SJohn Marino cpp_error (pfile, CPP_DL_WARNING,
1601*e4b17023SJohn Marino "null character(s) preserved in literal");
1602*e4b17023SJohn Marino
1603*e4b17023SJohn Marino if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
1604*e4b17023SJohn Marino cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
1605*e4b17023SJohn Marino (int) terminator);
1606*e4b17023SJohn Marino
1607*e4b17023SJohn Marino if (CPP_OPTION (pfile, user_literals))
1608*e4b17023SJohn Marino {
1609*e4b17023SJohn Marino /* Grab user defined literal suffix. */
1610*e4b17023SJohn Marino if (ISIDST (*cur))
1611*e4b17023SJohn Marino {
1612*e4b17023SJohn Marino type = cpp_userdef_char_add_type (type);
1613*e4b17023SJohn Marino type = cpp_userdef_string_add_type (type);
1614*e4b17023SJohn Marino ++cur;
1615*e4b17023SJohn Marino }
1616*e4b17023SJohn Marino while (ISIDNUM (*cur))
1617*e4b17023SJohn Marino ++cur;
1618*e4b17023SJohn Marino }
1619*e4b17023SJohn Marino
1620*e4b17023SJohn Marino pfile->buffer->cur = cur;
1621*e4b17023SJohn Marino create_literal (pfile, token, base, cur - base, type);
1622*e4b17023SJohn Marino }
1623*e4b17023SJohn Marino
1624*e4b17023SJohn Marino /* Return the comment table. The client may not make any assumption
1625*e4b17023SJohn Marino about the ordering of the table. */
1626*e4b17023SJohn Marino cpp_comment_table *
cpp_get_comments(cpp_reader * pfile)1627*e4b17023SJohn Marino cpp_get_comments (cpp_reader *pfile)
1628*e4b17023SJohn Marino {
1629*e4b17023SJohn Marino return &pfile->comments;
1630*e4b17023SJohn Marino }
1631*e4b17023SJohn Marino
1632*e4b17023SJohn Marino /* Append a comment to the end of the comment table. */
1633*e4b17023SJohn Marino static void
store_comment(cpp_reader * pfile,cpp_token * token)1634*e4b17023SJohn Marino store_comment (cpp_reader *pfile, cpp_token *token)
1635*e4b17023SJohn Marino {
1636*e4b17023SJohn Marino int len;
1637*e4b17023SJohn Marino
1638*e4b17023SJohn Marino if (pfile->comments.allocated == 0)
1639*e4b17023SJohn Marino {
1640*e4b17023SJohn Marino pfile->comments.allocated = 256;
1641*e4b17023SJohn Marino pfile->comments.entries = (cpp_comment *) xmalloc
1642*e4b17023SJohn Marino (pfile->comments.allocated * sizeof (cpp_comment));
1643*e4b17023SJohn Marino }
1644*e4b17023SJohn Marino
1645*e4b17023SJohn Marino if (pfile->comments.count == pfile->comments.allocated)
1646*e4b17023SJohn Marino {
1647*e4b17023SJohn Marino pfile->comments.allocated *= 2;
1648*e4b17023SJohn Marino pfile->comments.entries = (cpp_comment *) xrealloc
1649*e4b17023SJohn Marino (pfile->comments.entries,
1650*e4b17023SJohn Marino pfile->comments.allocated * sizeof (cpp_comment));
1651*e4b17023SJohn Marino }
1652*e4b17023SJohn Marino
1653*e4b17023SJohn Marino len = token->val.str.len;
1654*e4b17023SJohn Marino
1655*e4b17023SJohn Marino /* Copy comment. Note, token may not be NULL terminated. */
1656*e4b17023SJohn Marino pfile->comments.entries[pfile->comments.count].comment =
1657*e4b17023SJohn Marino (char *) xmalloc (sizeof (char) * (len + 1));
1658*e4b17023SJohn Marino memcpy (pfile->comments.entries[pfile->comments.count].comment,
1659*e4b17023SJohn Marino token->val.str.text, len);
1660*e4b17023SJohn Marino pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
1661*e4b17023SJohn Marino
1662*e4b17023SJohn Marino /* Set source location. */
1663*e4b17023SJohn Marino pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
1664*e4b17023SJohn Marino
1665*e4b17023SJohn Marino /* Increment the count of entries in the comment table. */
1666*e4b17023SJohn Marino pfile->comments.count++;
1667*e4b17023SJohn Marino }
1668*e4b17023SJohn Marino
1669*e4b17023SJohn Marino /* The stored comment includes the comment start and any terminator. */
1670*e4b17023SJohn Marino static void
save_comment(cpp_reader * pfile,cpp_token * token,const unsigned char * from,cppchar_t type)1671*e4b17023SJohn Marino save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1672*e4b17023SJohn Marino cppchar_t type)
1673*e4b17023SJohn Marino {
1674*e4b17023SJohn Marino unsigned char *buffer;
1675*e4b17023SJohn Marino unsigned int len, clen, i;
1676*e4b17023SJohn Marino
1677*e4b17023SJohn Marino len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
1678*e4b17023SJohn Marino
1679*e4b17023SJohn Marino /* C++ comments probably (not definitely) have moved past a new
1680*e4b17023SJohn Marino line, which we don't want to save in the comment. */
1681*e4b17023SJohn Marino if (is_vspace (pfile->buffer->cur[-1]))
1682*e4b17023SJohn Marino len--;
1683*e4b17023SJohn Marino
1684*e4b17023SJohn Marino /* If we are currently in a directive or in argument parsing, then
1685*e4b17023SJohn Marino we need to store all C++ comments as C comments internally, and
1686*e4b17023SJohn Marino so we need to allocate a little extra space in that case.
1687*e4b17023SJohn Marino
1688*e4b17023SJohn Marino Note that the only time we encounter a directive here is
1689*e4b17023SJohn Marino when we are saving comments in a "#define". */
1690*e4b17023SJohn Marino clen = ((pfile->state.in_directive || pfile->state.parsing_args)
1691*e4b17023SJohn Marino && type == '/') ? len + 2 : len;
1692*e4b17023SJohn Marino
1693*e4b17023SJohn Marino buffer = _cpp_unaligned_alloc (pfile, clen);
1694*e4b17023SJohn Marino
1695*e4b17023SJohn Marino token->type = CPP_COMMENT;
1696*e4b17023SJohn Marino token->val.str.len = clen;
1697*e4b17023SJohn Marino token->val.str.text = buffer;
1698*e4b17023SJohn Marino
1699*e4b17023SJohn Marino buffer[0] = '/';
1700*e4b17023SJohn Marino memcpy (buffer + 1, from, len - 1);
1701*e4b17023SJohn Marino
1702*e4b17023SJohn Marino /* Finish conversion to a C comment, if necessary. */
1703*e4b17023SJohn Marino if ((pfile->state.in_directive || pfile->state.parsing_args) && type == '/')
1704*e4b17023SJohn Marino {
1705*e4b17023SJohn Marino buffer[1] = '*';
1706*e4b17023SJohn Marino buffer[clen - 2] = '*';
1707*e4b17023SJohn Marino buffer[clen - 1] = '/';
1708*e4b17023SJohn Marino /* As there can be in a C++ comments illegal sequences for C comments
1709*e4b17023SJohn Marino we need to filter them out. */
1710*e4b17023SJohn Marino for (i = 2; i < (clen - 2); i++)
1711*e4b17023SJohn Marino if (buffer[i] == '/' && (buffer[i - 1] == '*' || buffer[i + 1] == '*'))
1712*e4b17023SJohn Marino buffer[i] = '|';
1713*e4b17023SJohn Marino }
1714*e4b17023SJohn Marino
1715*e4b17023SJohn Marino /* Finally store this comment for use by clients of libcpp. */
1716*e4b17023SJohn Marino store_comment (pfile, token);
1717*e4b17023SJohn Marino }
1718*e4b17023SJohn Marino
1719*e4b17023SJohn Marino /* Allocate COUNT tokens for RUN. */
1720*e4b17023SJohn Marino void
_cpp_init_tokenrun(tokenrun * run,unsigned int count)1721*e4b17023SJohn Marino _cpp_init_tokenrun (tokenrun *run, unsigned int count)
1722*e4b17023SJohn Marino {
1723*e4b17023SJohn Marino run->base = XNEWVEC (cpp_token, count);
1724*e4b17023SJohn Marino run->limit = run->base + count;
1725*e4b17023SJohn Marino run->next = NULL;
1726*e4b17023SJohn Marino }
1727*e4b17023SJohn Marino
1728*e4b17023SJohn Marino /* Returns the next tokenrun, or creates one if there is none. */
1729*e4b17023SJohn Marino static tokenrun *
next_tokenrun(tokenrun * run)1730*e4b17023SJohn Marino next_tokenrun (tokenrun *run)
1731*e4b17023SJohn Marino {
1732*e4b17023SJohn Marino if (run->next == NULL)
1733*e4b17023SJohn Marino {
1734*e4b17023SJohn Marino run->next = XNEW (tokenrun);
1735*e4b17023SJohn Marino run->next->prev = run;
1736*e4b17023SJohn Marino _cpp_init_tokenrun (run->next, 250);
1737*e4b17023SJohn Marino }
1738*e4b17023SJohn Marino
1739*e4b17023SJohn Marino return run->next;
1740*e4b17023SJohn Marino }
1741*e4b17023SJohn Marino
1742*e4b17023SJohn Marino /* Return the number of not yet processed token in a given
1743*e4b17023SJohn Marino context. */
1744*e4b17023SJohn Marino int
_cpp_remaining_tokens_num_in_context(cpp_context * context)1745*e4b17023SJohn Marino _cpp_remaining_tokens_num_in_context (cpp_context *context)
1746*e4b17023SJohn Marino {
1747*e4b17023SJohn Marino if (context->tokens_kind == TOKENS_KIND_DIRECT)
1748*e4b17023SJohn Marino return (LAST (context).token - FIRST (context).token);
1749*e4b17023SJohn Marino else if (context->tokens_kind == TOKENS_KIND_INDIRECT
1750*e4b17023SJohn Marino || context->tokens_kind == TOKENS_KIND_EXTENDED)
1751*e4b17023SJohn Marino return (LAST (context).ptoken - FIRST (context).ptoken);
1752*e4b17023SJohn Marino else
1753*e4b17023SJohn Marino abort ();
1754*e4b17023SJohn Marino }
1755*e4b17023SJohn Marino
1756*e4b17023SJohn Marino /* Returns the token present at index INDEX in a given context. If
1757*e4b17023SJohn Marino INDEX is zero, the next token to be processed is returned. */
1758*e4b17023SJohn Marino static const cpp_token*
_cpp_token_from_context_at(cpp_context * context,int index)1759*e4b17023SJohn Marino _cpp_token_from_context_at (cpp_context *context, int index)
1760*e4b17023SJohn Marino {
1761*e4b17023SJohn Marino if (context->tokens_kind == TOKENS_KIND_DIRECT)
1762*e4b17023SJohn Marino return &(FIRST (context).token[index]);
1763*e4b17023SJohn Marino else if (context->tokens_kind == TOKENS_KIND_INDIRECT
1764*e4b17023SJohn Marino || context->tokens_kind == TOKENS_KIND_EXTENDED)
1765*e4b17023SJohn Marino return FIRST (context).ptoken[index];
1766*e4b17023SJohn Marino else
1767*e4b17023SJohn Marino abort ();
1768*e4b17023SJohn Marino }
1769*e4b17023SJohn Marino
1770*e4b17023SJohn Marino /* Look ahead in the input stream. */
1771*e4b17023SJohn Marino const cpp_token *
cpp_peek_token(cpp_reader * pfile,int index)1772*e4b17023SJohn Marino cpp_peek_token (cpp_reader *pfile, int index)
1773*e4b17023SJohn Marino {
1774*e4b17023SJohn Marino cpp_context *context = pfile->context;
1775*e4b17023SJohn Marino const cpp_token *peektok;
1776*e4b17023SJohn Marino int count;
1777*e4b17023SJohn Marino
1778*e4b17023SJohn Marino /* First, scan through any pending cpp_context objects. */
1779*e4b17023SJohn Marino while (context->prev)
1780*e4b17023SJohn Marino {
1781*e4b17023SJohn Marino ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
1782*e4b17023SJohn Marino
1783*e4b17023SJohn Marino if (index < (int) sz)
1784*e4b17023SJohn Marino return _cpp_token_from_context_at (context, index);
1785*e4b17023SJohn Marino index -= (int) sz;
1786*e4b17023SJohn Marino context = context->prev;
1787*e4b17023SJohn Marino }
1788*e4b17023SJohn Marino
1789*e4b17023SJohn Marino /* We will have to read some new tokens after all (and do so
1790*e4b17023SJohn Marino without invalidating preceding tokens). */
1791*e4b17023SJohn Marino count = index;
1792*e4b17023SJohn Marino pfile->keep_tokens++;
1793*e4b17023SJohn Marino
1794*e4b17023SJohn Marino do
1795*e4b17023SJohn Marino {
1796*e4b17023SJohn Marino peektok = _cpp_lex_token (pfile);
1797*e4b17023SJohn Marino if (peektok->type == CPP_EOF)
1798*e4b17023SJohn Marino return peektok;
1799*e4b17023SJohn Marino }
1800*e4b17023SJohn Marino while (index--);
1801*e4b17023SJohn Marino
1802*e4b17023SJohn Marino _cpp_backup_tokens_direct (pfile, count + 1);
1803*e4b17023SJohn Marino pfile->keep_tokens--;
1804*e4b17023SJohn Marino
1805*e4b17023SJohn Marino return peektok;
1806*e4b17023SJohn Marino }
1807*e4b17023SJohn Marino
1808*e4b17023SJohn Marino /* Allocate a single token that is invalidated at the same time as the
1809*e4b17023SJohn Marino rest of the tokens on the line. Has its line and col set to the
1810*e4b17023SJohn Marino same as the last lexed token, so that diagnostics appear in the
1811*e4b17023SJohn Marino right place. */
1812*e4b17023SJohn Marino cpp_token *
_cpp_temp_token(cpp_reader * pfile)1813*e4b17023SJohn Marino _cpp_temp_token (cpp_reader *pfile)
1814*e4b17023SJohn Marino {
1815*e4b17023SJohn Marino cpp_token *old, *result;
1816*e4b17023SJohn Marino ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
1817*e4b17023SJohn Marino ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
1818*e4b17023SJohn Marino
1819*e4b17023SJohn Marino old = pfile->cur_token - 1;
1820*e4b17023SJohn Marino /* Any pre-existing lookaheads must not be clobbered. */
1821*e4b17023SJohn Marino if (la)
1822*e4b17023SJohn Marino {
1823*e4b17023SJohn Marino if (sz <= la)
1824*e4b17023SJohn Marino {
1825*e4b17023SJohn Marino tokenrun *next = next_tokenrun (pfile->cur_run);
1826*e4b17023SJohn Marino
1827*e4b17023SJohn Marino if (sz < la)
1828*e4b17023SJohn Marino memmove (next->base + 1, next->base,
1829*e4b17023SJohn Marino (la - sz) * sizeof (cpp_token));
1830*e4b17023SJohn Marino
1831*e4b17023SJohn Marino next->base[0] = pfile->cur_run->limit[-1];
1832*e4b17023SJohn Marino }
1833*e4b17023SJohn Marino
1834*e4b17023SJohn Marino if (sz > 1)
1835*e4b17023SJohn Marino memmove (pfile->cur_token + 1, pfile->cur_token,
1836*e4b17023SJohn Marino MIN (la, sz - 1) * sizeof (cpp_token));
1837*e4b17023SJohn Marino }
1838*e4b17023SJohn Marino
1839*e4b17023SJohn Marino if (!sz && pfile->cur_token == pfile->cur_run->limit)
1840*e4b17023SJohn Marino {
1841*e4b17023SJohn Marino pfile->cur_run = next_tokenrun (pfile->cur_run);
1842*e4b17023SJohn Marino pfile->cur_token = pfile->cur_run->base;
1843*e4b17023SJohn Marino }
1844*e4b17023SJohn Marino
1845*e4b17023SJohn Marino result = pfile->cur_token++;
1846*e4b17023SJohn Marino result->src_loc = old->src_loc;
1847*e4b17023SJohn Marino return result;
1848*e4b17023SJohn Marino }
1849*e4b17023SJohn Marino
1850*e4b17023SJohn Marino /* Lex a token into RESULT (external interface). Takes care of issues
1851*e4b17023SJohn Marino like directive handling, token lookahead, multiple include
1852*e4b17023SJohn Marino optimization and skipping. */
1853*e4b17023SJohn Marino const cpp_token *
_cpp_lex_token(cpp_reader * pfile)1854*e4b17023SJohn Marino _cpp_lex_token (cpp_reader *pfile)
1855*e4b17023SJohn Marino {
1856*e4b17023SJohn Marino cpp_token *result;
1857*e4b17023SJohn Marino
1858*e4b17023SJohn Marino for (;;)
1859*e4b17023SJohn Marino {
1860*e4b17023SJohn Marino if (pfile->cur_token == pfile->cur_run->limit)
1861*e4b17023SJohn Marino {
1862*e4b17023SJohn Marino pfile->cur_run = next_tokenrun (pfile->cur_run);
1863*e4b17023SJohn Marino pfile->cur_token = pfile->cur_run->base;
1864*e4b17023SJohn Marino }
1865*e4b17023SJohn Marino /* We assume that the current token is somewhere in the current
1866*e4b17023SJohn Marino run. */
1867*e4b17023SJohn Marino if (pfile->cur_token < pfile->cur_run->base
1868*e4b17023SJohn Marino || pfile->cur_token >= pfile->cur_run->limit)
1869*e4b17023SJohn Marino abort ();
1870*e4b17023SJohn Marino
1871*e4b17023SJohn Marino if (pfile->lookaheads)
1872*e4b17023SJohn Marino {
1873*e4b17023SJohn Marino pfile->lookaheads--;
1874*e4b17023SJohn Marino result = pfile->cur_token++;
1875*e4b17023SJohn Marino }
1876*e4b17023SJohn Marino else
1877*e4b17023SJohn Marino result = _cpp_lex_direct (pfile);
1878*e4b17023SJohn Marino
1879*e4b17023SJohn Marino if (result->flags & BOL)
1880*e4b17023SJohn Marino {
1881*e4b17023SJohn Marino /* Is this a directive. If _cpp_handle_directive returns
1882*e4b17023SJohn Marino false, it is an assembler #. */
1883*e4b17023SJohn Marino if (result->type == CPP_HASH
1884*e4b17023SJohn Marino /* 6.10.3 p 11: Directives in a list of macro arguments
1885*e4b17023SJohn Marino gives undefined behavior. This implementation
1886*e4b17023SJohn Marino handles the directive as normal. */
1887*e4b17023SJohn Marino && pfile->state.parsing_args != 1)
1888*e4b17023SJohn Marino {
1889*e4b17023SJohn Marino if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1890*e4b17023SJohn Marino {
1891*e4b17023SJohn Marino if (pfile->directive_result.type == CPP_PADDING)
1892*e4b17023SJohn Marino continue;
1893*e4b17023SJohn Marino result = &pfile->directive_result;
1894*e4b17023SJohn Marino }
1895*e4b17023SJohn Marino }
1896*e4b17023SJohn Marino else if (pfile->state.in_deferred_pragma)
1897*e4b17023SJohn Marino result = &pfile->directive_result;
1898*e4b17023SJohn Marino
1899*e4b17023SJohn Marino if (pfile->cb.line_change && !pfile->state.skipping)
1900*e4b17023SJohn Marino pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
1901*e4b17023SJohn Marino }
1902*e4b17023SJohn Marino
1903*e4b17023SJohn Marino /* We don't skip tokens in directives. */
1904*e4b17023SJohn Marino if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
1905*e4b17023SJohn Marino break;
1906*e4b17023SJohn Marino
1907*e4b17023SJohn Marino /* Outside a directive, invalidate controlling macros. At file
1908*e4b17023SJohn Marino EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1909*e4b17023SJohn Marino get here and MI optimization works. */
1910*e4b17023SJohn Marino pfile->mi_valid = false;
1911*e4b17023SJohn Marino
1912*e4b17023SJohn Marino if (!pfile->state.skipping || result->type == CPP_EOF)
1913*e4b17023SJohn Marino break;
1914*e4b17023SJohn Marino }
1915*e4b17023SJohn Marino
1916*e4b17023SJohn Marino return result;
1917*e4b17023SJohn Marino }
1918*e4b17023SJohn Marino
1919*e4b17023SJohn Marino /* Returns true if a fresh line has been loaded. */
1920*e4b17023SJohn Marino bool
_cpp_get_fresh_line(cpp_reader * pfile)1921*e4b17023SJohn Marino _cpp_get_fresh_line (cpp_reader *pfile)
1922*e4b17023SJohn Marino {
1923*e4b17023SJohn Marino int return_at_eof;
1924*e4b17023SJohn Marino
1925*e4b17023SJohn Marino /* We can't get a new line until we leave the current directive. */
1926*e4b17023SJohn Marino if (pfile->state.in_directive)
1927*e4b17023SJohn Marino return false;
1928*e4b17023SJohn Marino
1929*e4b17023SJohn Marino for (;;)
1930*e4b17023SJohn Marino {
1931*e4b17023SJohn Marino cpp_buffer *buffer = pfile->buffer;
1932*e4b17023SJohn Marino
1933*e4b17023SJohn Marino if (!buffer->need_line)
1934*e4b17023SJohn Marino return true;
1935*e4b17023SJohn Marino
1936*e4b17023SJohn Marino if (buffer->next_line < buffer->rlimit)
1937*e4b17023SJohn Marino {
1938*e4b17023SJohn Marino _cpp_clean_line (pfile);
1939*e4b17023SJohn Marino return true;
1940*e4b17023SJohn Marino }
1941*e4b17023SJohn Marino
1942*e4b17023SJohn Marino /* First, get out of parsing arguments state. */
1943*e4b17023SJohn Marino if (pfile->state.parsing_args)
1944*e4b17023SJohn Marino return false;
1945*e4b17023SJohn Marino
1946*e4b17023SJohn Marino /* End of buffer. Non-empty files should end in a newline. */
1947*e4b17023SJohn Marino if (buffer->buf != buffer->rlimit
1948*e4b17023SJohn Marino && buffer->next_line > buffer->rlimit
1949*e4b17023SJohn Marino && !buffer->from_stage3)
1950*e4b17023SJohn Marino {
1951*e4b17023SJohn Marino /* Clip to buffer size. */
1952*e4b17023SJohn Marino buffer->next_line = buffer->rlimit;
1953*e4b17023SJohn Marino }
1954*e4b17023SJohn Marino
1955*e4b17023SJohn Marino return_at_eof = buffer->return_at_eof;
1956*e4b17023SJohn Marino _cpp_pop_buffer (pfile);
1957*e4b17023SJohn Marino if (pfile->buffer == NULL || return_at_eof)
1958*e4b17023SJohn Marino return false;
1959*e4b17023SJohn Marino }
1960*e4b17023SJohn Marino }
1961*e4b17023SJohn Marino
/* Helper for one-or-two character operators.  If the next unread
   character in BUFFER is CHAR, consume it and give RESULT the type
   THEN_TYPE; otherwise leave the character alone and give RESULT the
   type ELSE_TYPE.  Relies on variables named `buffer' and `result'
   being in scope at the point of use.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
  do                                            \
    {                                           \
      if (*buffer->cur == CHAR)                 \
        {                                       \
          buffer->cur++;                        \
          result->type = THEN_TYPE;             \
        }                                       \
      else                                      \
        result->type = ELSE_TYPE;               \
    }                                           \
  while (0)
1970*e4b17023SJohn Marino
1971*e4b17023SJohn Marino /* Lex a token into pfile->cur_token, which is also incremented, to
1972*e4b17023SJohn Marino get diagnostics pointing to the correct location.
1973*e4b17023SJohn Marino
1974*e4b17023SJohn Marino Does not handle issues such as token lookahead, multiple-include
1975*e4b17023SJohn Marino optimization, directives, skipping etc. This function is only
1976*e4b17023SJohn Marino suitable for use by _cpp_lex_token, and in special cases like
1977*e4b17023SJohn Marino lex_expansion_token which doesn't care for any of these issues.
1978*e4b17023SJohn Marino
1979*e4b17023SJohn Marino When meeting a newline, returns CPP_EOF if parsing a directive,
1980*e4b17023SJohn Marino otherwise returns to the start of the token buffer if permissible.
1981*e4b17023SJohn Marino Returns the location of the lexed token. */
1982*e4b17023SJohn Marino cpp_token *
_cpp_lex_direct(cpp_reader * pfile)1983*e4b17023SJohn Marino _cpp_lex_direct (cpp_reader *pfile)
1984*e4b17023SJohn Marino {
1985*e4b17023SJohn Marino cppchar_t c;
1986*e4b17023SJohn Marino cpp_buffer *buffer;
1987*e4b17023SJohn Marino const unsigned char *comment_start;
1988*e4b17023SJohn Marino cpp_token *result = pfile->cur_token++;
1989*e4b17023SJohn Marino
1990*e4b17023SJohn Marino fresh_line:
1991*e4b17023SJohn Marino result->flags = 0;
1992*e4b17023SJohn Marino buffer = pfile->buffer;
1993*e4b17023SJohn Marino if (buffer->need_line)
1994*e4b17023SJohn Marino {
1995*e4b17023SJohn Marino if (pfile->state.in_deferred_pragma)
1996*e4b17023SJohn Marino {
1997*e4b17023SJohn Marino result->type = CPP_PRAGMA_EOL;
1998*e4b17023SJohn Marino pfile->state.in_deferred_pragma = false;
1999*e4b17023SJohn Marino if (!pfile->state.pragma_allow_expansion)
2000*e4b17023SJohn Marino pfile->state.prevent_expansion--;
2001*e4b17023SJohn Marino return result;
2002*e4b17023SJohn Marino }
2003*e4b17023SJohn Marino if (!_cpp_get_fresh_line (pfile))
2004*e4b17023SJohn Marino {
2005*e4b17023SJohn Marino result->type = CPP_EOF;
2006*e4b17023SJohn Marino if (!pfile->state.in_directive)
2007*e4b17023SJohn Marino {
2008*e4b17023SJohn Marino /* Tell the compiler the line number of the EOF token. */
2009*e4b17023SJohn Marino result->src_loc = pfile->line_table->highest_line;
2010*e4b17023SJohn Marino result->flags = BOL;
2011*e4b17023SJohn Marino }
2012*e4b17023SJohn Marino return result;
2013*e4b17023SJohn Marino }
2014*e4b17023SJohn Marino if (!pfile->keep_tokens)
2015*e4b17023SJohn Marino {
2016*e4b17023SJohn Marino pfile->cur_run = &pfile->base_run;
2017*e4b17023SJohn Marino result = pfile->base_run.base;
2018*e4b17023SJohn Marino pfile->cur_token = result + 1;
2019*e4b17023SJohn Marino }
2020*e4b17023SJohn Marino result->flags = BOL;
2021*e4b17023SJohn Marino if (pfile->state.parsing_args == 2)
2022*e4b17023SJohn Marino result->flags |= PREV_WHITE;
2023*e4b17023SJohn Marino }
2024*e4b17023SJohn Marino buffer = pfile->buffer;
2025*e4b17023SJohn Marino update_tokens_line:
2026*e4b17023SJohn Marino result->src_loc = pfile->line_table->highest_line;
2027*e4b17023SJohn Marino
2028*e4b17023SJohn Marino skipped_white:
2029*e4b17023SJohn Marino if (buffer->cur >= buffer->notes[buffer->cur_note].pos
2030*e4b17023SJohn Marino && !pfile->overlaid_buffer)
2031*e4b17023SJohn Marino {
2032*e4b17023SJohn Marino _cpp_process_line_notes (pfile, false);
2033*e4b17023SJohn Marino result->src_loc = pfile->line_table->highest_line;
2034*e4b17023SJohn Marino }
2035*e4b17023SJohn Marino c = *buffer->cur++;
2036*e4b17023SJohn Marino
2037*e4b17023SJohn Marino if (pfile->forced_token_location_p)
2038*e4b17023SJohn Marino result->src_loc = *pfile->forced_token_location_p;
2039*e4b17023SJohn Marino else
2040*e4b17023SJohn Marino result->src_loc = linemap_position_for_column (pfile->line_table,
2041*e4b17023SJohn Marino CPP_BUF_COLUMN (buffer, buffer->cur));
2042*e4b17023SJohn Marino
2043*e4b17023SJohn Marino switch (c)
2044*e4b17023SJohn Marino {
2045*e4b17023SJohn Marino case ' ': case '\t': case '\f': case '\v': case '\0':
2046*e4b17023SJohn Marino result->flags |= PREV_WHITE;
2047*e4b17023SJohn Marino skip_whitespace (pfile, c);
2048*e4b17023SJohn Marino goto skipped_white;
2049*e4b17023SJohn Marino
2050*e4b17023SJohn Marino case '\n':
2051*e4b17023SJohn Marino if (buffer->cur < buffer->rlimit)
2052*e4b17023SJohn Marino CPP_INCREMENT_LINE (pfile, 0);
2053*e4b17023SJohn Marino buffer->need_line = true;
2054*e4b17023SJohn Marino goto fresh_line;
2055*e4b17023SJohn Marino
2056*e4b17023SJohn Marino case '0': case '1': case '2': case '3': case '4':
2057*e4b17023SJohn Marino case '5': case '6': case '7': case '8': case '9':
2058*e4b17023SJohn Marino {
2059*e4b17023SJohn Marino struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2060*e4b17023SJohn Marino result->type = CPP_NUMBER;
2061*e4b17023SJohn Marino lex_number (pfile, &result->val.str, &nst);
2062*e4b17023SJohn Marino warn_about_normalization (pfile, result, &nst);
2063*e4b17023SJohn Marino break;
2064*e4b17023SJohn Marino }
2065*e4b17023SJohn Marino
2066*e4b17023SJohn Marino case 'L':
2067*e4b17023SJohn Marino case 'u':
2068*e4b17023SJohn Marino case 'U':
2069*e4b17023SJohn Marino case 'R':
2070*e4b17023SJohn Marino /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
2071*e4b17023SJohn Marino wide strings or raw strings. */
2072*e4b17023SJohn Marino if (c == 'L' || CPP_OPTION (pfile, rliterals)
2073*e4b17023SJohn Marino || (c != 'R' && CPP_OPTION (pfile, uliterals)))
2074*e4b17023SJohn Marino {
2075*e4b17023SJohn Marino if ((*buffer->cur == '\'' && c != 'R')
2076*e4b17023SJohn Marino || *buffer->cur == '"'
2077*e4b17023SJohn Marino || (*buffer->cur == 'R'
2078*e4b17023SJohn Marino && c != 'R'
2079*e4b17023SJohn Marino && buffer->cur[1] == '"'
2080*e4b17023SJohn Marino && CPP_OPTION (pfile, rliterals))
2081*e4b17023SJohn Marino || (*buffer->cur == '8'
2082*e4b17023SJohn Marino && c == 'u'
2083*e4b17023SJohn Marino && (buffer->cur[1] == '"'
2084*e4b17023SJohn Marino || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
2085*e4b17023SJohn Marino && CPP_OPTION (pfile, rliterals)))))
2086*e4b17023SJohn Marino {
2087*e4b17023SJohn Marino lex_string (pfile, result, buffer->cur - 1);
2088*e4b17023SJohn Marino break;
2089*e4b17023SJohn Marino }
2090*e4b17023SJohn Marino }
2091*e4b17023SJohn Marino /* Fall through. */
2092*e4b17023SJohn Marino
2093*e4b17023SJohn Marino case '_':
2094*e4b17023SJohn Marino case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2095*e4b17023SJohn Marino case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2096*e4b17023SJohn Marino case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2097*e4b17023SJohn Marino case 's': case 't': case 'v': case 'w': case 'x':
2098*e4b17023SJohn Marino case 'y': case 'z':
2099*e4b17023SJohn Marino case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2100*e4b17023SJohn Marino case 'G': case 'H': case 'I': case 'J': case 'K':
2101*e4b17023SJohn Marino case 'M': case 'N': case 'O': case 'P': case 'Q':
2102*e4b17023SJohn Marino case 'S': case 'T': case 'V': case 'W': case 'X':
2103*e4b17023SJohn Marino case 'Y': case 'Z':
2104*e4b17023SJohn Marino result->type = CPP_NAME;
2105*e4b17023SJohn Marino {
2106*e4b17023SJohn Marino struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2107*e4b17023SJohn Marino result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
2108*e4b17023SJohn Marino &nst);
2109*e4b17023SJohn Marino warn_about_normalization (pfile, result, &nst);
2110*e4b17023SJohn Marino }
2111*e4b17023SJohn Marino
2112*e4b17023SJohn Marino /* Convert named operators to their proper types. */
2113*e4b17023SJohn Marino if (result->val.node.node->flags & NODE_OPERATOR)
2114*e4b17023SJohn Marino {
2115*e4b17023SJohn Marino result->flags |= NAMED_OP;
2116*e4b17023SJohn Marino result->type = (enum cpp_ttype) result->val.node.node->directive_index;
2117*e4b17023SJohn Marino }
2118*e4b17023SJohn Marino break;
2119*e4b17023SJohn Marino
2120*e4b17023SJohn Marino case '\'':
2121*e4b17023SJohn Marino case '"':
2122*e4b17023SJohn Marino lex_string (pfile, result, buffer->cur - 1);
2123*e4b17023SJohn Marino break;
2124*e4b17023SJohn Marino
2125*e4b17023SJohn Marino case '/':
2126*e4b17023SJohn Marino /* A potential block or line comment. */
2127*e4b17023SJohn Marino comment_start = buffer->cur;
2128*e4b17023SJohn Marino c = *buffer->cur;
2129*e4b17023SJohn Marino
2130*e4b17023SJohn Marino if (c == '*')
2131*e4b17023SJohn Marino {
2132*e4b17023SJohn Marino if (_cpp_skip_block_comment (pfile))
2133*e4b17023SJohn Marino cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
2134*e4b17023SJohn Marino }
2135*e4b17023SJohn Marino else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
2136*e4b17023SJohn Marino || cpp_in_system_header (pfile)))
2137*e4b17023SJohn Marino {
2138*e4b17023SJohn Marino /* Warn about comments only if pedantically GNUC89, and not
2139*e4b17023SJohn Marino in system headers. */
2140*e4b17023SJohn Marino if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
2141*e4b17023SJohn Marino && ! buffer->warned_cplusplus_comments)
2142*e4b17023SJohn Marino {
2143*e4b17023SJohn Marino cpp_error (pfile, CPP_DL_PEDWARN,
2144*e4b17023SJohn Marino "C++ style comments are not allowed in ISO C90");
2145*e4b17023SJohn Marino cpp_error (pfile, CPP_DL_PEDWARN,
2146*e4b17023SJohn Marino "(this will be reported only once per input file)");
2147*e4b17023SJohn Marino buffer->warned_cplusplus_comments = 1;
2148*e4b17023SJohn Marino }
2149*e4b17023SJohn Marino
2150*e4b17023SJohn Marino if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
2151*e4b17023SJohn Marino cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
2152*e4b17023SJohn Marino }
2153*e4b17023SJohn Marino else if (c == '=')
2154*e4b17023SJohn Marino {
2155*e4b17023SJohn Marino buffer->cur++;
2156*e4b17023SJohn Marino result->type = CPP_DIV_EQ;
2157*e4b17023SJohn Marino break;
2158*e4b17023SJohn Marino }
2159*e4b17023SJohn Marino else
2160*e4b17023SJohn Marino {
2161*e4b17023SJohn Marino result->type = CPP_DIV;
2162*e4b17023SJohn Marino break;
2163*e4b17023SJohn Marino }
2164*e4b17023SJohn Marino
2165*e4b17023SJohn Marino if (!pfile->state.save_comments)
2166*e4b17023SJohn Marino {
2167*e4b17023SJohn Marino result->flags |= PREV_WHITE;
2168*e4b17023SJohn Marino goto update_tokens_line;
2169*e4b17023SJohn Marino }
2170*e4b17023SJohn Marino
2171*e4b17023SJohn Marino /* Save the comment as a token in its own right. */
2172*e4b17023SJohn Marino save_comment (pfile, result, comment_start, c);
2173*e4b17023SJohn Marino break;
2174*e4b17023SJohn Marino
2175*e4b17023SJohn Marino case '<':
2176*e4b17023SJohn Marino if (pfile->state.angled_headers)
2177*e4b17023SJohn Marino {
2178*e4b17023SJohn Marino lex_string (pfile, result, buffer->cur - 1);
2179*e4b17023SJohn Marino if (result->type != CPP_LESS)
2180*e4b17023SJohn Marino break;
2181*e4b17023SJohn Marino }
2182*e4b17023SJohn Marino
2183*e4b17023SJohn Marino result->type = CPP_LESS;
2184*e4b17023SJohn Marino if (*buffer->cur == '=')
2185*e4b17023SJohn Marino buffer->cur++, result->type = CPP_LESS_EQ;
2186*e4b17023SJohn Marino else if (*buffer->cur == '<')
2187*e4b17023SJohn Marino {
2188*e4b17023SJohn Marino buffer->cur++;
2189*e4b17023SJohn Marino IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
2190*e4b17023SJohn Marino }
2191*e4b17023SJohn Marino else if (CPP_OPTION (pfile, digraphs))
2192*e4b17023SJohn Marino {
2193*e4b17023SJohn Marino if (*buffer->cur == ':')
2194*e4b17023SJohn Marino {
2195*e4b17023SJohn Marino buffer->cur++;
2196*e4b17023SJohn Marino result->flags |= DIGRAPH;
2197*e4b17023SJohn Marino result->type = CPP_OPEN_SQUARE;
2198*e4b17023SJohn Marino }
2199*e4b17023SJohn Marino else if (*buffer->cur == '%')
2200*e4b17023SJohn Marino {
2201*e4b17023SJohn Marino buffer->cur++;
2202*e4b17023SJohn Marino result->flags |= DIGRAPH;
2203*e4b17023SJohn Marino result->type = CPP_OPEN_BRACE;
2204*e4b17023SJohn Marino }
2205*e4b17023SJohn Marino }
2206*e4b17023SJohn Marino break;
2207*e4b17023SJohn Marino
2208*e4b17023SJohn Marino case '>':
2209*e4b17023SJohn Marino result->type = CPP_GREATER;
2210*e4b17023SJohn Marino if (*buffer->cur == '=')
2211*e4b17023SJohn Marino buffer->cur++, result->type = CPP_GREATER_EQ;
2212*e4b17023SJohn Marino else if (*buffer->cur == '>')
2213*e4b17023SJohn Marino {
2214*e4b17023SJohn Marino buffer->cur++;
2215*e4b17023SJohn Marino IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
2216*e4b17023SJohn Marino }
2217*e4b17023SJohn Marino break;
2218*e4b17023SJohn Marino
2219*e4b17023SJohn Marino case '%':
2220*e4b17023SJohn Marino result->type = CPP_MOD;
2221*e4b17023SJohn Marino if (*buffer->cur == '=')
2222*e4b17023SJohn Marino buffer->cur++, result->type = CPP_MOD_EQ;
2223*e4b17023SJohn Marino else if (CPP_OPTION (pfile, digraphs))
2224*e4b17023SJohn Marino {
2225*e4b17023SJohn Marino if (*buffer->cur == ':')
2226*e4b17023SJohn Marino {
2227*e4b17023SJohn Marino buffer->cur++;
2228*e4b17023SJohn Marino result->flags |= DIGRAPH;
2229*e4b17023SJohn Marino result->type = CPP_HASH;
2230*e4b17023SJohn Marino if (*buffer->cur == '%' && buffer->cur[1] == ':')
2231*e4b17023SJohn Marino buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
2232*e4b17023SJohn Marino }
2233*e4b17023SJohn Marino else if (*buffer->cur == '>')
2234*e4b17023SJohn Marino {
2235*e4b17023SJohn Marino buffer->cur++;
2236*e4b17023SJohn Marino result->flags |= DIGRAPH;
2237*e4b17023SJohn Marino result->type = CPP_CLOSE_BRACE;
2238*e4b17023SJohn Marino }
2239*e4b17023SJohn Marino }
2240*e4b17023SJohn Marino break;
2241*e4b17023SJohn Marino
2242*e4b17023SJohn Marino case '.':
2243*e4b17023SJohn Marino result->type = CPP_DOT;
2244*e4b17023SJohn Marino if (ISDIGIT (*buffer->cur))
2245*e4b17023SJohn Marino {
2246*e4b17023SJohn Marino struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2247*e4b17023SJohn Marino result->type = CPP_NUMBER;
2248*e4b17023SJohn Marino lex_number (pfile, &result->val.str, &nst);
2249*e4b17023SJohn Marino warn_about_normalization (pfile, result, &nst);
2250*e4b17023SJohn Marino }
2251*e4b17023SJohn Marino else if (*buffer->cur == '.' && buffer->cur[1] == '.')
2252*e4b17023SJohn Marino buffer->cur += 2, result->type = CPP_ELLIPSIS;
2253*e4b17023SJohn Marino else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
2254*e4b17023SJohn Marino buffer->cur++, result->type = CPP_DOT_STAR;
2255*e4b17023SJohn Marino break;
2256*e4b17023SJohn Marino
2257*e4b17023SJohn Marino case '+':
2258*e4b17023SJohn Marino result->type = CPP_PLUS;
2259*e4b17023SJohn Marino if (*buffer->cur == '+')
2260*e4b17023SJohn Marino buffer->cur++, result->type = CPP_PLUS_PLUS;
2261*e4b17023SJohn Marino else if (*buffer->cur == '=')
2262*e4b17023SJohn Marino buffer->cur++, result->type = CPP_PLUS_EQ;
2263*e4b17023SJohn Marino break;
2264*e4b17023SJohn Marino
2265*e4b17023SJohn Marino case '-':
2266*e4b17023SJohn Marino result->type = CPP_MINUS;
2267*e4b17023SJohn Marino if (*buffer->cur == '>')
2268*e4b17023SJohn Marino {
2269*e4b17023SJohn Marino buffer->cur++;
2270*e4b17023SJohn Marino result->type = CPP_DEREF;
2271*e4b17023SJohn Marino if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
2272*e4b17023SJohn Marino buffer->cur++, result->type = CPP_DEREF_STAR;
2273*e4b17023SJohn Marino }
2274*e4b17023SJohn Marino else if (*buffer->cur == '-')
2275*e4b17023SJohn Marino buffer->cur++, result->type = CPP_MINUS_MINUS;
2276*e4b17023SJohn Marino else if (*buffer->cur == '=')
2277*e4b17023SJohn Marino buffer->cur++, result->type = CPP_MINUS_EQ;
2278*e4b17023SJohn Marino break;
2279*e4b17023SJohn Marino
2280*e4b17023SJohn Marino case '&':
2281*e4b17023SJohn Marino result->type = CPP_AND;
2282*e4b17023SJohn Marino if (*buffer->cur == '&')
2283*e4b17023SJohn Marino buffer->cur++, result->type = CPP_AND_AND;
2284*e4b17023SJohn Marino else if (*buffer->cur == '=')
2285*e4b17023SJohn Marino buffer->cur++, result->type = CPP_AND_EQ;
2286*e4b17023SJohn Marino break;
2287*e4b17023SJohn Marino
2288*e4b17023SJohn Marino case '|':
2289*e4b17023SJohn Marino result->type = CPP_OR;
2290*e4b17023SJohn Marino if (*buffer->cur == '|')
2291*e4b17023SJohn Marino buffer->cur++, result->type = CPP_OR_OR;
2292*e4b17023SJohn Marino else if (*buffer->cur == '=')
2293*e4b17023SJohn Marino buffer->cur++, result->type = CPP_OR_EQ;
2294*e4b17023SJohn Marino break;
2295*e4b17023SJohn Marino
2296*e4b17023SJohn Marino case ':':
2297*e4b17023SJohn Marino result->type = CPP_COLON;
2298*e4b17023SJohn Marino if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
2299*e4b17023SJohn Marino buffer->cur++, result->type = CPP_SCOPE;
2300*e4b17023SJohn Marino else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
2301*e4b17023SJohn Marino {
2302*e4b17023SJohn Marino buffer->cur++;
2303*e4b17023SJohn Marino result->flags |= DIGRAPH;
2304*e4b17023SJohn Marino result->type = CPP_CLOSE_SQUARE;
2305*e4b17023SJohn Marino }
2306*e4b17023SJohn Marino break;
2307*e4b17023SJohn Marino
2308*e4b17023SJohn Marino case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
2309*e4b17023SJohn Marino case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
2310*e4b17023SJohn Marino case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
2311*e4b17023SJohn Marino case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
2312*e4b17023SJohn Marino case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
2313*e4b17023SJohn Marino
2314*e4b17023SJohn Marino case '?': result->type = CPP_QUERY; break;
2315*e4b17023SJohn Marino case '~': result->type = CPP_COMPL; break;
2316*e4b17023SJohn Marino case ',': result->type = CPP_COMMA; break;
2317*e4b17023SJohn Marino case '(': result->type = CPP_OPEN_PAREN; break;
2318*e4b17023SJohn Marino case ')': result->type = CPP_CLOSE_PAREN; break;
2319*e4b17023SJohn Marino case '[': result->type = CPP_OPEN_SQUARE; break;
2320*e4b17023SJohn Marino case ']': result->type = CPP_CLOSE_SQUARE; break;
2321*e4b17023SJohn Marino case '{': result->type = CPP_OPEN_BRACE; break;
2322*e4b17023SJohn Marino case '}': result->type = CPP_CLOSE_BRACE; break;
2323*e4b17023SJohn Marino case ';': result->type = CPP_SEMICOLON; break;
2324*e4b17023SJohn Marino
2325*e4b17023SJohn Marino /* @ is a punctuator in Objective-C. */
2326*e4b17023SJohn Marino case '@': result->type = CPP_ATSIGN; break;
2327*e4b17023SJohn Marino
2328*e4b17023SJohn Marino case '$':
2329*e4b17023SJohn Marino case '\\':
2330*e4b17023SJohn Marino {
2331*e4b17023SJohn Marino const uchar *base = --buffer->cur;
2332*e4b17023SJohn Marino struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2333*e4b17023SJohn Marino
2334*e4b17023SJohn Marino if (forms_identifier_p (pfile, true, &nst))
2335*e4b17023SJohn Marino {
2336*e4b17023SJohn Marino result->type = CPP_NAME;
2337*e4b17023SJohn Marino result->val.node.node = lex_identifier (pfile, base, true, &nst);
2338*e4b17023SJohn Marino warn_about_normalization (pfile, result, &nst);
2339*e4b17023SJohn Marino break;
2340*e4b17023SJohn Marino }
2341*e4b17023SJohn Marino buffer->cur++;
2342*e4b17023SJohn Marino }
2343*e4b17023SJohn Marino
2344*e4b17023SJohn Marino default:
2345*e4b17023SJohn Marino create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
2346*e4b17023SJohn Marino break;
2347*e4b17023SJohn Marino }
2348*e4b17023SJohn Marino
2349*e4b17023SJohn Marino return result;
2350*e4b17023SJohn Marino }
2351*e4b17023SJohn Marino
2352*e4b17023SJohn Marino /* An upper bound on the number of bytes needed to spell TOKEN.
2353*e4b17023SJohn Marino Does not include preceding whitespace. */
2354*e4b17023SJohn Marino unsigned int
cpp_token_len(const cpp_token * token)2355*e4b17023SJohn Marino cpp_token_len (const cpp_token *token)
2356*e4b17023SJohn Marino {
2357*e4b17023SJohn Marino unsigned int len;
2358*e4b17023SJohn Marino
2359*e4b17023SJohn Marino switch (TOKEN_SPELL (token))
2360*e4b17023SJohn Marino {
2361*e4b17023SJohn Marino default: len = 6; break;
2362*e4b17023SJohn Marino case SPELL_LITERAL: len = token->val.str.len; break;
2363*e4b17023SJohn Marino case SPELL_IDENT: len = NODE_LEN (token->val.node.node) * 10; break;
2364*e4b17023SJohn Marino }
2365*e4b17023SJohn Marino
2366*e4b17023SJohn Marino return len;
2367*e4b17023SJohn Marino }
2368*e4b17023SJohn Marino
/* Decode the UTF-8 sequence starting at NAME and write it to BUFFER
   as a universal character name of the form \UXXXXXXXX (lower-case
   hex digits, always exactly 10 bytes, not NUL-terminated).  Returns
   the number of leading 1 bits in the first byte, which for a
   multi-byte sequence is the number of bytes consumed from NAME.
   Aborts if a continuation byte is not of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  int seq_len = 0;
  int pos;
  int shift;
  unsigned lead;
  unsigned long code_point;

  /* Each leading 1 bit of the first byte stands for one byte of the
     sequence.  */
  for (lead = *name; lead & 0x80; lead <<= 1)
    seq_len++;

  /* The payload of the first byte is whatever follows the length
     marker.  */
  code_point = *name & (0x7F >> seq_len);

  /* Every continuation byte supplies six more bits.  */
  for (pos = 1; pos < seq_len; pos++)
    {
      name++;
      code_point = (code_point << 6) | (*name & 0x3F);

      /* Ill-formed UTF-8.  */
      if ((*name & ~0x3F) != 0x80)
        abort ();
    }

  *buffer++ = '\\';
  *buffer++ = 'U';

  /* Eight hex digits, most significant nibble first.  */
  for (shift = 28; shift >= 0; shift -= 4)
    *buffer++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
2402*e4b17023SJohn Marino
2403*e4b17023SJohn Marino /* Given a token TYPE corresponding to a digraph, return a pointer to
2404*e4b17023SJohn Marino the spelling of the digraph. */
2405*e4b17023SJohn Marino static const unsigned char *
cpp_digraph2name(enum cpp_ttype type)2406*e4b17023SJohn Marino cpp_digraph2name (enum cpp_ttype type)
2407*e4b17023SJohn Marino {
2408*e4b17023SJohn Marino return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
2409*e4b17023SJohn Marino }
2410*e4b17023SJohn Marino
2411*e4b17023SJohn Marino /* Write the spelling of a token TOKEN to BUFFER. The buffer must
2412*e4b17023SJohn Marino already contain the enough space to hold the token's spelling.
2413*e4b17023SJohn Marino Returns a pointer to the character after the last character written.
2414*e4b17023SJohn Marino FORSTRING is true if this is to be the spelling after translation
2415*e4b17023SJohn Marino phase 1 (this is different for UCNs).
2416*e4b17023SJohn Marino FIXME: Would be nice if we didn't need the PFILE argument. */
2417*e4b17023SJohn Marino unsigned char *
cpp_spell_token(cpp_reader * pfile,const cpp_token * token,unsigned char * buffer,bool forstring)2418*e4b17023SJohn Marino cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
2419*e4b17023SJohn Marino unsigned char *buffer, bool forstring)
2420*e4b17023SJohn Marino {
2421*e4b17023SJohn Marino switch (TOKEN_SPELL (token))
2422*e4b17023SJohn Marino {
2423*e4b17023SJohn Marino case SPELL_OPERATOR:
2424*e4b17023SJohn Marino {
2425*e4b17023SJohn Marino const unsigned char *spelling;
2426*e4b17023SJohn Marino unsigned char c;
2427*e4b17023SJohn Marino
2428*e4b17023SJohn Marino if (token->flags & DIGRAPH)
2429*e4b17023SJohn Marino spelling = cpp_digraph2name (token->type);
2430*e4b17023SJohn Marino else if (token->flags & NAMED_OP)
2431*e4b17023SJohn Marino goto spell_ident;
2432*e4b17023SJohn Marino else
2433*e4b17023SJohn Marino spelling = TOKEN_NAME (token);
2434*e4b17023SJohn Marino
2435*e4b17023SJohn Marino while ((c = *spelling++) != '\0')
2436*e4b17023SJohn Marino *buffer++ = c;
2437*e4b17023SJohn Marino }
2438*e4b17023SJohn Marino break;
2439*e4b17023SJohn Marino
2440*e4b17023SJohn Marino spell_ident:
2441*e4b17023SJohn Marino case SPELL_IDENT:
2442*e4b17023SJohn Marino if (forstring)
2443*e4b17023SJohn Marino {
2444*e4b17023SJohn Marino memcpy (buffer, NODE_NAME (token->val.node.node),
2445*e4b17023SJohn Marino NODE_LEN (token->val.node.node));
2446*e4b17023SJohn Marino buffer += NODE_LEN (token->val.node.node);
2447*e4b17023SJohn Marino }
2448*e4b17023SJohn Marino else
2449*e4b17023SJohn Marino {
2450*e4b17023SJohn Marino size_t i;
2451*e4b17023SJohn Marino const unsigned char * name = NODE_NAME (token->val.node.node);
2452*e4b17023SJohn Marino
2453*e4b17023SJohn Marino for (i = 0; i < NODE_LEN (token->val.node.node); i++)
2454*e4b17023SJohn Marino if (name[i] & ~0x7F)
2455*e4b17023SJohn Marino {
2456*e4b17023SJohn Marino i += utf8_to_ucn (buffer, name + i) - 1;
2457*e4b17023SJohn Marino buffer += 10;
2458*e4b17023SJohn Marino }
2459*e4b17023SJohn Marino else
2460*e4b17023SJohn Marino *buffer++ = NODE_NAME (token->val.node.node)[i];
2461*e4b17023SJohn Marino }
2462*e4b17023SJohn Marino break;
2463*e4b17023SJohn Marino
2464*e4b17023SJohn Marino case SPELL_LITERAL:
2465*e4b17023SJohn Marino memcpy (buffer, token->val.str.text, token->val.str.len);
2466*e4b17023SJohn Marino buffer += token->val.str.len;
2467*e4b17023SJohn Marino break;
2468*e4b17023SJohn Marino
2469*e4b17023SJohn Marino case SPELL_NONE:
2470*e4b17023SJohn Marino cpp_error (pfile, CPP_DL_ICE,
2471*e4b17023SJohn Marino "unspellable token %s", TOKEN_NAME (token));
2472*e4b17023SJohn Marino break;
2473*e4b17023SJohn Marino }
2474*e4b17023SJohn Marino
2475*e4b17023SJohn Marino return buffer;
2476*e4b17023SJohn Marino }
2477*e4b17023SJohn Marino
2478*e4b17023SJohn Marino /* Returns TOKEN spelt as a null-terminated string. The string is
2479*e4b17023SJohn Marino freed when the reader is destroyed. Useful for diagnostics. */
2480*e4b17023SJohn Marino unsigned char *
cpp_token_as_text(cpp_reader * pfile,const cpp_token * token)2481*e4b17023SJohn Marino cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
2482*e4b17023SJohn Marino {
2483*e4b17023SJohn Marino unsigned int len = cpp_token_len (token) + 1;
2484*e4b17023SJohn Marino unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
2485*e4b17023SJohn Marino
2486*e4b17023SJohn Marino end = cpp_spell_token (pfile, token, start, false);
2487*e4b17023SJohn Marino end[0] = '\0';
2488*e4b17023SJohn Marino
2489*e4b17023SJohn Marino return start;
2490*e4b17023SJohn Marino }
2491*e4b17023SJohn Marino
2492*e4b17023SJohn Marino /* Returns a pointer to a string which spells the token defined by
2493*e4b17023SJohn Marino TYPE and FLAGS. Used by C front ends, which really should move to
2494*e4b17023SJohn Marino using cpp_token_as_text. */
2495*e4b17023SJohn Marino const char *
cpp_type2name(enum cpp_ttype type,unsigned char flags)2496*e4b17023SJohn Marino cpp_type2name (enum cpp_ttype type, unsigned char flags)
2497*e4b17023SJohn Marino {
2498*e4b17023SJohn Marino if (flags & DIGRAPH)
2499*e4b17023SJohn Marino return (const char *) cpp_digraph2name (type);
2500*e4b17023SJohn Marino else if (flags & NAMED_OP)
2501*e4b17023SJohn Marino return cpp_named_operator2name (type);
2502*e4b17023SJohn Marino
2503*e4b17023SJohn Marino return (const char *) token_spellings[type].name;
2504*e4b17023SJohn Marino }
2505*e4b17023SJohn Marino
2506*e4b17023SJohn Marino /* Writes the spelling of token to FP, without any preceding space.
2507*e4b17023SJohn Marino Separated from cpp_spell_token for efficiency - to avoid stdio
2508*e4b17023SJohn Marino double-buffering. */
2509*e4b17023SJohn Marino void
cpp_output_token(const cpp_token * token,FILE * fp)2510*e4b17023SJohn Marino cpp_output_token (const cpp_token *token, FILE *fp)
2511*e4b17023SJohn Marino {
2512*e4b17023SJohn Marino switch (TOKEN_SPELL (token))
2513*e4b17023SJohn Marino {
2514*e4b17023SJohn Marino case SPELL_OPERATOR:
2515*e4b17023SJohn Marino {
2516*e4b17023SJohn Marino const unsigned char *spelling;
2517*e4b17023SJohn Marino int c;
2518*e4b17023SJohn Marino
2519*e4b17023SJohn Marino if (token->flags & DIGRAPH)
2520*e4b17023SJohn Marino spelling = cpp_digraph2name (token->type);
2521*e4b17023SJohn Marino else if (token->flags & NAMED_OP)
2522*e4b17023SJohn Marino goto spell_ident;
2523*e4b17023SJohn Marino else
2524*e4b17023SJohn Marino spelling = TOKEN_NAME (token);
2525*e4b17023SJohn Marino
2526*e4b17023SJohn Marino c = *spelling;
2527*e4b17023SJohn Marino do
2528*e4b17023SJohn Marino putc (c, fp);
2529*e4b17023SJohn Marino while ((c = *++spelling) != '\0');
2530*e4b17023SJohn Marino }
2531*e4b17023SJohn Marino break;
2532*e4b17023SJohn Marino
2533*e4b17023SJohn Marino spell_ident:
2534*e4b17023SJohn Marino case SPELL_IDENT:
2535*e4b17023SJohn Marino {
2536*e4b17023SJohn Marino size_t i;
2537*e4b17023SJohn Marino const unsigned char * name = NODE_NAME (token->val.node.node);
2538*e4b17023SJohn Marino
2539*e4b17023SJohn Marino for (i = 0; i < NODE_LEN (token->val.node.node); i++)
2540*e4b17023SJohn Marino if (name[i] & ~0x7F)
2541*e4b17023SJohn Marino {
2542*e4b17023SJohn Marino unsigned char buffer[10];
2543*e4b17023SJohn Marino i += utf8_to_ucn (buffer, name + i) - 1;
2544*e4b17023SJohn Marino fwrite (buffer, 1, 10, fp);
2545*e4b17023SJohn Marino }
2546*e4b17023SJohn Marino else
2547*e4b17023SJohn Marino fputc (NODE_NAME (token->val.node.node)[i], fp);
2548*e4b17023SJohn Marino }
2549*e4b17023SJohn Marino break;
2550*e4b17023SJohn Marino
2551*e4b17023SJohn Marino case SPELL_LITERAL:
2552*e4b17023SJohn Marino fwrite (token->val.str.text, 1, token->val.str.len, fp);
2553*e4b17023SJohn Marino break;
2554*e4b17023SJohn Marino
2555*e4b17023SJohn Marino case SPELL_NONE:
2556*e4b17023SJohn Marino /* An error, most probably. */
2557*e4b17023SJohn Marino break;
2558*e4b17023SJohn Marino }
2559*e4b17023SJohn Marino }
2560*e4b17023SJohn Marino
2561*e4b17023SJohn Marino /* Compare two tokens. */
2562*e4b17023SJohn Marino int
_cpp_equiv_tokens(const cpp_token * a,const cpp_token * b)2563*e4b17023SJohn Marino _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
2564*e4b17023SJohn Marino {
2565*e4b17023SJohn Marino if (a->type == b->type && a->flags == b->flags)
2566*e4b17023SJohn Marino switch (TOKEN_SPELL (a))
2567*e4b17023SJohn Marino {
2568*e4b17023SJohn Marino default: /* Keep compiler happy. */
2569*e4b17023SJohn Marino case SPELL_OPERATOR:
2570*e4b17023SJohn Marino /* token_no is used to track where multiple consecutive ##
2571*e4b17023SJohn Marino tokens were originally located. */
2572*e4b17023SJohn Marino return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
2573*e4b17023SJohn Marino case SPELL_NONE:
2574*e4b17023SJohn Marino return (a->type != CPP_MACRO_ARG
2575*e4b17023SJohn Marino || a->val.macro_arg.arg_no == b->val.macro_arg.arg_no);
2576*e4b17023SJohn Marino case SPELL_IDENT:
2577*e4b17023SJohn Marino return a->val.node.node == b->val.node.node;
2578*e4b17023SJohn Marino case SPELL_LITERAL:
2579*e4b17023SJohn Marino return (a->val.str.len == b->val.str.len
2580*e4b17023SJohn Marino && !memcmp (a->val.str.text, b->val.str.text,
2581*e4b17023SJohn Marino a->val.str.len));
2582*e4b17023SJohn Marino }
2583*e4b17023SJohn Marino
2584*e4b17023SJohn Marino return 0;
2585*e4b17023SJohn Marino }
2586*e4b17023SJohn Marino
2587*e4b17023SJohn Marino /* Returns nonzero if a space should be inserted to avoid an
2588*e4b17023SJohn Marino accidental token paste for output. For simplicity, it is
2589*e4b17023SJohn Marino conservative, and occasionally advises a space where one is not
2590*e4b17023SJohn Marino needed, e.g. "." and ".2". */
2591*e4b17023SJohn Marino int
cpp_avoid_paste(cpp_reader * pfile,const cpp_token * token1,const cpp_token * token2)2592*e4b17023SJohn Marino cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
2593*e4b17023SJohn Marino const cpp_token *token2)
2594*e4b17023SJohn Marino {
2595*e4b17023SJohn Marino enum cpp_ttype a = token1->type, b = token2->type;
2596*e4b17023SJohn Marino cppchar_t c;
2597*e4b17023SJohn Marino
2598*e4b17023SJohn Marino if (token1->flags & NAMED_OP)
2599*e4b17023SJohn Marino a = CPP_NAME;
2600*e4b17023SJohn Marino if (token2->flags & NAMED_OP)
2601*e4b17023SJohn Marino b = CPP_NAME;
2602*e4b17023SJohn Marino
2603*e4b17023SJohn Marino c = EOF;
2604*e4b17023SJohn Marino if (token2->flags & DIGRAPH)
2605*e4b17023SJohn Marino c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
2606*e4b17023SJohn Marino else if (token_spellings[b].category == SPELL_OPERATOR)
2607*e4b17023SJohn Marino c = token_spellings[b].name[0];
2608*e4b17023SJohn Marino
2609*e4b17023SJohn Marino /* Quickly get everything that can paste with an '='. */
2610*e4b17023SJohn Marino if ((int) a <= (int) CPP_LAST_EQ && c == '=')
2611*e4b17023SJohn Marino return 1;
2612*e4b17023SJohn Marino
2613*e4b17023SJohn Marino switch (a)
2614*e4b17023SJohn Marino {
2615*e4b17023SJohn Marino case CPP_GREATER: return c == '>';
2616*e4b17023SJohn Marino case CPP_LESS: return c == '<' || c == '%' || c == ':';
2617*e4b17023SJohn Marino case CPP_PLUS: return c == '+';
2618*e4b17023SJohn Marino case CPP_MINUS: return c == '-' || c == '>';
2619*e4b17023SJohn Marino case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
2620*e4b17023SJohn Marino case CPP_MOD: return c == ':' || c == '>';
2621*e4b17023SJohn Marino case CPP_AND: return c == '&';
2622*e4b17023SJohn Marino case CPP_OR: return c == '|';
2623*e4b17023SJohn Marino case CPP_COLON: return c == ':' || c == '>';
2624*e4b17023SJohn Marino case CPP_DEREF: return c == '*';
2625*e4b17023SJohn Marino case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
2626*e4b17023SJohn Marino case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
2627*e4b17023SJohn Marino case CPP_NAME: return ((b == CPP_NUMBER
2628*e4b17023SJohn Marino && name_p (pfile, &token2->val.str))
2629*e4b17023SJohn Marino || b == CPP_NAME
2630*e4b17023SJohn Marino || b == CPP_CHAR || b == CPP_STRING); /* L */
2631*e4b17023SJohn Marino case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
2632*e4b17023SJohn Marino || c == '.' || c == '+' || c == '-');
2633*e4b17023SJohn Marino /* UCNs */
2634*e4b17023SJohn Marino case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
2635*e4b17023SJohn Marino && b == CPP_NAME)
2636*e4b17023SJohn Marino || (CPP_OPTION (pfile, objc)
2637*e4b17023SJohn Marino && token1->val.str.text[0] == '@'
2638*e4b17023SJohn Marino && (b == CPP_NAME || b == CPP_STRING)));
2639*e4b17023SJohn Marino default: break;
2640*e4b17023SJohn Marino }
2641*e4b17023SJohn Marino
2642*e4b17023SJohn Marino return 0;
2643*e4b17023SJohn Marino }
2644*e4b17023SJohn Marino
2645*e4b17023SJohn Marino /* Output all the remaining tokens on the current line, and a newline
2646*e4b17023SJohn Marino character, to FP. Leading whitespace is removed. If there are
2647*e4b17023SJohn Marino macros, special token padding is not performed. */
2648*e4b17023SJohn Marino void
cpp_output_line(cpp_reader * pfile,FILE * fp)2649*e4b17023SJohn Marino cpp_output_line (cpp_reader *pfile, FILE *fp)
2650*e4b17023SJohn Marino {
2651*e4b17023SJohn Marino const cpp_token *token;
2652*e4b17023SJohn Marino
2653*e4b17023SJohn Marino token = cpp_get_token (pfile);
2654*e4b17023SJohn Marino while (token->type != CPP_EOF)
2655*e4b17023SJohn Marino {
2656*e4b17023SJohn Marino cpp_output_token (token, fp);
2657*e4b17023SJohn Marino token = cpp_get_token (pfile);
2658*e4b17023SJohn Marino if (token->flags & PREV_WHITE)
2659*e4b17023SJohn Marino putc (' ', fp);
2660*e4b17023SJohn Marino }
2661*e4b17023SJohn Marino
2662*e4b17023SJohn Marino putc ('\n', fp);
2663*e4b17023SJohn Marino }
2664*e4b17023SJohn Marino
2665*e4b17023SJohn Marino /* Return a string representation of all the remaining tokens on the
2666*e4b17023SJohn Marino current line. The result is allocated using xmalloc and must be
2667*e4b17023SJohn Marino freed by the caller. */
2668*e4b17023SJohn Marino unsigned char *
cpp_output_line_to_string(cpp_reader * pfile,const unsigned char * dir_name)2669*e4b17023SJohn Marino cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
2670*e4b17023SJohn Marino {
2671*e4b17023SJohn Marino const cpp_token *token;
2672*e4b17023SJohn Marino unsigned int out = dir_name ? ustrlen (dir_name) : 0;
2673*e4b17023SJohn Marino unsigned int alloced = 120 + out;
2674*e4b17023SJohn Marino unsigned char *result = (unsigned char *) xmalloc (alloced);
2675*e4b17023SJohn Marino
2676*e4b17023SJohn Marino /* If DIR_NAME is empty, there are no initial contents. */
2677*e4b17023SJohn Marino if (dir_name)
2678*e4b17023SJohn Marino {
2679*e4b17023SJohn Marino sprintf ((char *) result, "#%s ", dir_name);
2680*e4b17023SJohn Marino out += 2;
2681*e4b17023SJohn Marino }
2682*e4b17023SJohn Marino
2683*e4b17023SJohn Marino token = cpp_get_token (pfile);
2684*e4b17023SJohn Marino while (token->type != CPP_EOF)
2685*e4b17023SJohn Marino {
2686*e4b17023SJohn Marino unsigned char *last;
2687*e4b17023SJohn Marino /* Include room for a possible space and the terminating nul. */
2688*e4b17023SJohn Marino unsigned int len = cpp_token_len (token) + 2;
2689*e4b17023SJohn Marino
2690*e4b17023SJohn Marino if (out + len > alloced)
2691*e4b17023SJohn Marino {
2692*e4b17023SJohn Marino alloced *= 2;
2693*e4b17023SJohn Marino if (out + len > alloced)
2694*e4b17023SJohn Marino alloced = out + len;
2695*e4b17023SJohn Marino result = (unsigned char *) xrealloc (result, alloced);
2696*e4b17023SJohn Marino }
2697*e4b17023SJohn Marino
2698*e4b17023SJohn Marino last = cpp_spell_token (pfile, token, &result[out], 0);
2699*e4b17023SJohn Marino out = last - result;
2700*e4b17023SJohn Marino
2701*e4b17023SJohn Marino token = cpp_get_token (pfile);
2702*e4b17023SJohn Marino if (token->flags & PREV_WHITE)
2703*e4b17023SJohn Marino result[out++] = ' ';
2704*e4b17023SJohn Marino }
2705*e4b17023SJohn Marino
2706*e4b17023SJohn Marino result[out] = '\0';
2707*e4b17023SJohn Marino return result;
2708*e4b17023SJohn Marino }
2709*e4b17023SJohn Marino
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest size new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the room left in BUFF.

   Every macro argument is parenthesized in the expansion so that
   arbitrary expressions (e.g. a conditional) can be passed safely.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2724*e4b17023SJohn Marino
2725*e4b17023SJohn Marino /* Create a new allocation buffer. Place the control block at the end
2726*e4b17023SJohn Marino of the buffer, so that buffer overflows will cause immediate chaos. */
2727*e4b17023SJohn Marino static _cpp_buff *
new_buff(size_t len)2728*e4b17023SJohn Marino new_buff (size_t len)
2729*e4b17023SJohn Marino {
2730*e4b17023SJohn Marino _cpp_buff *result;
2731*e4b17023SJohn Marino unsigned char *base;
2732*e4b17023SJohn Marino
2733*e4b17023SJohn Marino if (len < MIN_BUFF_SIZE)
2734*e4b17023SJohn Marino len = MIN_BUFF_SIZE;
2735*e4b17023SJohn Marino len = CPP_ALIGN (len);
2736*e4b17023SJohn Marino
2737*e4b17023SJohn Marino base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
2738*e4b17023SJohn Marino result = (_cpp_buff *) (base + len);
2739*e4b17023SJohn Marino result->base = base;
2740*e4b17023SJohn Marino result->cur = base;
2741*e4b17023SJohn Marino result->limit = base + len;
2742*e4b17023SJohn Marino result->next = NULL;
2743*e4b17023SJohn Marino return result;
2744*e4b17023SJohn Marino }
2745*e4b17023SJohn Marino
2746*e4b17023SJohn Marino /* Place a chain of unwanted allocation buffers on the free list. */
2747*e4b17023SJohn Marino void
_cpp_release_buff(cpp_reader * pfile,_cpp_buff * buff)2748*e4b17023SJohn Marino _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
2749*e4b17023SJohn Marino {
2750*e4b17023SJohn Marino _cpp_buff *end = buff;
2751*e4b17023SJohn Marino
2752*e4b17023SJohn Marino while (end->next)
2753*e4b17023SJohn Marino end = end->next;
2754*e4b17023SJohn Marino end->next = pfile->free_buffs;
2755*e4b17023SJohn Marino pfile->free_buffs = buff;
2756*e4b17023SJohn Marino }
2757*e4b17023SJohn Marino
2758*e4b17023SJohn Marino /* Return a free buffer of size at least MIN_SIZE. */
2759*e4b17023SJohn Marino _cpp_buff *
_cpp_get_buff(cpp_reader * pfile,size_t min_size)2760*e4b17023SJohn Marino _cpp_get_buff (cpp_reader *pfile, size_t min_size)
2761*e4b17023SJohn Marino {
2762*e4b17023SJohn Marino _cpp_buff *result, **p;
2763*e4b17023SJohn Marino
2764*e4b17023SJohn Marino for (p = &pfile->free_buffs;; p = &(*p)->next)
2765*e4b17023SJohn Marino {
2766*e4b17023SJohn Marino size_t size;
2767*e4b17023SJohn Marino
2768*e4b17023SJohn Marino if (*p == NULL)
2769*e4b17023SJohn Marino return new_buff (min_size);
2770*e4b17023SJohn Marino result = *p;
2771*e4b17023SJohn Marino size = result->limit - result->base;
2772*e4b17023SJohn Marino /* Return a buffer that's big enough, but don't waste one that's
2773*e4b17023SJohn Marino way too big. */
2774*e4b17023SJohn Marino if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2775*e4b17023SJohn Marino break;
2776*e4b17023SJohn Marino }
2777*e4b17023SJohn Marino
2778*e4b17023SJohn Marino *p = result->next;
2779*e4b17023SJohn Marino result->next = NULL;
2780*e4b17023SJohn Marino result->cur = result->base;
2781*e4b17023SJohn Marino return result;
2782*e4b17023SJohn Marino }
2783*e4b17023SJohn Marino
2784*e4b17023SJohn Marino /* Creates a new buffer with enough space to hold the uncommitted
2785*e4b17023SJohn Marino remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2786*e4b17023SJohn Marino the excess bytes to the new buffer. Chains the new buffer after
2787*e4b17023SJohn Marino BUFF, and returns the new buffer. */
2788*e4b17023SJohn Marino _cpp_buff *
_cpp_append_extend_buff(cpp_reader * pfile,_cpp_buff * buff,size_t min_extra)2789*e4b17023SJohn Marino _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
2790*e4b17023SJohn Marino {
2791*e4b17023SJohn Marino size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2792*e4b17023SJohn Marino _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2793*e4b17023SJohn Marino
2794*e4b17023SJohn Marino buff->next = new_buff;
2795*e4b17023SJohn Marino memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2796*e4b17023SJohn Marino return new_buff;
2797*e4b17023SJohn Marino }
2798*e4b17023SJohn Marino
2799*e4b17023SJohn Marino /* Creates a new buffer with enough space to hold the uncommitted
2800*e4b17023SJohn Marino remaining bytes of the buffer pointed to by BUFF, and at least
2801*e4b17023SJohn Marino MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2802*e4b17023SJohn Marino Chains the new buffer before the buffer pointed to by BUFF, and
2803*e4b17023SJohn Marino updates the pointer to point to the new buffer. */
2804*e4b17023SJohn Marino void
_cpp_extend_buff(cpp_reader * pfile,_cpp_buff ** pbuff,size_t min_extra)2805*e4b17023SJohn Marino _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
2806*e4b17023SJohn Marino {
2807*e4b17023SJohn Marino _cpp_buff *new_buff, *old_buff = *pbuff;
2808*e4b17023SJohn Marino size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2809*e4b17023SJohn Marino
2810*e4b17023SJohn Marino new_buff = _cpp_get_buff (pfile, size);
2811*e4b17023SJohn Marino memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2812*e4b17023SJohn Marino new_buff->next = old_buff;
2813*e4b17023SJohn Marino *pbuff = new_buff;
2814*e4b17023SJohn Marino }
2815*e4b17023SJohn Marino
2816*e4b17023SJohn Marino /* Free a chain of buffers starting at BUFF. */
2817*e4b17023SJohn Marino void
_cpp_free_buff(_cpp_buff * buff)2818*e4b17023SJohn Marino _cpp_free_buff (_cpp_buff *buff)
2819*e4b17023SJohn Marino {
2820*e4b17023SJohn Marino _cpp_buff *next;
2821*e4b17023SJohn Marino
2822*e4b17023SJohn Marino for (; buff; buff = next)
2823*e4b17023SJohn Marino {
2824*e4b17023SJohn Marino next = buff->next;
2825*e4b17023SJohn Marino free (buff->base);
2826*e4b17023SJohn Marino }
2827*e4b17023SJohn Marino }
2828*e4b17023SJohn Marino
2829*e4b17023SJohn Marino /* Allocate permanent, unaligned storage of length LEN. */
2830*e4b17023SJohn Marino unsigned char *
_cpp_unaligned_alloc(cpp_reader * pfile,size_t len)2831*e4b17023SJohn Marino _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
2832*e4b17023SJohn Marino {
2833*e4b17023SJohn Marino _cpp_buff *buff = pfile->u_buff;
2834*e4b17023SJohn Marino unsigned char *result = buff->cur;
2835*e4b17023SJohn Marino
2836*e4b17023SJohn Marino if (len > (size_t) (buff->limit - result))
2837*e4b17023SJohn Marino {
2838*e4b17023SJohn Marino buff = _cpp_get_buff (pfile, len);
2839*e4b17023SJohn Marino buff->next = pfile->u_buff;
2840*e4b17023SJohn Marino pfile->u_buff = buff;
2841*e4b17023SJohn Marino result = buff->cur;
2842*e4b17023SJohn Marino }
2843*e4b17023SJohn Marino
2844*e4b17023SJohn Marino buff->cur = result + len;
2845*e4b17023SJohn Marino return result;
2846*e4b17023SJohn Marino }
2847*e4b17023SJohn Marino
2848*e4b17023SJohn Marino /* Allocate permanent, unaligned storage of length LEN from a_buff.
2849*e4b17023SJohn Marino That buffer is used for growing allocations when saving macro
2850*e4b17023SJohn Marino replacement lists in a #define, and when parsing an answer to an
2851*e4b17023SJohn Marino assertion in #assert, #unassert or #if (and therefore possibly
2852*e4b17023SJohn Marino whilst expanding macros). It therefore must not be used by any
2853*e4b17023SJohn Marino code that they might call: specifically the lexer and the guts of
2854*e4b17023SJohn Marino the macro expander.
2855*e4b17023SJohn Marino
2856*e4b17023SJohn Marino All existing other uses clearly fit this restriction: storing
2857*e4b17023SJohn Marino registered pragmas during initialization. */
2858*e4b17023SJohn Marino unsigned char *
_cpp_aligned_alloc(cpp_reader * pfile,size_t len)2859*e4b17023SJohn Marino _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
2860*e4b17023SJohn Marino {
2861*e4b17023SJohn Marino _cpp_buff *buff = pfile->a_buff;
2862*e4b17023SJohn Marino unsigned char *result = buff->cur;
2863*e4b17023SJohn Marino
2864*e4b17023SJohn Marino if (len > (size_t) (buff->limit - result))
2865*e4b17023SJohn Marino {
2866*e4b17023SJohn Marino buff = _cpp_get_buff (pfile, len);
2867*e4b17023SJohn Marino buff->next = pfile->a_buff;
2868*e4b17023SJohn Marino pfile->a_buff = buff;
2869*e4b17023SJohn Marino result = buff->cur;
2870*e4b17023SJohn Marino }
2871*e4b17023SJohn Marino
2872*e4b17023SJohn Marino buff->cur = result + len;
2873*e4b17023SJohn Marino return result;
2874*e4b17023SJohn Marino }
2875*e4b17023SJohn Marino
2876*e4b17023SJohn Marino /* Say which field of TOK is in use. */
2877*e4b17023SJohn Marino
2878*e4b17023SJohn Marino enum cpp_token_fld_kind
cpp_token_val_index(cpp_token * tok)2879*e4b17023SJohn Marino cpp_token_val_index (cpp_token *tok)
2880*e4b17023SJohn Marino {
2881*e4b17023SJohn Marino switch (TOKEN_SPELL (tok))
2882*e4b17023SJohn Marino {
2883*e4b17023SJohn Marino case SPELL_IDENT:
2884*e4b17023SJohn Marino return CPP_TOKEN_FLD_NODE;
2885*e4b17023SJohn Marino case SPELL_LITERAL:
2886*e4b17023SJohn Marino return CPP_TOKEN_FLD_STR;
2887*e4b17023SJohn Marino case SPELL_OPERATOR:
2888*e4b17023SJohn Marino if (tok->type == CPP_PASTE)
2889*e4b17023SJohn Marino return CPP_TOKEN_FLD_TOKEN_NO;
2890*e4b17023SJohn Marino else
2891*e4b17023SJohn Marino return CPP_TOKEN_FLD_NONE;
2892*e4b17023SJohn Marino case SPELL_NONE:
2893*e4b17023SJohn Marino if (tok->type == CPP_MACRO_ARG)
2894*e4b17023SJohn Marino return CPP_TOKEN_FLD_ARG_NO;
2895*e4b17023SJohn Marino else if (tok->type == CPP_PADDING)
2896*e4b17023SJohn Marino return CPP_TOKEN_FLD_SOURCE;
2897*e4b17023SJohn Marino else if (tok->type == CPP_PRAGMA)
2898*e4b17023SJohn Marino return CPP_TOKEN_FLD_PRAGMA;
2899*e4b17023SJohn Marino /* else fall through */
2900*e4b17023SJohn Marino default:
2901*e4b17023SJohn Marino return CPP_TOKEN_FLD_NONE;
2902*e4b17023SJohn Marino }
2903*e4b17023SJohn Marino }
2904*e4b17023SJohn Marino
2905*e4b17023SJohn Marino /* All tokens lexed in R after calling this function will be forced to have
2906*e4b17023SJohn Marino their source_location the same as the location referenced by P, until
2907*e4b17023SJohn Marino cpp_stop_forcing_token_locations is called for R. */
2908*e4b17023SJohn Marino
2909*e4b17023SJohn Marino void
cpp_force_token_locations(cpp_reader * r,source_location * p)2910*e4b17023SJohn Marino cpp_force_token_locations (cpp_reader *r, source_location *p)
2911*e4b17023SJohn Marino {
2912*e4b17023SJohn Marino r->forced_token_location_p = p;
2913*e4b17023SJohn Marino }
2914*e4b17023SJohn Marino
2915*e4b17023SJohn Marino /* Go back to assigning locations naturally for lexed tokens. */
2916*e4b17023SJohn Marino
2917*e4b17023SJohn Marino void
cpp_stop_forcing_token_locations(cpp_reader * r)2918*e4b17023SJohn Marino cpp_stop_forcing_token_locations (cpp_reader *r)
2919*e4b17023SJohn Marino {
2920*e4b17023SJohn Marino r->forced_token_location_p = NULL;
2921*e4b17023SJohn Marino }
2922