1 /* $NetBSD: lsym_word.c,v 1.7 2023/06/17 22:09:24 rillig Exp $ */
2
3 /*
4 * Tests for the token lsym_word, which represents a constant, a string
5 * literal or an identifier.
6 *
7 * See also:
8 * lsym_funcname.c for an identifier followed by '('
9 */
10
11 // TODO: Is '"string"(' syntactically valid in any context?
12 // TODO: Is '123(' syntactically valid in any context?
13 // TODO: Would the output of the above depend on -pcs/-npcs?
14 // TODO: Add more systematic tests.
15 // TODO: Completely cover each state transition in lex_number_state.
16
17 // TODO: Consider splitting this token into lsym_name and lsym_value, to
18 // TODO: make it easier to skip tokens during lookahead, for example since
19 // TODO: L"" is not an identifier but a string literal.
20
21 //indent input
22 // TODO: add input
23 //indent end
24
25 //indent run-equals-input
26
27
28 /*
29 * Since 2019-04-04 and before NetBSD lexi.c 1.149 from 2021-11-20, the first
30 * character after a backslash continuation was always considered part of a
31 * word, no matter whether it was a word character or not.
32 */
33 //indent input
34 int var\
35 +name = 4;
36 //indent end
37
38 //indent run
39 int var + name = 4;
40 //indent end
41
42
43 //indent input
44 wchar_t wide_string[] = L"wide string";
45 //indent end
46
47 /*
48 * Regardless of the line length, the 'L' must never be separated from the
49 * string literal. Before lexi.c 1.167 from 2021-11-28, the 'L' was a
50 * separate token, which could have resulted in accidental spacing between the
51 * 'L' and the following "".
52 */
53 //indent run-equals-input -di0
54
55 //indent run-equals-input -di0 -l25
56
57 //indent run-equals-input -di0 -l1
58
59
60 //indent input
61 wchar_t wide_char[] = L'w';
62 //indent end
63
64 //indent run-equals-input -di0
65
66
67 /*
 * Binary number literals, a GCC extension that was standardized in C23.
 *
 * A '0b' prefix that is directly followed by binary digits is expected to
 * stay a single word, with or without an integer suffix.  In contrast,
 * '0 b00101010' (written with a space) is expected to stay two words, and
 * '0r12345' is expected to be split into '0 r12345'.
 */
68 //indent input
69 #define b00101010 -1
t(void)70 void t(void) {
71 unsigned a[] = {0b00101010, 0x00005678, 02, 17U};
72 float x[] = {.7f, 0.7f};
73 unsigned long ul[] = {0b00001111UL, 0x01010101UL, 02UL, 17UL};
74
75 if (0 b00101010)
76 return;
77 /* $ '0r' is not a number base prefix, so the tokens are split. */
78 if (0r12345)
79 return;
80 }
81 //indent end
82
83 //indent run
84 #define b00101010 -1
85 void
t(void)86 t(void)
87 {
88 unsigned a[] = {0b00101010, 0x00005678, 02, 17U};
89 float x[] = {.7f, 0.7f};
90 unsigned long ul[] = {0b00001111UL, 0x01010101UL, 02UL, 17UL};
91
92 if (0 b00101010)
93 return;
94 if (0 r12345)
95 return;
96 }
97 //indent end
98
99
100 /*
 * Floating point numbers, including hexadecimal ones with a 'p' or 'P'
 * exponent and constants with type suffixes, next to integer constants, a
 * plain identifier and a member access.  Each word in the function body is
 * expected to be reproduced exactly as written in the input.
 */
101 //indent input
t(void)102 void t(void) {
103 unsigned long x = 314UL;
104 double y[] = {0x1P+9F, 0.3, .1, 1.2f, 0xa.p01f, 3.14f, 2.L};
105 int z = 0b0101;
106 DO_NOTHING;
107 x._y = 5;
108 }
109 //indent end
110
111 //indent run
112 void
t(void)113 t(void)
114 {
115 unsigned long x = 314UL;
116 double y[] = {0x1P+9F, 0.3, .1, 1.2f, 0xa.p01f, 3.14f, 2.L};
117 int z = 0b0101;
118 DO_NOTHING;
119 x._y = 5;
120 }
121 //indent end
122
123
124 /*
125 * Test identifiers containing '$', which some compilers support as an
126 * extension to the C standard.
127 */
128 //indent input
129 int $ = jQuery; // just kidding
130 const char SYS$LOGIN[]="$HOME";
131 //indent end
132
133 //indent run
134 int $ = jQuery; // just kidding
135 const char SYS$LOGIN[] = "$HOME";
136 //indent end
137
138
139 /*
140 * Test the tokenizer for number constants.
141 *
142 * When the tokenizer has read a prefix that is not a valid token by itself
143 * (such as '0x') but could be extended to form a valid token (such as
144 * '0x123'), indent does not care and returns the invalid prefix nevertheless.
145 */
146 //indent input
147 int unfinished_hex_prefix = 0x;
148 double unfinished_hex_float = 0x123p;
149 //indent end
150
151 //indent run-equals-input -di0
152