xref: /netbsd-src/tests/usr.bin/indent/lsym_word.c (revision 65e3242c1e7ce8e4fda93bdaae8c93d7270ffe07)
1 /* $NetBSD: lsym_word.c,v 1.7 2023/06/17 22:09:24 rillig Exp $ */
2 
3 /*
4  * Tests for the token lsym_word, which represents a constant, a string
5  * literal or an identifier.
6  *
7  * See also:
8  *	lsym_funcname.c		for an identifier followed by '('
9  */
10 
11 // TODO: Is '"string"(' syntactically valid in any context?
12 // TODO: Is '123(' syntactically valid in any context?
13 // TODO: Would the output of the above depend on -pcs/-npcs?
14 // TODO: Add more systematic tests.
15 // TODO: Completely cover each state transition in lex_number_state.
16 
17 // TODO: Consider splitting this token into lsym_name and lsym_value, to
18 // TODO: make it easier to skip tokens during lookahead, for example since
19 // TODO: L"" is not an identifier but a string literal.
20 
21 //indent input
22 // TODO: add input
23 //indent end
24 
25 //indent run-equals-input
26 
27 
28 /*
29  * Since 2019-04-04 and before NetBSD lexi.c 1.149 from 2021-11-20, the first
30  * character after a backslash continuation was always considered part of a
31  * word, no matter whether it was a word character or not.
32  */
33 //indent input
34 int var\
35 +name = 4;
36 //indent end
37 
38 //indent run
39 int		var + name = 4;
40 //indent end
41 
42 
43 //indent input
44 wchar_t wide_string[] = L"wide string";
45 //indent end
46 
47 /*
48  * Regardless of the line length, the 'L' must never be separated from the
49  * string literal.  Before lexi.c 1.167 from 2021-11-28, the 'L' was a
50  * separate token, which could have resulted in accidental spacing between the
51  * 'L' and the following "".
52  */
53 //indent run-equals-input -di0
54 
55 //indent run-equals-input -di0 -l25
56 
57 //indent run-equals-input -di0 -l1
58 
59 
60 //indent input
61 wchar_t wide_char[] = L'w';
62 //indent end
63 
64 //indent run-equals-input -di0
65 
66 
67 /* Binary number literals, a GCC extension that was standardized in C23. */
68 //indent input
69 #define b00101010 -1
70 void t(void) {
71 	unsigned a[] = {0b00101010, 0x00005678, 02, 17U};
72 	float x[] = {.7f, 0.7f};
73 	unsigned long ul[] = {0b00001111UL, 0x01010101UL, 02UL, 17UL};
74 
75 	if (0 b00101010)
76 		return;
77 	/* $ '0r' is not a number base prefix, so the tokens are split. */
78 	if (0r12345)
79 		return;
80 }
81 //indent end
82 
83 //indent run
84 #define b00101010 -1
85 void
86 t(void)
87 {
88 	unsigned	a[] = {0b00101010, 0x00005678, 02, 17U};
89 	float		x[] = {.7f, 0.7f};
90 	unsigned long	ul[] = {0b00001111UL, 0x01010101UL, 02UL, 17UL};
91 
92 	if (0 b00101010)
93 		return;
94 	if (0 r12345)
95 		return;
96 }
97 //indent end
98 
99 
100 /* Floating point numbers. */
101 //indent input
102 void t(void) {
103 	unsigned long x = 314UL;
104 	double y[] = {0x1P+9F, 0.3, .1, 1.2f, 0xa.p01f, 3.14f, 2.L};
105 	int z = 0b0101;
106 	DO_NOTHING;
107 	x._y = 5;
108 }
109 //indent end
110 
111 //indent run
112 void
113 t(void)
114 {
115 	unsigned long	x = 314UL;
116 	double		y[] = {0x1P+9F, 0.3, .1, 1.2f, 0xa.p01f, 3.14f, 2.L};
117 	int		z = 0b0101;
118 	DO_NOTHING;
119 	x._y = 5;
120 }
121 //indent end
122 
123 
124 /*
125  * Test identifiers containing '$', which some compilers support as an
126  * extension to the C standard.
127  */
128 //indent input
129 int $		= jQuery;			// just kidding
130 const char SYS$LOGIN[]="$HOME";
131 //indent end
132 
133 //indent run
134 int		$ = jQuery;	// just kidding
135 const char	SYS$LOGIN[] = "$HOME";
136 //indent end
137 
138 
139 /*
140  * Test the tokenizer for number constants.
141  *
142  * When the tokenizer reads a character that makes a token invalid (such as
143  * '0x') but may later be extended to form a valid token (such as '0x123'),
144  * indent does not care about this invalid prefix and returns it nevertheless.
145  */
146 //indent input
147 int unfinished_hex_prefix = 0x;
148 double unfinished_hex_float = 0x123p;
149 //indent end
150 
151 //indent run-equals-input -di0
152