1 /* $NetBSD: lsym_word.c,v 1.7 2023/06/17 22:09:24 rillig Exp $ */ 2 3 /* 4 * Tests for the token lsym_word, which represents a constant, a string 5 * literal or an identifier. 6 * 7 * See also: 8 * lsym_funcname.c for an identifier followed by '(' 9 */ 10 11 // TODO: Is '"string"(' syntactically valid in any context? 12 // TODO: Is '123(' syntactically valid in any context? 13 // TODO: Would the output of the above depend on -pcs/-npcs? 14 // TODO: Add more systematic tests. 15 // TODO: Completely cover each state transition in lex_number_state. 16 17 // TODO: Consider splitting this token into lsym_name and lsym_value, to 18 // TODO: make it easier to skip tokens during lookahead, for example since 19 // TODO: L"" is not an identifier but a string literal. 20 21 //indent input 22 // TODO: add input 23 //indent end 24 25 //indent run-equals-input 26 27 28 /* 29 * Since 2019-04-04 and before NetBSD lexi.c 1.149 from 2021-11-20, the first 30 * character after a backslash continuation was always considered part of a 31 * word, no matter whether it was a word character or not. 32 */ 33 //indent input 34 int var\ 35 +name = 4; 36 //indent end 37 38 //indent run 39 int var + name = 4; 40 //indent end 41 42 43 //indent input 44 wchar_t wide_string[] = L"wide string"; 45 //indent end 46 47 /* 48 * Regardless of the line length, the 'L' must never be separated from the 49 * string literal. Before lexi.c 1.167 from 2021-11-28, the 'L' was a 50 * separate token, which could have resulted in accidental spacing between the 51 * 'L' and the following "". 52 */ 53 //indent run-equals-input -di0 54 55 //indent run-equals-input -di0 -l25 56 57 //indent run-equals-input -di0 -l1 58 59 60 //indent input 61 wchar_t wide_char[] = L'w'; 62 //indent end 63 64 //indent run-equals-input -di0 65 66 67 /* Binary number literals, a GCC extension that was standardized in C23.
*/ 68 //indent input 69 #define b00101010 -1 70 void t(void) { 71 unsigned a[] = {0b00101010, 0x00005678, 02, 17U}; 72 float x[] = {.7f, 0.7f}; 73 unsigned long ul[] = {0b00001111UL, 0x01010101UL, 02UL, 17UL}; 74 75 if (0 b00101010) 76 return; 77 /* $ '0r' is not a number base prefix, so the tokens are split. */ 78 if (0r12345) 79 return; 80 } 81 //indent end 82 83 //indent run 84 #define b00101010 -1 85 void 86 t(void) 87 { 88 unsigned a[] = {0b00101010, 0x00005678, 02, 17U}; 89 float x[] = {.7f, 0.7f}; 90 unsigned long ul[] = {0b00001111UL, 0x01010101UL, 02UL, 17UL}; 91 92 if (0 b00101010) 93 return; 94 if (0 r12345) 95 return; 96 } 97 //indent end 98 99 100 /* Floating point numbers. */ 101 //indent input 102 void t(void) { 103 unsigned long x = 314UL; 104 double y[] = {0x1P+9F, 0.3, .1, 1.2f, 0xa.p01f, 3.14f, 2.L}; 105 int z = 0b0101; 106 DO_NOTHING; 107 x._y = 5; 108 } 109 //indent end 110 111 //indent run 112 void 113 t(void) 114 { 115 unsigned long x = 314UL; 116 double y[] = {0x1P+9F, 0.3, .1, 1.2f, 0xa.p01f, 3.14f, 2.L}; 117 int z = 0b0101; 118 DO_NOTHING; 119 x._y = 5; 120 } 121 //indent end 122 123 124 /* 125 * Test identifiers containing '$', which some compilers support as an 126 * extension to the C standard. 127 */ 128 //indent input 129 int $ = jQuery; // just kidding 130 const char SYS$LOGIN[]="$HOME"; 131 //indent end 132 133 //indent run 134 int $ = jQuery; // just kidding 135 const char SYS$LOGIN[] = "$HOME"; 136 //indent end 137 138 139 /* 140 * Test the tokenizer for number constants. 141 * 142 * When the tokenizer reads a character that makes a token invalid (such as 143 * '0x') but may later be extended to form a valid token (such as '0x123'), 144 * indent does not care about this invalid prefix and returns it nevertheless. 145 */ 146 //indent input 147 int unfinished_hex_prefix = 0x; 148 double unfinished_hex_float = 0x123p; 149 //indent end 150 151 //indent run-equals-input -di0 152