1 /* $NetBSD: lsym_word.c,v 1.6 2022/04/24 10:36:37 rillig Exp $ */ 2 3 /* 4 * Tests for the token lsym_word, which represents a constant, a string 5 * literal or an identifier. 6 * 7 * See also: 8 * lsym_funcname.c for an identifier followed by '(' 9 */ 10 11 // TODO: Is '"string"(' syntactically valid in any context? 12 // TODO: Is '123(' syntactically valid in any context? 13 // TODO: Would the output of the above depend on -pcs/-npcs? 14 // TODO: Add more systematic tests. 15 // TODO: Completely cover each state transition in lex_number_state. 16 17 //indent input 18 // TODO: add input 19 //indent end 20 21 //indent run-equals-input 22 23 24 /* 25 * Since 2019-04-04 and before NetBSD lexi.c 1.149 from 2021-11-20, the first 26 * character after a backslash continuation was always considered part of a 27 * word, no matter whether it was a word character or not. 28 */ 29 //indent input 30 int var\ 31 +name = 4; 32 //indent end 33 34 //indent run 35 int var + name = 4; 36 //indent end 37 38 39 //indent input 40 wchar_t wide_string[] = L"wide string"; 41 //indent end 42 43 /* 44 * Regardless of the line length, the 'L' must never be separated from the 45 * string literal. Before lexi.c 1.167 from 2021-11-28, the 'L' was a 46 * separate token, which could have resulted in accidental spacing between the 47 * 'L' and the following "". 48 */ 49 //indent run-equals-input -di0 50 51 //indent run-equals-input -di0 -l25 52 53 //indent run-equals-input -di0 -l1 54 55 56 //indent input 57 wchar_t wide_char[] = L'w'; 58 //indent end 59 60 //indent run-equals-input -di0 61 62 63 /* Binary number literals, a GCC extension that was standardized in C23. */ 64 //indent input 65 #define b00101010 -1 66 void t(void) { 67 unsigned a[] = {0b00101010, 0x00005678, 02, 17U}; 68 float x[] = {.7f, 0.7f}; 69 unsigned long ul[] = {0b00001111UL, 0x01010101UL, 02UL, 17UL}; 70 71 if (0 b00101010) 72 return; 73 /* $ '0r' is not a number base prefix, so the tokens are split. 
*/ 74 if (0r12345) 75 return; 76 } 77 //indent end 78 79 //indent run 80 #define b00101010 -1 81 void 82 t(void) 83 { 84 unsigned a[] = {0b00101010, 0x00005678, 02, 17U}; 85 float x[] = {.7f, 0.7f}; 86 unsigned long ul[] = {0b00001111UL, 0x01010101UL, 02UL, 17UL}; 87 88 if (0 b00101010) 89 return; 90 if (0 r12345) 91 return; 92 } 93 //indent end 94 95 96 /* Floating point numbers. */ 97 //indent input 98 void t(void) { 99 unsigned long x = 314UL; 100 double y[] = {0x1P+9F, 0.3, .1, 1.2f, 0xa.p01f, 3.14f, 2.L}; 101 int z = 0b0101; 102 DO_NOTHING; 103 x._y = 5; 104 } 105 //indent end 106 107 //indent run 108 void 109 t(void) 110 { 111 unsigned long x = 314UL; 112 double y[] = {0x1P+9F, 0.3, .1, 1.2f, 0xa.p01f, 3.14f, 2.L}; 113 int z = 0b0101; 114 DO_NOTHING; 115 x._y = 5; 116 } 117 //indent end 118 119 120 /* 121 * Test identifiers containing '$', which some compilers support as an 122 * extension to the C standard. 123 */ 124 //indent input 125 int $ = jQuery; // just kidding 126 const char SYS$LOGIN[]="$HOME"; 127 //indent end 128 129 //indent run 130 int $ = jQuery; // just kidding 131 const char SYS$LOGIN[] = "$HOME"; 132 //indent end 133 134 135 /* 136 * Test the tokenizer for number constants. 137 * 138 * When the tokenizer reads a character that makes a token invalid (such as 139 * '0x') but may later be extended to form a valid token (such as '0x123'), 140 * indent does not care about this invalid prefix and returns it nevertheless. 141 */ 142 //indent input 143 int unfinished_hex_prefix = 0x; 144 double unfinished_hex_float = 0x123p; 145 //indent end 146 147 //indent run-equals-input -di0 148