1 /* $NetBSD: llex.c,v 1.1.1.2 2012/03/15 00:08:13 alnsn Exp $ */ 2 3 /* 4 ** $Id: llex.c,v 1.1.1.2 2012/03/15 00:08:13 alnsn Exp $ 5 ** Lexical Analyzer 6 ** See Copyright Notice in lua.h 7 */ 8 9 10 #include <ctype.h> 11 #include <locale.h> 12 #include <string.h> 13 14 #define llex_c 15 #define LUA_CORE 16 17 #include "lua.h" 18 19 #include "ldo.h" 20 #include "llex.h" 21 #include "lobject.h" 22 #include "lparser.h" 23 #include "lstate.h" 24 #include "lstring.h" 25 #include "ltable.h" 26 #include "lzio.h" 27 28 29 30 #define next(ls) (ls->current = zgetc(ls->z)) 31 32 33 34 35 #define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r') 36 37 38 /* ORDER RESERVED */ 39 const char *const luaX_tokens [] = { 40 "and", "break", "do", "else", "elseif", 41 "end", "false", "for", "function", "if", 42 "in", "local", "nil", "not", "or", "repeat", 43 "return", "then", "true", "until", "while", 44 "..", "...", "==", ">=", "<=", "~=", 45 "<number>", "<name>", "<string>", "<eof>", 46 NULL 47 }; 48 49 50 #define save_and_next(ls) (save(ls, ls->current), next(ls)) 51 52 53 static void save (LexState *ls, int c) { 54 Mbuffer *b = ls->buff; 55 if (b->n + 1 > b->buffsize) { 56 size_t newsize; 57 if (b->buffsize >= MAX_SIZET/2) 58 luaX_lexerror(ls, "lexical element too long", 0); 59 newsize = b->buffsize * 2; 60 luaZ_resizebuffer(ls->L, b, newsize); 61 } 62 b->buffer[b->n++] = cast(char, c); 63 } 64 65 66 void luaX_init (lua_State *L) { 67 int i; 68 for (i=0; i<NUM_RESERVED; i++) { 69 TString *ts = luaS_new(L, luaX_tokens[i]); 70 luaS_fix(ts); /* reserved words are never collected */ 71 lua_assert(strlen(luaX_tokens[i])+1 <= TOKEN_LEN); 72 ts->tsv.reserved = cast_byte(i+1); /* reserved word */ 73 } 74 } 75 76 77 #define MAXSRC 80 78 79 80 const char *luaX_token2str (LexState *ls, int token) { 81 if (token < FIRST_RESERVED) { 82 lua_assert(token == cast(unsigned char, token)); 83 return (iscntrl(token)) ? luaO_pushfstring(ls->L, "char(%d)", token) : 84 luaO_pushfstring(ls->L, "%c", token); 85 } 86 else 87 return luaX_tokens[token-FIRST_RESERVED]; 88 } 89 90 91 static const char *txtToken (LexState *ls, int token) { 92 switch (token) { 93 case TK_NAME: 94 case TK_STRING: 95 case TK_NUMBER: 96 save(ls, '\0'); 97 return luaZ_buffer(ls->buff); 98 default: 99 return luaX_token2str(ls, token); 100 } 101 } 102 103 104 void luaX_lexerror (LexState *ls, const char *msg, int token) { 105 char buff[MAXSRC]; 106 luaO_chunkid(buff, getstr(ls->source), MAXSRC); 107 msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg); 108 if (token) 109 luaO_pushfstring(ls->L, "%s near " LUA_QS, msg, txtToken(ls, token)); 110 luaD_throw(ls->L, LUA_ERRSYNTAX); 111 } 112 113 114 void luaX_syntaxerror (LexState *ls, const char *msg) { 115 luaX_lexerror(ls, msg, ls->t.token); 116 } 117 118 119 TString *luaX_newstring (LexState *ls, const char *str, size_t l) { 120 lua_State *L = ls->L; 121 TString *ts = luaS_newlstr(L, str, l); 122 TValue *o = luaH_setstr(L, ls->fs->h, ts); /* entry for `str' */ 123 if (ttisnil(o)) { 124 setbvalue(o, 1); /* make sure `str' will not be collected */ 125 luaC_checkGC(L); 126 } 127 return ts; 128 } 129 130 131 static void inclinenumber (LexState *ls) { 132 int old = ls->current; 133 lua_assert(currIsNewline(ls)); 134 next(ls); /* skip `\n' or `\r' */ 135 if (currIsNewline(ls) && ls->current != old) 136 next(ls); /* skip `\n\r' or `\r\n' */ 137 if (++ls->linenumber >= MAX_INT) 138 luaX_syntaxerror(ls, "chunk has too many lines"); 139 } 140 141 142 void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) { 143 ls->decpoint = '.'; 144 ls->L = L; 145 ls->lookahead.token = TK_EOS; /* no look-ahead token */ 146 ls->z = z; 147 ls->fs = NULL; 148 ls->linenumber = 1; 149 ls->lastline = 1; 150 ls->source = source; 151 luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); /* initialize buffer */ 152 next(ls); /* read first char */ 153 } 154 155 156 157 /* 158 ** ======================================================= 159 ** LEXICAL ANALYZER 160 ** ======================================================= 161 */ 162 163 164 165 static int check_next (LexState *ls, const char *set) { 166 if (!strchr(set, ls->current)) 167 return 0; 168 save_and_next(ls); 169 return 1; 170 } 171 172 173 static void buffreplace (LexState *ls, char from, char to) { 174 size_t n = luaZ_bufflen(ls->buff); 175 char *p = luaZ_buffer(ls->buff); 176 while (n--) 177 if (p[n] == from) p[n] = to; 178 } 179 180 181 static void trydecpoint (LexState *ls, SemInfo *seminfo) { 182 /* format error: try to update decimal point separator */ 183 struct lconv *cv = localeconv(); 184 char old = ls->decpoint; 185 ls->decpoint = (cv ? cv->decimal_point[0] : '.'); 186 buffreplace(ls, old, ls->decpoint); /* try updated decimal separator */ 187 if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) { 188 /* format error with correct decimal point: no more options */ 189 buffreplace(ls, ls->decpoint, '.'); /* undo change (for error message) */ 190 luaX_lexerror(ls, "malformed number", TK_NUMBER); 191 } 192 } 193 194 195 /* LUA_NUMBER */ 196 static void read_numeral (LexState *ls, SemInfo *seminfo) { 197 lua_assert(isdigit(ls->current)); 198 do { 199 save_and_next(ls); 200 } while (isdigit(ls->current) || ls->current == '.'); 201 if (check_next(ls, "Ee")) /* `E'? */ 202 check_next(ls, "+-"); /* optional exponent sign */ 203 while (isalnum(ls->current) || ls->current == '_') 204 save_and_next(ls); 205 save(ls, '\0'); 206 buffreplace(ls, '.', ls->decpoint); /* follow locale for decimal point */ 207 if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) /* format error? */ 208 trydecpoint(ls, seminfo); /* try to update decimal point separator */ 209 } 210 211 212 static int skip_sep (LexState *ls) { 213 int count = 0; 214 int s = ls->current; 215 lua_assert(s == '[' || s == ']'); 216 save_and_next(ls); 217 while (ls->current == '=') { 218 save_and_next(ls); 219 count++; 220 } 221 return (ls->current == s) ? count : (-count) - 1; 222 } 223 224 225 static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) { 226 int cont = 0; 227 (void)(cont); /* avoid warnings when `cont' is not used */ 228 save_and_next(ls); /* skip 2nd `[' */ 229 if (currIsNewline(ls)) /* string starts with a newline? */ 230 inclinenumber(ls); /* skip it */ 231 for (;;) { 232 switch (ls->current) { 233 case EOZ: 234 luaX_lexerror(ls, (seminfo) ? "unfinished long string" : 235 "unfinished long comment", TK_EOS); 236 break; /* to avoid warnings */ 237 #if defined(LUA_COMPAT_LSTR) 238 case '[': { 239 if (skip_sep(ls) == sep) { 240 save_and_next(ls); /* skip 2nd `[' */ 241 cont++; 242 #if LUA_COMPAT_LSTR == 1 243 if (sep == 0) 244 luaX_lexerror(ls, "nesting of [[...]] is deprecated", '['); 245 #endif 246 } 247 break; 248 } 249 #endif 250 case ']': { 251 if (skip_sep(ls) == sep) { 252 save_and_next(ls); /* skip 2nd `]' */ 253 #if defined(LUA_COMPAT_LSTR) && LUA_COMPAT_LSTR == 2 254 cont--; 255 if (sep == 0 && cont >= 0) break; 256 #endif 257 goto endloop; 258 } 259 break; 260 } 261 case '\n': 262 case '\r': { 263 save(ls, '\n'); 264 inclinenumber(ls); 265 if (!seminfo) luaZ_resetbuffer(ls->buff); /* avoid wasting space */ 266 break; 267 } 268 default: { 269 if (seminfo) save_and_next(ls); 270 else next(ls); 271 } 272 } 273 } endloop: 274 if (seminfo) 275 seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep), 276 luaZ_bufflen(ls->buff) - 2*(2 + sep)); 277 } 278 279 280 static void read_string (LexState *ls, int del, SemInfo *seminfo) { 281 save_and_next(ls); 282 while (ls->current != del) { 283 switch (ls->current) { 284 case EOZ: 285 luaX_lexerror(ls, "unfinished string", TK_EOS); 286 continue; /* to avoid warnings */ 287 case '\n': 288 case '\r': 289 luaX_lexerror(ls, "unfinished string", TK_STRING); 290 continue; /* to avoid warnings */ 291 case '\\': { 292 int c; 293 next(ls); /* do not save the `\' */ 294 switch (ls->current) { 295 case 'a': c = '\a'; break; 296 case 'b': c = '\b'; break; 297 case 'f': c = '\f'; break; 298 case 'n': c = '\n'; break; 299 case 'r': c = '\r'; break; 300 case 't': c = '\t'; break; 301 case 'v': c = '\v'; break; 302 case '\n': /* go through */ 303 case '\r': save(ls, '\n'); inclinenumber(ls); continue; 304 case EOZ: continue; /* will raise an error next loop */ 305 default: { 306 if (!isdigit(ls->current)) 307 save_and_next(ls); /* handles \\, \", \', and \? */ 308 else { /* \xxx */ 309 int i = 0; 310 c = 0; 311 do { 312 c = 10*c + (ls->current-'0'); 313 next(ls); 314 } while (++i<3 && isdigit(ls->current)); 315 if (c > UCHAR_MAX) 316 luaX_lexerror(ls, "escape sequence too large", TK_STRING); 317 save(ls, c); 318 } 319 continue; 320 } 321 } 322 save(ls, c); 323 next(ls); 324 continue; 325 } 326 default: 327 save_and_next(ls); 328 } 329 } 330 save_and_next(ls); /* skip delimiter */ 331 seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1, 332 luaZ_bufflen(ls->buff) - 2); 333 } 334 335 336 static int llex (LexState *ls, SemInfo *seminfo) { 337 luaZ_resetbuffer(ls->buff); 338 for (;;) { 339 switch (ls->current) { 340 case '\n': 341 case '\r': { 342 inclinenumber(ls); 343 continue; 344 } 345 case '-': { 346 next(ls); 347 if (ls->current != '-') return '-'; 348 /* else is a comment */ 349 next(ls); 350 if (ls->current == '[') { 351 int sep = skip_sep(ls); 352 luaZ_resetbuffer(ls->buff); /* `skip_sep' may dirty the buffer */ 353 if (sep >= 0) { 354 read_long_string(ls, NULL, sep); /* long comment */ 355 luaZ_resetbuffer(ls->buff); 356 continue; 357 } 358 } 359 /* else short comment */ 360 while (!currIsNewline(ls) && ls->current != EOZ) 361 next(ls); 362 continue; 363 } 364 case '[': { 365 int sep = skip_sep(ls); 366 if (sep >= 0) { 367 read_long_string(ls, seminfo, sep); 368 return TK_STRING; 369 } 370 else if (sep == -1) return '['; 371 else luaX_lexerror(ls, "invalid long string delimiter", TK_STRING); 372 } 373 case '=': { 374 next(ls); 375 if (ls->current != '=') return '='; 376 else { next(ls); return TK_EQ; } 377 } 378 case '<': { 379 next(ls); 380 if (ls->current != '=') return '<'; 381 else { next(ls); return TK_LE; } 382 } 383 case '>': { 384 next(ls); 385 if (ls->current != '=') return '>'; 386 else { next(ls); return TK_GE; } 387 } 388 case '~': { 389 next(ls); 390 if (ls->current != '=') return '~'; 391 else { next(ls); return TK_NE; } 392 } 393 case '"': 394 case '\'': { 395 read_string(ls, ls->current, seminfo); 396 return TK_STRING; 397 } 398 case '.': { 399 save_and_next(ls); 400 if (check_next(ls, ".")) { 401 if (check_next(ls, ".")) 402 return TK_DOTS; /* ... */ 403 else return TK_CONCAT; /* .. */ 404 } 405 else if (!isdigit(ls->current)) return '.'; 406 else { 407 read_numeral(ls, seminfo); 408 return TK_NUMBER; 409 } 410 } 411 case EOZ: { 412 return TK_EOS; 413 } 414 default: { 415 if (isspace(ls->current)) { 416 lua_assert(!currIsNewline(ls)); 417 next(ls); 418 continue; 419 } 420 else if (isdigit(ls->current)) { 421 read_numeral(ls, seminfo); 422 return TK_NUMBER; 423 } 424 else if (isalpha(ls->current) || ls->current == '_') { 425 /* identifier or reserved word */ 426 TString *ts; 427 do { 428 save_and_next(ls); 429 } while (isalnum(ls->current) || ls->current == '_'); 430 ts = luaX_newstring(ls, luaZ_buffer(ls->buff), 431 luaZ_bufflen(ls->buff)); 432 if (ts->tsv.reserved > 0) /* reserved word? */ 433 return ts->tsv.reserved - 1 + FIRST_RESERVED; 434 else { 435 seminfo->ts = ts; 436 return TK_NAME; 437 } 438 } 439 else { 440 int c = ls->current; 441 next(ls); 442 return c; /* single-char tokens (+ - / ...) */ 443 } 444 } 445 } 446 } 447 } 448 449 450 void luaX_next (LexState *ls) { 451 ls->lastline = ls->linenumber; 452 if (ls->lookahead.token != TK_EOS) { /* is there a look-ahead token? */ 453 ls->t = ls->lookahead; /* use this one */ 454 ls->lookahead.token = TK_EOS; /* and discharge it */ 455 } 456 else 457 ls->t.token = llex(ls, &ls->t.seminfo); /* read next token */ 458 } 459 460 461 void luaX_lookahead (LexState *ls) { 462 lua_assert(ls->lookahead.token == TK_EOS); 463 ls->lookahead.token = llex(ls, &ls->lookahead.seminfo); 464 } 465 466