xref: /netbsd-src/external/mit/lua/dist/src/llex.c (revision 6a493d6bc668897c91594964a732d38505b70cbb)
1 /*	$NetBSD: llex.c,v 1.1.1.2 2012/03/15 00:08:13 alnsn Exp $	*/
2 
3 /*
4 ** $Id: llex.c,v 1.1.1.2 2012/03/15 00:08:13 alnsn Exp $
5 ** Lexical Analyzer
6 ** See Copyright Notice in lua.h
7 */
8 
9 
10 #include <ctype.h>
11 #include <locale.h>
12 #include <string.h>
13 
14 #define llex_c
15 #define LUA_CORE
16 
17 #include "lua.h"
18 
19 #include "ldo.h"
20 #include "llex.h"
21 #include "lobject.h"
22 #include "lparser.h"
23 #include "lstate.h"
24 #include "lstring.h"
25 #include "ltable.h"
26 #include "lzio.h"
27 
28 
29 
/* advance the lexer: read the next character from the input stream `z'
   into `current'; the expression yields that character */
#define next(ls) ((ls)->current = zgetc((ls)->z))
31 
32 
33 
34 
/* true when the current character is one of the two line-break characters */
#define currIsNewline(ls)	((ls)->current == '\n' || (ls)->current == '\r')
36 
37 
38 /* ORDER RESERVED */
/*
** Printable names of the multi-character tokens.  luaX_token2str indexes
** this table with (token - FIRST_RESERVED), so the order of the entries
** must not change.
*/
const char *const luaX_tokens [] = {
    /* keywords */
    "and", "break", "do",
    "else", "elseif", "end",
    "false", "for", "function",
    "if", "in", "local",
    "nil", "not", "or",
    "repeat", "return", "then",
    "true", "until", "while",
    /* multi-character operators */
    "..", "...",
    "==", ">=", "<=", "~=",
    /* token classes and end of stream */
    "<number>", "<name>", "<string>",
    "<eof>",
    NULL  /* end of table */
};
48 
49 
/* append the current character to the token buffer, then read the next one */
#define save_and_next(ls) (save(ls, (ls)->current), next(ls))
51 
52 
53 static void save (LexState *ls, int c) {
54   Mbuffer *b = ls->buff;
55   if (b->n + 1 > b->buffsize) {
56     size_t newsize;
57     if (b->buffsize >= MAX_SIZET/2)
58       luaX_lexerror(ls, "lexical element too long", 0);
59     newsize = b->buffsize * 2;
60     luaZ_resizebuffer(ls->L, b, newsize);
61   }
62   b->buffer[b->n++] = cast(char, c);
63 }
64 
65 
66 void luaX_init (lua_State *L) {
67   int i;
68   for (i=0; i<NUM_RESERVED; i++) {
69     TString *ts = luaS_new(L, luaX_tokens[i]);
70     luaS_fix(ts);  /* reserved words are never collected */
71     lua_assert(strlen(luaX_tokens[i])+1 <= TOKEN_LEN);
72     ts->tsv.reserved = cast_byte(i+1);  /* reserved word */
73   }
74 }
75 
76 
/* size of the buffer that receives the chunk id in error messages */
#define MAXSRC 80
78 
79 
80 const char *luaX_token2str (LexState *ls, int token) {
81   if (token < FIRST_RESERVED) {
82     lua_assert(token == cast(unsigned char, token));
83     return (iscntrl(token)) ? luaO_pushfstring(ls->L, "char(%d)", token) :
84                               luaO_pushfstring(ls->L, "%c", token);
85   }
86   else
87     return luaX_tokens[token-FIRST_RESERVED];
88 }
89 
90 
91 static const char *txtToken (LexState *ls, int token) {
92   switch (token) {
93     case TK_NAME:
94     case TK_STRING:
95     case TK_NUMBER:
96       save(ls, '\0');
97       return luaZ_buffer(ls->buff);
98     default:
99       return luaX_token2str(ls, token);
100   }
101 }
102 
103 
104 void luaX_lexerror (LexState *ls, const char *msg, int token) {
105   char buff[MAXSRC];
106   luaO_chunkid(buff, getstr(ls->source), MAXSRC);
107   msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg);
108   if (token)
109     luaO_pushfstring(ls->L, "%s near " LUA_QS, msg, txtToken(ls, token));
110   luaD_throw(ls->L, LUA_ERRSYNTAX);
111 }
112 
113 
114 void luaX_syntaxerror (LexState *ls, const char *msg) {
115   luaX_lexerror(ls, msg, ls->t.token);
116 }
117 
118 
119 TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
120   lua_State *L = ls->L;
121   TString *ts = luaS_newlstr(L, str, l);
122   TValue *o = luaH_setstr(L, ls->fs->h, ts);  /* entry for `str' */
123   if (ttisnil(o)) {
124     setbvalue(o, 1);  /* make sure `str' will not be collected */
125     luaC_checkGC(L);
126   }
127   return ts;
128 }
129 
130 
131 static void inclinenumber (LexState *ls) {
132   int old = ls->current;
133   lua_assert(currIsNewline(ls));
134   next(ls);  /* skip `\n' or `\r' */
135   if (currIsNewline(ls) && ls->current != old)
136     next(ls);  /* skip `\n\r' or `\r\n' */
137   if (++ls->linenumber >= MAX_INT)
138     luaX_syntaxerror(ls, "chunk has too many lines");
139 }
140 
141 
142 void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) {
143   ls->decpoint = '.';
144   ls->L = L;
145   ls->lookahead.token = TK_EOS;  /* no look-ahead token */
146   ls->z = z;
147   ls->fs = NULL;
148   ls->linenumber = 1;
149   ls->lastline = 1;
150   ls->source = source;
151   luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER);  /* initialize buffer */
152   next(ls);  /* read first char */
153 }
154 
155 
156 
157 /*
158 ** =======================================================
159 ** LEXICAL ANALYZER
160 ** =======================================================
161 */
162 
163 
164 
165 static int check_next (LexState *ls, const char *set) {
166   if (!strchr(set, ls->current))
167     return 0;
168   save_and_next(ls);
169   return 1;
170 }
171 
172 
173 static void buffreplace (LexState *ls, char from, char to) {
174   size_t n = luaZ_bufflen(ls->buff);
175   char *p = luaZ_buffer(ls->buff);
176   while (n--)
177     if (p[n] == from) p[n] = to;
178 }
179 
180 
181 static void trydecpoint (LexState *ls, SemInfo *seminfo) {
182   /* format error: try to update decimal point separator */
183   struct lconv *cv = localeconv();
184   char old = ls->decpoint;
185   ls->decpoint = (cv ? cv->decimal_point[0] : '.');
186   buffreplace(ls, old, ls->decpoint);  /* try updated decimal separator */
187   if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) {
188     /* format error with correct decimal point: no more options */
189     buffreplace(ls, ls->decpoint, '.');  /* undo change (for error message) */
190     luaX_lexerror(ls, "malformed number", TK_NUMBER);
191   }
192 }
193 
194 
195 /* LUA_NUMBER */
196 static void read_numeral (LexState *ls, SemInfo *seminfo) {
197   lua_assert(isdigit(ls->current));
198   do {
199     save_and_next(ls);
200   } while (isdigit(ls->current) || ls->current == '.');
201   if (check_next(ls, "Ee"))  /* `E'? */
202     check_next(ls, "+-");  /* optional exponent sign */
203   while (isalnum(ls->current) || ls->current == '_')
204     save_and_next(ls);
205   save(ls, '\0');
206   buffreplace(ls, '.', ls->decpoint);  /* follow locale for decimal point */
207   if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r))  /* format error? */
208     trydecpoint(ls, seminfo); /* try to update decimal point separator */
209 }
210 
211 
212 static int skip_sep (LexState *ls) {
213   int count = 0;
214   int s = ls->current;
215   lua_assert(s == '[' || s == ']');
216   save_and_next(ls);
217   while (ls->current == '=') {
218     save_and_next(ls);
219     count++;
220   }
221   return (ls->current == s) ? count : (-count) - 1;
222 }
223 
224 
225 static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) {
226   int cont = 0;
227   (void)(cont);  /* avoid warnings when `cont' is not used */
228   save_and_next(ls);  /* skip 2nd `[' */
229   if (currIsNewline(ls))  /* string starts with a newline? */
230     inclinenumber(ls);  /* skip it */
231   for (;;) {
232     switch (ls->current) {
233       case EOZ:
234         luaX_lexerror(ls, (seminfo) ? "unfinished long string" :
235                                    "unfinished long comment", TK_EOS);
236         break;  /* to avoid warnings */
237 #if defined(LUA_COMPAT_LSTR)
238       case '[': {
239         if (skip_sep(ls) == sep) {
240           save_and_next(ls);  /* skip 2nd `[' */
241           cont++;
242 #if LUA_COMPAT_LSTR == 1
243           if (sep == 0)
244             luaX_lexerror(ls, "nesting of [[...]] is deprecated", '[');
245 #endif
246         }
247         break;
248       }
249 #endif
250       case ']': {
251         if (skip_sep(ls) == sep) {
252           save_and_next(ls);  /* skip 2nd `]' */
253 #if defined(LUA_COMPAT_LSTR) && LUA_COMPAT_LSTR == 2
254           cont--;
255           if (sep == 0 && cont >= 0) break;
256 #endif
257           goto endloop;
258         }
259         break;
260       }
261       case '\n':
262       case '\r': {
263         save(ls, '\n');
264         inclinenumber(ls);
265         if (!seminfo) luaZ_resetbuffer(ls->buff);  /* avoid wasting space */
266         break;
267       }
268       default: {
269         if (seminfo) save_and_next(ls);
270         else next(ls);
271       }
272     }
273   } endloop:
274   if (seminfo)
275     seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep),
276                                      luaZ_bufflen(ls->buff) - 2*(2 + sep));
277 }
278 
279 
280 static void read_string (LexState *ls, int del, SemInfo *seminfo) {
281   save_and_next(ls);
282   while (ls->current != del) {
283     switch (ls->current) {
284       case EOZ:
285         luaX_lexerror(ls, "unfinished string", TK_EOS);
286         continue;  /* to avoid warnings */
287       case '\n':
288       case '\r':
289         luaX_lexerror(ls, "unfinished string", TK_STRING);
290         continue;  /* to avoid warnings */
291       case '\\': {
292         int c;
293         next(ls);  /* do not save the `\' */
294         switch (ls->current) {
295           case 'a': c = '\a'; break;
296           case 'b': c = '\b'; break;
297           case 'f': c = '\f'; break;
298           case 'n': c = '\n'; break;
299           case 'r': c = '\r'; break;
300           case 't': c = '\t'; break;
301           case 'v': c = '\v'; break;
302           case '\n':  /* go through */
303           case '\r': save(ls, '\n'); inclinenumber(ls); continue;
304           case EOZ: continue;  /* will raise an error next loop */
305           default: {
306             if (!isdigit(ls->current))
307               save_and_next(ls);  /* handles \\, \", \', and \? */
308             else {  /* \xxx */
309               int i = 0;
310               c = 0;
311               do {
312                 c = 10*c + (ls->current-'0');
313                 next(ls);
314               } while (++i<3 && isdigit(ls->current));
315               if (c > UCHAR_MAX)
316                 luaX_lexerror(ls, "escape sequence too large", TK_STRING);
317               save(ls, c);
318             }
319             continue;
320           }
321         }
322         save(ls, c);
323         next(ls);
324         continue;
325       }
326       default:
327         save_and_next(ls);
328     }
329   }
330   save_and_next(ls);  /* skip delimiter */
331   seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
332                                    luaZ_bufflen(ls->buff) - 2);
333 }
334 
335 
336 static int llex (LexState *ls, SemInfo *seminfo) {
337   luaZ_resetbuffer(ls->buff);
338   for (;;) {
339     switch (ls->current) {
340       case '\n':
341       case '\r': {
342         inclinenumber(ls);
343         continue;
344       }
345       case '-': {
346         next(ls);
347         if (ls->current != '-') return '-';
348         /* else is a comment */
349         next(ls);
350         if (ls->current == '[') {
351           int sep = skip_sep(ls);
352           luaZ_resetbuffer(ls->buff);  /* `skip_sep' may dirty the buffer */
353           if (sep >= 0) {
354             read_long_string(ls, NULL, sep);  /* long comment */
355             luaZ_resetbuffer(ls->buff);
356             continue;
357           }
358         }
359         /* else short comment */
360         while (!currIsNewline(ls) && ls->current != EOZ)
361           next(ls);
362         continue;
363       }
364       case '[': {
365         int sep = skip_sep(ls);
366         if (sep >= 0) {
367           read_long_string(ls, seminfo, sep);
368           return TK_STRING;
369         }
370         else if (sep == -1) return '[';
371         else luaX_lexerror(ls, "invalid long string delimiter", TK_STRING);
372       }
373       case '=': {
374         next(ls);
375         if (ls->current != '=') return '=';
376         else { next(ls); return TK_EQ; }
377       }
378       case '<': {
379         next(ls);
380         if (ls->current != '=') return '<';
381         else { next(ls); return TK_LE; }
382       }
383       case '>': {
384         next(ls);
385         if (ls->current != '=') return '>';
386         else { next(ls); return TK_GE; }
387       }
388       case '~': {
389         next(ls);
390         if (ls->current != '=') return '~';
391         else { next(ls); return TK_NE; }
392       }
393       case '"':
394       case '\'': {
395         read_string(ls, ls->current, seminfo);
396         return TK_STRING;
397       }
398       case '.': {
399         save_and_next(ls);
400         if (check_next(ls, ".")) {
401           if (check_next(ls, "."))
402             return TK_DOTS;   /* ... */
403           else return TK_CONCAT;   /* .. */
404         }
405         else if (!isdigit(ls->current)) return '.';
406         else {
407           read_numeral(ls, seminfo);
408           return TK_NUMBER;
409         }
410       }
411       case EOZ: {
412         return TK_EOS;
413       }
414       default: {
415         if (isspace(ls->current)) {
416           lua_assert(!currIsNewline(ls));
417           next(ls);
418           continue;
419         }
420         else if (isdigit(ls->current)) {
421           read_numeral(ls, seminfo);
422           return TK_NUMBER;
423         }
424         else if (isalpha(ls->current) || ls->current == '_') {
425           /* identifier or reserved word */
426           TString *ts;
427           do {
428             save_and_next(ls);
429           } while (isalnum(ls->current) || ls->current == '_');
430           ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
431                                   luaZ_bufflen(ls->buff));
432           if (ts->tsv.reserved > 0)  /* reserved word? */
433             return ts->tsv.reserved - 1 + FIRST_RESERVED;
434           else {
435             seminfo->ts = ts;
436             return TK_NAME;
437           }
438         }
439         else {
440           int c = ls->current;
441           next(ls);
442           return c;  /* single-char tokens (+ - / ...) */
443         }
444       }
445     }
446   }
447 }
448 
449 
450 void luaX_next (LexState *ls) {
451   ls->lastline = ls->linenumber;
452   if (ls->lookahead.token != TK_EOS) {  /* is there a look-ahead token? */
453     ls->t = ls->lookahead;  /* use this one */
454     ls->lookahead.token = TK_EOS;  /* and discharge it */
455   }
456   else
457     ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token */
458 }
459 
460 
461 void luaX_lookahead (LexState *ls) {
462   lua_assert(ls->lookahead.token == TK_EOS);
463   ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
464 }
465 
466