xref: /netbsd-src/external/bsd/elftosb/dist/elftosb2/elftosb_lexer.l (revision 993229b6fea628ff8b1fa09146c80b0cfb2768eb)
1 /*
2  * Copyright (c) Freescale Semiconductor, Inc. All rights reserved.
3  * See included license file for license details.
4  */
5 
6 %option c++
7 /* %option prefix="Elftosb" */
8 %option yylineno
9 %option never-interactive
10 %option yyclass="ElftosbLexer"
11 %option noyywrap
12 
13 %{
14 #include "ElftosbLexer.h"
15 #include <stdlib.h>
16 #include <limits.h>
17 #include <string>
18 #include "HexValues.h"
19 #include "Value.h"
20 
21 using namespace elftosb;
22 
//! Hook that the scanner runs ahead of the action of every matched rule.
//!
//! It stamps the token location so that its first and last line are both the
//! current line. That is right for most tokens; an action whose token may
//! cover several lines can overwrite the location afterwards.
//!
//! The expansion has to be a complete statement on its own (hence a plain
//! compound statement), since the original definition supplied its own
//! terminating semicolon rather than relying on the place of use.
#define YY_USER_ACTION	{										\
							m_location.m_firstLine = m_line;	\
							m_location.m_lastLine = m_line;		\
						}
30 
31 %}
32 
/* named sub-patterns used by the rules below */
DIGIT		[0-9]
HEXDIGIT	[0-9a-fA-F]
BINDIGIT	[01]
IDENT		[a-zA-Z_][a-zA-Z0-9_]*
/* a backslash followed by any one character (not a newline) or by x and two hex digits */
ESC			\\(.|x{HEXDIGIT}{2})

/* exclusive start conditions: blob body and multi-line comment body */
%x blob
%x mlcmt
41 
42 %%
43 
"options"		return TOK_OPTIONS;		/* keywords come before {IDENT} so they win on equal-length matches */
"constants"		return TOK_CONSTANTS;
"sources"		return TOK_SOURCES;
"filters"		return TOK_FILTERS;
"section"		return TOK_SECTION;
"extern"		return TOK_EXTERN;
"from"			return TOK_FROM;
"raw"			return TOK_RAW;
"load"			return TOK_LOAD;
"jump"			return TOK_JUMP;
"call"			return TOK_CALL;
"mode"			return TOK_MODE;
"if"			return TOK_IF;
"else"			return TOK_ELSE;
"defined"		return TOK_DEFINED;
"info"			return TOK_INFO;
"warning"		return TOK_WARNING;
"error"			return TOK_ERROR;
"sizeof"		return TOK_SIZEOF;
"dcd"			return TOK_DCD;
"hab"			return TOK_HAB;
"ivt"			return TOK_IVT;
66 
[whb]/[^a-zA-Z_0-9]					{
										// Integer size specifier: a single 'w', 'h' or 'b'. The trailing
										// context demands that the next character cannot continue an
										// identifier, so the letter is never split off a longer name.
										const char sizeLetter = yytext[0];
										int_size_t width = kByteSize;	// the 'b' case
										if (sizeLetter == 'w')
										{
											width = kWordSize;
										}
										else if (sizeLetter == 'h')
										{
											width = kHalfWordSize;
										}

										// The value is zero; only the size carried by the object is set here.
										m_symbolValue.m_int = new elftosb::SizedIntegerValue(0, width);
										return TOK_INT_SIZE;
									}
84 
"true"|"yes"						{
										// Affirmative spellings become a word-sized integer literal of value 1.
										elftosb::SizedIntegerValue * one = new elftosb::SizedIntegerValue(1, kWordSize);
										m_symbolValue.m_int = one;
										return TOK_INT_LITERAL;
									}

"false"|"no"						{
										// Negative spellings become a word-sized integer literal of value 0.
										elftosb::SizedIntegerValue * zero = new elftosb::SizedIntegerValue(0, kWordSize);
										m_symbolValue.m_int = zero;
										return TOK_INT_LITERAL;
									}
94 
{IDENT}								{
										// The matched name is copied into a newly allocated string that is
										// stored as the symbol value. isSourceName() then decides which of
										// the two token types the name is reported as.
										m_symbolValue.m_str = new std::string(yytext);
										const bool namesSource = isSourceName(m_symbolValue.m_str);
										return namesSource ? TOK_SOURCE_NAME : TOK_IDENT;
									}
106 
({DIGIT}+|0x{HEXDIGIT}+|0b{BINDIGIT}+)([ \t]*[GMK])?			{
										// Integer literal: decimal, 0x hex or 0b binary digits, optionally
										// followed (after any blanks or tabs) by a K, M or G suffix that
										// scales the value by a power of 1024.
										const char * digits = yytext;
										int radix = 0;	// 0 lets strtoul() pick the base from the prefix

										// strtoul() has no notion of a "0b" prefix, so step over it and
										// name the base explicitly.
										if (digits[0] == '0' && digits[1] == 'b')
										{
											digits += 2;
											radix = 2;
										}

										// Conversion stops by itself at the first character that is not
										// a digit of the base, i.e. before any blanks and suffix.
										const uint32_t magnitude = (uint32_t)strtoul(digits, NULL, radix);

										// When a suffix is present it is the last matched character.
										int scale = 1;
										const char lastChar = yytext[yyleng - 1];
										if (lastChar == 'K')
										{
											scale = 1024;
										}
										else if (lastChar == 'M')
										{
											scale = 1024 * 1024;
										}
										else if (lastChar == 'G')
										{
											scale = 1024 * 1024 * 1024;
										}

										// No overflow check is made on the scaled value.
										m_symbolValue.m_int = new elftosb::SizedIntegerValue(magnitude * scale, kWordSize);
										return TOK_INT_LITERAL;
									}
143 
\'(.|ESC)\'|\'(.|ESC){2}\'|\'(.|ESC){4}\'		{
										// Character literal holding one, two or four characters. The
										// characters are packed with the first one in the most significant
										// position, and the literal is sized as a byte, half-word or word
										// according to the length of the matched text.
										//
										// NOTE(review): ESC appears here without braces, so flex takes it as
										// the literal text "ESC" rather than the {ESC} definition, and no
										// backslash escape is decoded below. Left as is because changing
										// the pattern would change which inputs are accepted - confirm intent.
										uint32_t value = 0;
										int_size_t theSize = kByteSize;	// the pattern guarantees at least one character
										const int len = yyleng;

										// Each character goes through unsigned char first: where plain
										// char is signed, a byte of 0x80 or above would otherwise be
										// sign-extended and overwrite the bytes already packed above it.
										if (len >= 3)
										{
											value = static_cast<unsigned char>(yytext[1]);
											theSize = kByteSize;
										}
										if (len >= 4)
										{
											value = (value << 8) | static_cast<unsigned char>(yytext[2]);
											theSize = kHalfWordSize;
										}
										if (len >= 6)
										{
											value = (value << 8) | static_cast<unsigned char>(yytext[3]);
											value = (value << 8) | static_cast<unsigned char>(yytext[4]);
											theSize = kWordSize;
										}
										m_symbolValue.m_int = new elftosb::SizedIntegerValue(value, theSize);
										return TOK_INT_LITERAL;
									}
167 
\$[\.\*a-zA-Z0-9_\[\]\^\?\-]+					{
										// Section name: the text after the leading '$' is stored, the '$'
										// itself is left out.
										const char * afterDollar = yytext + 1;
										m_symbolValue.m_str = new std::string(afterDollar);
										return TOK_SECTION_NAME;
									}
173 
174 
175 "/*"                                { BEGIN(mlcmt); }
176 
177 "{{"								{
178 										m_blob = new Blob();
179 										m_blobFirstLine = yylineno;
180 										BEGIN(blob);
181 									}
182 
"<<"								return TOK_LSHIFT;
	/* Operators of two characters each get a token of their own. They need no
	   special ordering against the single-character rule further down, because
	   the scanner always prefers the longest match. */
">>"								return TOK_RSHIFT;
"**"								return TOK_POWER;
".."								return TOK_DOT_DOT;
">="								return TOK_GEQ;
"<="								return TOK_LEQ;
"=="								return TOK_EQ;
"!="								return TOK_NEQ;
"&&"								return TOK_AND;
"||"								return TOK_OR;

	/* Punctuation and operators of a single character are reported to the
	   parser as that very character. */
[\{\}\(\)\[\]=,:;\.><+\-*\/%~\^&|!]	return yytext[0];
248 
\"(ESC|[^\"])*\"					{
										// String literal: the symbol value is the text between the two
										// quotes, taken verbatim. Escape processing is not performed (the
										// former processStringEscapes() call was disabled).
										//
										// NOTE(review): ESC appears here without braces, so flex takes it as
										// the literal text "ESC" rather than the {ESC} definition; a backslash
										// therefore does not protect a following quote - confirm intent.
										const int bodyLength = yyleng - 2;	// matched text minus both quotes
										m_symbolValue.m_str = new std::string(yytext + 1, static_cast<std::string::size_type>(bodyLength));

										// [^\"] matches line breaks too, and those never reach the newline
										// rule. Count them here (a CR LF pair counts once) so that the line
										// counter and the token's last line stay correct.
										for (int i = 1; i <= bodyLength; ++i)
										{
											const char c = yytext[i];
											if (c == '\r' || c == '\n')
											{
												if (c == '\r' && yytext[i + 1] == '\n')
												{
													++i;
												}
												++m_line;
											}
										}
										m_location.m_lastLine = m_line;

										return TOK_STRING_LITERAL;
									}
257 
<blob>{HEXDIGIT}{2}					{
										// Two hex digits make up one byte, the first digit being the high
										// nibble; the byte is appended to the blob under construction.
										uint8_t byteValue = hexCharToInt(yytext[0]) << 4;
										byteValue |= hexCharToInt(yytext[1]);
										m_blob->append(&byteValue, 1);
									}

<blob>"}}"							{
										// The blob ends: report the line it started on as the token's first
										// line, pass the finished blob on as the symbol value, let go of
										// the lexer's own pointer to it and return to the initial state.
										m_location.m_firstLine = m_blobFirstLine;
										m_symbolValue.m_blob = m_blob;
										m_blob = NULL;
										BEGIN(INITIAL);
										return TOK_BLOB;
									}

<mlcmt>"*/"                         BEGIN(INITIAL);	/* the multi-line comment ends: back to the initial state */
275 
276 
(#|\/\/).*$							/* absorb single-line comment (starts with # or two slashes) */

<*>[ \t]							/* eat up whitespace in all states */

<*>(\r\n|\r|\n)						{
										/* a line break in any state produces no token but advances the line counter */
										m_line++;
									}

<mlcmt>.                            /* ignore all other chars in a multi-line comment */

<*>.								{
										/* Any character that no earlier rule accepts is an error. The
										   message is formatted with a bounded write so that it can never
										   run past the end of the buffer. */
										char msg[50];
										snprintf(msg, sizeof(msg), "unexpected character '%c' on line %d", yytext[0], m_line);
										LexerError(msg);
									}
294 
295 %%
296 
297 // verbatim code copied to the bottom of the output
298 
299 
300