1 /* mclex.c -- lexer for Windows mc files parser. 2 Copyright (C) 2007-2022 Free Software Foundation, Inc. 3 4 Written by Kai Tietz, Onevision. 5 6 This file is part of GNU Binutils. 7 8 This program is free software; you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or 11 (at your option) any later version. 12 13 This program is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with this program; if not, write to the Free Software 20 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 21 02110-1301, USA. */ 22 23 /* This is a lexer used by the Windows rc file parser. 24 It basically just recognized a bunch of keywords. */ 25 26 #include "sysdep.h" 27 #include "bfd.h" 28 #include "bucomm.h" 29 #include "libiberty.h" 30 #include "safe-ctype.h" 31 #include "windmc.h" 32 #include "mcparse.h" 33 34 #include <assert.h> 35 36 /* Exported globals. */ 37 bool mclex_want_nl = false; 38 bool mclex_want_line = false; 39 bool mclex_want_filename = false; 40 41 /* Local globals. */ 42 static unichar *input_stream = NULL; 43 static unichar *input_stream_pos = NULL; 44 static int input_line = 1; 45 static const char *input_filename = NULL; 46 47 void 48 mc_set_content (const unichar *src) 49 { 50 if (!src) 51 return; 52 input_stream = input_stream_pos = unichar_dup (src); 53 } 54 55 void 56 mc_set_inputfile (const char *name) 57 { 58 if (! name || *name == 0) 59 input_filename = "-"; 60 else 61 { 62 const char *s1 = strrchr (name, '/'); 63 const char *s2 = strrchr (name, '\\'); 64 65 if (! s1) 66 s1 = s2; 67 if (s1 && s2 && s1 < s2) 68 s1 = s2; 69 if (! s1) 70 s1 = name; 71 else 72 s1++; 73 s1 = xstrdup (s1); 74 input_filename = s1; 75 } 76 } 77 78 static void 79 show_msg (const char *kind, const char *msg, va_list argp) 80 { 81 fprintf (stderr, "In %s at line %d: %s: ", input_filename, input_line, kind); 82 vfprintf (stderr, msg, argp); 83 fprintf (stderr, ".\n"); 84 } 85 86 void 87 mc_warn (const char *s, ...) 88 { 89 va_list argp; 90 va_start (argp, s); 91 show_msg ("warning", s, argp); 92 va_end (argp); 93 } 94 95 void 96 mc_fatal (const char *s, ...) 97 { 98 va_list argp; 99 va_start (argp, s); 100 show_msg ("fatal", s, argp); 101 va_end (argp); 102 xexit (1); 103 } 104 105 106 static void 107 mc_error (const char *s, ...) 108 { 109 va_list argp; 110 va_start (argp, s); 111 show_msg ("parser", s, argp); 112 va_end (argp); 113 } 114 115 void 116 yyerror (const char *s) 117 { 118 mc_error (s); 119 } 120 121 static unichar * 122 get_diff (unichar *end, unichar *start) 123 { 124 unichar *ret; 125 unichar save = *end; 126 127 *end = 0; 128 ret = unichar_dup (start); 129 *end = save; 130 return ret; 131 } 132 133 static rc_uint_type 134 parse_digit (unichar ch) 135 { 136 rc_uint_type base = 10, v = 0, c; 137 138 if (ch == '0') 139 { 140 base = 8; 141 switch (input_stream_pos[0]) 142 { 143 case 'x': case 'X': base = 16; input_stream_pos++; break; 144 case 'o': case 'O': base = 8; input_stream_pos++; break; 145 case 'b': case 'B': base = 2; input_stream_pos++; break; 146 } 147 } 148 else 149 v = (rc_uint_type) (ch - '0'); 150 151 while ((ch = input_stream_pos[0]) != 0) 152 { 153 if (ch >= 'A' && ch <= 'F') 154 c = (rc_uint_type) (ch - 'A') + 10; 155 else if (ch >= 'a' && ch <= 'f') 156 c = (rc_uint_type) (ch - 'a') + 10; 157 else if (ch >= '0' && ch <= '9') 158 c = (rc_uint_type) (ch - '0'); 159 else 160 break; 161 v *= base; 162 v += c; 163 ++input_stream_pos; 164 } 165 if (input_stream_pos[0] == 'U' || input_stream_pos[0] == 'u') 166 input_stream_pos++; 167 if (input_stream_pos[0] == 'L' || input_stream_pos[0] == 'l') 168 input_stream_pos++; 169 if (input_stream_pos[0] == 'L' || input_stream_pos[0] == 'l') 170 input_stream_pos++; 171 return v; 172 } 173 174 static mc_keyword *keyword_top = NULL; 175 176 const mc_keyword * 177 enum_facility (int e) 178 { 179 mc_keyword *h = keyword_top; 180 181 while (h != NULL) 182 { 183 while (h && strcmp (h->group_name, "facility") != 0) 184 h = h->next; 185 if (e == 0) 186 return h; 187 --e; 188 if (h) 189 h = h->next; 190 } 191 return h; 192 } 193 194 const mc_keyword * 195 enum_severity (int e) 196 { 197 mc_keyword *h = keyword_top; 198 199 while (h != NULL) 200 { 201 while (h && strcmp (h->group_name, "severity") != 0) 202 h = h->next; 203 if (e == 0) 204 return h; 205 --e; 206 if (h) 207 h = h->next; 208 } 209 return h; 210 } 211 212 static void 213 mc_add_keyword_ascii (const char *sz, int rid, const char *grp, rc_uint_type nv, const char *sv) 214 { 215 unichar *usz = NULL, *usv = NULL; 216 rc_uint_type usz_len; 217 218 unicode_from_codepage (&usz_len, &usz, sz, CP_ACP); 219 if (sv) 220 unicode_from_codepage (&usz_len, &usv, sv, CP_ACP); 221 mc_add_keyword (usz, rid, grp, nv, usv); 222 } 223 224 void 225 mc_add_keyword (unichar *usz, int rid, const char *grp, rc_uint_type nv, unichar *sv) 226 { 227 mc_keyword *p, *c, *n; 228 size_t len = unichar_len (usz); 229 230 c = keyword_top; 231 p = NULL; 232 while (c != NULL) 233 { 234 if (c->len > len) 235 break; 236 if (c->len == len) 237 { 238 int e = memcmp (usz, c->usz, len * sizeof (unichar)); 239 240 if (e < 0) 241 break; 242 if (! e) 243 { 244 if (! strcmp (grp, "keyword") || strcmp (c->group_name, grp) != 0) 245 fatal (_("Duplicate symbol entered into keyword list.")); 246 c->rid = rid; 247 c->nval = nv; 248 c->sval = (!sv ? NULL : unichar_dup (sv)); 249 if (! strcmp (grp, "language")) 250 { 251 const wind_language_t *lag = wind_find_language_by_id ((unsigned) nv); 252 253 if (lag == NULL) 254 fatal ("Language ident 0x%lx is not resolvable.\n", (long) nv); 255 memcpy (&c->lang_info, lag, sizeof (*lag)); 256 } 257 return; 258 } 259 } 260 c = (p = c)->next; 261 } 262 n = xmalloc (sizeof (mc_keyword)); 263 n->next = c; 264 n->len = len; 265 n->group_name = grp; 266 n->usz = usz; 267 n->rid = rid; 268 n->nval = nv; 269 n->sval = (!sv ? NULL : unichar_dup (sv)); 270 if (! strcmp (grp, "language")) 271 { 272 const wind_language_t *lag = wind_find_language_by_id ((unsigned) nv); 273 if (lag == NULL) 274 fatal ("Language ident 0x%lx is not resolvable.\n", (long) nv); 275 memcpy (&n->lang_info, lag, sizeof (*lag)); 276 } 277 if (! p) 278 keyword_top = n; 279 else 280 p->next = n; 281 } 282 283 static int 284 mc_token (const unichar *t, size_t len) 285 { 286 static int was_init = 0; 287 mc_keyword *k; 288 289 if (! was_init) 290 { 291 was_init = 1; 292 mc_add_keyword_ascii ("OutputBase", MCOUTPUTBASE, "keyword", 0, NULL); 293 mc_add_keyword_ascii ("MessageIdTypedef", MCMESSAGEIDTYPEDEF, "keyword", 0, NULL); 294 mc_add_keyword_ascii ("SeverityNames", MCSEVERITYNAMES, "keyword", 0, NULL); 295 mc_add_keyword_ascii ("FacilityNames", MCFACILITYNAMES, "keyword", 0, NULL); 296 mc_add_keyword_ascii ("LanguageNames", MCLANGUAGENAMES, "keyword", 0, NULL); 297 mc_add_keyword_ascii ("MessageId", MCMESSAGEID, "keyword", 0, NULL); 298 mc_add_keyword_ascii ("Severity", MCSEVERITY, "keyword", 0, NULL); 299 mc_add_keyword_ascii ("Facility", MCFACILITY, "keyword", 0, NULL); 300 mc_add_keyword_ascii ("SymbolicName", MCSYMBOLICNAME, "keyword", 0, NULL); 301 mc_add_keyword_ascii ("Language", MCLANGUAGE, "keyword", 0, NULL); 302 mc_add_keyword_ascii ("Success", MCTOKEN, "severity", 0, NULL); 303 mc_add_keyword_ascii ("Informational", MCTOKEN, "severity", 1, NULL); 304 mc_add_keyword_ascii ("Warning", MCTOKEN, "severity", 2, NULL); 305 mc_add_keyword_ascii ("Error", MCTOKEN, "severity", 3, NULL); 306 mc_add_keyword_ascii ("System", MCTOKEN, "facility", 0xff, NULL); 307 mc_add_keyword_ascii ("Application", MCTOKEN, "facility", 0xfff, NULL); 308 mc_add_keyword_ascii ("English", MCTOKEN, "language", 0x409, "MSG00001"); 309 } 310 k = keyword_top; 311 if (!len || !t || *t == 0) 312 return -1; 313 while (k != NULL) 314 { 315 if (k->len > len) 316 break; 317 if (k->len == len) 318 { 319 if (! memcmp (k->usz, t, len * sizeof (unichar))) 320 { 321 if (k->rid == MCTOKEN) 322 yylval.tok = k; 323 return k->rid; 324 } 325 } 326 k = k->next; 327 } 328 return -1; 329 } 330 331 /* Skip characters in input_stream_pos up to and including a newline 332 character. Returns non-zero if the newline was found, zero otherwise. */ 333 334 static int 335 skip_until_eol (void) 336 { 337 while (input_stream_pos[0] != 0 && input_stream_pos[0] != '\n') 338 ++input_stream_pos; 339 if (input_stream_pos[0] == 0) 340 return 0; 341 if (input_stream_pos[0] == '\n') 342 { 343 ++input_stream_pos; 344 input_line += 1; 345 } 346 return 1; 347 } 348 349 int 350 yylex (void) 351 { 352 unichar *start_token; 353 unichar ch; 354 355 if (! input_stream_pos) 356 { 357 fatal ("Input stream not setuped.\n"); 358 return -1; 359 } 360 361 if (mclex_want_line) 362 { 363 start_token = input_stream_pos; 364 if (input_stream_pos[0] == 0) 365 return -1; 366 /* PR 26082: Reject a period followed by EOF. */ 367 if (input_stream_pos[0] == '.' && input_stream_pos[1] == 0) 368 return -1; 369 if (input_stream_pos[0] == '.' 370 && (input_stream_pos[1] == '\n' 371 || (input_stream_pos[1] == '\r' && input_stream_pos[2] == '\n'))) 372 { 373 mclex_want_line = false; 374 return skip_until_eol () ? MCENDLINE : -1; 375 } 376 if (!skip_until_eol ()) 377 return -1; 378 yylval.ustr = get_diff (input_stream_pos, start_token); 379 return MCLINE; 380 } 381 382 while ((ch = input_stream_pos[0]) <= 0x20) 383 { 384 if (ch == 0) 385 return -1; 386 ++input_stream_pos; 387 if (ch == '\n') 388 input_line += 1; 389 if (mclex_want_nl && ch == '\n') 390 { 391 mclex_want_nl = false; 392 return NL; 393 } 394 } 395 start_token = input_stream_pos; 396 ++input_stream_pos; 397 if (mclex_want_filename) 398 { 399 mclex_want_filename = false; 400 if (ch == '"') 401 { 402 start_token++; 403 while ((ch = input_stream_pos[0]) != 0) 404 { 405 if (ch == '"') 406 break; 407 ++input_stream_pos; 408 } 409 yylval.ustr = get_diff (input_stream_pos, start_token); 410 if (ch == '"') 411 ++input_stream_pos; 412 } 413 else 414 { 415 while ((ch = input_stream_pos[0]) != 0) 416 { 417 if (ch <= 0x20 || ch == ')') 418 break; 419 ++input_stream_pos; 420 } 421 yylval.ustr = get_diff (input_stream_pos, start_token); 422 } 423 return MCFILENAME; 424 } 425 switch (ch) 426 { 427 case ';': 428 ++start_token; 429 if (!skip_until_eol ()) 430 return -1; 431 yylval.ustr = get_diff (input_stream_pos, start_token); 432 return MCCOMMENT; 433 case '=': 434 return '='; 435 case '(': 436 return '('; 437 case ')': 438 return ')'; 439 case '+': 440 return '+'; 441 case ':': 442 return ':'; 443 case '0': case '1': case '2': case '3': case '4': 444 case '5': case '6': case '7': case '8': case '9': 445 yylval.ival = parse_digit (ch); 446 return MCNUMBER; 447 default: 448 if (ch >= 0x40) 449 { 450 int ret; 451 while (input_stream_pos[0] >= 0x40 || (input_stream_pos[0] >= '0' && input_stream_pos[0] <= '9')) 452 ++input_stream_pos; 453 ret = mc_token (start_token, (size_t) (input_stream_pos - start_token)); 454 if (ret != -1) 455 return ret; 456 yylval.ustr = get_diff (input_stream_pos, start_token); 457 return MCIDENT; 458 } 459 mc_error ("illegal character 0x%x.", ch); 460 } 461 return -1; 462 } 463