/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */

/*
 * This file contains the "scanner", which tokenizes charmap files
 * for iconv for processing by the higher level grammar processor.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <string.h>
#include <widec.h>
#include <sys/types.h>
#include <assert.h>
#include "charmap.h"
#include "parser.tab.h"

3248edc7cfSGordon Ross int com_char = '#';
3348edc7cfSGordon Ross int esc_char = '\\';
3448edc7cfSGordon Ross int mb_cur_min = 1;
3548edc7cfSGordon Ross int mb_cur_max = MB_LEN_MAX;
3648edc7cfSGordon Ross int lineno = 1;
3748edc7cfSGordon Ross int warnings = 0;
3848edc7cfSGordon Ross static int nextline;
3948edc7cfSGordon Ross static FILE *input = stdin;
4048edc7cfSGordon Ross static const char *filename = "<stdin>";
4148edc7cfSGordon Ross static int instring = 0;
4248edc7cfSGordon Ross static int escaped = 0;
4348edc7cfSGordon Ross
/*
 * Token space ... grows on demand.
 */
static char *token = NULL;	/* NUL-terminated text of the current token */
static int tokidx = 0;		/* number of bytes stored in token */
static int toksz = 0;		/* number of bytes allocated for token */
static int hadtok = 0;		/* a token was seen on the current line */

5248edc7cfSGordon Ross /*
5348edc7cfSGordon Ross * The last keyword seen. This is useful to trigger the special lexer rules
5448edc7cfSGordon Ross * for "copy" and also collating symbols and elements.
5548edc7cfSGordon Ross */
5648edc7cfSGordon Ross int last_kw = 0;
5748edc7cfSGordon Ross static int category = T_END;
5848edc7cfSGordon Ross
5948edc7cfSGordon Ross static struct token {
6048edc7cfSGordon Ross int id;
6148edc7cfSGordon Ross const char *name;
6248edc7cfSGordon Ross } keywords[] = {
6348edc7cfSGordon Ross { T_COM_CHAR, "comment_char" },
6448edc7cfSGordon Ross { T_ESC_CHAR, "escape_char" },
6548edc7cfSGordon Ross { T_END, "END" },
6648edc7cfSGordon Ross
6748edc7cfSGordon Ross /*
6848edc7cfSGordon Ross * These are keywords used in the charmap file. Note that
6948edc7cfSGordon Ross * Solaris orginally used angle brackets to wrap some of them,
7048edc7cfSGordon Ross * but we removed that to simplify our parser. The first of these
7148edc7cfSGordon Ross * items are "global items."
7248edc7cfSGordon Ross */
7348edc7cfSGordon Ross { T_CHARMAP, "CHARMAP" },
7448edc7cfSGordon Ross { T_WIDTH, "WIDTH" },
7548edc7cfSGordon Ross { T_WIDTH_DEFAULT, "WIDTH_DEFAULT" },
7648edc7cfSGordon Ross
7748edc7cfSGordon Ross { -1, NULL },
7848edc7cfSGordon Ross };
7948edc7cfSGordon Ross
8048edc7cfSGordon Ross /*
8148edc7cfSGordon Ross * These special words are only used in a charmap file, enclosed in <>.
8248edc7cfSGordon Ross */
8348edc7cfSGordon Ross static struct token symwords[] = {
8448edc7cfSGordon Ross { T_COM_CHAR, "comment_char" },
8548edc7cfSGordon Ross { T_ESC_CHAR, "escape_char" },
8648edc7cfSGordon Ross { T_CODE_SET, "code_set_name" },
8748edc7cfSGordon Ross { T_MB_CUR_MAX, "mb_cur_max" },
8848edc7cfSGordon Ross { T_MB_CUR_MIN, "mb_cur_min" },
8948edc7cfSGordon Ross { -1, NULL },
9048edc7cfSGordon Ross };
9148edc7cfSGordon Ross
9248edc7cfSGordon Ross static int categories[] = {
9348edc7cfSGordon Ross T_CHARMAP,
9448edc7cfSGordon Ross 0
9548edc7cfSGordon Ross };
9648edc7cfSGordon Ross
9748edc7cfSGordon Ross void
reset_scanner(const char * fname)9848edc7cfSGordon Ross reset_scanner(const char *fname)
9948edc7cfSGordon Ross {
10048edc7cfSGordon Ross if (fname == NULL) {
10148edc7cfSGordon Ross filename = "<stdin>";
10248edc7cfSGordon Ross input = stdin;
10348edc7cfSGordon Ross } else {
10448edc7cfSGordon Ross if (input != stdin)
10548edc7cfSGordon Ross (void) fclose(input);
10648edc7cfSGordon Ross if ((input = fopen(fname, "r")) == NULL) {
10748edc7cfSGordon Ross perror(fname);
10848edc7cfSGordon Ross exit(1);
10948edc7cfSGordon Ross }
11048edc7cfSGordon Ross filename = fname;
11148edc7cfSGordon Ross }
11248edc7cfSGordon Ross com_char = '#';
11348edc7cfSGordon Ross esc_char = '\\';
11448edc7cfSGordon Ross instring = 0;
11548edc7cfSGordon Ross escaped = 0;
11648edc7cfSGordon Ross lineno = 1;
11748edc7cfSGordon Ross nextline = 1;
11848edc7cfSGordon Ross tokidx = 0;
11948edc7cfSGordon Ross last_kw = 0;
12048edc7cfSGordon Ross category = T_END;
12148edc7cfSGordon Ross }
12248edc7cfSGordon Ross
/*
 * hex(x):	numeric value of the hexadecimal digit character x.  The
 *		result is only meaningful when isxdigit(x) is true.
 * isodigit(x):	true when x is an octal digit character.
 *
 * Both macros evaluate their argument more than once, so do not pass an
 * expression with side effects.
 */
#define	hex(x)	\
	(isdigit(x) ? ((x) - '0') : \
	((islower(x) ? ((x) - 'a') : ((x) - 'A')) + 10))
#define	isodigit(x)	(((x) >= '0') && ((x) <= '7'))

12748edc7cfSGordon Ross static int
scanc(void)12848edc7cfSGordon Ross scanc(void)
12948edc7cfSGordon Ross {
13048edc7cfSGordon Ross int c;
13148edc7cfSGordon Ross
13248edc7cfSGordon Ross c = getc(input);
13348edc7cfSGordon Ross lineno = nextline;
13448edc7cfSGordon Ross if (c == '\n') {
13548edc7cfSGordon Ross nextline++;
13648edc7cfSGordon Ross }
13748edc7cfSGordon Ross return (c);
13848edc7cfSGordon Ross }
13948edc7cfSGordon Ross
14048edc7cfSGordon Ross static void
unscanc(int c)14148edc7cfSGordon Ross unscanc(int c)
14248edc7cfSGordon Ross {
14348edc7cfSGordon Ross if (c == '\n') {
14448edc7cfSGordon Ross nextline--;
14548edc7cfSGordon Ross }
14648edc7cfSGordon Ross if (ungetc(c, input) < 0) {
147*1a90c98dSToomas Soome (void) yyerror(_("ungetc failed"));
14848edc7cfSGordon Ross }
14948edc7cfSGordon Ross }
15048edc7cfSGordon Ross
/*
 * Read exactly two hexadecimal digits from the input and return the
 * byte value they encode (0 - 255).  Anything else is reported through
 * yyerror().
 */
static int
scan_hex_byte(void)
{
	int	c1, c2;
	int	v;

	c1 = scanc();
	if (!isxdigit(c1)) {
		(void) yyerror(_("malformed hex digit"));
		return (0);
	}
	c2 = scanc();
	if (!isxdigit(c2)) {
		(void) yyerror(_("malformed hex digit"));
		return (0);
	}
	v = ((hex(c1) << 4) | hex(c2));
	return (v);
}

/*
 * Read two or three decimal digits from the input and return the byte
 * value they encode.  A character following the second digit that is
 * not a digit is left in the input.  Malformed input, and values that
 * do not fit in one byte, are reported through yyerror().
 */
static int
scan_dec_byte(void)
{
	int	c1, c2, c3;
	int	b;

	c1 = scanc();
	if (!isdigit(c1)) {
		(void) yyerror(_("malformed decimal digit"));
		return (0);
	}
	b = c1 - '0';
	c2 = scanc();
	if (!isdigit(c2)) {
		(void) yyerror(_("malformed decimal digit"));
		return (0);
	}
	b *= 10;
	b += (c2 - '0');
	c3 = scanc();
	if (!isdigit(c3)) {
		/* the third digit is optional; EOF cannot be pushed back */
		if (c3 != EOF)
			unscanc(c3);
	} else {
		b *= 10;
		b += (c3 - '0');
	}
	/* the caller stores the result in a single byte */
	if (b > UCHAR_MAX) {
		(void) yyerror(_("decimal value out of range"));
		return (0);
	}
	return (b);
}

/*
 * Read two or three octal digits from the input and return the byte
 * value they encode.  A character following the second digit that is
 * not an octal digit is left in the input.  Malformed input, and values
 * that do not fit in one byte, are reported through yyerror().
 */
static int
scan_oct_byte(void)
{
	int c1, c2, c3;
	int	b;

	c1 = scanc();
	if (!isodigit(c1)) {
		(void) yyerror(_("malformed octal digit"));
		return (0);
	}
	b = c1 - '0';
	c2 = scanc();
	if (!isodigit(c2)) {
		(void) yyerror(_("malformed octal digit"));
		return (0);
	}
	b *= 8;
	b += (c2 - '0');
	c3 = scanc();
	if (!isodigit(c3)) {
		/* the third digit is optional; EOF cannot be pushed back */
		if (c3 != EOF)
			unscanc(c3);
	} else {
		b *= 8;
		b += (c3 - '0');
	}
	/* three octal digits can encode up to 511 */
	if (b > UCHAR_MAX) {
		(void) yyerror(_("octal value out of range"));
		return (0);
	}
	return (b);
}

23148edc7cfSGordon Ross void
add_tok(int c)23248edc7cfSGordon Ross add_tok(int c)
23348edc7cfSGordon Ross {
23448edc7cfSGordon Ross if ((tokidx + 1) >= toksz) {
23548edc7cfSGordon Ross toksz += 64;
23648edc7cfSGordon Ross if ((token = realloc(token, toksz)) == NULL) {
237*1a90c98dSToomas Soome (void) yyerror(_("out of memory"));
23848edc7cfSGordon Ross tokidx = 0;
23948edc7cfSGordon Ross toksz = 0;
24048edc7cfSGordon Ross return;
24148edc7cfSGordon Ross }
24248edc7cfSGordon Ross }
24348edc7cfSGordon Ross
24448edc7cfSGordon Ross token[tokidx++] = (char)c;
24548edc7cfSGordon Ross token[tokidx] = 0;
24648edc7cfSGordon Ross }
24748edc7cfSGordon Ross
24848edc7cfSGordon Ross static int
get_byte(void)24948edc7cfSGordon Ross get_byte(void)
25048edc7cfSGordon Ross {
25148edc7cfSGordon Ross int c;
25248edc7cfSGordon Ross
25348edc7cfSGordon Ross if ((c = scanc()) != esc_char) {
25448edc7cfSGordon Ross unscanc(c);
25548edc7cfSGordon Ross return (EOF);
25648edc7cfSGordon Ross }
25748edc7cfSGordon Ross c = scanc();
25848edc7cfSGordon Ross
25948edc7cfSGordon Ross switch (c) {
26048edc7cfSGordon Ross case 'd':
26148edc7cfSGordon Ross case 'D':
26248edc7cfSGordon Ross return (scan_dec_byte());
26348edc7cfSGordon Ross case 'x':
26448edc7cfSGordon Ross case 'X':
26548edc7cfSGordon Ross return (scan_hex_byte());
26648edc7cfSGordon Ross case '0':
26748edc7cfSGordon Ross case '1':
26848edc7cfSGordon Ross case '2':
26948edc7cfSGordon Ross case '3':
27048edc7cfSGordon Ross case '4':
27148edc7cfSGordon Ross case '5':
27248edc7cfSGordon Ross case '6':
27348edc7cfSGordon Ross case '7':
27448edc7cfSGordon Ross /* put the character back so we can get it */
27548edc7cfSGordon Ross unscanc(c);
27648edc7cfSGordon Ross return (scan_oct_byte());
27748edc7cfSGordon Ross default:
27848edc7cfSGordon Ross unscanc(c);
27948edc7cfSGordon Ross unscanc(esc_char);
28048edc7cfSGordon Ross return (EOF);
28148edc7cfSGordon Ross }
28248edc7cfSGordon Ross }
28348edc7cfSGordon Ross
/*
 * Translate the character that followed the escape character.
 *
 * The letters n, r, t, f, v, b and a yield the control character of the
 * corresponding C escape sequence; any other character is returned
 * unchanged.
 */
int
get_escaped(int c)
{
	switch (c) {
	case 'n':
		return ('\n');
	case 'r':
		return ('\r');
	case 't':
		return ('\t');
	case 'f':
		return ('\f');
	case 'v':
		return ('\v');
	case 'b':
		return ('\b');
	case 'a':
		return ('\a');
	default:
		return (c);
	}
}

30748edc7cfSGordon Ross int
get_wide(void)30848edc7cfSGordon Ross get_wide(void)
30948edc7cfSGordon Ross {
31048edc7cfSGordon Ross /* NB: yylval.mbs[0] is the length */
31148edc7cfSGordon Ross char *mbs = &yylval.mbs[1];
31248edc7cfSGordon Ross int mbi = 0;
31348edc7cfSGordon Ross int c;
31448edc7cfSGordon Ross
31548edc7cfSGordon Ross mbs[mbi] = 0;
31648edc7cfSGordon Ross if (mb_cur_max > MB_LEN_MAX) {
317*1a90c98dSToomas Soome (void) yyerror(_("max multibyte character size too big"));
31848edc7cfSGordon Ross return (T_NULL);
31948edc7cfSGordon Ross }
32048edc7cfSGordon Ross for (;;) {
32148edc7cfSGordon Ross if ((c = get_byte()) == EOF)
32248edc7cfSGordon Ross break;
32348edc7cfSGordon Ross if (mbi == mb_cur_max) {
32448edc7cfSGordon Ross unscanc(c);
325*1a90c98dSToomas Soome (void) yyerror(_("length > mb_cur_max"));
32648edc7cfSGordon Ross return (T_NULL);
32748edc7cfSGordon Ross }
32848edc7cfSGordon Ross mbs[mbi++] = c;
32948edc7cfSGordon Ross mbs[mbi] = 0;
33048edc7cfSGordon Ross }
33148edc7cfSGordon Ross
33248edc7cfSGordon Ross /* result in yylval.mbs */
33348edc7cfSGordon Ross mbs[-1] = mbi;
33448edc7cfSGordon Ross return (T_CHAR);
33548edc7cfSGordon Ross }
33648edc7cfSGordon Ross
33748edc7cfSGordon Ross int
get_symbol(void)33848edc7cfSGordon Ross get_symbol(void)
33948edc7cfSGordon Ross {
34048edc7cfSGordon Ross int c;
34148edc7cfSGordon Ross
34248edc7cfSGordon Ross while ((c = scanc()) != EOF) {
34348edc7cfSGordon Ross if (escaped) {
34448edc7cfSGordon Ross escaped = 0;
34548edc7cfSGordon Ross if (c == '\n')
34648edc7cfSGordon Ross continue;
34748edc7cfSGordon Ross add_tok(get_escaped(c));
34848edc7cfSGordon Ross continue;
34948edc7cfSGordon Ross }
35048edc7cfSGordon Ross if (c == esc_char) {
35148edc7cfSGordon Ross escaped = 1;
35248edc7cfSGordon Ross continue;
35348edc7cfSGordon Ross }
35448edc7cfSGordon Ross if (c == '\n') { /* well that's strange! */
355*1a90c98dSToomas Soome (void) yyerror(_("unterminated symbolic name"));
35648edc7cfSGordon Ross continue;
35748edc7cfSGordon Ross }
35848edc7cfSGordon Ross if (c == '>') { /* end of symbol */
35948edc7cfSGordon Ross
36048edc7cfSGordon Ross /*
36148edc7cfSGordon Ross * This restarts the token from the beginning
36248edc7cfSGordon Ross * the next time we scan a character. (This
36348edc7cfSGordon Ross * token is complete.)
36448edc7cfSGordon Ross */
36548edc7cfSGordon Ross
36648edc7cfSGordon Ross if (token == NULL) {
367*1a90c98dSToomas Soome (void) yyerror(_("missing symbolic name"));
36848edc7cfSGordon Ross return (T_NULL);
36948edc7cfSGordon Ross }
37048edc7cfSGordon Ross tokidx = 0;
37148edc7cfSGordon Ross
37248edc7cfSGordon Ross /*
37348edc7cfSGordon Ross * A few symbols are handled as keywords outside
37448edc7cfSGordon Ross * of the normal categories.
37548edc7cfSGordon Ross */
37648edc7cfSGordon Ross if (category == T_END) {
37748edc7cfSGordon Ross int i;
37848edc7cfSGordon Ross for (i = 0; symwords[i].name != 0; i++) {
37948edc7cfSGordon Ross if (strcmp(token, symwords[i].name) ==
38048edc7cfSGordon Ross 0) {
38148edc7cfSGordon Ross last_kw = symwords[i].id;
38248edc7cfSGordon Ross return (last_kw);
38348edc7cfSGordon Ross }
38448edc7cfSGordon Ross }
38548edc7cfSGordon Ross }
38648edc7cfSGordon Ross /* its an undefined symbol */
38748edc7cfSGordon Ross yylval.token = strdup(token);
38848edc7cfSGordon Ross if (yylval.token == NULL) {
38948edc7cfSGordon Ross perror("malloc");
39048edc7cfSGordon Ross exit(1);
39148edc7cfSGordon Ross }
39248edc7cfSGordon Ross token = NULL;
39348edc7cfSGordon Ross toksz = 0;
39448edc7cfSGordon Ross tokidx = 0;
39548edc7cfSGordon Ross return (T_SYMBOL);
39648edc7cfSGordon Ross }
39748edc7cfSGordon Ross add_tok(c);
39848edc7cfSGordon Ross }
39948edc7cfSGordon Ross
400*1a90c98dSToomas Soome (void) yyerror(_("unterminated symbolic name"));
40148edc7cfSGordon Ross return (EOF);
40248edc7cfSGordon Ross }
40348edc7cfSGordon Ross
40448edc7cfSGordon Ross
40548edc7cfSGordon Ross static int
consume_token(void)40648edc7cfSGordon Ross consume_token(void)
40748edc7cfSGordon Ross {
40848edc7cfSGordon Ross int len = tokidx;
40948edc7cfSGordon Ross int i;
41048edc7cfSGordon Ross
41148edc7cfSGordon Ross tokidx = 0;
41248edc7cfSGordon Ross if (token == NULL)
41348edc7cfSGordon Ross return (T_NULL);
41448edc7cfSGordon Ross
41548edc7cfSGordon Ross /*
41648edc7cfSGordon Ross * this one is special, because we don't want it to alter the
41748edc7cfSGordon Ross * last_kw field.
41848edc7cfSGordon Ross */
41948edc7cfSGordon Ross if (strcmp(token, "...") == 0) {
42048edc7cfSGordon Ross return (T_ELLIPSIS);
42148edc7cfSGordon Ross }
42248edc7cfSGordon Ross
42348edc7cfSGordon Ross /* search for reserved words first */
42448edc7cfSGordon Ross for (i = 0; keywords[i].name; i++) {
42548edc7cfSGordon Ross int j;
42648edc7cfSGordon Ross if (strcmp(keywords[i].name, token) != 0) {
42748edc7cfSGordon Ross continue;
42848edc7cfSGordon Ross }
42948edc7cfSGordon Ross
43048edc7cfSGordon Ross last_kw = keywords[i].id;
43148edc7cfSGordon Ross
43248edc7cfSGordon Ross /* clear the top level category if we're done with it */
43348edc7cfSGordon Ross if (last_kw == T_END) {
43448edc7cfSGordon Ross category = T_END;
43548edc7cfSGordon Ross }
43648edc7cfSGordon Ross
43748edc7cfSGordon Ross /* set the top level category if we're changing */
43848edc7cfSGordon Ross for (j = 0; categories[j]; j++) {
43948edc7cfSGordon Ross if (categories[j] != last_kw)
44048edc7cfSGordon Ross continue;
44148edc7cfSGordon Ross category = last_kw;
44248edc7cfSGordon Ross }
44348edc7cfSGordon Ross
44448edc7cfSGordon Ross return (keywords[i].id);
44548edc7cfSGordon Ross }
44648edc7cfSGordon Ross
44748edc7cfSGordon Ross /* maybe its a numeric constant? */
44848edc7cfSGordon Ross if (isdigit(*token) || (*token == '-' && isdigit(token[1]))) {
44948edc7cfSGordon Ross char *eptr;
45048edc7cfSGordon Ross yylval.num = strtol(token, &eptr, 10);
45148edc7cfSGordon Ross if (*eptr != 0)
452*1a90c98dSToomas Soome (void) yyerror(_("malformed number"));
45348edc7cfSGordon Ross return (T_NUMBER);
45448edc7cfSGordon Ross }
45548edc7cfSGordon Ross
45648edc7cfSGordon Ross /*
45748edc7cfSGordon Ross * A single lone character is treated as a character literal.
45848edc7cfSGordon Ross * To avoid duplication of effort, we stick in the charmap.
45948edc7cfSGordon Ross */
46048edc7cfSGordon Ross if (len == 1) {
46148edc7cfSGordon Ross yylval.mbs[0] = 1; /* length */
46248edc7cfSGordon Ross yylval.mbs[1] = token[0];
46348edc7cfSGordon Ross yylval.mbs[2] = '\0';
46448edc7cfSGordon Ross return (T_CHAR);
46548edc7cfSGordon Ross }
46648edc7cfSGordon Ross
46748edc7cfSGordon Ross /* anything else is treated as a symbolic name */
46848edc7cfSGordon Ross yylval.token = strdup(token);
46948edc7cfSGordon Ross token = NULL;
47048edc7cfSGordon Ross toksz = 0;
47148edc7cfSGordon Ross tokidx = 0;
47248edc7cfSGordon Ross return (T_NAME);
47348edc7cfSGordon Ross }
47448edc7cfSGordon Ross
/*
 * Discard input up to and including the next newline.  Reaching end of
 * file first is reported through errf().
 */
void
scan_to_eol(void)
{
	int c;

	while ((c = scanc()) != '\n') {
		if (c == EOF) {
			/* end of file without newline! */
			errf(_("missing newline"));
			return;
		}
	}
	assert(c == '\n');
}

48948edc7cfSGordon Ross int
yylex(void)49048edc7cfSGordon Ross yylex(void)
49148edc7cfSGordon Ross {
49248edc7cfSGordon Ross int c;
49348edc7cfSGordon Ross
49448edc7cfSGordon Ross while ((c = scanc()) != EOF) {
49548edc7cfSGordon Ross
49648edc7cfSGordon Ross /* special handling for quoted string */
49748edc7cfSGordon Ross if (instring) {
49848edc7cfSGordon Ross if (escaped) {
49948edc7cfSGordon Ross escaped = 0;
50048edc7cfSGordon Ross
50148edc7cfSGordon Ross /* if newline, just eat and forget it */
50248edc7cfSGordon Ross if (c == '\n')
50348edc7cfSGordon Ross continue;
50448edc7cfSGordon Ross
50548edc7cfSGordon Ross if (strchr("xXd01234567", c)) {
50648edc7cfSGordon Ross unscanc(c);
50748edc7cfSGordon Ross unscanc(esc_char);
50848edc7cfSGordon Ross return (get_wide());
50948edc7cfSGordon Ross }
51048edc7cfSGordon Ross yylval.mbs[0] = 1; /* length */
51148edc7cfSGordon Ross yylval.mbs[1] = get_escaped(c);
51248edc7cfSGordon Ross yylval.mbs[2] = '\0';
51348edc7cfSGordon Ross return (T_CHAR);
51448edc7cfSGordon Ross }
51548edc7cfSGordon Ross if (c == esc_char) {
51648edc7cfSGordon Ross escaped = 1;
51748edc7cfSGordon Ross continue;
51848edc7cfSGordon Ross }
51948edc7cfSGordon Ross switch (c) {
52048edc7cfSGordon Ross case '<':
52148edc7cfSGordon Ross return (get_symbol());
52248edc7cfSGordon Ross case '>':
52348edc7cfSGordon Ross /* oops! should generate syntax error */
52448edc7cfSGordon Ross return (T_GT);
52548edc7cfSGordon Ross case '"':
52648edc7cfSGordon Ross instring = 0;
52748edc7cfSGordon Ross return (T_QUOTE);
52848edc7cfSGordon Ross default:
52948edc7cfSGordon Ross yylval.mbs[0] = 1; /* length */
53048edc7cfSGordon Ross yylval.mbs[1] = c;
53148edc7cfSGordon Ross yylval.mbs[2] = '\0';
53248edc7cfSGordon Ross return (T_CHAR);
53348edc7cfSGordon Ross }
53448edc7cfSGordon Ross }
53548edc7cfSGordon Ross
53648edc7cfSGordon Ross /* escaped characters first */
53748edc7cfSGordon Ross if (escaped) {
53848edc7cfSGordon Ross escaped = 0;
53948edc7cfSGordon Ross if (c == '\n') {
54048edc7cfSGordon Ross /* eat the newline */
54148edc7cfSGordon Ross continue;
54248edc7cfSGordon Ross }
54348edc7cfSGordon Ross hadtok = 1;
54448edc7cfSGordon Ross if (tokidx) {
54548edc7cfSGordon Ross /* an escape mid-token is nonsense */
54648edc7cfSGordon Ross return (T_NULL);
54748edc7cfSGordon Ross }
54848edc7cfSGordon Ross
54948edc7cfSGordon Ross /* numeric escapes are treated as wide characters */
55048edc7cfSGordon Ross if (strchr("xXd01234567", c)) {
55148edc7cfSGordon Ross unscanc(c);
55248edc7cfSGordon Ross unscanc(esc_char);
55348edc7cfSGordon Ross return (get_wide());
55448edc7cfSGordon Ross }
55548edc7cfSGordon Ross
55648edc7cfSGordon Ross add_tok(get_escaped(c));
55748edc7cfSGordon Ross continue;
55848edc7cfSGordon Ross }
55948edc7cfSGordon Ross
56048edc7cfSGordon Ross /* if it is the escape charter itself note it */
56148edc7cfSGordon Ross if (c == esc_char) {
56248edc7cfSGordon Ross escaped = 1;
56348edc7cfSGordon Ross continue;
56448edc7cfSGordon Ross }
56548edc7cfSGordon Ross
56648edc7cfSGordon Ross /* remove from the comment char to end of line */
56748edc7cfSGordon Ross if (c == com_char) {
56848edc7cfSGordon Ross while (c != '\n') {
56948edc7cfSGordon Ross if ((c = scanc()) == EOF) {
57048edc7cfSGordon Ross /* end of file without newline! */
57148edc7cfSGordon Ross return (EOF);
57248edc7cfSGordon Ross }
57348edc7cfSGordon Ross }
57448edc7cfSGordon Ross assert(c == '\n');
57548edc7cfSGordon Ross if (!hadtok) {
57648edc7cfSGordon Ross /*
57748edc7cfSGordon Ross * If there were no tokens on this line,
57848edc7cfSGordon Ross * then just pretend it didn't exist at all.
57948edc7cfSGordon Ross */
58048edc7cfSGordon Ross continue;
58148edc7cfSGordon Ross }
58248edc7cfSGordon Ross hadtok = 0;
58348edc7cfSGordon Ross return (T_NL);
58448edc7cfSGordon Ross }
58548edc7cfSGordon Ross
58648edc7cfSGordon Ross if (strchr(" \t\n;()<>,\"", c) && (tokidx != 0)) {
58748edc7cfSGordon Ross /*
58848edc7cfSGordon Ross * These are all token delimiters. If there
58948edc7cfSGordon Ross * is a token already in progress, we need to
59048edc7cfSGordon Ross * process it.
59148edc7cfSGordon Ross */
59248edc7cfSGordon Ross unscanc(c);
59348edc7cfSGordon Ross return (consume_token());
59448edc7cfSGordon Ross }
59548edc7cfSGordon Ross
59648edc7cfSGordon Ross switch (c) {
59748edc7cfSGordon Ross case '\n':
59848edc7cfSGordon Ross if (!hadtok) {
59948edc7cfSGordon Ross /*
60048edc7cfSGordon Ross * If the line was completely devoid of tokens,
60148edc7cfSGordon Ross * then just ignore it.
60248edc7cfSGordon Ross */
60348edc7cfSGordon Ross continue;
60448edc7cfSGordon Ross }
60548edc7cfSGordon Ross /* we're starting a new line, reset the token state */
60648edc7cfSGordon Ross hadtok = 0;
60748edc7cfSGordon Ross return (T_NL);
60848edc7cfSGordon Ross case ',':
60948edc7cfSGordon Ross hadtok = 1;
61048edc7cfSGordon Ross return (T_COMMA);
61148edc7cfSGordon Ross case ';':
61248edc7cfSGordon Ross hadtok = 1;
61348edc7cfSGordon Ross return (T_SEMI);
61448edc7cfSGordon Ross case '(':
61548edc7cfSGordon Ross hadtok = 1;
61648edc7cfSGordon Ross return (T_LPAREN);
61748edc7cfSGordon Ross case ')':
61848edc7cfSGordon Ross hadtok = 1;
61948edc7cfSGordon Ross return (T_RPAREN);
62048edc7cfSGordon Ross case '>':
62148edc7cfSGordon Ross hadtok = 1;
62248edc7cfSGordon Ross return (T_GT);
62348edc7cfSGordon Ross case '<':
62448edc7cfSGordon Ross /* symbol start! */
62548edc7cfSGordon Ross hadtok = 1;
62648edc7cfSGordon Ross return (get_symbol());
62748edc7cfSGordon Ross case ' ':
62848edc7cfSGordon Ross case '\t':
62948edc7cfSGordon Ross /* whitespace, just ignore it */
63048edc7cfSGordon Ross continue;
63148edc7cfSGordon Ross case '"':
63248edc7cfSGordon Ross hadtok = 1;
63348edc7cfSGordon Ross instring = 1;
63448edc7cfSGordon Ross return (T_QUOTE);
63548edc7cfSGordon Ross default:
63648edc7cfSGordon Ross hadtok = 1;
63748edc7cfSGordon Ross add_tok(c);
63848edc7cfSGordon Ross continue;
63948edc7cfSGordon Ross }
64048edc7cfSGordon Ross }
64148edc7cfSGordon Ross return (EOF);
64248edc7cfSGordon Ross }
64348edc7cfSGordon Ross
644*1a90c98dSToomas Soome int
yyerror(const char * msg)64548edc7cfSGordon Ross yyerror(const char *msg)
64648edc7cfSGordon Ross {
64748edc7cfSGordon Ross (void) fprintf(stderr, _("%s: %d: error: %s\n"),
64848edc7cfSGordon Ross filename, lineno, msg);
64948edc7cfSGordon Ross exit(1);
65048edc7cfSGordon Ross }
65148edc7cfSGordon Ross
65248edc7cfSGordon Ross void
errf(const char * fmt,...)65348edc7cfSGordon Ross errf(const char *fmt, ...)
65448edc7cfSGordon Ross {
65548edc7cfSGordon Ross char *msg;
65648edc7cfSGordon Ross
65748edc7cfSGordon Ross va_list va;
65848edc7cfSGordon Ross va_start(va, fmt);
65948edc7cfSGordon Ross (void) vasprintf(&msg, fmt, va);
66048edc7cfSGordon Ross va_end(va);
66148edc7cfSGordon Ross
66248edc7cfSGordon Ross (void) fprintf(stderr, _("%s: %d: error: %s\n"),
66348edc7cfSGordon Ross filename, lineno, msg);
66448edc7cfSGordon Ross free(msg);
66548edc7cfSGordon Ross exit(1);
66648edc7cfSGordon Ross }
66748edc7cfSGordon Ross
66848edc7cfSGordon Ross void
warn(const char * fmt,...)66948edc7cfSGordon Ross warn(const char *fmt, ...)
67048edc7cfSGordon Ross {
67148edc7cfSGordon Ross char *msg;
67248edc7cfSGordon Ross
67348edc7cfSGordon Ross va_list va;
67448edc7cfSGordon Ross va_start(va, fmt);
67548edc7cfSGordon Ross (void) vasprintf(&msg, fmt, va);
67648edc7cfSGordon Ross va_end(va);
67748edc7cfSGordon Ross
67848edc7cfSGordon Ross (void) fprintf(stderr, _("%s: %d: warning: %s\n"),
67948edc7cfSGordon Ross filename, lineno, msg);
68048edc7cfSGordon Ross free(msg);
68148edc7cfSGordon Ross warnings++;
68248edc7cfSGordon Ross }
683