xref: /csrg-svn/bin/sh/mksyntax.c (revision 60698)
147132Sbostic /*-
2*60698Sbostic  * Copyright (c) 1991, 1993
3*60698Sbostic  *	The Regents of the University of California.  All rights reserved.
447132Sbostic  *
547132Sbostic  * This code is derived from software contributed to Berkeley by
647132Sbostic  * Kenneth Almquist.
747132Sbostic  *
847132Sbostic  * %sccs.include.redist.c%
947132Sbostic  */
1047132Sbostic 
1147132Sbostic #ifndef lint
12*60698Sbostic static char copyright[] =
13*60698Sbostic "@(#) Copyright (c) 1991, 1993\n\
14*60698Sbostic 	The Regents of the University of California.  All rights reserved.\n";
1547132Sbostic #endif /* not lint */
1647132Sbostic 
1747132Sbostic #ifndef lint
18*60698Sbostic static char sccsid[] = "@(#)mksyntax.c	8.1 (Berkeley) 05/31/93";
1947132Sbostic #endif /* not lint */
2047132Sbostic 
2147132Sbostic /*
2247132Sbostic  * This program creates syntax.h and syntax.c.
2347132Sbostic  */
2447132Sbostic 
2547132Sbostic #include <stdio.h>
2647132Sbostic #include "parser.h"
2747132Sbostic 
2847132Sbostic 
/*
 * One named class: the identifier emitted as a #define in syntax.h and
 * the text of the comment written next to it.
 */
struct synclass {
	char *name;		/* macro name */
	char *comment;		/* description placed after the #define */
};

/*
 * Syntax classes.  main() assigns each class its index in this table as
 * its value; the list ends at the entry whose name is NULL.
 */
struct synclass synclass[] = {
	{ "CWORD",	"character is nothing special" },
	{ "CNL",	"newline character" },
	{ "CBACK",	"a backslash character" },
	{ "CSQUOTE",	"single quote" },
	{ "CDQUOTE",	"double quote" },
	{ "CENDQUOTE",	"a terminating quote" },
	{ "CBQUOTE",	"backwards single quote" },
	{ "CVAR",	"a dollar sign" },
	{ "CENDVAR",	"a '}' character" },
	{ "CLP",	"a left paren in arithmetic" },
	{ "CRP",	"a right paren in arithmetic" },
	{ "CEOF",	"end of file" },
	{ "CCTL",	"like CWORD, except it must be escaped" },
	{ "CSPCL",	"these terminate a word" },
	{ NULL,		NULL }
};
5247132Sbostic 
5347132Sbostic 
5447132Sbostic /*
5547132Sbostic  * Syntax classes for is_ functions.  Warning:  if you add new classes
5647132Sbostic  * you may have to change the definition of the is_in_name macro.
5747132Sbostic  */
5847132Sbostic struct synclass is_entry[] = {
5947132Sbostic 	"ISDIGIT",	"a digit",
6047132Sbostic 	"ISUPPER",	"an upper case letter",
6147132Sbostic 	"ISLOWER",	"a lower case letter",
6247132Sbostic 	"ISUNDER",	"an underscore",
6347132Sbostic 	"ISSPECL",	"the name of a special parameter",
6447132Sbostic 	NULL, NULL,
6547132Sbostic };
6647132Sbostic 
/* Banner comment written at the top of both generated files. */
char writer[] =
	"/*\n * This file was generated by the mksyntax program.\n */\n\n";
7247132Sbostic 
7347132Sbostic 
/* Output streams: syntax.h and syntax.c respectively. */
FILE *hfile;
FILE *cfile;

/*
 * The table currently being built; each slot holds the name of a class.
 * 513 slots cover the largest case main() accepts: 9 bit chars, for
 * which size is (1 << 9) + 1.
 */
char *syntax[513];

/* Properties of the char type, worked out by main(). */
int nbits;		/* number of bits in a character */
int size;		/* number of values which a char variable can have */
int base;		/* added to a character to get its index in syntax[] */
int digit_contig;	/* true if digits are contiguous */
8147132Sbostic 
8247132Sbostic 
8347132Sbostic main() {
8447132Sbostic 	char c;
8547132Sbostic 	char d;
8647132Sbostic 	int sign;
8747132Sbostic 	int i;
8847132Sbostic 	char buf[80];
8947132Sbostic 	int pos;
9047132Sbostic 	static char digit[] = "0123456789";
9147132Sbostic 
9247132Sbostic 	/* Create output files */
9347132Sbostic 	if ((cfile = fopen("syntax.c", "w")) == NULL) {
9447132Sbostic 		perror("syntax.c");
9547132Sbostic 		exit(2);
9647132Sbostic 	}
9747132Sbostic 	if ((hfile = fopen("syntax.h", "w")) == NULL) {
9847132Sbostic 		perror("syntax.h");
9947132Sbostic 		exit(2);
10047132Sbostic 	}
10147132Sbostic 	fputs(writer, hfile);
10247132Sbostic 	fputs(writer, cfile);
10347132Sbostic 
10447132Sbostic 	/* Determine the characteristics of chars. */
10547132Sbostic 	c = -1;
10647132Sbostic 	if (c < 0)
10747132Sbostic 		sign = 1;
10847132Sbostic 	else
10947132Sbostic 		sign = 0;
11047132Sbostic 	for (nbits = 1 ; ; nbits++) {
11147132Sbostic 		d = (1 << nbits) - 1;
11247132Sbostic 		if (d == c)
11347132Sbostic 			break;
11447132Sbostic 	}
11547132Sbostic 	printf("%s %d bit chars\n", sign? "signed" : "unsigned", nbits);
11647132Sbostic 	if (nbits > 9) {
11747132Sbostic 		fputs("Characters can't have more than 9 bits\n", stderr);
11847132Sbostic 		exit(2);
11947132Sbostic 	}
12047132Sbostic 	size = (1 << nbits) + 1;
12147132Sbostic 	base = 1;
12247132Sbostic 	if (sign)
12347132Sbostic 		base += 1 << (nbits - 1);
12447132Sbostic 	digit_contig = 1;
12547132Sbostic 	for (i = 0 ; i < 10 ; i++) {
12647132Sbostic 		if (digit[i] != '0' + i)
12747132Sbostic 			digit_contig = 0;
12847132Sbostic 	}
12947132Sbostic 
13047166Sbostic 	fputs("#include <sys/cdefs.h>\n", hfile);
13147166Sbostic 
13247132Sbostic 	/* Generate the #define statements in the header file */
13347132Sbostic 	fputs("/* Syntax classes */\n", hfile);
13447132Sbostic 	for (i = 0 ; synclass[i].name ; i++) {
13547132Sbostic 		sprintf(buf, "#define %s %d", synclass[i].name, i);
13647132Sbostic 		fputs(buf, hfile);
13747132Sbostic 		for (pos = strlen(buf) ; pos < 32 ; pos = pos + 8 &~ 07)
13847132Sbostic 			putc('\t', hfile);
13947132Sbostic 		fprintf(hfile, "/* %s */\n", synclass[i].comment);
14047132Sbostic 	}
14147132Sbostic 	putc('\n', hfile);
14247132Sbostic 	fputs("/* Syntax classes for is_ functions */\n", hfile);
14347132Sbostic 	for (i = 0 ; is_entry[i].name ; i++) {
14447132Sbostic 		sprintf(buf, "#define %s %#o", is_entry[i].name, 1 << i);
14547132Sbostic 		fputs(buf, hfile);
14647132Sbostic 		for (pos = strlen(buf) ; pos < 32 ; pos = pos + 8 &~ 07)
14747132Sbostic 			putc('\t', hfile);
14847132Sbostic 		fprintf(hfile, "/* %s */\n", is_entry[i].comment);
14947132Sbostic 	}
15047132Sbostic 	putc('\n', hfile);
15147132Sbostic 	fprintf(hfile, "#define SYNBASE %d\n", base);
15247132Sbostic 	fprintf(hfile, "#define PEOF %d\n\n", -base);
15347132Sbostic 	putc('\n', hfile);
15447132Sbostic 	fputs("#define BASESYNTAX (basesyntax + SYNBASE)\n", hfile);
15547132Sbostic 	fputs("#define DQSYNTAX (dqsyntax + SYNBASE)\n", hfile);
15647132Sbostic 	fputs("#define SQSYNTAX (sqsyntax + SYNBASE)\n", hfile);
15753301Smarc 	fputs("#define ARISYNTAX (arisyntax + SYNBASE)\n", hfile);
15847132Sbostic 	putc('\n', hfile);
15947132Sbostic 	output_type_macros();		/* is_digit, etc. */
16047132Sbostic 	putc('\n', hfile);
16147132Sbostic 
16247132Sbostic 	/* Generate the syntax tables. */
16347132Sbostic 	fputs("#include \"shell.h\"\n", cfile);
16447132Sbostic 	fputs("#include \"syntax.h\"\n\n", cfile);
16547132Sbostic 	init();
16647132Sbostic 	fputs("/* syntax table used when not in quotes */\n", cfile);
16747132Sbostic 	add("\n", "CNL");
16847132Sbostic 	add("\\", "CBACK");
16947132Sbostic 	add("'", "CSQUOTE");
17047132Sbostic 	add("\"", "CDQUOTE");
17147132Sbostic 	add("`", "CBQUOTE");
17247132Sbostic 	add("$", "CVAR");
17347132Sbostic 	add("}", "CENDVAR");
17447132Sbostic 	add("<>();&| \t", "CSPCL");
17547132Sbostic 	print("basesyntax");
17647132Sbostic 	init();
17747132Sbostic 	fputs("\n/* syntax table used when in double quotes */\n", cfile);
17847132Sbostic 	add("\n", "CNL");
17947132Sbostic 	add("\\", "CBACK");
18047132Sbostic 	add("\"", "CENDQUOTE");
18147132Sbostic 	add("`", "CBQUOTE");
18247132Sbostic 	add("$", "CVAR");
18347132Sbostic 	add("}", "CENDVAR");
18453301Smarc 	add("!*?[=~:/", "CCTL");	/* ':/' for tilde - yuck */
18547132Sbostic 	print("dqsyntax");
18647132Sbostic 	init();
18747132Sbostic 	fputs("\n/* syntax table used when in single quotes */\n", cfile);
18847132Sbostic 	add("\n", "CNL");
18947132Sbostic 	add("'", "CENDQUOTE");
19053301Smarc 	add("!*?[=~:/", "CCTL");	/* ':/' for tilde - yuck */
19147132Sbostic 	print("sqsyntax");
19253301Smarc 	init();
19353301Smarc 	fputs("\n/* syntax table used when in arithmetic */\n", cfile);
19453301Smarc 	add("\n", "CNL");
19553301Smarc 	add("\\", "CBACK");
19653301Smarc 	add("`", "CBQUOTE");
19753301Smarc 	add("'", "CSQUOTE");
19853301Smarc 	add("\"", "CDQUOTE");
19953301Smarc 	add("$", "CVAR");
20053301Smarc 	add("}", "CENDVAR");
20153301Smarc 	add("(", "CLP");
20253301Smarc 	add(")", "CRP");
20353301Smarc 	print("arisyntax");
20447132Sbostic 	filltable("0");
20547132Sbostic 	fputs("\n/* character classification table */\n", cfile);
20647132Sbostic 	add("0123456789", "ISDIGIT");
20747132Sbostic 	add("abcdefghijklmnopqrstucvwxyz", "ISLOWER");
20847132Sbostic 	add("ABCDEFGHIJKLMNOPQRSTUCVWXYZ", "ISUPPER");
20947132Sbostic 	add("_", "ISUNDER");
21047132Sbostic 	add("#?$!-*@", "ISSPECL");
21147132Sbostic 	print("is_type");
21247132Sbostic 	if (! digit_contig)
21347132Sbostic 		digit_convert();
21447132Sbostic 	exit(0);
21547132Sbostic }
21647132Sbostic 
21747132Sbostic 
21847132Sbostic 
21947132Sbostic /*
22047132Sbostic  * Clear the syntax table.
22147132Sbostic  */
22247132Sbostic 
22347132Sbostic filltable(dftval)
22447132Sbostic 	char *dftval;
22547132Sbostic 	{
22647132Sbostic 	int i;
22747132Sbostic 
22847132Sbostic 	for (i = 0 ; i < size ; i++)
22947132Sbostic 		syntax[i] = dftval;
23047132Sbostic }
23147132Sbostic 
23247132Sbostic 
23347132Sbostic /*
23447132Sbostic  * Initialize the syntax table with default values.
23547132Sbostic  */
23647132Sbostic 
23747132Sbostic init() {
23847132Sbostic 	filltable("CWORD");
23947132Sbostic 	syntax[0] = "CEOF";
24047132Sbostic 	syntax[base + CTLESC] = "CCTL";
24147132Sbostic 	syntax[base + CTLVAR] = "CCTL";
24247132Sbostic 	syntax[base + CTLENDVAR] = "CCTL";
24347132Sbostic 	syntax[base + CTLBACKQ] = "CCTL";
24447132Sbostic 	syntax[base + CTLBACKQ + CTLQUOTE] = "CCTL";
24553301Smarc 	syntax[base + CTLARI] = "CCTL";
24653301Smarc 	syntax[base + CTLENDARI] = "CCTL";
24747132Sbostic }
24847132Sbostic 
24947132Sbostic 
25047132Sbostic /*
25147132Sbostic  * Add entries to the syntax table.
25247132Sbostic  */
25347132Sbostic 
25447132Sbostic add(p, type)
25547132Sbostic 	char *p, *type;
25647132Sbostic 	{
25747132Sbostic 	while (*p)
25847132Sbostic 		syntax[*p++ + base] = type;
25947132Sbostic }
26047132Sbostic 
26147132Sbostic 
26247132Sbostic 
26347132Sbostic /*
26447132Sbostic  * Output the syntax table.
26547132Sbostic  */
26647132Sbostic 
26747132Sbostic print(name)
26847132Sbostic 	char *name;
26947132Sbostic 	{
27047132Sbostic 	int i;
27147132Sbostic 	int col;
27247132Sbostic 
27347132Sbostic 	fprintf(hfile, "extern const char %s[];\n", name);
27447132Sbostic 	fprintf(cfile, "const char %s[%d] = {\n", name, size);
27547132Sbostic 	col = 0;
27647132Sbostic 	for (i = 0 ; i < size ; i++) {
27747132Sbostic 		if (i == 0) {
27847132Sbostic 			fputs("      ", cfile);
27947132Sbostic 		} else if ((i & 03) == 0) {
28047132Sbostic 			fputs(",\n      ", cfile);
28147132Sbostic 			col = 0;
28247132Sbostic 		} else {
28347132Sbostic 			putc(',', cfile);
28447132Sbostic 			while (++col < 9 * (i & 03))
28547132Sbostic 				putc(' ', cfile);
28647132Sbostic 		}
28747132Sbostic 		fputs(syntax[i], cfile);
28847132Sbostic 		col += strlen(syntax[i]);
28947132Sbostic 	}
29047132Sbostic 	fputs("\n};\n", cfile);
29147132Sbostic }
29247132Sbostic 
29347132Sbostic 
29447132Sbostic 
29547132Sbostic /*
29647132Sbostic  * Output character classification macros (e.g. is_digit).  If digits are
29747132Sbostic  * contiguous, we can test for them quickly.
29847132Sbostic  */
29947132Sbostic 
30047132Sbostic char *macro[] = {
30147132Sbostic 	"#define is_digit(c)\t((is_type+SYNBASE)[c] & ISDIGIT)",
30247132Sbostic 	"#define is_alpha(c)\t((is_type+SYNBASE)[c] & (ISUPPER|ISLOWER))",
30347132Sbostic 	"#define is_name(c)\t((is_type+SYNBASE)[c] & (ISUPPER|ISLOWER|ISUNDER))",
30447132Sbostic 	"#define is_in_name(c)\t((is_type+SYNBASE)[c] & (ISUPPER|ISLOWER|ISUNDER|ISDIGIT))",
30547132Sbostic 	"#define is_special(c)\t((is_type+SYNBASE)[c] & (ISSPECL|ISDIGIT))",
30647132Sbostic 	NULL
30747132Sbostic };
30847132Sbostic 
30947132Sbostic output_type_macros() {
31047132Sbostic 	char **pp;
31147132Sbostic 
31247132Sbostic 	if (digit_contig)
31347132Sbostic 		macro[0] = "#define is_digit(c)\t((unsigned)((c) - '0') <= 9)";
31447132Sbostic 	for (pp = macro ; *pp ; pp++)
31547132Sbostic 		fprintf(hfile, "%s\n", *pp);
31647132Sbostic 	if (digit_contig)
31747132Sbostic 		fputs("#define digit_val(c)\t((c) - '0')\n", hfile);
31847132Sbostic 	else
31947132Sbostic 		fputs("#define digit_val(c)\t(digit_value[c])\n", hfile);
32047132Sbostic }
32147132Sbostic 
32247132Sbostic 
32347132Sbostic 
32447132Sbostic /*
32547132Sbostic  * Output digit conversion table (if digits are not contiguous).
32647132Sbostic  */
32747132Sbostic 
32847132Sbostic digit_convert() {
32947132Sbostic 	int maxdigit;
33047132Sbostic 	static char digit[] = "0123456789";
33147132Sbostic 	char *p;
33247132Sbostic 	int i;
33347132Sbostic 
33447132Sbostic 	maxdigit = 0;
33547132Sbostic 	for (p = digit ; *p ; p++)
33647132Sbostic 		if (*p > maxdigit)
33747132Sbostic 			maxdigit = *p;
33847132Sbostic 	fputs("extern const char digit_value[];\n", hfile);
33947132Sbostic 	fputs("\n\nconst char digit_value[] = {\n", cfile);
34047132Sbostic 	for (i = 0 ; i <= maxdigit ; i++) {
34147132Sbostic 		for (p = digit ; *p && *p != i ; p++);
34247132Sbostic 		if (*p == '\0')
34347132Sbostic 			p = digit;
34447132Sbostic 		fprintf(cfile, "      %d,\n", p - digit);
34547132Sbostic 	}
34647132Sbostic 	fputs("};\n", cfile);
34747132Sbostic }
348