xref: /onnv-gate/usr/src/cmd/perl/5.8.4/distrib/x2p/a2py.c (revision 0:68f95e015346)
1 /* $RCSfile: a2py.c,v $$Revision: 4.1 $$Date: 92/08/07 18:29:14 $
2  *
3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4  *    2000, 2001, 2002, by Larry Wall and others
5  *
6  *    You may distribute under the terms of either the GNU General Public
7  *    License or the Artistic License, as specified in the README file.
8  *
9  * $Log:	a2py.c,v $
10  */
11 
12 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
13 #if defined(WIN32)
14 #include <io.h>
15 #endif
16 #if defined(NETWARE)
17 #include "../netware/clibstuf.h"
18 #endif
19 #include "../patchlevel.h"
20 #endif
21 #include "util.h"
22 
23 char *filename;
24 char *myname;
25 
26 int checkers = 0;
27 
28 int oper0(int type);
29 int oper1(int type, int arg1);
30 int oper2(int type, int arg1, int arg2);
31 int oper3(int type, int arg1, int arg2, int arg3);
32 int oper4(int type, int arg1, int arg2, int arg3, int arg4);
33 int oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5);
34 STR *walk(int useval, int level, register int node, int *numericptr, int minprec);
35 #ifdef NETWARE
36 char *savestr(char *str);
37 char *cpy2(register char *to, register char *from, register int delim);
38 #endif
39 
40 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
41 static void usage(void);
42 
43 static void
usage()44 usage()
45 {
46     printf("\nThis is the AWK to PERL translator, revision %d.0, version %d\n", PERL_REVISION, PERL_VERSION);
47     printf("\nUsage: %s [-D<number>] [-F<char>] [-n<fieldlist>] [-<number>] filename\n", myname);
48     printf("\n  -D<number>      sets debugging flags."
49            "\n  -F<character>   the awk script to translate is always invoked with"
50            "\n                  this -F switch."
51            "\n  -n<fieldlist>   specifies the names of the input fields if input does"
52            "\n                  not have to be split into an array."
53            "\n  -<number>       causes a2p to assume that input will always have that"
54            "\n                  many fields.\n");
55     exit(1);
56 }
57 #endif
58 
59 int
main(register int argc,register char ** argv,register char ** env)60 main(register int argc, register char **argv, register char **env)
61 {
62     register STR *str;
63     int i;
64     STR *tmpstr;
65     /* char *namelist;    */
66 
67 	#ifdef NETWARE
68 		fnInitGpfGlobals();	/* For importing the CLIB calls in place of Watcom calls */
69 	#endif	/* NETWARE */
70 
71     myname = argv[0];
72     linestr = str_new(80);
73     str = str_new(0);		/* first used for -I flags */
74     for (argc--,argv++; argc; argc--,argv++) {
75 	if (argv[0][0] != '-' || !argv[0][1])
76 	    break;
77 	switch (argv[0][1]) {
78 #ifdef DEBUGGING
79 	case 'D':
80 	    debug = atoi(argv[0]+2);
81 #if YYDEBUG
82 	    yydebug = (debug & 1);
83 #endif
84 	    break;
85 #endif
86 	case '0': case '1': case '2': case '3': case '4':
87 	case '5': case '6': case '7': case '8': case '9':
88 	    maxfld = atoi(argv[0]+1);
89 	    absmaxfld = TRUE;
90 	    break;
91 	case 'F':
92 	    fswitch = argv[0][2];
93 	    break;
94 	case 'n':
95 	    namelist = savestr(argv[0]+2);
96 	    break;
97 	case 'o':
98 	    old_awk = TRUE;
99 	    break;
100 	case '-':
101 	    argc--,argv++;
102 	    goto switch_end;
103 	case 0:
104 	    break;
105 	default:
106 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
107 	    fprintf(stderr, "Unrecognized switch: %s\n",argv[0]);
108             usage();
109 #else
110 	    fatal("Unrecognized switch: %s\n",argv[0]);
111 #endif
112 	}
113     }
114   switch_end:
115 
116     /* open script */
117 
118     if (argv[0] == Nullch) {
119 #if defined(OS2) || defined(WIN32) || defined(NETWARE)
120 	if ( isatty(fileno(stdin)) )
121 	    usage();
122 #endif
123         argv[0] = "-";
124     }
125     filename = savestr(argv[0]);
126 
127     filename = savestr(argv[0]);
128     if (strEQ(filename,"-"))
129 	argv[0] = "";
130     if (!*argv[0])
131 	rsfp = stdin;
132     else
133 	rsfp = fopen(argv[0],"r");
134     if (rsfp == Nullfp)
135 	fatal("Awk script \"%s\" doesn't seem to exist.\n",filename);
136 
137     /* init tokener */
138 
139     bufptr = str_get(linestr);
140     symtab = hnew();
141     curarghash = hnew();
142 
143     /* now parse the report spec */
144 
145     if (yyparse())
146 	fatal("Translation aborted due to syntax errors.\n");
147 
148 #ifdef DEBUGGING
149     if (debug & 2) {
150 	int type, len;
151 
152 	for (i=1; i<mop;) {
153 	    type = ops[i].ival;
154 	    len = type >> 8;
155 	    type &= 255;
156 	    printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]);
157 	    if (type == OSTRING)
158 		printf("\t\"%s\"\n",ops[i].cval),i++;
159 	    else {
160 		while (len--) {
161 		    printf("\t%d",ops[i].ival),i++;
162 		}
163 		putchar('\n');
164 	    }
165 	}
166     }
167     if (debug & 8)
168 	dump(root);
169 #endif
170 
171     /* first pass to look for numeric variables */
172 
173     prewalk(0,0,root,&i);
174 
175     /* second pass to produce new program */
176 
177     tmpstr = walk(0,0,root,&i,P_MIN);
178     str = str_make(STARTPERL);
179     str_cat(str, "\neval 'exec ");
180     str_cat(str, BIN);
181     str_cat(str, "/perl -S $0 ${1+\"$@\"}'\n\
182     if $running_under_some_shell;\n\
183 			# this emulates #! processing on NIH machines.\n\
184 			# (remove #! line above if indigestible)\n\n");
185     str_cat(str,
186       "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_0-9]+=)(.*)/ && shift;\n");
187     str_cat(str,
188       "			# process any FOO=bar switches\n\n");
189     if (do_opens && opens) {
190 	str_scat(str,opens);
191 	str_free(opens);
192 	str_cat(str,"\n");
193     }
194     str_scat(str,tmpstr);
195     str_free(tmpstr);
196 #ifdef DEBUGGING
197     if (!(debug & 16))
198 #endif
199     fixup(str);
200     putlines(str);
201     if (checkers) {
202 	fprintf(stderr,
203 	  "Please check my work on the %d line%s I've marked with \"#???\".\n",
204 		checkers, checkers == 1 ? "" : "s" );
205 	fprintf(stderr,
206 	  "The operation I've selected may be wrong for the operand types.\n");
207     }
208     exit(0);
209     /* by ANSI specs return is needed. This also shuts up VC++ and his warnings */
210     return(0);
211 }
212 
213 #define RETURN(retval) return (bufptr = s,retval)
214 #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval)
215 #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval)
216 #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,idtype)
217 
218 int idtype;
219 
220 int
yylex(void)221 yylex(void)
222 {
223     register char *s = bufptr;
224     register char *d;
225     register int tmp;
226 
227   retry:
228 #if YYDEBUG
229     if (yydebug) {
230 	if (strchr(s,'\n'))
231 	    fprintf(stderr,"Tokener at %s",s);
232 	else
233 	    fprintf(stderr,"Tokener at %s\n",s);
234     }
235 #endif
236     switch (*s) {
237     default:
238 	fprintf(stderr,
239 	    "Unrecognized character %c in file %s line %d--ignoring.\n",
240 	     *s++,filename,line);
241 	goto retry;
242     case '\\':
243 	s++;
244 	if (*s && *s != '\n') {
245 	    yyerror("Ignoring spurious backslash");
246 	    goto retry;
247 	}
248 	/*FALLSTHROUGH*/
249     case 0:
250 	s = str_get(linestr);
251 	*s = '\0';
252 	if (!rsfp)
253 	    RETURN(0);
254 	line++;
255 	if ((s = str_gets(linestr, rsfp)) == Nullch) {
256 	    if (rsfp != stdin)
257 		fclose(rsfp);
258 	    rsfp = Nullfp;
259 	    s = str_get(linestr);
260 	    RETURN(0);
261 	}
262 	goto retry;
263     case ' ': case '\t':
264 	s++;
265 	goto retry;
266     case '\n':
267 	*s = '\0';
268 	XTERM(NEWLINE);
269     case '#':
270 	yylval = string(s,0);
271 	*s = '\0';
272 	XTERM(COMMENT);
273     case ';':
274 	tmp = *s++;
275 	if (*s == '\n') {
276 	    s++;
277 	    XTERM(SEMINEW);
278 	}
279 	XTERM(tmp);
280     case '(':
281 	tmp = *s++;
282 	XTERM(tmp);
283     case '{':
284     case '[':
285     case ')':
286     case ']':
287     case '?':
288     case ':':
289 	tmp = *s++;
290 	XOP(tmp);
291 #ifdef EBCDIC
292     case 7:
293 #else
294     case 127:
295 #endif
296 	s++;
297 	XTERM('}');
298     case '}':
299 	for (d = s + 1; isSPACE(*d); d++) ;
300 	if (!*d)
301 	    s = d - 1;
302 	*s = 127;
303 	XTERM(';');
304     case ',':
305 	tmp = *s++;
306 	XTERM(tmp);
307     case '~':
308 	s++;
309 	yylval = string("~",1);
310 	XTERM(MATCHOP);
311     case '+':
312     case '-':
313 	if (s[1] == *s) {
314 	    s++;
315 	    if (*s++ == '+')
316 		XTERM(INCR);
317 	    else
318 		XTERM(DECR);
319 	}
320 	/* FALL THROUGH */
321     case '*':
322     case '%':
323     case '^':
324 	tmp = *s++;
325 	if (*s == '=') {
326 	    if (tmp == '^')
327 		yylval = string("**=",3);
328 	    else
329 		yylval = string(s-1,2);
330 	    s++;
331 	    XTERM(ASGNOP);
332 	}
333 	XTERM(tmp);
334     case '&':
335 	s++;
336 	tmp = *s++;
337 	if (tmp == '&')
338 	    XTERM(ANDAND);
339 	s--;
340 	XTERM('&');
341     case '|':
342 	s++;
343 	tmp = *s++;
344 	if (tmp == '|')
345 	    XTERM(OROR);
346 	s--;
347 	while (*s == ' ' || *s == '\t')
348 	    s++;
349 	if (strnEQ(s,"getline",7))
350 	    XTERM('p');
351 	else
352 	    XTERM('|');
353     case '=':
354 	s++;
355 	tmp = *s++;
356 	if (tmp == '=') {
357 	    yylval = string("==",2);
358 	    XTERM(RELOP);
359 	}
360 	s--;
361 	yylval = string("=",1);
362 	XTERM(ASGNOP);
363     case '!':
364 	s++;
365 	tmp = *s++;
366 	if (tmp == '=') {
367 	    yylval = string("!=",2);
368 	    XTERM(RELOP);
369 	}
370 	if (tmp == '~') {
371 	    yylval = string("!~",2);
372 	    XTERM(MATCHOP);
373 	}
374 	s--;
375 	XTERM(NOT);
376     case '<':
377 	s++;
378 	tmp = *s++;
379 	if (tmp == '=') {
380 	    yylval = string("<=",2);
381 	    XTERM(RELOP);
382 	}
383 	s--;
384 	XTERM('<');
385     case '>':
386 	s++;
387 	tmp = *s++;
388 	if (tmp == '>') {
389 	    yylval = string(">>",2);
390 	    XTERM(GRGR);
391 	}
392 	if (tmp == '=') {
393 	    yylval = string(">=",2);
394 	    XTERM(RELOP);
395 	}
396 	s--;
397 	XTERM('>');
398 
399 #define SNARFWORD \
400 	d = tokenbuf; \
401 	while (isALPHA(*s) || isDIGIT(*s) || *s == '_') \
402 	    *d++ = *s++; \
403 	*d = '\0'; \
404 	d = tokenbuf; \
405 	if (*s == '(') \
406 	    idtype = USERFUN; \
407 	else \
408 	    idtype = VAR;
409 
410     case '$':
411 	s++;
412 	if (*s == '0') {
413 	    s++;
414 	    do_chop = TRUE;
415 	    need_entire = TRUE;
416 	    idtype = VAR;
417 	    ID("0");
418 	}
419 	do_split = TRUE;
420 	if (isDIGIT(*s)) {
421 	    for (d = s; isDIGIT(*s); s++) ;
422 	    yylval = string(d,s-d);
423 	    tmp = atoi(d);
424 	    if (tmp > maxfld)
425 		maxfld = tmp;
426 	    XOP(FIELD);
427 	}
428 	split_to_array = set_array_base = TRUE;
429 	XOP(VFIELD);
430 
431     case '/':			/* may either be division or pattern */
432 	if (expectterm) {
433 	    s = scanpat(s);
434 	    XTERM(REGEX);
435 	}
436 	tmp = *s++;
437 	if (*s == '=') {
438 	    yylval = string("/=",2);
439 	    s++;
440 	    XTERM(ASGNOP);
441 	}
442 	XTERM(tmp);
443 
444     case '0': case '1': case '2': case '3': case '4':
445     case '5': case '6': case '7': case '8': case '9': case '.':
446 	s = scannum(s);
447 	XOP(NUMBER);
448     case '"':
449 	s++;
450 	s = cpy2(tokenbuf,s,s[-1]);
451 	if (!*s)
452 	    fatal("String not terminated:\n%s",str_get(linestr));
453 	s++;
454 	yylval = string(tokenbuf,0);
455 	XOP(STRING);
456 
457     case 'a': case 'A':
458 	SNARFWORD;
459 	if (strEQ(d,"ARGC"))
460 	    set_array_base = TRUE;
461 	if (strEQ(d,"ARGV")) {
462 	    yylval=numary(string("ARGV",0));
463 	    XOP(VAR);
464 	}
465 	if (strEQ(d,"atan2")) {
466 	    yylval = OATAN2;
467 	    XTERM(FUNN);
468 	}
469 	ID(d);
470     case 'b': case 'B':
471 	SNARFWORD;
472 	if (strEQ(d,"break"))
473 	    XTERM(BREAK);
474 	if (strEQ(d,"BEGIN"))
475 	    XTERM(BEGIN);
476 	ID(d);
477     case 'c': case 'C':
478 	SNARFWORD;
479 	if (strEQ(d,"continue"))
480 	    XTERM(CONTINUE);
481 	if (strEQ(d,"cos")) {
482 	    yylval = OCOS;
483 	    XTERM(FUN1);
484 	}
485 	if (strEQ(d,"close")) {
486 	    do_fancy_opens = 1;
487 	    yylval = OCLOSE;
488 	    XTERM(FUN1);
489 	}
490 	if (strEQ(d,"chdir"))
491 	    *d = toUPPER(*d);
492 	else if (strEQ(d,"crypt"))
493 	    *d = toUPPER(*d);
494 	else if (strEQ(d,"chop"))
495 	    *d = toUPPER(*d);
496 	else if (strEQ(d,"chmod"))
497 	    *d = toUPPER(*d);
498 	else if (strEQ(d,"chown"))
499 	    *d = toUPPER(*d);
500 	ID(d);
501     case 'd': case 'D':
502 	SNARFWORD;
503 	if (strEQ(d,"do"))
504 	    XTERM(DO);
505 	if (strEQ(d,"delete"))
506 	    XTERM(DELETE);
507 	if (strEQ(d,"die"))
508 	    *d = toUPPER(*d);
509 	ID(d);
510     case 'e': case 'E':
511 	SNARFWORD;
512 	if (strEQ(d,"END"))
513 	    XTERM(END);
514 	if (strEQ(d,"else"))
515 	    XTERM(ELSE);
516 	if (strEQ(d,"exit")) {
517 	    saw_line_op = TRUE;
518 	    XTERM(EXIT);
519 	}
520 	if (strEQ(d,"exp")) {
521 	    yylval = OEXP;
522 	    XTERM(FUN1);
523 	}
524 	if (strEQ(d,"elsif"))
525 	    *d = toUPPER(*d);
526 	else if (strEQ(d,"eq"))
527 	    *d = toUPPER(*d);
528 	else if (strEQ(d,"eval"))
529 	    *d = toUPPER(*d);
530 	else if (strEQ(d,"eof"))
531 	    *d = toUPPER(*d);
532 	else if (strEQ(d,"each"))
533 	    *d = toUPPER(*d);
534 	else if (strEQ(d,"exec"))
535 	    *d = toUPPER(*d);
536 	ID(d);
537     case 'f': case 'F':
538 	SNARFWORD;
539 	if (strEQ(d,"FS")) {
540 	    saw_FS++;
541 	    if (saw_FS == 1 && in_begin) {
542 		for (d = s; *d && isSPACE(*d); d++) ;
543 		if (*d == '=') {
544 		    for (d++; *d && isSPACE(*d); d++) ;
545 		    if (*d == '"' && d[2] == '"')
546 			const_FS = d[1];
547 		}
548 	    }
549 	    ID(tokenbuf);
550 	}
551 	if (strEQ(d,"for"))
552 	    XTERM(FOR);
553 	else if (strEQ(d,"function"))
554 	    XTERM(FUNCTION);
555 	if (strEQ(d,"FILENAME"))
556 	    d = "ARGV";
557 	if (strEQ(d,"foreach"))
558 	    *d = toUPPER(*d);
559 	else if (strEQ(d,"format"))
560 	    *d = toUPPER(*d);
561 	else if (strEQ(d,"fork"))
562 	    *d = toUPPER(*d);
563 	else if (strEQ(d,"fh"))
564 	    *d = toUPPER(*d);
565 	ID(d);
566     case 'g': case 'G':
567 	SNARFWORD;
568 	if (strEQ(d,"getline"))
569 	    XTERM(GETLINE);
570 	if (strEQ(d,"gsub"))
571 	    XTERM(GSUB);
572 	if (strEQ(d,"ge"))
573 	    *d = toUPPER(*d);
574 	else if (strEQ(d,"gt"))
575 	    *d = toUPPER(*d);
576 	else if (strEQ(d,"goto"))
577 	    *d = toUPPER(*d);
578 	else if (strEQ(d,"gmtime"))
579 	    *d = toUPPER(*d);
580 	ID(d);
581     case 'h': case 'H':
582 	SNARFWORD;
583 	if (strEQ(d,"hex"))
584 	    *d = toUPPER(*d);
585 	ID(d);
586     case 'i': case 'I':
587 	SNARFWORD;
588 	if (strEQ(d,"if"))
589 	    XTERM(IF);
590 	if (strEQ(d,"in"))
591 	    XTERM(IN);
592 	if (strEQ(d,"index")) {
593 	    set_array_base = TRUE;
594 	    XTERM(INDEX);
595 	}
596 	if (strEQ(d,"int")) {
597 	    yylval = OINT;
598 	    XTERM(FUN1);
599 	}
600 	ID(d);
601     case 'j': case 'J':
602 	SNARFWORD;
603 	if (strEQ(d,"join"))
604 	    *d = toUPPER(*d);
605 	ID(d);
606     case 'k': case 'K':
607 	SNARFWORD;
608 	if (strEQ(d,"keys"))
609 	    *d = toUPPER(*d);
610 	else if (strEQ(d,"kill"))
611 	    *d = toUPPER(*d);
612 	ID(d);
613     case 'l': case 'L':
614 	SNARFWORD;
615 	if (strEQ(d,"length")) {
616 	    yylval = OLENGTH;
617 	    XTERM(FUN1);
618 	}
619 	if (strEQ(d,"log")) {
620 	    yylval = OLOG;
621 	    XTERM(FUN1);
622 	}
623 	if (strEQ(d,"last"))
624 	    *d = toUPPER(*d);
625 	else if (strEQ(d,"local"))
626 	    *d = toUPPER(*d);
627 	else if (strEQ(d,"lt"))
628 	    *d = toUPPER(*d);
629 	else if (strEQ(d,"le"))
630 	    *d = toUPPER(*d);
631 	else if (strEQ(d,"locatime"))
632 	    *d = toUPPER(*d);
633 	else if (strEQ(d,"link"))
634 	    *d = toUPPER(*d);
635 	ID(d);
636     case 'm': case 'M':
637 	SNARFWORD;
638 	if (strEQ(d,"match")) {
639 	    set_array_base = TRUE;
640 	    XTERM(MATCH);
641 	}
642 	if (strEQ(d,"m"))
643 	    *d = toUPPER(*d);
644 	ID(d);
645     case 'n': case 'N':
646 	SNARFWORD;
647 	if (strEQ(d,"NF"))
648 	    do_chop = do_split = split_to_array = set_array_base = TRUE;
649 	if (strEQ(d,"next")) {
650 	    saw_line_op = TRUE;
651 	    XTERM(NEXT);
652 	}
653 	if (strEQ(d,"ne"))
654 	    *d = toUPPER(*d);
655 	ID(d);
656     case 'o': case 'O':
657 	SNARFWORD;
658 	if (strEQ(d,"ORS")) {
659 	    saw_ORS = TRUE;
660 	    d = "\\";
661 	}
662 	if (strEQ(d,"OFS")) {
663 	    saw_OFS = TRUE;
664 	    d = ",";
665 	}
666 	if (strEQ(d,"OFMT")) {
667 	    d = "#";
668 	}
669 	if (strEQ(d,"open"))
670 	    *d = toUPPER(*d);
671 	else if (strEQ(d,"ord"))
672 	    *d = toUPPER(*d);
673 	else if (strEQ(d,"oct"))
674 	    *d = toUPPER(*d);
675 	ID(d);
676     case 'p': case 'P':
677 	SNARFWORD;
678 	if (strEQ(d,"print")) {
679 	    XTERM(PRINT);
680 	}
681 	if (strEQ(d,"printf")) {
682 	    XTERM(PRINTF);
683 	}
684 	if (strEQ(d,"push"))
685 	    *d = toUPPER(*d);
686 	else if (strEQ(d,"pop"))
687 	    *d = toUPPER(*d);
688 	ID(d);
689     case 'q': case 'Q':
690 	SNARFWORD;
691 	ID(d);
692     case 'r': case 'R':
693 	SNARFWORD;
694 	if (strEQ(d,"RS")) {
695 	    d = "/";
696 	    saw_RS = TRUE;
697 	}
698 	if (strEQ(d,"rand")) {
699 	    yylval = ORAND;
700 	    XTERM(FUN1);
701 	}
702 	if (strEQ(d,"return"))
703 	    XTERM(RET);
704 	if (strEQ(d,"reset"))
705 	    *d = toUPPER(*d);
706 	else if (strEQ(d,"redo"))
707 	    *d = toUPPER(*d);
708 	else if (strEQ(d,"rename"))
709 	    *d = toUPPER(*d);
710 	ID(d);
711     case 's': case 'S':
712 	SNARFWORD;
713 	if (strEQ(d,"split")) {
714 	    set_array_base = TRUE;
715 	    XOP(SPLIT);
716 	}
717 	if (strEQ(d,"substr")) {
718 	    set_array_base = TRUE;
719 	    XTERM(SUBSTR);
720 	}
721 	if (strEQ(d,"sub"))
722 	    XTERM(SUB);
723 	if (strEQ(d,"sprintf")) {
724             /* In old awk, { print sprintf("str%sg"),"in" } prints
725              * "string"; in new awk, "in" is not considered an argument to
726              * sprintf, so the statement breaks.  To support both, the
727              * grammar treats arguments to SPRINTF_OLD like old awk,
728              * SPRINTF_NEW like new.  Here we return the appropriate one.
729              */
730 	    XTERM(old_awk ? SPRINTF_OLD : SPRINTF_NEW);
731         }
732 	if (strEQ(d,"sqrt")) {
733 	    yylval = OSQRT;
734 	    XTERM(FUN1);
735 	}
736 	if (strEQ(d,"SUBSEP")) {
737 	    d = ";";
738 	}
739 	if (strEQ(d,"sin")) {
740 	    yylval = OSIN;
741 	    XTERM(FUN1);
742 	}
743 	if (strEQ(d,"srand")) {
744 	    yylval = OSRAND;
745 	    XTERM(FUN1);
746 	}
747 	if (strEQ(d,"system")) {
748 	    yylval = OSYSTEM;
749 	    XTERM(FUN1);
750 	}
751 	if (strEQ(d,"s"))
752 	    *d = toUPPER(*d);
753 	else if (strEQ(d,"shift"))
754 	    *d = toUPPER(*d);
755 	else if (strEQ(d,"select"))
756 	    *d = toUPPER(*d);
757 	else if (strEQ(d,"seek"))
758 	    *d = toUPPER(*d);
759 	else if (strEQ(d,"stat"))
760 	    *d = toUPPER(*d);
761 	else if (strEQ(d,"study"))
762 	    *d = toUPPER(*d);
763 	else if (strEQ(d,"sleep"))
764 	    *d = toUPPER(*d);
765 	else if (strEQ(d,"symlink"))
766 	    *d = toUPPER(*d);
767 	else if (strEQ(d,"sort"))
768 	    *d = toUPPER(*d);
769 	ID(d);
770     case 't': case 'T':
771 	SNARFWORD;
772 	if (strEQ(d,"tr"))
773 	    *d = toUPPER(*d);
774 	else if (strEQ(d,"tell"))
775 	    *d = toUPPER(*d);
776 	else if (strEQ(d,"time"))
777 	    *d = toUPPER(*d);
778 	else if (strEQ(d,"times"))
779 	    *d = toUPPER(*d);
780 	ID(d);
781     case 'u': case 'U':
782 	SNARFWORD;
783 	if (strEQ(d,"until"))
784 	    *d = toUPPER(*d);
785 	else if (strEQ(d,"unless"))
786 	    *d = toUPPER(*d);
787 	else if (strEQ(d,"umask"))
788 	    *d = toUPPER(*d);
789 	else if (strEQ(d,"unshift"))
790 	    *d = toUPPER(*d);
791 	else if (strEQ(d,"unlink"))
792 	    *d = toUPPER(*d);
793 	else if (strEQ(d,"utime"))
794 	    *d = toUPPER(*d);
795 	ID(d);
796     case 'v': case 'V':
797 	SNARFWORD;
798 	if (strEQ(d,"values"))
799 	    *d = toUPPER(*d);
800 	ID(d);
801     case 'w': case 'W':
802 	SNARFWORD;
803 	if (strEQ(d,"while"))
804 	    XTERM(WHILE);
805 	if (strEQ(d,"write"))
806 	    *d = toUPPER(*d);
807 	else if (strEQ(d,"wait"))
808 	    *d = toUPPER(*d);
809 	ID(d);
810     case 'x': case 'X':
811 	SNARFWORD;
812 	if (strEQ(d,"x"))
813 	    *d = toUPPER(*d);
814 	ID(d);
815     case 'y': case 'Y':
816 	SNARFWORD;
817 	if (strEQ(d,"y"))
818 	    *d = toUPPER(*d);
819 	ID(d);
820     case 'z': case 'Z':
821 	SNARFWORD;
822 	ID(d);
823     }
824 }
825 
826 char *
scanpat(register char * s)827 scanpat(register char *s)
828 {
829     register char *d;
830 
831     switch (*s++) {
832     case '/':
833 	break;
834     default:
835 	fatal("Search pattern not found:\n%s",str_get(linestr));
836     }
837 
838     d = tokenbuf;
839     for (; *s; s++,d++) {
840 	if (*s == '\\') {
841 	    if (s[1] == '/')
842 		*d++ = *s++;
843 	    else if (s[1] == '\\')
844 		*d++ = *s++;
845 	    else if (s[1] == '[')
846 		*d++ = *s++;
847 	}
848 	else if (*s == '[') {
849 	    *d++ = *s++;
850 	    do {
851 		if (*s == '\\' && s[1])
852 		    *d++ = *s++;
853 		if (*s == '/' || (*s == '-' && s[1] == ']'))
854 		    *d++ = '\\';
855 		*d++ = *s++;
856 	    } while (*s && *s != ']');
857 	}
858 	else if (*s == '/')
859 	    break;
860 	*d = *s;
861     }
862     *d = '\0';
863 
864     if (!*s)
865 	fatal("Search pattern not terminated:\n%s",str_get(linestr));
866     s++;
867     yylval = string(tokenbuf,0);
868     return s;
869 }
870 
871 void
yyerror(char * s)872 yyerror(char *s)
873 {
874     fprintf(stderr,"%s in file %s at line %d\n",
875       s,filename,line);
876 }
877 
878 char *
scannum(register char * s)879 scannum(register char *s)
880 {
881     register char *d;
882 
883     switch (*s) {
884     case '1': case '2': case '3': case '4': case '5':
885     case '6': case '7': case '8': case '9': case '0' : case '.':
886 	d = tokenbuf;
887 	while (isDIGIT(*s)) {
888 	    *d++ = *s++;
889 	}
890 	if (*s == '.') {
891 	    if (isDIGIT(s[1])) {
892 		*d++ = *s++;
893 		while (isDIGIT(*s)) {
894 		    *d++ = *s++;
895 		}
896 	    }
897 	    else
898 		s++;
899 	}
900 	if (strchr("eE",*s) && strchr("+-0123456789",s[1])) {
901 	    *d++ = *s++;
902 	    if (*s == '+' || *s == '-')
903 		*d++ = *s++;
904 	    while (isDIGIT(*s))
905 		*d++ = *s++;
906 	}
907 	*d = '\0';
908 	yylval = string(tokenbuf,0);
909 	break;
910     }
911     return s;
912 }
913 
914 int
string(char * ptr,int len)915 string(char *ptr, int len)
916 {
917     int retval = mop;
918 
919     ops[mop++].ival = OSTRING + (1<<8);
920     if (!len)
921 	len = strlen(ptr);
922     ops[mop].cval = (char *) safemalloc(len+1);
923     strncpy(ops[mop].cval,ptr,len);
924     ops[mop++].cval[len] = '\0';
925     if (mop >= OPSMAX)
926 	fatal("Recompile a2p with larger OPSMAX\n");
927     return retval;
928 }
929 
930 int
oper0(int type)931 oper0(int type)
932 {
933     int retval = mop;
934 
935     if (type > 255)
936 	fatal("type > 255 (%d)\n",type);
937     ops[mop++].ival = type;
938     if (mop >= OPSMAX)
939 	fatal("Recompile a2p with larger OPSMAX\n");
940     return retval;
941 }
942 
943 int
oper1(int type,int arg1)944 oper1(int type, int arg1)
945 {
946     int retval = mop;
947 
948     if (type > 255)
949 	fatal("type > 255 (%d)\n",type);
950     ops[mop++].ival = type + (1<<8);
951     ops[mop++].ival = arg1;
952     if (mop >= OPSMAX)
953 	fatal("Recompile a2p with larger OPSMAX\n");
954     return retval;
955 }
956 
957 int
oper2(int type,int arg1,int arg2)958 oper2(int type, int arg1, int arg2)
959 {
960     int retval = mop;
961 
962     if (type > 255)
963 	fatal("type > 255 (%d)\n",type);
964     ops[mop++].ival = type + (2<<8);
965     ops[mop++].ival = arg1;
966     ops[mop++].ival = arg2;
967     if (mop >= OPSMAX)
968 	fatal("Recompile a2p with larger OPSMAX\n");
969     return retval;
970 }
971 
972 int
oper3(int type,int arg1,int arg2,int arg3)973 oper3(int type, int arg1, int arg2, int arg3)
974 {
975     int retval = mop;
976 
977     if (type > 255)
978 	fatal("type > 255 (%d)\n",type);
979     ops[mop++].ival = type + (3<<8);
980     ops[mop++].ival = arg1;
981     ops[mop++].ival = arg2;
982     ops[mop++].ival = arg3;
983     if (mop >= OPSMAX)
984 	fatal("Recompile a2p with larger OPSMAX\n");
985     return retval;
986 }
987 
988 int
oper4(int type,int arg1,int arg2,int arg3,int arg4)989 oper4(int type, int arg1, int arg2, int arg3, int arg4)
990 {
991     int retval = mop;
992 
993     if (type > 255)
994 	fatal("type > 255 (%d)\n",type);
995     ops[mop++].ival = type + (4<<8);
996     ops[mop++].ival = arg1;
997     ops[mop++].ival = arg2;
998     ops[mop++].ival = arg3;
999     ops[mop++].ival = arg4;
1000     if (mop >= OPSMAX)
1001 	fatal("Recompile a2p with larger OPSMAX\n");
1002     return retval;
1003 }
1004 
1005 int
oper5(int type,int arg1,int arg2,int arg3,int arg4,int arg5)1006 oper5(int type, int arg1, int arg2, int arg3, int arg4, int arg5)
1007 {
1008     int retval = mop;
1009 
1010     if (type > 255)
1011 	fatal("type > 255 (%d)\n",type);
1012     ops[mop++].ival = type + (5<<8);
1013     ops[mop++].ival = arg1;
1014     ops[mop++].ival = arg2;
1015     ops[mop++].ival = arg3;
1016     ops[mop++].ival = arg4;
1017     ops[mop++].ival = arg5;
1018     if (mop >= OPSMAX)
1019 	fatal("Recompile a2p with larger OPSMAX\n");
1020     return retval;
1021 }
1022 
1023 int depth = 0;
1024 
1025 void
dump(int branch)1026 dump(int branch)
1027 {
1028     register int type;
1029     register int len;
1030     register int i;
1031 
1032     type = ops[branch].ival;
1033     len = type >> 8;
1034     type &= 255;
1035     for (i=depth; i; i--)
1036 	printf(" ");
1037     if (type == OSTRING) {
1038 	printf("%-5d\"%s\"\n",branch,ops[branch+1].cval);
1039     }
1040     else {
1041 	printf("(%-5d%s %d\n",branch,opname[type],len);
1042 	depth++;
1043 	for (i=1; i<=len; i++)
1044 	    dump(ops[branch+i].ival);
1045 	depth--;
1046 	for (i=depth; i; i--)
1047 	    printf(" ");
1048 	printf(")\n");
1049     }
1050 }
1051 
1052 int
bl(int arg,int maybe)1053 bl(int arg, int maybe)
1054 {
1055     if (!arg)
1056 	return 0;
1057     else if ((ops[arg].ival & 255) != OBLOCK)
1058 	return oper2(OBLOCK,arg,maybe);
1059     else if ((ops[arg].ival >> 8) < 2)
1060 	return oper2(OBLOCK,ops[arg+1].ival,maybe);
1061     else
1062 	return arg;
1063 }
1064 
1065 void
fixup(STR * str)1066 fixup(STR *str)
1067 {
1068     register char *s;
1069     register char *t;
1070 
1071     for (s = str->str_ptr; *s; s++) {
1072 	if (*s == ';' && s[1] == ' ' && s[2] == '\n') {
1073 	    strcpy(s+1,s+2);
1074 	    s++;
1075 	}
1076 	else if (*s == '\n') {
1077 	    for (t = s+1; isSPACE(*t & 127); t++) ;
1078 	    t--;
1079 	    while (isSPACE(*t & 127) && *t != '\n') t--;
1080 	    if (*t == '\n' && t-s > 1) {
1081 		if (s[-1] == '{')
1082 		    s--;
1083 		strcpy(s+1,t);
1084 	    }
1085 	    s++;
1086 	}
1087     }
1088 }
1089 
1090 void
putlines(STR * str)1091 putlines(STR *str)
1092 {
1093     register char *d, *s, *t, *e;
1094     register int pos, newpos;
1095 
1096     d = tokenbuf;
1097     pos = 0;
1098     for (s = str->str_ptr; *s; s++) {
1099 	*d++ = *s;
1100 	pos++;
1101 	if (*s == '\n') {
1102 	    *d = '\0';
1103 	    d = tokenbuf;
1104 	    pos = 0;
1105 	    putone();
1106 	}
1107 	else if (*s == '\t')
1108 	    pos += 7;
1109 	if (pos > 78) {		/* split a long line? */
1110 	    *d-- = '\0';
1111 	    newpos = 0;
1112 	    for (t = tokenbuf; isSPACE(*t & 127); t++) {
1113 		if (*t == '\t')
1114 		    newpos += 8;
1115 		else
1116 		    newpos += 1;
1117 	    }
1118 	    e = d;
1119 	    while (d > tokenbuf && (*d != ' ' || d[-1] != ';'))
1120 		d--;
1121 	    if (d < t+10) {
1122 		d = e;
1123 		while (d > tokenbuf &&
1124 		  (*d != ' ' || d[-1] != '|' || d[-2] != '|') )
1125 		    d--;
1126 	    }
1127 	    if (d < t+10) {
1128 		d = e;
1129 		while (d > tokenbuf &&
1130 		  (*d != ' ' || d[-1] != '&' || d[-2] != '&') )
1131 		    d--;
1132 	    }
1133 	    if (d < t+10) {
1134 		d = e;
1135 		while (d > tokenbuf && (*d != ' ' || d[-1] != ','))
1136 		    d--;
1137 	    }
1138 	    if (d < t+10) {
1139 		d = e;
1140 		while (d > tokenbuf && *d != ' ')
1141 		    d--;
1142 	    }
1143 	    if (d > t+3) {
1144                 char save[2048];
1145                 strcpy(save, d);
1146 		*d = '\n';
1147                 d[1] = '\0';
1148 		putone();
1149 		putchar('\n');
1150 		if (d[-1] != ';' && !(newpos % 4)) {
1151 		    *t++ = ' ';
1152 		    *t++ = ' ';
1153 		    newpos += 2;
1154 		}
1155 		strcpy(t,save+1);
1156 		newpos += strlen(t);
1157 		d = t + strlen(t);
1158 		pos = newpos;
1159 	    }
1160 	    else
1161 		d = e + 1;
1162 	}
1163     }
1164 }
1165 
1166 void
putone(void)1167 putone(void)
1168 {
1169     register char *t;
1170 
1171     for (t = tokenbuf; *t; t++) {
1172 	*t &= 127;
1173 	if (*t == 127) {
1174 	    *t = ' ';
1175 	    strcpy(t+strlen(t)-1, "\t#???\n");
1176 	    checkers++;
1177 	}
1178     }
1179     t = tokenbuf;
1180     if (*t == '#') {
1181 	if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11))
1182 	    return;
1183 	if (strnEQ(t,"#!/usr/bin/awk",14) || strnEQ(t,"#! /usr/bin/awk",15))
1184 	    return;
1185     }
1186     fputs(tokenbuf,stdout);
1187 }
1188 
1189 int
numary(int arg)1190 numary(int arg)
1191 {
1192     STR *key;
1193     int dummy;
1194 
1195     key = walk(0,0,arg,&dummy,P_MIN);
1196     str_cat(key,"[]");
1197     hstore(symtab,key->str_ptr,str_make("1"));
1198     str_free(key);
1199     set_array_base = TRUE;
1200     return arg;
1201 }
1202 
1203 int
rememberargs(int arg)1204 rememberargs(int arg)
1205 {
1206     int type;
1207     STR *str;
1208 
1209     if (!arg)
1210 	return arg;
1211     type = ops[arg].ival & 255;
1212     if (type == OCOMMA) {
1213 	rememberargs(ops[arg+1].ival);
1214 	rememberargs(ops[arg+3].ival);
1215     }
1216     else if (type == OVAR) {
1217 	str = str_new(0);
1218 	hstore(curarghash,ops[ops[arg+1].ival+1].cval,str);
1219     }
1220     else
1221 	fatal("panic: unknown argument type %d, line %d\n",type,line);
1222     return arg;
1223 }
1224 
1225 int
aryrefarg(int arg)1226 aryrefarg(int arg)
1227 {
1228     int type = ops[arg].ival & 255;
1229     STR *str;
1230 
1231     if (type != OSTRING)
1232 	fatal("panic: aryrefarg %d, line %d\n",type,line);
1233     str = hfetch(curarghash,ops[arg+1].cval);
1234     if (str)
1235 	str_set(str,"*");
1236     return arg;
1237 }
1238 
1239 int
fixfargs(int name,int arg,int prevargs)1240 fixfargs(int name, int arg, int prevargs)
1241 {
1242     int type;
1243     STR *str;
1244     int numargs = 0;
1245 
1246     if (!arg)
1247 	return prevargs;
1248     type = ops[arg].ival & 255;
1249     if (type == OCOMMA) {
1250 	numargs = fixfargs(name,ops[arg+1].ival,prevargs);
1251 	numargs = fixfargs(name,ops[arg+3].ival,numargs);
1252     }
1253     else if (type == OVAR) {
1254 	str = hfetch(curarghash,ops[ops[arg+1].ival+1].cval);
1255 	if (strEQ(str_get(str),"*")) {
1256 	    char tmpbuf[128];
1257 
1258 	    str_set(str,"");		/* in case another routine has this */
1259 	    ops[arg].ival &= ~255;
1260 	    ops[arg].ival |= OSTAR;
1261 	    sprintf(tmpbuf,"%s:%d",ops[name+1].cval,prevargs);
1262 	    fprintf(stderr,"Adding %s\n",tmpbuf);
1263 	    str = str_new(0);
1264 	    str_set(str,"*");
1265 	    hstore(curarghash,tmpbuf,str);
1266 	}
1267 	numargs = prevargs + 1;
1268     }
1269     else
1270 	fatal("panic: unknown argument type %d, arg %d, line %d\n",
1271 	  type,prevargs+1,line);
1272     return numargs;
1273 }
1274 
1275 int
fixrargs(char * name,int arg,int prevargs)1276 fixrargs(char *name, int arg, int prevargs)
1277 {
1278     int type;
1279     STR *str;
1280     int numargs;
1281 
1282     if (!arg)
1283 	return prevargs;
1284     type = ops[arg].ival & 255;
1285     if (type == OCOMMA) {
1286 	numargs = fixrargs(name,ops[arg+1].ival,prevargs);
1287 	numargs = fixrargs(name,ops[arg+3].ival,numargs);
1288     }
1289     else {
1290 	char *tmpbuf = (char *) safemalloc(strlen(name) + (sizeof(prevargs) * 3) + 5);
1291 	sprintf(tmpbuf,"%s:%d",name,prevargs);
1292 	str = hfetch(curarghash,tmpbuf);
1293 	safefree(tmpbuf);
1294 	if (str && strEQ(str->str_ptr,"*")) {
1295 	    if (type == OVAR || type == OSTAR) {
1296 		ops[arg].ival &= ~255;
1297 		ops[arg].ival |= OSTAR;
1298 	    }
1299 	    else
1300 		fatal("Can't pass expression by reference as arg %d of %s\n",
1301 		    prevargs+1, name);
1302 	}
1303 	numargs = prevargs + 1;
1304     }
1305     return numargs;
1306 }
1307