xref: /netbsd-src/external/historical/nawk/dist/awkgram.y (revision 3b29b3e809927840440a4e214eaab8cecc06ed5e)
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14 
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24 
25 %{
26 #if HAVE_NBTOOL_CONFIG_H
27 #include "nbtool_config.h"
28 #endif
29 
30 #include <stdio.h>
31 #include <string.h>
32 #include "awk.h"
33 
/* Defined after the grammar; reports a duplicated name in a parameter list. */
34 void checkdup(Node *list, Cell *item);
35 int yywrap(void) { return(1); }	/* always answers 1 */
36 
/* Parser state read and updated by the grammar actions below. */
37 Node	*beginloc = 0;	/* extended by each XBEGIN action through linkum() */
38 Node	*endloc = 0;	/* extended by each XEND action through linkum() */
39 bool	infunc	= false;	/* = true if in arglist or body of func */
40 int	inloop	= 0;	/* >= 1 if in while, for, do; can't be bool, since loops can nest */
41 char	*curfname = 0;	/* current function name */
42 Node	*arglist = 0;	/* list of args for current function */
43 %}
44 
/*
 * Semantic value of a grammar symbol. The <tag> on a %token or %type line
 * names the member that a given symbol uses.
 */
45 %union {
46 	Node	*p;	/* <p>: Node pointers */
47 	Cell	*cp;	/* <cp>: Cell pointers (VAR, IVAR, VARNF, CALL, NUMBER, STRING, string) */
48 	int	i;	/* <i>: integer values */
49 	char	*s;	/* <s>: character strings (REGEXPR, reg_expr) */
50 }
51 
/*
 * Terminal symbols, grouped by the %union member that carries their value.
 * Single-character tokens such as ',' and '{' are declared with the <i> tag
 * so that actions can read them as integers.
 */
52 %token	<i>	FIRSTTOKEN	/* must be first */
53 %token	<p>	PROGRAM PASTAT PASTAT2 XBEGIN XEND
54 %token	<i>	NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
55 %token	<i>	ARRAY
56 %token	<i>	MATCH NOTMATCH MATCHOP
57 %token	<i>	FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE ZERO
58 %token	<i>	AND BOR APPEND EQ GE GT LE LT NE IN
59 %token	<i>	ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
60 %token	<i>	GENSUB SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
61 %token	<i>	ADD MINUS MULT DIVIDE MOD
62 %token	<i>	ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
63 %token	<i>	PRINT PRINTF SPRINTF
64 %token	<p>	ELSE INTEST CONDEXPR
65 %token	<i>	POSTINCR PREINCR POSTDECR PREDECR
66 %token	<cp>	VAR IVAR VARNF CALL NUMBER STRING
67 %token	<s>	REGEXPR
68 
/* %union member used for the value of each nonterminal. */
69 %type	<p>	pas pattern ppattern plist pplist patlist prarg term re
70 %type	<p>	pa_pat pa_stat pa_stats
71 %type	<s>	reg_expr
72 %type	<p>	simple_stmt opt_simple_stmt stmt stmtlist
73 %type	<p>	var varname funcname varlist
74 %type	<p>	for if else while
75 %type	<i>	do st
76 %type	<i>	pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
77 %type	<i>	subop print
78 %type	<cp>	string
79 
/*
 * Precedence and associativity. yacc ranks these lines in order of
 * appearance, so ASGNOP binds loosest and INDIRECT tightest.
 * GETLINE, RETURN, SPLIT, SUBSTR, WHILE, CAT, NOT, UMINUS, UPLUS, POWER,
 * DECR, INCR and INDIRECT have no %token line above; they are declared only
 * by appearing here. CAT and UMINUS are referenced from %prec clauses in the
 * rules.
 */
80 %right	ASGNOP
81 %right	'?'
82 %right	':'
83 %left	BOR
84 %left	AND
85 %left	GETLINE
86 %nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
87 %left	ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
88 %left	GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
89 %left	PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
90 %left	REGEXPR VAR VARNF IVAR WHILE '('
91 %left	CAT
92 %left	'+' '-'
93 %left	'*' '/' '%'
94 %left	NOT UMINUS UPLUS
95 %right	POWER
96 %right	DECR INCR
97 %left	INDIRECT
98 %token	LASTTOKEN	/* must be last */
99 
100 %%
101 
/*
 * program: first rule of the grammar (no %start is given, so this is where
 * parsing begins). When errorflag is 0 the finished tree is stored in winner
 * as a PROGRAM node built from beginloc, the pas value and endloc. On a parse
 * error the lookahead token is dropped with yyclearin, bracecheck() is called
 * and "bailing out" is reported through SYNTAX().
 */
102 program:
103 	  pas	{ if (errorflag==0)
104 			winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
105 	| error	{ yyclearin; bracecheck(); SYNTAX("bailing out"); }
106 	;
107 
/* and: an AND token followed by any number of NL tokens. */
108 and:
109 	  AND | and NL
110 	;
111 
/* bor: a BOR token followed by any number of NL tokens. */
112 bor:
113 	  BOR | bor NL
114 	;
115 
/* comma: a ',' followed by any number of NL tokens. */
116 comma:
117 	  ',' | comma NL
118 	;
119 
/* do: a DO token followed by any number of NL tokens. */
120 do:
121 	  DO | do NL
122 	;
123 
/* else: an ELSE token followed by any number of NL tokens. */
124 else:
125 	  ELSE | else NL
126 	;
127 
/*
 * for: the three loop headers together with their body statement.
 *   1. init ; condition ; step  - the condition is wrapped with notnull().
 *   2. init ; ; step            - no condition, NIL is passed instead.
 *   3. (varname IN varname)     - builds an IN node over makearr($5).
 * The mid-rule action {inloop++;} runs before the body is parsed and the
 * final action undoes it. A mid-rule action takes up a $n position of its
 * own, which is why the body is $12, $10 and $8 respectively.
 */
128 for:
129 	  FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
130 		{ --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }
131 	| FOR '(' opt_simple_stmt ';'  ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
132 		{ --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }
133 	| FOR '(' varname IN varname rparen {inloop++;} stmt
134 		{ --inloop; $$ = stat3(IN, $3, makearr($5), $8); }
135 	;
136 
/*
 * funcname: the name in a function definition, lexed either as VAR or as
 * CALL. setfname() checks the cell and records its name in curfname.
 * NOTE(review): neither action assigns $$, while pa_stat casts the value
 * back to Cell *; this appears to rely on yacc's default $$ = $1 - confirm.
 */
137 funcname:
138 	  VAR	{ setfname($1); }
139 	| CALL	{ setfname($1); }
140 	;
141 
/* if: IF '(' pattern rparen; the value is the condition passed through notnull(). */
142 if:
143 	  IF '(' pattern rparen		{ $$ = notnull($3); }
144 	;
145 
/* lbrace: a '{' followed by any number of NL tokens. */
146 lbrace:
147 	  '{' | lbrace NL
148 	;
149 
/* nl: one or more NL tokens. */
150 nl:
151 	  NL | nl NL
152 	;
153 
/* opt_nl: zero or more NL tokens; the empty case yields 0. */
154 opt_nl:
155 	  /* empty */	{ $$ = 0; }
156 	| nl
157 	;
158 
/* opt_pst: an optional run of NL and ';' separators; the empty case yields 0. */
159 opt_pst:
160 	  /* empty */	{ $$ = 0; }
161 	| pst
162 	;
163 
164 
/* opt_simple_stmt: an optional simple_stmt (used by the for header); empty yields 0. */
165 opt_simple_stmt:
166 	  /* empty */			{ $$ = 0; }
167 	| simple_stmt
168 	;
169 
/*
 * pas: the whole input. Either nothing but optional separators (value 0) or
 * a pa_stats list with optional separators on both sides (value of the list).
 */
170 pas:
171 	  opt_pst			{ $$ = 0; }
172 	| opt_pst pa_stats opt_pst	{ $$ = $2; }
173 	;
174 
/* pa_pat: a pattern used to select records; the value is notnull(pattern). */
175 pa_pat:
176 	  pattern	{ $$ = notnull($1); }
177 	;
178 
/*
 * pa_stat: one top-level item of the program.
 *   - pattern alone, or a "pat , pat" pair alone: the action defaults to a
 *     PRINT statement over rectonode().
 *   - pattern or pattern pair followed by a braced stmtlist.
 *   - a braced stmtlist alone: PASTAT with a NIL pattern.
 *   - XBEGIN / XEND blocks: the statements are appended to beginloc / endloc
 *     with linkum() and the item itself yields 0.
 *   - FUNC definition: the mid-rule action sets infunc once the parameter
 *     list has been read; because that action occupies $6, the body is $8.
 *     The closing action clears infunc and curfname, hands name, parameters
 *     and body to defn(), and yields 0.
 */
179 pa_stat:
180 	  pa_pat			{ $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
181 	| pa_pat lbrace stmtlist '}'	{ $$ = stat2(PASTAT, $1, $3); }
182 	| pa_pat ',' opt_nl pa_pat		{ $$ = pa2stat($1, $4, stat2(PRINT, rectonode(), NIL)); }
183 	| pa_pat ',' opt_nl pa_pat lbrace stmtlist '}'	{ $$ = pa2stat($1, $4, $6); }
184 	| lbrace stmtlist '}'		{ $$ = stat2(PASTAT, NIL, $2); }
185 	| XBEGIN lbrace stmtlist '}'
186 		{ beginloc = linkum(beginloc, $3); $$ = 0; }
187 	| XEND lbrace stmtlist '}'
188 		{ endloc = linkum(endloc, $3); $$ = 0; }
189 	| FUNC funcname '(' varlist rparen {infunc = true;} lbrace stmtlist '}'
190 		{ infunc = false; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
191 	;
192 
/* pa_stats: one or more pa_stat items, optionally separated, joined with linkum(). */
193 pa_stats:
194 	  pa_stat
195 	| pa_stats opt_pst pa_stat	{ $$ = linkum($1, $3); }
196 	;
197 
/* patlist: one or more patterns separated by comma, joined with linkum(). */
198 patlist:
199 	  pattern
200 	| patlist comma pattern		{ $$ = linkum($1, $3); }
201 	;
202 
/*
 * ppattern: a reduced form of pattern. It has the same alternatives except
 * the relational comparisons (EQ GE GT LE LT NE) and the '|' GETLINE forms.
 * It is referenced only from pplist, which prarg uses for unparenthesised
 * print arguments; presumably this keeps '>' and '|' free to act as output
 * redirection in simple_stmt - confirm.
 *
 * MATCHOP with a reg_expr, or with a right operand for which constnode() is
 * true, compiles the regular expression here with makedfa() and frees the
 * operand. Otherwise the right operand is kept and (Node *)1 is passed as
 * the first operand of op3(); NOTE(review): that looks like a flag meaning
 * "regex not compiled yet" - confirm against the evaluator.
 */
203 ppattern:
204 	  var ASGNOP ppattern		{ $$ = op2($2, $1, $3); }
205 	| ppattern '?' ppattern ':' ppattern %prec '?'
206 	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
207 	| ppattern bor ppattern %prec BOR
208 		{ $$ = op2(BOR, notnull($1), notnull($3)); }
209 	| ppattern and ppattern %prec AND
210 		{ $$ = op2(AND, notnull($1), notnull($3)); }
211 	| ppattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); free($3); }
212 	| ppattern MATCHOP ppattern
213 		{ if (constnode($3)) {
214 			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
215 			free($3);
216 		  } else
217 			$$ = op3($2, (Node *)1, $1, $3); }
218 	| ppattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
219 	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
220 	| ppattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
221 	| re
222 	| term
223 	;
224 
/*
 * pattern: the general expression form.
 *   - assignment, ?:, BOR and AND (operands of the latter three that act as
 *     conditions go through notnull()).
 *   - the six relational operators, each built with op2() using the token
 *     value as the node type.
 *   - MATCHOP: with a reg_expr, or with a right operand for which
 *     constnode() is true, the regular expression is compiled here with
 *     makedfa() and the operand freed; otherwise the operand is kept and
 *     (Node *)1 is passed as the first operand of op3(). NOTE(review): that
 *     looks like a flag meaning "regex not compiled yet" - confirm.
 *   - IN tests, with a single subscript or a parenthesised plist.
 *   - "pattern | GETLINE [var]": reported through SYNTAX() when safe is set;
 *     no $$ is assigned in that case.
 *   - juxtaposition "pattern term" builds a CAT node (precedence from %prec CAT).
 */
225 pattern:
226 	  var ASGNOP pattern		{ $$ = op2($2, $1, $3); }
227 	| pattern '?' pattern ':' pattern %prec '?'
228 	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
229 	| pattern bor pattern %prec BOR
230 		{ $$ = op2(BOR, notnull($1), notnull($3)); }
231 	| pattern and pattern %prec AND
232 		{ $$ = op2(AND, notnull($1), notnull($3)); }
233 	| pattern EQ pattern		{ $$ = op2($2, $1, $3); }
234 	| pattern GE pattern		{ $$ = op2($2, $1, $3); }
235 	| pattern GT pattern		{ $$ = op2($2, $1, $3); }
236 	| pattern LE pattern		{ $$ = op2($2, $1, $3); }
237 	| pattern LT pattern		{ $$ = op2($2, $1, $3); }
238 	| pattern NE pattern		{ $$ = op2($2, $1, $3); }
239 	| pattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); free($3); }
240 	| pattern MATCHOP pattern
241 		{ if (constnode($3)) {
242 			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
243 			free($3);
244 		  } else
245 			$$ = op3($2, (Node *)1, $1, $3); }
246 	| pattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
247 	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
248 	| pattern '|' GETLINE var	{
249 			if (safe) SYNTAX("cmd | getline is unsafe");
250 			else $$ = op3(GETLINE, $4, itonp($2), $1); }
251 	| pattern '|' GETLINE		{
252 			if (safe) SYNTAX("cmd | getline is unsafe");
253 			else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
254 	| pattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
255 	| re
256 	| term
257 	;
258 
/* plist: two or more patterns separated by comma, joined with linkum(); only used inside parentheses. */
259 plist:
260 	  pattern comma pattern		{ $$ = linkum($1, $3); }
261 	| plist comma pattern		{ $$ = linkum($1, $3); }
262 	;
263 
/* pplist: one or more ppatterns separated by comma, joined with linkum(). */
264 pplist:
265 	  ppattern
266 	| pplist comma ppattern		{ $$ = linkum($1, $3); }
267 	;
268 
/*
 * prarg: the argument part of print/printf. Nothing at all stands for
 * rectonode(); otherwise an unparenthesised pplist or a parenthesised plist.
 */
269 prarg:
270 	  /* empty */			{ $$ = rectonode(); }
271 	| pplist
272 	| '(' plist ')'			{ $$ = $2; }
273 	;
274 
/* print: either output keyword; the token value is later used as the node type by simple_stmt. */
275 print:
276 	  PRINT | PRINTF
277 	;
278 
/* pst: one or more separators, each an NL or a ';'. */
279 pst:
280 	  NL | ';' | pst NL | pst ';'
281 	;
282 
/* rbrace: a '}' followed by any number of NL tokens. */
283 rbrace:
284 	  '}' | rbrace NL
285 	;
286 
/*
 * re: a regular expression standing alone as an expression. It becomes a
 * MATCH node over rectonode() and the automaton returned by makedfa(); the
 * regex text is freed once makedfa() has been called. A leading NOT wraps
 * the (notnull'ed) result in a NOT node.
 */
287 re:
288 	   reg_expr
289 		{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); free($1); }
290 	| NOT re	{ $$ = op1(NOT, notnull($2)); }
291 	;
292 
/*
 * reg_expr: a /.../ literal. startreg() is called from a mid-rule action as
 * soon as the opening '/' has been seen; that action occupies $2, so the
 * REGEXPR string is $3 and becomes the value of the rule.
 */
293 reg_expr:
294 	  '/' {startreg();} REGEXPR '/'		{ $$ = $3; }
295 	;
296 
/* rparen: a ')' followed by any number of NL tokens. */
297 rparen:
298 	  ')' | rparen NL
299 	;
300 
/*
 * simple_stmt: statements that may also appear in a for header.
 *   - print/printf with '|', APPEND or GT redirection: reported through
 *     SYNTAX() when safe is set (no $$ is assigned then); otherwise a stat3
 *     node whose type is the print token, carrying the arguments, the
 *     redirection token (via itonp) and the target term.
 *   - print/printf without redirection: NIL for both redirection slots.
 *   - DELETE of one element (with subscripts) or of a whole array (0).
 *   - any pattern, converted with exptostat().
 *   - error: drops the lookahead and reports "illegal statement".
 */
301 simple_stmt:
302 	  print prarg '|' term		{
303 			if (safe) SYNTAX("print | is unsafe");
304 			else $$ = stat3($1, $2, itonp($3), $4); }
305 	| print prarg APPEND term	{
306 			if (safe) SYNTAX("print >> is unsafe");
307 			else $$ = stat3($1, $2, itonp($3), $4); }
308 	| print prarg GT term		{
309 			if (safe) SYNTAX("print > is unsafe");
310 			else $$ = stat3($1, $2, itonp($3), $4); }
311 	| print prarg			{ $$ = stat3($1, $2, NIL, NIL); }
312 	| DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
313 	| DELETE varname		 { $$ = stat2(DELETE, makearr($2), 0); }
314 	| pattern			{ $$ = exptostat($1); }
315 	| error				{ yyclearin; SYNTAX("illegal statement"); }
316 	;
317 
/* st: statement terminator - one or more NL, or a ';' followed by optional NL. */
318 st:
319 	  nl
320 	| ';' opt_nl
321 	;
322 
/*
 * stmt: one complete statement including its terminator.
 *   - BREAK / CONTINUE: reported through SYNTAX() when inloop is 0; the
 *     node is built either way.
 *   - do ... WHILE: mid-rule actions raise inloop before the body and lower
 *     it after; each takes a $n position, so the body is $3 and the
 *     condition $7.
 *   - while: same inloop bracketing, body is $3.
 *   - NEXT / NEXTFILE: reported through SYNTAX() when infunc is set; the
 *     node is built either way.
 *   - "for" and "simple_stmt st" carry no action of their own.
 *   - a lone ';' (plus optional NL) yields 0.
 */
323 stmt:
324 	  BREAK st		{ if (!inloop) SYNTAX("break illegal outside of loops");
325 				  $$ = stat1(BREAK, NIL); }
326 	| CONTINUE st		{  if (!inloop) SYNTAX("continue illegal outside of loops");
327 				  $$ = stat1(CONTINUE, NIL); }
328 	| do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
329 		{ $$ = stat2(DO, $3, notnull($7)); }
330 	| EXIT pattern st	{ $$ = stat1(EXIT, $2); }
331 	| EXIT st		{ $$ = stat1(EXIT, NIL); }
332 	| for
333 	| if stmt else stmt	{ $$ = stat3(IF, $1, $2, $4); }
334 	| if stmt		{ $$ = stat3(IF, $1, $2, NIL); }
335 	| lbrace stmtlist rbrace { $$ = $2; }
336 	| NEXT st	{ if (infunc)
337 				SYNTAX("next is illegal inside a function");
338 			  $$ = stat1(NEXT, NIL); }
339 	| NEXTFILE st	{ if (infunc)
340 				SYNTAX("nextfile is illegal inside a function");
341 			  $$ = stat1(NEXTFILE, NIL); }
342 	| RETURN pattern st	{ $$ = stat1(RETURN, $2); }
343 	| RETURN st		{ $$ = stat1(RETURN, NIL); }
344 	| simple_stmt st
345 	| while {inloop++;} stmt	{ --inloop; $$ = stat2(WHILE, $1, $3); }
346 	| ';' opt_nl		{ $$ = 0; }
347 	;
348 
/* stmtlist: one or more statements, joined with linkum(). */
349 stmtlist:
350 	  stmt
351 	| stmtlist stmt		{ $$ = linkum($1, $2); }
352 	;
353 
/* subop: SUB or GSUB; the token value is later used as the node type by term. */
354 subop:
355 	  SUB | GSUB
356 	;
357 
/* string: one or more adjacent STRING tokens, merged pairwise with catstr(). */
358 string:
359 	  STRING
360 	| string STRING		{ $$ = catstr($1, $2); }
361 	;
362 
/*
 * term: arithmetic, built-in function calls and primary expressions.
 *
 *   - "term '/' ASGNOP term" builds a DIVEQ node.
 *   - unary '-', '+' and NOT all take their precedence from %prec UMINUS.
 *   - BLTIN with no arguments, with or without "()", operates on rectonode().
 *   - Functions taking a regular expression (GENSUB, MATCHFCN, SPLIT, subop)
 *     come in two flavours. Given a reg_expr literal, or an operand for which
 *     constnode() is true, the expression is compiled here with makedfa() and
 *     the operand is freed. Otherwise the operand is kept and (Node *)1 is
 *     passed in the leading slot. NOTE(review): that looks like a flag
 *     meaning "regex not compiled yet" - confirm against the evaluator.
 *   - The regex text behind a reg_expr is released with free() right after
 *     makedfa() in every alternative. The two GENSUB reg_expr alternatives
 *     used to omit that call and leaked the string; they now match their
 *     subop/MATCHFCN/SPLIT siblings.
 *   - INDEX with a reg_expr is reported through SYNTAX(); the node that is
 *     still built carries the raw regex text cast to Node *.
 *   - The last operand of SPLIT is (Node *)STRING or (Node *)REGEXPR, i.e. a
 *     token value stored in a pointer slot, according to the separator form.
 */
363 term:
364  	  term '/' ASGNOP term		{ $$ = op2(DIVEQ, $1, $4); }
365  	| term '+' term			{ $$ = op2(ADD, $1, $3); }
366 	| term '-' term			{ $$ = op2(MINUS, $1, $3); }
367 	| term '*' term			{ $$ = op2(MULT, $1, $3); }
368 	| term '/' term			{ $$ = op2(DIVIDE, $1, $3); }
369 	| term '%' term			{ $$ = op2(MOD, $1, $3); }
370 	| term POWER term		{ $$ = op2(POWER, $1, $3); }
371 	| '-' term %prec UMINUS		{ $$ = op1(UMINUS, $2); }
372 	| '+' term %prec UMINUS		{ $$ = op1(UPLUS, $2); }
373 	| NOT term %prec UMINUS		{ $$ = op1(NOT, notnull($2)); }
374 	| BLTIN '(' ')'			{ $$ = op2(BLTIN, itonp($1), rectonode()); }
375 	| BLTIN '(' patlist ')'		{ $$ = op2(BLTIN, itonp($1), $3); }
376 	| BLTIN				{ $$ = op2(BLTIN, itonp($1), rectonode()); }
377 	| CALL '(' ')'			{ $$ = op2(CALL, celltonode($1,CVAR), NIL); }
378 	| CALL '(' patlist ')'		{ $$ = op2(CALL, celltonode($1,CVAR), $3); }
379 	| CLOSE term			{ $$ = op1(CLOSE, $2); }
380 	| DECR var			{ $$ = op1(PREDECR, $2); }
381 	| INCR var			{ $$ = op1(PREINCR, $2); }
382 	| var DECR			{ $$ = op1(POSTDECR, $1); }
383 	| var INCR			{ $$ = op1(POSTINCR, $1); }
384 	| GENSUB '(' reg_expr comma pattern comma pattern ')'
385 		{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, rectonode()); free($3); }
386 	| GENSUB '(' pattern comma pattern comma pattern ')'
387 		{ if (constnode($3)) {
388 			$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3), 1), $5, $7, rectonode());
389 			free($3);
390 		  } else
391 			$$ = op5(GENSUB, (Node *)1, $3, $5, $7, rectonode());
392 		}
393 	| GENSUB '(' reg_expr comma pattern comma pattern comma pattern ')'
394 		{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, $9); free($3); }
395 	| GENSUB '(' pattern comma pattern comma pattern comma pattern ')'
396 		{ if (constnode($3)) {
397 			$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3),1), $5,$7,$9);
398 			free($3);
399 		  } else
400 			$$ = op5(GENSUB, (Node *)1, $3, $5, $7, $9);
401 		}
402 	| GETLINE var LT term		{ $$ = op3(GETLINE, $2, itonp($3), $4); }
403 	| GETLINE LT term		{ $$ = op3(GETLINE, NIL, itonp($2), $3); }
404 	| GETLINE var			{ $$ = op3(GETLINE, $2, NIL, NIL); }
405 	| GETLINE			{ $$ = op3(GETLINE, NIL, NIL, NIL); }
406 	| INDEX '(' pattern comma pattern ')'
407 		{ $$ = op2(INDEX, $3, $5); }
408 	| INDEX '(' pattern comma reg_expr ')'
409 		{ SYNTAX("index() doesn't permit regular expressions");
410 		  $$ = op2(INDEX, $3, (Node*)$5); }
411 	| '(' pattern ')'		{ $$ = $2; }
412 	| MATCHFCN '(' pattern comma reg_expr ')'
413 		{ $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); free($5); }
414 	| MATCHFCN '(' pattern comma pattern ')'
415 		{ if (constnode($5)) {
416 			$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
417 			free($5);
418 		  } else
419 			$$ = op3(MATCHFCN, (Node *)1, $3, $5); }
420 	| NUMBER			{ $$ = celltonode($1, CCON); }
421 	| SPLIT '(' pattern comma varname comma pattern ')'     /* string */
422 		{ $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
423 	| SPLIT '(' pattern comma varname comma reg_expr ')'    /* const /regexp/ */
424 		{ $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); free($7); }
425 	| SPLIT '(' pattern comma varname ')'
426 		{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }  /* default */
427 	| SPRINTF '(' patlist ')'	{ $$ = op1($1, $3); }
428 	| string	 		{ $$ = celltonode($1, CCON); }
429 	| subop '(' reg_expr comma pattern ')'
430 		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); free($3); }
431 	| subop '(' pattern comma pattern ')'
432 		{ if (constnode($3)) {
433 			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
434 			free($3);
435 		  } else
436 			$$ = op4($1, (Node *)1, $3, $5, rectonode()); }
437 	| subop '(' reg_expr comma pattern comma var ')'
438 		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); free($3); }
439 	| subop '(' pattern comma pattern comma var ')'
440 		{ if (constnode($3)) {
441 			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
442 			free($3);
443 		  } else
444 			$$ = op4($1, (Node *)1, $3, $5, $7); }
445 	| SUBSTR '(' pattern comma pattern comma pattern ')'
446 		{ $$ = op3(SUBSTR, $3, $5, $7); }
447 	| SUBSTR '(' pattern comma pattern ')'
448 		{ $$ = op3(SUBSTR, $3, $5, NIL); }
449 	| var
450 	;
451 
/*
 * var: something that names a variable - a plain varname, an array element
 * (ARRAY node over makearr() and the subscript list), an IVAR token, or
 * INDIRECT applied to a term; the last two both build an INDIRECT node.
 */
452 var:
453 	  varname
454 	| varname '[' patlist ']'	{ $$ = op2(ARRAY, makearr($1), $3); }
455 	| IVAR				{ $$ = op1(INDIRECT, celltonode($1, CVAR)); }
456 	| INDIRECT term	 		{ $$ = op1(INDIRECT, $2); }
457 	;
458 
/*
 * varlist: the parameter names of a function definition, possibly empty.
 * Every alternative stores the list built so far in the global arglist as
 * well as in $$. Before a further name is appended, checkdup() compares it
 * with the names already collected.
 */
459 varlist:
460 	  /* nothing */		{ arglist = $$ = 0; }
461 	| VAR			{ arglist = $$ = celltonode($1,CVAR); }
462 	| varlist comma VAR	{
463 			checkdup($1, $3);
464 			arglist = $$ = linkum($1,celltonode($3,CVAR)); }
465 	;
466 
/*
 * varname: a bare name. VAR becomes a CVAR node via celltonode(), ARG an ARG
 * node around its integer value, and VARNF a VARNF node around the cell
 * pointer cast to Node *.
 */
467 varname:
468 	  VAR			{ $$ = celltonode($1, CVAR); }
469 	| ARG 			{ $$ = op1(ARG, itonp($1)); }
470 	| VARNF			{ $$ = op1(VARNF, (Node *) $1); }
471 	;
472 
473 
/* while: WHILE '(' pattern rparen; the value is the condition passed through notnull(). */
474 while:
475 	  WHILE '(' pattern rparen	{ $$ = notnull($3); }
476 	;
477 
478 %%
479 
480 void setfname(Cell *p)
481 {
482 	if (isarr(p))
483 		SYNTAX("%s is an array, not a function", p->nval);
484 	else if (isfcn(p))
485 		SYNTAX("you can't define function %s more than once", p->nval);
486 	curfname = p->nval;
487 }
488 
489 int constnode(Node *p)
490 {
491 	return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
492 }
493 
494 char *strnode(Node *p)
495 {
496 	return ((Cell *)(p->narg[0]))->sval;
497 }
498 
499 Node *notnull(Node *n)
500 {
501 	switch (n->nobj) {
502 	case LE: case LT: case EQ: case NE: case GT: case GE:
503 	case BOR: case AND: case NOT:
504 		return n;
505 	default:
506 		return op2(NE, n, nullnode);
507 	}
508 }
509 
510 void checkdup(Node *vl, Cell *cp)	/* check if name already in list */
511 {
512 	char *s = cp->nval;
513 	for ( ; vl; vl = vl->nnext) {
514 		if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
515 			SYNTAX("duplicate argument %s", s);
516 			break;
517 		}
518 	}
519 }
520