xref: /netbsd-src/external/historical/nawk/dist/awkgram.y (revision cb861154c176d3dcc8ff846f449e3c16a5f5edb5)
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14 
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24 
/*
 * C prologue copied verbatim into the generated parser: includes, forward
 * declarations and the parser-wide state shared by the rule actions below.
 */
25 %{
26 #if HAVE_NBTOOL_CONFIG_H
27 #include "nbtool_config.h"
28 #endif
29 
30 #include <stdio.h>
31 #include <string.h>
32 #include "awk.h"
33 
34 void checkdup(Node *list, Cell *item);
35 int yywrap(void) { return(1); }	/* always returns 1 */
36 
37 Node	*beginloc = 0;	/* BEGIN actions, chained with linkum() in pa_stat */
38 Node	*endloc = 0;	/* END actions, chained with linkum() in pa_stat */
39 int	infunc	= 0;	/* nonzero while parsing a func body; bumped after the parameter list, dropped after the closing brace */
40 int	inloop	= 0;	/* nesting depth of enclosing while, for, do bodies; 0 outside any loop */
41 char	*curfname = 0;	/* current function name; set by setfname(), cleared when the definition ends */
42 Node	*arglist = 0;	/* list of args for current function; assigned by the varlist rule */
43 %}
44 
/* Semantic value carried by every token and nonterminal. */
45 %union {
46 	Node	*p;	/* parse-tree node: most nonterminals (see the %type <p> lines) */
47 	Cell	*cp;	/* symbol cell: VAR, IVAR, VARNF, CALL, NUMBER, STRING */
48 	int	i;	/* integer value: operator/keyword tokens and the newline-absorbing helpers */
49 	char	*s;	/* string value: REGEXPR and reg_expr */
50 }
51 
/*
 * Token declarations.  Many of these names never appear in the grammar rules
 * of this file (PROGRAM, PASTAT, CONDEXPR, ...); the rule actions use them as
 * node-type codes.
 */
52 %token	<i>	FIRSTTOKEN	/* must be first */
53 %token	<p>	PROGRAM PASTAT PASTAT2 XBEGIN XEND
54 %token	<i>	NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
55 %token	<i>	ARRAY
56 %token	<i>	MATCH NOTMATCH MATCHOP
57 %token	<i>	FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE
58 %token	<i>	AND BOR APPEND EQ GE GT LE LT NE IN
59 %token	<i>	ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
60 %token	<i>	SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
61 %token	<i>	ADD MINUS MULT DIVIDE MOD
62 %token	<i>	ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
63 %token	<i>	PRINT PRINTF SPRINTF
64 %token	<p>	ELSE INTEST CONDEXPR
65 %token	<i>	POSTINCR PREINCR POSTDECR PREDECR
66 %token	<cp>	VAR IVAR VARNF CALL NUMBER STRING
67 %token	<s>	REGEXPR
68 
/* Semantic value types of the nonterminals. */
69 %type	<p>	pas pattern ppattern plist pplist patlist prarg term re
70 %type	<p>	pa_pat pa_stat pa_stats
71 %type	<s>	reg_expr
72 %type	<p>	simple_stmt opt_simple_stmt stmt stmtlist
73 %type	<p>	var varname funcname varlist
74 %type	<p>	for if else while
75 %type	<i>	do st
76 %type	<i>	pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
77 %type	<i>	subop print
78 
/*
 * Precedence and associativity, lowest precedence first (yacc gives later
 * lines higher precedence).  Tokens that appear only here and not in a
 * %token line above (GETLINE, GENSUB, RETURN, SPLIT, SUBSTR, WHILE, CAT,
 * NOT, UMINUS, POWER, DECR, INCR, INDIRECT) are declared by these lines and
 * carry no <type> tag.
 */
79 %right	ASGNOP
80 %right	'?'
81 %right	':'
82 %left	BOR
83 %left	AND
84 %left	GETLINE
85 %nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
86 %left	ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
87 %left	GENSUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
88 %left	PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
89 %left	REGEXPR VAR VARNF IVAR WHILE '('
90 %left	CAT
91 %left	'+' '-'
92 %left	'*' '/' '%'
93 %left	NOT UMINUS
94 %right	POWER
95 %right	DECR INCR
96 %left	INDIRECT
97 %token	LASTTOKEN	/* must be last */
98 
99 %%
100 
/*
 * Start symbol.  If no error has been flagged, the whole program is stored in
 * `winner` as a PROGRAM node holding the BEGIN list, the pattern-action list
 * and the END list.  On a parse error the lookahead is discarded,
 * bracecheck() is called and "bailing out" is reported.
 */
101 program:
102 	  pas	{ if (errorflag==0)
103 			winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
104 	| error	{ yyclearin; bracecheck(); SYNTAX("bailing out"); }
105 	;
106 
/*
 * Newline-absorbing helpers: each accepts its token followed by any number
 * of NL tokens, so a line break is allowed after &&, ||, a comma, `do` and
 * `else`.  No actions are given; the value is yacc's default ($$ = $1).
 */
107 and:
108 	  AND | and NL
109 	;
110 
111 bor:
112 	  BOR | bor NL
113 	;
114 
115 comma:
116 	  ',' | comma NL
117 	;
118 
119 do:
120 	  DO | do NL
121 	;
122 
123 else:
124 	  ELSE | else NL
125 	;
126 
/*
 * The three forms of `for`:
 *   for (init; cond; step) body   -> stat4(FOR, init, notnull(cond), step, body)
 *   for (init; ; step) body       -> stat4(FOR, init, NIL, step, body)
 *   for (var in array) body       -> stat3(IN, var, makearr(array), body)
 * The mid-rule action {inloop++;} runs before the body is parsed and counts
 * as a symbol, which is why the body is $12, $10 and $8 respectively.  The
 * final action undoes the increment.
 */
127 for:
128 	  FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
129 		{ --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }
130 	| FOR '(' opt_simple_stmt ';'  ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
131 		{ --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }
132 	| FOR '(' varname IN varname rparen {inloop++;} stmt
133 		{ --inloop; $$ = stat3(IN, $3, makearr($5), $8); }
134 	;
135 
/*
 * Name in a function definition, lexed either as VAR or as CALL.  setfname()
 * reports an array or an already-defined function and records the name in
 * curfname.  Neither action assigns $$, so the value is yacc's default
 * ($$ = $1, the token's Cell pointer); pa_stat casts it back to Cell *.
 */
136 funcname:
137 	  VAR	{ setfname($1); }
138 	| CALL	{ setfname($1); }
139 	;
140 
/* Head of an if statement; the value is the condition passed through notnull(). */
141 if:
142 	  IF '(' pattern rparen		{ $$ = notnull($3); }
143 	;
144 
/* An opening brace followed by any number of newlines. */
145 lbrace:
146 	  '{' | lbrace NL
147 	;
148 
/* One or more newlines. */
149 nl:
150 	  NL | nl NL
151 	;
152 
/* Zero or more newlines; the empty case yields 0. */
153 opt_nl:
154 	  /* empty */	{ $$ = 0; }
155 	| nl
156 	;
157 
/* Optional run of pattern-statement terminators (see pst); the empty case yields 0. */
158 opt_pst:
159 	  /* empty */	{ $$ = 0; }
160 	| pst
161 	;
162 
163 
/* Optional simple statement, used for the init and step parts of `for`; the empty case yields 0. */
164 opt_simple_stmt:
165 	  /* empty */			{ $$ = 0; }
166 	| simple_stmt
167 	;
168 
/*
 * The full list of pattern-action statements, with optional terminators on
 * either side.  A program with no statements at all yields 0.
 */
169 pas:
170 	  opt_pst			{ $$ = 0; }
171 	| opt_pst pa_stats opt_pst	{ $$ = $2; }
172 	;
173 
/* Pattern part of a pattern-action statement, passed through notnull(). */
174 pa_pat:
175 	  pattern	{ $$ = notnull($1); }
176 	;
177 
/*
 * One top-level item:
 *   pattern                  -> PASTAT whose action is a PRINT of rectonode()
 *   pattern { stmts }        -> PASTAT with the given action
 *   pat1, pat2 [{ stmts }]   -> two-pattern statement built by pa2stat(),
 *                               with the same PRINT action when no body is given
 *   { stmts }                -> PASTAT with a NIL pattern
 *   BEGIN / END { stmts }    -> appended to beginloc / endloc; yields 0
 *   func name(params) { .. } -> registered through defn(); yields 0
 * In the func form infunc is incremented by the mid-rule action after the
 * parameter list, so the body (stmtlist) is $8; the final action decrements
 * infunc and clears curfname.
 */
178 pa_stat:
179 	  pa_pat			{ $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
180 	| pa_pat lbrace stmtlist '}'	{ $$ = stat2(PASTAT, $1, $3); }
181 	| pa_pat ',' pa_pat		{ $$ = pa2stat($1, $3, stat2(PRINT, rectonode(), NIL)); }
182 	| pa_pat ',' pa_pat lbrace stmtlist '}'	{ $$ = pa2stat($1, $3, $5); }
183 	| lbrace stmtlist '}'		{ $$ = stat2(PASTAT, NIL, $2); }
184 	| XBEGIN lbrace stmtlist '}'
185 		{ beginloc = linkum(beginloc, $3); $$ = 0; }
186 	| XEND lbrace stmtlist '}'
187 		{ endloc = linkum(endloc, $3); $$ = 0; }
188 	| FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}'
189 		{ infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
190 	;
191 
/* One or more pa_stat items, optionally separated by terminators, chained with linkum(). */
192 pa_stats:
193 	  pa_stat
194 	| pa_stats opt_pst pa_stat	{ $$ = linkum($1, $3); }
195 	;
196 
/* One or more comma-separated patterns, chained with linkum(). */
197 patlist:
198 	  pattern
199 	| patlist comma pattern		{ $$ = linkum($1, $3); }
200 	;
201 
/*
 * Restricted expression used for unparenthesized print arguments (reached
 * through pplist and prarg).  It mirrors `pattern` but has no comparison
 * operators (EQ, GE, GT, LE, LT, NE) and no `| getline` forms.
 * NOTE(review): presumably this keeps `>` and `|` free to act as output
 * redirection in simple_stmt -- confirm.
 *
 * For MATCHOP with a non-literal right operand: when constnode() says the
 * operand is a constant, its string is compiled with makedfa() at parse time
 * and the first op3 argument is NIL; otherwise the operand node itself is
 * kept and the first argument is (Node *)1.
 */
202 ppattern:
203 	  var ASGNOP ppattern		{ $$ = op2($2, $1, $3); }
204 	| ppattern '?' ppattern ':' ppattern %prec '?'
205 	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
206 	| ppattern bor ppattern %prec BOR
207 		{ $$ = op2(BOR, notnull($1), notnull($3)); }
208 	| ppattern and ppattern %prec AND
209 		{ $$ = op2(AND, notnull($1), notnull($3)); }
210 	| ppattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
211 	| ppattern MATCHOP ppattern
212 		{ if (constnode($3))
213 			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
214 		  else
215 			$$ = op3($2, (Node *)1, $1, $3); }
216 	| ppattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
217 	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
218 	| ppattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
219 	| re
220 	| term
221 	;
222 
/*
 * General expression.  Operands of ?:, || and && are passed through
 * notnull().  Assignments, comparisons and matches use the token's own value
 * ($2) as the node's operator.  Juxtaposition (`pattern term`) is
 * concatenation, resolved with the precedence of CAT.
 *
 * MATCHOP follows the same scheme as in ppattern: a literal /re/ or a
 * constant right operand is compiled with makedfa() at parse time (first op3
 * argument NIL); any other operand is kept as a node with first argument
 * (Node *)1.
 *
 * Both `cmd | getline` forms are rejected with a syntax error when `safe`
 * is set; in that case $$ is not assigned by the action.
 */
223 pattern:
224 	  var ASGNOP pattern		{ $$ = op2($2, $1, $3); }
225 	| pattern '?' pattern ':' pattern %prec '?'
226 	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
227 	| pattern bor pattern %prec BOR
228 		{ $$ = op2(BOR, notnull($1), notnull($3)); }
229 	| pattern and pattern %prec AND
230 		{ $$ = op2(AND, notnull($1), notnull($3)); }
231 	| pattern EQ pattern		{ $$ = op2($2, $1, $3); }
232 	| pattern GE pattern		{ $$ = op2($2, $1, $3); }
233 	| pattern GT pattern		{ $$ = op2($2, $1, $3); }
234 	| pattern LE pattern		{ $$ = op2($2, $1, $3); }
235 	| pattern LT pattern		{ $$ = op2($2, $1, $3); }
236 	| pattern NE pattern		{ $$ = op2($2, $1, $3); }
237 	| pattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
238 	| pattern MATCHOP pattern
239 		{ if (constnode($3))
240 			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
241 		  else
242 			$$ = op3($2, (Node *)1, $1, $3); }
243 	| pattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
244 	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
245 	| pattern '|' GETLINE var	{
246 			if (safe) SYNTAX("cmd | getline is unsafe");
247 			else $$ = op3(GETLINE, $4, itonp($2), $1); }
248 	| pattern '|' GETLINE		{
249 			if (safe) SYNTAX("cmd | getline is unsafe");
250 			else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
251 	| pattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
252 	| re
253 	| term
254 	;
255 
/* Two or more comma-separated patterns, chained with linkum(); always used inside parentheses. */
256 plist:
257 	  pattern comma pattern		{ $$ = linkum($1, $3); }
258 	| plist comma pattern		{ $$ = linkum($1, $3); }
259 	;
260 
/* One or more comma-separated ppatterns, chained with linkum(). */
261 pplist:
262 	  ppattern
263 	| pplist comma ppattern		{ $$ = linkum($1, $3); }
264 	;
265 
/*
 * Argument list of print/printf: nothing (rectonode() is used instead), an
 * unparenthesized pplist, or a parenthesized plist.
 */
266 prarg:
267 	  /* empty */			{ $$ = rectonode(); }
268 	| pplist
269 	| '(' plist ')'			{ $$ = $2; }
270 	;
271 
/* Either output keyword; the value is the token itself and becomes the statement's node type in simple_stmt. */
272 print:
273 	  PRINT | PRINTF
274 	;
275 
/* One or more pattern-statement terminators: newlines and semicolons in any mix. */
276 pst:
277 	  NL | ';' | pst NL | pst ';'
278 	;
279 
/* A closing brace followed by any number of newlines. */
280 rbrace:
281 	  '}' | rbrace NL
282 	;
283 
/*
 * A bare regular expression used as an expression: a MATCH of rectonode()
 * against the compiled expression, optionally negated with NOT.
 */
284 re:
285 	   reg_expr
286 		{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
287 	| NOT re	{ $$ = op1(NOT, notnull($2)); }
288 	;
289 
/*
 * A /regexp/ literal.  startreg() is called by the mid-rule action as soon
 * as the opening slash has been seen; the value is the REGEXPR token's string.
 * NOTE(review): startreg() presumably switches the lexer into regexp
 * scanning -- confirm in the lexer.
 */
290 reg_expr:
291 	  '/' {startreg();} REGEXPR '/'		{ $$ = $3; }
292 	;
293 
/* A closing parenthesis followed by any number of newlines. */
294 rparen:
295 	  ')' | rparen NL
296 	;
297 
/*
 * Statements that may also appear in the init/step slots of `for`:
 * print/printf with optional redirection (`|`, APPEND, GT), delete of one
 * element or of a whole array, and a bare expression (wrapped by
 * exptostat()).  The three redirected print forms are rejected with a syntax
 * error when `safe` is set; in that case $$ is not assigned by the action.
 * The redirection token's value ($3) is stored in the node via itonp().
 */
298 simple_stmt:
299 	  print prarg '|' term		{
300 			if (safe) SYNTAX("print | is unsafe");
301 			else $$ = stat3($1, $2, itonp($3), $4); }
302 	| print prarg APPEND term	{
303 			if (safe) SYNTAX("print >> is unsafe");
304 			else $$ = stat3($1, $2, itonp($3), $4); }
305 	| print prarg GT term		{
306 			if (safe) SYNTAX("print > is unsafe");
307 			else $$ = stat3($1, $2, itonp($3), $4); }
308 	| print prarg			{ $$ = stat3($1, $2, NIL, NIL); }
309 	| DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
310 	| DELETE varname		 { $$ = stat2(DELETE, makearr($2), 0); }
311 	| pattern			{ $$ = exptostat($1); }
312 	| error				{ yyclearin; SYNTAX("illegal statement"); }
313 	;
314 
/* Statement terminator: one or more newlines, or a semicolon followed by optional newlines. */
315 st:
316 	  nl
317 	| ';' opt_nl
318 	;
319 
/*
 * A complete statement.
 *   - break/continue report a syntax error when inloop is zero but still
 *     build their node.
 *   - do/while: inloop is raised around the body only; the mid-rule actions
 *     count as symbols, so the body is $3 and the condition is $7.
 *   - next/nextfile report a syntax error when infunc is nonzero but still
 *     build their node.
 *   - while: inloop is raised by the mid-rule action and lowered in the
 *     final action.
 *   - `for` and `simple_stmt st` have no action; the value is yacc's default
 *     ($$ = $1).
 *   - an empty statement (`;`) yields 0.
 */
320 stmt:
321 	  BREAK st		{ if (!inloop) SYNTAX("break illegal outside of loops");
322 				  $$ = stat1(BREAK, NIL); }
323 	| CONTINUE st		{  if (!inloop) SYNTAX("continue illegal outside of loops");
324 				  $$ = stat1(CONTINUE, NIL); }
325 	| do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
326 		{ $$ = stat2(DO, $3, notnull($7)); }
327 	| EXIT pattern st	{ $$ = stat1(EXIT, $2); }
328 	| EXIT st		{ $$ = stat1(EXIT, NIL); }
329 	| for
330 	| if stmt else stmt	{ $$ = stat3(IF, $1, $2, $4); }
331 	| if stmt		{ $$ = stat3(IF, $1, $2, NIL); }
332 	| lbrace stmtlist rbrace { $$ = $2; }
333 	| NEXT st	{ if (infunc)
334 				SYNTAX("next is illegal inside a function");
335 			  $$ = stat1(NEXT, NIL); }
336 	| NEXTFILE st	{ if (infunc)
337 				SYNTAX("nextfile is illegal inside a function");
338 			  $$ = stat1(NEXTFILE, NIL); }
339 	| RETURN pattern st	{ $$ = stat1(RETURN, $2); }
340 	| RETURN st		{ $$ = stat1(RETURN, NIL); }
341 	| simple_stmt st
342 	| while {inloop++;} stmt	{ --inloop; $$ = stat2(WHILE, $1, $3); }
343 	| ';' opt_nl		{ $$ = 0; }
344 	;
345 
/* One or more statements, chained with linkum(). */
346 stmtlist:
347 	  stmt
348 	| stmtlist stmt		{ $$ = linkum($1, $2); }
349 	;
350 
/* Either substitution keyword; the token value becomes the node type in term. */
351 subop:
352 	  SUB | GSUB
353 	;
354 
/*
 * Arithmetic expressions, built-in function calls, constants and variables.
 *
 * Notes on individual alternatives:
 *   - term '/' ASGNOP term builds a DIVEQ node.
 *     NOTE(review): presumably this recovers `a /= b` when the lexer has
 *     returned '/' and ASGNOP separately -- confirm in the lexer.
 *   - unary plus returns its operand unchanged.
 *   - BLTIN with empty parentheses or none at all takes rectonode() as its
 *     argument.
 *   - GENSUB, sub/gsub and MATCHFCN accept either a /regexp/ literal or an
 *     expression.  A literal, or an expression that constnode() reports as a
 *     constant, is compiled with makedfa() at parse time and the first
 *     operand is NIL; otherwise the expression node is kept and the first
 *     operand is (Node *)1.
 *   - GENSUB without a fourth argument, and sub/gsub without a third, use
 *     rectonode() as the target.
 *   - INDEX with a /regexp/ second argument reports a syntax error but still
 *     builds a node.
 *   - SPLIT stores the token code STRING or REGEXPR, cast to Node *, as its
 *     last operand to record which kind of separator was given; with no
 *     separator the operand is NIL and the tag is STRING.
 */
355 term:
356  	  term '/' ASGNOP term		{ $$ = op2(DIVEQ, $1, $4); }
357  	| term '+' term			{ $$ = op2(ADD, $1, $3); }
358 	| term '-' term			{ $$ = op2(MINUS, $1, $3); }
359 	| term '*' term			{ $$ = op2(MULT, $1, $3); }
360 	| term '/' term			{ $$ = op2(DIVIDE, $1, $3); }
361 	| term '%' term			{ $$ = op2(MOD, $1, $3); }
362 	| term POWER term		{ $$ = op2(POWER, $1, $3); }
363 	| '-' term %prec UMINUS		{ $$ = op1(UMINUS, $2); }
364 	| '+' term %prec UMINUS		{ $$ = $2; }
365 	| NOT term %prec UMINUS		{ $$ = op1(NOT, notnull($2)); }
366 	| BLTIN '(' ')'			{ $$ = op2(BLTIN, itonp($1), rectonode()); }
367 	| BLTIN '(' patlist ')'		{ $$ = op2(BLTIN, itonp($1), $3); }
368 	| BLTIN				{ $$ = op2(BLTIN, itonp($1), rectonode()); }
369 	| CALL '(' ')'			{ $$ = op2(CALL, celltonode($1,CVAR), NIL); }
370 	| CALL '(' patlist ')'		{ $$ = op2(CALL, celltonode($1,CVAR), $3); }
371 	| CLOSE term			{ $$ = op1(CLOSE, $2); }
372 	| DECR var			{ $$ = op1(PREDECR, $2); }
373 	| INCR var			{ $$ = op1(PREINCR, $2); }
374 	| var DECR			{ $$ = op1(POSTDECR, $1); }
375 	| var INCR			{ $$ = op1(POSTINCR, $1); }
376 	| GENSUB '(' reg_expr comma pattern comma pattern ')'
377 		{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, rectonode()); }
378 	| GENSUB '(' pattern comma pattern comma pattern ')'
379 		{ if (constnode($3))
380 			$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3), 1), $5, $7, rectonode());
381 		  else
382 			$$ = op5(GENSUB, (Node *)1, $3, $5, $7, rectonode());
383 		}
384 	| GENSUB '(' reg_expr comma pattern comma pattern comma pattern ')'
385 		{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, $9); }
386 	| GENSUB '(' pattern comma pattern comma pattern comma pattern ')'
387 		{ if (constnode($3))
388 			$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3),1), $5,$7,$9);
389 		  else
390 			$$ = op5(GENSUB, (Node *)1, $3, $5, $7, $9);
391 		}
392 	| GETLINE var LT term		{ $$ = op3(GETLINE, $2, itonp($3), $4); }
393 	| GETLINE LT term		{ $$ = op3(GETLINE, NIL, itonp($2), $3); }
394 	| GETLINE var			{ $$ = op3(GETLINE, $2, NIL, NIL); }
395 	| GETLINE			{ $$ = op3(GETLINE, NIL, NIL, NIL); }
396 	| INDEX '(' pattern comma pattern ')'
397 		{ $$ = op2(INDEX, $3, $5); }
398 	| INDEX '(' pattern comma reg_expr ')'
399 		{ SYNTAX("index() doesn't permit regular expressions");
400 		  $$ = op2(INDEX, $3, (Node*)$5); }
401 	| '(' pattern ')'		{ $$ = $2; }
402 	| MATCHFCN '(' pattern comma reg_expr ')'
403 		{ $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
404 	| MATCHFCN '(' pattern comma pattern ')'
405 		{ if (constnode($5))
406 			$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
407 		  else
408 			$$ = op3(MATCHFCN, (Node *)1, $3, $5); }
409 	| NUMBER			{ $$ = celltonode($1, CCON); }
410 	| SPLIT '(' pattern comma varname comma pattern ')'     /* string */
411 		{ $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
412 	| SPLIT '(' pattern comma varname comma reg_expr ')'    /* const /regexp/ */
413 		{ $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
414 	| SPLIT '(' pattern comma varname ')'
415 		{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }  /* default */
416 	| SPRINTF '(' patlist ')'	{ $$ = op1($1, $3); }
417 	| STRING	 		{ $$ = celltonode($1, CCON); }
418 	| subop '(' reg_expr comma pattern ')'
419 		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
420 	| subop '(' pattern comma pattern ')'
421 		{ if (constnode($3))
422 			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
423 		  else
424 			$$ = op4($1, (Node *)1, $3, $5, rectonode()); }
425 	| subop '(' reg_expr comma pattern comma var ')'
426 		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
427 	| subop '(' pattern comma pattern comma var ')'
428 		{ if (constnode($3))
429 			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
430 		  else
431 			$$ = op4($1, (Node *)1, $3, $5, $7); }
432 	| SUBSTR '(' pattern comma pattern comma pattern ')'
433 		{ $$ = op3(SUBSTR, $3, $5, $7); }
434 	| SUBSTR '(' pattern comma pattern ')'
435 		{ $$ = op3(SUBSTR, $3, $5, NIL); }
436 	| var
437 	;
438 
/*
 * An assignable reference: a plain name, an array element (the name is
 * converted with makearr() and the subscripts are a patlist), an IVAR token,
 * or INDIRECT applied to a term.  Both of the last two forms build an
 * INDIRECT node.
 */
439 var:
440 	  varname
441 	| varname '[' patlist ']'	{ $$ = op2(ARRAY, makearr($1), $3); }
442 	| IVAR				{ $$ = op1(INDIRECT, celltonode($1, CVAR)); }
443 	| INDIRECT term	 		{ $$ = op1(INDIRECT, $2); }
444 	;
445 
/*
 * Parameter list of a function definition.  Every alternative also stores the
 * list built so far in the global arglist.  checkdup() reports a parameter
 * name that already occurs earlier in the list.
 */
446 varlist:
447 	  /* nothing */		{ arglist = $$ = 0; }
448 	| VAR			{ arglist = $$ = celltonode($1,CVAR); }
449 	| varlist comma VAR	{
450 			checkdup($1, $3);
451 			arglist = $$ = linkum($1,celltonode($3,CVAR)); }
452 	;
453 
/*
 * A bare name: an ordinary variable (VAR), an ARG token whose integer value
 * is stored via itonp(), or VARNF.
 */
454 varname:
455 	  VAR			{ $$ = celltonode($1, CVAR); }
456 	| ARG 			{ $$ = op1(ARG, itonp($1)); }
457 	| VARNF			{ $$ = op1(VARNF, (Node *) $1); }
458 	;
459 
460 
/* Head of a while loop; the value is the condition passed through notnull(). */
461 while:
462 	  WHILE '(' pattern rparen	{ $$ = notnull($3); }
463 	;
464 
465 %%
466 
467 void setfname(Cell *p)
468 {
469 	if (isarr(p))
470 		SYNTAX("%s is an array, not a function", p->nval);
471 	else if (isfcn(p))
472 		SYNTAX("you can't define function %s more than once", p->nval);
473 	curfname = p->nval;
474 }
475 
476 int constnode(Node *p)
477 {
478 	return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
479 }
480 
481 char *strnode(Node *p)
482 {
483 	return ((Cell *)(p->narg[0]))->sval;
484 }
485 
486 Node *notnull(Node *n)
487 {
488 	switch (n->nobj) {
489 	case LE: case LT: case EQ: case NE: case GT: case GE:
490 	case BOR: case AND: case NOT:
491 		return n;
492 	default:
493 		return op2(NE, n, nullnode);
494 	}
495 }
496 
497 void checkdup(Node *vl, Cell *cp)	/* check if name already in list */
498 {
499 	char *s = cp->nval;
500 	for ( ; vl; vl = vl->nnext) {
501 		if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
502 			SYNTAX("duplicate argument %s", s);
503 			break;
504 		}
505 	}
506 }
507