/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name Lucent Technologies or any of
its entities not be used in advertising or publicity pertaining
to distribution of the software without specific, written prior
permission.

LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/

/*
 * yacc grammar for awk: C prologue and token/type declarations.
 *
 * The semantic actions in the rules section build a tree of Node
 * objects; the types Node and Cell come from "awk.h".
 */

%{
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif

#include <stdio.h>
#include <string.h>
#include "awk.h"

/* Defined after the second %%; reports a duplicated parameter name. */
void checkdup(Node *list, Cell *item);

/* Always returns 1 (the lex convention for "no further input"). */
int yywrap(void) { return(1); }

Node	*beginloc = 0;	/* statements collected from XBEGIN blocks (see pa_stat) */
Node	*endloc = 0;	/* statements collected from XEND blocks (see pa_stat) */
int	infunc	= 0;	/* = 1 if in arglist or body of func */
int	inloop	= 0;	/* = 1 if in while, for, do */
char	*curfname = 0;	/* current function name */
Node	*arglist = 0;	/* list of args for current function */
%}

/* Semantic value carried by tokens and nonterminals. */
%union {
	Node	*p;	/* parse-tree node */
	Cell	*cp;	/* symbol-table cell */
	int	i;	/* plain integer (token/operator code) */
	char	*s;	/* string, used for regular-expression text */
}

%token	<i>	FIRSTTOKEN	/* must be first */
%token	<p>	PROGRAM PASTAT PASTAT2 XBEGIN XEND
%token	<i>	NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
%token	<i>	ARRAY
%token	<i>	MATCH NOTMATCH MATCHOP
	/* The next line's tokens are not referenced by any rule in this file. */
%token	<i>	FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE
%token	<i>	AND BOR APPEND EQ GE GT LE LT NE IN
%token	<i>	ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%token	<i>	SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
%token	<i>	ADD MINUS MULT DIVIDE MOD
%token	<i>	ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
%token	<i>	PRINT PRINTF SPRINTF
%token	<p>	ELSE INTEST CONDEXPR
%token	<i>	POSTINCR PREINCR POSTDECR PREDECR
%token	<cp>	VAR IVAR VARNF CALL NUMBER STRING
%token	<s>	REGEXPR

/* Value types of the nonterminals. */
%type	<p>	pas pattern ppattern plist pplist patlist prarg term re
%type	<p>	pa_pat pa_stat pa_stats
%type	<s>	reg_expr
%type	<p>	simple_stmt opt_simple_stmt stmt stmtlist
%type	<p>	var varname funcname varlist
%type	<p>	for if else while
%type	<i>	do st
%type	<i>	pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
%type	<i>	subop print
%type	<cp>	string

/* Precedence table, lowest first; it continues on the following lines. */
%right	ASGNOP
%right	'?'
%right	':'
%left	BOR
%left	AND
%left	GETLINE
%nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
%left	ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%left	GENSUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
%left	PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
%left	REGEXPR VAR VARNF IVAR WHILE '('
%left	CAT
%left	'+' '-'
%left	'*' '/' '%'
%left	NOT UMINUS
%right	POWER
%right	DECR INCR
%left	INDIRECT
%token	LASTTOKEN	/* must be last */

%%

/*
 * Start symbol.  On a parse with errorflag == 0 the PROGRAM node built
 * from beginloc, the pattern-action list and endloc is stored in winner.
 */
program:
	  pas	{ if (errorflag==0)
			winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
	| error	{ yyclearin; bracecheck(); SYNTAX("bailing out"); }
	;

/* and, bor, comma, do, else: the token followed by any number of NLs. */
and:
	  AND | and NL
	;

bor:
	  BOR | bor NL
	;

comma:
	  ',' | comma NL
	;

do:
	  DO | do NL
	;

else:
	  ELSE | else NL
	;

/*
 * for (init; cond; incr) stmt, for (init; ; incr) stmt, for (v in arr) stmt.
 * inloop is raised by the mid-rule action before the body is parsed and
 * lowered again in the final action.  A missing condition is passed as NIL;
 * a present one is wrapped by notnull().
 */
for:
	  FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
		{ --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }
	| FOR '(' opt_simple_stmt ';'  ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
		{ --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }
	| FOR '(' varname IN varname rparen {inloop++;} stmt
		{ --inloop; $$ = stat3(IN, $3, makearr($5), $8); }
	;

/* Name in a function definition; setfname() validates and records it. */
funcname:
	  VAR	{ setfname($1); }
	| CALL	{ setfname($1); }
	;

/* "if (cond)" header; yields the condition wrapped by notnull(). */
if:
	  IF '(' pattern rparen		{ $$ = notnull($3); }
	;

lbrace:
	  '{' | lbrace NL
	;

/* One or more newlines. */
nl:
	  NL | nl NL
	;

opt_nl:
	  /* empty */	{ $$ = 0; }
	| nl
	;

opt_pst:
	  /* empty */	{ $$ = 0; }
	| pst
	;


opt_simple_stmt:
	  /* empty */			{ $$ = 0; }
	| simple_stmt
	;

/* Whole program body: optional separators around the pattern-action list. */
pas:
	  opt_pst			{ $$ = 0; }
	| opt_pst pa_stats opt_pst	{ $$ = $2; }
	;

/* Pattern of a pattern-action statement, wrapped by notnull(). */
pa_pat:
	  pattern	{ $$ = notnull($1); }
	;

/*
 * One top-level item.  When the action is omitted, a PRINT of
 * rectonode() is supplied.  XBEGIN/XEND bodies are appended to
 * beginloc/endloc and FUNC definitions are handed to defn(); these three
 * forms yield 0.  infunc is raised while a function body is parsed.
 */
pa_stat:
	  pa_pat			{ $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
	| pa_pat lbrace stmtlist '}'	{ $$ = stat2(PASTAT, $1, $3); }
	| pa_pat ',' pa_pat		{ $$ = pa2stat($1, $3, stat2(PRINT, rectonode(), NIL)); }
	| pa_pat ',' pa_pat lbrace stmtlist '}'	{ $$ = pa2stat($1, $3, $5); }
	| lbrace stmtlist '}'		{ $$ = stat2(PASTAT, NIL, $2); }
	| XBEGIN lbrace stmtlist '}'
		{ beginloc = linkum(beginloc, $3); $$ = 0; }
	| XEND lbrace stmtlist '}'
		{ endloc = linkum(endloc, $3); $$ = 0; }
	| FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}'
		{ infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
	;

pa_stats:
	  pa_stat
	| pa_stats opt_pst pa_stat	{ $$ = linkum($1, $3); }
	;

/* Comma-separated expressions, chained with linkum(). */
patlist:
	  pattern
	| patlist comma pattern		{ $$ = linkum($1, $3); }
	;

/*
 * Restricted expression reached only through pplist/prarg, i.e. the
 * unparenthesised arguments of print/printf.  Compared with "pattern" it
 * has no EQ/GE/GT/LE/LT/NE alternatives and no '|' GETLINE forms; GT and
 * '|' after print arguments are taken as redirection by simple_stmt.
 */
ppattern:
	  var ASGNOP ppattern		{ $$ = op2($2, $1, $3); }
	| ppattern '?' ppattern ':' ppattern %prec '?'
		{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
	| ppattern bor ppattern %prec BOR
		{ $$ = op2(BOR, notnull($1), notnull($3)); }
	| ppattern and ppattern %prec AND
		{ $$ = op2(AND, notnull($1), notnull($3)); }
	| ppattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
	| ppattern MATCHOP ppattern
		/* constant operand: compiled now; else first slot is (Node *)1 */
		{ if (constnode($3))
			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
		  else
			$$ = op3($2, (Node *)1, $1, $3); }
	| ppattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
	| ppattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
	| re
	| term
	;

/*
 * General expression.  Operands of BOR, AND and the test of '?' are
 * wrapped by notnull().  When the global "safe" is set, the '|' GETLINE
 * forms only report a syntax error and leave $$ unassigned by the action.
 */
pattern:
	  var ASGNOP pattern		{ $$ = op2($2, $1, $3); }
	| pattern '?' pattern ':' pattern %prec '?'
		{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
	| pattern bor pattern %prec BOR
		{ $$ = op2(BOR, notnull($1), notnull($3)); }
	| pattern and pattern %prec AND
		{ $$ = op2(AND, notnull($1), notnull($3)); }
	| pattern EQ pattern		{ $$ = op2($2, $1, $3); }
	| pattern GE pattern		{ $$ = op2($2, $1, $3); }
	| pattern GT pattern		{ $$ = op2($2, $1, $3); }
	| pattern LE pattern		{ $$ = op2($2, $1, $3); }
	| pattern LT pattern		{ $$ = op2($2, $1, $3); }
	| pattern NE pattern		{ $$ = op2($2, $1, $3); }
	| pattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
	| pattern MATCHOP pattern
		/* constant operand: compiled now; else first slot is (Node *)1 */
		{ if (constnode($3))
			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
		  else
			$$ = op3($2, (Node *)1, $1, $3); }
	| pattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
	| pattern '|' GETLINE var	{
			if (safe) SYNTAX("cmd | getline is unsafe");
			else $$ = op3(GETLINE, $4, itonp($2), $1); }
	| pattern '|' GETLINE		{
			if (safe) SYNTAX("cmd | getline is unsafe");
			else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
	| pattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
	| re
	| term
	;

/* List of two or more expressions (used inside parentheses). */
plist:
	  pattern comma pattern		{ $$ = linkum($1, $3); }
	| plist comma pattern		{ $$ = linkum($1, $3); }
	;

pplist:
	  ppattern
	| pplist comma ppattern		{ $$ = linkum($1, $3); }
	;

/* Arguments of print/printf; with none given, rectonode() is used. */
prarg:
	  /* empty */			{ $$ = rectonode(); }
	| pplist
	| '(' plist ')'			{ $$ = $2; }
	;

print:
	  PRINT | PRINTF
	;

/* One or more statement separators: NL and ';' in any mixture. */
pst:
	  NL | ';' | pst NL | pst ';'
	;

rbrace:
	  '}' | rbrace NL
	;

/* A bare /regex/ used as an expression: a MATCH against rectonode(). */
re:
	   reg_expr
		{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
	| NOT re	{ $$ = op1(NOT, notnull($2)); }
	;

/* startreg() is called right after the opening '/' and before REGEXPR. */
reg_expr:
	  '/' {startreg();} REGEXPR '/'		{ $$ = $3; }
	;

rparen:
	  ')' | rparen NL
	;

/*
 * Statement without its terminator.  The three redirected print forms
 * only report a syntax error when the global "safe" is set.
 */
simple_stmt:
	  print prarg '|' term		{
			if (safe) SYNTAX("print | is unsafe");
			else $$ = stat3($1, $2, itonp($3), $4); }
	| print prarg APPEND term	{
			if (safe) SYNTAX("print >> is unsafe");
			else $$ = stat3($1, $2, itonp($3), $4); }
	| print prarg GT term		{
			if (safe) SYNTAX("print > is unsafe");
			else $$ = stat3($1, $2, itonp($3), $4); }
	| print prarg			{ $$ = stat3($1, $2, NIL, NIL); }
	| DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
	| DELETE varname		{ $$ = stat2(DELETE, makearr($2), 0); }
	| pattern			{ $$ = exptostat($1); }
	| error				{ yyclearin; SYNTAX("illegal statement"); }
	;

/* Statement terminator: newline(s), or ';' with optional newlines. */
st:
	  nl
	| ';' opt_nl
	;

/*
 * Full statement.  BREAK/CONTINUE report an error when inloop is zero and
 * NEXT/NEXTFILE report one when infunc is non-zero; the node is built in
 * either case.  In the do-while form $3 is the body and $7 the condition
 * (the two mid-rule actions occupy $2 and $4).
 */
stmt:
	  BREAK st		{ if (!inloop) SYNTAX("break illegal outside of loops");
				  $$ = stat1(BREAK, NIL); }
	| CONTINUE st		{  if (!inloop) SYNTAX("continue illegal outside of loops");
				  $$ = stat1(CONTINUE, NIL); }
	| do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
		{ $$ = stat2(DO, $3, notnull($7)); }
	| EXIT pattern st	{ $$ = stat1(EXIT, $2); }
	| EXIT st		{ $$ = stat1(EXIT, NIL); }
	| for
	| if stmt else stmt	{ $$ = stat3(IF, $1, $2, $4); }
	| if stmt		{ $$ = stat3(IF, $1, $2, NIL); }
	| lbrace stmtlist rbrace { $$ = $2; }
	| NEXT st	{ if (infunc)
				SYNTAX("next is illegal inside a function");
			  $$ = stat1(NEXT, NIL); }
	| NEXTFILE st	{ if (infunc)
				SYNTAX("nextfile is illegal inside a function");
			  $$ = stat1(NEXTFILE, NIL); }
	| RETURN pattern st	{ $$ = stat1(RETURN, $2); }
	| RETURN st		{ $$ = stat1(RETURN, NIL); }
	| simple_stmt st
	| while {inloop++;} stmt	{ --inloop; $$ = stat2(WHILE, $1, $3); }
	| ';' opt_nl		{ $$ = 0; }
	;

stmtlist:
	  stmt
	| stmtlist stmt		{ $$ = linkum($1, $2); }
	;

subop:
	  SUB | GSUB
	;

/* Adjacent STRING tokens are joined with catstr(). */
string:
	  STRING
	| string STRING		{ $$ = catstr($1, $2); }
	;

/*
 * Primary and arithmetic expressions plus the built-in function forms.
 * Where a regular-expression argument may be either /re/ or a general
 * expression, a constant expression (constnode()) is passed to makedfa()
 * here; otherwise the node's first slot is set to (Node *)1 and the
 * expression itself is stored.
 */
term:
	  term '/' ASGNOP term		{ $$ = op2(DIVEQ, $1, $4); }
	| term '+' term			{ $$ = op2(ADD, $1, $3); }
	| term '-' term			{ $$ = op2(MINUS, $1, $3); }
	| term '*' term			{ $$ = op2(MULT, $1, $3); }
	| term '/' term			{ $$ = op2(DIVIDE, $1, $3); }
	| term '%' term			{ $$ = op2(MOD, $1, $3); }
	| term POWER term		{ $$ = op2(POWER, $1, $3); }
	| '-' term %prec UMINUS		{ $$ = op1(UMINUS, $2); }
	| '+' term %prec UMINUS		{ $$ = $2; }
	| NOT term %prec UMINUS		{ $$ = op1(NOT, notnull($2)); }
	| BLTIN '(' ')'			{ $$ = op2(BLTIN, itonp($1), rectonode()); }
	| BLTIN '(' patlist ')'		{ $$ = op2(BLTIN, itonp($1), $3); }
	| BLTIN				{ $$ = op2(BLTIN, itonp($1), rectonode()); }
	| CALL '(' ')'			{ $$ = op2(CALL, celltonode($1,CVAR), NIL); }
	| CALL '(' patlist ')'		{ $$ = op2(CALL, celltonode($1,CVAR), $3); }
	| CLOSE term			{ $$ = op1(CLOSE, $2); }
	| DECR var			{ $$ = op1(PREDECR, $2); }
	| INCR var			{ $$ = op1(PREINCR, $2); }
	| var DECR			{ $$ = op1(POSTDECR, $1); }
	| var INCR			{ $$ = op1(POSTINCR, $1); }
	| GENSUB '(' reg_expr comma pattern comma pattern ')'
		{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, rectonode()); }
	| GENSUB '(' pattern comma pattern comma pattern ')'
		{ if (constnode($3))
			$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3), 1), $5, $7, rectonode());
		  else
			$$ = op5(GENSUB, (Node *)1, $3, $5, $7, rectonode());
		}
	| GENSUB '(' reg_expr comma pattern comma pattern comma pattern ')'
		{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, $9); }
	| GENSUB '(' pattern comma pattern comma pattern comma pattern ')'
		{ if (constnode($3))
			$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3),1), $5,$7,$9);
		  else
			$$ = op5(GENSUB, (Node *)1, $3, $5, $7, $9);
		}
	| GETLINE var LT term		{ $$ = op3(GETLINE, $2, itonp($3), $4); }
	| GETLINE LT term		{ $$ = op3(GETLINE, NIL, itonp($2), $3); }
	| GETLINE var			{ $$ = op3(GETLINE, $2, NIL, NIL); }
	| GETLINE			{ $$ = op3(GETLINE, NIL, NIL, NIL); }
	| INDEX '(' pattern comma pattern ')'
		{ $$ = op2(INDEX, $3, $5); }
	| INDEX '(' pattern comma reg_expr ')'
		{ SYNTAX("index() doesn't permit regular expressions");
		  $$ = op2(INDEX, $3, (Node*)$5); }
	| '(' pattern ')'		{ $$ = $2; }
	| MATCHFCN '(' pattern comma reg_expr ')'
		{ $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
	| MATCHFCN '(' pattern comma pattern ')'
		{ if (constnode($5))
			$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
		  else
			$$ = op3(MATCHFCN, (Node *)1, $3, $5); }
	| NUMBER			{ $$ = celltonode($1, CCON); }
	| SPLIT '(' pattern comma varname comma pattern ')'     /* string */
		{ $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
	| SPLIT '(' pattern comma varname comma reg_expr ')'    /* const /regexp/ */
		{ $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
	| SPLIT '(' pattern comma varname ')'
		{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }  /* default */
	| SPRINTF '(' patlist ')'	{ $$ = op1($1, $3); }
	| string			{ $$ = celltonode($1, CCON); }
	| subop '(' reg_expr comma pattern ')'
		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
	| subop '(' pattern comma pattern ')'
		{ if (constnode($3))
			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
		  else
			$$ = op4($1, (Node *)1, $3, $5, rectonode()); }
	| subop '(' reg_expr comma pattern comma var ')'
		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
	| subop '(' pattern comma pattern comma var ')'
		{ if (constnode($3))
			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
		  else
			$$ = op4($1, (Node *)1, $3, $5, $7); }
	| SUBSTR '(' pattern comma pattern comma pattern ')'
		{ $$ = op3(SUBSTR, $3, $5, $7); }
	| SUBSTR '(' pattern comma pattern ')'
		{ $$ = op3(SUBSTR, $3, $5, NIL); }
	| var
	;

/* Variable reference: plain name, array element, IVAR, or INDIRECT term. */
var:
	  varname
	| varname '[' patlist ']'	{ $$ = op2(ARRAY, makearr($1), $3); }
	| IVAR				{ $$ = op1(INDIRECT, celltonode($1, CVAR)); }
	| INDIRECT term			{ $$ = op1(INDIRECT, $2); }
;	/* terminates the "var" rule */

/*
 * Parameter list of a function definition.  arglist tracks the list built
 * so far; checkdup() reports a parameter name that is already in it.
 */
varlist:
	  /* nothing */		{ arglist = $$ = 0; }
	| VAR			{ arglist = $$ = celltonode($1,CVAR); }
	| varlist comma VAR	{
			checkdup($1, $3);
			arglist = $$ = linkum($1,celltonode($3,CVAR)); }
	;

varname:
	  VAR			{ $$ = celltonode($1, CVAR); }
	| ARG			{ $$ = op1(ARG, itonp($1)); }
	| VARNF			{ $$ = op1(VARNF, (Node *) $1); }
	;


/* "while (cond)" header; yields the condition wrapped by notnull(). */
while:
	  WHILE '(' pattern rparen	{ $$ = notnull($3); }
	;

%%

/*
 * Record p as the function currently being defined.
 * Reports a syntax error if p is an array (isarr) or is already a
 * function (isfcn); curfname is set to p->nval in every case.
 */
void setfname(Cell *p)
{
	if (isarr(p))
		SYNTAX("%s is an array, not a function", p->nval);
	else if (isfcn(p))
		SYNTAX("you can't define function %s more than once", p->nval);
	curfname = p->nval;
}

/*
 * Return non-zero when p satisfies isvalue() and the Cell stored in
 * p->narg[0] has csub == CCON.
 */
int constnode(Node *p)
{
	return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
}

/*
 * Return the sval of the Cell stored in p->narg[0].
 * The grammar calls this only after constnode(p) returned non-zero.
 */
char *strnode(Node *p)
{
	return ((Cell *)(p->narg[0]))->sval;
}

/*
 * Make n usable as a condition.  A node whose nobj is a comparison
 * (LE, LT, EQ, NE, GT, GE) or a logical operator (BOR, AND, NOT) is
 * returned unchanged; any other node is wrapped as op2(NE, n, nullnode).
 * n must not be a null pointer: n->nobj is read unconditionally.
 */
Node *notnull(Node *n)
{
	switch (n->nobj) {
	case LE: case LT: case EQ: case NE: case GT: case GE:
	case BOR: case AND: case NOT:
		return n;
	default:
		return op2(NE, n, nullnode);
	}
}

/*
 * Check whether the name of cp already occurs in the list vl.
 * The list is walked through nnext; each element's narg[0] is read as a
 * Cell and its nval compared with strcmp().  Only the first duplicate
 * is reported.
 */
void checkdup(Node *vl, Cell *cp)	/* check if name already in list */
{
	char *s = cp->nval;
	for ( ; vl; vl = vl->nnext) {
		if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
			SYNTAX("duplicate argument %s", s);
			break;
		}
	}
}