1 /**************************************************************** 2 Copyright (C) Lucent Technologies 1997 3 All Rights Reserved 4 5 Permission to use, copy, modify, and distribute this software and 6 its documentation for any purpose and without fee is hereby 7 granted, provided that the above copyright notice appear in all 8 copies and that both that the copyright notice and this 9 permission notice and warranty disclaimer appear in supporting 10 documentation, and that the name Lucent Technologies or any of 11 its entities not be used in advertising or publicity pertaining 12 to distribution of the software without specific, written prior 13 permission. 14 15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 22 THIS SOFTWARE. 
****************************************************************/

%{
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif

#include <stdio.h>
#include <string.h>
#include "awk.h"

void checkdup(Node *list, Cell *item);

/* yywrap: always returns 1. */
int
yywrap(void)
{
	return 1;
}

/*
 * Parser-wide state shared between the grammar actions and the
 * helper functions at the end of this file.
 */
Node	*beginloc = NULL;	/* BEGIN bodies, chained by the XBEGIN alternative of pa_stat */
Node	*endloc = NULL;		/* END bodies, chained by the XEND alternative of pa_stat */
bool	infunc = false;		/* set by the FUNC alternative just before the function body, cleared after it */
int	inloop = 0;		/* depth of enclosing while/for/do bodies; a counter, not a bool, because loops can nest */
char	*curfname = NULL;	/* name of the function being defined (set by setfname) */
Node	*arglist = NULL;	/* parameter list of the function being defined (set by varlist) */
%}

/* Semantic value carried by every grammar symbol. */
%union {
	Node	*p;
	Cell	*cp;
	int	i;
	char	*s;
}

/*
 * Token declarations.  The order of these lines determines the token
 * numbering, so it must not be changed.
 */
%token	<i>	FIRSTTOKEN	/* must be first */
%token	<p>	PROGRAM PASTAT PASTAT2 XBEGIN XEND
%token	<i>	NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
%token	<i>	ARRAY
%token	<i>	MATCH NOTMATCH MATCHOP
%token	<i>	FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE ZERO
%token	<i>	AND BOR APPEND EQ GE GT LE LT NE IN
%token	<i>	ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%token	<i>	GENSUB SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
%token	<i>	ADD MINUS MULT DIVIDE MOD
%token	<i>	ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
%token	<i>	PRINT PRINTF SPRINTF
%token	<p>	ELSE INTEST CONDEXPR
%token	<i>	POSTINCR PREINCR POSTDECR PREDECR
%token	<cp>	VAR IVAR VARNF CALL NUMBER STRING
%token	<s>	REGEXPR

/* Value types of the nonterminals. */
%type	<p>	pas pattern ppattern plist pplist patlist prarg term re
%type	<p>	pa_pat pa_stat pa_stats
%type	<s>	reg_expr
%type	<p>	simple_stmt opt_simple_stmt stmt stmtlist
%type	<p>	var varname funcname varlist
%type	<p>	for if else while
%type	<i>	do st
%type	<i>	pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
%type	<i>	subop print
%type	<cp>	string

/* Operator precedence, lowest binding first. */
%right	ASGNOP
%right	'?'
/* Remaining operator precedence levels, lowest binding first. */
%right	':'
%left	BOR
%left	AND
%left	GETLINE
%nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
%left	ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%left	GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
%left	PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
%left	REGEXPR VAR VARNF IVAR WHILE '('
%left	CAT
%left	'+' '-'
%left	'*' '/' '%'
%left	NOT UMINUS UPLUS
%right	POWER
%right	DECR INCR
%left	INDIRECT
%token	LASTTOKEN	/* must be last */

%%

/*
 * Start symbol.  On a clean parse the whole program is stored in
 * `winner' as a PROGRAM node built from beginloc, the pattern-action
 * list and endloc.
 */
program:
	  pas
		{
			if (errorflag == 0)
				winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc);
		}
	| error
		{
			yyclearin;
			bracecheck();
			SYNTAX("bailing out");
		}
	;

/* The next few rules let newlines follow a token. */
and:
	  AND
	| and NL
	;

bor:
	  BOR
	| bor NL
	;

comma:
	  ','
	| comma NL
	;

do:
	  DO
	| do NL
	;

else:
	  ELSE
	| else NL
	;

/*
 * The three forms of for statement.  inloop is incremented by a
 * mid-rule action before the body is parsed and decremented once the
 * body has been reduced, so break/continue checks in stmt see the
 * correct nesting depth.
 */
for:
	  FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen
		{ inloop++; }
	  stmt
		{
			--inloop;
			$$ = stat4(FOR, $3, notnull($6), $9, $12);
		}
	| FOR '(' opt_simple_stmt ';' ';' opt_nl opt_simple_stmt rparen
		{ inloop++; }
	  stmt
		{
			--inloop;
			$$ = stat4(FOR, $3, NIL, $7, $10);
		}
	| FOR '(' varname IN varname rparen
		{ inloop++; }
	  stmt
		{
			--inloop;
			$$ = stat3(IN, $3, makearr($5), $8);
		}
	;

/* Name in a function definition; setfname checks and records it. */
funcname:
	  VAR
		{ setfname($1); }
	| CALL
		{ setfname($1); }
	;

/* The condition part of an if statement, passed through notnull. */
if:
	  IF '(' pattern rparen
		{ $$ = notnull($3); }
	;

lbrace:
	  '{'
	| lbrace NL
	;

nl:
	  NL
	| nl NL
	;

opt_nl:
	  /* empty */
		{ $$ = 0; }
	| nl
	;

opt_pst:
	  /* empty */
		{ $$ = 0; }
	| pst
	;


opt_simple_stmt:
	  /* empty */
		{ $$ = 0; }
	| simple_stmt
	;

/* The list of pattern-action statements, with optional separators around it. */
pas:
	  opt_pst
		{ $$ = 0; }
	| opt_pst pa_stats opt_pst
		{ $$ = $2; }
	;

pa_pat:
	  pattern
		{ $$ = notnull($1); }
	;

/*
 * One top-level item: a pattern-action statement, a BEGIN or END
 * block, or a function definition.  BEGIN/END bodies are appended to
 * beginloc/endloc and function definitions are handed to defn(); all
 * three yield 0 so they add nothing to the pattern-action list.
 */
pa_stat:
	  pa_pat
		{ $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
	| pa_pat lbrace stmtlist '}'
		{ $$ = stat2(PASTAT, $1, $3); }
	| pa_pat ',' opt_nl pa_pat
		{ $$ = pa2stat($1, $4, stat2(PRINT, rectonode(), NIL)); }
	| pa_pat ',' opt_nl pa_pat lbrace stmtlist '}'
		{ $$ = pa2stat($1, $4, $6); }
	| lbrace stmtlist '}'
		{ $$ = stat2(PASTAT, NIL, $2); }
	| XBEGIN lbrace stmtlist '}'
		{
			beginloc = linkum(beginloc, $3);
			$$ = 0;
		}
	| XEND lbrace stmtlist '}'
		{
			endloc = linkum(endloc, $3);
			$$ = 0;
		}
	| FUNC funcname '(' varlist rparen
		{ infunc = true; }
	  lbrace stmtlist '}'
		{
			infunc = false;
			curfname = 0;
			defn((Cell *)$2, $4, $8);
			$$ = 0;
		}
	;

pa_stats:
	  pa_stat
	| pa_stats opt_pst pa_stat
		{ $$ = linkum($1, $3); }
	;

patlist:
	  pattern
	| patlist comma pattern
		{ $$ = linkum($1, $3); }
	;

/*
 * ppattern: the expression form accepted in an unparenthesized print
 * argument list (see pplist/prarg).  It has no relational-operator
 * and no `| getline' alternatives, unlike pattern below.
 */
ppattern:
	  var ASGNOP ppattern
		{ $$ = op2($2, $1, $3); }
	| ppattern '?' ppattern ':' ppattern %prec '?'
		{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
	| ppattern bor ppattern %prec BOR
		{ $$ = op2(BOR, notnull($1), notnull($3)); }
	| ppattern and ppattern %prec AND
		{ $$ = op2(AND, notnull($1), notnull($3)); }
	| ppattern MATCHOP reg_expr
		{
			$$ = op3($2, NIL, $1, (Node*)makedfa($3, 0));
			free($3);
		}
	| ppattern MATCHOP ppattern
		{
			/* constnode() operand: build the dfa now from its string */
			if (constnode($3)) {
				$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
				free($3);
			} else {
				$$ = op3($2, (Node *)1, $1, $3);
			}
		}
	| ppattern IN varname
		{ $$ = op2(INTEST, $1, makearr($3)); }
	| '(' plist ')' IN varname
		{ $$ = op2(INTEST, $2, makearr($5)); }
	| ppattern term %prec CAT
		{ $$ = op2(CAT, $1, $2); }
	| re
	| term
	;

/* pattern: the general expression form. */
pattern:
	  var ASGNOP pattern
		{ $$ = op2($2, $1, $3); }
	| pattern '?' pattern ':' pattern %prec '?'
		{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
	| pattern bor pattern %prec BOR
		{ $$ = op2(BOR, notnull($1), notnull($3)); }
	| pattern and pattern %prec AND
		{ $$ = op2(AND, notnull($1), notnull($3)); }
	| pattern EQ pattern
		{ $$ = op2($2, $1, $3); }
	| pattern GE pattern
		{ $$ = op2($2, $1, $3); }
	| pattern GT pattern
		{ $$ = op2($2, $1, $3); }
	| pattern LE pattern
		{ $$ = op2($2, $1, $3); }
	| pattern LT pattern
		{ $$ = op2($2, $1, $3); }
	| pattern NE pattern
		{ $$ = op2($2, $1, $3); }
	| pattern MATCHOP reg_expr
		{
			$$ = op3($2, NIL, $1, (Node*)makedfa($3, 0));
			free($3);
		}
	| pattern MATCHOP pattern
		{
			/* constnode() operand: build the dfa now from its string */
			if (constnode($3)) {
				$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
				free($3);
			} else {
				$$ = op3($2, (Node *)1, $1, $3);
			}
		}
	| pattern IN varname
		{ $$ = op2(INTEST, $1, makearr($3)); }
	| '(' plist ')' IN varname
		{ $$ = op2(INTEST, $2, makearr($5)); }
	| pattern '|' GETLINE var
		{
			if (safe) {
				SYNTAX("cmd | getline is unsafe");
			} else {
				$$ = op3(GETLINE, $4, itonp($2), $1);
			}
		}
	| pattern '|' GETLINE
		{
			if (safe) {
				SYNTAX("cmd | getline is unsafe");
			} else {
				$$ = op3(GETLINE, (Node*)0, itonp($2), $1);
			}
		}
	| pattern term %prec CAT
		{ $$ = op2(CAT, $1, $2); }
	| re
	| term
	;

/* A comma-separated list of at least two patterns. */
plist:
	  pattern comma pattern
		{ $$ = linkum($1, $3); }
	| plist comma pattern
		{ $$ = linkum($1, $3); }
	;

pplist:
	  ppattern
	| pplist comma ppattern
		{ $$ = linkum($1, $3); }
	;

/* Arguments of print/printf; with none given, rectonode() is used. */
prarg:
	  /* empty */
		{ $$ = rectonode(); }
	| pplist
	| '(' plist ')'
		{ $$ = $2; }
	;

print:
	  PRINT
	| PRINTF
	;

/* One or more newlines and/or semicolons. */
pst:
	  NL
	| ';'
	| pst NL
	| pst ';'
	;

rbrace:
	  '}'
	| rbrace NL
	;

/* A bare /regex/ (optionally negated) is a MATCH against rectonode(). */
re:
	  reg_expr
		{
			$$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0));
			free($1);
		}
	| NOT re
		{ $$ = op1(NOT, notnull($2)); }
	;

/*
 * A regular expression literal.  startreg() is called as soon as the
 * opening slash has been seen; the value is the REGEXPR string.
 */
reg_expr:
	  '/'
		{ startreg(); }
	  REGEXPR '/'
		{ $$ = $3; }
	;

rparen:
	  ')'
	| rparen NL
	;

/*
 * simple_stmt: print/printf (optionally redirected), delete, or a
 * bare expression.  The redirected print forms are rejected with a
 * syntax error when `safe' is set.
 */
simple_stmt:
	  print prarg '|' term
		{
			if (safe)
				SYNTAX("print | is unsafe");
			else
				$$ = stat3($1, $2, itonp($3), $4);
		}
	| print prarg APPEND term
		{
			if (safe)
				SYNTAX("print >> is unsafe");
			else
				$$ = stat3($1, $2, itonp($3), $4);
		}
	| print prarg GT term
		{
			if (safe)
				SYNTAX("print > is unsafe");
			else
				$$ = stat3($1, $2, itonp($3), $4);
		}
	| print prarg
		{ $$ = stat3($1, $2, NIL, NIL); }
	| DELETE varname '[' patlist ']'
		{ $$ = stat2(DELETE, makearr($2), $4); }
	| DELETE varname
		{ $$ = stat2(DELETE, makearr($2), 0); }
	| pattern
		{ $$ = exptostat($1); }
	| error
		{ yyclearin; SYNTAX("illegal statement"); }
	;

/* Statement terminator: newline(s), or a semicolon with optional newlines. */
st:
	  nl
	| ';' opt_nl
	;

/*
 * stmt: one complete statement.  break/continue are diagnosed when
 * inloop is zero; next/nextfile are diagnosed when infunc is set.
 * Loop bodies are bracketed by inloop++ / --inloop.
 */
stmt:
	  BREAK st
		{
			if (!inloop)
				SYNTAX("break illegal outside of loops");
			$$ = stat1(BREAK, NIL);
		}
	| CONTINUE st
		{
			if (!inloop)
				SYNTAX("continue illegal outside of loops");
			$$ = stat1(CONTINUE, NIL);
		}
	| do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
		{ $$ = stat2(DO, $3, notnull($7)); }
	| EXIT pattern st
		{ $$ = stat1(EXIT, $2); }
	| EXIT st
		{ $$ = stat1(EXIT, NIL); }
	| for
	| if stmt else stmt
		{ $$ = stat3(IF, $1, $2, $4); }
	| if stmt
		{ $$ = stat3(IF, $1, $2, NIL); }
	| lbrace stmtlist rbrace
		{ $$ = $2; }
	| NEXT st
		{
			if (infunc)
				SYNTAX("next is illegal inside a function");
			$$ = stat1(NEXT, NIL);
		}
	| NEXTFILE st
		{
			if (infunc)
				SYNTAX("nextfile is illegal inside a function");
			$$ = stat1(NEXTFILE, NIL);
		}
	| RETURN pattern st
		{ $$ = stat1(RETURN, $2); }
	| RETURN st
		{ $$ = stat1(RETURN, NIL); }
	| simple_stmt st
	| while {inloop++;} stmt
		{ --inloop; $$ = stat2(WHILE, $1, $3); }
	| ';' opt_nl
		{ $$ = 0; }
	;

stmtlist:
	  stmt
	| stmtlist stmt
		{ $$ = linkum($1, $2); }
	;

subop:
	  SUB
	| GSUB
	;

/* Adjacent string literals are joined with catstr(). */
string:
	  STRING
	| string STRING
		{ $$ = catstr($1, $2); }
	;

/*
 * term: arithmetic, built-in calls and primaries.
 *
 * Several built-ins take a regular expression as an argument.  The
 * convention used throughout this rule:
 *   - a reg_expr literal, or an operand for which constnode() is
 *     true, is turned into a dfa with makedfa() here in the parser;
 *     the node's first operand is NIL and the source of the regex
 *     (the reg_expr string, or the constant node) is free()d once the
 *     dfa has been built;
 *   - any other operand is stored as an expression node and the
 *     first operand is (Node *)1 -- presumably so the interpreter can
 *     tell the two cases apart; confirm against the run-time code.
 */
term:
	  term '/' ASGNOP term
		{ $$ = op2(DIVEQ, $1, $4); }
	| term '+' term
		{ $$ = op2(ADD, $1, $3); }
	| term '-' term
		{ $$ = op2(MINUS, $1, $3); }
	| term '*' term
		{ $$ = op2(MULT, $1, $3); }
	| term '/' term
		{ $$ = op2(DIVIDE, $1, $3); }
	| term '%' term
		{ $$ = op2(MOD, $1, $3); }
	| term POWER term
		{ $$ = op2(POWER, $1, $3); }
	| '-' term %prec UMINUS
		{ $$ = op1(UMINUS, $2); }
	| '+' term %prec UMINUS
		{ $$ = op1(UPLUS, $2); }
	| NOT term %prec UMINUS
		{ $$ = op1(NOT, notnull($2)); }
	| BLTIN '(' ')'
		{ $$ = op2(BLTIN, itonp($1), rectonode()); }
	| BLTIN '(' patlist ')'
		{ $$ = op2(BLTIN, itonp($1), $3); }
	| BLTIN
		{ $$ = op2(BLTIN, itonp($1), rectonode()); }
	| CALL '(' ')'
		{ $$ = op2(CALL, celltonode($1,CVAR), NIL); }
	| CALL '(' patlist ')'
		{ $$ = op2(CALL, celltonode($1,CVAR), $3); }
	| CLOSE term
		{ $$ = op1(CLOSE, $2); }
	| DECR var
		{ $$ = op1(PREDECR, $2); }
	| INCR var
		{ $$ = op1(PREINCR, $2); }
	| var DECR
		{ $$ = op1(POSTDECR, $1); }
	| var INCR
		{ $$ = op1(POSTINCR, $1); }
	| GENSUB '(' reg_expr comma pattern comma pattern ')'
		{
			$$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, rectonode());
			/* release the regex text, as every other reg_expr user does */
			free($3);
		}
	| GENSUB '(' pattern comma pattern comma pattern ')'
		{
			if (constnode($3)) {
				$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3), 1), $5, $7, rectonode());
				free($3);
			} else
				$$ = op5(GENSUB, (Node *)1, $3, $5, $7, rectonode());
		}
	| GENSUB '(' reg_expr comma pattern comma pattern comma pattern ')'
		{
			$$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, $9);
			/* release the regex text, as every other reg_expr user does */
			free($3);
		}
	| GENSUB '(' pattern comma pattern comma pattern comma pattern ')'
		{
			if (constnode($3)) {
				$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3),1), $5,$7,$9);
				free($3);
			} else
				$$ = op5(GENSUB, (Node *)1, $3, $5, $7, $9);
		}
	| GETLINE var LT term
		{ $$ = op3(GETLINE, $2, itonp($3), $4); }
	| GETLINE LT term
		{ $$ = op3(GETLINE, NIL, itonp($2), $3); }
	| GETLINE var
		{ $$ = op3(GETLINE, $2, NIL, NIL); }
	| GETLINE
		{ $$ = op3(GETLINE, NIL, NIL, NIL); }
	| INDEX '(' pattern comma pattern ')'
		{ $$ = op2(INDEX, $3, $5); }
	| INDEX '(' pattern comma reg_expr ')'
		{
			/*
			 * Always an error; the node is still built (with the
			 * regex string cast to a Node pointer) after the
			 * diagnostic has been issued.
			 */
			SYNTAX("index() doesn't permit regular expressions");
			$$ = op2(INDEX, $3, (Node*)$5);
		}
	| '(' pattern ')'
		{ $$ = $2; }
	| MATCHFCN '(' pattern comma reg_expr ')'
		{
			$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1));
			free($5);
		}
	| MATCHFCN '(' pattern comma pattern ')'
		{
			if (constnode($5)) {
				$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
				free($5);
			} else
				$$ = op3(MATCHFCN, (Node *)1, $3, $5);
		}
	| NUMBER
		{ $$ = celltonode($1, CCON); }
	| SPLIT '(' pattern comma varname comma pattern ')'	/* string */
		{ $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
	| SPLIT '(' pattern comma varname comma reg_expr ')'	/* const /regexp/ */
		{
			$$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR);
			free($7);
		}
	| SPLIT '(' pattern comma varname ')'
		{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }	/* default */
	| SPRINTF '(' patlist ')'
		{ $$ = op1($1, $3); }
	| string
		{ $$ = celltonode($1, CCON); }
	| subop '(' reg_expr comma pattern ')'
		{
			$$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode());
			free($3);
		}
	| subop '(' pattern comma pattern ')'
		{
			if (constnode($3)) {
				$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
				free($3);
			} else
				$$ = op4($1, (Node *)1, $3, $5, rectonode());
		}
	| subop '(' reg_expr comma pattern comma var ')'
		{
			$$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7);
			free($3);
		}
	| subop '(' pattern comma pattern comma var ')'
		{
			if (constnode($3)) {
				$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
				free($3);
			} else
				$$ = op4($1, (Node *)1, $3, $5, $7);
		}
	| SUBSTR '(' pattern comma pattern comma pattern ')'
		{ $$ = op3(SUBSTR, $3, $5, $7); }
	| SUBSTR '(' pattern comma pattern ')'
		{ $$ = op3(SUBSTR, $3, $5, NIL); }
	| var
	;

/* var: anything that can be assigned to or incremented. */
var:
	  varname
	| varname '[' patlist ']'
		{ $$ = op2(ARRAY, makearr($1), $3); }
	| IVAR
		{ $$ = op1(INDIRECT, celltonode($1, CVAR)); }
	| INDIRECT term
		{ $$ = op1(INDIRECT, $2); }
	;

/*
 * varlist: parameter list of a function definition.  Every
 * alternative also stores its result in the global arglist, and a
 * parameter added to an existing list is first checked for
 * duplicates with checkdup().
 */
varlist:
	  /* nothing */
		{ arglist = $$ = 0; }
	| VAR
		{ arglist = $$ = celltonode($1,CVAR); }
	| varlist comma VAR
		{
			checkdup($1, $3);
			arglist = $$ = linkum($1,celltonode($3,CVAR));
		}
	;

varname:
	  VAR
		{ $$ = celltonode($1, CVAR); }
	| ARG
		{ $$ = op1(ARG, itonp($1)); }
	| VARNF
		{ $$ = op1(VARNF, (Node *) $1); }
	;


/* The condition part of a while statement, passed through notnull. */
while:
	  WHILE '(' pattern rparen
		{ $$ = notnull($3); }
	;

%%

/*
 * setfname: record p as the function currently being defined.
 * A syntax error is reported if p is already an array or already a
 * function; curfname is set to p's name in every case.
 */
void setfname(Cell *p)
{
	if (isarr(p)) {
		SYNTAX("%s is an array, not a function", p->nval);
	} else if (isfcn(p)) {
		SYNTAX("you can't define function %s more than once", p->nval);
	}
	curfname = p->nval;
}

/*
 * constnode: nonzero when p is a value node (isvalue) whose cell,
 * held in narg[0], has csub == CCON.
 */
int constnode(Node *p)
{
	if (!isvalue(p))
		return 0;
	return ((Cell *) (p->narg[0]))->csub == CCON;
}

/* strnode: the sval of the cell held in p->narg[0]. */
char *strnode(Node *p)
{
	Cell *c = (Cell *) (p->narg[0]);

	return c->sval;
}

/*
 * notnull: return n unchanged when it is already a relational
 * (LE LT EQ NE GT GE) or logical (BOR AND NOT) node; otherwise
 * return a new node comparing n against nullnode with NE.
 */
Node *notnull(Node *n)
{
	const int op = n->nobj;

	if (op == LE || op == LT || op == EQ || op == NE || op == GT || op == GE)
		return n;
	if (op == BOR || op == AND || op == NOT)
		return n;
	return op2(NE, n, nullnode);
}

/*
 * checkdup: report a syntax error if the name of cp already appears
 * in the list vl.  Scanning stops at the first duplicate found.
 */
void checkdup(Node *vl, Cell *cp)	/* check if name already in list */
{
	char *name = cp->nval;
	Node *arg = vl;

	while (arg) {
		Cell *seen = (Cell *) (arg->narg[0]);

		if (strcmp(name, seen->nval) == 0) {
			SYNTAX("duplicate argument %s", name);
			return;
		}
		arg = arg->nnext;
	}
}
