/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name Lucent Technologies or any of
its entities not be used in advertising or publicity pertaining
to distribution of the software without specific, written prior
permission.

LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/

%{
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif

#include <stdio.h>
#include <string.h>
#include "awk.h"

void checkdup(Node *list, Cell *item);
int yywrap(void) { return(1); }

/* Parser state shared between the grammar actions below. */
Node	*beginloc = 0;	/* statements linked together from XBEGIN actions */
Node	*endloc = 0;	/* statements linked together from XEND actions */
int	infunc	= 0;	/* = 1 if in arglist or body of func */
int	inloop	= 0;	/* = 1 if in while, for, do */
char	*curfname = 0;	/* current function name */
Node	*arglist = 0;	/* list of args for current function */
%}

/* Semantic value carried by tokens and nonterminals. */
%union {
	Node	*p;	/* parse-tree node */
	Cell	*cp;	/* symbol/constant cell */
	int	i;	/* integer token value */
	char	*s;	/* string (regular expression text) */
}

%token	<i>	FIRSTTOKEN	/* must be first */
%token	<p>	PROGRAM PASTAT PASTAT2 XBEGIN XEND
%token	<i>	NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
%token	<i>	ARRAY
%token	<i>	MATCH NOTMATCH MATCHOP
%token	<i>	FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE
%token	<i>	AND BOR APPEND EQ GE GT LE LT NE IN
%token	<i>	ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%token	<i>	SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
%token	<i>	ADD MINUS MULT DIVIDE MOD
%token	<i>	ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
%token	<i>	PRINT PRINTF SPRINTF
%token	<p>	ELSE INTEST CONDEXPR
%token	<i>	POSTINCR PREINCR POSTDECR PREDECR
%token	<cp>	VAR IVAR VARNF CALL NUMBER STRING
%token	<s>	REGEXPR

%type	<p>	pas pattern ppattern plist pplist patlist prarg term re
%type	<p>	pa_pat pa_stat pa_stats
%type	<s>	reg_expr
%type	<p>	simple_stmt opt_simple_stmt stmt stmtlist
%type	<p>	var varname funcname varlist
%type	<p>	for if else while
%type	<i>	do st
%type	<i>	pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
%type	<i>	subop print

/* Operator precedence and associativity, lowest precedence first. */
%right	ASGNOP
%right	'?'
%right	':'
%left	BOR
%left	AND
%left	GETLINE
%nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
%left	ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%left	GENSUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
%left	PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
%left	REGEXPR VAR VARNF IVAR WHILE '('
%left	CAT
%left	'+' '-'
%left	'*' '/' '%'
%left	NOT UMINUS
%right	POWER
%right	DECR INCR
%left	INDIRECT
%token	LASTTOKEN	/* must be last */

%%

/* Whole program: on success the finished tree is stored in winner. */
program:
	  pas	{ if (errorflag==0)
			winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
	| error	{ yyclearin; bracecheck(); SYNTAX("bailing out"); }
	;

/* The next few rules accept a token followed by any number of newlines. */
and:
	  AND | and NL
	;

bor:
	  BOR | bor NL
	;

comma:
	  ',' | comma NL
	;

do:
	  DO | do NL
	;

else:
	  ELSE | else NL
	;

/* for loops; inloop is raised while the body is parsed so that
 * break/continue inside it are accepted. */
for:
	  FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
		{ --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }
	| FOR '(' opt_simple_stmt ';'  ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
		{ --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }
	| FOR '(' varname IN varname rparen {inloop++;} stmt
		{ --inloop; $$ = stat3(IN, $3, makearr($5), $8); }
	;

/* Name in a function definition; setfname() reports clashes and
 * records the name in curfname. */
funcname:
	  VAR	{ setfname($1); }
	| CALL	{ setfname($1); }
	;

if:
	  IF '(' pattern rparen		{ $$ = notnull($3); }
	;

lbrace:
	  '{' | lbrace NL
	;

nl:
	  NL | nl NL
	;

opt_nl:
	  /* empty */	{ $$ = 0; }
	| nl
	;

opt_pst:
	  /* empty */	{ $$ = 0; }
	| pst
	;


opt_simple_stmt:
	  /* empty */			{ $$ = 0; }
	| simple_stmt
	;

/* Sequence of pattern-action statements, optionally surrounded by
 * separators (newlines and semicolons). */
pas:
	  opt_pst			{ $$ = 0; }
	| opt_pst pa_stats opt_pst	{ $$ = $2; }
	;

pa_pat:
	  pattern	{ $$ = notnull($1); }
	;

/* One pattern-action statement.  A pattern with no action gets a
 * PRINT of rectonode(); XBEGIN/XEND bodies are accumulated in
 * beginloc/endloc; FUNC definitions are handed to defn(). */
pa_stat:
	  pa_pat			{ $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
	| pa_pat lbrace stmtlist '}'	{ $$ = stat2(PASTAT, $1, $3); }
	| pa_pat ',' pa_pat		{ $$ = pa2stat($1, $3, stat2(PRINT, rectonode(), NIL)); }
	| pa_pat ',' pa_pat lbrace stmtlist '}'	{ $$ = pa2stat($1, $3, $5); }
	| lbrace stmtlist '}'		{ $$ = stat2(PASTAT, NIL, $2); }
	| XBEGIN lbrace stmtlist '}'
		{ beginloc = linkum(beginloc, $3); $$ = 0; }
	| XEND lbrace stmtlist '}'
		{ endloc = linkum(endloc, $3); $$ = 0; }
	| FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}'
		{ infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
	;

pa_stats:
	  pa_stat
	| pa_stats opt_pst pa_stat	{ $$ = linkum($1, $3); }
	;

patlist:
	  pattern
	| patlist comma pattern		{ $$ = linkum($1, $3); }
	;

/* Restricted form of pattern used for print arguments (via pplist):
 * it has no relational-operator or '|' GETLINE alternatives. */
ppattern:
	  var ASGNOP ppattern		{ $$ = op2($2, $1, $3); }
	| ppattern '?' ppattern ':' ppattern %prec '?'
	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
	| ppattern bor ppattern %prec BOR
		{ $$ = op2(BOR, notnull($1), notnull($3)); }
	| ppattern and ppattern %prec AND
		{ $$ = op2(AND, notnull($1), notnull($3)); }
	| ppattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
	| ppattern MATCHOP ppattern
		{ if (constnode($3))
			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
		  else
			$$ = op3($2, (Node *)1, $1, $3); }
	| ppattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
	| ppattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
	| re
	| term
	;

/* General expression. */
pattern:
	  var ASGNOP pattern		{ $$ = op2($2, $1, $3); }
	| pattern '?' pattern ':' pattern %prec '?'
	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
	| pattern bor pattern %prec BOR
		{ $$ = op2(BOR, notnull($1), notnull($3)); }
	| pattern and pattern %prec AND
		{ $$ = op2(AND, notnull($1), notnull($3)); }
	| pattern EQ pattern		{ $$ = op2($2, $1, $3); }
	| pattern GE pattern		{ $$ = op2($2, $1, $3); }
	| pattern GT pattern		{ $$ = op2($2, $1, $3); }
	| pattern LE pattern		{ $$ = op2($2, $1, $3); }
	| pattern LT pattern		{ $$ = op2($2, $1, $3); }
	| pattern NE pattern		{ $$ = op2($2, $1, $3); }
	| pattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
	| pattern MATCHOP pattern
		{ if (constnode($3))
			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
		  else
			$$ = op3($2, (Node *)1, $1, $3); }
	| pattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
	| pattern '|' GETLINE var	{
			if (safe) SYNTAX("cmd | getline is unsafe");
			else $$ = op3(GETLINE, $4, itonp($2), $1); }
	| pattern '|' GETLINE		{
			if (safe) SYNTAX("cmd | getline is unsafe");
			else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
	| pattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
	| re
	| term
	;

/* Comma-separated list of at least two patterns. */
plist:
	  pattern comma pattern		{ $$ = linkum($1, $3); }
	| plist comma pattern		{ $$ = linkum($1, $3); }
	;

pplist:
	  ppattern
	| pplist comma ppattern		{ $$ = linkum($1, $3); }
	;

/* Arguments of print/printf; an empty list becomes rectonode(). */
prarg:
	  /* empty */			{ $$ = rectonode(); }
	| pplist
	| '(' plist ')'			{ $$ = $2; }
	;

print:
	  PRINT | PRINTF
	;

/* One or more statement separators: newlines and/or semicolons. */
pst:
	  NL | ';' | pst NL | pst ';'
	;

rbrace:
	  '}' | rbrace NL
	;

/* A bare /regexp/ is a MATCH against rectonode(). */
re:
	   reg_expr
		{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
	| NOT re	{ $$ = op1(NOT, notnull($2)); }
	;

/* startreg() is called as soon as the opening '/' has been seen,
 * before the REGEXPR token is read. */
reg_expr:
	  '/' {startreg();} REGEXPR '/'		{ $$ = $3; }
	;

rparen:
	  ')' | rparen NL
	;

/* Output redirection forms are rejected when the safe flag is set. */
simple_stmt:
	  print prarg '|' term		{
			if (safe) SYNTAX("print | is unsafe");
			else $$ = stat3($1, $2, itonp($3), $4); }
	| print prarg APPEND term	{
			if (safe) SYNTAX("print >> is unsafe");
			else $$ = stat3($1, $2, itonp($3), $4); }
	| print prarg GT term		{
			if (safe) SYNTAX("print > is unsafe");
			else $$ = stat3($1, $2, itonp($3), $4); }
	| print prarg			{ $$ = stat3($1, $2, NIL, NIL); }
	| DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
	| DELETE varname		 { $$ = stat2(DELETE, makearr($2), 0); }
	| pattern			{ $$ = exptostat($1); }
	| error				{ yyclearin; SYNTAX("illegal statement"); }
	;

/* Statement terminator. */
st:
	  nl
	| ';' opt_nl
	;

/* break/continue are checked against inloop, next/nextfile against
 * infunc; loop bodies are parsed with inloop raised. */
stmt:
	  BREAK st		{ if (!inloop) SYNTAX("break illegal outside of loops");
				  $$ = stat1(BREAK, NIL); }
	| CONTINUE st		{  if (!inloop) SYNTAX("continue illegal outside of loops");
			 	  $$ = stat1(CONTINUE, NIL); }
	| do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
		{ $$ = stat2(DO, $3, notnull($7)); }
	| EXIT pattern st	{ $$ = stat1(EXIT, $2); }
	| EXIT st		{ $$ = stat1(EXIT, NIL); }
	| for
	| if stmt else stmt	{ $$ = stat3(IF, $1, $2, $4); }
	| if stmt		{ $$ = stat3(IF, $1, $2, NIL); }
	| lbrace stmtlist rbrace { $$ = $2; }
	| NEXT st	{ if (infunc)
				SYNTAX("next is illegal inside a function");
			  $$ = stat1(NEXT, NIL); }
	| NEXTFILE st	{ if (infunc)
				SYNTAX("nextfile is illegal inside a function");
			  $$ = stat1(NEXTFILE, NIL); }
	| RETURN pattern st	{ $$ = stat1(RETURN, $2); }
	| RETURN st		{ $$ = stat1(RETURN, NIL); }
	| simple_stmt st
	| while {inloop++;} stmt	{ --inloop; $$ = stat2(WHILE, $1, $3); }
	| ';' opt_nl		{ $$ = 0; }
	;

stmtlist:
	  stmt
	| stmtlist stmt		{ $$ = linkum($1, $2); }
	;

subop:
	  SUB | GSUB
	;

/* Arithmetic, function calls, built-ins, constants and variables.
 * Where a regular-expression argument is a constant (constnode), it
 * is compiled here with makedfa(); otherwise the node is built with
 * (Node *)1 in its first slot and the expression itself is kept. */
term:
 	  term '/' ASGNOP term		{ $$ = op2(DIVEQ, $1, $4); }
 	| term '+' term			{ $$ = op2(ADD, $1, $3); }
	| term '-' term			{ $$ = op2(MINUS, $1, $3); }
	| term '*' term			{ $$ = op2(MULT, $1, $3); }
	| term '/' term			{ $$ = op2(DIVIDE, $1, $3); }
	| term '%' term			{ $$ = op2(MOD, $1, $3); }
	| term POWER term		{ $$ = op2(POWER, $1, $3); }
	| '-' term %prec UMINUS		{ $$ = op1(UMINUS, $2); }
	| '+' term %prec UMINUS		{ $$ = $2; }
	| NOT term %prec UMINUS		{ $$ = op1(NOT, notnull($2)); }
	| BLTIN '(' ')'			{ $$ = op2(BLTIN, itonp($1), rectonode()); }
	| BLTIN '(' patlist ')'		{ $$ = op2(BLTIN, itonp($1), $3); }
	| BLTIN				{ $$ = op2(BLTIN, itonp($1), rectonode()); }
	| CALL '(' ')'			{ $$ = op2(CALL, celltonode($1,CVAR), NIL); }
	| CALL '(' patlist ')'		{ $$ = op2(CALL, celltonode($1,CVAR), $3); }
	| CLOSE term			{ $$ = op1(CLOSE, $2); }
	| DECR var			{ $$ = op1(PREDECR, $2); }
	| INCR var			{ $$ = op1(PREINCR, $2); }
	| var DECR			{ $$ = op1(POSTDECR, $1); }
	| var INCR			{ $$ = op1(POSTINCR, $1); }
	| GENSUB '(' reg_expr comma pattern comma pattern ')'
		{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, rectonode()); }
	| GENSUB '(' pattern comma pattern comma pattern ')'
		{ if (constnode($3))
			$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3), 1), $5, $7, rectonode());
		  else
			$$ = op5(GENSUB, (Node *)1, $3, $5, $7, rectonode());
		}
	| GENSUB '(' reg_expr comma pattern comma pattern comma pattern ')'
		{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, $9); }
	| GENSUB '(' pattern comma pattern comma pattern comma pattern ')'
		{ if (constnode($3))
			$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3),1), $5,$7,$9);
		  else
			$$ = op5(GENSUB, (Node *)1, $3, $5, $7, $9);
		}
	| GETLINE var LT term		{ $$ = op3(GETLINE, $2, itonp($3), $4); }
	| GETLINE LT term		{ $$ = op3(GETLINE, NIL, itonp($2), $3); }
	| GETLINE var			{ $$ = op3(GETLINE, $2, NIL, NIL); }
	| GETLINE			{ $$ = op3(GETLINE, NIL, NIL, NIL); }
	| INDEX '(' pattern comma pattern ')'
		{ $$ = op2(INDEX, $3, $5); }
	| INDEX '(' pattern comma reg_expr ')'
		{ SYNTAX("index() doesn't permit regular expressions");
		  $$ = op2(INDEX, $3, (Node*)$5); }
	| '(' pattern ')'		{ $$ = $2; }
	| MATCHFCN '(' pattern comma reg_expr ')'
		{ $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
	| MATCHFCN '(' pattern comma pattern ')'
		{ if (constnode($5))
			$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
		  else
			$$ = op3(MATCHFCN, (Node *)1, $3, $5); }
	| NUMBER			{ $$ = celltonode($1, CCON); }
	| SPLIT '(' pattern comma varname comma pattern ')'     /* string */
		{ $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
	| SPLIT '(' pattern comma varname comma reg_expr ')'    /* const /regexp/ */
		{ $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
	| SPLIT '(' pattern comma varname ')'
		{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }  /* default */
	| SPRINTF '(' patlist ')'	{ $$ = op1($1, $3); }
	| STRING	 		{ $$ = celltonode($1, CCON); }
	| subop '(' reg_expr comma pattern ')'
		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
	| subop '(' pattern comma pattern ')'
		{ if (constnode($3))
			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
		  else
			$$ = op4($1, (Node *)1, $3, $5, rectonode()); }
	| subop '(' reg_expr comma pattern comma var ')'
		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
	| subop '(' pattern comma pattern comma var ')'
		{ if (constnode($3))
			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
		  else
			$$ = op4($1, (Node *)1, $3, $5, $7); }
	| SUBSTR '(' pattern comma pattern comma pattern ')'
		{ $$ = op3(SUBSTR, $3, $5, $7); }
	| SUBSTR '(' pattern comma pattern ')'
		{ $$ = op3(SUBSTR, $3, $5, NIL); }
	| var
	;

var:
	  varname
	| varname '[' patlist ']'	{ $$ = op2(ARRAY, makearr($1), $3); }
	| IVAR				{ $$ = op1(INDIRECT, celltonode($1, CVAR)); }
	| INDIRECT term	 		{ $$ = op1(INDIRECT, $2); }
	;

/* Parameter list of a function definition; also recorded in arglist.
 * checkdup() reports a name that is already in the list. */
varlist:
	  /* nothing */		{ arglist = $$ = 0; }
	| VAR			{ arglist = $$ = celltonode($1,CVAR); }
	| varlist comma VAR	{
			checkdup($1, $3);
			arglist = $$ = linkum($1,celltonode($3,CVAR)); }
	;

varname:
	  VAR			{ $$ = celltonode($1, CVAR); }
	| ARG 			{ $$ = op1(ARG, itonp($1)); }
	| VARNF			{ $$ = op1(VARNF, (Node *) $1); }
	;


while:
	  WHILE '(' pattern rparen	{ $$ = notnull($3); }
	;

%%

/*
 * setfname: called when a function definition's name has been parsed.
 * Reports a syntax error if p is already an array or a function, then
 * records p's name in curfname (also after an error was reported).
 */
void setfname(Cell *p)
{
	if (isarr(p))
		SYNTAX("%s is an array, not a function", p->nval);
	else if (isfcn(p))
		SYNTAX("you can't define function %s more than once", p->nval);
	curfname = p->nval;
}

/*
 * constnode: nonzero when p is a value node (isvalue) whose cell,
 * stored in p->narg[0], has csub == CCON.
 */
int constnode(Node *p)
{
	return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
}

/*
 * strnode: return the sval of the cell stored in p->narg[0].
 * The grammar only calls this after constnode(p) has succeeded.
 */
char *strnode(Node *p)
{
	return ((Cell *)(p->narg[0]))->sval;
}

/*
 * notnull: prepare n for use as a condition.  Relational and logical
 * operator nodes are returned unchanged; any other node is wrapped in
 * an NE comparison against nullnode.
 */
Node *notnull(Node *n)
{
	switch (n->nobj) {
	case LE: case LT: case EQ: case NE: case GT: case GE:
	case BOR: case AND: case NOT:
		return n;
	default:
		return op2(NE, n, nullnode);
	}
}

/*
 * checkdup: report a syntax error if the name of cp already appears
 * in the nnext-linked list vl; stops at the first match.
 */
void checkdup(Node *vl, Cell *cp)	/* check if name already in list */
{
	char *s = cp->nval;
	for ( ; vl; vl = vl->nnext) {
		if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
			SYNTAX("duplicate argument %s", s);
			break;
		}
	}
}