1 /* $OpenBSD: tran.c,v 1.6 1999/12/08 23:09:46 millert Exp $ */ 2 /**************************************************************** 3 Copyright (C) Lucent Technologies 1997 4 All Rights Reserved 5 6 Permission to use, copy, modify, and distribute this software and 7 its documentation for any purpose and without fee is hereby 8 granted, provided that the above copyright notice appear in all 9 copies and that both that the copyright notice and this 10 permission notice and warranty disclaimer appear in supporting 11 documentation, and that the name Lucent Technologies or any of 12 its entities not be used in advertising or publicity pertaining 13 to distribution of the software without specific, written prior 14 permission. 15 16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 23 THIS SOFTWARE. 24 ****************************************************************/ 25 26 #define DEBUG 27 #include <stdio.h> 28 #include <math.h> 29 #include <ctype.h> 30 #include <string.h> 31 #include <stdlib.h> 32 #include "awk.h" 33 #include "ytab.h" 34 35 #define FULLTAB 2 /* rehash when table gets this x full */ 36 #define GROWTAB 4 /* grow table by this factor */ 37 38 Array *symtab; /* main symbol table */ 39 40 char **FS; /* initial field sep */ 41 char **RS; /* initial record sep */ 42 char **OFS; /* output field sep */ 43 char **ORS; /* output record sep */ 44 char **OFMT; /* output format for numbers */ 45 char **CONVFMT; /* format for conversions in getsval */ 46 Awkfloat *NF; /* number of fields in current record */ 47 Awkfloat *NR; /* number of current record */ 48 Awkfloat *FNR; /* number of current record in current file */ 49 char **FILENAME; /* current filename argument */ 50 Awkfloat *ARGC; /* number of arguments from command line */ 51 char **SUBSEP; /* subscript separator for a[i,j,k]; default \034 */ 52 Awkfloat *RSTART; /* start of re matched with ~; origin 1 (!) */ 53 Awkfloat *RLENGTH; /* length of same */ 54 55 Cell *nrloc; /* NR */ 56 Cell *nfloc; /* NF */ 57 Cell *fnrloc; /* FNR */ 58 Array *ARGVtab; /* symbol table containing ARGV[...] */ 59 Array *ENVtab; /* symbol table containing ENVIRON[...] */ 60 Cell *rstartloc; /* RSTART */ 61 Cell *rlengthloc; /* RLENGTH */ 62 Cell *symtabloc; /* SYMTAB */ 63 64 Cell *nullloc; /* a guaranteed empty cell */ 65 Node *nullnode; /* zero&null, converted into a node for comparisons */ 66 Cell *literal0; 67 68 extern Cell **fldtab; 69 70 void syminit(void) /* initialize symbol table with builtin vars */ 71 { 72 literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab); 73 /* this is used for if(x)... tests: */ 74 nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab); 75 nullnode = celltonode(nullloc, CCON); 76 77 FS = &setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab)->sval; 78 RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval; 79 OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval; 80 ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval; 81 OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; 82 CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; 83 FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval; 84 nfloc = setsymtab("NF", "", 0.0, NUM, symtab); 85 NF = &nfloc->fval; 86 nrloc = setsymtab("NR", "", 0.0, NUM, symtab); 87 NR = &nrloc->fval; 88 fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab); 89 FNR = &fnrloc->fval; 90 SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval; 91 rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab); 92 RSTART = &rstartloc->fval; 93 rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab); 94 RLENGTH = &rlengthloc->fval; 95 symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab); 96 symtabloc->sval = (char *) symtab; 97 } 98 99 void arginit(int ac, char **av) /* set up ARGV and ARGC */ 100 { 101 Cell *cp; 102 int i; 103 char temp[50]; 104 105 ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval; 106 cp = setsymtab("ARGV", "", 0.0, ARR, symtab); 107 ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */ 108 cp->sval = (char *) ARGVtab; 109 for (i = 0; i < ac; i++) { 110 sprintf(temp, "%d", i); 111 if (is_number(*av)) 112 setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab); 113 else 114 setsymtab(temp, *av, 0.0, STR, ARGVtab); 115 av++; 116 } 117 } 118 119 void envinit(char **envp) /* set up ENVIRON variable */ 120 { 121 Cell *cp; 122 char *p; 123 124 cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab); 125 ENVtab = makesymtab(NSYMTAB); 126 cp->sval = (char *) ENVtab; 127 for ( ; *envp; envp++) { 128 if ((p = strchr(*envp, '=')) == NULL) 129 continue; 130 *p++ = 0; /* split into two strings at = */ 131 if (is_number(p)) 132 setsymtab(*envp, p, atof(p), STR|NUM, ENVtab); 133 else 134 setsymtab(*envp, p, 0.0, STR, ENVtab); 135 p[-1] = '='; /* restore in case env is passed down to a shell */ 136 } 137 } 138 139 Array *makesymtab(int n) /* make a new symbol table */ 140 { 141 Array *ap; 142 Cell **tp; 143 144 ap = (Array *) malloc(sizeof(Array)); 145 tp = (Cell **) calloc(n, sizeof(Cell *)); 146 if (ap == NULL || tp == NULL) 147 FATAL("out of space in makesymtab"); 148 ap->nelem = 0; 149 ap->size = n; 150 ap->tab = tp; 151 return(ap); 152 } 153 154 void freesymtab(Cell *ap) /* free a symbol table */ 155 { 156 Cell *cp, *temp; 157 Array *tp; 158 int i; 159 160 if (!isarr(ap)) 161 return; 162 tp = (Array *) ap->sval; 163 if (tp == NULL) 164 return; 165 for (i = 0; i < tp->size; i++) { 166 for (cp = tp->tab[i]; cp != NULL; cp = temp) { 167 xfree(cp->nval); 168 if (freeable(cp)) 169 xfree(cp->sval); 170 temp = cp->cnext; /* avoids freeing then using */ 171 free(cp); 172 } 173 tp->tab[i] = 0; 174 } 175 free(tp->tab); 176 free(tp); 177 } 178 179 void freeelem(Cell *ap, char *s) /* free elem s from ap (i.e., ap["s"] */ 180 { 181 Array *tp; 182 Cell *p, *prev = NULL; 183 int h; 184 185 tp = (Array *) ap->sval; 186 h = hash(s, tp->size); 187 for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext) 188 if (strcmp(s, p->nval) == 0) { 189 if (prev == NULL) /* 1st one */ 190 tp->tab[h] = p->cnext; 191 else /* middle somewhere */ 192 prev->cnext = p->cnext; 193 if (freeable(p)) 194 xfree(p->sval); 195 free(p->nval); 196 free(p); 197 tp->nelem--; 198 return; 199 } 200 } 201 202 Cell *setsymtab(char *n, char *s, Awkfloat f, unsigned t, Array *tp) 203 { 204 int h; 205 Cell *p; 206 207 if (n != NULL && (p = lookup(n, tp)) != NULL) { 208 dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n", 209 p, p->nval, p->sval, p->fval, p->tval) ); 210 return(p); 211 } 212 p = (Cell *) malloc(sizeof(Cell)); 213 if (p == NULL) 214 FATAL("out of space for symbol table at %s", n); 215 p->nval = tostring(n); 216 p->sval = s ? tostring(s) : tostring(""); 217 p->fval = f; 218 p->tval = t; 219 p->csub = CUNK; 220 p->ctype = OCELL; 221 tp->nelem++; 222 if (tp->nelem > FULLTAB * tp->size) 223 rehash(tp); 224 h = hash(n, tp->size); 225 p->cnext = tp->tab[h]; 226 tp->tab[h] = p; 227 dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n", 228 p, p->nval, p->sval, p->fval, p->tval) ); 229 return(p); 230 } 231 232 int hash(char *s, int n) /* form hash value for string s */ 233 { 234 unsigned hashval; 235 236 for (hashval = 0; *s != '\0'; s++) 237 hashval = (*s + 31 * hashval); 238 return hashval % n; 239 } 240 241 void rehash(Array *tp) /* rehash items in small table into big one */ 242 { 243 int i, nh, nsz; 244 Cell *cp, *op, **np; 245 246 nsz = GROWTAB * tp->size; 247 np = (Cell **) calloc(nsz, sizeof(Cell *)); 248 if (np == NULL) /* can't do it, but can keep running. */ 249 return; /* someone else will run out later. */ 250 for (i = 0; i < tp->size; i++) { 251 for (cp = tp->tab[i]; cp; cp = op) { 252 op = cp->cnext; 253 nh = hash(cp->nval, nsz); 254 cp->cnext = np[nh]; 255 np[nh] = cp; 256 } 257 } 258 free(tp->tab); 259 tp->tab = np; 260 tp->size = nsz; 261 } 262 263 Cell *lookup(char *s, Array *tp) /* look for s in tp */ 264 { 265 Cell *p; 266 int h; 267 268 h = hash(s, tp->size); 269 for (p = tp->tab[h]; p != NULL; p = p->cnext) 270 if (strcmp(s, p->nval) == 0) 271 return(p); /* found it */ 272 return(NULL); /* not found */ 273 } 274 275 Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */ 276 { 277 int fldno; 278 279 if ((vp->tval & (NUM | STR)) == 0) 280 funnyvar(vp, "assign to"); 281 if (isfld(vp)) { 282 donerec = 0; /* mark $0 invalid */ 283 fldno = atoi(vp->nval); 284 if (fldno > *NF) 285 newfld(fldno); 286 dprintf( ("setting field %d to %g\n", fldno, f) ); 287 } else if (isrec(vp)) { 288 donefld = 0; /* mark $1... invalid */ 289 donerec = 1; 290 } 291 if (freeable(vp)) 292 xfree(vp->sval); /* free any previous string */ 293 vp->tval &= ~STR; /* mark string invalid */ 294 vp->tval |= NUM; /* mark number ok */ 295 dprintf( ("setfval %p: %s = %g, t=%o\n", vp, vp->nval, f, vp->tval) ); 296 return vp->fval = f; 297 } 298 299 void funnyvar(Cell *vp, char *rw) 300 { 301 if (isarr(vp)) 302 FATAL("can't %s %s; it's an array name.", rw, vp->nval); 303 if (vp->tval & FCN) 304 FATAL("can't %s %s; it's a function.", rw, vp->nval); 305 WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o", 306 vp, vp->nval, vp->sval, vp->fval, vp->tval); 307 } 308 309 char *setsval(Cell *vp, char *s) /* set string val of a Cell */ 310 { 311 char *t; 312 int fldno; 313 314 dprintf( ("starting setsval %p: %s = \"%s\", t=%o\n", vp, vp->nval, s, vp->tval) ); 315 if ((vp->tval & (NUM | STR)) == 0) 316 funnyvar(vp, "assign to"); 317 if (isfld(vp)) { 318 donerec = 0; /* mark $0 invalid */ 319 fldno = atoi(vp->nval); 320 if (fldno > *NF) 321 newfld(fldno); 322 dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) ); 323 } else if (isrec(vp)) { 324 donefld = 0; /* mark $1... invalid */ 325 donerec = 1; 326 } 327 t = tostring(s); /* in case it's self-assign */ 328 vp->tval &= ~NUM; 329 vp->tval |= STR; 330 if (freeable(vp)) 331 xfree(vp->sval); 332 vp->tval &= ~DONTFREE; 333 dprintf( ("setsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, t,t, vp->tval) ); 334 return(vp->sval = t); 335 } 336 337 Awkfloat getfval(Cell *vp) /* get float val of a Cell */ 338 { 339 if ((vp->tval & (NUM | STR)) == 0) 340 funnyvar(vp, "read value of"); 341 if (isfld(vp) && donefld == 0) 342 fldbld(); 343 else if (isrec(vp) && donerec == 0) 344 recbld(); 345 if (!isnum(vp)) { /* not a number */ 346 vp->fval = atof(vp->sval); /* best guess */ 347 if (is_number(vp->sval) && !(vp->tval&CON)) 348 vp->tval |= NUM; /* make NUM only sparingly */ 349 } 350 dprintf( ("getfval %p: %s = %g, t=%o\n", vp, vp->nval, vp->fval, vp->tval) ); 351 return(vp->fval); 352 } 353 354 char *getsval(Cell *vp) /* get string val of a Cell */ 355 { 356 char s[100]; /* BUG: unchecked */ 357 double dtemp; 358 359 if ((vp->tval & (NUM | STR)) == 0) 360 funnyvar(vp, "read value of"); 361 if (isfld(vp) && donefld == 0) 362 fldbld(); 363 else if (isrec(vp) && donerec == 0) 364 recbld(); 365 if (isstr(vp) == 0) { 366 if (freeable(vp)) 367 xfree(vp->sval); 368 if (modf(vp->fval, &dtemp) == 0) /* it's integral */ 369 sprintf(s, "%.30g", vp->fval); 370 else 371 sprintf(s, *CONVFMT, vp->fval); 372 vp->sval = tostring(s); 373 vp->tval &= ~DONTFREE; 374 vp->tval |= STR; 375 } 376 dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, vp->sval, vp->sval, vp->tval) ); 377 return(vp->sval); 378 } 379 380 char *tostring(char *s) /* make a copy of string s */ 381 { 382 char *p; 383 384 p = (char *) malloc(strlen(s)+1); 385 if (p == NULL) 386 FATAL("out of space in tostring on %s", s); 387 strcpy(p, s); 388 return(p); 389 } 390 391 char *qstring(char *s, int delim) /* collect string up to next delim */ 392 { 393 char *os = s; 394 int c, n; 395 char *buf, *bp; 396 397 if ((buf = (char *) malloc(strlen(s)+3)) == NULL) 398 FATAL( "out of space in qstring(%s)", s); 399 for (bp = buf; (c = *s) != delim; s++) { 400 if (c == '\n') 401 SYNTAX( "newline in string %.20s...", os ); 402 else if (c != '\\') 403 *bp++ = c; 404 else { /* \something */ 405 c = *++s; 406 if (c == 0) { /* \ at end */ 407 *bp++ = '\\'; 408 break; /* for loop */ 409 } 410 switch (c) { 411 case '\\': *bp++ = '\\'; break; 412 case 'n': *bp++ = '\n'; break; 413 case 't': *bp++ = '\t'; break; 414 case 'b': *bp++ = '\b'; break; 415 case 'f': *bp++ = '\f'; break; 416 case 'r': *bp++ = '\r'; break; 417 default: 418 if (!isdigit(c)) { 419 *bp++ = c; 420 break; 421 } 422 n = c - '0'; 423 if (isdigit(s[1])) { 424 n = 8 * n + *++s - '0'; 425 if (isdigit(s[1])) 426 n = 8 * n + *++s - '0'; 427 } 428 *bp++ = n; 429 break; 430 } 431 } 432 } 433 *bp++ = 0; 434 return buf; 435 } 436