xref: /csrg-svn/contrib/awk.research/lib.c (revision 65390)
1 /****************************************************************
2 Copyright (C) AT&T 1993
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name of AT&T or any of its entities
11 not be used in advertising or publicity pertaining to
12 distribution of the software without specific, written prior
13 permission.
14 
15 AT&T DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL AT&T OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24 
25 #define DEBUG
26 #include <stdio.h>
27 #include <string.h>
28 #include <ctype.h>
29 #include <errno.h>
30 #include <stdlib.h>
31 #include "awk.h"
32 #include "y.tab.h"
33 
/*
 * Fast-path value accessors.  When the cell's flag bits, masked with
 * ARR|FLD|REC plus NUM (or STR), show only NUM (or only STR), the stored
 * fval (or sval) is used directly; otherwise r_getfval()/r_getsval() is
 * called.  Note that the argument p is evaluated more than once.
 */
#define	getfval(p)	(((p)->tval & (ARR|FLD|REC|NUM)) == NUM ? (p)->fval : r_getfval(p))
#define	getsval(p)	(((p)->tval & (ARR|FLD|REC|STR)) == STR ? (p)->sval : r_getsval(p))

FILE	*infile	= NULL;	/* current input stream; NULL makes getrec open the next argument */
uchar	*file	= (uchar*) "";	/* name of the input argument currently being read */
int	recsize	= RECSIZE;	/* buffer size handed to readrec and allocated by recbld */
uchar	*recdata;	/* record buffer allocated by recinit */
uchar	*record;	/* text of the current $0: recdata, or the buffer built by recbld */
uchar	*fields;	/* buffer into which fldbld/refldbld copy the split fields */
Cell	*fldtab;	/* field cells: fldtab[0] is $0, fldtab[i] is $i */

#define	MAXFLD	200
int	nfields	= MAXFLD;	/* can be set from commandline in main */

int	donefld;	/* 1 = implies rec broken into fields */
int	donerec;	/* 1 = record is valid (no flds have changed) */

int	maxfld	= 0;	/* last used field */
int	argno	= 1;	/* current input argument number */
extern	Awkfloat *ARGC;
54 
55 void recinit(unsigned int n)
56 {
57 	static Cell dollar0 = {
58 	    OCELL, CFLD, (uchar*) "$0", /*recdata*/0, 0.0, REC|STR|DONTFREE };
59 	static Cell dollar1 = {
60 	    OCELL, CFLD, NULL, (uchar*) "", 0.0, FLD|STR|DONTFREE };
61 	int i;
62 
63 	record = recdata = (uchar *) malloc(n);
64 	fields = (uchar *) malloc(n);
65 	fldtab = (Cell *) malloc(nfields * sizeof(Cell));
66 	if (recdata == NULL || fields == NULL || fldtab == NULL)
67 		ERROR "out of space for $0 and fields" FATAL;
68 	fldtab[0] = dollar0;
69 	fldtab[0].sval = recdata;
70 	for (i = 1; i < nfields; i++)
71 		fldtab[i] = dollar1;
72 }
73 
74 void initgetrec(void)
75 {
76 	int i;
77 	uchar *p;
78 
79 	for (i = 1; i < *ARGC; i++) {
80 		if (!isclvar(p = getargv(i))) {	/* find 1st real filename */
81 			setsval(lookup("FILENAME", symtab), getargv(i));
82 			return;
83 		}
84 		setclvar(p);	/* a commandline assignment before filename */
85 		argno++;
86 	}
87 	infile = stdin;		/* no filenames, so use stdin */
88 }
89 
90 getrec(uchar *buf)	/* get next input record from whatever source */
91 {			/* note: tests whether buf == record */
92 	int c;
93 	static int firsttime = 1;
94 
95 	if (firsttime) {
96 		firsttime = 0;
97 		initgetrec();
98 	}
99 	dprintf( ("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n",
100 		*RS, *FS, *ARGC, *FILENAME) );
101 	donefld = 0;
102 	donerec = 1;
103 	buf[0] = 0;
104 	while (argno < *ARGC || infile == stdin) {
105 		dprintf( ("argno=%d, file=|%s|\n", argno, file) );
106 		if (infile == NULL) {	/* have to open a new file */
107 			file = getargv(argno);
108 			if (*file == '\0') {	/* it's been zapped */
109 				argno++;
110 				continue;
111 			}
112 			if (isclvar(file)) {	/* a var=value arg */
113 				setclvar(file);
114 				argno++;
115 				continue;
116 			}
117 			*FILENAME = file;
118 			dprintf( ("opening file %s\n", file) );
119 			if (*file == '-' && *(file+1) == '\0')
120 				infile = stdin;
121 			else if ((infile = fopen((char *)file, "r")) == NULL)
122 				ERROR "can't open file %s", file FATAL;
123 			setfval(fnrloc, 0.0);
124 		}
125 		c = readrec(buf, recsize, infile);
126 		if (c != 0 || buf[0] != '\0') {	/* normal record */
127 			if (buf == record) {
128 				if (!(recloc->tval & DONTFREE))
129 					xfree(recloc->sval);
130 				recloc->sval = record;
131 				recloc->tval = REC | STR | DONTFREE;
132 				if (isnumber(recloc->sval)) {
133 					recloc->fval = atof(recloc->sval);
134 					recloc->tval |= NUM;
135 				}
136 			}
137 			setfval(nrloc, nrloc->fval+1);
138 			setfval(fnrloc, fnrloc->fval+1);
139 			return 1;
140 		}
141 		/* EOF arrived on this file; set up next */
142 		if (infile != stdin)
143 			fclose(infile);
144 		infile = NULL;
145 		argno++;
146 	}
147 	return 0;	/* true end of file */
148 }
149 
150 readrec(uchar *buf, int bufsize, FILE *inf)	/* read one record into buf */
151 {
152 	register int sep, c;
153 	register uchar *rr;
154 	register int nrr;
155 
156 	if ((sep = **RS) == 0) {
157 		sep = '\n';
158 		while ((c=getc(inf)) == '\n' && c != EOF)	/* skip leading \n's */
159 			;
160 		if (c != EOF)
161 			ungetc(c, inf);
162 	}
163 	for (rr = buf, nrr = bufsize; ; ) {
164 		for (; (c=getc(inf)) != sep && c != EOF; *rr++ = c)
165 			if (--nrr < 0)
166 				ERROR "input record `%.30s...' too long; try -mr n", buf FATAL;
167 		if (**RS == sep || c == EOF)
168 			break;
169 		if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
170 			break;
171 		*rr++ = '\n';
172 		*rr++ = c;
173 	}
174 	if (rr > buf + bufsize)
175 		ERROR "input record `%.30s...' too long; try -mr n", buf FATAL;
176 	*rr = 0;
177 	dprintf( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) );
178 	return c == EOF && rr == buf ? 0 : 1;
179 }
180 
181 uchar *getargv(int n)	/* get ARGV[n] */
182 {
183 	Cell *x;
184 	uchar *s, temp[10];
185 	extern Array *ARGVtab;
186 
187 	sprintf((char *)temp, "%d", n);
188 	x = setsymtab(temp, "", 0.0, STR, ARGVtab);
189 	s = getsval(x);
190 	dprintf( ("getargv(%d) returns |%s|\n", n, s) );
191 	return s;
192 }
193 
194 void setclvar(uchar *s)	/* set var=value from s */
195 {
196 	uchar *p;
197 	Cell *q;
198 
199 	for (p=s; *p != '='; p++)
200 		;
201 	*p++ = 0;
202 	p = qstring(p, '\0');
203 	q = setsymtab(s, p, 0.0, STR, symtab);
204 	setsval(q, p);
205 	if (isnumber(q->sval)) {
206 		q->fval = atof(q->sval);
207 		q->tval |= NUM;
208 	}
209 	dprintf( ("command line set %s to |%s|\n", s, p) );
210 }
211 
212 
213 void fldbld(void)	/* create fields from current record */
214 {
215 	register uchar *r, *fr, sep;
216 	Cell *p;
217 	int i;
218 
219 	if (donefld)
220 		return;
221 	if (!(recloc->tval & STR))
222 		getsval(recloc);
223 	r = recloc->sval;
224 	fr = fields;
225 	i = 0;	/* number of fields accumulated here */
226 	if (strlen(*FS) > 1) {	/* it's a regular expression */
227 		i = refldbld(r, *FS);
228 	} else if ((sep = **FS) == ' ') {	/* default whitespace */
229 		for (i = 0; ; ) {
230 			while (*r == ' ' || *r == '\t' || *r == '\n')
231 				r++;
232 			if (*r == 0)
233 				break;
234 			i++;
235 			if (i >= nfields)
236 				break;
237 			if (!(fldtab[i].tval & DONTFREE))
238 				xfree(fldtab[i].sval);
239 			fldtab[i].sval = fr;
240 			fldtab[i].tval = FLD | STR | DONTFREE;
241 			do
242 				*fr++ = *r++;
243 			while (*r != ' ' && *r != '\t' && *r != '\n' && *r != '\0');
244 			*fr++ = 0;
245 		}
246 		*fr = 0;
247 	} else if (*r != 0) {	/* if 0, it's a null field */
248 		for (;;) {
249 			i++;
250 			if (i >= nfields)
251 				break;
252 			if (!(fldtab[i].tval & DONTFREE))
253 				xfree(fldtab[i].sval);
254 			fldtab[i].sval = fr;
255 			fldtab[i].tval = FLD | STR | DONTFREE;
256 			while (*r != sep && *r != '\n' && *r != '\0')	/* \n is always a separator */
257 				*fr++ = *r++;
258 			*fr++ = 0;
259 			if (*r++ == 0)
260 				break;
261 		}
262 		*fr = 0;
263 	}
264 	if (i >= nfields)
265 		ERROR "record `%.30s...' has too many fields; try -mf n", record FATAL;
266 	/* clean out junk from previous record */
267 	cleanfld(i, maxfld);
268 	maxfld = i;
269 	donefld = 1;
270 	for (p = fldtab+1; p <= fldtab+maxfld; p++) {
271 		if(isnumber(p->sval)) {
272 			p->fval = atof(p->sval);
273 			p->tval |= NUM;
274 		}
275 	}
276 	setfval(nfloc, (Awkfloat) maxfld);
277 	if (dbg)
278 		for (p = fldtab; p <= fldtab+maxfld; p++)
279 			printf("field %d: |%s|\n", p-fldtab, p->sval);
280 }
281 
282 void cleanfld(int n1, int n2)	/* clean out fields n1..n2 inclusive */
283 {
284 	static uchar *nullstat = (uchar *) "";
285 	register Cell *p, *q;
286 
287 	for (p = &fldtab[n2], q = &fldtab[n1]; p > q; p--) {
288 		if (!(p->tval & DONTFREE))
289 			xfree(p->sval);
290 		p->tval = FLD | STR | DONTFREE;
291 		p->sval = nullstat;
292 	}
293 }
294 
295 void newfld(int n)	/* add field n (after end) */
296 {
297 	if (n >= nfields)
298 		ERROR "creating too many fields (%d); try -mf n", n, record FATAL;
299 	cleanfld(maxfld, n);
300 	maxfld = n;
301 	setfval(nfloc, (Awkfloat) n);
302 }
303 
304 refldbld(uchar *rec, uchar *fs)	/* build fields from reg expr in FS */
305 {
306 	uchar *fr;
307 	int i, tempstat;
308 	fa *pfa;
309 
310 	fr = fields;
311 	*fr = '\0';
312 	if (*rec == '\0')
313 		return 0;
314 	pfa = makedfa(fs, 1);
315 	dprintf( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs) );
316 	tempstat = pfa->initstat;
317 	for (i = 1; i < nfields; i++) {
318 		if (!(fldtab[i].tval & DONTFREE))
319 			xfree(fldtab[i].sval);
320 		fldtab[i].tval = FLD | STR | DONTFREE;
321 		fldtab[i].sval = fr;
322 		dprintf( ("refldbld: i=%d\n", i) );
323 		if (nematch(pfa, rec)) {
324 			pfa->initstat = 2;	/* horrible coupling */
325 			dprintf( ("match %s (%d chars)\n", patbeg, patlen) );
326 			strncpy(fr, rec, patbeg-rec);
327 			fr += patbeg - rec + 1;
328 			*(fr-1) = '\0';
329 			rec = patbeg + patlen;
330 		} else {
331 			dprintf( ("no match %s\n", rec) );
332 			strcpy(fr, rec);
333 			pfa->initstat = tempstat;
334 			break;
335 		}
336 	}
337 	return i;
338 }
339 
340 void recbld(void)	/* create $0 from $1..$NF if necessary */
341 {
342 	register int i;
343 	register uchar *r, *p;
344 	static uchar *rec = 0;
345 
346 	if (donerec == 1)
347 		return;
348 	if (rec == 0) {
349 		rec = (uchar *) malloc(recsize);
350 		if (rec == 0)
351 			ERROR "out of space building $0, record size %d", recsize FATAL;
352 	}
353 	r = rec;
354 	for (i = 1; i <= *NF; i++) {
355 		p = getsval(&fldtab[i]);
356 		while (r < rec+recsize-1 && (*r = *p++))
357 			r++;
358 		if (i < *NF)
359 			for (p = *OFS; r < rec+recsize-1 && (*r = *p++); )
360 				r++;
361 	}
362 	if (r > rec + recsize - 1)
363 		ERROR "built giant record `%.30s...'; try -mr n", record FATAL;
364 	*r = '\0';
365 	dprintf( ("in recbld FS=%o, recloc=%o\n", **FS, recloc) );
366 	recloc->tval = REC | STR | DONTFREE;
367 	recloc->sval = record = rec;
368 	dprintf( ("in recbld FS=%o, recloc=%o\n", **FS, recloc) );
369 	dprintf( ("recbld = |%s|\n", record) );
370 	donerec = 1;
371 }
372 
373 Cell *fieldadr(int n)
374 {
375 	if (n < 0 || n >= nfields)
376 		ERROR "trying to access field %d; try -mf n", n FATAL;
377 	return(&fldtab[n]);
378 }
379 
int	errorflag	= 0;	/* set to 2 by yyerror */
char	errbuf[200];	/* message buffer; passed to error() by fpecatch-style ERROR uses -- NOTE(review): presumably filled by the ERROR macro from awk.h, confirm */
382 
383 void yyerror(uchar *s)
384 {
385 	extern uchar *cmdname, *curfname;
386 	static int been_here = 0;
387 
388 	if (been_here++ > 2)
389 		return;
390 	fprintf(stderr, "%s: %s", cmdname, s);
391 	fprintf(stderr, " at source line %d", lineno);
392 	if (curfname != NULL)
393 		fprintf(stderr, " in function %s", curfname);
394 	fprintf(stderr, "\n");
395 	errorflag = 2;
396 	eprint();
397 }
398 
/*
 * fpecatch: report "floating point exception n" as a fatal error.
 * NOTE(review): presumably installed as the SIGFPE handler elsewhere -- confirm.
 */
void fpecatch(int n)
{
	ERROR "floating point exception %d", n FATAL;
}
403 
extern int bracecnt, brackcnt, parencnt;

/*
 * bracecheck: on its first call only, consume the remaining input
 * characters (up to EOF or a NUL), feeding each one to bclass, and then
 * report any imbalance in the brace, bracket and parenthesis counters
 * through bcheck2.  Later calls do nothing.
 */
void bracecheck(void)
{
	int ch;
	static int beenhere = 0;

	if (beenhere++ != 0)
		return;
	for (;;) {
		ch = input();
		if (ch == EOF || ch == '\0')
			break;
		bclass(ch);
	}
	bcheck2(bracecnt, '{', '}');
	bcheck2(brackcnt, '[', ']');
	bcheck2(parencnt, '(', ')');
}
419 
/*
 * bcheck2: report an unbalanced bracket count on stderr.
 *
 * n is the count of a bracket pair: positive values are reported as
 * "missing", negative values as "extra", and zero prints nothing.  Every
 * message names the character c2; c1 is accepted but not used.
 */
void bcheck2(int n, int c1, int c2)
{
	if (n == 0)
		return;
	if (n > 0) {
		if (n == 1)
			fprintf(stderr, "\tmissing %c\n", c2);
		else
			fprintf(stderr, "\t%d missing %c's\n", n, c2);
	} else {
		if (n == -1)
			fprintf(stderr, "\textra %c\n", c2);
		else
			fprintf(stderr, "\t%d extra %c's\n", -n, c2);
	}
}
431 
432 void error(int f, char *s)
433 {
434 	extern Node *curnode;
435 	extern uchar *cmdname;
436 
437 	fflush(stdout);
438 	fprintf(stderr, "%s: ", cmdname);
439 	fprintf(stderr, "%s", s);
440 	fprintf(stderr, "\n");
441 	if (compile_time != 2 && NR && *NR > 0) {
442 		fprintf(stderr, " input record number %g", *FNR);
443 		if (strcmp(*FILENAME, "-") != 0)
444 			fprintf(stderr, ", file %s", *FILENAME);
445 		fprintf(stderr, "\n");
446 	}
447 	if (compile_time != 2 && curnode)
448 		fprintf(stderr, " source line number %d\n", curnode->lineno);
449 	else if (compile_time != 2 && lineno)
450 		fprintf(stderr, " source line number %d\n", lineno);
451 	eprint();
452 	if (f) {
453 		if (dbg > 1)		/* core dump if serious debugging on */
454 			abort();
455 		exit(2);
456 	}
457 }
458 
459 void eprint(void)	/* try to print context around error */
460 {
461 	uchar *p, *q;
462 	int c;
463 	static int been_here = 0;
464 	extern uchar ebuf[], *ep;
465 
466 	if (compile_time == 2 || compile_time == 0 || been_here++ > 0)
467 		return;
468 	p = ep - 1;
469 	if (p > ebuf && *p == '\n')
470 		p--;
471 	for ( ; p > ebuf && *p != '\n' && *p != '\0'; p--)
472 		;
473 	while (*p == '\n')
474 		p++;
475 	fprintf(stderr, " context is\n\t");
476 	for (q=ep-1; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--)
477 		;
478 	for ( ; p < q; p++)
479 		if (*p)
480 			putc(*p, stderr);
481 	fprintf(stderr, " >>> ");
482 	for ( ; p < ep; p++)
483 		if (*p)
484 			putc(*p, stderr);
485 	fprintf(stderr, " <<< ");
486 	if (*ep)
487 		while ((c = input()) != '\n' && c != '\0' && c != EOF) {
488 			putc(c, stderr);
489 			bclass(c);
490 		}
491 	putc('\n', stderr);
492 	ep = ebuf;
493 }
494 
495 void bclass(int c)
496 {
497 	switch (c) {
498 	case '{': bracecnt++; break;
499 	case '}': bracecnt--; break;
500 	case '[': brackcnt++; break;
501 	case ']': brackcnt--; break;
502 	case '(': parencnt++; break;
503 	case ')': parencnt--; break;
504 	}
505 }
506 
507 double errcheck(double x, uchar *s)
508 {
509 	extern int errno;
510 
511 	if (errno == EDOM) {
512 		errno = 0;
513 		ERROR "%s argument out of domain", s WARNING;
514 		x = 1;
515 	} else if (errno == ERANGE) {
516 		errno = 0;
517 		ERROR "%s result out of range", s WARNING;
518 		x = 1;
519 	}
520 	return x;
521 }
522 
/*
 * isclvar: is s of form var=something ?
 *
 * Returns 1 when s starts with a letter or underscore, continues with
 * letters, digits or underscores, and is then followed by a single '='
 * (a second '=' right after it does not count).  Returns 0 otherwise.
 */
int isclvar(uchar *s)	/* is s of form var=something ? */
{
	uchar *os = s;

	if (!isalpha(*s) && *s != '_')
		return 0;
	for ( ; *s; s++)
		if (!(isalnum(*s) || *s == '_'))
			break;
	return *s == '=' && s > os && *(s+1) != '=';
}
534 
#define	MAXEXPON	38	/* maximum exponent for fp number. should be IEEE */

/*
 * isnumber: does the string s look like a decimal number?
 *
 * Accepts optional leading and trailing blanks, tabs and newlines, an
 * optional sign, digits with an optional decimal point (at least one digit
 * is required on one side of the point), and an optional exponent: 'e' or
 * 'E', an optional sign, and one or two digits whose value is below
 * MAXEXPON.  Anything else, including an empty or all-blank string, is not
 * a number.
 *
 * Returns 1 if s is a number, 0 if not.
 */
int isnumber(uchar *s)	/* probably should be done by a library function */
{
	register int d1, d2;
	int point;
	uchar *es;

	d1 = d2 = point = 0;
	while (*s == ' ' || *s == '\t' || *s == '\n')
		s++;
	if (*s == '\0')
		return(0);	/* empty stuff isn't number */
	if (*s == '+' || *s == '-')
		s++;
	if (!isdigit(*s) && *s != '.')
		return(0);
	if (isdigit(*s)) {	/* digits before the point */
		do {
			d1++;
			s++;
		} while (isdigit(*s));
	}
	if (*s == '.') {
		point++;
		s++;
	}
	if (isdigit(*s)) {	/* digits after the point */
		d2++;
		do {
			s++;
		} while (isdigit(*s));
	}
	if (!(d1 || (point && d2)))
		return(0);
	if (*s == 'e' || *s == 'E') {
		s++;
		if (*s == '+' || *s == '-')
			s++;
		if (!isdigit(*s))
			return(0);
		es = s;	/* first exponent digit */
		do {
			s++;
		} while (isdigit(*s));
		if (s - es > 2)	/* three or more exponent digits */
			return(0);
		else if (s - es == 2 && (int)(10 * (*es-'0') + *(es+1)-'0') >= MAXEXPON)
			return(0);
	}
	while (*s == ' ' || *s == '\t' || *s == '\n')
		s++;
	if (*s == '\0')
		return(1);
	else
		return(0);
}
592