xref: /plan9-contrib/sys/src/cmd/awk/lib.c (revision 219b2ee8daee37f4aad58d63f21287faa8e4ffdc)
1 /*
2 Copyright (c) 1989 AT&T
3 	All Rights Reserved
4 
5 THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T.
6 
7 The copyright notice above does not evidence any
8 actual or intended publication of such source code.
9 */
10 
11 #define DEBUG
12 #include <stdio.h>
13 #include <string.h>
14 #include <ctype.h>
15 #include <errno.h>
16 #include <stdlib.h>
17 #include "awk.h"
18 #include "y.tab.h"
19 
/*
 * Fast paths for reading a cell's value.  When the cell's flags say it is
 * a plain NUM (resp. STR) cell with none of ARR/FLD/REC set, the value is
 * taken straight from the cell; otherwise the general routine is called.
 */
#define	getfval(p)	(((p)->tval & (ARR|FLD|REC|NUM)) == NUM ? (p)->fval : r_getfval(p))
#define	getsval(p)	(((p)->tval & (ARR|FLD|REC|STR)) == STR ? (p)->sval : r_getsval(p))

FILE	*infile	= NULL;	/* current input stream; getrec opens the next file when this is NULL */
uchar	*file	= (uchar*) "";	/* name of the current input file (an ARGV value) */
int	recsize	= RECSIZE;	/* buffer size used when reading and rebuilding records */
uchar	*recdata;	/* record buffer allocated by recinit */
uchar	*record;	/* text of the current record; recbld may repoint it */
uchar	*fields;	/* buffer holding the text of the split fields */
Cell	*fldtab;	/* field cells; slot 0 is $0 */

#define	MAXFLD	200
int	nfields	= MAXFLD;	/* can be set from commandline in main */

int	donefld;	/* 1 = implies rec broken into fields */
int	donerec;	/* 1 = record is valid (no flds have changed) */

int	maxfld	= 0;	/* last used field */
int	argno	= 1;	/* current input argument number */
extern	Awkfloat *ARGC;
40 
41 void recinit(unsigned int n)
42 {
43 	static Cell dollar0 = {
44 	    OCELL, CFLD, (uchar*) "$0", /*recdata*/0, 0.0, REC|STR|DONTFREE };
45 	static Cell dollar1 = {
46 	    OCELL, CFLD, NULL, (uchar*) "", 0.0, FLD|STR|DONTFREE };
47 	int i;
48 
49 	record = recdata = (uchar *) malloc(n);
50 	fields = (uchar *) malloc(n);
51 	fldtab = (Cell *) malloc(nfields * sizeof(Cell));
52 	if (recdata == NULL || fields == NULL || fldtab == NULL)
53 		ERROR "out of space for $0 and fields" FATAL;
54 	fldtab[0] = dollar0;
55 	fldtab[0].sval = recdata;
56 	for (i = 1; i < nfields; i++)
57 		fldtab[i] = dollar1;
58 }
59 
60 void initgetrec(void)
61 {
62 	int i;
63 	uchar *p;
64 
65 	for (i = 1; i < *ARGC; i++) {
66 		if (!isclvar(p = getargv(i))) {	/* find 1st real filename */
67 			setsval(lookup("FILENAME", symtab), getargv(i));
68 			return;
69 		}
70 		setclvar(p);	/* a commandline assignment before filename */
71 		argno++;
72 	}
73 	infile = stdin;		/* no filenames, so use stdin */
74 }
75 
76 getrec(uchar *buf)	/* get next input record from whatever source */
77 {			/* note: tests whether buf == record */
78 	int c;
79 	static int firsttime = 1;
80 
81 	if (firsttime) {
82 		firsttime = 0;
83 		initgetrec();
84 	}
85 	dprintf( ("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n",
86 		*RS, *FS, *ARGC, *FILENAME) );
87 	donefld = 0;
88 	donerec = 1;
89 	buf[0] = 0;
90 	while (argno < *ARGC || infile == stdin) {
91 		dprintf( ("argno=%d, file=|%s|\n", argno, file) );
92 		if (infile == NULL) {	/* have to open a new file */
93 			file = getargv(argno);
94 			if (*file == '\0') {	/* it's been zapped */
95 				argno++;
96 				continue;
97 			}
98 			if (isclvar(file)) {	/* a var=value arg */
99 				setclvar(file);
100 				argno++;
101 				continue;
102 			}
103 			*FILENAME = file;
104 			dprintf( ("opening file %s\n", file) );
105 			if (*file == '-' && *(file+1) == '\0')
106 				infile = stdin;
107 			else if ((infile = fopen((char *)file, "r")) == NULL)
108 				ERROR "can't open file %s", file FATAL;
109 			setfval(fnrloc, 0.0);
110 		}
111 		c = readrec(buf, recsize, infile);
112 		if (c != 0 || buf[0] != '\0') {	/* normal record */
113 			if (buf == record) {
114 				if (!(recloc->tval & DONTFREE))
115 					xfree(recloc->sval);
116 				recloc->sval = record;
117 				recloc->tval = REC | STR | DONTFREE;
118 				if (isnumber(recloc->sval)) {
119 					recloc->fval = atof(recloc->sval);
120 					recloc->tval |= NUM;
121 				}
122 			}
123 			setfval(nrloc, nrloc->fval+1);
124 			setfval(fnrloc, fnrloc->fval+1);
125 			return 1;
126 		}
127 		/* EOF arrived on this file; set up next */
128 		if (infile != stdin)
129 			fclose(infile);
130 		infile = NULL;
131 		argno++;
132 	}
133 	return 0;	/* true end of file */
134 }
135 
136 readrec(uchar *buf, int bufsize, FILE *inf)	/* read one record into buf */
137 {
138 	register int sep, c;
139 	register uchar *rr;
140 	register int nrr;
141 
142 	if ((sep = **RS) == 0) {
143 		sep = '\n';
144 		while ((c=getc(inf)) == '\n' && c != EOF)	/* skip leading \n's */
145 			;
146 		if (c != EOF)
147 			ungetc(c, inf);
148 	}
149 	for (rr = buf, nrr = bufsize; ; ) {
150 		for (; (c=getc(inf)) != sep && c != EOF; *rr++ = c)
151 			if (--nrr < 0)
152 				ERROR "input record `%.30s...' too long; try -mr n", buf FATAL;
153 		if (**RS == sep || c == EOF)
154 			break;
155 		if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
156 			break;
157 		*rr++ = '\n';
158 		*rr++ = c;
159 	}
160 	if (rr > buf + bufsize)
161 		ERROR "input record `%.30s...' too long; try -mr n", buf FATAL;
162 	*rr = 0;
163 	dprintf( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) );
164 	return c == EOF && rr == buf ? 0 : 1;
165 }
166 
167 uchar *getargv(int n)	/* get ARGV[n] */
168 {
169 	Cell *x;
170 	uchar *s, temp[10];
171 	extern Array *ARGVtab;
172 
173 	sprintf((char *)temp, "%d", n);
174 	x = setsymtab(temp, "", 0.0, STR, ARGVtab);
175 	s = getsval(x);
176 	dprintf( ("getargv(%d) returns |%s|\n", n, s) );
177 	return s;
178 }
179 
180 void setclvar(uchar *s)	/* set var=value from s */
181 {
182 	uchar *p;
183 	Cell *q;
184 
185 	for (p=s; *p != '='; p++)
186 		;
187 	*p++ = 0;
188 	p = qstring(p, '\0');
189 	q = setsymtab(s, p, 0.0, STR, symtab);
190 	setsval(q, p);
191 	if (isnumber(q->sval)) {
192 		q->fval = atof(q->sval);
193 		q->tval |= NUM;
194 	}
195 	dprintf( ("command line set %s to |%s|\n", s, p) );
196 }
197 
198 
199 void fldbld(void)	/* create fields from current record */
200 {
201 	register uchar *r, *fr, sep;
202 	Cell *p;
203 	int i;
204 
205 	if (donefld)
206 		return;
207 	if (!(recloc->tval & STR))
208 		getsval(recloc);
209 	r = recloc->sval;
210 	fr = fields;
211 	i = 0;	/* number of fields accumulated here */
212 	if (strlen(*FS) > 1) {	/* it's a regular expression */
213 		i = refldbld(r, *FS);
214 	} else if ((sep = **FS) == ' ') {	/* default whitespace */
215 		for (i = 0; ; ) {
216 			while (*r == ' ' || *r == '\t' || *r == '\n')
217 				r++;
218 			if (*r == 0)
219 				break;
220 			i++;
221 			if (i >= nfields)
222 				break;
223 			if (!(fldtab[i].tval & DONTFREE))
224 				xfree(fldtab[i].sval);
225 			fldtab[i].sval = fr;
226 			fldtab[i].tval = FLD | STR | DONTFREE;
227 			do
228 				*fr++ = *r++;
229 			while (*r != ' ' && *r != '\t' && *r != '\n' && *r != '\0');
230 			*fr++ = 0;
231 		}
232 		*fr = 0;
233 	} else if (*r != 0) {	/* if 0, it's a null field */
234 		for (;;) {
235 			i++;
236 			if (i >= nfields)
237 				break;
238 			if (!(fldtab[i].tval & DONTFREE))
239 				xfree(fldtab[i].sval);
240 			fldtab[i].sval = fr;
241 			fldtab[i].tval = FLD | STR | DONTFREE;
242 			while (*r != sep && *r != '\n' && *r != '\0')	/* \n is always a separator */
243 				*fr++ = *r++;
244 			*fr++ = 0;
245 			if (*r++ == 0)
246 				break;
247 		}
248 		*fr = 0;
249 	}
250 	if (i >= nfields)
251 		ERROR "record `%.30s...' has too many fields; try -mf n", record FATAL;
252 	/* clean out junk from previous record */
253 	cleanfld(i, maxfld);
254 	maxfld = i;
255 	donefld = 1;
256 	for (p = fldtab+1; p <= fldtab+maxfld; p++) {
257 		if(isnumber(p->sval)) {
258 			p->fval = atof(p->sval);
259 			p->tval |= NUM;
260 		}
261 	}
262 	setfval(nfloc, (Awkfloat) maxfld);
263 	if (dbg)
264 		for (p = fldtab; p <= fldtab+maxfld; p++)
265 			printf("field %d: |%s|\n", p-fldtab, p->sval);
266 }
267 
268 void cleanfld(int n1, int n2)	/* clean out fields n1..n2 inclusive */
269 {
270 	static uchar *nullstat = (uchar *) "";
271 	register Cell *p, *q;
272 
273 	for (p = &fldtab[n2], q = &fldtab[n1]; p > q; p--) {
274 		if (!(p->tval & DONTFREE))
275 			xfree(p->sval);
276 		p->tval = FLD | STR | DONTFREE;
277 		p->sval = nullstat;
278 	}
279 }
280 
281 void newfld(int n)	/* add field n (after end) */
282 {
283 	if (n >= nfields)
284 		ERROR "creating too many fields (%d); try -mf n", n, record FATAL;
285 	cleanfld(maxfld, n);
286 	maxfld = n;
287 	setfval(nfloc, (Awkfloat) n);
288 }
289 
290 refldbld(uchar *rec, uchar *fs)	/* build fields from reg expr in FS */
291 {
292 	uchar *fr;
293 	void *p;
294 	int i;
295 
296 	fr = fields;
297 	*fr = '\0';
298 	if (*rec == '\0')
299 		return 0;
300 	p = compre(fs);
301 	dprintf( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs) );
302 	for (i = 1; i < nfields; i++) {
303 		if (!(fldtab[i].tval & DONTFREE))
304 			xfree(fldtab[i].sval);
305 		fldtab[i].tval = FLD | STR | DONTFREE;
306 		fldtab[i].sval = fr;
307 		dprintf( ("refldbld: i=%d\n", i) );
308 		if (nematch(p, rec, rec)) {
309 			dprintf( ("match %s (%d chars)\n", patbeg, patlen) );
310 			strncpy(fr, rec, patbeg-rec);
311 			fr += patbeg - rec + 1;
312 			*(fr-1) = '\0';
313 			rec = patbeg + patlen;
314 		} else {
315 			dprintf( ("no match %s\n", rec) );
316 			strcpy(fr, rec);
317 			break;
318 		}
319 	}
320 	return i;
321 }
322 
323 void recbld(void)	/* create $0 from $1..$NF if necessary */
324 {
325 	register int i;
326 	register uchar *r, *p;
327 	static uchar *rec = 0;
328 
329 	if (donerec == 1)
330 		return;
331 	if (rec == 0) {
332 		rec = (uchar *) malloc(recsize);
333 		if (rec == 0)
334 			ERROR "out of space building $0, record size %d", recsize FATAL;
335 	}
336 	r = rec;
337 	for (i = 1; i <= *NF; i++) {
338 		p = getsval(&fldtab[i]);
339 		while (r < rec+recsize-1 && (*r = *p++))
340 			r++;
341 		if (i < *NF)
342 			for (p = *OFS; r < rec+recsize-1 && (*r = *p++); )
343 				r++;
344 	}
345 	if (r > rec + recsize - 1)
346 		ERROR "built giant record `%.30s...'; try -mr n", record FATAL;
347 	*r = '\0';
348 	dprintf( ("in recbld FS=%o, recloc=%o\n", **FS, recloc) );
349 	recloc->tval = REC | STR | DONTFREE;
350 	recloc->sval = record = rec;
351 	dprintf( ("in recbld FS=%o, recloc=%o\n", **FS, recloc) );
352 	dprintf( ("recbld = |%s|\n", record) );
353 	donerec = 1;
354 }
355 
356 Cell *fieldadr(int n)
357 {
358 	if (n < 0 || n >= nfields)
359 		ERROR "trying to access field %d; try -mf n", n FATAL;
360 	return(&fldtab[n]);
361 }
362 
363 int	errorflag	= 0;
364 char	errbuf[200];
365 
366 void yyerror(uchar *s)
367 {
368 	extern uchar *cmdname, *curfname;
369 	static int been_here = 0;
370 
371 	if (been_here++ > 2)
372 		return;
373 	fprintf(stderr, "%s: %s", cmdname, s);
374 	fprintf(stderr, " at source line %d", lineno);
375 	if (curfname != NULL)
376 		fprintf(stderr, " in function %s", curfname);
377 	fprintf(stderr, "\n");
378 	errorflag = 2;
379 	eprint();
380 }
381 
/*
 * fpecatch: report a floating point exception as a fatal error; n is
 * included in the message.
 * NOTE(review): presumably installed as a signal handler elsewhere, with n
 * being the signal number -- confirm against the caller.
 */
void fpecatch(int n)
{
	ERROR "floating point exception %d", n FATAL;
}
386 
extern int bracecnt, brackcnt, parencnt;

/*
 * bracecheck: report unbalanced braces, brackets and parentheses.
 *
 * The remaining program text is consumed with input(), feeding every
 * character to bclass() so the running counts are complete, and each count
 * is then reported through bcheck2().  Only the first call does anything.
 */
void bracecheck(void)
{
	static int beenhere = 0;
	int ch;

	if (beenhere++)
		return;
	for (;;) {
		ch = input();
		if (ch == EOF || ch == '\0')
			break;
		bclass(ch);
	}
	bcheck2(bracecnt, '{', '}');
	bcheck2(brackcnt, '[', ']');
	bcheck2(parencnt, '(', ')');
}
402 
/*
 * bcheck2: print a diagnostic for one kind of bracket.
 *
 * n is the open-minus-close count: a positive value means closing
 * characters are missing, a negative value means there are extra ones,
 * and zero prints nothing.  c2 is the closing character named in the
 * message; c1 is accepted but not used.
 */
void bcheck2(int n, int c1, int c2)
{
	if (n == 0)
		return;
	if (n > 0) {
		if (n == 1)
			fprintf(stderr, "\tmissing %c\n", c2);
		else
			fprintf(stderr, "\t%d missing %c's\n", n, c2);
	} else {
		if (n == -1)
			fprintf(stderr, "\textra %c\n", c2);
		else
			fprintf(stderr, "\t%d extra %c's\n", -n, c2);
	}
}
414 
415 void error(int f, char *s)
416 {
417 	extern Node *curnode;
418 	extern uchar *cmdname;
419 
420 	fflush(stdout);
421 	fprintf(stderr, "%s: ", cmdname);
422 	fprintf(stderr, "%s", s);
423 	fprintf(stderr, "\n");
424 	if (compile_time != 2 && NR && *NR > 0) {
425 		fprintf(stderr, " input record number %d", (int) (*FNR));
426 		if (strcmp(*FILENAME, "-") != 0)
427 			fprintf(stderr, ", file %s", *FILENAME);
428 		fprintf(stderr, "\n");
429 	}
430 	if (compile_time != 2 && curnode)
431 		fprintf(stderr, " source line number %d\n", curnode->lineno);
432 	else if (compile_time != 2 && lineno)
433 		fprintf(stderr, " source line number %d\n", lineno);
434 	eprint();
435 	if (f) {
436 		if (dbg > 1)		/* core dump if serious debugging on */
437 			abort();
438 		exit(2);
439 	}
440 }
441 
/*
 * eprint: print the source text around the point of an error.
 *
 * Works from the buffer ebuf, with ep marking the current position in it.
 * The output has the form
 *	 context is
 *		<start of line> >>> <last word> <<< <rest of line>
 * Nothing is printed when compile_time is 2 or 0, or after the first call
 * that got past that check.
 */
void eprint(void)	/* try to print context around error */
{
	uchar *p, *q;
	int c;
	static int been_here = 0;
	extern uchar ebuf[], *ep;

	if (compile_time == 2 || compile_time == 0 || been_here++ > 0)
		return;
	/* back p up to the start of the line that ends at ep */
	p = ep - 1;
	if (p > ebuf && *p == '\n')
		p--;
	for ( ; p > ebuf && *p != '\n' && *p != '\0'; p--)
		;
	/* NOTE(review): if ep == ebuf here, p is ebuf-1 and *p reads before the buffer -- confirm callers rule this out */
	while (*p == '\n')
		p++;
	fprintf(stderr, " context is\n\t");
	/* q backs up from ep to the white space ahead of the last word */
	for (q=ep-1; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--)
		;
	/* text before the last word */
	for ( ; p < q; p++)
		if (*p)
			putc(*p, stderr);
	fprintf(stderr, " >>> ");
	/* the last word itself, up to ep */
	for ( ; p < ep; p++)
		if (*p)
			putc(*p, stderr);
	fprintf(stderr, " <<< ");
	/* echo the rest of the line from input(), still counting brackets */
	if (*ep)
		while ((c = input()) != '\n' && c != '\0' && c != EOF) {
			putc(c, stderr);
			bclass(c);
		}
	putc('\n', stderr);
	ep = ebuf;
}
477 
478 void bclass(int c)
479 {
480 	switch (c) {
481 	case '{': bracecnt++; break;
482 	case '}': bracecnt--; break;
483 	case '[': brackcnt++; break;
484 	case ']': brackcnt--; break;
485 	case '(': parencnt++; break;
486 	case ')': parencnt--; break;
487 	}
488 }
489 
490 double errcheck(double x, uchar *s)
491 {
492 	extern int errno;
493 
494 	if (errno == EDOM) {
495 		errno = 0;
496 		ERROR "%s argument out of domain", s WARNING;
497 		x = 1;
498 	} else if (errno == ERANGE) {
499 		errno = 0;
500 		ERROR "%s result out of range", s WARNING;
501 		x = 1;
502 	}
503 	return x;
504 }
505 
506 isclvar(uchar *s)	/* is s of form var=something ? */
507 {
508 	uchar *os = s;
509 
510 	if (!isalpha(*s) && *s != '_')
511 		return 0;
512 	for ( ; *s; s++)
513 		if (!(isalnum(*s) || *s == '_'))
514 			break;
515 	return *s == '=' && s > os && *(s+1) != '=';
516 }
517 
518 #define	MAXEXPON	38	/* maximum exponent for fp number. should be IEEE */
519 
520 isnumber(uchar *s)	/* should be done by a library function */
521 {
522 	register int d1, d2;
523 	int point;
524 	uchar *es;
525 
526 	d1 = d2 = point = 0;
527 	while (*s == ' ' || *s == '\t' || *s == '\n')
528 		s++;
529 	if (*s == '\0')
530 		return(0);	/* empty stuff isn't a number */
531 	if (*s == '+' || *s == '-')
532 		s++;
533 	if (!isdigit(*s) && *s != '.')
534 		return(0);
535 	if (isdigit(*s)) {
536 		do {
537 			d1++;
538 			s++;
539 		} while (isdigit(*s));
540 	}
541 	if (*s == '.') {
542 		point++;
543 		s++;
544 	}
545 	if (isdigit(*s)) {
546 		d2++;
547 		do {
548 			s++;
549 		} while (isdigit(*s));
550 	}
551 	if (!(d1 || point && d2))
552 		return(0);
553 	if (*s == 'e' || *s == 'E') {
554 		s++;
555 		if (*s == '+' || *s == '-')
556 			s++;
557 		if (!isdigit(*s))
558 			return(0);
559 		es = s;
560 		do {
561 			s++;
562 		} while (isdigit(*s));
563 		if (s - es > 2)
564 			return(0);
565 		else if (s - es == 2 && (int)(10 * (*es-'0') + *(es+1)-'0') >= MAXEXPON)
566 			return(0);
567 	}
568 	while (*s == ' ' || *s == '\t' || *s == '\n')
569 		s++;
570 	if (*s == '\0')
571 		return(1);
572 	else
573 		return(0);
574 }
575