xref: /openbsd-src/usr.bin/awk/lib.c (revision a4afd6dad3fba28f80e70208181c06c482259988)
1 /****************************************************************
2 Copyright (C) AT&T and Lucent Technologies 1996
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the names of AT&T or Lucent Technologies
11 or any of their entities not be used in advertising or publicity
12 pertaining to distribution of the software without specific,
13 written prior permission.
14 
15 AT&T AND LUCENT DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
16 SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
17 FITNESS. IN NO EVENT SHALL AT&T OR LUCENT OR ANY OF THEIR
18 ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
19 DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
20 DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
21 OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
22 USE OR PERFORMANCE OF THIS SOFTWARE.
23 ****************************************************************/
24 
25 #define DEBUG
26 #include <stdio.h>
27 #include <string.h>
28 #include <ctype.h>
29 #include <errno.h>
30 #include <stdlib.h>
31 #include "awk.h"
32 #include "awkgram.h"
33 
34 FILE	*infile	= NULL;
35 char	*file	= "";
36 int	recsize	= RECSIZE;
37 char	*recdata;
38 char	*record;
39 char	*fields;
40 Cell	*fldtab;
41 
42 #define	MAXFLD	200
43 int	nfields	= MAXFLD;	/* can be set from commandline in main */
44 
45 int	donefld;	/* 1 = implies rec broken into fields */
46 int	donerec;	/* 1 = record is valid (no flds have changed) */
47 
48 int	maxfld	= 0;	/* last used field */
49 int	argno	= 1;	/* current input argument number */
50 extern	Awkfloat *ARGC;
51 
52 void recinit(unsigned int n)
53 {
54 	static Cell dollar0 = {
55 	    OCELL, CFLD, "$0", /*recdata*/0, 0.0, REC|STR|DONTFREE };
56 	static Cell dollar1 = {
57 	    OCELL, CFLD, NULL, "", 0.0, FLD|STR|DONTFREE };
58 	int i;
59 
60 	record = recdata = (char *) malloc(n);
61 	fields = (char *) malloc(n);
62 	fldtab = (Cell *) malloc(nfields * sizeof(Cell));
63 	if (recdata == NULL || fields == NULL || fldtab == NULL)
64 		ERROR "out of space for $0 and fields" FATAL;
65 	fldtab[0] = dollar0;
66 	fldtab[0].sval = recdata;
67 	for (i = 1; i < nfields; i++)
68 		fldtab[i] = dollar1;
69 }
70 
71 void initgetrec(void)
72 {
73 	int i;
74 	char *p;
75 
76 	for (i = 1; i < *ARGC; i++) {
77 		if (!isclvar(p = getargv(i))) {	/* find 1st real filename */
78 			setsval(lookup("FILENAME", symtab), getargv(i));
79 			return;
80 		}
81 		setclvar(p);	/* a commandline assignment before filename */
82 		argno++;
83 	}
84 	infile = stdin;		/* no filenames, so use stdin */
85 }
86 
87 int getrec(char *buf)	/* get next input record from whatever source */
88 {			/* note: tests whether buf == record */
89 	int c;
90 	static int firsttime = 1;
91 
92 	if (firsttime) {
93 		firsttime = 0;
94 		initgetrec();
95 	}
96 	dprintf( ("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n",
97 		*RS, *FS, *ARGC, *FILENAME) );
98 	donefld = 0;
99 	donerec = 1;
100 	buf[0] = 0;
101 	while (argno < *ARGC || infile == stdin) {
102 		dprintf( ("argno=%d, file=|%s|\n", argno, file) );
103 		if (infile == NULL) {	/* have to open a new file */
104 			file = getargv(argno);
105 			if (*file == '\0') {	/* it's been zapped */
106 				argno++;
107 				continue;
108 			}
109 			if (isclvar(file)) {	/* a var=value arg */
110 				setclvar(file);
111 				argno++;
112 				continue;
113 			}
114 			*FILENAME = file;
115 			dprintf( ("opening file %s\n", file) );
116 			if (*file == '-' && *(file+1) == '\0')
117 				infile = stdin;
118 			else if ((infile = fopen((char *)file, "r")) == NULL)
119 				ERROR "can't open file %s", file FATAL;
120 			setfval(fnrloc, 0.0);
121 		}
122 		c = readrec(buf, recsize, infile);
123 		if (c != 0 || buf[0] != '\0') {	/* normal record */
124 			if (buf == record) {
125 				if (!(recloc->tval & DONTFREE))
126 					xfree(recloc->sval);
127 				recloc->sval = record;
128 				recloc->tval = REC | STR | DONTFREE;
129 				if (isnumber(recloc->sval)) {
130 					recloc->fval = atof(recloc->sval);
131 					recloc->tval |= NUM;
132 				}
133 			}
134 			setfval(nrloc, nrloc->fval+1);
135 			setfval(fnrloc, fnrloc->fval+1);
136 			return 1;
137 		}
138 		/* EOF arrived on this file; set up next */
139 		if (infile != stdin)
140 			fclose(infile);
141 		infile = NULL;
142 		argno++;
143 	}
144 	return 0;	/* true end of file */
145 }
146 
147 void nextfile(void)
148 {
149 	if (infile != stdin)
150 		fclose(infile);
151 	infile = NULL;
152 	argno++;
153 }
154 
155 int readrec(char *buf, int bufsize, FILE *inf)	/* read one record into buf */
156 {
157 	int sep, c;
158 	char *rr;
159 	int nrr;
160 
161 	if ((sep = **RS) == 0) {
162 		sep = '\n';
163 		while ((c=getc(inf)) == '\n' && c != EOF)	/* skip leading \n's */
164 			;
165 		if (c != EOF)
166 			ungetc(c, inf);
167 	}
168 	for (rr = buf, nrr = bufsize; ; ) {
169 		for (; (c=getc(inf)) != sep && c != EOF; *rr++ = c)
170 			if (--nrr < 0)
171 				ERROR "input record `%.30s...' too long; try -mr n", buf FATAL;
172 		if (**RS == sep || c == EOF)
173 			break;
174 		if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
175 			break;
176 		*rr++ = '\n';
177 		*rr++ = c;
178 	}
179 	if (rr > buf + bufsize)
180 		ERROR "input record `%.30s...' too long; try -mr n", buf FATAL;
181 	*rr = 0;
182 	dprintf( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) );
183 	return c == EOF && rr == buf ? 0 : 1;
184 }
185 
186 char *getargv(int n)	/* get ARGV[n] */
187 {
188 	Cell *x;
189 	char *s, temp[10];
190 	extern Array *ARGVtab;
191 
192 	sprintf(temp, "%d", n);
193 	x = setsymtab(temp, "", 0.0, STR, ARGVtab);
194 	s = getsval(x);
195 	dprintf( ("getargv(%d) returns |%s|\n", n, s) );
196 	return s;
197 }
198 
199 void setclvar(char *s)	/* set var=value from s */
200 {
201 	char *p;
202 	Cell *q;
203 
204 	for (p=s; *p != '='; p++)
205 		;
206 	*p++ = 0;
207 	p = qstring(p, '\0');
208 	q = setsymtab(s, p, 0.0, STR, symtab);
209 	setsval(q, p);
210 	if (isnumber(q->sval)) {
211 		q->fval = atof(q->sval);
212 		q->tval |= NUM;
213 	}
214 	dprintf( ("command line set %s to |%s|\n", s, p) );
215 }
216 
217 
218 void fldbld(void)	/* create fields from current record */
219 {
220 	char *r, *fr, sep;
221 	Cell *p;
222 	int i;
223 
224 	if (donefld)
225 		return;
226 	if (!(recloc->tval & STR))
227 		getsval(recloc);
228 	r = recloc->sval;
229 	fr = fields;
230 	i = 0;	/* number of fields accumulated here */
231 	if (strlen(*FS) > 1) {	/* it's a regular expression */
232 		i = refldbld(r, *FS);
233 	} else if ((sep = **FS) == ' ') {	/* default whitespace */
234 		for (i = 0; ; ) {
235 			while (*r == ' ' || *r == '\t' || *r == '\n')
236 				r++;
237 			if (*r == 0)
238 				break;
239 			i++;
240 			if (i >= nfields)
241 				break;
242 			if (!(fldtab[i].tval & DONTFREE))
243 				xfree(fldtab[i].sval);
244 			fldtab[i].sval = fr;
245 			fldtab[i].tval = FLD | STR | DONTFREE;
246 			do
247 				*fr++ = *r++;
248 			while (*r != ' ' && *r != '\t' && *r != '\n' && *r != '\0');
249 			*fr++ = 0;
250 		}
251 		*fr = 0;
252 	} else if ((sep = **FS) == 0) {		/* new: FS="" => 1 char/field */
253 		for (i = 0; *r != 0; r++) {
254 			char buf[2];
255 			i++;
256 			if (i >= nfields)
257 				break;
258 			if (!(fldtab[i].tval & DONTFREE))
259 				xfree(fldtab[i].sval);
260 			buf[0] = *r;
261 			buf[1] = 0;
262 			fldtab[i].sval = tostring(buf);
263 			fldtab[i].tval = FLD | STR;
264 		}
265 		*fr = 0;
266 	} else if (*r != 0) {	/* if 0, it's a null field */
267 		for (;;) {
268 			i++;
269 			if (i >= nfields)
270 				break;
271 			if (!(fldtab[i].tval & DONTFREE))
272 				xfree(fldtab[i].sval);
273 			fldtab[i].sval = fr;
274 			fldtab[i].tval = FLD | STR | DONTFREE;
275 			while (*r != sep && *r != '\n' && *r != '\0')	/* \n is always a separator */
276 				*fr++ = *r++;
277 			*fr++ = 0;
278 			if (*r++ == 0)
279 				break;
280 		}
281 		*fr = 0;
282 	}
283 	if (i >= nfields)
284 		ERROR "record `%.30s...' has too many fields; try -mf n", record FATAL;
285 	/* clean out junk from previous record */
286 	cleanfld(i, maxfld);
287 	maxfld = i;
288 	donefld = 1;
289 	for (p = fldtab+1; p <= fldtab+maxfld; p++) {
290 		if(isnumber(p->sval)) {
291 			p->fval = atof(p->sval);
292 			p->tval |= NUM;
293 		}
294 	}
295 	setfval(nfloc, (Awkfloat) maxfld);
296 	if (dbg)
297 		for (p = fldtab; p <= fldtab+maxfld; p++)
298 			printf("field %d: |%s|\n", p-fldtab, p->sval);
299 }
300 
301 void cleanfld(int n1, int n2)	/* clean out fields n1..n2 inclusive */
302 {
303 	static char *nullstat = "";
304 	Cell *p, *q;
305 
306 	for (p = &fldtab[n2], q = &fldtab[n1]; p > q; p--) {
307 		if (!(p->tval & DONTFREE))
308 			xfree(p->sval);
309 		p->tval = FLD | STR | DONTFREE;
310 		p->sval = nullstat;
311 	}
312 }
313 
314 void newfld(int n)	/* add field n (after end) */
315 {
316 	if (n >= nfields)
317 		ERROR "creating too many fields (%d); try -mf n", n FATAL;
318 	cleanfld(maxfld, n);
319 	maxfld = n;
320 	setfval(nfloc, (Awkfloat) n);
321 }
322 
323 int refldbld(char *rec, char *fs)	/* build fields from reg expr in FS */
324 {
325 	char *fr;
326 	int i, tempstat;
327 	fa *pfa;
328 
329 	fr = fields;
330 	*fr = '\0';
331 	if (*rec == '\0')
332 		return 0;
333 	pfa = makedfa(fs, 1);
334 	dprintf( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs) );
335 	tempstat = pfa->initstat;
336 	for (i = 1; i < nfields; i++) {
337 		if (!(fldtab[i].tval & DONTFREE))
338 			xfree(fldtab[i].sval);
339 		fldtab[i].tval = FLD | STR | DONTFREE;
340 		fldtab[i].sval = fr;
341 		dprintf( ("refldbld: i=%d\n", i) );
342 		if (nematch(pfa, rec)) {
343 			pfa->initstat = 2;	/* horrible coupling */
344 			dprintf( ("match %s (%d chars)\n", patbeg, patlen) );
345 			strncpy(fr, rec, patbeg-rec);
346 			fr += patbeg - rec + 1;
347 			*(fr-1) = '\0';
348 			rec = patbeg + patlen;
349 		} else {
350 			dprintf( ("no match %s\n", rec) );
351 			strcpy(fr, rec);
352 			pfa->initstat = tempstat;
353 			break;
354 		}
355 	}
356 	return i;
357 }
358 
359 void recbld(void)	/* create $0 from $1..$NF if necessary */
360 {
361 	int i;
362 	char *r, *p;
363 	static char *rec = 0;
364 
365 	if (donerec == 1)
366 		return;
367 	if (rec == 0) {
368 		rec = (char *) malloc(recsize);
369 		if (rec == 0)
370 			ERROR "out of space building $0, record size %d", recsize FATAL;
371 	}
372 	r = rec;
373 	for (i = 1; i <= *NF; i++) {
374 		p = getsval(&fldtab[i]);
375 		while (r < rec+recsize-1 && (*r = *p++))
376 			r++;
377 		if (i < *NF)
378 			for (p = *OFS; r < rec+recsize-1 && (*r = *p++); )
379 				r++;
380 	}
381 	if (r > rec + recsize - 1)
382 		ERROR "built giant record `%.30s...'; try -mr n", record FATAL;
383 	*r = '\0';
384 	dprintf( ("in recbld FS=%o, recloc=%p\n", **FS, recloc) );
385 	recloc->tval = REC | STR | DONTFREE;
386 	recloc->sval = record = rec;
387 	dprintf( ("in recbld FS=%o, recloc=%p\n", **FS, recloc) );
388 	dprintf( ("recbld = |%s|\n", record) );
389 	donerec = 1;
390 }
391 
392 Cell *fieldadr(int n)
393 {
394 	if (n < 0 || n >= nfields)
395 		ERROR "trying to access field %d; try -mf n", n FATAL;
396 	return(&fldtab[n]);
397 }
398 
399 int	errorflag	= 0;
400 char	errbuf[200];
401 
402 void yyerror(char *s)
403 {
404 	extern char *cmdname, *curfname;
405 	static int been_here = 0;
406 
407 	if (been_here++ > 2)
408 		return;
409 	fprintf(stderr, "%s: %s", cmdname, s);
410 	fprintf(stderr, " at source line %d", lineno);
411 	if (curfname != NULL)
412 		fprintf(stderr, " in function %s", curfname);
413 	fprintf(stderr, "\n");
414 	errorflag = 2;
415 	eprint();
416 }
417 
418 void fpecatch(int n)
419 {
420 	ERROR "floating point exception %d", n FATAL;
421 }
422 
extern int bracecnt, brackcnt, parencnt;

/*
 * Feed the rest of the program text (up to EOF or a NUL) through bclass()
 * to finish counting brackets, then report any imbalance in braces,
 * square brackets and parentheses.  Does its work on the first call only.
 */
void bracecheck(void)
{
	static int beenhere = 0;
	int ch;

	if (beenhere++ != 0)
		return;
	for (ch = lex_input(); ch != EOF && ch != '\0'; ch = lex_input())
		bclass(ch);
	bcheck2(bracecnt, '{', '}');
	bcheck2(brackcnt, '[', ']');
	bcheck2(parencnt, '(', ')');
}
438 
/*
 * Print a diagnostic on stderr for an unbalanced bracket count n:
 * a positive n means closing characters c2 are missing, a negative n
 * means there are extra ones, and zero prints nothing.  c1 is not used.
 */
void bcheck2(int n, int c1, int c2)
{
	if (n == 0)
		return;
	if (n > 0) {
		if (n == 1)
			fprintf(stderr, "\tmissing %c\n", c2);
		else
			fprintf(stderr, "\t%d missing %c's\n", n, c2);
		return;
	}
	if (n == -1)
		fprintf(stderr, "\textra %c\n", c2);
	else
		fprintf(stderr, "\t%d extra %c's\n", -n, c2);
}
450 
451 void error(int f, char *s)
452 {
453 	extern Node *curnode;
454 	extern char *cmdname;
455 
456 	fflush(stdout);
457 	fprintf(stderr, "%s: ", cmdname);
458 	fprintf(stderr, "%s", s);
459 	fprintf(stderr, "\n");
460 	if (compile_time != 2 && NR && *NR > 0) {
461 		fprintf(stderr, " input record number %d", (int) (*FNR));
462 		if (strcmp(*FILENAME, "-") != 0)
463 			fprintf(stderr, ", file %s", *FILENAME);
464 		fprintf(stderr, "\n");
465 	}
466 	if (compile_time != 2 && curnode)
467 		fprintf(stderr, " source line number %d\n", curnode->lineno);
468 	else if (compile_time != 2 && lineno)
469 		fprintf(stderr, " source line number %d\n", lineno);
470 	eprint();
471 	if (f) {
472 		if (dbg > 1)		/* core dump if serious debugging on */
473 			abort();
474 		exit(2);
475 	}
476 }
477 
/*
 * Meant to print the program text surrounding an error.  The whole body
 * is compiled out with #if 0, so calling this currently does nothing.
 */
void eprint(void)	/* try to print context around error */
{
#if 0
	char *p, *q;
	int c;
	static int been_here = 0;
	extern char ebuf[], *ep;

	if (compile_time == 2 || compile_time == 0 || been_here++ > 0)
		return;
	p = ep - 1;
	if (p > ebuf && *p == '\n')
		p--;
	for ( ; p > ebuf && *p != '\n' && *p != '\0'; p--)
		;
	while (*p == '\n')
		p++;
	fprintf(stderr, " context is\n\t");
	for (q=ep-1; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--)
		;
	for ( ; p < q; p++)
		if (*p)
			putc(*p, stderr);
	fprintf(stderr, " >>> ");
	for ( ; p < ep; p++)
		if (*p)
			putc(*p, stderr);
	fprintf(stderr, " <<< ");
	if (*ep)
		while ((c = input()) != '\n' && c != '\0' && c != EOF) {
			putc(c, stderr);
			bclass(c);
		}
	putc('\n', stderr);
	ep = ebuf;
#endif
}
515 
516 void bclass(int c)
517 {
518 	switch (c) {
519 	case '{': bracecnt++; break;
520 	case '}': bracecnt--; break;
521 	case '[': brackcnt++; break;
522 	case ']': brackcnt--; break;
523 	case '(': parencnt++; break;
524 	case ')': parencnt--; break;
525 	}
526 }
527 
528 double errcheck(double x, char *s)
529 {
530 	extern int errno;
531 
532 	if (errno == EDOM) {
533 		errno = 0;
534 		ERROR "%s argument out of domain", s WARNING;
535 		x = 1;
536 	} else if (errno == ERANGE) {
537 		errno = 0;
538 		ERROR "%s result out of range", s WARNING;
539 		x = 1;
540 	}
541 	return x;
542 }
543 
/*
 * Decide whether s looks like a command-line assignment: a name that
 * starts with a letter or underscore and continues with letters, digits
 * or underscores, followed by a single '=' (a following "==" does not
 * count).  Returns 1 if so, 0 otherwise.
 */
int isclvar(char *s)	/* is s of form var=something ? */
{
	char *os = s;

	/* <ctype.h> functions need an unsigned char value; plain char may be negative */
	if (!isalpha((unsigned char) *s) && *s != '_')
		return 0;
	for ( ; *s; s++)
		if (!(isalnum((unsigned char) *s) || *s == '_'))
			break;
	return *s == '=' && s > os && *(s+1) != '=';
}
555 
#define	MAXEXPON	38	/* maximum exponent for fp number. should be IEEE */

/*
 * Decide whether s, with blanks, tabs and newlines allowed on either
 * side, is a decimal number: an optional sign, digits with an optional
 * decimal point (at least one digit is required), and an optional
 * exponent of 'e' or 'E', an optional sign and digits.  An exponent with
 * more than two digits, or a two-digit exponent of MAXEXPON or more, is
 * rejected.  Returns 1 for a number, 0 otherwise.
 */
int isnumber(char *s)	/* should be done by a library function */
{
	int d1, d2;
	int point;
	char *es;

	d1 = d2 = point = 0;
	while (*s == ' ' || *s == '\t' || *s == '\n')
		s++;
	if (*s == '\0')
		return(0);	/* empty stuff isn't a number */
	if (*s == '+' || *s == '-')
		s++;
	/* <ctype.h> functions need an unsigned char value; plain char may be negative */
	if (!isdigit((unsigned char) *s) && *s != '.')
		return(0);
	if (isdigit((unsigned char) *s)) {
		do {
			d1++;
			s++;
		} while (isdigit((unsigned char) *s));
	}
	if (*s == '.') {
		point++;
		s++;
	}
	if (isdigit((unsigned char) *s)) {
		d2++;
		do {
			s++;
		} while (isdigit((unsigned char) *s));
	}
	if (!(d1 || (point && d2)))
		return(0);	/* a bare "." has no digits */
	if (*s == 'e' || *s == 'E') {
		s++;
		if (*s == '+' || *s == '-')
			s++;
		if (!isdigit((unsigned char) *s))
			return(0);
		es = s;
		do {
			s++;
		} while (isdigit((unsigned char) *s));
		if (s - es > 2)
			return(0);
		else if (s - es == 2 && (int)(10 * (*es-'0') + *(es+1)-'0') >= MAXEXPON)
			return(0);
	}
	while (*s == ' ' || *s == '\t' || *s == '\n')
		s++;
	if (*s == '\0')
		return(1);
	else
		return(0);
}

#if 0
	/* THIS IS AN EXPERIMENT THAT'S NOT DONE. */
	/* strtod ought to provide a better test of what's */
	/* a valid number, but it doesn't work according to */
	/* the standard on any machine near me! */

	#include <math.h>
	isnumber(char *s)
	{
		double r;
		char *ep;
		errno = 0;
		r = strtod(s, &ep);
		if (r == HUGE_VAL || errno == ERANGE)
			return 0;
		while (*ep == ' ' || *ep == '\t' || *ep == '\n')
			ep++;
		if (*ep == '\0')
			return 1;
		else
			return 0;
	}
#endif
637