xref: /openbsd-src/usr.bin/awk/lib.c (revision 62a742911104f98b9185b2c6b6007d9b1c36396c)
1 /*	$OpenBSD: lib.c,v 1.6 1999/04/20 17:31:29 millert Exp $	*/
2 /****************************************************************
3 Copyright (C) Lucent Technologies 1997
4 All Rights Reserved
5 
6 Permission to use, copy, modify, and distribute this software and
7 its documentation for any purpose and without fee is hereby
8 granted, provided that the above copyright notice appear in all
9 copies and that both that the copyright notice and this
10 permission notice and warranty disclaimer appear in supporting
11 documentation, and that the name Lucent Technologies or any of
12 its entities not be used in advertising or publicity pertaining
13 to distribution of the software without specific, written prior
14 permission.
15 
16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 THIS SOFTWARE.
24 ****************************************************************/
25 
26 #define DEBUG
27 #include <stdio.h>
28 #include <string.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <stdlib.h>
32 #include "awk.h"
33 #include "ytab.h"
34 
/*
 * File-scope state shared by the record and field routines below.
 */

/* current input stream; getrec() opens the next ARGV file when this is NULL */
FILE	*infile	= NULL;
/* name of the current input file, as fetched from ARGV by getrec() */
char	*file	= "";
/* buffer holding $0; allocated in recinit(), grown through adjbuf() */
char	*record;
/* size passed to adjbuf() for the record buffer */
int	recsize	= RECSIZE;
/* one buffer holding the text of all split fields, separated by \0's */
char	*fields;
/* size bookkeeping for fields[]; compared against strlen($0) before splitting */
int	fieldssize = RECSIZE;

Cell	**fldtab;	/* pointers to Cells */
/* copy of FS taken by readrec() and used later for field splitting */
char	inputFS[100] = " ";

/* initial number of field cells created by recinit() */
#define	MAXFLD	200
int	nfields	= MAXFLD;	/* last allocated slot for $i */

int	donefld;	/* 1 = implies rec broken into fields */
int	donerec;	/* 1 = record is valid (no flds have changed) */

int	lastfld	= 0;	/* last used field */
int	argno	= 1;	/* current input argument number */
extern	Awkfloat *ARGC;

/* templates copied into fldtab[0] ($0) and into every $i cell respectively */
static Cell dollar0 = { OCELL, CFLD, NULL, "", 0.0, REC|STR|DONTFREE };
static Cell dollar1 = { OCELL, CFLD, NULL, "", 0.0, FLD|STR|DONTFREE };
57 
58 void recinit(unsigned int n)
59 {
60 	record = (char *) malloc(n);
61 	fields = (char *) malloc(n);
62 	fldtab = (Cell **) malloc((nfields+1) * sizeof(Cell *));
63 	if (record == NULL || fields == NULL || fldtab == NULL)
64 		ERROR "out of space for $0 and fields" FATAL;
65 
66 	fldtab[0] = (Cell *) malloc(sizeof (Cell));
67 	*fldtab[0] = dollar0;
68 	fldtab[0]->sval = record;
69 	fldtab[0]->nval = tostring("0");
70 	makefields(1, nfields);
71 }
72 
73 void makefields(int n1, int n2)		/* create $n1..$n2 inclusive */
74 {
75 	char temp[50];
76 	int i;
77 
78 	for (i = n1; i <= n2; i++) {
79 		fldtab[i] = (Cell *) malloc(sizeof (struct Cell));
80 		if (fldtab[i] == NULL)
81 			ERROR "out of space in makefields %d", i FATAL;
82 		*fldtab[i] = dollar1;
83 		sprintf(temp, "%d", i);
84 		fldtab[i]->nval = tostring(temp);
85 	}
86 }
87 
88 void initgetrec(void)
89 {
90 	int i;
91 	char *p;
92 
93 	for (i = 1; i < *ARGC; i++) {
94 		if (!isclvar(p = getargv(i))) {	/* find 1st real filename */
95 			setsval(lookup("FILENAME", symtab), getargv(i));
96 			return;
97 		}
98 		setclvar(p);	/* a commandline assignment before filename */
99 		argno++;
100 	}
101 	infile = stdin;		/* no filenames, so use stdin */
102 }
103 
104 int getrec(char **pbuf, int *pbufsize, int isrecord)	/* get next input record */
105 {			/* note: cares whether buf == record */
106 	int c;
107 	static int firsttime = 1;
108 	char *buf = *pbuf;
109 	int bufsize = *pbufsize;
110 
111 	if (firsttime) {
112 		firsttime = 0;
113 		initgetrec();
114 	}
115 	   dprintf( ("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n",
116 		*RS, *FS, *ARGC, *FILENAME) );
117 	if (isrecord) {
118 		donefld = 0;
119 		donerec = 1;
120 	}
121 	buf[0] = 0;
122 	while (argno < *ARGC || infile == stdin) {
123 		   dprintf( ("argno=%d, file=|%s|\n", argno, file) );
124 		if (infile == NULL) {	/* have to open a new file */
125 			file = getargv(argno);
126 			if (*file == '\0') {	/* it's been zapped */
127 				argno++;
128 				continue;
129 			}
130 			if (isclvar(file)) {	/* a var=value arg */
131 				setclvar(file);
132 				argno++;
133 				continue;
134 			}
135 			*FILENAME = file;
136 			   dprintf( ("opening file %s\n", file) );
137 			if (*file == '-' && *(file+1) == '\0')
138 				infile = stdin;
139 			else if ((infile = fopen(file, "r")) == NULL)
140 				ERROR "can't open file %s", file FATAL;
141 			setfval(fnrloc, 0.0);
142 		}
143 		c = readrec(&buf, &bufsize, infile);
144 		if (c != 0 || buf[0] != '\0') {	/* normal record */
145 			if (isrecord) {
146 				if (freeable(fldtab[0]))
147 					xfree(fldtab[0]->sval);
148 				fldtab[0]->sval = buf;	/* buf == record */
149 				fldtab[0]->tval = REC | STR | DONTFREE;
150 				if (is_number(fldtab[0]->sval)) {
151 					fldtab[0]->fval = atof(fldtab[0]->sval);
152 					fldtab[0]->tval |= NUM;
153 				}
154 			}
155 			setfval(nrloc, nrloc->fval+1);
156 			setfval(fnrloc, fnrloc->fval+1);
157 			*pbuf = buf;
158 			*pbufsize = bufsize;
159 			return 1;
160 		}
161 		/* EOF arrived on this file; set up next */
162 		if (infile != stdin)
163 			fclose(infile);
164 		infile = NULL;
165 		argno++;
166 	}
167 	*pbuf = buf;
168 	*pbufsize = bufsize;
169 	return 0;	/* true end of file */
170 }
171 
172 void nextfile(void)
173 {
174 	if (infile != stdin)
175 		fclose(infile);
176 	infile = NULL;
177 	argno++;
178 }
179 
180 int readrec(char **pbuf, int *pbufsize, FILE *inf)	/* read one record into buf */
181 {
182 	int sep, c;
183 	char *rr, *buf = *pbuf;
184 	int bufsize = *pbufsize;
185 
186 	if (strlen(*FS) >= sizeof(inputFS))
187 		ERROR "field separator %.10s... is too long", *FS FATAL;
188 	strcpy(inputFS, *FS);	/* for subsequent field splitting */
189 	if ((sep = **RS) == 0) {
190 		sep = '\n';
191 		while ((c=getc(inf)) == '\n' && c != EOF)	/* skip leading \n's */
192 			;
193 		if (c != EOF)
194 			ungetc(c, inf);
195 	}
196 	for (rr = buf; ; ) {
197 		for (; (c=getc(inf)) != sep && c != EOF; ) {
198 			if (rr-buf+1 > bufsize)
199 				if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 1"))
200 					ERROR "input record `%.30s...' too long", buf FATAL;
201 			*rr++ = c;
202 		}
203 		if (**RS == sep || c == EOF)
204 			break;
205 		if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
206 			break;
207 		if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr, "readrec 2"))
208 			ERROR "input record `%.30s...' too long", buf FATAL;
209 		*rr++ = '\n';
210 		*rr++ = c;
211 	}
212 	if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
213 		ERROR "input record `%.30s...' too long", buf FATAL;
214 	*rr = 0;
215 	   dprintf( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) );
216 	*pbuf = buf;
217 	*pbufsize = bufsize;
218 	return c == EOF && rr == buf ? 0 : 1;
219 }
220 
221 char *getargv(int n)	/* get ARGV[n] */
222 {
223 	Cell *x;
224 	char *s, temp[50];
225 	extern Array *ARGVtab;
226 
227 	sprintf(temp, "%d", n);
228 	x = setsymtab(temp, "", 0.0, STR, ARGVtab);
229 	s = getsval(x);
230 	   dprintf( ("getargv(%d) returns |%s|\n", n, s) );
231 	return s;
232 }
233 
234 void setclvar(char *s)	/* set var=value from s */
235 {
236 	char *p;
237 	Cell *q;
238 
239 	for (p=s; *p != '='; p++)
240 		;
241 	*p++ = 0;
242 	p = qstring(p, '\0');
243 	q = setsymtab(s, p, 0.0, STR, symtab);
244 	setsval(q, p);
245 	if (is_number(q->sval)) {
246 		q->fval = atof(q->sval);
247 		q->tval |= NUM;
248 	}
249 	   dprintf( ("command line set %s to |%s|\n", s, p) );
250 }
251 
252 
253 void fldbld(void)	/* create fields from current record */
254 {
255 	/* this relies on having fields[] the same length as $0 */
256 	/* the fields are all stored in this one array with \0's */
257 	char *r, *fr, sep;
258 	Cell *p;
259 	int i, j, n;
260 
261 	if (donefld)
262 		return;
263 	if (!isstr(fldtab[0]))
264 		getsval(fldtab[0]);
265 	r = fldtab[0]->sval;
266 	n = strlen(r);
267 	if (n > fieldssize) {
268 		xfree(fields);
269 		if ((fields = (char *) malloc(n+1)) == NULL)
270 			ERROR "out of space for fields in fldbld %d", n FATAL;
271 		fieldssize = n;
272 	}
273 	fr = fields;
274 	i = 0;	/* number of fields accumulated here */
275 	if (strlen(inputFS) > 1) {	/* it's a regular expression */
276 		i = refldbld(r, inputFS);
277 	} else if ((sep = *inputFS) == ' ') {	/* default whitespace */
278 		for (i = 0; ; ) {
279 			while (*r == ' ' || *r == '\t' || *r == '\n')
280 				r++;
281 			if (*r == 0)
282 				break;
283 			i++;
284 			if (i > nfields)
285 				growfldtab(i);
286 			if (freeable(fldtab[i]))
287 				xfree(fldtab[i]->sval);
288 			fldtab[i]->sval = fr;
289 			fldtab[i]->tval = FLD | STR | DONTFREE;
290 			do
291 				*fr++ = *r++;
292 			while (*r != ' ' && *r != '\t' && *r != '\n' && *r != '\0');
293 			*fr++ = 0;
294 		}
295 		*fr = 0;
296 	} else if ((sep = *inputFS) == 0) {		/* new: FS="" => 1 char/field */
297 		for (i = 0; *r != 0; r++) {
298 			char buf[2];
299 			i++;
300 			if (i > nfields)
301 				growfldtab(i);
302 			if (freeable(fldtab[i]))
303 				xfree(fldtab[i]->sval);
304 			buf[0] = *r;
305 			buf[1] = 0;
306 			fldtab[i]->sval = tostring(buf);
307 			fldtab[i]->tval = FLD | STR;
308 		}
309 		*fr = 0;
310 	} else if (*r != 0) {	/* if 0, it's a null field */
311 		for (;;) {
312 			i++;
313 			if (i > nfields)
314 				growfldtab(i);
315 			if (freeable(fldtab[i]))
316 				xfree(fldtab[i]->sval);
317 			fldtab[i]->sval = fr;
318 			fldtab[i]->tval = FLD | STR | DONTFREE;
319 			while (*r != sep && *r != '\n' && *r != '\0')	/* \n is always a separator */
320 				*fr++ = *r++;
321 			*fr++ = 0;
322 			if (*r++ == 0)
323 				break;
324 		}
325 		*fr = 0;
326 	}
327 	if (i > nfields)
328 		ERROR "record `%.30s...' has too many fields; can't happen", r FATAL;
329 	cleanfld(i+1, lastfld);	/* clean out junk from previous record */
330 	lastfld = i;
331 	donefld = 1;
332 	for (j = 1; j <= lastfld; j++) {
333 		p = fldtab[j];
334 		if(is_number(p->sval)) {
335 			p->fval = atof(p->sval);
336 			p->tval |= NUM;
337 		}
338 	}
339 	setfval(nfloc, (Awkfloat) lastfld);
340 	if (dbg) {
341 		for (j = 0; j <= lastfld; j++) {
342 			p = fldtab[j];
343 			printf("field %d (%s): |%s|\n", j, p->nval, p->sval);
344 		}
345 	}
346 }
347 
348 void cleanfld(int n1, int n2)	/* clean out fields n1 .. n2 inclusive */
349 {				/* nvals remain intact */
350 	Cell *p;
351 	int i;
352 
353 	for (i = n1; i <= n2; i++) {
354 		p = fldtab[i];
355 		if (freeable(p))
356 			xfree(p->sval);
357 		p->sval = "";
358 		p->tval = FLD | STR | DONTFREE;
359 	}
360 }
361 
362 void newfld(int n)	/* add field n after end of existing lastfld */
363 {
364 	if (n > nfields)
365 		growfldtab(n);
366 	cleanfld(lastfld+1, n);
367 	lastfld = n;
368 	setfval(nfloc, (Awkfloat) n);
369 }
370 
371 Cell *fieldadr(int n)	/* get nth field */
372 {
373 	if (n < 0)
374 		ERROR "trying to access field %d", n FATAL;
375 	if (n > nfields)	/* fields after NF are empty */
376 		growfldtab(n);	/* but does not increase NF */
377 	return(fldtab[n]);
378 }
379 
380 void growfldtab(int n)	/* make new fields up to at least $n */
381 {
382 	int nf = 2 * nfields;
383 
384 	if (n > nf)
385 		nf = n;
386 	fldtab = (Cell **) realloc(fldtab, (nf+1) * (sizeof (struct Cell *)));
387 	if (fldtab == NULL)
388 		ERROR "out of space creating %d fields", nf FATAL;
389 	makefields(nfields+1, nf);
390 	nfields = nf;
391 }
392 
393 int refldbld(char *rec, char *fs)	/* build fields from reg expr in FS */
394 {
395 	/* this relies on having fields[] the same length as $0 */
396 	/* the fields are all stored in this one array with \0's */
397 	char *fr;
398 	int i, tempstat, n;
399 	fa *pfa;
400 
401 	n = strlen(rec);
402 	if (n > fieldssize) {
403 		xfree(fields);
404 		if ((fields = (char *) malloc(n+1)) == NULL)
405 			ERROR "out of space for fields in refldbld %d", n FATAL;
406 		fieldssize = n;
407 	}
408 	fr = fields;
409 	*fr = '\0';
410 	if (*rec == '\0')
411 		return 0;
412 	pfa = makedfa(fs, 1);
413 	   dprintf( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs) );
414 	tempstat = pfa->initstat;
415 	for (i = 1; ; i++) {
416 		if (i > nfields)
417 			growfldtab(i);
418 		if (freeable(fldtab[i]))
419 			xfree(fldtab[i]->sval);
420 		fldtab[i]->tval = FLD | STR | DONTFREE;
421 		fldtab[i]->sval = fr;
422 		   dprintf( ("refldbld: i=%d\n", i) );
423 		if (nematch(pfa, rec)) {
424 			pfa->initstat = 2;	/* horrible coupling to b.c */
425 			   dprintf( ("match %s (%d chars)\n", patbeg, patlen) );
426 			strncpy(fr, rec, patbeg-rec);
427 			fr += patbeg - rec + 1;
428 			*(fr-1) = '\0';
429 			rec = patbeg + patlen;
430 		} else {
431 			   dprintf( ("no match %s\n", rec) );
432 			strcpy(fr, rec);
433 			pfa->initstat = tempstat;
434 			break;
435 		}
436 	}
437 	return i;
438 }
439 
440 void recbld(void)	/* create $0 from $1..$NF if necessary */
441 {
442 	int i;
443 	char *r, *p;
444 
445 	if (donerec == 1)
446 		return;
447 	r = record;
448 	for (i = 1; i <= *NF; i++) {
449 		p = getsval(fldtab[i]);
450 		if (!adjbuf(&record, &recsize, 1+strlen(p)+r-record, recsize, &r, "recbld 1"))
451 			ERROR "created $0 `%.30s...' too long", record FATAL;
452 		while ((*r = *p++) != 0)
453 			r++;
454 		if (i < *NF) {
455 			if (!adjbuf(&record, &recsize, 2+strlen(*OFS)+r-record, recsize, &r, "recbld 2"))
456 				ERROR "created $0 `%.30s...' too long", record FATAL;
457 			for (p = *OFS; (*r = *p++) != 0; )
458 				r++;
459 		}
460 	}
461 	if (!adjbuf(&record, &recsize, 2+r-record, recsize, &r, "recbld 3"))
462 		ERROR "built giant record `%.30s...'", record FATAL;
463 	*r = '\0';
464 	   dprintf( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, fldtab[0]) );
465 
466 	if (freeable(fldtab[0]))
467 		xfree(fldtab[0]->sval);
468 	fldtab[0]->tval = REC | STR | DONTFREE;
469 	fldtab[0]->sval = record;
470 
471 	   dprintf( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, fldtab[0]) );
472 	   dprintf( ("recbld = |%s|\n", record) );
473 	donerec = 1;
474 }
475 
/* set to 2 by yyerror() when a syntax error has been reported */
int	errorflag	= 0;
char	errbuf[300];	/* used by ERROR macro */
478 
479 void yyerror(char *s)
480 {
481 	extern char *cmdname, *curfname;
482 	static int been_here = 0;
483 
484 	if (been_here++ > 2)
485 		return;
486 	fprintf(stderr, "%s: %s", cmdname, s);
487 	fprintf(stderr, " at source line %d", lineno);
488 	if (curfname != NULL)
489 		fprintf(stderr, " in function %s", curfname);
490 	if (compile_time == 1 && cursource() != NULL)
491 		fprintf(stderr, " source file %s", cursource());
492 	fprintf(stderr, "\n");
493 	errorflag = 2;
494 	eprint();
495 }
496 
/*
 * fpecatch: report a floating point exception as a fatal error.
 * n is printed in the message; presumably this is installed as a
 * signal handler and n is the signal number -- confirm at the caller.
 */
void fpecatch(int n)
{
	ERROR "floating point exception %d", n FATAL;
}
501 
502 extern int bracecnt, brackcnt, parencnt;
503 
504 void bracecheck(void)
505 {
506 	int c;
507 	static int beenhere = 0;
508 
509 	if (beenhere++)
510 		return;
511 	while ((c = input()) != EOF && c != '\0')
512 		bclass(c);
513 	bcheck2(bracecnt, '{', '}');
514 	bcheck2(brackcnt, '[', ']');
515 	bcheck2(parencnt, '(', ')');
516 }
517 
/*
 * bcheck2: print a diagnostic for one kind of bracket.
 *
 * n is the number of unmatched opening brackets: positive means the
 * closing character c2 is missing that many times, negative means
 * there are that many extra ones, zero prints nothing.  c1 is not used.
 */
void bcheck2(int n, int c1, int c2)
{
	if (n == 0)
		return;

	if (n > 0) {
		if (n == 1)
			fprintf(stderr, "\tmissing %c\n", c2);
		else
			fprintf(stderr, "\t%d missing %c's\n", n, c2);
	} else {
		if (n == -1)
			fprintf(stderr, "\textra %c\n", c2);
		else
			fprintf(stderr, "\t%d extra %c's\n", -n, c2);
	}
}
529 
530 void error(int f, char *s)
531 {
532 	extern Node *curnode;
533 	extern char *cmdname;
534 
535 	fflush(stdout);
536 	fprintf(stderr, "%s: ", cmdname);
537 	fprintf(stderr, "%s", s);
538 	fprintf(stderr, "\n");
539 	if (compile_time != 2 && NR && *NR > 0) {
540 		fprintf(stderr, " input record number %d", (int) (*FNR));
541 		if (strcmp(*FILENAME, "-") != 0)
542 			fprintf(stderr, ", file %s", *FILENAME);
543 		fprintf(stderr, "\n");
544 	}
545 	if (compile_time != 2 && curnode)
546 		fprintf(stderr, " source line number %d", curnode->lineno);
547 	else if (compile_time != 2 && lineno)
548 		fprintf(stderr, " source line number %d", lineno);
549 	if (compile_time == 1 && cursource() != NULL)
550 		fprintf(stderr, " source file %s", cursource());
551 	fprintf(stderr, "\n");
552 	eprint();
553 	if (f) {
554 		if (dbg > 1)		/* core dump if serious debugging on */
555 			abort();
556 		exit(2);
557 	}
558 }
559 
/*
 * eprint: show the source text around an error on stderr.
 *
 * Output has the form " context is\n\t<text> >>> <text> <<< <rest>".
 * Nothing is printed when compile_time is 2 or 0, or on any call after
 * the first.  ebuf and ep are defined elsewhere; presumably ebuf holds
 * recently read source text and ep points just past the last character
 * stored in it -- confirm against the lexer.
 */
void eprint(void)	/* try to print context around error */
{
	char *p, *q;
	int c;
	static int been_here = 0;
	extern char ebuf[], *ep;

	if (compile_time == 2 || compile_time == 0 || been_here++ > 0)
		return;
	/* step p back from ep to the start of the line being reported */
	/* NOTE(review): if ep == ebuf, p starts before ebuf and *p is read below -- confirm this cannot happen */
	p = ep - 1;
	if (p > ebuf && *p == '\n')
		p--;
	for ( ; p > ebuf && *p != '\n' && *p != '\0'; p--)
		;
	while (*p == '\n')
		p++;
	fprintf(stderr, " context is\n\t");
	/* step q back from ep to the blank, tab or newline before the last word */
	for (q=ep-1; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--)
		;
	/* text from p up to q, skipping NUL bytes */
	for ( ; p < q; p++)
		if (*p)
			putc(*p, stderr);
	fprintf(stderr, " >>> ");
	/* text from q up to ep goes between the markers */
	for ( ; p < ep; p++)
		if (*p)
			putc(*p, stderr);
	fprintf(stderr, " <<< ");
	/* echo the rest of the line from input(), keeping bracket counts current */
	if (*ep)
		while ((c = input()) != '\n' && c != '\0' && c != EOF) {
			putc(c, stderr);
			bclass(c);
		}
	putc('\n', stderr);
	ep = ebuf;
}
595 
596 void bclass(int c)
597 {
598 	switch (c) {
599 	case '{': bracecnt++; break;
600 	case '}': bracecnt--; break;
601 	case '[': brackcnt++; break;
602 	case ']': brackcnt--; break;
603 	case '(': parencnt++; break;
604 	case ')': parencnt--; break;
605 	}
606 }
607 
608 double errcheck(double x, char *s)
609 {
610 
611 	if (errno == EDOM) {
612 		errno = 0;
613 		ERROR "%s argument out of domain", s WARNING;
614 		x = 1;
615 	} else if (errno == ERANGE) {
616 		errno = 0;
617 		ERROR "%s result out of range", s WARNING;
618 		x = 1;
619 	}
620 	return x;
621 }
622 
/*
 * isclvar: does s have the form var=something?
 *
 * Returns 1 when s starts with a letter or underscore, continues with
 * letters, digits and underscores, and is followed by a single '='
 * (a name followed by "==" is rejected).  Returns 0 otherwise.
 *
 * The characters are converted to unsigned char before being handed to
 * the <ctype.h> functions, whose behaviour is undefined for negative
 * values other than EOF.
 */
int isclvar(char *s)	/* is s of form var=something ? */
{
	char *os = s;

	if (!isalpha((unsigned char) *s) && *s != '_')
		return 0;
	for ( ; *s; s++)
		if (!(isalnum((unsigned char) *s) || *s == '_'))
			break;
	return *s == '=' && s > os && *(s+1) != '=';
}
634 
635 /* strtod is supposed to be a proper test of what's a valid number */
636 
637 #include <math.h>
/*
 * is_number: does the string s look like a number?
 *
 * Returns 1 when strtod() converts a non-empty prefix of s without a
 * range error, the result is not HUGE_VAL, and only blanks, tabs or
 * newlines follow the converted part.  Returns 0 otherwise.
 */
int is_number(char *s)
{
	char *end;
	double val;

	errno = 0;
	val = strtod(s, &end);
	if (end == s)		/* nothing was converted */
		return 0;
	if (val == HUGE_VAL || errno == ERANGE)
		return 0;
	end += strspn(end, " \t\n");	/* trailing white space is allowed */
	return *end == '\0';
}
653