xref: /openbsd-src/usr.bin/awk/main.c (revision f6aab3d83b51b91c24247ad2c2573574de475a82)
1 /*	$OpenBSD: main.c,v 1.64 2023/10/31 01:08:51 millert Exp $	*/
2 /****************************************************************
3 Copyright (C) Lucent Technologies 1997
4 All Rights Reserved
5 
6 Permission to use, copy, modify, and distribute this software and
7 its documentation for any purpose and without fee is hereby
8 granted, provided that the above copyright notice appear in all
9 copies and that both that the copyright notice and this
10 permission notice and warranty disclaimer appear in supporting
11 documentation, and that the name Lucent Technologies or any of
12 its entities not be used in advertising or publicity pertaining
13 to distribution of the software without specific, written prior
14 permission.
15 
16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 THIS SOFTWARE.
24 ****************************************************************/
25 
26 const char	*version = "version 20231030";
27 
28 #define DEBUG
29 #include <stdio.h>
30 #include <ctype.h>
31 #include <locale.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <signal.h>
35 #include <unistd.h>
36 #include "awk.h"
37 
38 extern	char	*__progname;
39 extern	char	**environ;
40 extern	int	nfields;
41 
42 int	dbg	= 0;
43 Awkfloat	srand_seed = 1;
44 char	*cmdname;	/* gets argv[0] for error messages */
45 extern	FILE	*yyin;	/* lex input file */
46 char	*lexprog;	/* points to program argument if it exists */
47 extern	int errorflag;	/* non-zero if any syntax errors; set by yyerror */
48 enum compile_states	compile_time = ERROR_PRINTING;
49 
50 static char	**pfile;	/* program filenames from -f's */
51 static size_t	maxpfile;	/* max program filename */
52 static size_t	npfile;		/* number of filenames */
53 static size_t	curpfile;	/* current filename */
54 
55 bool	CSV = false;		/* true for csv input */
56 bool	safe = false;		/* true => "safe" mode */
57 bool	do_posix = false;	/* true => POSIX mode */
58 
59 size_t	awk_mb_cur_max = 1;
60 
61 static noreturn void fpecatch(int n
62 #ifdef SA_SIGINFO
63 	, siginfo_t *si, void *uc
64 #endif
65 )
66 {
67 	extern Node *curnode;
68 #ifdef SA_SIGINFO
69 	static const char *emsg[] = {
70 		[0] = "Unknown error",
71 		[FPE_INTDIV] = "Integer divide by zero",
72 		[FPE_INTOVF] = "Integer overflow",
73 		[FPE_FLTDIV] = "Floating point divide by zero",
74 		[FPE_FLTOVF] = "Floating point overflow",
75 		[FPE_FLTUND] = "Floating point underflow",
76 		[FPE_FLTRES] = "Floating point inexact result",
77 		[FPE_FLTINV] = "Invalid Floating point operation",
78 		[FPE_FLTSUB] = "Subscript out of range",
79 	};
80 #endif
81 	dprintf(STDERR_FILENO, "floating point exception%s%s\n",
82 #ifdef SA_SIGINFO
83 		": ", (size_t)si->si_code < sizeof(emsg) / sizeof(emsg[0]) &&
84 		emsg[si->si_code] ? emsg[si->si_code] : emsg[0]
85 #else
86 		"", ""
87 #endif
88 	    );
89 
90 	if (compile_time != 2 && NR && *NR > 0) {
91 		dprintf(STDERR_FILENO, " input record number %d", (int) (*FNR));
92 		if (strcmp(*FILENAME, "-") != 0) {
93 			dprintf(STDERR_FILENO, ", file %s", *FILENAME);
94 		}
95 		dprintf(STDERR_FILENO, "\n");
96 	}
97 	if (compile_time != 2 && curnode) {
98 		dprintf(STDERR_FILENO, " source line number %d", curnode->lineno);
99 	} else if (compile_time != 2 && lineno) {
100 		dprintf(STDERR_FILENO, " source line number %d", lineno);
101 	}
102 	if (compile_time == 1 && cursource() != NULL) {
103 		dprintf(STDERR_FILENO, " source file %s", cursource());
104 	}
105 	dprintf(STDERR_FILENO, "\n");
106 	if (dbg > 1)		/* core dump if serious debugging on */
107 		abort();
108 	_exit(2);
109 }
110 
/*
 * Map a -F argument to the field separator string to use.
 *
 * The single letter "t" is shorthand for a tab character; every other
 * argument (including the empty string) is returned as the same pointer
 * that was passed in.
 */
static const char *
setfs(char *p)
{
	/* wart: t=>\t */
	return (strcmp(p, "t") == 0) ? "\t" : p;
}
119 
/*
 * Fetch the value belonging to the single-letter option in (*argv)[1].
 *
 * If text follows the option letter ("-fprog") that text is the value and
 * argc/argv are left alone.  Otherwise ("-f prog") the option word is
 * consumed by stepping *argc and *argv, and the following word is
 * returned; FATAL(msg) is called when no such word exists.
 *
 * The caller is expected to have checked that (*argv)[1] is at least two
 * characters long, since index 2 is read unconditionally.
 */
static char *
getarg(int *argc, char ***argv, const char *msg)
{
	char *attached = &(*argv)[1][2];

	if (*attached != '\0')
		return attached;	/* arg is -fsomething */

	/* arg is -f something */
	--*argc;
	++*argv;
	if (*argc <= 1)
		FATAL("%s", msg);
	return (*argv)[1];
}
132 
133 int main(int argc, char *argv[])
134 {
135 	const char *fs = NULL;
136 	char *fn, *vn;
137 
138 	setlocale(LC_CTYPE, "");
139 	setlocale(LC_NUMERIC, "C"); /* for parsing cmdline & prog */
140 	awk_mb_cur_max = MB_CUR_MAX;
141 	cmdname = __progname;
142 
143 	if (pledge("stdio rpath wpath cpath proc exec", NULL) == -1) {
144 		fprintf(stderr, "%s: pledge: incorrect arguments\n",
145 		    cmdname);
146 		exit(1);
147 	}
148 
149 	if (argc == 1) {
150 		fprintf(stderr, "usage: %s [-safe] [-V] [-d[n]] "
151 		    "[-f fs | --csv] [-v var=value]\n"
152 		    "\t   [prog | -f progfile] file ...\n", cmdname);
153 		return 1;
154 	}
155 #ifdef SA_SIGINFO
156 	{
157 		struct sigaction sa;
158 		sa.sa_sigaction = fpecatch;
159 		sa.sa_flags = SA_SIGINFO;
160 		sigemptyset(&sa.sa_mask);
161 		(void)sigaction(SIGFPE, &sa, NULL);
162 	}
163 #else
164 	(void)signal(SIGFPE, fpecatch);
165 #endif
166 
167 	do_posix = (getenv("POSIXLY_CORRECT") != NULL);
168 
169 	yyin = NULL;
170 	symtab = makesymtab(NSYMTAB);
171 	while (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') {
172 		if (strcmp(argv[1], "--version") == 0) {
173 			printf("awk %s\n", version);
174 			return 0;
175 		}
176 		if (strcmp(argv[1], "--") == 0) {	/* explicit end of args */
177 			argc--;
178 			argv++;
179 			break;
180 		}
181 		if (strcmp(argv[1], "--csv") == 0) {	/* turn on csv input processing */
182 			CSV = true;
183 			argc--;
184 			argv++;
185 			continue;
186 		}
187 		switch (argv[1][1]) {
188 		case 's':
189 			if (strcmp(argv[1], "-safe") == 0)
190 				safe = true;
191 			break;
192 		case 'f':	/* next argument is program filename */
193 			fn = getarg(&argc, &argv, "no program filename");
194 			if (npfile >= maxpfile) {
195 				maxpfile += 20;
196 				pfile = (char **) reallocarray(pfile, maxpfile, sizeof(*pfile));
197 				if (pfile == NULL)
198 					FATAL("error allocating space for -f options");
199  			}
200 			pfile[npfile++] = fn;
201  			break;
202 		case 'F':	/* set field separator */
203 			fs = setfs(getarg(&argc, &argv, "no field separator"));
204 			break;
205 		case 'v':	/* -v a=1 to be done NOW.  one -v for each */
206 			vn = getarg(&argc, &argv, "no variable name");
207 			if (isclvar(vn))
208 				setclvar(vn);
209 			else
210 				FATAL("invalid -v option argument: %s", vn);
211 			break;
212 		case 'd':
213 			dbg = atoi(&argv[1][2]);
214 			if (dbg == 0)
215 				dbg = 1;
216 			printf("awk %s\n", version);
217 			break;
218 		case 'V':
219 			printf("awk %s\n", version);
220 			return 0;
221 		default:
222 			WARNING("unknown option %s ignored", argv[1]);
223 			break;
224 		}
225 		argc--;
226 		argv++;
227 	}
228 
229 	if (safe) {
230 		if (pledge("stdio rpath", NULL) == -1) {
231 			fprintf(stderr, "%s: pledge: incorrect arguments\n",
232 			    cmdname);
233 			exit(1);
234 		}
235 	}
236 
237 	/* argv[1] is now the first argument */
238 	if (npfile == 0) {	/* no -f; first argument is program */
239 		if (argc <= 1) {
240 			if (dbg)
241 				exit(0);
242 			FATAL("no program given");
243 		}
244 		DPRINTF("program = |%s|\n", argv[1]);
245 		lexprog = argv[1];
246 		argc--;
247 		argv++;
248 	}
249 	recinit(recsize);
250 	syminit();
251 	compile_time = COMPILING;
252 	argv[0] = cmdname;	/* put prog name at front of arglist */
253 	DPRINTF("argc=%d, argv[0]=%s\n", argc, argv[0]);
254 	arginit(argc, argv);
255 	if (!safe)
256 		envinit(environ);
257 	yyparse();
258 #if 0
259 	// Doing this would comply with POSIX, but is not compatible with
260 	// other awks and with what most users expect. So comment it out.
261 	setlocale(LC_NUMERIC, ""); /* back to whatever it is locally */
262 #endif
263 	if (fs)
264 		*FS = qstring(fs, '\0');
265 	DPRINTF("errorflag=%d\n", errorflag);
266 	if (errorflag == 0) {
267 		compile_time = RUNNING;
268 		run(winner);
269 	} else
270 		bracecheck();
271 	return(errorflag);
272 }
273 
274 int pgetc(void)		/* get 1 character from awk program */
275 {
276 	int c;
277 
278 	for (;;) {
279 		if (yyin == NULL) {
280 			if (curpfile >= npfile)
281 				return EOF;
282 			if (strcmp(pfile[curpfile], "-") == 0)
283 				yyin = stdin;
284 			else if ((yyin = fopen(pfile[curpfile], "r")) == NULL)
285 				FATAL("can't open file %s", pfile[curpfile]);
286 			lineno = 1;
287 		}
288 		if ((c = getc(yyin)) != EOF)
289 			return c;
290 		if (yyin != stdin)
291 			fclose(yyin);
292 		yyin = NULL;
293 		curpfile++;
294 	}
295 }
296 
297 char *cursource(void)	/* current source file name */
298 {
299 	if (npfile > 0)
300 		return pfile[curpfile < npfile ? curpfile : curpfile - 1];
301 	else
302 		return NULL;
303 }
304