xref: /openbsd-src/usr.bin/awk/main.c (revision 68dd5bb1859285b71cb62a10bf107b8ad54064d9)
1 /*	$OpenBSD: main.c,v 1.68 2024/01/25 16:40:51 millert Exp $	*/
2 /****************************************************************
3 Copyright (C) Lucent Technologies 1997
4 All Rights Reserved
5 
6 Permission to use, copy, modify, and distribute this software and
7 its documentation for any purpose and without fee is hereby
8 granted, provided that the above copyright notice appear in all
9 copies and that both that the copyright notice and this
10 permission notice and warranty disclaimer appear in supporting
11 documentation, and that the name Lucent Technologies or any of
12 its entities not be used in advertising or publicity pertaining
13 to distribution of the software without specific, written prior
14 permission.
15 
16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 THIS SOFTWARE.
24 ****************************************************************/
25 
26 const char	*version = "version 20240122";
27 
28 #define DEBUG
29 #include <stdio.h>
30 #include <ctype.h>
31 #include <locale.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <signal.h>
35 #include <unistd.h>
36 #include "awk.h"
37 
extern	char	*__progname;	/* main() copies this into cmdname */
extern	char	**environ;	/* handed to envinit() unless -safe is given */
extern	int	nfields;

int	dbg	= 0;		/* debug level from -d[n]; > 1 makes fpecatch() abort() */
Awkfloat	srand_seed = 1;
char	*cmdname;	/* program name for error messages (set from __progname) */
extern	FILE	*yyin;	/* lex input file */
char	*lexprog;	/* points to program argument if it exists */
extern	int errorflag;	/* non-zero if any syntax errors; set by yyerror */
/* main() moves this to COMPILING before yyparse() and RUNNING before run() */
enum compile_states	compile_time = ERROR_PRINTING;

static char	**pfile;	/* program filenames from -f's */
static size_t	maxpfile;	/* allocated slots in pfile (grown 20 at a time) */
static size_t	npfile;		/* number of filenames */
static size_t	curpfile;	/* index of the file pgetc() is reading */

bool	CSV = false;		/* true for csv input */
bool	safe = false;		/* true => "safe" mode */
bool	do_posix = false;	/* true => POSIX mode (POSIXLY_CORRECT is set) */

size_t	awk_mb_cur_max = 1;	/* main() overwrites this with MB_CUR_MAX */
60 
61 static noreturn void fpecatch(int n
62 #ifdef SA_SIGINFO
63 	, siginfo_t *si, void *uc
64 #endif
65 )
66 {
67 	extern Node *curnode;
68 #ifdef SA_SIGINFO
69 	static const char *emsg[] = {
70 		[0] = "Unknown error",
71 		[FPE_INTDIV] = "Integer divide by zero",
72 		[FPE_INTOVF] = "Integer overflow",
73 		[FPE_FLTDIV] = "Floating point divide by zero",
74 		[FPE_FLTOVF] = "Floating point overflow",
75 		[FPE_FLTUND] = "Floating point underflow",
76 		[FPE_FLTRES] = "Floating point inexact result",
77 		[FPE_FLTINV] = "Invalid Floating point operation",
78 		[FPE_FLTSUB] = "Subscript out of range",
79 	};
80 #endif
81 	dprintf(STDERR_FILENO, "floating point exception%s%s\n",
82 #ifdef SA_SIGINFO
83 		": ", (size_t)si->si_code < sizeof(emsg) / sizeof(emsg[0]) &&
84 		emsg[si->si_code] ? emsg[si->si_code] : emsg[0]
85 #else
86 		"", ""
87 #endif
88 	    );
89 
90 	if (compile_time != 2 && NR && *NR > 0) {
91 		dprintf(STDERR_FILENO, " input record number %d", (int) (*FNR));
92 		if (strcmp(*FILENAME, "-") != 0) {
93 			dprintf(STDERR_FILENO, ", file %s", *FILENAME);
94 		}
95 		dprintf(STDERR_FILENO, "\n");
96 	}
97 	if (compile_time != 2 && curnode) {
98 		dprintf(STDERR_FILENO, " source line number %d", curnode->lineno);
99 	} else if (compile_time != 2 && lineno) {
100 		dprintf(STDERR_FILENO, " source line number %d", lineno);
101 	}
102 	if (compile_time == 1 && cursource() != NULL) {
103 		dprintf(STDERR_FILENO, " source file %s", cursource());
104 	}
105 	dprintf(STDERR_FILENO, "\n");
106 	if (dbg > 1)		/* core dump if serious debugging on */
107 		abort();
108 	_exit(2);
109 }
110 
/*
 * Translate a -F argument into the field separator to use.
 * The single-character string "t" is a historical shorthand for a tab;
 * any other string is returned unchanged (the same pointer).
 */
static const char *
setfs(char *p)
{
	/* wart: t=>\t */
	return strcmp(p, "t") == 0 ? "\t" : p;
}
119 
/*
 * Fetch the value of the option currently at (*argv)[1].
 *
 * If the value is glued to the option letter ("-fprog"), return a pointer
 * just past the two-character option prefix and leave argc/argv alone.
 * Otherwise ("-f prog") step *argc / *argv forward by one so that the
 * value becomes (*argv)[1], and return it; if no argument is left,
 * report msg through FATAL().
 */
static char *
getarg(int *argc, char ***argv, const char *msg)
{
	char *opt = (*argv)[1];

	if (opt[2] != '\0')		/* arg is -fsomething */
		return opt + 2;

	/* arg is -f something */
	--*argc;
	++*argv;
	if (*argc <= 1)
		FATAL("%s", msg);
	return (*argv)[1];
}
132 
133 int main(int argc, char *argv[])
134 {
135 	const char *fs = NULL;
136 	char *fn, *vn;
137 
138 	setlocale(LC_CTYPE, "");
139 	setlocale(LC_NUMERIC, "C"); /* for parsing cmdline & prog */
140 	awk_mb_cur_max = MB_CUR_MAX;
141 	cmdname = __progname;
142 
143 	if (pledge("stdio rpath wpath cpath proc exec", NULL) == -1) {
144 		fprintf(stderr, "%s: pledge: incorrect arguments\n",
145 		    cmdname);
146 		exit(1);
147 	}
148 
149 	if (argc == 1) {
150 		fprintf(stderr, "usage: %s [-safe] [-V] [-d[n]] "
151 		    "[-f fs | --csv] [-v var=value]\n"
152 		    "\t   [prog | -f progfile] file ...\n", cmdname);
153 		return 1;
154 	}
155 #ifdef SA_SIGINFO
156 	{
157 		struct sigaction sa;
158 		sa.sa_sigaction = fpecatch;
159 		sa.sa_flags = SA_SIGINFO;
160 		sigemptyset(&sa.sa_mask);
161 		(void)sigaction(SIGFPE, &sa, NULL);
162 	}
163 #else
164 	(void)signal(SIGFPE, fpecatch);
165 #endif
166 
167 	do_posix = (getenv("POSIXLY_CORRECT") != NULL);
168 
169 	yyin = NULL;
170 	symtab = makesymtab(NSYMTAB);
171 	while (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') {
172 		if (strcmp(argv[1], "--version") == 0) {
173 			printf("awk %s\n", version);
174 			return 0;
175 		}
176 		if (strcmp(argv[1], "--") == 0) {	/* explicit end of args */
177 			argc--;
178 			argv++;
179 			break;
180 		}
181 		if (strcmp(argv[1], "--csv") == 0) {	/* turn on csv input processing */
182 			CSV = true;
183 			argc--;
184 			argv++;
185 			continue;
186 		}
187 		switch (argv[1][1]) {
188 		case 's':
189 			if (strcmp(argv[1], "-safe") == 0)
190 				safe = true;
191 			break;
192 		case 'f':	/* next argument is program filename */
193 			fn = getarg(&argc, &argv, "no program filename");
194 			if (npfile >= maxpfile) {
195 				maxpfile += 20;
196 				pfile = (char **) reallocarray(pfile, maxpfile, sizeof(*pfile));
197 				if (pfile == NULL)
198 					FATAL("error allocating space for -f options");
199  			}
200 			pfile[npfile++] = fn;
201  			break;
202 		case 'F':	/* set field separator */
203 			fs = setfs(getarg(&argc, &argv, "no field separator"));
204 			break;
205 		case 'v':	/* -v a=1 to be done NOW.  one -v for each */
206 			vn = getarg(&argc, &argv, "no variable name");
207 			if (isclvar(vn))
208 				setclvar(vn);
209 			else
210 				FATAL("invalid -v option argument: %s", vn);
211 			break;
212 		case 'd':
213 			dbg = atoi(&argv[1][2]);
214 			if (dbg == 0)
215 				dbg = 1;
216 			printf("awk %s\n", version);
217 			break;
218 		case 'V':
219 			printf("awk %s\n", version);
220 			return 0;
221 		default:
222 			WARNING("unknown option %s ignored", argv[1]);
223 			break;
224 		}
225 		argc--;
226 		argv++;
227 	}
228 
229 	if (safe) {
230 		if (pledge("stdio rpath", NULL) == -1) {
231 			fprintf(stderr, "%s: pledge: incorrect arguments\n",
232 			    cmdname);
233 			exit(1);
234 		}
235 	}
236 
237 	if (CSV && (fs != NULL || lookup("FS", symtab) != NULL))
238 		WARNING("danger: don't set FS when --csv is in effect");
239 
240 	/* argv[1] is now the first argument */
241 	if (npfile == 0) {	/* no -f; first argument is program */
242 		if (argc <= 1) {
243 			if (dbg)
244 				exit(0);
245 			FATAL("no program given");
246 		}
247 		DPRINTF("program = |%s|\n", argv[1]);
248 		lexprog = argv[1];
249 		argc--;
250 		argv++;
251 	}
252 	recinit(recsize);
253 	syminit();
254 	compile_time = COMPILING;
255 	argv[0] = cmdname;	/* put prog name at front of arglist */
256 	DPRINTF("argc=%d, argv[0]=%s\n", argc, argv[0]);
257 	arginit(argc, argv);
258 	if (!safe)
259 		envinit(environ);
260 	yyparse();
261 #if 0
262 	// Doing this would comply with POSIX, but is not compatible with
263 	// other awks and with what most users expect. So comment it out.
264 	setlocale(LC_NUMERIC, ""); /* back to whatever it is locally */
265 #endif
266 	if (fs)
267 		*FS = qstring(fs, '\0');
268 	DPRINTF("errorflag=%d\n", errorflag);
269 	if (errorflag == 0) {
270 		compile_time = RUNNING;
271 		run(winner);
272 	} else
273 		bracecheck();
274 	return(errorflag);
275 }
276 
277 int pgetc(void)		/* get 1 character from awk program */
278 {
279 	int c;
280 
281 	for (;;) {
282 		if (yyin == NULL) {
283 			if (curpfile >= npfile)
284 				return EOF;
285 			if (strcmp(pfile[curpfile], "-") == 0)
286 				yyin = stdin;
287 			else if ((yyin = fopen(pfile[curpfile], "r")) == NULL)
288 				FATAL("can't open file %s", pfile[curpfile]);
289 			lineno = 1;
290 		}
291 		if ((c = getc(yyin)) != EOF)
292 			return c;
293 		if (yyin != stdin)
294 			fclose(yyin);
295 		yyin = NULL;
296 		curpfile++;
297 	}
298 }
299 
300 char *cursource(void)	/* current source file name */
301 {
302 	if (npfile > 0)
303 		return pfile[curpfile < npfile ? curpfile : curpfile - 1];
304 	else
305 		return NULL;
306 }
307