xref: /openbsd-src/usr.bin/lex/filter.c (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1 /* $OpenBSD: filter.c,v 1.6 2015/11/19 23:28:03 tedu Exp $ */
2 
3 /* filter - postprocessing of flex output through filters */
4 
5 /*  This file is part of flex. */
6 
7 /*  Redistribution and use in source and binary forms, with or without */
8 /*  modification, are permitted provided that the following conditions */
9 /*  are met: */
10 
11 /*  1. Redistributions of source code must retain the above copyright */
12 /*     notice, this list of conditions and the following disclaimer. */
13 /*  2. Redistributions in binary form must reproduce the above copyright */
14 /*     notice, this list of conditions and the following disclaimer in the */
15 /*     documentation and/or other materials provided with the distribution. */
16 
17 /*  Neither the name of the University nor the names of its contributors */
18 /*  may be used to endorse or promote products derived from this software */
19 /*  without specific prior written permission. */
20 
21 /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
22 /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
23 /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
24 /*  PURPOSE. */
25 
26 #include "flexdef.h"
/*
 * m4 preamble written at the top of each generated stream: when the m4
 * macro `__gnu__' is not defined it prints an error and makes m4 exit
 * with status 2.
 */
static const char *check_4_gnu_m4 =
	"m4_dnl ifdef(`__gnu__', ,`errprint("
	"Flex requires GNU M4. "
	"Set the PATH or set the M4 environment variable to its path name."
	") m4exit(2)')\n";

/** Head of the global output filter chain; NULL until a filter is created. */
struct filter *output_chain = NULL;
35 
36 /* Allocate and initialize an external filter.
37  * @param chain the current chain or NULL for new chain
38  * @param cmd the command to execute.
39  * @param ... a NULL terminated list of (const char*) arguments to command,
40  *            not including argv[0].
41  * @return newest filter in chain
42  */
43 struct filter *
44 filter_create_ext(struct filter * chain, const char *cmd,
45     ...)
46 {
47 	struct filter *f;
48 	int max_args;
49 	const char *s;
50 	va_list ap;
51 
52 	/* allocate and initialize new filter */
53 	f = malloc(sizeof(struct filter));
54 	if (!f)
55 		flexerror(_("malloc failed (f) in filter_create_ext"));
56 	memset(f, 0, sizeof(*f));
57 	f->filter_func = NULL;
58 	f->extra = NULL;
59 	f->next = NULL;
60 	f->argc = 0;
61 
62 	if (chain != NULL) {
63 		/* append f to end of chain */
64 		while (chain->next)
65 			chain = chain->next;
66 		chain->next = f;
67 	}
68 	/* allocate argv, and populate it with the argument list. */
69 	max_args = 8;
70 	f->argv = malloc(sizeof(char *) * (max_args + 1));
71 	if (!f->argv)
72 		flexerror(_("malloc failed (f->argv) in filter_create_ext"));
73 	f->argv[f->argc++] = cmd;
74 
75 	va_start(ap, cmd);
76 	while ((s = va_arg(ap, const char *)) != NULL) {
77 		if (f->argc >= max_args) {
78 			max_args += 8;
79 			f->argv = realloc(f->argv,
80 			    sizeof(char *) * (max_args + 1));
81 		}
82 		f->argv[f->argc++] = s;
83 	}
84 	f->argv[f->argc] = NULL;
85 
86 	va_end(ap);
87 	return f;
88 }
89 
90 /* Allocate and initialize an internal filter.
91  * @param chain the current chain or NULL for new chain
92  * @param filter_func The function that will perform the filtering.
93  *        filter_func should return 0 if successful, and -1
94  *        if an error occurs -- or it can simply exit().
95  * @param extra optional user-defined data to pass to the filter.
96  * @return newest filter in chain
97  */
98 struct filter *
99 filter_create_int(struct filter * chain,
100     int (*filter_func) (struct filter *),
101     void *extra)
102 {
103 	struct filter *f;
104 
105 	/* allocate and initialize new filter */
106 	f = malloc(sizeof(struct filter));
107 	if (!f)
108 		flexerror(_("malloc failed in filter_create_int"));
109 	memset(f, 0, sizeof(*f));
110 	f->next = NULL;
111 	f->argc = 0;
112 	f->argv = NULL;
113 
114 	f->filter_func = filter_func;
115 	f->extra = extra;
116 
117 	if (chain != NULL) {
118 		/* append f to end of chain */
119 		while (chain->next)
120 			chain = chain->next;
121 		chain->next = f;
122 	}
123 	return f;
124 }
125 
126 /** Fork and exec entire filter chain.
127  *  @param chain The head of the chain.
128  *  @return true on success.
129  */
130 bool
131 filter_apply_chain(struct filter * chain)
132 {
133 	int pid, pipes[2];
134 	int r;
135 	const int readsz = 512;
136 	char *buf;
137 
138 
139 	/*
140 	 * Tricky recursion, since we want to begin the chain at the END.
141 	 * Why? Because we need all the forked processes to be children of
142 	 * the main flex process.
143 	 */
144 	if (chain)
145 		filter_apply_chain(chain->next);
146 	else
147 		return true;
148 
149 	/*
150 	 * Now we are the right-most unprocessed link in the chain.
151 	 */
152 
153 	fflush(stdout);
154 	fflush(stderr);
155 
156 
157 	if (pipe(pipes) == -1)
158 		flexerror(_("pipe failed"));
159 
160 	if ((pid = fork()) == -1)
161 		flexerror(_("fork failed"));
162 
163 	if (pid == 0) {
164 		/* child */
165 
166 		/*
167 		 * We need stdin (the FILE* stdin) to connect to this new
168 		 * pipe. There is no portable way to set stdin to a new file
169 		 * descriptor, as stdin is not an lvalue on some systems
170 		 * (BSD). So we dup the new pipe onto the stdin descriptor
171 		 * and use a no-op fseek to sync the stream. This is a Hail
172 		 * Mary situation. It seems to work.
173 		 */
174 		close(pipes[1]);
175 		clearerr(stdin);
176 		if (dup2(pipes[0], fileno(stdin)) == -1)
177 			flexfatal(_("dup2(pipes[0],0)"));
178 		close(pipes[0]);
179 		fseek(stdin, 0, SEEK_CUR);
180 
181 		/* run as a filter, either internally or by exec */
182 		if (chain->filter_func) {
183 			int r;
184 
185 			if ((r = chain->filter_func(chain)) == -1)
186 				flexfatal(_("filter_func failed"));
187 			exit(0);
188 		} else {
189 			execvp(chain->argv[0],
190 			    (char **const) (chain->argv));
191 			lerrsf_fatal(_("exec of %s failed"),
192 			    chain->argv[0]);
193 		}
194 
195 		exit(1);
196 	}
197 	/* Parent */
198 	close(pipes[0]);
199 	if (dup2(pipes[1], fileno(stdout)) == -1)
200 		flexfatal(_("dup2(pipes[1],1)"));
201 	close(pipes[1]);
202 	fseek(stdout, 0, SEEK_CUR);
203 
204 	return true;
205 }
206 
207 /** Truncate the chain to max_len number of filters.
208  * @param chain the current chain.
209  * @param max_len the maximum length of the chain.
210  * @return the resulting length of the chain.
211  */
212 int
213 filter_truncate(struct filter * chain, int max_len)
214 {
215 	int len = 1;
216 
217 	if (!chain)
218 		return 0;
219 
220 	while (chain->next && len < max_len) {
221 		chain = chain->next;
222 		++len;
223 	}
224 
225 	chain->next = NULL;
226 	return len;
227 }
228 
229 /** Splits the chain in order to write to a header file.
230  *  Similar in spirit to the 'tee' program.
231  *  The header file name is in extra.
232  *  @return 0 (zero) on success, and -1 on failure.
233  */
234 int
235 filter_tee_header(struct filter * chain)
236 {
237 	/*
238 	 * This function reads from stdin and writes to both the C file and
239 	 * the header file at the same time.
240 	 */
241 
242 	const int readsz = 512;
243 	char *buf;
244 	int to_cfd = -1;
245 	FILE *to_c = NULL, *to_h = NULL;
246 	bool write_header;
247 
248 	write_header = (chain->extra != NULL);
249 
250 	/*
251 	 * Store a copy of the stdout pipe, which is already piped to C file
252 	 * through the running chain. Then create a new pipe to the H file as
253 	 * stdout, and fork the rest of the chain again.
254 	 */
255 
256 	if ((to_cfd = dup(1)) == -1)
257 		flexfatal(_("dup(1) failed"));
258 	to_c = fdopen(to_cfd, "w");
259 
260 	if (write_header) {
261 		if (freopen((char *) chain->extra, "w", stdout) == NULL)
262 			flexfatal(_("freopen(headerfilename) failed"));
263 
264 		filter_apply_chain(chain->next);
265 		to_h = stdout;
266 	}
267 	/*
268 	 * Now to_c is a pipe to the C branch, and to_h is a pipe to the H
269 	 * branch.
270 	 */
271 
272 	if (write_header) {
273 		fputs(check_4_gnu_m4, to_h);
274 		fputs("m4_changecom`'m4_dnl\n", to_h);
275 		fputs("m4_changequote`'m4_dnl\n", to_h);
276 		fputs("m4_changequote([[,]])[[]]m4_dnl\n", to_h);
277 		fputs("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_h);
278 		fputs("m4_define( [[M4_YY_IN_HEADER]],[[]])m4_dnl\n",
279 		    to_h);
280 		fprintf(to_h, "#ifndef %sHEADER_H\n", prefix);
281 		fprintf(to_h, "#define %sHEADER_H 1\n", prefix);
282 		fprintf(to_h, "#define %sIN_HEADER 1\n\n", prefix);
283 		fprintf(to_h,
284 		    "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n",
285 		    headerfilename ? headerfilename : "<stdout>");
286 
287 	}
288 	fputs(check_4_gnu_m4, to_c);
289 	fputs("m4_changecom`'m4_dnl\n", to_c);
290 	fputs("m4_changequote`'m4_dnl\n", to_c);
291 	fputs("m4_changequote([[,]])[[]]m4_dnl\n", to_c);
292 	fputs("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_c);
293 	fprintf(to_c, "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n",
294 	    outfilename ? outfilename : "<stdout>");
295 
296 	buf = malloc(readsz);
297 	if (!buf)
298 		flexerror(_("malloc failed in filter_tee_header"));
299 	while (fgets(buf, readsz, stdin)) {
300 		fputs(buf, to_c);
301 		if (write_header)
302 			fputs(buf, to_h);
303 	}
304 
305 	if (write_header) {
306 		fprintf(to_h, "\n");
307 
308 		/*
309 		 * write a fake line number. It will get fixed by the linedir
310 		 * filter.
311 		 */
312 		fprintf(to_h, "#line 4000 \"M4_YY_OUTFILE_NAME\"\n");
313 
314 		fprintf(to_h, "#undef %sIN_HEADER\n", prefix);
315 		fprintf(to_h, "#endif /* %sHEADER_H */\n", prefix);
316 		fputs("m4_undefine( [[M4_YY_IN_HEADER]])m4_dnl\n", to_h);
317 
318 		fflush(to_h);
319 		if (ferror(to_h))
320 			lerrsf(_("error writing output file %s"),
321 			    (char *) chain->extra);
322 
323 		else if (fclose(to_h))
324 			lerrsf(_("error closing output file %s"),
325 			    (char *) chain->extra);
326 	}
327 	fflush(to_c);
328 	if (ferror(to_c))
329 		lerrsf(_("error writing output file %s"),
330 		    outfilename ? outfilename : "<stdout>");
331 
332 	else if (fclose(to_c))
333 		lerrsf(_("error closing output file %s"),
334 		    outfilename ? outfilename : "<stdout>");
335 
336 	while (wait(0) > 0);
337 
338 	exit(0);
339 	return 0;
340 }
341 
342 /** Adjust the line numbers in the #line directives of the generated scanner.
343  * After the m4 expansion, the line numbers are incorrect since the m4 macros
344  * can add or remove lines.  This only adjusts line numbers for generated code,
345  * not user code. This also happens to be a good place to squeeze multiple
346  * blank lines into a single blank line.
347  */
348 int
349 filter_fix_linedirs(struct filter * chain)
350 {
351 	char *buf;
352 	const int readsz = 512;
353 	int lineno = 1;
354 	bool in_gen = true;	/* in generated code */
355 	bool last_was_blank = false;
356 
357 	if (!chain)
358 		return 0;
359 
360 	buf = malloc(readsz);
361 	if (!buf)
362 		flexerror(_("malloc failed in filter_fix_linedirs"));
363 
364 	while (fgets(buf, readsz, stdin)) {
365 
366 		regmatch_t m[10];
367 
368 		/* Check for #line directive. */
369 		if (buf[0] == '#'
370 		    && regexec(&regex_linedir, buf, 3, m, 0) == 0) {
371 
372 			int num;
373 			char *fname;
374 
375 			/* extract the line number and filename */
376 			num = regmatch_strtol(&m[1], buf, NULL, 0);
377 			fname = regmatch_dup(&m[2], buf);
378 
379 			if (strcmp(fname,
380 				outfilename ? outfilename : "<stdout>") == 0 ||
381 			    strcmp(fname, headerfilename ? headerfilename :
382 				"<stdout>") == 0) {
383 
384 				char *s1, *s2;
385 				char filename[MAXLINE];
386 
387 				s1 = fname;
388 				s2 = filename;
389 
390 				while ((s2 - filename) < (MAXLINE - 1) && *s1) {
391 					/* Escape the backslash */
392 					if (*s1 == '\\')
393 						*s2++ = '\\';
394 					/* Escape the double quote */
395 					if (*s1 == '\"')
396 						*s2++ = '\\';
397 					/* Copy the character as usual */
398 					*s2++ = *s1++;
399 				}
400 
401 				*s2 = '\0';
402 
403 				/* Adjust the line directives. */
404 				in_gen = true;
405 				snprintf(buf, readsz, "#line %d \"%s\"\n",
406 				    lineno + 1, filename);
407 			} else {
408 				/*
409 				 * it's a #line directive for code we didn't
410 				 * write
411 				 */
412 				in_gen = false;
413 			}
414 
415 			free(fname);
416 			last_was_blank = false;
417 		}
418 		/* squeeze blank lines from generated code */
419 		else if (in_gen &&
420 		    regexec(&regex_blank_line, buf, 0, NULL, 0) == 0) {
421 			if (last_was_blank)
422 				continue;
423 			else
424 				last_was_blank = true;
425 		} else {
426 			/* it's a line of normal, non-empty code. */
427 			last_was_blank = false;
428 		}
429 
430 		fputs(buf, stdout);
431 		lineno++;
432 	}
433 	fflush(stdout);
434 	if (ferror(stdout))
435 		lerrsf(_("error writing output file %s"),
436 		    outfilename ? outfilename : "<stdout>");
437 
438 	else if (fclose(stdout))
439 		lerrsf(_("error closing output file %s"),
440 		    outfilename ? outfilename : "<stdout>");
441 
442 	return 0;
443 }
444