xref: /netbsd-src/external/bsd/flex/dist/src/filter.c (revision 596f930c293ef11c72f1fc18967768d00c27abef)
1 /*	$NetBSD: filter.c,v 1.4 2018/12/23 16:27:17 christos Exp $	*/
2 
3 /* filter - postprocessing of flex output through filters */
4 
5 /*  This file is part of flex. */
6 
7 /*  Redistribution and use in source and binary forms, with or without */
8 /*  modification, are permitted provided that the following conditions */
9 /*  are met: */
10 
11 /*  1. Redistributions of source code must retain the above copyright */
12 /*     notice, this list of conditions and the following disclaimer. */
13 /*  2. Redistributions in binary form must reproduce the above copyright */
14 /*     notice, this list of conditions and the following disclaimer in the */
15 /*     documentation and/or other materials provided with the distribution. */
16 
17 /*  Neither the name of the University nor the names of its contributors */
18 /*  may be used to endorse or promote products derived from this software */
19 /*  without specific prior written permission. */
20 
21 /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
22 /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
23 /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
24 /*  PURPOSE. */
25 #include "flexdef.h"
26 __RCSID("$NetBSD: filter.c,v 1.4 2018/12/23 16:27:17 christos Exp $");
27 
/* m4 snippet written at the top of every output stream.  Under GNU m4 the
 * `__gnu__' macro is defined and the ifdef expands to nothing; otherwise
 * the snippet prints a diagnostic and makes m4 exit with status 2.
 */
static const char *check_4_gnu_m4 =
	"m4_dnl ifdef(`__gnu__', ,`errprint("
	"Flex requires GNU M4. Set the PATH or set the M4 environment variable to its path name."
	") m4exit(2)')\n";


/** Head of the global output filter chain; NULL until a chain is built. */
struct filter *output_chain = NULL;
36 
37 /* Allocate and initialize an external filter.
38  * @param chain the current chain or NULL for new chain
39  * @param cmd the command to execute.
40  * @param ... a NULL terminated list of (const char*) arguments to command,
41  *            not including argv[0].
42  * @return newest filter in chain
43  */
filter_create_ext(struct filter * chain,const char * cmd,...)44 struct filter *filter_create_ext (struct filter *chain, const char *cmd,
45 				  ...)
46 {
47 	struct filter *f;
48 	int     max_args;
49 	const char *s;
50 	va_list ap;
51 
52 	/* allocate and initialize new filter */
53 	f = malloc(sizeof(struct filter));
54 	if (!f)
55 		flexerror(_("malloc failed (f) in filter_create_ext"));
56 	memset (f, 0, sizeof (*f));
57 	f->filter_func = NULL;
58 	f->extra = NULL;
59 	f->next = NULL;
60 	f->argc = 0;
61 
62 	if (chain != NULL) {
63 		/* append f to end of chain */
64 		while (chain->next)
65 			chain = chain->next;
66 		chain->next = f;
67 	}
68 
69 
70 	/* allocate argv, and populate it with the argument list. */
71 	max_args = 8;
72 	f->argv = malloc(sizeof(char *) * (size_t) (max_args + 1));
73 	if (!f->argv)
74 		flexerror(_("malloc failed (f->argv) in filter_create_ext"));
75 	f->argv[f->argc++] = cmd;
76 
77 	va_start (ap, cmd);
78 	while ((s = va_arg (ap, const char *)) != NULL) {
79 		if (f->argc >= max_args) {
80 			max_args += 8;
81 			f->argv = realloc(f->argv, sizeof(char*) * (size_t) (max_args + 1));
82 		}
83 		f->argv[f->argc++] = s;
84 	}
85 	f->argv[f->argc] = NULL;
86 
87 	va_end (ap);
88 	return f;
89 }
90 
91 /* Allocate and initialize an internal filter.
92  * @param chain the current chain or NULL for new chain
93  * @param filter_func The function that will perform the filtering.
94  *        filter_func should return 0 if successful, and -1
95  *        if an error occurs -- or it can simply exit().
96  * @param extra optional user-defined data to pass to the filter.
97  * @return newest filter in chain
98  */
filter_create_int(struct filter * chain,int (* filter_func)(struct filter *),void * extra)99 struct filter *filter_create_int (struct filter *chain,
100 				  int (*filter_func) (struct filter *),
101 				  void *extra)
102 {
103 	struct filter *f;
104 
105 	/* allocate and initialize new filter */
106 	f = malloc(sizeof(struct filter));
107 	if (!f)
108 		flexerror(_("malloc failed in filter_create_int"));
109 	memset (f, 0, sizeof (*f));
110 	f->next = NULL;
111 	f->argc = 0;
112 	f->argv = NULL;
113 
114 	f->filter_func = filter_func;
115 	f->extra = extra;
116 
117 	if (chain != NULL) {
118 		/* append f to end of chain */
119 		while (chain->next)
120 			chain = chain->next;
121 		chain->next = f;
122 	}
123 
124 	return f;
125 }
126 
127 /** Fork and exec entire filter chain.
128  *  @param chain The head of the chain.
129  *  @return true on success.
130  */
filter_apply_chain(struct filter * chain)131 bool filter_apply_chain (struct filter * chain)
132 {
133 	int     pid, pipes[2];
134 
135 
136 	/* Tricky recursion, since we want to begin the chain
137 	 * at the END. Why? Because we need all the forked processes
138 	 * to be children of the main flex process.
139 	 */
140 	if (chain)
141 		filter_apply_chain (chain->next);
142 	else
143 		return true;
144 
145 	/* Now we are the right-most unprocessed link in the chain.
146 	 */
147 
148 	fflush (stdout);
149 	fflush (stderr);
150 
151 
152 	if (pipe (pipes) == -1)
153 		flexerror (_("pipe failed"));
154 
155 	if ((pid = fork ()) == -1)
156 		flexerror (_("fork failed"));
157 
158 	if (pid == 0) {
159 		/* child */
160 
161         /* We need stdin (the FILE* stdin) to connect to this new pipe.
162          * There is no portable way to set stdin to a new file descriptor,
163          * as stdin is not an lvalue on some systems (BSD).
164          * So we dup the new pipe onto the stdin descriptor and use a no-op fseek
165          * to sync the stream. This is a Hail Mary situation. It seems to work.
166          */
167 		close (pipes[1]);
168 clearerr(stdin);
169 		if (dup2 (pipes[0], fileno (stdin)) == -1)
170 			flexfatal (_("dup2(pipes[0],0)"));
171 		close (pipes[0]);
172         fseek (stdin, 0, SEEK_CUR);
173         ungetc(' ', stdin); /* still an evil hack, but one that works better */
174         (void)fgetc(stdin); /* on NetBSD than the fseek attempt does */
175 
176 		/* run as a filter, either internally or by exec */
177 		if (chain->filter_func) {
178 			int     r;
179 
180 			if ((r = chain->filter_func (chain)) == -1)
181 				flexfatal (_("filter_func failed"));
182 			FLEX_EXIT (0);
183 		}
184 		else {
185 			execvp (chain->argv[0],
186 				(char **const) (chain->argv));
187             lerr_fatal ( _("exec of %s failed"),
188                     chain->argv[0]);
189 		}
190 
191 		FLEX_EXIT (1);
192 	}
193 
194 	/* Parent */
195 	close (pipes[0]);
196 	if (dup2 (pipes[1], fileno (stdout)) == -1)
197 		flexfatal (_("dup2(pipes[1],1)"));
198 	close (pipes[1]);
199     fseek (stdout, 0, SEEK_CUR);
200 
201 	return true;
202 }
203 
204 /** Truncate the chain to max_len number of filters.
205  * @param chain the current chain.
206  * @param max_len the maximum length of the chain.
207  * @return the resulting length of the chain.
208  */
filter_truncate(struct filter * chain,int max_len)209 int filter_truncate (struct filter *chain, int max_len)
210 {
211 	int     len = 1;
212 
213 	if (!chain)
214 		return 0;
215 
216 	while (chain->next && len < max_len) {
217 		chain = chain->next;
218 		++len;
219 	}
220 
221 	chain->next = NULL;
222 	return len;
223 }
224 
225 /** Splits the chain in order to write to a header file.
226  *  Similar in spirit to the 'tee' program.
227  *  The header file name is in extra.
228  *  @return 0 (zero) on success, and -1 on failure.
229  */
filter_tee_header(struct filter * chain)230 int filter_tee_header (struct filter *chain)
231 {
232 	/* This function reads from stdin and writes to both the C file and the
233 	 * header file at the same time.
234 	 */
235 
236 	const int readsz = 512;
237 	char   *buf;
238 	int     to_cfd = -1;
239 	FILE   *to_c = NULL, *to_h = NULL;
240 	bool    write_header;
241 
242 	write_header = (chain->extra != NULL);
243 
244 	/* Store a copy of the stdout pipe, which is already piped to C file
245 	 * through the running chain. Then create a new pipe to the H file as
246 	 * stdout, and fork the rest of the chain again.
247 	 */
248 
249 	if ((to_cfd = dup (1)) == -1)
250 		flexfatal (_("dup(1) failed"));
251 	to_c = fdopen (to_cfd, "w");
252 
253 	if (write_header) {
254 		if (freopen ((char *) chain->extra, "w", stdout) == NULL)
255 			flexfatal (_("freopen(headerfilename) failed"));
256 
257 		filter_apply_chain (chain->next);
258 		to_h = stdout;
259 	}
260 
261 	/* Now to_c is a pipe to the C branch, and to_h is a pipe to the H branch.
262 	 */
263 
264 	if (write_header) {
265         fputs (check_4_gnu_m4, to_h);
266 		fputs ("m4_changecom`'m4_dnl\n", to_h);
267 		fputs ("m4_changequote`'m4_dnl\n", to_h);
268 		fputs ("m4_changequote([[,]])[[]]m4_dnl\n", to_h);
269 	    fputs ("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_h);
270 		fputs ("m4_define( [[M4_YY_IN_HEADER]],[[]])m4_dnl\n",
271 		       to_h);
272 		fprintf (to_h, "#ifndef %sHEADER_H\n", prefix);
273 		fprintf (to_h, "#define %sHEADER_H 1\n", prefix);
274 		fprintf (to_h, "#define %sIN_HEADER 1\n\n", prefix);
275 		fprintf (to_h,
276 			 "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n",
277 			 headerfilename ? headerfilename : "<stdout>");
278 
279 	}
280 
281     fputs (check_4_gnu_m4, to_c);
282 	fputs ("m4_changecom`'m4_dnl\n", to_c);
283 	fputs ("m4_changequote`'m4_dnl\n", to_c);
284 	fputs ("m4_changequote([[,]])[[]]m4_dnl\n", to_c);
285 	fputs ("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_c);
286 	fprintf (to_c, "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n",
287 		 outfilename ? outfilename : "<stdout>");
288 
289 	buf = malloc((size_t) readsz);
290 	if (!buf)
291 		flexerror(_("malloc failed in filter_tee_header"));
292 	while (fgets (buf, readsz, stdin)) {
293 		fputs (buf, to_c);
294 		if (write_header)
295 			fputs (buf, to_h);
296 	}
297 
298 	if (write_header) {
299 		fprintf (to_h, "\n");
300 
301 		/* write a fake line number. It will get fixed by the linedir filter. */
302 		if (gen_line_dirs)
303 			fprintf (to_h, "#line 4000 \"M4_YY_OUTFILE_NAME\"\n");
304 
305 		fprintf (to_h, "#undef %sIN_HEADER\n", prefix);
306 		fprintf (to_h, "#endif /* %sHEADER_H */\n", prefix);
307 		fputs ("m4_undefine( [[M4_YY_IN_HEADER]])m4_dnl\n", to_h);
308 
309 		fflush (to_h);
310 		if (ferror (to_h))
311 			lerr (_("error writing output file %s"),
312 				(char *) chain->extra);
313 
314 		else if (fclose (to_h))
315 			lerr (_("error closing output file %s"),
316 				(char *) chain->extra);
317 	}
318 
319 	fflush (to_c);
320 	if (ferror (to_c))
321 		lerr (_("error writing output file %s"),
322 			outfilename ? outfilename : "<stdout>");
323 
324 	else if (fclose (to_c))
325 		lerr (_("error closing output file %s"),
326 			outfilename ? outfilename : "<stdout>");
327 
328 	while (wait (0) > 0) ;
329 
330 	FLEX_EXIT (0);
331 	return 0;
332 }
333 
334 /** Adjust the line numbers in the #line directives of the generated scanner.
335  * After the m4 expansion, the line numbers are incorrect since the m4 macros
336  * can add or remove lines.  This only adjusts line numbers for generated code,
337  * not user code. This also happens to be a good place to squeeze multiple
338  * blank lines into a single blank line.
339  */
filter_fix_linedirs(struct filter * chain)340 int filter_fix_linedirs (struct filter *chain)
341 {
342 	char   *buf;
343 	const size_t readsz = 512;
344 	int     lineno = 1;
345 	bool    in_gen = true;	/* in generated code */
346 	bool    last_was_blank = false;
347 
348 	if (!chain)
349 		return 0;
350 
351 	buf = malloc(readsz);
352 	if (!buf)
353 		flexerror(_("malloc failed in filter_fix_linedirs"));
354 
355 	while (fgets (buf, (int) readsz, stdin)) {
356 
357 		regmatch_t m[10];
358 
359 		/* Check for #line directive. */
360 		if (buf[0] == '#'
361 			&& regexec (&regex_linedir, buf, 3, m, 0) == 0) {
362 
363 			char   *fname;
364 
365 			/* extract the line number and filename */
366 			fname = regmatch_dup (&m[2], buf);
367 
368 			if (strcmp (fname,
369 				outfilename ? outfilename : "<stdout>")
370 					== 0
371 			 || strcmp (fname,
372 			 	headerfilename ? headerfilename : "<stdout>")
373 					== 0) {
374 
375 				char    *s1, *s2;
376 				char	filename[MAXLINE];
377 
378 				s1 = fname;
379 				s2 = filename;
380 
381 				while ((s2 - filename) < (MAXLINE - 1) && *s1) {
382 					/* Escape the backslash */
383 					if (*s1 == '\\')
384 						*s2++ = '\\';
385 					/* Escape the double quote */
386 					if (*s1 == '\"')
387 						*s2++ = '\\';
388 					/* Copy the character as usual */
389 					*s2++ = *s1++;
390 				}
391 
392 				*s2 = '\0';
393 
394 				/* Adjust the line directives. */
395 				in_gen = true;
396 				snprintf (buf, readsz, "#line %d \"%s\"\n",
397 					  lineno, filename);
398 			}
399 			else {
400 				/* it's a #line directive for code we didn't write */
401 				in_gen = false;
402 			}
403 
404 			free (fname);
405 			last_was_blank = false;
406 		}
407 
408 		/* squeeze blank lines from generated code */
409 		else if (in_gen
410 			 && regexec (&regex_blank_line, buf, 0, NULL,
411 				     0) == 0) {
412 			if (last_was_blank)
413 				continue;
414 			else
415 				last_was_blank = true;
416 		}
417 
418 		else {
419 			/* it's a line of normal, non-empty code. */
420 			last_was_blank = false;
421 		}
422 
423 		fputs (buf, stdout);
424 		lineno++;
425 	}
426 	fflush (stdout);
427 	if (ferror (stdout))
428 		lerr (_("error writing output file %s"),
429 			outfilename ? outfilename : "<stdout>");
430 
431 	else if (fclose (stdout))
432 		lerr (_("error closing output file %s"),
433 			outfilename ? outfilename : "<stdout>");
434 
435 	return 0;
436 }
437 
438 /* vim:set expandtab cindent tabstop=4 softtabstop=4 shiftwidth=4 textwidth=0: */
439