1 /* $OpenBSD: filter.c,v 1.6 2015/11/19 23:28:03 tedu Exp $ */ 2 3 /* filter - postprocessing of flex output through filters */ 4 5 /* This file is part of flex. */ 6 7 /* Redistribution and use in source and binary forms, with or without */ 8 /* modification, are permitted provided that the following conditions */ 9 /* are met: */ 10 11 /* 1. Redistributions of source code must retain the above copyright */ 12 /* notice, this list of conditions and the following disclaimer. */ 13 /* 2. Redistributions in binary form must reproduce the above copyright */ 14 /* notice, this list of conditions and the following disclaimer in the */ 15 /* documentation and/or other materials provided with the distribution. */ 16 17 /* Neither the name of the University nor the names of its contributors */ 18 /* may be used to endorse or promote products derived from this software */ 19 /* without specific prior written permission. */ 20 21 /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ 22 /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ 23 /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ 24 /* PURPOSE. */ 25 26 #include "flexdef.h" 27 static const char *check_4_gnu_m4 = 28 "m4_dnl ifdef(`__gnu__', ," 29 "`errprint(Flex requires GNU M4. Set the PATH or set the M4 environment variable to its path name.)" 30 " m4exit(2)')\n"; 31 32 33 /** global chain. */ 34 struct filter *output_chain = NULL; 35 36 /* Allocate and initialize an external filter. 37 * @param chain the current chain or NULL for new chain 38 * @param cmd the command to execute. 39 * @param ... a NULL terminated list of (const char*) arguments to command, 40 * not including argv[0]. 41 * @return newest filter in chain 42 */ 43 struct filter * 44 filter_create_ext(struct filter * chain, const char *cmd, 45 ...) 46 { 47 struct filter *f; 48 int max_args; 49 const char *s; 50 va_list ap; 51 52 /* allocate and initialize new filter */ 53 f = malloc(sizeof(struct filter)); 54 if (!f) 55 flexerror(_("malloc failed (f) in filter_create_ext")); 56 memset(f, 0, sizeof(*f)); 57 f->filter_func = NULL; 58 f->extra = NULL; 59 f->next = NULL; 60 f->argc = 0; 61 62 if (chain != NULL) { 63 /* append f to end of chain */ 64 while (chain->next) 65 chain = chain->next; 66 chain->next = f; 67 } 68 /* allocate argv, and populate it with the argument list. */ 69 max_args = 8; 70 f->argv = malloc(sizeof(char *) * (max_args + 1)); 71 if (!f->argv) 72 flexerror(_("malloc failed (f->argv) in filter_create_ext")); 73 f->argv[f->argc++] = cmd; 74 75 va_start(ap, cmd); 76 while ((s = va_arg(ap, const char *)) != NULL) { 77 if (f->argc >= max_args) { 78 max_args += 8; 79 f->argv = realloc(f->argv, 80 sizeof(char *) * (max_args + 1)); 81 } 82 f->argv[f->argc++] = s; 83 } 84 f->argv[f->argc] = NULL; 85 86 va_end(ap); 87 return f; 88 } 89 90 /* Allocate and initialize an internal filter. 91 * @param chain the current chain or NULL for new chain 92 * @param filter_func The function that will perform the filtering. 93 * filter_func should return 0 if successful, and -1 94 * if an error occurs -- or it can simply exit(). 95 * @param extra optional user-defined data to pass to the filter. 96 * @return newest filter in chain 97 */ 98 struct filter * 99 filter_create_int(struct filter * chain, 100 int (*filter_func) (struct filter *), 101 void *extra) 102 { 103 struct filter *f; 104 105 /* allocate and initialize new filter */ 106 f = malloc(sizeof(struct filter)); 107 if (!f) 108 flexerror(_("malloc failed in filter_create_int")); 109 memset(f, 0, sizeof(*f)); 110 f->next = NULL; 111 f->argc = 0; 112 f->argv = NULL; 113 114 f->filter_func = filter_func; 115 f->extra = extra; 116 117 if (chain != NULL) { 118 /* append f to end of chain */ 119 while (chain->next) 120 chain = chain->next; 121 chain->next = f; 122 } 123 return f; 124 } 125 126 /** Fork and exec entire filter chain. 127 * @param chain The head of the chain. 128 * @return true on success. 129 */ 130 bool 131 filter_apply_chain(struct filter * chain) 132 { 133 int pid, pipes[2]; 134 int r; 135 const int readsz = 512; 136 char *buf; 137 138 139 /* 140 * Tricky recursion, since we want to begin the chain at the END. 141 * Why? Because we need all the forked processes to be children of 142 * the main flex process. 143 */ 144 if (chain) 145 filter_apply_chain(chain->next); 146 else 147 return true; 148 149 /* 150 * Now we are the right-most unprocessed link in the chain. 151 */ 152 153 fflush(stdout); 154 fflush(stderr); 155 156 157 if (pipe(pipes) == -1) 158 flexerror(_("pipe failed")); 159 160 if ((pid = fork()) == -1) 161 flexerror(_("fork failed")); 162 163 if (pid == 0) { 164 /* child */ 165 166 /* 167 * We need stdin (the FILE* stdin) to connect to this new 168 * pipe. There is no portable way to set stdin to a new file 169 * descriptor, as stdin is not an lvalue on some systems 170 * (BSD). So we dup the new pipe onto the stdin descriptor 171 * and use a no-op fseek to sync the stream. This is a Hail 172 * Mary situation. It seems to work. 173 */ 174 close(pipes[1]); 175 clearerr(stdin); 176 if (dup2(pipes[0], fileno(stdin)) == -1) 177 flexfatal(_("dup2(pipes[0],0)")); 178 close(pipes[0]); 179 fseek(stdin, 0, SEEK_CUR); 180 181 /* run as a filter, either internally or by exec */ 182 if (chain->filter_func) { 183 int r; 184 185 if ((r = chain->filter_func(chain)) == -1) 186 flexfatal(_("filter_func failed")); 187 exit(0); 188 } else { 189 execvp(chain->argv[0], 190 (char **const) (chain->argv)); 191 lerrsf_fatal(_("exec of %s failed"), 192 chain->argv[0]); 193 } 194 195 exit(1); 196 } 197 /* Parent */ 198 close(pipes[0]); 199 if (dup2(pipes[1], fileno(stdout)) == -1) 200 flexfatal(_("dup2(pipes[1],1)")); 201 close(pipes[1]); 202 fseek(stdout, 0, SEEK_CUR); 203 204 return true; 205 } 206 207 /** Truncate the chain to max_len number of filters. 208 * @param chain the current chain. 209 * @param max_len the maximum length of the chain. 210 * @return the resulting length of the chain. 211 */ 212 int 213 filter_truncate(struct filter * chain, int max_len) 214 { 215 int len = 1; 216 217 if (!chain) 218 return 0; 219 220 while (chain->next && len < max_len) { 221 chain = chain->next; 222 ++len; 223 } 224 225 chain->next = NULL; 226 return len; 227 } 228 229 /** Splits the chain in order to write to a header file. 230 * Similar in spirit to the 'tee' program. 231 * The header file name is in extra. 232 * @return 0 (zero) on success, and -1 on failure. 233 */ 234 int 235 filter_tee_header(struct filter * chain) 236 { 237 /* 238 * This function reads from stdin and writes to both the C file and 239 * the header file at the same time. 240 */ 241 242 const int readsz = 512; 243 char *buf; 244 int to_cfd = -1; 245 FILE *to_c = NULL, *to_h = NULL; 246 bool write_header; 247 248 write_header = (chain->extra != NULL); 249 250 /* 251 * Store a copy of the stdout pipe, which is already piped to C file 252 * through the running chain. Then create a new pipe to the H file as 253 * stdout, and fork the rest of the chain again. 254 */ 255 256 if ((to_cfd = dup(1)) == -1) 257 flexfatal(_("dup(1) failed")); 258 to_c = fdopen(to_cfd, "w"); 259 260 if (write_header) { 261 if (freopen((char *) chain->extra, "w", stdout) == NULL) 262 flexfatal(_("freopen(headerfilename) failed")); 263 264 filter_apply_chain(chain->next); 265 to_h = stdout; 266 } 267 /* 268 * Now to_c is a pipe to the C branch, and to_h is a pipe to the H 269 * branch. 270 */ 271 272 if (write_header) { 273 fputs(check_4_gnu_m4, to_h); 274 fputs("m4_changecom`'m4_dnl\n", to_h); 275 fputs("m4_changequote`'m4_dnl\n", to_h); 276 fputs("m4_changequote([[,]])[[]]m4_dnl\n", to_h); 277 fputs("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_h); 278 fputs("m4_define( [[M4_YY_IN_HEADER]],[[]])m4_dnl\n", 279 to_h); 280 fprintf(to_h, "#ifndef %sHEADER_H\n", prefix); 281 fprintf(to_h, "#define %sHEADER_H 1\n", prefix); 282 fprintf(to_h, "#define %sIN_HEADER 1\n\n", prefix); 283 fprintf(to_h, 284 "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n", 285 headerfilename ? headerfilename : "<stdout>"); 286 287 } 288 fputs(check_4_gnu_m4, to_c); 289 fputs("m4_changecom`'m4_dnl\n", to_c); 290 fputs("m4_changequote`'m4_dnl\n", to_c); 291 fputs("m4_changequote([[,]])[[]]m4_dnl\n", to_c); 292 fputs("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_c); 293 fprintf(to_c, "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n", 294 outfilename ? outfilename : "<stdout>"); 295 296 buf = malloc(readsz); 297 if (!buf) 298 flexerror(_("malloc failed in filter_tee_header")); 299 while (fgets(buf, readsz, stdin)) { 300 fputs(buf, to_c); 301 if (write_header) 302 fputs(buf, to_h); 303 } 304 305 if (write_header) { 306 fprintf(to_h, "\n"); 307 308 /* 309 * write a fake line number. It will get fixed by the linedir 310 * filter. 311 */ 312 fprintf(to_h, "#line 4000 \"M4_YY_OUTFILE_NAME\"\n"); 313 314 fprintf(to_h, "#undef %sIN_HEADER\n", prefix); 315 fprintf(to_h, "#endif /* %sHEADER_H */\n", prefix); 316 fputs("m4_undefine( [[M4_YY_IN_HEADER]])m4_dnl\n", to_h); 317 318 fflush(to_h); 319 if (ferror(to_h)) 320 lerrsf(_("error writing output file %s"), 321 (char *) chain->extra); 322 323 else if (fclose(to_h)) 324 lerrsf(_("error closing output file %s"), 325 (char *) chain->extra); 326 } 327 fflush(to_c); 328 if (ferror(to_c)) 329 lerrsf(_("error writing output file %s"), 330 outfilename ? outfilename : "<stdout>"); 331 332 else if (fclose(to_c)) 333 lerrsf(_("error closing output file %s"), 334 outfilename ? outfilename : "<stdout>"); 335 336 while (wait(0) > 0); 337 338 exit(0); 339 return 0; 340 } 341 342 /** Adjust the line numbers in the #line directives of the generated scanner. 343 * After the m4 expansion, the line numbers are incorrect since the m4 macros 344 * can add or remove lines. This only adjusts line numbers for generated code, 345 * not user code. This also happens to be a good place to squeeze multiple 346 * blank lines into a single blank line. 347 */ 348 int 349 filter_fix_linedirs(struct filter * chain) 350 { 351 char *buf; 352 const int readsz = 512; 353 int lineno = 1; 354 bool in_gen = true; /* in generated code */ 355 bool last_was_blank = false; 356 357 if (!chain) 358 return 0; 359 360 buf = malloc(readsz); 361 if (!buf) 362 flexerror(_("malloc failed in filter_fix_linedirs")); 363 364 while (fgets(buf, readsz, stdin)) { 365 366 regmatch_t m[10]; 367 368 /* Check for #line directive. */ 369 if (buf[0] == '#' 370 && regexec(®ex_linedir, buf, 3, m, 0) == 0) { 371 372 int num; 373 char *fname; 374 375 /* extract the line number and filename */ 376 num = regmatch_strtol(&m[1], buf, NULL, 0); 377 fname = regmatch_dup(&m[2], buf); 378 379 if (strcmp(fname, 380 outfilename ? outfilename : "<stdout>") == 0 || 381 strcmp(fname, headerfilename ? headerfilename : 382 "<stdout>") == 0) { 383 384 char *s1, *s2; 385 char filename[MAXLINE]; 386 387 s1 = fname; 388 s2 = filename; 389 390 while ((s2 - filename) < (MAXLINE - 1) && *s1) { 391 /* Escape the backslash */ 392 if (*s1 == '\\') 393 *s2++ = '\\'; 394 /* Escape the double quote */ 395 if (*s1 == '\"') 396 *s2++ = '\\'; 397 /* Copy the character as usual */ 398 *s2++ = *s1++; 399 } 400 401 *s2 = '\0'; 402 403 /* Adjust the line directives. */ 404 in_gen = true; 405 snprintf(buf, readsz, "#line %d \"%s\"\n", 406 lineno + 1, filename); 407 } else { 408 /* 409 * it's a #line directive for code we didn't 410 * write 411 */ 412 in_gen = false; 413 } 414 415 free(fname); 416 last_was_blank = false; 417 } 418 /* squeeze blank lines from generated code */ 419 else if (in_gen && 420 regexec(®ex_blank_line, buf, 0, NULL, 0) == 0) { 421 if (last_was_blank) 422 continue; 423 else 424 last_was_blank = true; 425 } else { 426 /* it's a line of normal, non-empty code. */ 427 last_was_blank = false; 428 } 429 430 fputs(buf, stdout); 431 lineno++; 432 } 433 fflush(stdout); 434 if (ferror(stdout)) 435 lerrsf(_("error writing output file %s"), 436 outfilename ? outfilename : "<stdout>"); 437 438 else if (fclose(stdout)) 439 lerrsf(_("error closing output file %s"), 440 outfilename ? outfilename : "<stdout>"); 441 442 return 0; 443 } 444