1*20c29e2bSop /* $OpenBSD: filter.c,v 1.10 2024/11/09 18:03:44 op Exp $ */ 2a58c1ecbStedu 3a58c1ecbStedu /* filter - postprocessing of flex output through filters */ 4a58c1ecbStedu 5a58c1ecbStedu /* This file is part of flex. */ 6a58c1ecbStedu 7a58c1ecbStedu /* Redistribution and use in source and binary forms, with or without */ 8a58c1ecbStedu /* modification, are permitted provided that the following conditions */ 9a58c1ecbStedu /* are met: */ 10a58c1ecbStedu 11a58c1ecbStedu /* 1. Redistributions of source code must retain the above copyright */ 12a58c1ecbStedu /* notice, this list of conditions and the following disclaimer. */ 13a58c1ecbStedu /* 2. Redistributions in binary form must reproduce the above copyright */ 14a58c1ecbStedu /* notice, this list of conditions and the following disclaimer in the */ 15a58c1ecbStedu /* documentation and/or other materials provided with the distribution. */ 16a58c1ecbStedu 17a58c1ecbStedu /* Neither the name of the University nor the names of its contributors */ 18a58c1ecbStedu /* may be used to endorse or promote products derived from this software */ 19a58c1ecbStedu /* without specific prior written permission. */ 20a58c1ecbStedu 21a58c1ecbStedu /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ 22a58c1ecbStedu /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ 23a58c1ecbStedu /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ 24a58c1ecbStedu /* PURPOSE. */ 25a58c1ecbStedu 26a58c1ecbStedu #include "flexdef.h" 27a58c1ecbStedu static const char *check_4_gnu_m4 = 28a58c1ecbStedu "m4_dnl ifdef(`__gnu__', ," 29a58c1ecbStedu "`errprint(Flex requires GNU M4. Set the PATH or set the M4 environment variable to its path name.)" 30a58c1ecbStedu " m4exit(2)')\n"; 31a58c1ecbStedu 32a58c1ecbStedu 33a58c1ecbStedu /** global chain. */ 34a58c1ecbStedu struct filter *output_chain = NULL; 35a58c1ecbStedu 36a58c1ecbStedu /* Allocate and initialize an external filter. 37a58c1ecbStedu * @param chain the current chain or NULL for new chain 38a58c1ecbStedu * @param cmd the command to execute. 39a58c1ecbStedu * @param ... a NULL terminated list of (const char*) arguments to command, 40a58c1ecbStedu * not including argv[0]. 41a58c1ecbStedu * @return newest filter in chain 42a58c1ecbStedu */ 431017a5bdStedu struct filter * 441017a5bdStedu filter_create_ext(struct filter * chain, const char *cmd, 45a58c1ecbStedu ...) 46a58c1ecbStedu { 47a58c1ecbStedu struct filter *f; 48a58c1ecbStedu int max_args; 49a58c1ecbStedu const char *s; 50a58c1ecbStedu va_list ap; 51a58c1ecbStedu 52a58c1ecbStedu /* allocate and initialize new filter */ 53e09db2ceStedu f = calloc(sizeof(struct filter), 1); 54a58c1ecbStedu if (!f) 55e09db2ceStedu flexerror(_("calloc failed (f) in filter_create_ext")); 56a58c1ecbStedu f->filter_func = NULL; 57a58c1ecbStedu f->extra = NULL; 58a58c1ecbStedu f->next = NULL; 59a58c1ecbStedu f->argc = 0; 60a58c1ecbStedu 61a58c1ecbStedu if (chain != NULL) { 62a58c1ecbStedu /* append f to end of chain */ 63a58c1ecbStedu while (chain->next) 64a58c1ecbStedu chain = chain->next; 65a58c1ecbStedu chain->next = f; 66a58c1ecbStedu } 67a58c1ecbStedu /* allocate argv, and populate it with the argument list. */ 68a58c1ecbStedu max_args = 8; 694c01f208Stedu f->argv = malloc(sizeof(char *) * (max_args + 1)); 70a58c1ecbStedu if (!f->argv) 714c01f208Stedu flexerror(_("malloc failed (f->argv) in filter_create_ext")); 72a58c1ecbStedu f->argv[f->argc++] = cmd; 73a58c1ecbStedu 74a58c1ecbStedu va_start(ap, cmd); 75a58c1ecbStedu while ((s = va_arg(ap, const char *)) != NULL) { 76a58c1ecbStedu if (f->argc >= max_args) { 77a58c1ecbStedu max_args += 8; 784c01f208Stedu f->argv = realloc(f->argv, 79ba45a58bStedu sizeof(char *) * (max_args + 1)); 80a58c1ecbStedu } 81a58c1ecbStedu f->argv[f->argc++] = s; 82a58c1ecbStedu } 83a58c1ecbStedu f->argv[f->argc] = NULL; 84a58c1ecbStedu 85a58c1ecbStedu va_end(ap); 86a58c1ecbStedu return f; 87a58c1ecbStedu } 88a58c1ecbStedu 89a58c1ecbStedu /* Allocate and initialize an internal filter. 90a58c1ecbStedu * @param chain the current chain or NULL for new chain 91a58c1ecbStedu * @param filter_func The function that will perform the filtering. 92a58c1ecbStedu * filter_func should return 0 if successful, and -1 93a58c1ecbStedu * if an error occurs -- or it can simply exit(). 94a58c1ecbStedu * @param extra optional user-defined data to pass to the filter. 95a58c1ecbStedu * @return newest filter in chain 96a58c1ecbStedu */ 971017a5bdStedu struct filter * 981017a5bdStedu filter_create_int(struct filter * chain, 99a58c1ecbStedu int (*filter_func) (struct filter *), 100a58c1ecbStedu void *extra) 101a58c1ecbStedu { 102a58c1ecbStedu struct filter *f; 103a58c1ecbStedu 104a58c1ecbStedu /* allocate and initialize new filter */ 105e09db2ceStedu f = calloc(sizeof(struct filter), 1); 106a58c1ecbStedu if (!f) 107e09db2ceStedu flexerror(_("calloc failed in filter_create_int")); 108a58c1ecbStedu f->next = NULL; 109a58c1ecbStedu f->argc = 0; 110a58c1ecbStedu f->argv = NULL; 111a58c1ecbStedu 112a58c1ecbStedu f->filter_func = filter_func; 113a58c1ecbStedu f->extra = extra; 114a58c1ecbStedu 115a58c1ecbStedu if (chain != NULL) { 116a58c1ecbStedu /* append f to end of chain */ 117a58c1ecbStedu while (chain->next) 118a58c1ecbStedu chain = chain->next; 119a58c1ecbStedu chain->next = f; 120a58c1ecbStedu } 121a58c1ecbStedu return f; 122a58c1ecbStedu } 123a58c1ecbStedu 124a58c1ecbStedu /** Fork and exec entire filter chain. 125a58c1ecbStedu * @param chain The head of the chain. 126a58c1ecbStedu * @return true on success. 127a58c1ecbStedu */ 1281017a5bdStedu bool 1291017a5bdStedu filter_apply_chain(struct filter * chain) 130a58c1ecbStedu { 131a58c1ecbStedu int pid, pipes[2]; 132a58c1ecbStedu 1331017a5bdStedu /* 1341017a5bdStedu * Tricky recursion, since we want to begin the chain at the END. 1351017a5bdStedu * Why? Because we need all the forked processes to be children of 1361017a5bdStedu * the main flex process. 137a58c1ecbStedu */ 138a58c1ecbStedu if (chain) 139a58c1ecbStedu filter_apply_chain(chain->next); 140a58c1ecbStedu else 141a58c1ecbStedu return true; 142a58c1ecbStedu 1431017a5bdStedu /* 1441017a5bdStedu * Now we are the right-most unprocessed link in the chain. 145a58c1ecbStedu */ 146a58c1ecbStedu 147a58c1ecbStedu fflush(stdout); 148a58c1ecbStedu fflush(stderr); 149a58c1ecbStedu 150a58c1ecbStedu 151a58c1ecbStedu if (pipe(pipes) == -1) 152a58c1ecbStedu flexerror(_("pipe failed")); 153a58c1ecbStedu 154a58c1ecbStedu if ((pid = fork()) == -1) 155a58c1ecbStedu flexerror(_("fork failed")); 156a58c1ecbStedu 157a58c1ecbStedu if (pid == 0) { 158a58c1ecbStedu /* child */ 159a58c1ecbStedu 1601017a5bdStedu /* 1611017a5bdStedu * We need stdin (the FILE* stdin) to connect to this new 1621017a5bdStedu * pipe. There is no portable way to set stdin to a new file 1631017a5bdStedu * descriptor, as stdin is not an lvalue on some systems 1641017a5bdStedu * (BSD). So we dup the new pipe onto the stdin descriptor 1651017a5bdStedu * and use a no-op fseek to sync the stream. This is a Hail 1661017a5bdStedu * Mary situation. It seems to work. 167a58c1ecbStedu */ 168a58c1ecbStedu close(pipes[1]); 169a58c1ecbStedu clearerr(stdin); 170a58c1ecbStedu if (dup2(pipes[0], fileno(stdin)) == -1) 171a58c1ecbStedu flexfatal(_("dup2(pipes[0],0)")); 172a58c1ecbStedu close(pipes[0]); 173a58c1ecbStedu fseek(stdin, 0, SEEK_CUR); 174a58c1ecbStedu 175a58c1ecbStedu /* run as a filter, either internally or by exec */ 176a58c1ecbStedu if (chain->filter_func) { 177dcce3f41Slteo if (chain->filter_func(chain) == -1) 178a58c1ecbStedu flexfatal(_("filter_func failed")); 179a58c1ecbStedu exit(0); 1801017a5bdStedu } else { 181a58c1ecbStedu execvp(chain->argv[0], 182a58c1ecbStedu (char **const) (chain->argv)); 183a58c1ecbStedu lerrsf_fatal(_("exec of %s failed"), 184a58c1ecbStedu chain->argv[0]); 185a58c1ecbStedu } 186a58c1ecbStedu 187a58c1ecbStedu exit(1); 188a58c1ecbStedu } 189a58c1ecbStedu /* Parent */ 190a58c1ecbStedu close(pipes[0]); 191a58c1ecbStedu if (dup2(pipes[1], fileno(stdout)) == -1) 192a58c1ecbStedu flexfatal(_("dup2(pipes[1],1)")); 193a58c1ecbStedu close(pipes[1]); 194a58c1ecbStedu fseek(stdout, 0, SEEK_CUR); 195a58c1ecbStedu 196a58c1ecbStedu return true; 197a58c1ecbStedu } 198a58c1ecbStedu 199a58c1ecbStedu /** Truncate the chain to max_len number of filters. 200a58c1ecbStedu * @param chain the current chain. 201a58c1ecbStedu * @param max_len the maximum length of the chain. 202a58c1ecbStedu * @return the resulting length of the chain. 203a58c1ecbStedu */ 2041017a5bdStedu int 2051017a5bdStedu filter_truncate(struct filter * chain, int max_len) 206a58c1ecbStedu { 207a58c1ecbStedu int len = 1; 208a58c1ecbStedu 209a58c1ecbStedu if (!chain) 210a58c1ecbStedu return 0; 211a58c1ecbStedu 212a58c1ecbStedu while (chain->next && len < max_len) { 213a58c1ecbStedu chain = chain->next; 214a58c1ecbStedu ++len; 215a58c1ecbStedu } 216a58c1ecbStedu 217a58c1ecbStedu chain->next = NULL; 218a58c1ecbStedu return len; 219a58c1ecbStedu } 220a58c1ecbStedu 221a58c1ecbStedu /** Splits the chain in order to write to a header file. 222a58c1ecbStedu * Similar in spirit to the 'tee' program. 223a58c1ecbStedu * The header file name is in extra. 224a58c1ecbStedu * @return 0 (zero) on success, and -1 on failure. 225a58c1ecbStedu */ 2261017a5bdStedu int 2271017a5bdStedu filter_tee_header(struct filter * chain) 228a58c1ecbStedu { 2291017a5bdStedu /* 2301017a5bdStedu * This function reads from stdin and writes to both the C file and 2311017a5bdStedu * the header file at the same time. 232a58c1ecbStedu */ 233a58c1ecbStedu 234a58c1ecbStedu const int readsz = 512; 235a58c1ecbStedu char *buf; 236a58c1ecbStedu int to_cfd = -1; 237a58c1ecbStedu FILE *to_c = NULL, *to_h = NULL; 238a58c1ecbStedu bool write_header; 239a58c1ecbStedu 240a58c1ecbStedu write_header = (chain->extra != NULL); 241a58c1ecbStedu 2421017a5bdStedu /* 2431017a5bdStedu * Store a copy of the stdout pipe, which is already piped to C file 244a58c1ecbStedu * through the running chain. Then create a new pipe to the H file as 245a58c1ecbStedu * stdout, and fork the rest of the chain again. 246a58c1ecbStedu */ 247a58c1ecbStedu 248a58c1ecbStedu if ((to_cfd = dup(1)) == -1) 249a58c1ecbStedu flexfatal(_("dup(1) failed")); 250a58c1ecbStedu to_c = fdopen(to_cfd, "w"); 251a58c1ecbStedu 252a58c1ecbStedu if (write_header) { 253a58c1ecbStedu if (freopen((char *) chain->extra, "w", stdout) == NULL) 254a58c1ecbStedu flexfatal(_("freopen(headerfilename) failed")); 255a58c1ecbStedu 256a58c1ecbStedu filter_apply_chain(chain->next); 257a58c1ecbStedu to_h = stdout; 258a58c1ecbStedu } 2591017a5bdStedu /* 2601017a5bdStedu * Now to_c is a pipe to the C branch, and to_h is a pipe to the H 2611017a5bdStedu * branch. 262a58c1ecbStedu */ 263a58c1ecbStedu 264a58c1ecbStedu if (write_header) { 265a58c1ecbStedu fputs(check_4_gnu_m4, to_h); 266a58c1ecbStedu fputs("m4_changecom`'m4_dnl\n", to_h); 267a58c1ecbStedu fputs("m4_changequote`'m4_dnl\n", to_h); 268a58c1ecbStedu fputs("m4_changequote([[,]])[[]]m4_dnl\n", to_h); 269a58c1ecbStedu fputs("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_h); 270a58c1ecbStedu fputs("m4_define( [[M4_YY_IN_HEADER]],[[]])m4_dnl\n", 271a58c1ecbStedu to_h); 272a58c1ecbStedu fprintf(to_h, "#ifndef %sHEADER_H\n", prefix); 273a58c1ecbStedu fprintf(to_h, "#define %sHEADER_H 1\n", prefix); 274a58c1ecbStedu fprintf(to_h, "#define %sIN_HEADER 1\n\n", prefix); 275a58c1ecbStedu fprintf(to_h, 276a58c1ecbStedu "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n", 277a58c1ecbStedu headerfilename ? headerfilename : "<stdout>"); 278a58c1ecbStedu 279a58c1ecbStedu } 280a58c1ecbStedu fputs(check_4_gnu_m4, to_c); 281a58c1ecbStedu fputs("m4_changecom`'m4_dnl\n", to_c); 282a58c1ecbStedu fputs("m4_changequote`'m4_dnl\n", to_c); 283a58c1ecbStedu fputs("m4_changequote([[,]])[[]]m4_dnl\n", to_c); 284a58c1ecbStedu fputs("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_c); 285a58c1ecbStedu fprintf(to_c, "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n", 286a58c1ecbStedu outfilename ? outfilename : "<stdout>"); 287a58c1ecbStedu 28809a27527Stedu buf = malloc(readsz); 289a58c1ecbStedu if (!buf) 2904c01f208Stedu flexerror(_("malloc failed in filter_tee_header")); 291a58c1ecbStedu while (fgets(buf, readsz, stdin)) { 292a58c1ecbStedu fputs(buf, to_c); 293a58c1ecbStedu if (write_header) 294a58c1ecbStedu fputs(buf, to_h); 295a58c1ecbStedu } 296a58c1ecbStedu 297a58c1ecbStedu if (write_header) { 298a58c1ecbStedu fprintf(to_h, "\n"); 299a58c1ecbStedu 3001017a5bdStedu /* 3011017a5bdStedu * write a fake line number. It will get fixed by the linedir 3021017a5bdStedu * filter. 3031017a5bdStedu */ 304a58c1ecbStedu fprintf(to_h, "#line 4000 \"M4_YY_OUTFILE_NAME\"\n"); 305a58c1ecbStedu 306a58c1ecbStedu fprintf(to_h, "#undef %sIN_HEADER\n", prefix); 307a58c1ecbStedu fprintf(to_h, "#endif /* %sHEADER_H */\n", prefix); 308a58c1ecbStedu fputs("m4_undefine( [[M4_YY_IN_HEADER]])m4_dnl\n", to_h); 309a58c1ecbStedu 310a58c1ecbStedu fflush(to_h); 311a58c1ecbStedu if (ferror(to_h)) 312a58c1ecbStedu lerrsf(_("error writing output file %s"), 313a58c1ecbStedu (char *) chain->extra); 314a58c1ecbStedu 315a58c1ecbStedu else if (fclose(to_h)) 316a58c1ecbStedu lerrsf(_("error closing output file %s"), 317a58c1ecbStedu (char *) chain->extra); 318a58c1ecbStedu } 319a58c1ecbStedu fflush(to_c); 320a58c1ecbStedu if (ferror(to_c)) 321a58c1ecbStedu lerrsf(_("error writing output file %s"), 322a58c1ecbStedu outfilename ? outfilename : "<stdout>"); 323a58c1ecbStedu 324a58c1ecbStedu else if (fclose(to_c)) 325a58c1ecbStedu lerrsf(_("error closing output file %s"), 326a58c1ecbStedu outfilename ? outfilename : "<stdout>"); 327a58c1ecbStedu 328a58c1ecbStedu while (wait(0) > 0); 329a58c1ecbStedu 330a58c1ecbStedu exit(0); 331a58c1ecbStedu return 0; 332a58c1ecbStedu } 333a58c1ecbStedu 334a58c1ecbStedu /** Adjust the line numbers in the #line directives of the generated scanner. 335a58c1ecbStedu * After the m4 expansion, the line numbers are incorrect since the m4 macros 336a58c1ecbStedu * can add or remove lines. This only adjusts line numbers for generated code, 337a58c1ecbStedu * not user code. This also happens to be a good place to squeeze multiple 338a58c1ecbStedu * blank lines into a single blank line. 339a58c1ecbStedu */ 3401017a5bdStedu int 3411017a5bdStedu filter_fix_linedirs(struct filter * chain) 342a58c1ecbStedu { 343a58c1ecbStedu char *buf; 344a58c1ecbStedu const int readsz = 512; 345a58c1ecbStedu int lineno = 1; 346a58c1ecbStedu bool in_gen = true; /* in generated code */ 347a58c1ecbStedu bool last_was_blank = false; 348a58c1ecbStedu 349a58c1ecbStedu if (!chain) 350a58c1ecbStedu return 0; 351a58c1ecbStedu 35209a27527Stedu buf = malloc(readsz); 353a58c1ecbStedu if (!buf) 3544c01f208Stedu flexerror(_("malloc failed in filter_fix_linedirs")); 355a58c1ecbStedu 356a58c1ecbStedu while (fgets(buf, readsz, stdin)) { 357a58c1ecbStedu 358a58c1ecbStedu regmatch_t m[10]; 359a58c1ecbStedu 360a58c1ecbStedu /* Check for #line directive. */ 361a58c1ecbStedu if (buf[0] == '#' 362a58c1ecbStedu && regexec(®ex_linedir, buf, 3, m, 0) == 0) { 363a58c1ecbStedu 364a58c1ecbStedu char *fname; 365a58c1ecbStedu 366a58c1ecbStedu /* extract the line number and filename */ 367*20c29e2bSop regmatch_strtol(&m[1], buf, NULL, 0); 368a58c1ecbStedu fname = regmatch_dup(&m[2], buf); 369a58c1ecbStedu 370a58c1ecbStedu if (strcmp(fname, 371ba45a58bStedu outfilename ? outfilename : "<stdout>") == 0 || 372ba45a58bStedu strcmp(fname, headerfilename ? headerfilename : 373ba45a58bStedu "<stdout>") == 0) { 374a58c1ecbStedu 375a58c1ecbStedu char *s1, *s2; 376a58c1ecbStedu char filename[MAXLINE]; 377a58c1ecbStedu 378a58c1ecbStedu s1 = fname; 379a58c1ecbStedu s2 = filename; 380a58c1ecbStedu 381a58c1ecbStedu while ((s2 - filename) < (MAXLINE - 1) && *s1) { 382a58c1ecbStedu /* Escape the backslash */ 383a58c1ecbStedu if (*s1 == '\\') 384a58c1ecbStedu *s2++ = '\\'; 385a58c1ecbStedu /* Escape the double quote */ 386a58c1ecbStedu if (*s1 == '\"') 387a58c1ecbStedu *s2++ = '\\'; 388a58c1ecbStedu /* Copy the character as usual */ 389a58c1ecbStedu *s2++ = *s1++; 390a58c1ecbStedu } 391a58c1ecbStedu 392a58c1ecbStedu *s2 = '\0'; 393a58c1ecbStedu 394a58c1ecbStedu /* Adjust the line directives. */ 395a58c1ecbStedu in_gen = true; 396a58c1ecbStedu snprintf(buf, readsz, "#line %d \"%s\"\n", 397a58c1ecbStedu lineno + 1, filename); 3981017a5bdStedu } else { 3991017a5bdStedu /* 4001017a5bdStedu * it's a #line directive for code we didn't 4011017a5bdStedu * write 4021017a5bdStedu */ 403a58c1ecbStedu in_gen = false; 404a58c1ecbStedu } 405a58c1ecbStedu 406a58c1ecbStedu free(fname); 407a58c1ecbStedu last_was_blank = false; 408a58c1ecbStedu } 409a58c1ecbStedu /* squeeze blank lines from generated code */ 410ba45a58bStedu else if (in_gen && 411ba45a58bStedu regexec(®ex_blank_line, buf, 0, NULL, 0) == 0) { 412a58c1ecbStedu if (last_was_blank) 413a58c1ecbStedu continue; 414a58c1ecbStedu else 415a58c1ecbStedu last_was_blank = true; 4161017a5bdStedu } else { 417a58c1ecbStedu /* it's a line of normal, non-empty code. */ 418a58c1ecbStedu last_was_blank = false; 419a58c1ecbStedu } 420a58c1ecbStedu 421a58c1ecbStedu fputs(buf, stdout); 422a58c1ecbStedu lineno++; 423a58c1ecbStedu } 424a58c1ecbStedu fflush(stdout); 425a58c1ecbStedu if (ferror(stdout)) 426a58c1ecbStedu lerrsf(_("error writing output file %s"), 427a58c1ecbStedu outfilename ? outfilename : "<stdout>"); 428a58c1ecbStedu 429a58c1ecbStedu else if (fclose(stdout)) 430a58c1ecbStedu lerrsf(_("error closing output file %s"), 431a58c1ecbStedu outfilename ? outfilename : "<stdout>"); 432a58c1ecbStedu 433a58c1ecbStedu return 0; 434a58c1ecbStedu } 435