xref: /plan9-contrib/sys/src/cmd/postscript/postreverse/postreverse.c (revision 219b2ee8daee37f4aad58d63f21287faa8e4ffdc)
1 /*
2  *
3  * postreverse - reverse the page order in certain PostScript files.
4  *
5  * Page reversal relies on being able to locate sections of a document using file
6  * structuring comments defined by Adobe (ie. the 1.0 and now 2.0 conventions) and
7  * a few I've added. Among other things a minimally conforming document, according
8  * to the 1.0 conventions,
9  *
10  *	1) Marks the end of the prologue with an %%EndProlog comment.
11  *
12  *	2) Starts each page with a %%Page: comment.
13  *
14  *	3) Marks the end of all the pages with a %%Trailer comment.
15  *
16  *	4) Obeys page independence (ie. pages can be arbitrarily rearranged).
17  *
18  * The most important change (at least for this program) that Adobe made in going
19  * from the 1.0 to the 2.0 structuring conventions was in the prologue. They now
20  * say the prologue should only define things, and the global initialization that
21  * was in the prologue (1.0 conventions) should now come after the %%EndProlog
22  * comment but before the first %%Page: comment and be bracketed by %%BeginSetup
23  * and %%EndSetup comments. So a document that conforms to Adobe's 2.0 conventions,
24  *
25  *	1) Marks the end of the prologue (only definitions) with %%EndProlog.
26  *
27  *	2) Brackets global initialization with %%BeginSetup and %%EndSetup comments
28  *	   which come after the prologue but before the first %%Page: comment.
29  *
30  *	3) Starts each page with a %%Page: comment.
31  *
32  *	4) Marks the end of all the pages with a %%Trailer comment.
33  *
34  *	5) Obeys page independence.
35  *
36  * postreverse can handle documents that follow the 1.0 or 2.0 conventions, but has
37  * also been extended slightly so it works properly with the translators (primarily
38  * dpost) supplied with this package. The page independence requirement has been
39  * relaxed some. In particular definitions exported to the global environment from
40  * within a page should be bracketed by %%BeginGlobal and %%EndGlobal comments.
41  * postreverse pulls them out of each page and inserts them in the setup section
42  * of the document, immediately before it writes the %%EndProlog (for version 1.0)
43  * or %%EndSetup (for version 2.0) comments.
44  *
45  * In addition postreverse accepts documents that choose to mark the end of each
46  * page with a %%EndPage: comment, which from a translator's point of view is often
47  * a more natural approach. Both page boundary comments (ie. %%Page: and %%EndPage:)
48  * are also accepted, but be warned that everything between consecutive %%EndPage:
49  * and %%Page: comments will be ignored.
50  *
51  * So a document that will reverse properly with postreverse,
52  *
53  *	1) Marks the end of the prologue with %%EndProlog.
54  *
55  *	2) May have a %%BeginSetup/%%EndSetup comment pair before the first %%Page:
56  *	   comment that brackets any global initialization.
57  *
58  *	3) Marks the start of each page with a %%Page: comment, or the end of each
59  *	   page with a %%EndPage: comment. Both page boundary comments are allowed.
60  *
61  *	4) Marks the end of all the pages with a %%Trailer comment.
62  *
63  *	5) Obeys page independence or violates it to a rather limited extent and
64  *	   marks the violations with %%BeginGlobal and %%EndGlobal comments.
65  *
66  * If no file arguments are given postreverse copies stdin to a temporary file and
67  * then processes that file. That means the input is read three times (rather than
68  * two) whenever we handle stdin. That's expensive, and shouldn't be too difficult
69  * to fix, but I haven't gotten around to it yet.
70  *
71  */
72 
73 #include <stdio.h>
74 #include <signal.h>
75 #include <sys/types.h>
76 #include <fcntl.h>
77 
78 #include "comments.h"			/* PostScript file structuring comments */
79 #include "gen.h"			/* general purpose definitions */
80 #include "path.h"			/* for temporary directory */
81 #include "ext.h"			/* external variable declarations */
82 #include "postreverse.h"		/* a few special definitions */
83 
84 int	page = 1;			/* current page number */
85 int	forms = 1;			/* forms per page in the input file */
86 
87 char	*temp_dir = TEMPDIR;		/* temp directory for copying stdin */
88 
89 Pages	pages[1000];			/* byte offsets for all pages */
90 int	next_page = 0;			/* next page goes here */
91 long	start;				/* starting offset for next page */
92 long	endoff = -1;			/* offset where TRAILER was found */
93 int	noreverse = FALSE;		/* don't reverse pages if TRUE */
94 char	*endprolog = ENDPROLOG;		/* occasionally changed to ENDSETUP */
95 
96 double	version = 3.3;			/* of the input file */
97 int	ignoreversion = FALSE;		/* ignore possible forms.ps problems */
98 
99 char	buf[2048];			/* line buffer for input file */
100 
101 FILE	*fp_in;				/* stuff is read from this file */
102 FILE	*fp_out = stdout;		/* and written here */
103 
104 /*****************************************************************************/
105 
106 main(agc, agv)
107 
108     int		agc;
109     char	*agv[];
110 
111 {
112 
113 /*
114  *
115  * A simple program that reverses the pages in specially formatted PostScript
116  * files. Will work with all the translators in this package, and should handle
117  * any document that conforms to Adobe's version 1.0 or 2.0 file structuring
118  * conventions. Only one input file is allowed, and it can either be a named (on
119  * the command line) file or stdin.
120  *
121  */
122 
123     argc = agc;				/* other routines may want them */
124     argv = agv;
125 
126     prog_name = argv[0];		/* just for error messages */
127 
128     init_signals();			/* sets up interrupt handling */
129     options();				/* first get command line options */
130     arguments();			/* then process non-option arguments */
131     done();				/* and clean things up */
132 
133     exit(x_stat);			/* not much could be wrong */
134 
135 }   /* End of main */
136 
137 /*****************************************************************************/
138 
139 init_signals()
140 
141 {
142 
143 /*
144  *
145  * Makes sure we handle interrupts properly.
146  *
147  */
148 
149     if ( signal(SIGINT, interrupt) == SIG_IGN )  {
150 	signal(SIGINT, SIG_IGN);
151 	signal(SIGQUIT, SIG_IGN);
152 	signal(SIGHUP, SIG_IGN);
153     } else {
154 	signal(SIGHUP, interrupt);
155 	signal(SIGQUIT, interrupt);
156     }   /* End else */
157 
158     signal(SIGTERM, interrupt);
159 
160 }   /* End of init_signals */
161 
162 /*****************************************************************************/
163 
164 options()
165 
166 {
167 
168     int		ch;			/* return value from getopt() */
169     char	*optnames = "n:o:rvT:DI";
170 
171     extern char	*optarg;		/* used by getopt() */
172     extern int	optind;
173 
174 /*
175  *
176  * Reads and processes the command line options. The -r option (ie. the one that
177  * turns page reversal off) is really only useful if you want to take dpost output
178  * and produce a page independent output file. In that case global definitions
179  * made within pages and bracketed by %%BeginGlobal/%%EndGlobal comments will be
180  * moved into the prologue or setup section of the document.
181  *
182  */
183 
184     while ( (ch = getopt(argc, argv, optnames)) != EOF )  {
185 	switch ( ch )  {
186 	    case 'n':			/* forms per page */
187 		    if ( (forms = atoi(optarg)) <= 0 )
188 			error(FATAL, "illegal forms request %s", optarg);
189 		    break;
190 
191 	    case 'o':			/* output page list */
192 		    out_list(optarg);
193 		    break;
194 
195 	    case 'r':			/* don't reverse the pages */
196 		    noreverse = TRUE;
197 		    break;
198 
199 	    case 'v':			/* ignore possible forms.ps problems */
200 		    ignoreversion = TRUE;
201 		    break;
202 
203 	    case 'T':			/* temporary file directory */
204 		    temp_dir = optarg;
205 		    break;
206 
207 	    case 'D':			/* debug flag */
208 		    debug = ON;
209 		    break;
210 
211 	    case 'I':			/* ignore FATAL errors */
212 		    ignore = ON;
213 		    break;
214 
215 	    case '?':			/* don't understand the option */
216 		    error(FATAL, "");
217 		    break;
218 
219 	    default:			/* don't know what to do for ch */
220 		    error(FATAL, "missing case for option %c\n", ch);
221 		    break;
222 	}   /* End switch */
223     }   /* End while */
224 
225     argc -= optind;			/* get ready for non-option args */
226     argv += optind;
227 
228 }   /* End of options */
229 
230 /*****************************************************************************/
231 
232 arguments()
233 
234 {
235 
236     char	*name;			/* name of the input file */
237 
238 /*
239  *
240  * postreverse only handles one input file at a time, so if there's more than one
241  * argument left when we get here we'll quit. If none remain we copy stdin to a
242  * temporary file and process that file.
243  *
244  */
245 
246     if ( argc > 1 )			/* can't handle more than one file */
247 	error(FATAL, "too many arguments");
248 
249     if ( argc == 0 )			/* copy stdin to a temporary file */
250 	name = copystdin();
251     else name = *argv;
252 
253     if ( (fp_in = fopen(name, "r")) == NULL )
254 	error(FATAL, "can't open %s", name);
255 
256     reverse();
257 
258 }   /* End of arguments */
259 
260 /*****************************************************************************/
261 
262 done()
263 
264 {
265 
266 /*
267  *
268  * Cleans things up after we've finished reversing the pages in the input file.
269  * All that's really left to do is remove the temp file, provided we used one.
270  *
271  */
272 
273     if ( temp_file != NULL )
274 	unlink(temp_file);
275 
276 }   /* End of done */
277 
278 /*****************************************************************************/
279 
280 char *copystdin()
281 
282 {
283 
284     int		fd_out;			/* for the temporary file */
285     int		fd_in;			/* for stdin */
286     int		count;			/* number of bytes put in buf[] */
287 
288 /*
289  *
290  * Copies stdin to a temporary file and returns the pathname of that file to the
291  * caller. It's an expensive way of doing things, because it means we end up
292  * reading the input file three times - rather than just twice. Could probably be
293  * fixed by creating the temporary file on the fly as we read the file the first
294  * time.
295  *
296  */
297 
298     if ( (temp_file = tempnam(temp_dir, "post")) == NULL )
299 	error(FATAL, "can't generate temp file name");
300 
301     if ( (fd_out = creat(temp_file, 0660)) == -1 )
302 	error(FATAL, "can't open %s", temp_file);
303 
304     fd_in = fileno(stdin);
305 
306     while ( (count = read(fd_in, buf, sizeof(buf))) > 0 )
307 	if ( write(fd_out, buf, count) != count )
308 	    error(FATAL, "error writing to %s", temp_file);
309 
310     close(fd_out);
311 
312     return(temp_file);
313 
314 }   /* End of copystdin */
315 
316 /*****************************************************************************/
317 
318 reverse()
319 
320 {
321 
322 /*
323  *
324  * Begins by looking for the ENDPROLOG comment in the input file. Everything up to
325  * that comment is copied to the output file. If the comment isn't found the entire
326  * input file is copied and moreprolog() returns FALSE. Otherwise readpages() reads
327  * the rest of the input file and remembers (in pages[]) where each page starts and
328  * ends. In addition everything bracketed by %%BeginGlobal and %%EndGlobal comments
329  * is immediately added to the new prologue (or setup section) and ends up being
330  * removed from the individual pages. When readpages() finds the TRAILER comment
331  * or gets to the end of the input file we go back to the pages[] array and use
332  * the saved offsets to write the pages out in reverse order. Finally everything
333  * from the TRAILER comment to the end of the input file is copied to the output
334  * file.
335  *
336  */
337 
338     if ( moreprolog(ENDPROLOG) == TRUE )  {
339 	readpages();
340 	writepages();
341 	trailer();
342     }	/* End if */
343 
344 }   /* End of reverse */
345 
346 /*****************************************************************************/
347 
348 moreprolog(str)
349 
350     char	*str;			/* copy everything up to this string */
351 
352 {
353 
354     int		len;			/* length of FORMSPERPAGE string */
355     int		vlen;			/* length of VERSION string */
356 
357 /*
358  *
359  * Looks for string *str at the start of a line and copies everything up to that
360  * string to the output file. If *str isn't found the entire input file will end
361  * up being copied to the output file and FALSE will be returned to the caller.
362  * The first call (made from reverse()) looks for ENDPROLOG. Any other call comes
363  * from readpages() and will be looking for the ENDSETUP comment.
364  *
365  */
366 
367     len = strlen(FORMSPERPAGE);
368     vlen = strlen(VERSION);
369 
370     while ( fgets(buf, sizeof(buf), fp_in) != NULL )  {
371 	if ( strcmp(buf, str) == 0 )
372 	    return(TRUE);
373 	else if ( strncmp(buf, FORMSPERPAGE, len) == 0 )
374 	    forms = atoi(&buf[len+1]);
375 	else if ( strncmp(buf, VERSION, vlen) == 0 )
376 	    version = atof(&buf[vlen+1]);
377 	fprintf(fp_out, "%s", buf);
378     }	/* End while */
379 
380     return(FALSE);
381 
382 }   /* End of moreprolog */
383 
384 /*****************************************************************************/
385 
386 readpages()
387 
388 {
389 
390     int		endpagelen;		/* length of ENDPAGE */
391     int		pagelen;		/* and PAGE strings */
392     int		sawendpage = TRUE;	/* ENDPAGE equivalent marked last page */
393     int		gotpage = FALSE;	/* TRUE disables BEGINSETUP stuff */
394 
395 /*
396  *
397  * Records starting and ending positions of the requested pages (usually all of
398  * them), puts global definitions in the prologue, and remembers where the TRAILER
399  * was found.
400  *
401  * Page boundaries are marked by the strings PAGE, ENDPAGE, or perhaps both.
402  * Application programs will normally find one or the other more convenient, so
403  * in most cases only one kind of page delimiter will be found in a particular
404  * document.
405  *
406  */
407 
408     pages[0].start = ftell(fp_in);	/* first page starts after ENDPROLOG */
409     endprolog = ENDPROLOG;
410 
411     endpagelen = strlen(ENDPAGE);
412     pagelen = strlen(PAGE);
413 
414     while ( fgets(buf, sizeof(buf), fp_in) != NULL )
415 	if ( buf[0] != '%' )
416 	    continue;
417 	else if ( strncmp(buf, ENDPAGE, endpagelen) == 0 )  {
418 	    if ( in_olist(page++) == ON )  {
419 		pages[next_page].empty = FALSE;
420 		pages[next_page++].stop = ftell(fp_in);
421 	    }	/* End if */
422 	    pages[next_page].start = ftell(fp_in);
423 	    sawendpage = TRUE;
424 	    gotpage = TRUE;
425 	} else if ( strncmp(buf, PAGE, pagelen) == 0 )  {
426 	    if ( sawendpage == FALSE && in_olist(page++) == ON )  {
427 		pages[next_page].empty = FALSE;
428 		pages[next_page++].stop = ftell(fp_in) - strlen(buf);
429 	    }	/* End if */
430 	    pages[next_page].start = ftell(fp_in) - strlen(buf);
431 	    sawendpage = FALSE;
432 	    gotpage = TRUE;
433 	} else if ( gotpage == FALSE && strcmp(buf, BEGINSETUP) == 0 )  {
434 	    fprintf(fp_out, "%s", endprolog);
435 	    fprintf(fp_out, "%s", BEGINSETUP);
436 	    moreprolog(ENDSETUP);
437 	    endprolog = ENDSETUP;
438 	} else if ( strcmp(buf, BEGINGLOBAL) == 0 )  {
439 	    moreprolog(ENDGLOBAL);
440 	} else if ( strcmp(buf, TRAILER) == 0 )  {
441 	    if ( sawendpage == FALSE )
442 		pages[next_page++].stop = ftell(fp_in) - strlen(buf);
443 	    endoff = ftell(fp_in);
444 	    break;
445 	}   /* End if */
446 
447 }   /* End of readpages */
448 
449 /*****************************************************************************/
450 
451 writepages()
452 
453 {
454 
455     int		i, j, k;		/* loop indices */
456 
457 /*
458  *
459  * Goes through the pages[] array, usually from the bottom up, and writes out all
460  * the pages. Documents that print more than one form per page cause things to get
461  * a little more complicated. Each physical page has to have its subpages printed
462  * in the correct order, and we have to build a few dummy subpages for the last
463  * (and now first) sheet of paper, otherwise things will only occasionally work.
464  *
465  */
466 
467     fprintf(fp_out, "%s", endprolog);
468 
469     if ( noreverse == FALSE )		/* fill out the first page */
470 	for ( i = (forms - next_page % forms) % forms; i > 0; i--, next_page++ )
471 	    pages[next_page].empty = TRUE;
472     else forms = next_page;		/* turns reversal off in next loop */
473 
474     for ( i = next_page - forms; i >= 0; i -= forms )
475 	for ( j = i, k = 0; k < forms; j++, k++ )
476 	    if ( pages[j].empty == TRUE ) {
477 		if ( ignoreversion == TRUE || version > 3.1 ) {
478 		    fprintf(fp_out, "%s 0 0\n", PAGE);
479 		    fprintf(fp_out, "/saveobj save def\n");
480 		    fprintf(fp_out, "showpage\n");
481 		    fprintf(fp_out, "saveobj restore\n");
482 		    fprintf(fp_out, "%s 0 0\n", ENDPAGE);
483 		} else {
484 		    fprintf(fp_out, "%s 0 0\n", PAGE);
485 		    fprintf(fp_out, "save showpage restore\n");
486 		    fprintf(fp_out, "%s 0 0\n", ENDPAGE);
487 		}   /* End else */
488 	    } else copypage(pages[j].start, pages[j].stop);
489 
490 }   /* End of writepages */
491 
492 /*****************************************************************************/
493 
494 copypage(start, stop)
495 
496     long	start;			/* starting from this offset */
497     long	stop;			/* and ending here */
498 
499 {
500 
501 /*
502  *
503  * Copies the page beginning at offset start and ending at stop to the output
504  * file. Global definitions are skipped since they've already been added to the
505  * prologue.
506  *
507  */
508 
509     fseek(fp_in, start, 0);
510 
511     while ( ftell(fp_in) < stop && fgets(buf, sizeof(buf), fp_in) != NULL )
512 	if ( buf[0] == '%' && strcmp(buf, BEGINGLOBAL) == 0 )
513 	    while ( fgets(buf, sizeof(buf), fp_in) != NULL && strcmp(buf, ENDGLOBAL) != 0 ) ;
514 	else fprintf(fp_out, "%s", buf);
515 
516 }   /* End of copypage */
517 
518 /*****************************************************************************/
519 
520 trailer()
521 
522 {
523 
524 /*
525  *
526  * Makes sure everything from the TRAILER string to EOF is copied to the output
527  * file.
528  *
529  */
530 
531     if ( endoff > 0 )  {
532 	fprintf(fp_out, "%s", TRAILER);
533 	fseek(fp_in, endoff, 0);
534 	while ( fgets(buf, sizeof(buf), fp_in) != NULL )
535 	    fprintf(fp_out, "%s", buf);
536     }	/* End if */
537 
538 }   /* End of trailer */
539 
540 /*****************************************************************************/
541 
542