xref: /plan9/sys/src/cmd/postscript/postreverse/postreverse.c (revision 14f51593fd82e19ba95969a8c07ff71131015979)
1 /*
2  * postreverse - reverse the page order in certain PostScript files.
3  *
4  * Page reversal relies on being able to locate sections of a document using file
5  * structuring comments defined by Adobe (ie. the 1.0 and now 2.0 conventions) and
6  * a few I've added. Among other things a minimally conforming document, according
7  * to the 1.0 conventions,
8  *
9  *	1) Marks the end of the prologue with an %%EndProlog comment.
10  *
11  *	2) Starts each page with a %%Page: comment.
12  *
13  *	3) Marks the end of all the pages with a %%Trailer comment.
14  *
15  *	4) Obeys page independence (ie. pages can be arbitrarily rearranged).
16  *
17  * The most important change (at least for this program) that Adobe made in going
18  * from the 1.0 to the 2.0 structuring conventions was in the prologue. They now
19  * say the prologue should only define things, and the global initialization that
20  * was in the prologue (1.0 conventions) should now come after the %%EndProlog
21  * comment but before the first %%Page: comment and be bracketed by %%BeginSetup
22  * and %%EndSetup comments. So a document that conforms to Adobe's 2.0 conventions,
23  *
24  *	1) Marks the end of the prologue (only definitions) with %%EndProlog.
25  *
26  *	2) Brackets global initialization with %%BeginSetup and %%EndSetup comments
27  *	   which come after the prologue but before the first %%Page: comment.
28  *
29  *	3) Starts each page with a %%Page: comment.
30  *
31  *	4) Marks the end of all the pages with a %%Trailer comment.
32  *
33  *	5) Obeys page independence.
34  *
35  * postreverse can handle documents that follow the 1.0 or 2.0 conventions, but has
36  * also been extended slightly so it works properly with the translators (primarily
37  * dpost) supplied with this package. The page independence requirement has been
38  * relaxed some. In particular definitions exported to the global environment from
39  * within a page should be bracketed by %%BeginGlobal and %%EndGlobal comments.
40  * postreverse pulls them out of each page and inserts them in the setup section
41  * of the document, immediately before it writes the %%EndProlog (for version 1.0)
42  * or %%EndSetup (for version 2.0) comments.
43  *
44  * In addition postreverse accepts documents that choose to mark the end of each
45  * page with a %%EndPage: comment, which from a translator's point of view is often
46  * a more natural approach. Both page boundary comments (ie. %%Page: and %%EndPage:)
47  * are also accepted, but be warned that everything between consecutive %%EndPage:
48  * and %%Page: comments will be ignored.
49  *
50  * So a document that will reverse properly with postreverse,
51  *
52  *	1) Marks the end of the prologue with %%EndProlog.
53  *
54  *	2) May have a %%BeginSetup/%%EndSetup comment pair before the first %%Page:
55  *	   comment that brackets any global initialization.
56  *
57  *	3) Marks the start of each page with a %%Page: comment, or the end of each
58  *	   page with a %%EndPage: comment. Both page boundary comments are allowed.
59  *
60  *	4) Marks the end of all the pages with a %%Trailer comment.
61  *
62  *	5) Obeys page independence or violates it to a rather limited extent and
63  *	   marks the violations with %%BeginGlobal and %%EndGlobal comments.
64  *
65  * If no file arguments are given postreverse copies stdin to a temporary file and
66  * then processes that file. That means the input is read three times (rather than
67  * two) whenever we handle stdin. That's expensive, and shouldn't be too difficult
68  * to fix, but I haven't gotten around to it yet.
69  */
70 
71 #define _BSD_EXTENSION
72 #define _POSIX_SOURCE
73 
74 #include <stdio.h>
75 #include <stdlib.h>
76 #include <unistd.h>
77 #include <string.h>
78 #include <signal.h>
79 #include <sys/types.h>
80 #include <fcntl.h>
81 
82 #include "comments.h"			/* PostScript file structuring comments */
83 #include "gen.h"			/* general purpose definitions */
84 #include "path.h"			/* for temporary directory */
85 #include "ext.h"			/* external variable declarations */
86 #include "postreverse.h"		/* a few special definitions */
87 
88 int	page = 1;			/* current page number */
89 int	forms = 1;			/* forms per page in the input file */
90 
91 char	*temp_dir = TEMPDIR;		/* temp directory for copying stdin */
92 
93 Pages	pages[1000];			/* byte offsets for all pages */
94 int	next_page = 0;			/* next page goes here */
95 long	start;				/* starting offset for next page */
96 long	endoff = -1;			/* offset where TRAILER was found */
97 int	noreverse = FALSE;		/* don't reverse pages if TRUE */
98 char	*endprolog = ENDPROLOG;		/* occasionally changed to ENDSETUP */
99 
100 double	version = 3.3;			/* of the input file */
101 int	ignoreversion = FALSE;		/* ignore possible forms.ps problems */
102 
103 char	buf[2048];			/* line buffer for input file */
104 
105 FILE	*fp_in;				/* stuff is read from this file */
106 FILE	*fp_out = stdout;		/* and written here */
107 
main(agc,agv)108 main(agc, agv)
109     int		agc;
110     char	*agv[];
111 {
112 /*
113  * A simple program that reverses the pages in specially formatted PostScript
114  * files. Will work with all the translators in this package, and should handle
115  * any document that conforms to Adobe's version 1.0 or 2.0 file structuring
116  * conventions. Only one input file is allowed, and it can either be a named (on
117  * the command line) file or stdin.
118  */
119 
120     argc = agc;				/* other routines may want them */
121     argv = agv;
122 
123     prog_name = argv[0];		/* just for error messages */
124 
125     init_signals();			/* sets up interrupt handling */
126     options();				/* first get command line options */
127     arguments();			/* then process non-option arguments */
128     done();				/* and clean things up */
129 
130     exit(x_stat);			/* not much could be wrong */
131     return 0;
132 }
133 
init_signals()134 init_signals()
135 {
136 /*
137  * Makes sure we handle interrupts properly.
138  */
139 
140     if ( signal(SIGINT, interrupt) == SIG_IGN )  {
141 	signal(SIGINT, SIG_IGN);
142 	signal(SIGQUIT, SIG_IGN);
143 	signal(SIGHUP, SIG_IGN);
144     } else {
145 	signal(SIGHUP, interrupt);
146 	signal(SIGQUIT, interrupt);
147     }
148 
149     signal(SIGTERM, interrupt);
150 }
151 
options()152 options()
153 {
154     int		ch;			/* return value from getopt() */
155     char	*optnames = "n:o:rvT:DI";
156 
157     extern char	*optarg;		/* used by getopt() */
158     extern int	optind;
159 
160 /*
161  *
162  * Reads and processes the command line options. The -r option (ie. the one that
163  * turns page reversal off) is really only useful if you want to take dpost output
164  * and produce a page independent output file. In that case global definitions
165  * made within pages and bracketed by %%BeginGlobal/%%EndGlobal comments will be
166  * moved into the prologue or setup section of the document.
167  *
168  */
169 
170     while ( (ch = getopt(argc, argv, optnames)) != EOF )  {
171 	switch ( ch )  {
172 	    case 'n':			/* forms per page */
173 		    if ( (forms = atoi(optarg)) <= 0 )
174 			error(FATAL, "illegal forms request %s", optarg);
175 		    break;
176 
177 	    case 'o':			/* output page list */
178 		    out_list(optarg);
179 		    break;
180 
181 	    case 'r':			/* don't reverse the pages */
182 		    noreverse = TRUE;
183 		    break;
184 
185 	    case 'v':			/* ignore possible forms.ps problems */
186 		    ignoreversion = TRUE;
187 		    break;
188 
189 	    case 'T':			/* temporary file directory */
190 		    temp_dir = optarg;
191 		    break;
192 
193 	    case 'D':			/* debug flag */
194 		    debug = ON;
195 		    break;
196 
197 	    case 'I':			/* ignore FATAL errors */
198 		    ignore = ON;
199 		    break;
200 
201 	    case '?':			/* don't understand the option */
202 		    error(FATAL, "");
203 		    break;
204 
205 	    default:			/* don't know what to do for ch */
206 		    error(FATAL, "missing case for option %c\n", ch);
207 		    break;
208 	}   /* End switch */
209     }   /* End while */
210 
211     argc -= optind;			/* get ready for non-option args */
212     argv += optind;
213 
214 }   /* End of options */
215 
216 /*****************************************************************************/
217 
arguments()218 arguments()
219 
220 {
221 
222     char	*name;			/* name of the input file */
223 
224 /*
225  *
226  * postreverse only handles one input file at a time, so if there's more than one
227  * argument left when we get here we'll quit. If none remain we copy stdin to a
228  * temporary file and process that file.
229  *
230  */
231 
232     if ( argc > 1 )			/* can't handle more than one file */
233 	error(FATAL, "too many arguments");
234 
235     if ( argc == 0 )			/* copy stdin to a temporary file */
236 	name = copystdin();
237     else name = *argv;
238 
239     if ( (fp_in = fopen(name, "r")) == NULL )
240 	error(FATAL, "can't open %s", name);
241 
242     reverse();
243 
244 }   /* End of arguments */
245 
246 /*****************************************************************************/
247 
done()248 done()
249 
250 {
251 
252 /*
253  *
254  * Cleans things up after we've finished reversing the pages in the input file.
255  * All that's really left to do is remove the temp file, provided we used one.
256  *
257  */
258 
259     if ( temp_file != NULL )
260 	unlink(temp_file);
261 
262 }   /* End of done */
263 
264 /*****************************************************************************/
265 
copystdin()266 char *copystdin()
267 
268 {
269 
270     int		fd_out;			/* for the temporary file */
271     int		fd_in;			/* for stdin */
272     int		count;			/* number of bytes put in buf[] */
273 
274 /*
275  *
276  * Copies stdin to a temporary file and returns the pathname of that file to the
277  * caller. It's an expensive way of doing things, because it means we end up
278  * reading the input file three times - rather than just twice. Could probably be
279  * fixed by creating the temporary file on the fly as we read the file the first
280  * time.
281  *
282  */
283 
284     if ( (temp_file = tempnam(temp_dir, "post")) == NULL )
285 	error(FATAL, "can't generate temp file name");
286 
287     if ( (fd_out = creat(temp_file, 0660)) == -1 )
288 	error(FATAL, "can't open %s", temp_file);
289 
290     fd_in = fileno(stdin);
291 
292     while ( (count = read(fd_in, buf, sizeof(buf))) > 0 )
293 	if ( write(fd_out, buf, count) != count )
294 	    error(FATAL, "error writing to %s", temp_file);
295 
296     close(fd_out);
297 
298     return(temp_file);
299 
300 }   /* End of copystdin */
301 
302 /*****************************************************************************/
303 
reverse()304 reverse()
305 
306 {
307 
308 /*
309  *
310  * Begins by looking for the ENDPROLOG comment in the input file. Everything up to
311  * that comment is copied to the output file. If the comment isn't found the entire
312  * input file is copied and moreprolog() returns FALSE. Otherwise readpages() reads
313  * the rest of the input file and remembers (in pages[]) where each page starts and
314  * ends. In addition everything bracketed by %%BeginGlobal and %%EndGlobal comments
315  * is immediately added to the new prologue (or setup section) and ends up being
316  * removed from the individual pages. When readpages() finds the TRAILER comment
317  * or gets to the end of the input file we go back to the pages[] array and use
318  * the saved offsets to write the pages out in reverse order. Finally everything
319  * from the TRAILER comment to the end of the input file is copied to the output
320  * file.
321  *
322  */
323 
324     if ( moreprolog(ENDPROLOG) == TRUE )  {
325 	readpages();
326 	writepages();
327 	trailer();
328     }	/* End if */
329 
330 }   /* End of reverse */
331 
332 /*****************************************************************************/
333 
moreprolog(str)334 moreprolog(str)
335 
336     char	*str;			/* copy everything up to this string */
337 
338 {
339 
340     int		len;			/* length of FORMSPERPAGE string */
341     int		vlen;			/* length of VERSION string */
342 
343 /*
344  *
345  * Looks for string *str at the start of a line and copies everything up to that
346  * string to the output file. If *str isn't found the entire input file will end
347  * up being copied to the output file and FALSE will be returned to the caller.
348  * The first call (made from reverse()) looks for ENDPROLOG. Any other call comes
349  * from readpages() and will be looking for the ENDSETUP comment.
350  *
351  */
352 
353     len = strlen(FORMSPERPAGE);
354     vlen = strlen(VERSION);
355 
356     while ( fgets(buf, sizeof(buf), fp_in) != NULL )  {
357 	if ( strcmp(buf, str) == 0 )
358 	    return(TRUE);
359 	else if ( strncmp(buf, FORMSPERPAGE, len) == 0 )
360 	    forms = atoi(&buf[len+1]);
361 	else if ( strncmp(buf, VERSION, vlen) == 0 )
362 	    version = atof(&buf[vlen+1]);
363 	fprintf(fp_out, "%s", buf);
364     }	/* End while */
365 
366     return(FALSE);
367 
368 }   /* End of moreprolog */
369 
370 /*****************************************************************************/
371 
readpages()372 readpages()
373 
374 {
375 
376     int		endpagelen;		/* length of ENDPAGE */
377     int		pagelen;		/* and PAGE strings */
378     int		sawendpage = TRUE;	/* ENDPAGE equivalent marked last page */
379     int		gotpage = FALSE;	/* TRUE disables BEGINSETUP stuff */
380 
381 /*
382  *
383  * Records starting and ending positions of the requested pages (usually all of
384  * them), puts global definitions in the prologue, and remembers where the TRAILER
385  * was found.
386  *
387  * Page boundaries are marked by the strings PAGE, ENDPAGE, or perhaps both.
388  * Application programs will normally find one or the other more convenient, so
389  * in most cases only one kind of page delimiter will be found in a particular
390  * document.
391  *
392  */
393 
394     pages[0].start = ftell(fp_in);	/* first page starts after ENDPROLOG */
395     endprolog = ENDPROLOG;
396 
397     endpagelen = strlen(ENDPAGE);
398     pagelen = strlen(PAGE);
399 
400     while ( fgets(buf, sizeof(buf), fp_in) != NULL )
401 	if ( buf[0] != '%' )
402 	    continue;
403 	else if ( strncmp(buf, ENDPAGE, endpagelen) == 0 )  {
404 	    if ( in_olist(page++) == ON )  {
405 		pages[next_page].empty = FALSE;
406 		pages[next_page++].stop = ftell(fp_in);
407 	    }	/* End if */
408 	    pages[next_page].start = ftell(fp_in);
409 	    sawendpage = TRUE;
410 	    gotpage = TRUE;
411 	} else if ( strncmp(buf, PAGE, pagelen) == 0 )  {
412 	    if ( sawendpage == FALSE && in_olist(page++) == ON )  {
413 		pages[next_page].empty = FALSE;
414 		pages[next_page++].stop = ftell(fp_in) - strlen(buf);
415 	    }	/* End if */
416 	    pages[next_page].start = ftell(fp_in) - strlen(buf);
417 	    sawendpage = FALSE;
418 	    gotpage = TRUE;
419 	} else if ( gotpage == FALSE && strcmp(buf, BEGINSETUP) == 0 )  {
420 	    fprintf(fp_out, "%s", endprolog);
421 	    fprintf(fp_out, "%s", BEGINSETUP);
422 	    moreprolog(ENDSETUP);
423 	    endprolog = ENDSETUP;
424 	} else if ( strcmp(buf, BEGINGLOBAL) == 0 )  {
425 	    moreprolog(ENDGLOBAL);
426 	} else if ( strcmp(buf, TRAILER) == 0 )  {
427 	    if ( sawendpage == FALSE )
428 		pages[next_page++].stop = ftell(fp_in) - strlen(buf);
429 	    endoff = ftell(fp_in);
430 	    break;
431 	}   /* End if */
432 
433 }   /* End of readpages */
434 
435 /*****************************************************************************/
436 
writepages()437 writepages()
438 
439 {
440 
441     int		i, j, k;		/* loop indices */
442 
443 /*
444  *
445  * Goes through the pages[] array, usually from the bottom up, and writes out all
446  * the pages. Documents that print more than one form per page cause things to get
447  * a little more complicated. Each physical page has to have its subpages printed
448  * in the correct order, and we have to build a few dummy subpages for the last
449  * (and now first) sheet of paper, otherwise things will only occasionally work.
450  *
451  */
452 
453     fprintf(fp_out, "%s", endprolog);
454 
455     if ( noreverse == FALSE )		/* fill out the first page */
456 	for ( i = (forms - next_page % forms) % forms; i > 0; i--, next_page++ )
457 	    pages[next_page].empty = TRUE;
458     else forms = next_page;		/* turns reversal off in next loop */
459 
460     for ( i = next_page - forms; i >= 0; i -= forms )
461 	for ( j = i, k = 0; k < forms; j++, k++ )
462 	    if ( pages[j].empty == TRUE ) {
463 		if ( ignoreversion == TRUE || version > 3.1 ) {
464 		    fprintf(fp_out, "%s 0 0\n", PAGE);
465 		    fprintf(fp_out, "/saveobj save def\n");
466 		    fprintf(fp_out, "showpage\n");
467 		    fprintf(fp_out, "saveobj restore\n");
468 		    fprintf(fp_out, "%s 0 0\n", ENDPAGE);
469 		} else {
470 		    fprintf(fp_out, "%s 0 0\n", PAGE);
471 		    fprintf(fp_out, "save showpage restore\n");
472 		    fprintf(fp_out, "%s 0 0\n", ENDPAGE);
473 		}   /* End else */
474 	    } else copypage(pages[j].start, pages[j].stop);
475 
476 }   /* End of writepages */
477 
478 /*****************************************************************************/
479 
copypage(start,stop)480 copypage(start, stop)
481 
482     long	start;			/* starting from this offset */
483     long	stop;			/* and ending here */
484 
485 {
486 
487 /*
488  *
489  * Copies the page beginning at offset start and ending at stop to the output
490  * file. Global definitions are skipped since they've already been added to the
491  * prologue.
492  *
493  */
494 
495     fseek(fp_in, start, 0);
496 
497     while ( ftell(fp_in) < stop && fgets(buf, sizeof(buf), fp_in) != NULL )
498 	if ( buf[0] == '%' && strcmp(buf, BEGINGLOBAL) == 0 )
499 	    while ( fgets(buf, sizeof(buf), fp_in) != NULL && strcmp(buf, ENDGLOBAL) != 0 ) ;
500 	else fprintf(fp_out, "%s", buf);
501 
502 }   /* End of copypage */
503 
504 /*****************************************************************************/
505 
trailer()506 trailer()
507 
508 {
509 
510 /*
511  *
512  * Makes sure everything from the TRAILER string to EOF is copied to the output
513  * file.
514  *
515  */
516 
517     if ( endoff > 0 )  {
518 	fprintf(fp_out, "%s", TRAILER);
519 	fseek(fp_in, endoff, 0);
520 	while ( fgets(buf, sizeof(buf), fp_in) != NULL )
521 	    fprintf(fp_out, "%s", buf);
522     }	/* End if */
523 
524 }   /* End of trailer */
525 
526 /*****************************************************************************/
527 
528