xref: /netbsd-src/external/gpl2/groff/dist/contrib/pdfmark/pdfroff.sh (revision 89a07cf815a29524268025a1139fac4c5190f765)
1#! /bin/sh
2# ------------------------------------------------------------------------------
3#
4# Function: Format PDF Output from groff Markup
5#
6# Copyright (C) 2005, Free Software Foundation, Inc.
7# Written by Keith Marshall (keith.d.marshall@ntlworld.com)
8#
9# This file is part of groff.
10#
11# groff is free software; you can redistribute it and/or modify it under
12# the terms of the GNU General Public License as published by the Free
13# Software Foundation; either version 2, or (at your option) any later
14# version.
15#
16# groff is distributed in the hope that it will be useful, but WITHOUT ANY
17# WARRANTY; without even the implied warranty of MERCHANTABILITY or
18# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
19# for more details.
20#
21# You should have received a copy of the GNU General Public License along
22# with groff; see the file COPYING.  If not, write to the Free Software
23# Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA.
24#
25# ------------------------------------------------------------------------------
26#
# Set up an identifier for the NULL device.
# In most cases "/dev/null" will be correct, but some shells on
# MS-DOS/MS-Windows systems may require us to use "NUL".
#
  NULLDEV="/dev/null"
  test -c "$NULLDEV" || NULLDEV="NUL"
#
# Set up the command name to use in diagnostic messages.
# (We can't assume we have 'basename', so use the full path if required.
#  Also use the 'exec 2>...' workaround for a bug in Cygwin's 'ash').
# "$0" is quoted, so that a script path containing white space is still
# passed to 'basename' as a single operand.
#
  CMD=`exec 2>"$NULLDEV"; basename "$0"` || CMD=$0
39#
# To ensure that prerequisite helper programs are available, and are
# executable, a [fairly] portable method of detecting such programs is
# provided by function `searchpath'.
#
  searchpath()(
  #
  # Usage:  searchpath progname path
  #
  # Print the name of the first regular, executable file called
  # `progname' (or `progname.exe') found in the directories listed
  # in `path'; print ":" when there is no such file.  Directories in
  # `path' are separated by $PATH_SEPARATOR, or by ":" if that is unset.
  #
  # The body runs in a subshell, so that the IFS change, the disabling
  # of pathname expansion, and the working variables used here cannot
  # leak into the caller.
  #
    IFS=${PATH_SEPARATOR-:} prog=':'
  #
  # directory names must be split on IFS only, never glob expanded
  #
    set -f
    for dir in $2
    do
  #
  #   an empty path element designates the current directory
  #
      test -n "$dir" || dir=.
      for ext in '' '.exe'
      #
      # try `progname' with all well known extensions
      # (e.g. Win32 may require `progname.exe')
      #
      do
        try="$dir/$1$ext"
        test -f "$try" && test -x "$try" && prog="$try" && break
      done
      test "$prog" = ":" || break
    done
    echo "$prog"
  )
# @PATH_SEARCH_SETUP@
#
# We need both 'grep' and 'sed' programs, to parse script options,
# and we also need 'cat', to display help and some error messages,
# so ensure they are all installed, before we continue.
# (Each variable is set to ":" by 'searchpath', if the program is missing).
#
  CAT=`searchpath cat "$PATH"`
  GREP=`searchpath grep "$PATH"`
  SED=`searchpath sed "$PATH"`
#
# Another fundamental requirement is the 'groff' program itself;
# we MUST use a 'groff' program located in 'GROFF_BIN_DIR', if this
# is specified; if not, we will search 'GROFF_BIN_PATH', only falling
# back to a 'PATH' search, if neither of these is specified.
# (GPATH records the NAME of the variable searched, for diagnostics).
#
  if test -n "$GROFF_BIN_DIR"
  then
    GPATH=GROFF_BIN_DIR
    GROFF=`searchpath groff "$GROFF_BIN_DIR"`
#
  elif test -n "$GROFF_BIN_PATH"
  then
    GPATH=GROFF_BIN_PATH
    GROFF=`searchpath groff "$GROFF_BIN_PATH"`
#
  else
    GPATH=PATH
    GROFF=`searchpath groff "$PATH"`
  fi
#
# If one or more of these is missing, diagnose and bail out.
#
  NO='' NOPROG="$CMD: installation problem: cannot find program"
  test "$CAT" = ":" && echo >&2 "$NOPROG 'cat' in PATH" && NO="$NO 'cat'"
  test "$GREP" = ":" && echo >&2 "$NOPROG 'grep' in PATH" && NO="$NO 'grep'"
  test "$GROFF" = ":" && echo >&2 "$NOPROG 'groff' in $GPATH" && NO="$NO 'groff'"
  test "$SED" = ":" && echo >&2 "$NOPROG 'sed' in PATH" && NO="$NO 'sed'"
  if test -n "$NO"
  then
#
#   Rebuild $NO as an English list ("'a'", "'a' and 'b'", "'a', 'b' and 'c'"),
#   with a plural "s" prefix and matching verb in $IS, when appropriate.
#
    set $NO
    test $# -gt 1 && NO="s" IS="are" || NO='' IS="is"
    while test $# -gt 0
    do
      test $# -gt 2 && NO="$NO $1,"
      test $# -eq 2 && NO="$NO $1 and" && shift
      test $# -lt 2 && NO="$NO $1"
      shift
    done
#
#   This message is written with 'echo', rather than through '$CAT',
#   because 'cat' may itself be the missing program; ($CAT is then ":",
#   which would silently discard the text).
#
    echo >&2
    echo >&2 "*** FATAL INSTALLATION ERROR ***"
    echo >&2
    echo >&2 "The program$NO $IS required by '$CMD',"
    echo >&2 "but cannot be found; '$CMD' is unable to continue."
    echo >&2
    exit 1
  fi
122#
# Set up temporary/intermediate file locations.
# (If GROFF_TMPDIR is unset, it is assigned here, from the first of TMPDIR,
#  TMP and TEMP which is set, or else to the current directory).
#
  WRKFILE=${GROFF_TMPDIR=${TMPDIR-${TMP-${TEMP-"."}}}}/pdf$$.tmp
#
  REFCOPY=${GROFF_TMPDIR}/pdf$$.cmp
  REFFILE=${GROFF_TMPDIR}/pdf$$.ref
#
  CS_DATA=""
  TC_DATA=${GROFF_TMPDIR}/pdf$$.tc
  BD_DATA=${GROFF_TMPDIR}/pdf$$.ps
#
# Set a trap, to delete temporary files on exit.
# (The command is single quoted, so that it is expanded only when the trap
#  fires, with the directory name protected against word splitting).
#
  trap 'rm -f "${GROFF_TMPDIR}"/pdf$$.*' 0
#
# Convert the usual termination signals into a normal exit, so that the
# exit trap above also cleans up when the script is interrupted.
#
  trap 'exit 1' HUP INT QUIT PIPE TERM
138#
# Initialise 'groff' format control settings,
# to discriminate table of contents and document body formatting passes.
#
  TOC_FORMAT="-rPHASE=1"
  BODY_FORMAT="-rPHASE=2"
#
# The long option names handled by this script itself; any leading
# abbreviation which matches exactly one of them is accepted.
#
  LONGOPTS="
    help	reference-dictionary	no-reference-dictionary
    stylesheet	pdf-output		no-pdf-output
    version	report-progress		no-toc-relocation
    "
# Parse the command line, to identify 'pdfroff' specific options.
# Collect all other parameters into new argument and file lists,
# to be passed on to 'groff', enforcing the '-Tps' option.
#
# (GROFF_STYLE and INPUT_FILES are built as shell source text, with
#  embedded quotes, because they are later expanded through 'eval').
#
  DIFF="" STREAM="" INPUT_FILES=""
  SHOW_VERSION="" GROFF_STYLE="$GROFF -Tps"
  while test $# -gt 0
  do
    case "$1" in
#
#     Long options must be processed locally ...
#
      --*)
#
#          First identify, matching any abbreviation to its full form.
#          (OPTNAME is the text before any '='; MATCH is the concatenation
#           of every full option name which begins with OPTNAME).
#
           MATCH="" OPTNAME=`IFS==; set dummy $1; echo $2`
           for OPT in $LONGOPTS
           do
             MATCH="$MATCH"`echo --$OPT | $GREP "^$OPTNAME"`
           done
#
#          For options in the form --option=value
#          capture any specified value into $OPTARG.
#          ("$1" is quoted, so that white space and wild card characters
#           in the value are preserved exactly as given).
#
           OPTARG=`echo "$1" | $SED -n "s?^${OPTNAME}=??p"`
#
#          Perform case specific processing for matched option ...
#
           case "$MATCH" in

             --help)
               $CAT >&2 <<-ETX
		Usage: $CMD [-option ...] [--long-option ...] [file ...]

		Options:
		  -h
		  --help
		 	Display this usage summary, and exit.

		  -v
		  --version
		 	Display a version identification message and exit.

		  --report-progress
		  	Enable console messages, indicating the progress of the
		 	PDF document formatting process.

		  --pdf-output=name
		  	Write the PDF output stream to file 'name'; if this option
		 	is unspecified, standard output is used for PDF output.

		  --no-pdf-output
		 	Suppress the generation of PDF output entirely; use this
		 	with the --reference-dictionary option, if processing a
		 	document stream to produce only a reference dictionary.

		  --no-reference-dictionary
		 	Suppress the generation of a '$CMD' reference dictionary
		 	for the PDF document.  Normally '$CMD' will create a
		 	reference dictionary, at the start of document processing;
		 	this option can accelerate processing, if it is known in
		 	advance, that no reference dictionary is required.

		  --reference-dictionary=name
		 	Save the document reference dictionary in file 'name'.
		 	If 'name' already exists, when processing commences, it
		 	will be used as the base case, from which the updated
		 	dictionary will be derived.  If this option is not used,
		 	then the reference dictionary, created during the normal
		 	execution of '$CMD', will be deleted on completion of
		 	document processing.

		  --stylesheet=name
		  	Use the file 'name' as a 'groff' style sheet, to control
		 	the appearance of the document's front cover section.  If
		 	this option is not specified, then no special formatting
		 	is applied, to create a front cover section.

		  --no-toc-relocation
		 	Suppress the multiple pass 'groff' processing, which is
		 	normally required to position the table of contents at the
		 	start of a PDF document.

		ETX
               exit 0
               ;;

             --version)
               GROFF_STYLE="$GROFF_STYLE \"$1\""
               SHOW_VERSION="GNU pdfroff (groff) version @VERSION@"
               ;;

             --report-progress)
               SHOW_PROGRESS=echo
               ;;

             --pdf-output)
               PDF_OUTPUT="$OPTARG"
               ;;

             --no-pdf-output)
               PDF_OUTPUT="$NULLDEV"
               ;;

             --reference-dictionary)
               REFFILE="$OPTARG"
               ;;

             --no-reference-dictionary)
               AWK=":" DIFF=":" REFFILE="$NULLDEV" REFCOPY="$NULLDEV"
               ;;

             --stylesheet)
               STYLESHEET="$OPTARG" CS_DATA=${GROFF_TMPDIR}/pdf$$.cs
               ;;

             --no-toc-relocation)
               TC_DATA="" TOC_FORMAT="" BODY_FORMAT=""
               ;;
#
#          any other non-null match must have matched more than one defined case,
#          so report the ambiguity, and bail out.
#
             --*)
               echo >&2 "$CMD: ambiguous abbreviation in option '$1'"
               exit 1
               ;;
#
#          while no match at all simply represents an undefined case.
#
             *)
               echo >&2 "$CMD: unknown option '$1'"
               exit 1
               ;;
           esac
           ;;
#
#     A solitary hyphen, as an argument, means "stream STDIN through groff",
#     while the "-i" option means "append STDIN stream to specified input files",
#     so set up a mechanism to achieve this, for ALL 'groff' passes.
#
      - | -i*)
           STREAM="$CAT ${GROFF_TMPDIR}/pdf$$.in |"
           test "$1" = "-" && INPUT_FILES="$INPUT_FILES $1" \
             || GROFF_STYLE="$GROFF_STYLE $1"
           ;;
#
#     Those standard options which expect an argument, but are specified with
#     an intervening space, between flag and argument, must be reparsed, so we
#     can trap illegal use of '-T dev', or missing input files.
#
#     The flag is glued to the following argument, and the result is queued
#     behind a dummy 'reparse' word, which the 'shift' at the end of the loop
#     body then discards.  When there is no following argument, we must stop
#     here: gluing would just queue the bare flag again, and loop for ever.
#
      -[dfFILmMnoPrTwW])
           if test $# -lt 2
           then
             echo >&2 "$CMD: option '$1' requires an argument"
             exit 1
           fi
           OPTNAME="$1"
           shift; set reparse "$OPTNAME$@"
           ;;
#
#     Among standard options, '-Tdev' is treated as a special case.
#     '-Tps' is automatically enforced, so if specified, is silently ignored.
#
      -Tps) ;;
#
#     No other '-Tdev' option is permitted.
#
      -T*) echo >&2 "$CMD: option '$1' is incompatible with PDF output"
           exit 1
           ;;
#
#     '-h' and '-v' options redirect to their equivalent long forms ...
#     (again, 'redirect' is a dummy word, discarded by the closing 'shift').
#
      -h*) set redirect --help
           ;;
#
      -v*) shift; set redirect --version "$@"
           ;;
#
#     All other standard options are simply passed through to 'groff',
#     with no validation beforehand.
#
      -*)  GROFF_STYLE="$GROFF_STYLE \"$1\""
           ;;
#
#     All non-option arguments are considered as possible input file names,
#     and are passed on to 'groff', unaltered.
#
      *)   INPUT_FILES="$INPUT_FILES \"$1\""
           ;;
    esac
    shift
  done
340#
# If the '-v' or '--version' option was specified,
# then we simply emulate the behaviour of 'groff', with this option,
# and quit.
# (Our own version banner goes to standard error; 'groff' is then run with
#  the collected options, and its exit status becomes ours).
#
  if test -n "$SHOW_VERSION"
  then
    echo >&2 "$SHOW_VERSION"
    echo >&2; eval $GROFF_STYLE $INPUT_FILES
    exit $?
  fi
351#
# Establish how to invoke 'echo', suppressing the terminating newline.
# (Adapted from 'autoconf' code, as found in 'configure' scripts).
#
# On completion, a newline free message may be written in the form:
#
#     echo $n "message$c"
#
# where either $n is '-n', or $c is '\c', or both are empty, according to
# what the 'echo' implementation in use appears to support.
#
  case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in
    *c*,*-n*)  n=''   c=''   ;;
    *c*)       n='-n' c=''   ;;
    *)         n=''   c='\c' ;;
  esac
360#
# If STDIN is specified among the input files,
# or if no input files are specified, then we need to capture STDIN,
# so we can replay it into each 'groff' processing pass.
# (STREAM holds a 'cat file |' pipeline prefix, which is placed in front
#  of each 'groff' command, when that command is expanded through 'eval').
#
  test -z "$INPUT_FILES" && STREAM="$CAT ${GROFF_TMPDIR}/pdf$$.in |"
  test -n "$STREAM" && $CAT > ${GROFF_TMPDIR}/pdf$$.in
367#
# Unless reference resolution is explicitly suppressed,
# we initiate it by touching the cross reference dictionary file,
# and initialise the comparator, to kickstart the reference resolver loop.
# ($DIFF is non-empty here, only if '--no-reference-dictionary' was given).
#
  SAY=":"
  if test -z "$DIFF"
  then
    >> "$REFFILE"
    echo kickstart > "$REFCOPY"
    test "${SHOW_PROGRESS+"set"}" = "set" && SAY=echo
#
#   In order to correctly resolve 'pdfmark' references,
#   we need to have both the 'awk' and 'diff' programs available.
#
    NO=''
    if test -n "$GROFF_AWK_INTERPRETER"
    then
      AWK="$GROFF_AWK_INTERPRETER"
      test -f "$AWK" && test -x "$AWK" || AWK=":"
    else
#
#     Start from "not found", so that an empty candidate list is also
#     diagnosed below, instead of leaving $AWK undefined.
#
      AWK=":"
      for prog in @GROFF_AWK_INTERPRETERS@
      do
        AWK=`searchpath $prog "$PATH"`
        test "$AWK" = ":" || break
      done
    fi
    DIFF=`searchpath diff "$PATH"`
    test "$AWK" = ":" && echo >&2 "$NOPROG 'awk' in PATH" && NO="$NO 'awk'"
    test "$DIFF" = ":" && echo >&2 "$NOPROG 'diff' in PATH" && NO="$NO 'diff'"
    if test -n "$NO"
    then
      set $NO
      SAY=":" AWK=":" DIFF=":"
#
#     The resolver loop will not run, so the comparator will never be
#     replaced by a real dictionary; empty it now, because it is still
#     passed to 'groff' as an input file, and its "kickstart" content
#     would otherwise be formatted as part of the document.
#
      : > "$REFCOPY"
      test $# -gt 1 && NO="s $1 and $2 are" || NO=" $1 is"
      $CAT >&2 <<-ETX

	*** WARNING ***

	The program$NO required, but cannot be found;
	consequently, '$CMD' is unable to resolve 'pdfmark' references.

	Document processing will continue, but no 'pdfmark' reference dictionary
	will be compiled; if any 'pdfmark' reference appears in the resulting PDF
	document, the formatting may not be correct.

	ETX
    fi
  fi
416#
# Run the multi-pass 'pdfmark' reference resolver loop ...
# (PASS_INDICATOR gains one dot per pass; it is explicitly cleared first,
#  so that a value inherited from the environment cannot end the loop early.
#  When $DIFF is ":", the loop condition is true at once, and no pass runs).
#
  PASS_INDICATOR=""
  $SAY >&2 $n Resolving references ..$c
  until $DIFF "$REFCOPY" "$REFFILE" 1>$NULLDEV 2>&1
  do
#
#   until all references are resolved, to yield consistent values
#   in each of two consecutive passes, or until it seems that no consistent
#   resolution is achievable.
#
    $SAY >&2 $n .$c
    PASS_INDICATOR="${PASS_INDICATOR}."
    if test "$PASS_INDICATOR" = "...."
    then
#
#     More than three passes required indicates a probable inconsistency
#     in the source document; diagnose, and bail out.
#
      $SAY >&2 " failed"
      $CAT >&2 <<-ETX
	$CMD: unable to resolve references consistently after three passes
	$CMD: the source document may exhibit instability about the reference(s) ...
	ETX
#
#     Report the unresolved references, as a diff between the two pass files,
#     preferring 'unified' or 'context' diffs, when available
#     (each format is probed by a trial comparison of the null device
#      with itself; the last one which succeeds is used).
#
      DIFFOPT=''
      $DIFF -c0 $NULLDEV $NULLDEV 1>$NULLDEV 2>&1 && DIFFOPT='-c0'
      $DIFF -u0 $NULLDEV $NULLDEV 1>$NULLDEV 2>&1 && DIFFOPT='-u0'
      $DIFF >&2 $DIFFOPT "$REFCOPY" "$REFFILE"
      exit 1
    fi
#
#   Replace the comparison file copy from any previous pass,
#   with the most recently updated copy of the reference dictionary.
#   (Some versions of 'mv' may not support overwriting of an existing file,
#    so remove the old comparison file first).
#
    rm -f "$REFCOPY"
    mv "$REFFILE" "$REFCOPY"
#
#   Run 'groff' and 'awk', to identify reference marks in the document source,
#   filtering them into the reference dictionary; discard incomplete 'groff' output
#   at this stage.
#   (The reference marks are read from the standard error stream of 'groff',
#    which is captured in the work file).
#
    eval $STREAM $GROFF_STYLE -Z 1>$NULLDEV 2>$WRKFILE $REFCOPY $INPUT_FILES
    $AWK '/^gropdf-info:href/ {$1 = ".pdfhref D -N"; print}' "$WRKFILE" > "$REFFILE"
  done
  $SAY >&2 " done"
467#
# To get to here ...
# We MUST have resolved all 'pdfmark' references, such that the content of the
# updated reference dictionary file EXACTLY matches the last saved copy.
#
# If PDF output has been suppressed, then there is nothing more to do.
#
  test "$PDF_OUTPUT" = "$NULLDEV" && exit 0
#
# We are now ready to start preparing the intermediate PostScript files,
# from which the PDF output will be compiled -- but before proceeding further ...
# let's make sure we have a GhostScript interpreter to convert them!
#
  if test -n "$GROFF_GHOSTSCRIPT_INTERPRETER"
  then
    GS="$GROFF_GHOSTSCRIPT_INTERPRETER"
    test -f "$GS" && test -x "$GS" || GS=":"
  else
#
#   Start from "not found", so that an empty candidate list is also
#   diagnosed below, instead of leaving $GS undefined.
#
    GS=":"
    for prog in @GROFF_GHOSTSCRIPT_INTERPRETERS@
    do
      GS=`searchpath $prog "$PATH"`
      test "$GS" = ":" || break
    done
  fi
#
# If we could not find a GhostScript interpreter, then we can do no more.
#
  if test "$GS" = ":"
  then
    echo >&2 "$CMD: installation problem: cannot find GhostScript interpreter"
    $CAT >&2 <<-ETX

	*** FATAL INSTALLATION ERROR ***

	'$CMD' requires a GhostScript interpreter to convert PostScript to PDF.
	Since you do not appear to have one installed, '$CMD' cannot continue.

	ETX
    exit 1
  fi
507#
# We now extend the local copy of the reference dictionary file,
# to create a full 'pdfmark' reference map for the document ...
# (one '.pdfhref Z' request is appended, for each 'grohtml-info' record
#  found in the work file; when $AWK is ":", nothing is appended).
#
  $AWK '/^grohtml-info/ {print ".pdfhref Z", $2, $3, $4}' "$WRKFILE" >> "$REFCOPY"
#
# Re-enable progress reporting, if necessary ...
# (Missing 'awk' or 'diff' may have disabled it, to avoid display
#  of spurious messages associated with reference resolution).
#
  test "${SHOW_PROGRESS+"set"}" = "set" && SAY=echo
518#
# If a document cover style sheet is specified ...
# then we run a special formatting pass, to create a cover section file.
#
# Only the input lines from each '.CS' request to the following '.CE'
# request are passed to 'groff', preceded by the style sheet; (the macro
# names may be changed, by defining CS_MACRO and CE_MACRO beforehand).
#
  if test -n "$STYLESHEET"
  then
#
#   DOT matches the control character, and any spaces or tabs after it.
#
    DOT='^\.[ 	]*'
    CS_MACRO=${CS_MACRO-"CS"} CE_MACRO=${CE_MACRO-"CE"}
    $SAY >&2 $n "Formatting document ... front cover section ..$c"
    CS_FILTER="$STREAM $SED -n '/${DOT}${CS_MACRO}/,/${DOT}${CE_MACRO}/p'"
    eval $CS_FILTER $INPUT_FILES | eval $GROFF_STYLE $STYLESHEET - > "$CS_DATA"
    $SAY >&2 ". done"
  fi
531#
# If table of contents relocation is to be performed (it is, by default),
# then we run an extra 'groff' pass, to format a TOC intermediate file.
# ($TC_DATA is empty, only if '--no-toc-relocation' was specified).
#
  if test -n "$TC_DATA"
  then
    $SAY >&2 $n "Formatting document ... table of contents ..$c"
    eval $STREAM $GROFF_STYLE $TOC_FORMAT $REFCOPY $INPUT_FILES > "$TC_DATA"
    $SAY >&2 ". done"
  fi
#
# In all cases, a final 'groff' pass is required, to format the document body.
#
  $SAY >&2 $n "Formatting document ... body section ..$c"
  eval $STREAM $GROFF_STYLE $BODY_FORMAT $REFCOPY $INPUT_FILES > "$BD_DATA"
  $SAY >&2 ". done"
547#
# Finally ...
# Invoke GhostScript as a PDF writer, to bind all of the generated
# PostScript intermediate files into a single PDF output file.
# (The output goes to standard output, if PDF_OUTPUT was never set).
#
  $SAY >&2 $n "Writing PDF output ..$c"
  PDFWRITE="$GS -dQUIET -dBATCH -dNOPAUSE -sDEVICE=pdfwrite"
#
# (This 'sed' script is a hack, to eliminate redundant blank pages).
#
# $TC_DATA and $CS_DATA are deliberately NOT quoted below; either may be
# empty, in which case it must vanish from the command line altogether.
#
  $SED '
    :again
      /%%EndPageSetup/b finish
      /%%Page:/{
	N
	b again
      }
      b
    :finish
      N
      /^%%Page:.*0 *Cg *EP/d
    ' $TC_DATA $BD_DATA | $PDFWRITE -sOutputFile=${PDF_OUTPUT-"-"} $CS_DATA -
  $SAY >&2 ". done"
570#
571# ------------------------------------------------------------------------------
572# RCSfile: pdfroff.sh,v $ $Revision: 1.7 : end of file
573