#! /bin/sh
# ------------------------------------------------------------------------------
#
# Function: Format PDF Output from groff Markup
#
# Copyright (C) 2005, Free Software Foundation, Inc.
#      Written by Keith Marshall (keith.d.marshall@ntlworld.com)
#
# This file is part of groff.
#
# groff is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2, or (at your option) any later
# version.
#
# groff is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License along
# with groff; see the file COPYING.  If not, write to the Free Software
# Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA.
#
# ------------------------------------------------------------------------------
#
# Set up an identifier for the NULL device.
# In most cases "/dev/null" will be correct, but some shells on
# MS-DOS/MS-Windows systems may require us to use "NUL".
#
  NULLDEV="/dev/null"
  test -c "$NULLDEV" || NULLDEV="NUL"
#
# Set up the command name to use in diagnostic messages.
# (We can't assume we have 'basename', so use the full path if required.
# Also use the 'exec 2>...' workaround for a bug in Cygwin's 'ash').
# The script path is quoted, so that a path containing white space
# is still passed to 'basename' as a single argument.
#
  CMD=`exec 2>$NULLDEV; basename "$0"` || CMD=$0
#
# To ensure that prerequisite helper programs are available, and are
# executable, a [fairly] portable method of detecting such programs is
# provided by function `searchpath'.
#
  searchpath(){
    #
    # Usage: searchpath progname path
    #
    # Echoes the full name of the first regular, executable file called
    # `progname' (or `progname.exe') found in the directories listed in
    # `path'; echoes ":" if no such file is found.  Directories in `path'
    # are separated by $PATH_SEPARATOR, or by ":" if that is not set.
    #
    # This function assigns IFS, and does not restore it; every call in
    # this script is made within a command substitution, so the change
    # never reaches the main shell.
    #
    IFS="${PATH_SEPARATOR-":"}" prog=':'
    for dir in $2
    do
      #
      # an empty path element denotes the current directory;
      # without this, "$dir/$1" would name a file in the root directory.
      #
      test -n "$dir" || dir="."
      #
      # try `progname' with all well known extensions
      # (e.g. Win32 may require `progname.exe')
      #
      for ext in '' '.exe'
      do
        try="$dir/$1$ext"
        test -f "$try" && test -x "$try" && prog="$try" && break
      done
      test "$prog" = ":" || break
    done
    echo "$prog"
  }
# @PATH_SEARCH_SETUP@
#
# We need both 'grep' and 'sed' programs, to parse script options,
# and we also need 'cat', to display help and some error messages,
# so ensure they are all installed, before we continue.
#
  CAT=`searchpath cat "$PATH"`
  GREP=`searchpath grep "$PATH"`
  SED=`searchpath sed "$PATH"`
#
# Another fundamental requirement is the 'groff' program itself;
# we MUST use a 'groff' program located in 'GROFF_BIN_DIR', if this
# is specified; if not, we will search 'GROFF_BIN_PATH', only falling
# back to a 'PATH' search, if neither of these is specified.
# (GPATH records the NAME of the variable searched, for diagnostics).
#
  if test -n "$GROFF_BIN_DIR"
  then
    GPATH=GROFF_BIN_DIR
    GROFF=`searchpath groff "$GROFF_BIN_DIR"`
#
  elif test -n "$GROFF_BIN_PATH"
  then
    GPATH=GROFF_BIN_PATH
    GROFF=`searchpath groff "$GROFF_BIN_PATH"`
#
  else
    GPATH=PATH
    GROFF=`searchpath groff "$PATH"`
  fi
#
# If one or more of these is missing, diagnose and bail out.
#
# Each missing program is reported individually on STDERR, and its quoted
# name is accumulated in NO, (which remains empty if nothing is missing).
#
  NO='' NOPROG="$CMD: installation problem: cannot find program"
  test "$CAT" = ":" && echo >&2 "$NOPROG 'cat' in PATH" && NO="$NO 'cat'"
  test "$GREP" = ":" && echo >&2 "$NOPROG 'grep' in PATH" && NO="$NO 'grep'"
  test "$GROFF" = ":" && echo >&2 "$NOPROG 'groff' in $GPATH" && NO="$NO 'groff'"
  test "$SED" = ":" && echo >&2 "$NOPROG 'sed' in PATH" && NO="$NO 'sed'"
  if test -n "$NO"
  then
#
# Rebuild NO as an English list of the missing programs, ("'a'", "'a' and 'b'",
# or "'a', 'b' and 'c'"), prefixed by a plural "s" when more than one is
# missing; IS is set to the matching verb form.
#
    set $NO
    test $# -gt 1 && NO="s" IS="are" || NO='' IS="is"
    while test $# -gt 0
    do
      test $# -gt 2 && NO="$NO $1,"
      test $# -eq 2 && NO="$NO $1 and" && shift
      test $# -lt 2 && NO="$NO $1"
      shift
    done
    $CAT >&2 <<-ETX

  *** FATAL INSTALLATION ERROR ***

  The program$NO $IS required by '$CMD',
  but cannot be found; '$CMD' is unable to continue.

ETX
    exit 1
  fi
#
# Set up temporary/intermediate file locations.
# (GROFF_TMPDIR is assigned here, if not already set, from the first
# defined of TMPDIR, TMP and TEMP, defaulting to the current directory).
#
  WRKFILE=${GROFF_TMPDIR=${TMPDIR-${TMP-${TEMP-"."}}}}/pdf$$.tmp
#
  REFCOPY=${GROFF_TMPDIR}/pdf$$.cmp
  REFFILE=${GROFF_TMPDIR}/pdf$$.ref
#
  CS_DATA=""
  TC_DATA=${GROFF_TMPDIR}/pdf$$.tc
  BD_DATA=${GROFF_TMPDIR}/pdf$$.ps
#
# Set a trap, to delete temporary files on exit.
# HUP, INT and TERM are converted into a normal exit, so that the exit
# trap still runs, in shells which do not invoke it on fatal signals.
#
  trap "rm -f ${GROFF_TMPDIR}/pdf$$.*" 0
  trap "exit 1" 1 2 15
#
# Initialise 'groff' format control settings,
# to discriminate table of contents and document body formatting passes.
#
  TOC_FORMAT="-rPHASE=1"
  BODY_FORMAT="-rPHASE=2"
#
  LONGOPTS="
    help reference-dictionary no-reference-dictionary
    stylesheet pdf-output no-pdf-output
    version report-progress no-toc-relocation
    "
# Parse the command line, to identify 'pdfroff' specific options.
# Collect all other parameters into new argument and file lists,
# to be passed on to 'groff', enforcing the '-Tps' option.
#
# The parser is packaged as a function, invoked once with the script's own
# arguments; it reads CMD, CAT, GREP, SED, GROFF, NULLDEV, GROFF_TMPDIR and
# LONGOPTS, and records its results in DIFF, STREAM, INPUT_FILES,
# SHOW_VERSION, GROFF_STYLE, and the option specific variables assigned in
# the individual cases below.  It exits directly, for '--help', and for any
# option error.
#
  parse_options(){
    DIFF="" STREAM="" INPUT_FILES=""
    SHOW_VERSION="" GROFF_STYLE="$GROFF -Tps"
    while test $# -gt 0
    do
      case "$1" in
#
# Long options must be processed locally ...
#
        --*)
#
# First identify, matching any abbreviation to its full form.
# (MATCH accumulates EVERY long option name which begins with the
# text given; OPTNAME is that text, excluding any '=value' suffix).
#
          MATCH="" OPTNAME=`IFS==; set dummy $1; echo $2`
          for OPT in $LONGOPTS
          do
            MATCH="$MATCH"`echo --$OPT | $GREP "^$OPTNAME"`
          done
#
# For options in the form --option=value
# capture any specified value into $OPTARG.
#
          OPTARG=`echo $1 | $SED -n s?"^${OPTNAME}="??p`
#
# Perform case specific processing for matched option ...
#
          case "$MATCH" in

            --help)
              $CAT >&2 <<-ETX
Usage: $CMD [-option ...] [--long-option ...] [file ...]

  Options:
    -h
    --help
        Display this usage summary, and exit.

    -v
    --version
        Display a version identification message and exit.

    --report-progress
        Enable console messages, indicating the progress of the
        PDF document formatting process.

    --pdf-output=name
        Write the PDF output stream to file 'name'; if this option
        is unspecified, standard output is used for PDF output.

    --no-pdf-output
        Suppress the generation of PDF output entirely; use this
        with the --reference-dictionary option, if processing a
        document stream to produce only a reference dictionary.

    --no-reference-dictionary
        Suppress the generation of a '$CMD' reference dictionary
        for the PDF document.  Normally '$CMD' will create a
        reference dictionary, at the start of document processing;
        this option can accelerate processing, if it is known in
        advance, that no reference dictionary is required.

    --reference-dictionary=name
        Save the document reference dictionary in file 'name'.
        If 'name' already exists, when processing commences, it
        will be used as the base case, from which the updated
        dictionary will be derived. If this option is not used,
        then the reference dictionary, created during the normal
        execution of '$CMD', will be deleted on completion of
        document processing.

    --stylesheet=name
        Use the file 'name' as a 'groff' style sheet, to control
        the appearance of the document's front cover section.  If
        this option is not specified, then no special formatting
        is applied, to create a front cover section.

    --no-toc-relocation
        Suppress the multiple pass 'groff' processing, which is
        normally required to position the table of contents at the
        start of a PDF document.

ETX
              exit 0
              ;;

            --version)
              GROFF_STYLE="$GROFF_STYLE \"$1\""
              SHOW_VERSION="GNU pdfroff (groff) version @VERSION@"
              ;;

            --report-progress)
              SHOW_PROGRESS=echo
              ;;

            --pdf-output)
              PDF_OUTPUT="$OPTARG"
              ;;

            --no-pdf-output)
              PDF_OUTPUT="$NULLDEV"
              ;;

            --reference-dictionary)
              REFFILE="$OPTARG"
              ;;

            --no-reference-dictionary)
              AWK=":" DIFF=":" REFFILE="$NULLDEV" REFCOPY="$NULLDEV"
              ;;

            --stylesheet)
              STYLESHEET="$OPTARG" CS_DATA=${GROFF_TMPDIR}/pdf$$.cs
              ;;

            --no-toc-relocation)
              TC_DATA="" TOC_FORMAT="" BODY_FORMAT=""
              ;;
#
# any other non-null match must have matched more than one defined case,
# so report the ambiguity, and bail out.
#
            --*)
              echo >&2 "$CMD: ambiguous abbreviation in option '$1'"
              exit 1
              ;;
#
# while no match at all simply represents an undefined case.
#
            *)
              echo >&2 "$CMD: unknown option '$1'"
              exit 1
              ;;
          esac
          ;;
#
# A solitary hyphen, as an argument, means "stream STDIN through groff",
# while the "-i" option means "append STDIN stream to specified input files",
# so set up a mechanism to achieve this, for ALL 'groff' passes.
#
        - | -i*)
          STREAM="$CAT ${GROFF_TMPDIR}/pdf$$.in |"
          test "$1" = "-" && INPUT_FILES="$INPUT_FILES $1" \
            || GROFF_STYLE="$GROFF_STYLE $1"
          ;;
#
# Those standard options which expect an argument, but are specified with
# an intervening space, between flag and argument, must be reparsed, so we
# can trap illegal use of '-T dev', or missing input files.
#
# The flag is glued to the argument which follows it, and the result is
# parsed again, ('reparse' is a dummy word, consumed by the 'shift' which
# ends each cycle).  If NO argument follows, gluing would reproduce the
# bare flag, and we would reparse it for ever; diagnose that instead.
#
        -[dfFILmMnoPrTwW])
          OPTNAME="$1"
          shift
          if test $# -eq 0
          then
            echo >&2 "$CMD: option '$OPTNAME' requires an argument"
            exit 1
          fi
          set reparse "$OPTNAME$@"
          ;;
#
# Among standard options, '-Tdev' is treated as a special case.
# '-Tps' is automatically enforced, so if specified, is silently ignored.
#
        -Tps) ;;
#
# No other '-Tdev' option is permitted.
#
        -T*)
          echo >&2 "$CMD: option '$1' is incompatible with PDF output"
          exit 1
          ;;
#
# '-h' and '-v' options redirect to their equivalent long forms ...
# ('redirect' is a dummy word, consumed by the cycle ending 'shift').
#
        -h*)
          set redirect --help
          ;;
#
        -v*)
          shift; set redirect --version "$@"
          ;;
#
# All other standard options are simply passed through to 'groff',
# with no validation beforehand.
#
        -*)
          GROFF_STYLE="$GROFF_STYLE \"$1\""
          ;;
#
# All non-option arguments are considered as possible input file names,
# and are passed on to 'groff', unaltered.
#
        *)
          INPUT_FILES="$INPUT_FILES \"$1\""
          ;;
      esac
      shift
    done
  }
#
# ('${1+"$@"}' avoids passing a spurious empty argument, in those shells
# which expand a bare '"$@"' to an empty string, when no arguments exist).
#
  parse_options ${1+"$@"}
#
# If the '-v' or '--version' option was specified,
# then we simply emulate the behaviour of 'groff', with this option,
# and quit.
#
  if test -n "$SHOW_VERSION"
  then
    echo >&2 "$SHOW_VERSION"
    echo >&2; eval $GROFF_STYLE $INPUT_FILES
    exit $?
  fi
#
# Establish how to invoke 'echo', suppressing the terminating newline.
# (Adapted from 'autoconf' code, as found in 'configure' scripts).
#
# (The first case applies when 'echo' supports neither form; the second,
# when it supports '-n'; the last, when it interprets a trailing '\c').
#
  case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in
    *c*,*-n*) n=''   c=''   ;;
    *c*)      n='-n' c=''   ;;
    *)        n=''   c='\c' ;;
  esac
#
# If STDIN is specified among the input files,
# or if no input files are specified, then we need to capture STDIN,
# so we can replay it into each 'groff' processing pass.
#
  test -z "$INPUT_FILES" && STREAM="$CAT ${GROFF_TMPDIR}/pdf$$.in |"
  test -n "$STREAM" && $CAT > ${GROFF_TMPDIR}/pdf$$.in
#
# Unless reference resolution is explicitly suppressed,
# we initiate it by touching the cross reference dictionary file,
# and initialise the comparator, to kickstart the reference resolver loop.
#
  SAY=":"
  if test -z "$DIFF"
  then
    >> $REFFILE
    echo kickstart > $REFCOPY
    test "${SHOW_PROGRESS+"set"}" = "set" && SAY=echo
#
# In order to correctly resolve 'pdfmark' references,
# we need to have both the 'awk' and 'diff' programs available.
#
    NO=''
    if test -n "$GROFF_AWK_INTERPRETER"
    then
      AWK="$GROFF_AWK_INTERPRETER"
      test -f "$AWK" && test -x "$AWK" || AWK=":"
    else
      for prog in @GROFF_AWK_INTERPRETERS@
      do
        AWK=`searchpath $prog "$PATH"`
        test "$AWK" = ":" || break
      done
    fi
    DIFF=`searchpath diff "$PATH"`
    test "$AWK" = ":" && echo >&2 "$NOPROG 'awk' in PATH" && NO="$NO 'awk'"
    test "$DIFF" = ":" && echo >&2 "$NOPROG 'diff' in PATH" && NO="$NO 'diff'"
    if test -n "$NO"
    then
#
# With either program missing, both are replaced by the ':' no-op command,
# so the resolver loop below terminates immediately, and progress messages
# are silenced.
#
      set $NO
      SAY=":" AWK=":" DIFF=":"
      test $# -gt 1 && NO="s $1 and $2 are" || NO=" $1 is"
      $CAT >&2 <<-ETX

  *** WARNING ***

  The program$NO required, but cannot be found;
  consequently, '$CMD' is unable to resolve 'pdfmark' references.

  Document processing will continue, but no 'pdfmark' reference dictionary
  will be compiled; if any 'pdfmark' reference appears in the resulting PDF
  document, the formatting may not be correct.

ETX
    fi
  fi
#
# Run the multi-pass 'pdfmark' reference resolver loop ...
# (The pass counter is explicitly reset, so that a 'PASS_INDICATOR'
# inherited from the environment cannot distort the pass limit).
#
  PASS_INDICATOR=""
  $SAY >&2 $n Resolving references ..$c
  until $DIFF $REFCOPY $REFFILE 1>$NULLDEV 2>&1
  do
#
# until all references are resolved, to yield consistent values
# in each of two consecutive passes, or until it seems that no consistent
# resolution is achievable.
#
    $SAY >&2 $n .$c
    PASS_INDICATOR="${PASS_INDICATOR}."
    if test "$PASS_INDICATOR" = "...."
    then
#
# More than three passes required indicates a probable inconsistency
# in the source document; diagnose, and bail out.
#
      $SAY >&2 " failed"
      $CAT >&2 <<-ETX
$CMD: unable to resolve references consistently after three passes
$CMD: the source document may exhibit instability about the reference(s) ...
ETX
#
# Report the unresolved references, as a diff between the two pass files,
# preferring 'unified' or 'context' diffs, when available
#
      DIFFOPT=''
      $DIFF -c0 $NULLDEV $NULLDEV 1>$NULLDEV 2>&1 && DIFFOPT='-c0'
      $DIFF -u0 $NULLDEV $NULLDEV 1>$NULLDEV 2>&1 && DIFFOPT='-u0'
      $DIFF >&2 $DIFFOPT $REFCOPY $REFFILE
      exit 1
    fi
#
# Replace the comparison file copy from any previous pass,
# with the most recently updated copy of the reference dictionary.
# (Some versions of 'mv' may not support overwriting of an existing file,
# so remove the old comparison file first).
#
    rm -f $REFCOPY
    mv $REFFILE $REFCOPY
#
# Run 'groff' and 'awk', to identify reference marks in the document source,
# filtering them into the reference dictionary; discard incomplete 'groff' output
# at this stage.
# ('groff' diagnostics are captured in $WRKFILE, which 'awk' then scans).
#
    eval $STREAM $GROFF_STYLE -Z 1>$NULLDEV 2>$WRKFILE $REFCOPY $INPUT_FILES
    $AWK '/^gropdf-info:href/ {$1 = ".pdfhref D -N"; print}' $WRKFILE > $REFFILE
  done
  $SAY >&2 " done"
#
# To get to here ...
# We MUST have resolved all 'pdfmark' references, such that the content of the
# updated reference dictionary file EXACTLY matches the last saved copy.
#
# If PDF output has been suppressed, then there is nothing more to do.
#
  test "$PDF_OUTPUT" = "$NULLDEV" && exit 0
#
# We are now ready to start preparing the intermediate PostScript files,
# from which the PDF output will be compiled -- but before proceeding further ...
# let's make sure we have a GhostScript interpreter to convert them!
#
# An interpreter named by GROFF_GHOSTSCRIPT_INTERPRETER is used exclusively,
# provided it is an executable file; otherwise, the first of the configured
# candidates to be found in PATH is used.  GS is set to ":" on failure.
#
  if test -n "$GROFF_GHOSTSCRIPT_INTERPRETER"
  then
    GS="$GROFF_GHOSTSCRIPT_INTERPRETER"
    test -f "$GS" && test -x "$GS" || GS=":"
  else
    for prog in @GROFF_GHOSTSCRIPT_INTERPRETERS@
    do
      GS=`searchpath $prog "$PATH"`
      test "$GS" = ":" || break
    done
  fi
#
# If we could not find a GhostScript interpreter, then we can do no more.
#
  if test "$GS" = ":"
  then
    echo >&2 "$CMD: installation problem: cannot find GhostScript interpreter"
    $CAT >&2 <<-ETX

  *** FATAL INSTALLATION ERROR ***

  '$CMD' requires a GhostScript interpreter to convert PostScript to PDF.
  Since you do not appear to have one installed, '$CMD' cannot continue.

ETX
    exit 1
  fi
#
# We now extend the local copy of the reference dictionary file,
# to create a full 'pdfmark' reference map for the document ...
#
  $AWK '/^grohtml-info/ {print ".pdfhref Z", $2, $3, $4}' $WRKFILE >> $REFCOPY
#
# Re-enable progress reporting, if necessary ...
# (Missing 'awk' or 'diff' may have disabled it, to avoid display
# of spurious messages associated with reference resolution).
#
  test "${SHOW_PROGRESS+"set"}" = "set" && SAY=echo
#
# If a document cover style sheet is specified ...
# then we run a special formatting pass, to create a cover section file.
#
# The cover section is taken to be those input lines which lie between the
# requests named by CS_MACRO and CE_MACRO, (defaulting to 'CS' and 'CE'); 'sed'
# extracts them, and 'groff' formats them together with the style sheet.
# NOTE(review): the bracket expression in DOT holds a single space here; it
# may originally have held a space and a tab -- confirm against upstream.
#
  if test -n "$STYLESHEET"
  then
    DOT='^\.[ ]*'
    CS_MACRO=${CS_MACRO-"CS"} CE_MACRO=${CE_MACRO-"CE"}
    $SAY >&2 $n "Formatting document ... front cover section ..$c"
    CS_FILTER="$STREAM $SED -n '/${DOT}${CS_MACRO}/,/${DOT}${CE_MACRO}/p'"
    eval $CS_FILTER $INPUT_FILES | eval $GROFF_STYLE $STYLESHEET - > $CS_DATA
    $SAY >&2 ". done"
  fi
#
# If table of contents relocation is to be performed (it is, by default),
# then we run an extra 'groff' pass, to format a TOC intermediate file.
#
  if test -n "$TC_DATA"
  then
    $SAY >&2 $n "Formatting document ... table of contents ..$c"
    eval $STREAM $GROFF_STYLE $TOC_FORMAT $REFCOPY $INPUT_FILES > $TC_DATA
    $SAY >&2 ". done"
  fi
#
# In all cases, a final 'groff' pass is required, to format the document body.
#
  $SAY >&2 $n "Formatting document ... body section ..$c"
  eval $STREAM $GROFF_STYLE $BODY_FORMAT $REFCOPY $INPUT_FILES > $BD_DATA
  $SAY >&2 ". done"
#
# Finally ...
# Invoke GhostScript as a PDF writer, to bind all of the generated
# PostScript intermediate files into a single PDF output file.
#
  $SAY >&2 $n "Writing PDF output ..$c"
  PDFWRITE="$GS -dQUIET -dBATCH -dNOPAUSE -sDEVICE=pdfwrite"
#
# (This 'sed' script is a hack, to eliminate redundant blank pages).
#
  $SED '
    :again
      /%%EndPageSetup/b finish
      /%%Page:/{
        N
        b again
      }
      b
    :finish
      N
      /^%%Page:.*0 *Cg *EP/d
  ' $TC_DATA $BD_DATA | $PDFWRITE -sOutputFile=${PDF_OUTPUT-"-"} $CS_DATA -
#
# Capture the exit status of the PDF writer, before any progress message
# can replace it, so that a failed conversion is reported to our caller.
#
  STATUS=$?
  if test $STATUS -ne 0
  then
    $SAY >&2 " failed"
    exit $STATUS
  fi
  $SAY >&2 ". done"
#
# ------------------------------------------------------------------------------
# RCSfile: pdfroff.sh,v $ $Revision: 1.7 : end of file