xref: /onnv-gate/usr/src/cmd/perl/5.8.4/distrib/lib/Pod/Html.pm (revision 0:68f95e015346)
1package Pod::Html;
2use strict;
3require Exporter;
4
5use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
6$VERSION = 1.0502;
7@ISA = qw(Exporter);
8@EXPORT = qw(pod2html htmlify);
9@EXPORT_OK = qw(anchorify);
10
11use Carp;
12use Config;
13use Cwd;
14use File::Spec;
15use File::Spec::Unix;
16use Getopt::Long;
17
18use locale;	# make \w work right in non-ASCII lands
19
20=head1 NAME
21
22Pod::Html - module to convert pod files to HTML
23
24=head1 SYNOPSIS
25
26    use Pod::Html;
27    pod2html([options]);
28
29=head1 DESCRIPTION
30
31Converts files from pod format (see L<perlpod>) to HTML format.  It
32can automatically generate indexes and cross-references, and it keeps
33a cache of things it knows how to cross-reference.
34
35=head1 ARGUMENTS
36
37Pod::Html takes the following arguments:
38
39=over 4
40
41=item backlink
42
43    --backlink="Back to Top"
44
45Adds "Back to Top" links in front of every C<head1> heading (except for
46the first).  By default, no backlinks are generated.
47
48=item cachedir
49
50    --cachedir=name
51
52Creates the item and directory caches in the given directory.
53
54=item css
55
56    --css=stylesheet
57
58Specify the URL of a cascading style sheet.  Also disables all HTML/CSS
59C<style> attributes that are output by default (to avoid conflicts).
60
61=item flush
62
63    --flush
64
65Flushes the item and directory caches.
66
67=item header
68
69    --header
70    --noheader
71
72Creates header and footer blocks containing the text of the C<NAME>
73section.  By default, no headers are generated.
74
75=item help
76
77    --help
78
79Displays the usage message.
80
81=item hiddendirs
82
83    --hiddendirs
84    --nohiddendirs
85
86Include hidden directories in the search for POD's in podpath if recurse
87is set.
88The default is not to traverse any directory whose name begins with C<.>.
89See L</"podpath"> and L</"recurse">.
90
91[This option is for backward compatibility only.
92It's hard to imagine that one would usefully create a module with a
93name component beginning with C<.>.]
94
95=item htmldir
96
97    --htmldir=name
98
99Sets the directory in which the resulting HTML file is placed.  This
100is used to generate relative links to other files. Not passing this
101causes all links to be absolute, since this is the value that tells
102Pod::Html the root of the documentation tree.
103
104=item htmlroot
105
106    --htmlroot=name
107
108Sets the base URL for the HTML files.  When cross-references are made,
109the HTML root is prepended to the URL.
110
111=item index
112
113    --index
114    --noindex
115
116Generate an index at the top of the HTML file.  This is the default
117behaviour.
118
119=item infile
120
121    --infile=name
122
123Specify the pod file to convert.  Input is taken from STDIN if no
124infile is specified.
125
126=item libpods
127
128    --libpods=name:...:name
129
130List of page names (eg, "perlfunc") which contain linkable C<=item>s.
131
132=item netscape
133
134    --netscape
135    --nonetscape
136
137B<Deprecated>, has no effect. For backwards compatibility only.
138
139=item outfile
140
141    --outfile=name
142
143Specify the HTML file to create.  Output goes to STDOUT if no outfile
144is specified.
145
146=item podpath
147
148    --podpath=name:...:name
149
150Specify which subdirectories of the podroot contain pod files whose
151HTML converted forms can be linked to in cross references.
152
153=item podroot
154
155    --podroot=name
156
157Specify the base directory for finding library pods.
158
159=item quiet
160
161    --quiet
162    --noquiet
163
164Don't display I<mostly harmless> warning messages.  These messages
165will be displayed by default.  But this is not the same as C<verbose>
166mode.
167
168=item recurse
169
170    --recurse
171    --norecurse
172
173Recurse into subdirectories specified in podpath (default behaviour).
174
175=item title
176
177    --title=title
178
179Specify the title of the resulting HTML file.
180
181=item verbose
182
183    --verbose
184    --noverbose
185
186Display progress messages.  By default, they won't be displayed.
187
188=back
189
190=head1 EXAMPLE
191
192    pod2html("pod2html",
193	     "--podpath=lib:ext:pod:vms",
194	     "--podroot=/usr/src/perl",
195	     "--htmlroot=/perl/nmanual",
196	     "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
197	     "--recurse",
198	     "--infile=foo.pod",
199	     "--outfile=/perl/nmanual/foo.html");
200
201=head1 ENVIRONMENT
202
203Uses C<$Config{pod2html}> to setup default options.
204
205=head1 AUTHOR
206
207Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
208
209=head1 SEE ALSO
210
211L<perlpod>
212
213=head1 COPYRIGHT
214
215This program is distributed under the Artistic License.
216
217=cut
218
219
220my($Cachedir);
221my($Dircache, $Itemcache);
222my @Begin_Stack;
223my @Libpods;
224my($Htmlroot, $Htmldir, $Htmlfile, $Htmlfileurl);
225my($Podfile, @Podpath, $Podroot);
226my $Css;
227
228my $Recurse;
229my $Quiet;
230my $HiddenDirs;
231my $Verbose;
232my $Doindex;
233
234my $Backlink;
235my($Listlevel, @Listend);
236my $After_Lpar;
237use vars qw($Ignore);  # need to localize it later.
238
239my(%Items_Named, @Items_Seen);
240my($Title, $Header);
241
242my $Top;
243my $Paragraph;
244
245my %Sections;
246
247# Caches
248my %Pages = ();			# associative array used to find the location
249				#   of pages referenced by L<> links.
250my %Items = ();			# associative array used to find the location
251				#   of =item directives referenced by C<> links
252
253my %Local_Items;
254my $Is83;
255my $PTQuote;
256
257my $Curdir = File::Spec->curdir;
258
259init_globals();
260
261sub init_globals {
262    $Cachedir = ".";		# The directory to which item and directory
263				# caches will be written.
264
265    $Dircache = "pod2htmd.tmp";
266    $Itemcache = "pod2htmi.tmp";
267
268    @Begin_Stack = ();		# begin/end stack
269
270    @Libpods = ();	    	# files to search for links from C<> directives
271    $Htmlroot = "/";	    	# http-server base directory from which all
272				#   relative paths in $podpath stem.
273    $Htmldir = "";	    	# The directory to which the html pages
274				# will (eventually) be written.
275    $Htmlfile = "";		# write to stdout by default
276    $Htmlfileurl = "" ;		# The url that other files would use to
277				# refer to this file.  This is only used
278				# to make relative urls that point to
279				# other files.
280
281    $Podfile = "";		# read from stdin by default
282    @Podpath = ();		# list of directories containing library pods.
283    $Podroot = $Curdir;	        # filesystem base directory from which all
284				#   relative paths in $podpath stem.
285    $Css = '';                  # Cascading style sheet
286    $Recurse = 1;		# recurse on subdirectories in $podpath.
287    $Quiet = 0;		        # not quiet by default
288    $Verbose = 0;		# not verbose by default
289    $Doindex = 1;   	    	# non-zero if we should generate an index
290    $Backlink = '';		# text for "back to top" links
291    $Listlevel = 0;		# current list depth
292    @Listend = ();		# the text to use to end the list.
293    $After_Lpar = 0;            # set to true after a par in an =item
294    $Ignore = 1;		# whether or not to format text.  we don't
295				#   format text until we hit our first pod
296				#   directive.
297
298    @Items_Seen = ();	        # for multiples of the same item in perlfunc
299    %Items_Named = ();
300    $Header = 0;		# produce block header/footer
301    $Title = '';		# title to give the pod(s)
302    $Top = 1;			# true if we are at the top of the doc.  used
303				#   to prevent the first <hr /> directive.
304    $Paragraph = '';		# which paragraph we're processing (used
305				#   for error messages)
306    $PTQuote = 0;               # status of double-quote conversion
307    %Sections = ();		# sections within this page
308
309    %Local_Items = ();
310    $Is83 = $^O eq 'dos';       # Is it an 8.3 filesystem?
311}
312
313#
314# clean_data: global clean-up of pod data
315#
316sub clean_data($){
317    my( $dataref ) = @_;
318    for my $i ( 0..$#{$dataref} ) {
319	${$dataref}[$i] =~ s/\s+\Z//;
320
321        # have a look for all-space lines
322      if( ${$dataref}[$i] =~ /^\s+$/m and $dataref->[$i] !~ /^\s/ ){
323	    my @chunks = split( /^\s+$/m, ${$dataref}[$i] );
324	    splice( @$dataref, $i, 1, @chunks );
325	}
326    }
327}
328
329
330sub pod2html {
331    local(@ARGV) = @_;
332    local($/);
333    local $_;
334
335    init_globals();
336
337    $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
338
339    # cache of %Pages and %Items from last time we ran pod2html
340
341    #undef $opt_help if defined $opt_help;
342
343    # parse the command-line parameters
344    parse_command_line();
345
346    # escape the backlink argument (same goes for title but is done later...)
347    $Backlink = html_escape($Backlink) if defined $Backlink;
348
349    # set some variables to their default values if necessary
350    local *POD;
351    unless (@ARGV && $ARGV[0]) {
352	$Podfile  = "-" unless $Podfile;	# stdin
353	open(POD, "<$Podfile")
354		|| die "$0: cannot open $Podfile file for input: $!\n";
355    } else {
356	$Podfile = $ARGV[0];  # XXX: might be more filenames
357	*POD = *ARGV;
358    }
359    $Htmlfile = "-" unless $Htmlfile;	# stdout
360    $Htmlroot = "" if $Htmlroot eq "/";	# so we don't get a //
361    $Htmldir =~ s#/\z## ;               # so we don't get a //
362    if (  $Htmlroot eq ''
363       && defined( $Htmldir )
364       && $Htmldir ne ''
365       && substr( $Htmlfile, 0, length( $Htmldir ) ) eq $Htmldir
366       )
367    {
368	# Set the 'base' url for this file, so that we can use it
369	# as the location from which to calculate relative links
370	# to other files. If this is '', then absolute links will
371	# be used throughout.
372        $Htmlfileurl= "$Htmldir/" . substr( $Htmlfile, length( $Htmldir ) + 1);
373    }
374
375    # read the pod a paragraph at a time
376    warn "Scanning for sections in input file(s)\n" if $Verbose;
377    $/ = "";
378    my @poddata  = <POD>;
379    close(POD);
380
381    # be eol agnostic
382    for (@poddata) {
383	if (/\r/) {
384	    if (/\r\n/) {
385		@poddata = map { s/\r\n/\n/g;
386				 /\n\n/ ?
387				     map { "$_\n\n" } split /\n\n/ :
388				     $_ } @poddata;
389	    } else {
390		@poddata = map { s/\r/\n/g;
391				 /\n\n/ ?
392				     map { "$_\n\n" } split /\n\n/ :
393				     $_ } @poddata;
394	    }
395	    last;
396	}
397    }
398
399    clean_data( \@poddata );
400
401    # scan the pod for =head[1-6] directives and build an index
402    my $index = scan_headings(\%Sections, @poddata);
403
404    unless($index) {
405	warn "No headings in $Podfile\n" if $Verbose;
406    }
407
408    # open the output file
409    open(HTML, ">$Htmlfile")
410	    || die "$0: cannot open $Htmlfile file for output: $!\n";
411
412    # put a title in the HTML file if one wasn't specified
413    if ($Title eq '') {
414	TITLE_SEARCH: {
415 	    for (my $i = 0; $i < @poddata; $i++) {
416		if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
417 		    for my $para ( @poddata[$i, $i+1] ) {
418			last TITLE_SEARCH
419			    if ($Title) = $para =~ /(\S+\s+-+.*\S)/s;
420		    }
421		}
422
423	    }
424	}
425    }
426    if (!$Title and $Podfile =~ /\.pod\z/) {
427	# probably a split pod so take first =head[12] as title
428 	for (my $i = 0; $i < @poddata; $i++) {
429	    last if ($Title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
430	}
431	warn "adopted '$Title' as title for $Podfile\n"
432	    if $Verbose and $Title;
433    }
434    if ($Title) {
435	$Title =~ s/\s*\(.*\)//;
436    } else {
437	warn "$0: no title for $Podfile.\n" unless $Quiet;
438	$Podfile =~ /^(.*)(\.[^.\/]+)?\z/s;
439	$Title = ($Podfile eq "-" ? 'No Title' : $1);
440	warn "using $Title" if $Verbose;
441    }
442    $Title = html_escape($Title);
443
444    my $csslink = '';
445    my $bodystyle = ' style="background-color: white"';
446    my $tdstyle = ' style="background-color: #cccccc"';
447
448    if ($Css) {
449      $csslink = qq(\n<link rel="stylesheet" href="$Css" type="text/css" />);
450      $csslink =~ s,\\,/,g;
451      $csslink =~ s,(/.):,$1|,;
452      $bodystyle = '';
453      $tdstyle = '';
454    }
455
456      my $block = $Header ? <<END_OF_BLOCK : '';
457<table border="0" width="100%" cellspacing="0" cellpadding="3">
458<tr><td class="block"$tdstyle valign="middle">
459<big><strong><span class="block">&nbsp;$Title</span></strong></big>
460</td></tr>
461</table>
462END_OF_BLOCK
463
464    print HTML <<END_OF_HEAD;
465<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
466<html xmlns="http://www.w3.org/1999/xhtml">
467<head>
468<title>$Title</title>$csslink
469<link rev="made" href="mailto:$Config{perladmin}" />
470</head>
471
472<body$bodystyle>
473$block
474END_OF_HEAD
475
476    # load/reload/validate/cache %Pages and %Items
477    get_cache($Dircache, $Itemcache, \@Podpath, $Podroot, $Recurse);
478
479    # scan the pod for =item directives
480    scan_items( \%Local_Items, "", @poddata);
481
482    # put an index at the top of the file.  note, if $Doindex is 0 we
483    # still generate an index, but surround it with an html comment.
484    # that way some other program can extract it if desired.
485    $index =~ s/--+/-/g;
486    print HTML "<p><a name=\"__index__\"></a></p>\n";
487    print HTML "<!-- INDEX BEGIN -->\n";
488    print HTML "<!--\n" unless $Doindex;
489    print HTML $index;
490    print HTML "-->\n" unless $Doindex;
491    print HTML "<!-- INDEX END -->\n\n";
492    print HTML "<hr />\n" if $Doindex and $index;
493
494    # now convert this file
495    my $after_item;             # set to true after an =item
496    my $need_dd = 0;
497    warn "Converting input file $Podfile\n" if $Verbose;
498    foreach my $i (0..$#poddata){
499        $PTQuote = 0; # status of quote conversion
500
501	$_ = $poddata[$i];
502	$Paragraph = $i+1;
503	if (/^(=.*)/s) {	# is it a pod directive?
504	    $Ignore = 0;
505	    $after_item = 0;
506	    $need_dd = 0;
507	    $_ = $1;
508	    if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
509		process_begin($1, $2);
510	    } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
511		process_end($1, $2);
512	    } elsif (/^=cut/) {			# =cut
513		process_cut();
514	    } elsif (/^=pod/) {			# =pod
515		process_pod();
516	    } else {
517		next if @Begin_Stack && $Begin_Stack[-1] ne 'html';
518
519		if (/^=(head[1-6])\s+(.*\S)/s) {	# =head[1-6] heading
520		    process_head( $1, $2, $Doindex && $index );
521		} elsif (/^=item\s*(.*\S)?/sm) {	# =item text
522		    $need_dd = process_item( $1 );
523		    $after_item = 1;
524		} elsif (/^=over\s*(.*)/) {		# =over N
525		    process_over();
526		} elsif (/^=back/) {		# =back
527		    process_back();
528		} elsif (/^=for\s+(\S+)\s*(.*)/si) {# =for
529		    process_for($1,$2);
530		} else {
531		    /^=(\S*)\s*/;
532		    warn "$0: $Podfile: unknown pod directive '$1' in "
533		       . "paragraph $Paragraph.  ignoring.\n";
534		}
535	    }
536	    $Top = 0;
537	}
538	else {
539	    next if $Ignore;
540	    next if @Begin_Stack && $Begin_Stack[-1] ne 'html';
541	    print HTML and next if @Begin_Stack && $Begin_Stack[-1] eq 'html';
542	    print HTML "<dd>\n" if $need_dd;
543	    my $text = $_;
544	    if( $text =~ /\A\s+/ ){
545		process_pre( \$text );
546	        print HTML "<pre>\n$text</pre>\n";
547
548	    } else {
549		process_text( \$text );
550
551		# experimental: check for a paragraph where all lines
552		# have some ...\t...\t...\n pattern
553		if( $text =~ /\t/ ){
554		    my @lines = split( "\n", $text );
555		    if( @lines > 1 ){
556			my $all = 2;
557			foreach my $line ( @lines ){
558			    if( $line =~ /\S/ && $line !~ /\t/ ){
559				$all--;
560				last if $all == 0;
561			    }
562			}
563			if( $all > 0 ){
564			    $text =~ s/\t+/<td>/g;
565			    $text =~ s/^/<tr><td>/gm;
566			    $text = '<table cellspacing="0" cellpadding="0">' .
567                                    $text . '</table>';
568			}
569		    }
570		}
571		## end of experimental
572
573		if( $after_item ){
574		    print HTML "$text\n";
575		    $After_Lpar = 1;
576		} else {
577		    print HTML "<p>$text</p>\n";
578		}
579	    }
580	    print HTML "</dd>\n" if $need_dd;
581	    $after_item = 0;
582	}
583    }
584
585    # finish off any pending directives
586    finish_list();
587
588    # link to page index
589    print HTML "<p><a href=\"#__index__\"><small>$Backlink</small></a></p>\n"
590	if $Doindex and $index and $Backlink;
591
592    print HTML <<END_OF_TAIL;
593$block
594</body>
595
596</html>
597END_OF_TAIL
598
599    # close the html file
600    close(HTML);
601
602    warn "Finished\n" if $Verbose;
603}
604
605##############################################################################
606
607sub usage {
608    my $podfile = shift;
609    warn "$0: $podfile: @_\n" if @_;
610    die <<END_OF_USAGE;
611Usage:  $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
612           --podpath=<name>:...:<name> --podroot=<name>
613           --libpods=<name>:...:<name> --recurse --verbose --index
614           --netscape --norecurse --noindex --cachedir=<name>
615
616  --backlink     - set text for "back to top" links (default: none).
617  --cachedir     - directory for the item and directory cache files.
618  --css          - stylesheet URL
619  --flush        - flushes the item and directory caches.
620  --[no]header   - produce block header/footer (default is no headers).
621  --help         - prints this message.
622  --hiddendirs   - search hidden directories in podpath
623  --htmldir      - directory for resulting HTML files.
624  --htmlroot     - http-server base directory from which all relative paths
625                   in podpath stem (default is /).
626  --[no]index    - generate an index at the top of the resulting html
627                   (default behaviour).
628  --infile       - filename for the pod to convert (input taken from stdin
629                   by default).
630  --libpods      - colon-separated list of pages to search for =item pod
631                   directives in as targets of C<> and implicit links (empty
632                   by default).  note, these are not filenames, but rather
633                   page names like those that appear in L<> links.
634  --outfile      - filename for the resulting html file (output sent to
635                   stdout by default).
636  --podpath      - colon-separated list of directories containing library
637                   pods (empty by default).
638  --podroot      - filesystem base directory from which all relative paths
639                   in podpath stem (default is .).
640  --[no]quiet    - supress some benign warning messages (default is off).
641  --[no]recurse  - recurse on those subdirectories listed in podpath
642                   (default behaviour).
643  --title        - title that will appear in resulting html file.
644  --[no]verbose  - self-explanatory (off by default).
645  --[no]netscape - deprecated, has no effect. for backwards compatibility only.
646
647END_OF_USAGE
648
649}
650
651sub parse_command_line {
652    my ($opt_backlink,$opt_cachedir,$opt_css,$opt_flush,$opt_header,$opt_help,
653	$opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,
654	$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_quiet,
655	$opt_recurse,$opt_title,$opt_verbose,$opt_hiddendirs);
656
657    unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
658    my $result = GetOptions(
659			    'backlink=s' => \$opt_backlink,
660			    'cachedir=s' => \$opt_cachedir,
661			    'css=s'      => \$opt_css,
662			    'flush'      => \$opt_flush,
663			    'header!'    => \$opt_header,
664			    'help'       => \$opt_help,
665			    'hiddendirs!'=> \$opt_hiddendirs,
666			    'htmldir=s'  => \$opt_htmldir,
667			    'htmlroot=s' => \$opt_htmlroot,
668			    'index!'     => \$opt_index,
669			    'infile=s'   => \$opt_infile,
670			    'libpods=s'  => \$opt_libpods,
671			    'netscape!'  => \$opt_netscape,
672			    'outfile=s'  => \$opt_outfile,
673			    'podpath=s'  => \$opt_podpath,
674			    'podroot=s'  => \$opt_podroot,
675			    'quiet!'     => \$opt_quiet,
676			    'recurse!'   => \$opt_recurse,
677			    'title=s'    => \$opt_title,
678			    'verbose!'   => \$opt_verbose,
679			   );
680    usage("-", "invalid parameters") if not $result;
681
682    usage("-") if defined $opt_help;	# see if the user asked for help
683    $opt_help = "";			# just to make -w shut-up.
684
685    @Podpath  = split(":", $opt_podpath) if defined $opt_podpath;
686    @Libpods  = split(":", $opt_libpods) if defined $opt_libpods;
687
688    $Backlink = $opt_backlink if defined $opt_backlink;
689    $Cachedir = $opt_cachedir if defined $opt_cachedir;
690    $Css      = $opt_css      if defined $opt_css;
691    $Header   = $opt_header   if defined $opt_header;
692    $Htmldir  = $opt_htmldir  if defined $opt_htmldir;
693    $Htmlroot = $opt_htmlroot if defined $opt_htmlroot;
694    $Doindex  = $opt_index    if defined $opt_index;
695    $Podfile  = $opt_infile   if defined $opt_infile;
696    $HiddenDirs = $opt_hiddendirs if defined $opt_hiddendirs;
697    $Htmlfile = $opt_outfile  if defined $opt_outfile;
698    $Podroot  = $opt_podroot  if defined $opt_podroot;
699    $Quiet    = $opt_quiet    if defined $opt_quiet;
700    $Recurse  = $opt_recurse  if defined $opt_recurse;
701    $Title    = $opt_title    if defined $opt_title;
702    $Verbose  = $opt_verbose  if defined $opt_verbose;
703
704    warn "Flushing item and directory caches\n"
705	if $opt_verbose && defined $opt_flush;
706    $Dircache = "$Cachedir/pod2htmd.tmp";
707    $Itemcache = "$Cachedir/pod2htmi.tmp";
708    if (defined $opt_flush) {
709	1 while unlink($Dircache, $Itemcache);
710    }
711}
712
713
714my $Saved_Cache_Key;
715
716sub get_cache {
717    my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
718    my @cache_key_args = @_;
719
720    # A first-level cache:
721    # Don't bother reading the cache files if they still apply
722    # and haven't changed since we last read them.
723
724    my $this_cache_key = cache_key(@cache_key_args);
725
726    return if $Saved_Cache_Key and $this_cache_key eq $Saved_Cache_Key;
727
728    # load the cache of %Pages and %Items if possible.  $tests will be
729    # non-zero if successful.
730    my $tests = 0;
731    if (-f $dircache && -f $itemcache) {
732	warn "scanning for item cache\n" if $Verbose;
733	$tests = load_cache($dircache, $itemcache, $podpath, $podroot);
734    }
735
736    # if we didn't succeed in loading the cache then we must (re)build
737    #  %Pages and %Items.
738    if (!$tests) {
739	warn "scanning directories in pod-path\n" if $Verbose;
740	scan_podpath($podroot, $recurse, 0);
741    }
742    $Saved_Cache_Key = cache_key(@cache_key_args);
743}
744
745sub cache_key {
746    my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
747    return join('!', $dircache, $itemcache, $recurse,
748	@$podpath, $podroot, stat($dircache), stat($itemcache));
749}
750
751#
752# load_cache - tries to find if the caches stored in $dircache and $itemcache
753#  are valid caches of %Pages and %Items.  if they are valid then it loads
754#  them and returns a non-zero value.
755#
756sub load_cache {
757    my($dircache, $itemcache, $podpath, $podroot) = @_;
758    my($tests);
759    local $_;
760
761    $tests = 0;
762
763    open(CACHE, "<$itemcache") ||
764	die "$0: error opening $itemcache for reading: $!\n";
765    $/ = "\n";
766
767    # is it the same podpath?
768    $_ = <CACHE>;
769    chomp($_);
770    $tests++ if (join(":", @$podpath) eq $_);
771
772    # is it the same podroot?
773    $_ = <CACHE>;
774    chomp($_);
775    $tests++ if ($podroot eq $_);
776
777    # load the cache if its good
778    if ($tests != 2) {
779	close(CACHE);
780	return 0;
781    }
782
783    warn "loading item cache\n" if $Verbose;
784    while (<CACHE>) {
785	/(.*?) (.*)$/;
786	$Items{$1} = $2;
787    }
788    close(CACHE);
789
790    warn "scanning for directory cache\n" if $Verbose;
791    open(CACHE, "<$dircache") ||
792	die "$0: error opening $dircache for reading: $!\n";
793    $/ = "\n";
794    $tests = 0;
795
796    # is it the same podpath?
797    $_ = <CACHE>;
798    chomp($_);
799    $tests++ if (join(":", @$podpath) eq $_);
800
801    # is it the same podroot?
802    $_ = <CACHE>;
803    chomp($_);
804    $tests++ if ($podroot eq $_);
805
806    # load the cache if its good
807    if ($tests != 2) {
808	close(CACHE);
809	return 0;
810    }
811
812    warn "loading directory cache\n" if $Verbose;
813    while (<CACHE>) {
814	/(.*?) (.*)$/;
815	$Pages{$1} = $2;
816    }
817
818    close(CACHE);
819
820    return 1;
821}
822
823#
824# scan_podpath - scans the directories specified in @podpath for directories,
825#  .pod files, and .pm files.  it also scans the pod files specified in
826#  @Libpods for =item directives.
827#
828sub scan_podpath {
829    my($podroot, $recurse, $append) = @_;
830    my($pwd, $dir);
831    my($libpod, $dirname, $pod, @files, @poddata);
832
833    unless($append) {
834	%Items = ();
835	%Pages = ();
836    }
837
838    # scan each directory listed in @Podpath
839    $pwd = getcwd();
840    chdir($podroot)
841	|| die "$0: error changing to directory $podroot: $!\n";
842    foreach $dir (@Podpath) {
843	scan_dir($dir, $recurse);
844    }
845
846    # scan the pods listed in @Libpods for =item directives
847    foreach $libpod (@Libpods) {
848	# if the page isn't defined then we won't know where to find it
849	# on the system.
850	next unless defined $Pages{$libpod} && $Pages{$libpod};
851
852	# if there is a directory then use the .pod and .pm files within it.
853	# NOTE: Only finds the first so-named directory in the tree.
854#	if ($Pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
855	if ($Pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
856	    #  find all the .pod and .pm files within the directory
857	    $dirname = $1;
858	    opendir(DIR, $dirname) ||
859		die "$0: error opening directory $dirname: $!\n";
860	    @files = grep(/(\.pod|\.pm)\z/ && ! -d $_, readdir(DIR));
861	    closedir(DIR);
862
863	    # scan each .pod and .pm file for =item directives
864	    foreach $pod (@files) {
865		open(POD, "<$dirname/$pod") ||
866		    die "$0: error opening $dirname/$pod for input: $!\n";
867		@poddata = <POD>;
868		close(POD);
869		clean_data( \@poddata );
870
871		scan_items( \%Items, "$dirname/$pod", @poddata);
872	    }
873
874	    # use the names of files as =item directives too.
875### Don't think this should be done this way - confuses issues.(WL)
876###	    foreach $pod (@files) {
877###		$pod =~ /^(.*)(\.pod|\.pm)$/;
878###		$Items{$1} = "$dirname/$1.html" if $1;
879###	    }
880	} elsif ($Pages{$libpod} =~ /([^:]*\.pod):/ ||
881		 $Pages{$libpod} =~ /([^:]*\.pm):/) {
882	    # scan the .pod or .pm file for =item directives
883	    $pod = $1;
884	    open(POD, "<$pod") ||
885		die "$0: error opening $pod for input: $!\n";
886	    @poddata = <POD>;
887	    close(POD);
888	    clean_data( \@poddata );
889
890	    scan_items( \%Items, "$pod", @poddata);
891	} else {
892	    warn "$0: shouldn't be here (line ".__LINE__."\n";
893	}
894    }
895    @poddata = ();	# clean-up a bit
896
897    chdir($pwd)
898	|| die "$0: error changing to directory $pwd: $!\n";
899
900    # cache the item list for later use
901    warn "caching items for later use\n" if $Verbose;
902    open(CACHE, ">$Itemcache") ||
903	die "$0: error open $Itemcache for writing: $!\n";
904
905    print CACHE join(":", @Podpath) . "\n$podroot\n";
906    foreach my $key (keys %Items) {
907	print CACHE "$key $Items{$key}\n";
908    }
909
910    close(CACHE);
911
912    # cache the directory list for later use
913    warn "caching directories for later use\n" if $Verbose;
914    open(CACHE, ">$Dircache") ||
915	die "$0: error open $Dircache for writing: $!\n";
916
917    print CACHE join(":", @Podpath) . "\n$podroot\n";
918    foreach my $key (keys %Pages) {
919	print CACHE "$key $Pages{$key}\n";
920    }
921
922    close(CACHE);
923}
924
925#
926# scan_dir - scans the directory specified in $dir for subdirectories, .pod
927#  files, and .pm files.  notes those that it finds.  this information will
928#  be used later in order to figure out where the pages specified in L<>
929#  links are on the filesystem.
930#
931sub scan_dir {
932    my($dir, $recurse) = @_;
933    my($t, @subdirs, @pods, $pod, $dirname, @dirs);
934    local $_;
935
936    @subdirs = ();
937    @pods = ();
938
939    opendir(DIR, $dir) ||
940	die "$0: error opening directory $dir: $!\n";
941    while (defined($_ = readdir(DIR))) {
942	if (-d "$dir/$_" && $_ ne "." && $_ ne ".."
943	    && ($HiddenDirs || !/^\./)
944	) {         # directory
945	    $Pages{$_}  = "" unless defined $Pages{$_};
946	    $Pages{$_} .= "$dir/$_:";
947	    push(@subdirs, $_);
948	} elsif (/\.pod\z/) {	    	    	    	    # .pod
949	    s/\.pod\z//;
950	    $Pages{$_}  = "" unless defined $Pages{$_};
951	    $Pages{$_} .= "$dir/$_.pod:";
952	    push(@pods, "$dir/$_.pod");
953	} elsif (/\.html\z/) { 	    	    	    	    # .html
954	    s/\.html\z//;
955	    $Pages{$_}  = "" unless defined $Pages{$_};
956	    $Pages{$_} .= "$dir/$_.pod:";
957	} elsif (/\.pm\z/) { 	    	    	    	    # .pm
958	    s/\.pm\z//;
959	    $Pages{$_}  = "" unless defined $Pages{$_};
960	    $Pages{$_} .= "$dir/$_.pm:";
961	    push(@pods, "$dir/$_.pm");
962	}
963    }
964    closedir(DIR);
965
966    # recurse on the subdirectories if necessary
967    if ($recurse) {
968	foreach my $subdir (@subdirs) {
969	    scan_dir("$dir/$subdir", $recurse);
970	}
971    }
972}
973
974#
975# scan_headings - scan a pod file for head[1-6] tags, note the tags, and
976#  build an index.
977#
978sub scan_headings {
979    my($sections, @data) = @_;
980    my($tag, $which_head, $otitle, $listdepth, $index);
981
982    local $Ignore = 0;
983
984    $listdepth = 0;
985    $index = "";
986
987    # scan for =head directives, note their name, and build an index
988    #  pointing to each of them.
989    foreach my $line (@data) {
990      if ($line =~ /^=(head)([1-6])\s+(.*)/) {
991        ($tag, $which_head, $otitle) = ($1,$2,$3);
992
993        my $title = depod( $otitle );
994        my $name = anchorify( $title );
995        $$sections{$name} = 1;
996        $title = process_text( \$otitle );
997
998	    while ($which_head != $listdepth) {
999		if ($which_head > $listdepth) {
1000		    $index .= "\n" . ("\t" x $listdepth) . "<ul>\n";
1001		    $listdepth++;
1002		} elsif ($which_head < $listdepth) {
1003		    $listdepth--;
1004		    $index .= "\n" . ("\t" x $listdepth) . "</ul>\n";
1005		}
1006	    }
1007
1008	    $index .= "\n" . ("\t" x $listdepth) . "<li>" .
1009	              "<a href=\"#" . $name . "\">" .
1010		      $title . "</a></li>";
1011	}
1012    }
1013
1014    # finish off the lists
1015    while ($listdepth--) {
1016	$index .= "\n" . ("\t" x $listdepth) . "</ul>\n";
1017    }
1018
1019    # get rid of bogus lists
1020    $index =~ s,\t*<ul>\s*</ul>\n,,g;
1021
1022    return $index;
1023}
1024
1025#
1026# scan_items - scans the pod specified by $pod for =item directives.  we
1027#  will use this information later on in resolving C<> links.
1028#
1029sub scan_items {
1030    my( $itemref, $pod, @poddata ) = @_;
1031    my($i, $item);
1032    local $_;
1033
1034    $pod =~ s/\.pod\z//;
1035    $pod .= ".html" if $pod;
1036
1037    foreach $i (0..$#poddata) {
1038	my $txt = depod( $poddata[$i] );
1039
1040	# figure out what kind of item it is.
1041	# Build string for referencing this item.
1042	if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bullet
1043	    next unless $1;
1044	    $item = $1;
1045        } elsif( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list
1046	    $item = $1;
1047	} elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # plain item
1048	    $item = $1;
1049	} else {
1050	    next;
1051	}
1052	my $fid = fragment_id( $item );
1053	$$itemref{$fid} = "$pod" if $fid;
1054    }
1055}
1056
1057#
1058# process_head - convert a pod head[1-6] tag and convert it to HTML format.
1059#
1060sub process_head {
1061    my($tag, $heading, $hasindex) = @_;
1062
1063    # figure out the level of the =head
1064    $tag =~ /head([1-6])/;
1065    my $level = $1;
1066
1067    if( $Listlevel ){
1068	warn "$0: $Podfile: unterminated list at =head in paragraph $Paragraph.  ignoring.\n";
1069        while( $Listlevel ){
1070            process_back();
1071        }
1072    }
1073
1074    print HTML "<p>\n";
1075    if( $level == 1 && ! $Top ){
1076      print HTML "<a href=\"#__index__\"><small>$Backlink</small></a>\n"
1077        if $hasindex and $Backlink;
1078      print HTML "</p>\n<hr />\n"
1079    } else {
1080      print HTML "</p>\n";
1081    }
1082
1083    my $name = anchorify( depod( $heading ) );
1084    my $convert = process_text( \$heading );
1085    print HTML "<h$level><a name=\"$name\">$convert</a></h$level>\n";
1086}
1087
1088
1089#
1090# emit_item_tag - print an =item's text
1091# Note: The global $EmittedItem is used for inhibiting self-references.
1092#
1093my $EmittedItem;
1094
1095sub emit_item_tag($$$){
1096    my( $otext, $text, $compact ) = @_;
1097    my $item = fragment_id( $text );
1098
1099    $EmittedItem = $item;
1100    ### print STDERR "emit_item_tag=$item ($text)\n";
1101
1102    print HTML '<strong>';
1103    if ($Items_Named{$item}++) {
1104	print HTML process_text( \$otext );
1105    } else {
1106        my $name = 'item_' . $item;
1107        $name = anchorify($name);
1108	print HTML qq{<a name="$name">}, process_text( \$otext ), '</a>';
1109    }
1110    print HTML "</strong><br />\n";
1111    undef( $EmittedItem );
1112}
1113
1114sub emit_li {
1115    my( $tag ) = @_;
1116    if( $Items_Seen[$Listlevel]++ == 0 ){
1117	push( @Listend, "</$tag>" );
1118	print HTML "<$tag>\n";
1119    }
1120    my $emitted = $tag eq 'dl' ? 'dt' : 'li';
1121    print HTML "<$emitted>";
1122    return $emitted;
1123}
1124
1125#
1126# process_item - convert a pod item tag and convert it to HTML format.
1127#
1128sub process_item {
1129    my( $otext ) = @_;
1130    my $need_dd = 0; # set to 1 if we need a <dd></dd> after an item
1131
1132    # lots of documents start a list without doing an =over.  this is
1133    # bad!  but, the proper thing to do seems to be to just assume
1134    # they did do an =over.  so warn them once and then continue.
1135    if( $Listlevel == 0 ){
1136	warn "$0: $Podfile: unexpected =item directive in paragraph $Paragraph.  ignoring.\n";
1137	process_over();
1138    }
1139
1140    # formatting: insert a paragraph if preceding item has >1 paragraph
1141    if( $After_Lpar ){
1142	print HTML "<p></p>\n";
1143	$After_Lpar = 0;
1144    }
1145
1146    # remove formatting instructions from the text
1147    my $text = depod( $otext );
1148
1149    my $emitted; # the tag actually emitted, used for closing
1150
1151    # all the list variants:
1152    if( $text =~ /\A\*/ ){ # bullet
1153        $emitted = emit_li( 'ul' );
1154        if ($text =~ /\A\*\s+(.+)\Z/s ) { # with additional text
1155            my $tag = $1;
1156            $otext =~ s/\A\*\s+//;
1157            emit_item_tag( $otext, $tag, 1 );
1158        }
1159
1160    } elsif( $text =~ /\A\d+/ ){ # numbered list
1161        $emitted = emit_li( 'ol' );
1162        if ($text =~ /\A(?>\d+\.?)\s*(.+)\Z/s ) { # with additional text
1163            my $tag = $1;
1164            $otext =~ s/\A\d+\.?\s*//;
1165            emit_item_tag( $otext, $tag, 1 );
1166        }
1167
1168    } else {			# definition list
1169        $emitted = emit_li( 'dl' );
1170        if ($text =~ /\A(.+)\Z/s ){ # should have text
1171            emit_item_tag( $otext, $text, 1 );
1172        }
1173        $need_dd = 1;
1174    }
1175    print HTML "</$emitted>" if $emitted;
1176    print HTML "\n";
1177    return $need_dd;
1178}
1179
1180#
1181# process_over - process a pod over tag and start a corresponding HTML list.
1182#
1183sub process_over {
1184    # start a new list
1185    $Listlevel++;
1186    push( @Items_Seen, 0 );
1187    $After_Lpar = 0;
1188}
1189
1190#
1191# process_back - process a pod back tag and convert it to HTML format.
1192#
1193sub process_back {
1194    if( $Listlevel == 0 ){
1195	warn "$0: $Podfile: unexpected =back directive in paragraph $Paragraph.  ignoring.\n";
1196	return;
1197    }
1198
1199    # close off the list.  note, I check to see if $Listend[$Listlevel] is
1200    # defined because an =item directive may have never appeared and thus
1201    # $Listend[$Listlevel] may have never been initialized.
1202    $Listlevel--;
1203    if( defined $Listend[$Listlevel] ){
1204	print HTML '<p></p>' if $After_Lpar;
1205	print HTML $Listend[$Listlevel];
1206        print HTML "\n";
1207        pop( @Listend );
1208    }
1209    $After_Lpar = 0;
1210
1211    # clean up item count
1212    pop( @Items_Seen );
1213}
1214
1215#
1216# process_cut - process a pod cut tag, thus start ignoring pod directives.
1217#
1218sub process_cut {
1219    $Ignore = 1;
1220}
1221
1222#
1223# process_pod - process a pod tag, thus stop ignoring pod directives
1224# until we see a corresponding cut.
1225#
1226sub process_pod {
1227    # no need to set $Ignore to 0 cause the main loop did it
1228}
1229
1230#
1231# process_for - process a =for pod tag.  if it's for html, spit
1232# it out verbatim, if illustration, center it, otherwise ignore it.
1233#
1234sub process_for {
1235    my($whom, $text) = @_;
1236    if ( $whom =~ /^(pod2)?html$/i) {
1237	print HTML $text;
1238    } elsif ($whom =~ /^illustration$/i) {
1239        1 while chomp $text;
1240	for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1241	  $text .= $ext, last if -r "$text$ext";
1242	}
1243        print HTML qq{<p align="center"><img src="$text" alt="$text illustration" /></p>};
1244    }
1245}
1246
1247#
1248# process_begin - process a =begin pod tag.  this pushes
1249# whom we're beginning on the begin stack.  if there's a
1250# begin stack, we only print if it us.
1251#
1252sub process_begin {
1253    my($whom, $text) = @_;
1254    $whom = lc($whom);
1255    push (@Begin_Stack, $whom);
1256    if ( $whom =~ /^(pod2)?html$/) {
1257	print HTML $text if $text;
1258    }
1259}
1260
1261#
1262# process_end - process a =end pod tag.  pop the
1263# begin stack.  die if we're mismatched.
1264#
1265sub process_end {
1266    my($whom, $text) = @_;
1267    $whom = lc($whom);
1268    if ($Begin_Stack[-1] ne $whom ) {
1269	die "Unmatched begin/end at chunk $Paragraph\n"
1270    }
1271    pop( @Begin_Stack );
1272}
1273
1274#
1275# process_pre - indented paragraph, made into <pre></pre>
1276#
1277sub process_pre {
1278    my( $text ) = @_;
1279    my( $rest );
1280    return if $Ignore;
1281
1282    $rest = $$text;
1283
1284    # insert spaces in place of tabs
1285    $rest =~ s#(.+)#
1286	    my $line = $1;
1287            1 while $line =~ s/(\t+)/' ' x ((length($1) * 8) - $-[0] % 8)/e;
1288	    $line;
1289	#eg;
1290
1291    # convert some special chars to HTML escapes
1292    $rest = html_escape($rest);
1293
1294    # try and create links for all occurrences of perl.* within
1295    # the preformatted text.
1296    $rest =~ s{
1297	         (\s*)(perl\w+)
1298	      }{
1299		 if ( defined $Pages{$2} ){	# is a link
1300		     qq($1<a href="$Htmlroot/$Pages{$2}">$2</a>);
1301		 } elsif (defined $Pages{dosify($2)}) {	# is a link
1302		     qq($1<a href="$Htmlroot/$Pages{dosify($2)}">$2</a>);
1303		 } else {
1304		     "$1$2";
1305		 }
1306	      }xeg;
1307     $rest =~ s{
1308		 (<a\ href="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
1309               }{
1310                  my $url ;
1311                  if ( $Htmlfileurl ne '' ){
1312		     # Here, we take advantage of the knowledge
1313		     # that $Htmlfileurl ne '' implies $Htmlroot eq ''.
1314		     # Since $Htmlroot eq '', we need to prepend $Htmldir
1315		     # on the fron of the link to get the absolute path
1316		     # of the link's target. We check for a leading '/'
1317		     # to avoid corrupting links that are #, file:, etc.
1318		     my $old_url = $3 ;
1319		     $old_url = "$Htmldir$old_url" if $old_url =~ m{^\/};
1320 		     $url = relativize_url( "$old_url.html", $Htmlfileurl );
1321	          } else {
1322		     $url = "$3.html" ;
1323		  }
1324		  "$1$url" ;
1325	       }xeg;
1326
1327    # Look for embedded URLs and make them into links.  We don't
1328    # relativize them since they are best left as the author intended.
1329
1330    my $urls = '(' . join ('|', qw{
1331                http
1332                telnet
1333		mailto
1334		news
1335                gopher
1336                file
1337                wais
1338                ftp
1339            } )
1340        . ')';
1341
1342    my $ltrs = '\w';
1343    my $gunk = '/#~:.?+=&%@!\-';
1344    my $punc = '.:!?\-;';
1345    my $any  = "${ltrs}${gunk}${punc}";
1346
1347    $rest =~ s{
1348	\b			# start at word boundary
1349	(			# begin $1  {
1350	    $urls :		# need resource and a colon
1351	    (?!:)		# Ignore File::, among others.
1352	    [$any] +?		# followed by one or more of any valid
1353				#   character, but be conservative and
1354				#   take only what you need to....
1355	)			# end   $1  }
1356	(?=
1357	    &quot; &gt;		# maybe pre-quoted '<a href="...">'
1358	|			# or:
1359	    [$punc]*		# 0 or more punctuation
1360	    (?:			#   followed
1361		[^$any]		#   by a non-url char
1362	    |			#   or
1363		$		#   end of the string
1364	    )			#
1365	|			# or else
1366	    $			#   then end of the string
1367        )
1368      }{<a href="$1">$1</a>}igox;
1369
1370    # text should be as it is (verbatim)
1371    $$text = $rest;
1372}
1373
1374
1375#
1376# pure text processing
1377#
1378# pure_text/inIS_text: differ with respect to automatic C<> recognition.
1379# we don't want this to happen within IS
1380#
1381sub pure_text($){
1382    my $text = shift();
1383    process_puretext( $text, \$PTQuote, 1 );
1384}
1385
1386sub inIS_text($){
1387    my $text = shift();
1388    process_puretext( $text, \$PTQuote, 0 );
1389}
1390
1391#
1392# process_puretext - process pure text (without pod-escapes) converting
1393#  double-quotes and handling implicit C<> links.
1394#
1395sub process_puretext {
1396    my($text, $quote, $notinIS) = @_;
1397
1398    ## Guessing at func() or [$@%&]*var references in plain text is destined
1399    ## to produce some strange looking ref's. uncomment to disable:
1400    ## $notinIS = 0;
1401
1402    my(@words, $lead, $trail);
1403
1404    # convert double-quotes to single-quotes
1405    if( $$quote && $text =~ s/"/''/s ){
1406        $$quote = 0;
1407    }
1408    while ($text =~ s/"([^"]*)"/``$1''/sg) {};
1409    $$quote = 1 if $text =~ s/"/``/s;
1410
1411    # keep track of leading and trailing white-space
1412    $lead  = ($text =~ s/\A(\s+)//s ? $1 : "");
1413    $trail = ($text =~ s/(\s+)\Z//s ? $1 : "");
1414
1415    # split at space/non-space boundaries
1416    @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text );
1417
1418    # process each word individually
1419    foreach my $word (@words) {
1420	# skip space runs
1421 	next if $word =~ /^\s*$/;
1422	# see if we can infer a link
1423	if( $notinIS && $word =~ /^(\w+)\((.*)\)$/ ) {
1424	    # has parenthesis so should have been a C<> ref
1425            ## try for a pagename (perlXXX(1))?
1426            my( $func, $args ) = ( $1, $2 );
1427            if( $args =~ /^\d+$/ ){
1428                my $url = page_sect( $word, '' );
1429                if( defined $url ){
1430                    $word = "<a href=\"$url\">the $word manpage</a>";
1431                    next;
1432                }
1433            }
1434            ## try function name for a link, append tt'ed argument list
1435            $word = emit_C( $func, '', "($args)");
1436
1437#### disabled. either all (including $\W, $\w+{.*} etc.) or nothing.
1438##      } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) {
1439##	    # perl variables, should be a C<> ref
1440##	    $word = emit_C( $word );
1441
1442	} elsif ($word =~ m,^\w+://\w,) {
1443	    # looks like a URL
1444            # Don't relativize it: leave it as the author intended
1445	    $word = qq(<a href="$word">$word</a>);
1446	} elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
1447	    # looks like an e-mail address
1448	    my ($w1, $w2, $w3) = ("", $word, "");
1449	    ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1450	    ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1451	    $word = qq($w1<a href="mailto:$w2">$w2</a>$w3);
1452	} else {
1453	    $word = html_escape($word) if $word =~ /["&<>]/;
1454	}
1455    }
1456
1457    # put everything back together
1458    return $lead . join( '', @words ) . $trail;
1459}
1460
1461
1462#
1463# process_text - handles plaintext that appears in the input pod file.
1464# there may be pod commands embedded within the text so those must be
1465# converted to html commands.
1466#
1467
1468sub process_text1($$;$$);
1469sub pattern ($) { $_[0] ? '[^\S\n]+'.('>' x ($_[0] + 1)) : '>' }
1470sub closing ($) { local($_) = shift; (defined && s/\s+$//) ? length : 0 }
1471
1472sub process_text {
1473    return if $Ignore;
1474    my( $tref ) = @_;
1475    my $res = process_text1( 0, $tref );
1476    $$tref = $res;
1477}
1478
1479sub process_text1($$;$$){
1480    my( $lev, $rstr, $func, $closing ) = @_;
1481    my $res = '';
1482
1483    unless (defined $func) {
1484	$func = '';
1485	$lev++;
1486    }
1487
1488    if( $func eq 'B' ){
1489	# B<text> - boldface
1490	$res = '<strong>' . process_text1( $lev, $rstr ) . '</strong>';
1491
1492    } elsif( $func eq 'C' ){
1493	# C<code> - can be a ref or <code></code>
1494	# need to extract text
1495	my $par = go_ahead( $rstr, 'C', $closing );
1496
1497	## clean-up of the link target
1498        my $text = depod( $par );
1499
1500	### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ;
1501        ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n";
1502
1503	$res = emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1504
1505    } elsif( $func eq 'E' ){
1506	# E<x> - convert to character
1507	$$rstr =~ s/^([^>]*)>//;
1508	my $escape = $1;
1509	$escape =~ s/^(\d+|X[\dA-F]+)$/#$1/i;
1510	$res = "&$escape;";
1511
1512    } elsif( $func eq 'F' ){
1513	# F<filename> - italizice
1514	$res = '<em>' . process_text1( $lev, $rstr ) . '</em>';
1515
1516    } elsif( $func eq 'I' ){
1517	# I<text> - italizice
1518	$res = '<em>' . process_text1( $lev, $rstr ) . '</em>';
1519
1520    } elsif( $func eq 'L' ){
1521	# L<link> - link
1522	## L<text|cross-ref> => produce text, use cross-ref for linking
1523	## L<cross-ref> => make text from cross-ref
1524	## need to extract text
1525	my $par = go_ahead( $rstr, 'L', $closing );
1526
1527        # some L<>'s that shouldn't be:
1528	# a) full-blown URL's are emitted as-is
1529        if( $par =~ m{^\w+://}s ){
1530	    return make_URL_href( $par );
1531	}
1532        # b) C<...> is stripped and treated as C<>
1533        if( $par =~ /^C<(.*)>$/ ){
1534	    my $text = depod( $1 );
1535 	    return emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1536	}
1537
1538	# analyze the contents
1539	$par =~ s/\n/ /g;   # undo word-wrapped tags
1540        my $opar = $par;
1541	my $linktext;
1542	if( $par =~ s{^([^|]+)\|}{} ){
1543	    $linktext = $1;
1544	}
1545
1546	# make sure sections start with a /
1547	$par =~ s{^"}{/"};
1548
1549	my( $page, $section, $ident );
1550
1551	# check for link patterns
1552	if( $par =~ m{^([^/]+?)/(?!")(.*?)$} ){     # name/ident
1553            # we've got a name/ident (no quotes)
1554            ( $page, $ident ) = ( $1, $2 );
1555            ### print STDERR "--> L<$par> to page $page, ident $ident\n";
1556
1557	} elsif( $par =~ m{^(.*?)/"?(.*?)"?$} ){ # [name]/"section"
1558            # even though this should be a "section", we go for ident first
1559	    ( $page, $ident ) = ( $1, $2 );
1560            ### print STDERR "--> L<$par> to page $page, section $section\n";
1561
1562	} elsif( $par =~ /\s/ ){  # this must be a section with missing quotes
1563	    ( $page, $section ) = ( '', $par );
1564            ### print STDERR "--> L<$par> to void page, section $section\n";
1565
1566        } else {
1567	    ( $page, $section ) = ( $par, '' );
1568            ### print STDERR "--> L<$par> to page $par, void section\n";
1569	}
1570
1571        # now, either $section or $ident is defined. the convoluted logic
1572        # below tries to resolve L<> according to what the user specified.
1573        # failing this, we try to find the next best thing...
1574        my( $url, $ltext, $fid );
1575
1576        RESOLVE: {
1577            if( defined $ident ){
1578                ## try to resolve $ident as an item
1579	        ( $url, $fid ) = coderef( $page, $ident );
1580                if( $url ){
1581                    if( ! defined( $linktext ) ){
1582                        $linktext = $ident;
1583                        $linktext .= " in " if $ident && $page;
1584                        $linktext .= "the $page manpage" if $page;
1585                    }
1586                    ###  print STDERR "got coderef url=$url\n";
1587                    last RESOLVE;
1588                }
1589                ## no luck: go for a section (auto-quoting!)
1590                $section = $ident;
1591            }
1592            ## now go for a section
1593            my $htmlsection = htmlify( $section );
1594 	    $url = page_sect( $page, $htmlsection );
1595            if( $url ){
1596                if( ! defined( $linktext ) ){
1597                    $linktext = $section;
1598                    $linktext .= " in " if $section && $page;
1599                    $linktext .= "the $page manpage" if $page;
1600                }
1601                ### print STDERR "got page/section url=$url\n";
1602                last RESOLVE;
1603            }
1604            ## no luck: go for an ident
1605            if( $section ){
1606                $ident = $section;
1607            } else {
1608                $ident = $page;
1609                $page  = undef();
1610            }
1611            ( $url, $fid ) = coderef( $page, $ident );
1612            if( $url ){
1613                if( ! defined( $linktext ) ){
1614                    $linktext = $ident;
1615                    $linktext .= " in " if $ident && $page;
1616                    $linktext .= "the $page manpage" if $page;
1617                }
1618                ### print STDERR "got section=>coderef url=$url\n";
1619                last RESOLVE;
1620            }
1621
1622            # warning; show some text.
1623            $linktext = $opar unless defined $linktext;
1624            warn "$0: $Podfile: cannot resolve L<$opar> in paragraph $Paragraph.\n";
1625        }
1626
1627        # now we have a URL or just plain code
1628        $$rstr = $linktext . '>' . $$rstr;
1629        if( defined( $url ) ){
1630            $res = "<a href=\"$url\">" . process_text1( $lev, $rstr ) . '</a>';
1631        } else {
1632	    $res = '<em>' . process_text1( $lev, $rstr ) . '</em>';
1633        }
1634
1635    } elsif( $func eq 'S' ){
1636	# S<text> - non-breaking spaces
1637	$res = process_text1( $lev, $rstr );
1638	$res =~ s/ /&nbsp;/g;
1639
1640    } elsif( $func eq 'X' ){
1641	# X<> - ignore
1642	$$rstr =~ s/^[^>]*>//;
1643
1644    } elsif( $func eq 'Z' ){
1645	# Z<> - empty
1646	warn "$0: $Podfile: invalid X<> in paragraph $Paragraph.\n"
1647	    unless $$rstr =~ s/^>//;
1648
1649    } else {
1650        my $term = pattern $closing;
1651	while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){
1652	    # all others: either recurse into new function or
1653	    # terminate at closing angle bracket(s)
1654	    my $pt = $1;
1655            $pt .= $2 if !$3 &&  $lev == 1;
1656	    $res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt );
1657	    return $res if !$3 && $lev > 1;
1658            if( $3 ){
1659		$res .= process_text1( $lev, $rstr, $3, closing $4 );
1660 	    }
1661	}
1662	if( $lev == 1 ){
1663	    $res .= pure_text( $$rstr );
1664	} else {
1665	    warn "$0: $Podfile: undelimited $func<> in paragraph $Paragraph.\n";
1666	}
1667    }
1668    return $res;
1669}
1670
1671#
1672# go_ahead: extract text of an IS (can be nested)
1673#
1674sub go_ahead($$$){
1675    my( $rstr, $func, $closing ) = @_;
1676    my $res = '';
1677    my @closing = ($closing);
1678    while( $$rstr =~
1679      s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|@{[pattern $closing[0]]})//s ){
1680	$res .= $1;
1681	unless( $3 ){
1682	    shift @closing;
1683	    return $res unless @closing;
1684	} else {
1685	    unshift @closing, closing $4;
1686	}
1687	$res .= $2;
1688    }
1689    warn "$0: $Podfile: undelimited $func<> in paragraph $Paragraph.\n";
1690    return $res;
1691}
1692
1693#
1694# emit_C - output result of C<text>
1695#    $text is the depod-ed text
1696#
1697sub emit_C($;$$){
1698    my( $text, $nocode, $args ) = @_;
1699    $args = '' unless defined $args;
1700    my $res;
1701    my( $url, $fid ) = coderef( undef(), $text );
1702
1703    # need HTML-safe text
1704    my $linktext = html_escape( "$text$args" );
1705
1706    if( defined( $url ) &&
1707        (!defined( $EmittedItem ) || $EmittedItem ne $fid ) ){
1708	$res = "<a href=\"$url\"><code>$linktext</code></a>";
1709    } elsif( 0 && $nocode ){
1710	$res = $linktext;
1711    } else {
1712	$res = "<code>$linktext</code>";
1713    }
1714    return $res;
1715}
1716
1717#
1718# html_escape: make text safe for HTML
1719#
1720sub html_escape {
1721    my $rest = $_[0];
1722    $rest   =~ s/&/&amp;/g;
1723    $rest   =~ s/</&lt;/g;
1724    $rest   =~ s/>/&gt;/g;
1725    $rest   =~ s/"/&quot;/g;
1726    # &apos; is only in XHTML, not HTML4.  Be conservative
1727    #$rest   =~ s/'/&apos;/g;
1728    return $rest;
1729}
1730
1731
1732#
1733# dosify - convert filenames to 8.3
1734#
1735sub dosify {
1736    my($str) = @_;
1737    return lc($str) if $^O eq 'VMS';     # VMS just needs casing
1738    if ($Is83) {
1739        $str = lc $str;
1740        $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1741        $str =~ s/(\w+)/substr ($1,0,8)/ge;
1742    }
1743    return $str;
1744}
1745
1746#
1747# page_sect - make a URL from the text of a L<>
1748#
1749sub page_sect($$) {
1750    my( $page, $section ) = @_;
1751    my( $linktext, $page83, $link);	# work strings
1752
1753    # check if we know that this is a section in this page
1754    if (!defined $Pages{$page} && defined $Sections{$page}) {
1755	$section = $page;
1756	$page = "";
1757        ### print STDERR "reset page='', section=$section\n";
1758    }
1759
1760    $page83=dosify($page);
1761    $page=$page83 if (defined $Pages{$page83});
1762    if ($page eq "") {
1763        $link = "#" . anchorify( $section );
1764    } elsif ( $page =~ /::/ ) {
1765	$page =~ s,::,/,g;
1766	# Search page cache for an entry keyed under the html page name,
1767	# then look to see what directory that page might be in.  NOTE:
1768	# this will only find one page. A better solution might be to produce
1769	# an intermediate page that is an index to all such pages.
1770	my $page_name = $page ;
1771	$page_name =~ s,^.*/,,s ;
1772	if ( defined( $Pages{ $page_name } ) &&
1773	     $Pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
1774	   ) {
1775	    $page = $1 ;
1776	}
1777	else {
1778	    # NOTE: This branch assumes that all A::B pages are located in
1779	    # $Htmlroot/A/B.html . This is often incorrect, since they are
1780	    # often in $Htmlroot/lib/A/B.html or such like. Perhaps we could
1781	    # analyze the contents of %Pages and figure out where any
1782	    # cousins of A::B are, then assume that.  So, if A::B isn't found,
1783	    # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
1784	    # lib/A/B.pm. This is also limited, but it's an improvement.
1785	    # Maybe a hints file so that the links point to the correct places
1786	    # nonetheless?
1787
1788	}
1789	$link = "$Htmlroot/$page.html";
1790	$link .= "#" . anchorify( $section ) if ($section);
1791    } elsif (!defined $Pages{$page}) {
1792	$link = "";
1793    } else {
1794	$section = anchorify( $section ) if $section ne "";
1795        ### print STDERR "...section=$section\n";
1796
1797	# if there is a directory by the name of the page, then assume that an
1798	# appropriate section will exist in the subdirectory
1799#	if ($section ne "" && $Pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1800	if ($section ne "" && $Pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
1801	    $link = "$Htmlroot/$1/$section.html";
1802            ### print STDERR "...link=$link\n";
1803
1804	# since there is no directory by the name of the page, the section will
1805	# have to exist within a .html of the same name.  thus, make sure there
1806	# is a .pod or .pm that might become that .html
1807	} else {
1808	    $section = "#$section" if $section;
1809            ### print STDERR "...section=$section\n";
1810
1811	    # check if there is a .pod with the page name
1812	    if ($Pages{$page} =~ /([^:]*)\.pod:/) {
1813		$link = "$Htmlroot/$1.html$section";
1814	    } elsif ($Pages{$page} =~ /([^:]*)\.pm:/) {
1815		$link = "$Htmlroot/$1.html$section";
1816	    } else {
1817		$link = "";
1818	    }
1819	}
1820    }
1821
1822    if ($link) {
1823	# Here, we take advantage of the knowledge that $Htmlfileurl ne ''
1824	# implies $Htmlroot eq ''. This means that the link in question
1825	# needs a prefix of $Htmldir if it begins with '/'. The test for
1826	# the initial '/' is done to avoid '#'-only links, and to allow
1827	# for other kinds of links, like file:, ftp:, etc.
1828        my $url ;
1829        if (  $Htmlfileurl ne '' ) {
1830            $link = "$Htmldir$link" if $link =~ m{^/}s;
1831            $url = relativize_url( $link, $Htmlfileurl );
1832# print( "  b: [$link,$Htmlfileurl,$url]\n" );
1833	}
1834	else {
1835            $url = $link ;
1836	}
1837	return $url;
1838
1839    } else {
1840	return undef();
1841    }
1842}
1843
1844#
1845# relativize_url - convert an absolute URL to one relative to a base URL.
1846# Assumes both end in a filename.
1847#
1848sub relativize_url {
1849    my ($dest,$source) = @_ ;
1850
1851    my ($dest_volume,$dest_directory,$dest_file) =
1852        File::Spec::Unix->splitpath( $dest ) ;
1853    $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
1854
1855    my ($source_volume,$source_directory,$source_file) =
1856        File::Spec::Unix->splitpath( $source ) ;
1857    $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
1858
1859    my $rel_path = '' ;
1860    if ( $dest ne '' ) {
1861       $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
1862    }
1863
1864    if ( $rel_path ne ''                &&
1865         substr( $rel_path, -1 ) ne '/' &&
1866         substr( $dest_file, 0, 1 ) ne '#'
1867        ) {
1868        $rel_path .= "/$dest_file" ;
1869    }
1870    else {
1871        $rel_path .= "$dest_file" ;
1872    }
1873
1874    return $rel_path ;
1875}
1876
1877
1878#
1879# coderef - make URL from the text of a C<>
1880#
1881sub coderef($$){
1882    my( $page, $item ) = @_;
1883    my( $url );
1884
1885    my $fid = fragment_id( $item );
1886    if( defined( $page ) ){
1887	# we have been given a $page...
1888	$page =~ s{::}{/}g;
1889
1890	# Do we take it? Item could be a section!
1891	my $base = $Items{$fid} || "";
1892	$base =~ s{[^/]*/}{};
1893	if( $base ne "$page.html" ){
1894            ###   print STDERR "coderef( $page, $item ): items{$fid} = $Items{$fid} = $base => discard page!\n";
1895	    $page = undef();
1896	}
1897
1898    } else {
1899        # no page - local items precede cached items
1900	if( defined( $fid ) ){
1901	    if(  exists $Local_Items{$fid} ){
1902		$page = $Local_Items{$fid};
1903	    } else {
1904		$page = $Items{$fid};
1905	    }
1906	}
1907    }
1908
1909    # if there was a pod file that we found earlier with an appropriate
1910    # =item directive, then create a link to that page.
1911    if( defined $page ){
1912	if( $page ){
1913            if( exists $Pages{$page} and $Pages{$page} =~ /([^:.]*)\.[^:]*:/){
1914		$page = $1 . '.html';
1915	    }
1916	    my $link = "$Htmlroot/$page#item_" . anchorify($fid);
1917
1918	    # Here, we take advantage of the knowledge that $Htmlfileurl
1919	    # ne '' implies $Htmlroot eq ''.
1920	    if (  $Htmlfileurl ne '' ) {
1921		$link = "$Htmldir$link" ;
1922		$url = relativize_url( $link, $Htmlfileurl ) ;
1923	    } else {
1924		$url = $link ;
1925	    }
1926	} else {
1927	    $url = "#item_" . anchorify($fid);
1928	}
1929
1930	confess "url has space: $url" if $url =~ /"[^"]*\s[^"]*"/;
1931    }
1932    return( $url, $fid );
1933}
1934
1935
1936
1937#
1938# Adapted from Nick Ing-Simmons' PodToHtml package.
1939sub relative_url {
1940    my $source_file = shift ;
1941    my $destination_file = shift;
1942
1943    my $source = URI::file->new_abs($source_file);
1944    my $uo = URI::file->new($destination_file,$source)->abs;
1945    return $uo->rel->as_string;
1946}
1947
1948
1949#
1950# finish_list - finish off any pending HTML lists.  this should be called
1951# after the entire pod file has been read and converted.
1952#
1953sub finish_list {
1954    while ($Listlevel > 0) {
1955	print HTML "</dl>\n";
1956	$Listlevel--;
1957    }
1958}
1959
1960#
1961# htmlify - converts a pod section specification to a suitable section
1962# specification for HTML. Note that we keep spaces and special characters
1963# except ", ? (Netscape problem) and the hyphen (writer's problem...).
1964#
1965sub htmlify {
1966    my( $heading) = @_;
1967    $heading =~ s/(\s+)/ /g;
1968    $heading =~ s/\s+\Z//;
1969    $heading =~ s/\A\s+//;
1970    # The hyphen is a disgrace to the English language.
1971    $heading =~ s/[-"?]//g;
1972    $heading = lc( $heading );
1973    return $heading;
1974}
1975
1976#
1977# similar to htmlify, but turns non-alphanumerics into underscores
1978#
1979sub anchorify {
1980    my ($anchor) = @_;
1981    $anchor = htmlify($anchor);
1982    $anchor =~ s/\W/_/g;
1983    return $anchor;
1984}
1985
1986#
1987# depod - convert text by eliminating all interior sequences
1988# Note: can be called with copy or modify semantics
1989#
1990my %E2c;
1991$E2c{lt}     = '<';
1992$E2c{gt}     = '>';
1993$E2c{sol}    = '/';
1994$E2c{verbar} = '|';
1995$E2c{amp}    = '&'; # in Tk's pods
1996
1997sub depod1($;$$);
1998
1999sub depod($){
2000    my $string;
2001    if( ref( $_[0] ) ){
2002	$string =  ${$_[0]};
2003        ${$_[0]} = depod1( \$string );
2004    } else {
2005	$string =  $_[0];
2006        depod1( \$string );
2007    }
2008}
2009
2010sub depod1($;$$){
2011  my( $rstr, $func, $closing ) = @_;
2012  my $res = '';
2013  return $res unless defined $$rstr;
2014  if( ! defined( $func ) ){
2015      # skip to next begin of an interior sequence
2016      while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<(<+[^\S\n]+)?// ){
2017         # recurse into its text
2018	  $res .= $1 . depod1( $rstr, $2, closing $3);
2019      }
2020      $res .= $$rstr;
2021  } elsif( $func eq 'E' ){
2022      # E<x> - convert to character
2023      $$rstr =~ s/^([^>]*)>//;
2024      $res .= $E2c{$1} || "";
2025  } elsif( $func eq 'X' ){
2026      # X<> - ignore
2027      $$rstr =~ s/^[^>]*>//;
2028  } elsif( $func eq 'Z' ){
2029      # Z<> - empty
2030      $$rstr =~ s/^>//;
2031  } else {
2032      # all others: either recurse into new function or
2033      # terminate at closing angle bracket
2034      my $term = pattern $closing;
2035      while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)// ){
2036	  $res .= $1;
2037	  last unless $3;
2038          $res .= depod1( $rstr, $3, closing $4 );
2039      }
2040      ## If we're here and $2 ne '>': undelimited interior sequence.
2041      ## Ignored, as this is called without proper indication of where we are.
2042      ## Rely on process_text to produce diagnostics.
2043  }
2044  return $res;
2045}
2046
2047#
2048# fragment_id - construct a fragment identifier from:
2049#   a) =item text
2050#   b) contents of C<...>
2051#
2052my @HC;
2053sub fragment_id {
2054    my $text = shift();
2055    $text =~ s/\s+\Z//s;
2056    if( $text ){
2057	# a method or function?
2058	return $1 if $text =~ /(\w+)\s*\(/;
2059	return $1 if $text =~ /->\s*(\w+)\s*\(?/;
2060
2061	# a variable name?
2062	return $1 if $text =~ /^([$@%*]\S+)/;
2063
2064	# some pattern matching operator?
2065	return $1 if $text =~ m|^(\w+/).*/\w*$|;
2066
2067	# fancy stuff... like "do { }"
2068	return $1 if $text =~ m|^(\w+)\s*{.*}$|;
2069
2070	# honour the perlfunc manpage: func [PAR[,[ ]PAR]...]
2071	# and some funnies with ... Module ...
2072	return $1 if $text =~ m{^([a-z\d]+)(\s+[A-Z\d,/& ]+)?$};
2073	return $1 if $text =~ m{^([a-z\d]+)\s+Module(\s+[A-Z\d,/& ]+)?$};
2074
2075	# text? normalize!
2076	$text =~ s/\s+/_/sg;
2077	$text =~ s{(\W)}{
2078         defined( $HC[ord($1)] ) ? $HC[ord($1)]
2079                 : ( $HC[ord($1)] = sprintf( "%%%02X", ord($1) ) ) }gxe;
2080        $text = substr( $text, 0, 50 );
2081    } else {
2082	return undef();
2083    }
2084}
2085
2086#
2087# make_URL_href - generate HTML href from URL
2088# Special treatment for CGI queries.
2089#
2090sub make_URL_href($){
2091    my( $url ) = @_;
2092    if( $url !~
2093        s{^(http:[-\w/#~:.+=&%@!]+)(\?.*)$}{<a href="$1$2">$1</a>}i ){
2094        $url = "<a href=\"$url\">$url</a>";
2095    }
2096    return $url;
2097}
2098
20991;
2100