xref: /onnv-gate/usr/src/cmd/perl/5.8.4/distrib/lib/Pod/Checker.pm (revision 0:68f95e015346)
1#############################################################################
2# Pod/Checker.pm -- check pod documents for syntax errors
3#
4# Copyright (C) 1994-2000 by Bradford Appleton. All rights reserved.
5# This file is part of "PodParser". PodParser is free software;
6# you can redistribute it and/or modify it under the same terms
7# as Perl itself.
8#############################################################################
9
10package Pod::Checker;
11
12use vars qw($VERSION);
13$VERSION = 1.41;  ## Current version of this package
14require  5.005;    ## requires this Perl version or later
15
16use Pod::ParseUtils; ## for hyperlinks and lists
17
18=head1 NAME
19
20Pod::Checker, podchecker() - check pod documents for syntax errors
21
22=head1 SYNOPSIS
23
24  use Pod::Checker;
25
26  $syntax_okay = podchecker($filepath, $outputpath, %options);
27
28  my $checker = new Pod::Checker %options;
29  $checker->parse_from_file($filepath, \*STDERR);
30
31=head1 OPTIONS/ARGUMENTS
32
33C<$filepath> is the input POD to read and C<$outputpath> is
34where to write POD syntax error messages. Either argument may be a scalar
35indicating a file-path, or else a reference to an open filehandle.
36If unspecified, the input-file it defaults to C<\*STDIN>, and
37the output-file defaults to C<\*STDERR>.
38
39=head2 podchecker()
40
41This function can take a hash of options:
42
43=over 4
44
45=item B<-warnings> =E<gt> I<val>
46
47Turn warnings on/off. I<val> is usually 1 for on, but higher values
48trigger additional warnings. See L<"Warnings">.
49
50=back
51
52=head1 DESCRIPTION
53
54B<podchecker> will perform syntax checking of Perl5 POD format documentation.
55
56Curious/ambitious users are welcome to propose additional features they wish
57to see in B<Pod::Checker> and B<podchecker> and verify that the checks are
58consistent with L<perlpod>.
59
60The following checks are currently preformed:
61
62=over 4
63
64=item *
65
66Unknown '=xxxx' commands, unknown 'XE<lt>...E<gt>' interior-sequences,
67and unterminated interior sequences.
68
69=item *
70
71Check for proper balancing of C<=begin> and C<=end>. The contents of such
72a block are generally ignored, i.e. no syntax checks are performed.
73
74=item *
75
76Check for proper nesting and balancing of C<=over>, C<=item> and C<=back>.
77
78=item *
79
80Check for same nested interior-sequences (e.g.
81C<LE<lt>...LE<lt>...E<gt>...E<gt>>).
82
83=item *
84
85Check for malformed or nonexisting entities C<EE<lt>...E<gt>>.
86
87=item *
88
89Check for correct syntax of hyperlinks C<LE<lt>...E<gt>>. See L<perlpod>
90for details.
91
92=item *
93
94Check for unresolved document-internal links. This check may also reveal
95misspelled links that seem to be internal links but should be links
96to something else.
97
98=back
99
100=head1 DIAGNOSTICS
101
102=head2 Errors
103
104=over 4
105
106=item * empty =headn
107
108A heading (C<=head1> or C<=head2>) without any text? That ain't no
109heading!
110
111=item * =over on line I<N> without closing =back
112
113The C<=over> command does not have a corresponding C<=back> before the
114next heading (C<=head1> or C<=head2>) or the end of the file.
115
116=item * =item without previous =over
117
118=item * =back without previous =over
119
120An C<=item> or C<=back> command has been found outside a
121C<=over>/C<=back> block.
122
123=item * No argument for =begin
124
125A C<=begin> command was found that is not followed by the formatter
126specification.
127
128=item * =end without =begin
129
130A standalone C<=end> command was found.
131
132=item * Nested =begin's
133
134There were at least two consecutive C<=begin> commands without
135the corresponding C<=end>. Only one C<=begin> may be active at
136a time.
137
138=item * =for without formatter specification
139
140There is no specification of the formatter after the C<=for> command.
141
142=item * unresolved internal link I<NAME>
143
144The given link to I<NAME> does not have a matching node in the current
145POD. This also happend when a single word node name is not enclosed in
146C<"">.
147
148=item * Unknown command "I<CMD>"
149
150An invalid POD command has been found. Valid are C<=head1>, C<=head2>,
151C<=head3>, C<=head4>, C<=over>, C<=item>, C<=back>, C<=begin>, C<=end>,
152C<=for>, C<=pod>, C<=cut>
153
154=item * Unknown interior-sequence "I<SEQ>"
155
156An invalid markup command has been encountered. Valid are:
157C<BE<lt>E<gt>>, C<CE<lt>E<gt>>, C<EE<lt>E<gt>>, C<FE<lt>E<gt>>,
158C<IE<lt>E<gt>>, C<LE<lt>E<gt>>, C<SE<lt>E<gt>>, C<XE<lt>E<gt>>,
159C<ZE<lt>E<gt>>
160
161=item * nested commands I<CMD>E<lt>...I<CMD>E<lt>...E<gt>...E<gt>
162
163Two nested identical markup commands have been found. Generally this
164does not make sense.
165
166=item * garbled entity I<STRING>
167
168The I<STRING> found cannot be interpreted as a character entity.
169
170=item * Entity number out of range
171
172An entity specified by number (dec, hex, oct) is out of range (1-255).
173
174=item * malformed link LE<lt>E<gt>
175
176The link found cannot be parsed because it does not conform to the
177syntax described in L<perlpod>.
178
179=item * nonempty ZE<lt>E<gt>
180
181The C<ZE<lt>E<gt>> sequence is supposed to be empty.
182
183=item * empty XE<lt>E<gt>
184
185The index entry specified contains nothing but whitespace.
186
187=item * Spurious text after =pod / =cut
188
189The commands C<=pod> and C<=cut> do not take any arguments.
190
191=item * Spurious character(s) after =back
192
193The C<=back> command does not take any arguments.
194
195=back
196
197=head2 Warnings
198
199These may not necessarily cause trouble, but indicate mediocre style.
200
201=over 4
202
203=item * multiple occurrence of link target I<name>
204
205The POD file has some C<=item> and/or C<=head> commands that have
206the same text. Potential hyperlinks to such a text cannot be unique then.
207
208=item * line containing nothing but whitespace in paragraph
209
210There is some whitespace on a seemingly empty line. POD is very sensitive
211to such things, so this is flagged. B<vi> users switch on the B<list>
212option to avoid this problem.
213
214=begin _disabled_
215
216=item * file does not start with =head
217
218The file starts with a different POD directive than head.
219This is most probably something you do not want.
220
221=end _disabled_
222
223=item * previous =item has no contents
224
225There is a list C<=item> right above the flagged line that has no
226text contents. You probably want to delete empty items.
227
228=item * preceding non-item paragraph(s)
229
230A list introduced by C<=over> starts with a text or verbatim paragraph,
231but continues with C<=item>s. Move the non-item paragraph out of the
232C<=over>/C<=back> block.
233
234=item * =item type mismatch (I<one> vs. I<two>)
235
236A list started with e.g. a bulletted C<=item> and continued with a
237numbered one. This is obviously inconsistent. For most translators the
238type of the I<first> C<=item> determines the type of the list.
239
240=item * I<N> unescaped C<E<lt>E<gt>> in paragraph
241
242Angle brackets not written as C<E<lt>ltE<gt>> and C<E<lt>gtE<gt>>
243can potentially cause errors as they could be misinterpreted as
244markup commands. This is only printed when the -warnings level is
245greater than 1.
246
247=item * Unknown entity
248
249A character entity was found that does not belong to the standard
250ISO set or the POD specials C<verbar> and C<sol>.
251
252=item * No items in =over
253
254The list opened with C<=over> does not contain any items.
255
256=item * No argument for =item
257
258C<=item> without any parameters is deprecated. It should either be followed
259by C<*> to indicate an unordered list, by a number (optionally followed
260by a dot) to indicate an ordered (numbered) list or simple text for a
261definition list.
262
263=item * empty section in previous paragraph
264
265The previous section (introduced by a C<=head> command) does not contain
266any text. This usually indicates that something is missing. Note: A
267C<=head1> followed immediately by C<=head2> does not trigger this warning.
268
269=item * Verbatim paragraph in NAME section
270
271The NAME section (C<=head1 NAME>) should consist of a single paragraph
272with the script/module name, followed by a dash `-' and a very short
273description of what the thing is good for.
274
275=item * =headI<n> without preceding higher level
276
277For example if there is a C<=head2> in the POD file prior to a
278C<=head1>.
279
280=back
281
282=head2 Hyperlinks
283
284There are some warnings wrt. malformed hyperlinks.
285
286=over 4
287
288=item * ignoring leading/trailing whitespace in link
289
290There is whitespace at the beginning or the end of the contents of
291LE<lt>...E<gt>.
292
293=item * (section) in '$page' deprecated
294
295There is a section detected in the page name of LE<lt>...E<gt>, e.g.
296C<LE<lt>passwd(2)E<gt>>. POD hyperlinks may point to POD documents only.
297Please write C<CE<lt>passwd(2)E<gt>> instead. Some formatters are able
298to expand this to appropriate code. For links to (builtin) functions,
299please say C<LE<lt>perlfunc/mkdirE<gt>>, without ().
300
301=item * alternative text/node '%s' contains non-escaped | or /
302
303The characters C<|> and C</> are special in the LE<lt>...E<gt> context.
304Although the hyperlink parser does its best to determine which "/" is
305text and which is a delimiter in case of doubt, one ought to escape
306these literal characters like this:
307
308  /     E<sol>
309  |     E<verbar>
310
311=back
312
313=head1 RETURN VALUE
314
315B<podchecker> returns the number of POD syntax errors found or -1 if
316there were no POD commands at all found in the file.
317
318=head1 EXAMPLES
319
320See L</SYNOPSIS>
321
322=head1 INTERFACE
323
324While checking, this module collects document properties, e.g. the nodes
325for hyperlinks (C<=headX>, C<=item>) and index entries (C<XE<lt>E<gt>>).
326POD translators can use this feature to syntax-check and get the nodes in
327a first pass before actually starting to convert. This is expensive in terms
328of execution time, but allows for very robust conversions.
329
330Since PodParser-1.24 the B<Pod::Checker> module uses only the B<poderror>
331method to print errors and warnings. The summary output (e.g.
332"Pod syntax OK") has been dropped from the module and has been included in
333B<podchecker> (the script). This allows users of B<Pod::Checker> to
334control completely the output behaviour. Users of B<podchecker> (the script)
335get the well-known behaviour.
336
337=cut
338
339#############################################################################
340
341use strict;
342#use diagnostics;
343use Carp;
344use Exporter;
345use Pod::Parser;
346
347use vars qw(@ISA @EXPORT);
348@ISA = qw(Pod::Parser);
349@EXPORT = qw(&podchecker);
350
351use vars qw(%VALID_COMMANDS %VALID_SEQUENCES);
352
353my %VALID_COMMANDS = (
354    'pod'    =>  1,
355    'cut'    =>  1,
356    'head1'  =>  1,
357    'head2'  =>  1,
358    'head3'  =>  1,
359    'head4'  =>  1,
360    'over'   =>  1,
361    'back'   =>  1,
362    'item'   =>  1,
363    'for'    =>  1,
364    'begin'  =>  1,
365    'end'    =>  1,
366);
367
368my %VALID_SEQUENCES = (
369    'I'  =>  1,
370    'B'  =>  1,
371    'S'  =>  1,
372    'C'  =>  1,
373    'L'  =>  1,
374    'F'  =>  1,
375    'X'  =>  1,
376    'Z'  =>  1,
377    'E'  =>  1,
378);
379
380# stolen from HTML::Entities
381my %ENTITIES = (
382 # Some normal chars that have special meaning in SGML context
383 amp    => '&',  # ampersand
384'gt'    => '>',  # greater than
385'lt'    => '<',  # less than
386 quot   => '"',  # double quote
387
388 # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML
389 AElig	=> '�',  # capital AE diphthong (ligature)
390 Aacute	=> '�',  # capital A, acute accent
391 Acirc	=> '�',  # capital A, circumflex accent
392 Agrave	=> '�',  # capital A, grave accent
393 Aring	=> '�',  # capital A, ring
394 Atilde	=> '�',  # capital A, tilde
395 Auml	=> '�',  # capital A, dieresis or umlaut mark
396 Ccedil	=> '�',  # capital C, cedilla
397 ETH	=> '�',  # capital Eth, Icelandic
398 Eacute	=> '�',  # capital E, acute accent
399 Ecirc	=> '�',  # capital E, circumflex accent
400 Egrave	=> '�',  # capital E, grave accent
401 Euml	=> '�',  # capital E, dieresis or umlaut mark
402 Iacute	=> '�',  # capital I, acute accent
403 Icirc	=> '�',  # capital I, circumflex accent
404 Igrave	=> '�',  # capital I, grave accent
405 Iuml	=> '�',  # capital I, dieresis or umlaut mark
406 Ntilde	=> '�',  # capital N, tilde
407 Oacute	=> '�',  # capital O, acute accent
408 Ocirc	=> '�',  # capital O, circumflex accent
409 Ograve	=> '�',  # capital O, grave accent
410 Oslash	=> '�',  # capital O, slash
411 Otilde	=> '�',  # capital O, tilde
412 Ouml	=> '�',  # capital O, dieresis or umlaut mark
413 THORN	=> '�',  # capital THORN, Icelandic
414 Uacute	=> '�',  # capital U, acute accent
415 Ucirc	=> '�',  # capital U, circumflex accent
416 Ugrave	=> '�',  # capital U, grave accent
417 Uuml	=> '�',  # capital U, dieresis or umlaut mark
418 Yacute	=> '�',  # capital Y, acute accent
419 aacute	=> '�',  # small a, acute accent
420 acirc	=> '�',  # small a, circumflex accent
421 aelig	=> '�',  # small ae diphthong (ligature)
422 agrave	=> '�',  # small a, grave accent
423 aring	=> '�',  # small a, ring
424 atilde	=> '�',  # small a, tilde
425 auml	=> '�',  # small a, dieresis or umlaut mark
426 ccedil	=> '�',  # small c, cedilla
427 eacute	=> '�',  # small e, acute accent
428 ecirc	=> '�',  # small e, circumflex accent
429 egrave	=> '�',  # small e, grave accent
430 eth	=> '�',  # small eth, Icelandic
431 euml	=> '�',  # small e, dieresis or umlaut mark
432 iacute	=> '�',  # small i, acute accent
433 icirc	=> '�',  # small i, circumflex accent
434 igrave	=> '�',  # small i, grave accent
435 iuml	=> '�',  # small i, dieresis or umlaut mark
436 ntilde	=> '�',  # small n, tilde
437 oacute	=> '�',  # small o, acute accent
438 ocirc	=> '�',  # small o, circumflex accent
439 ograve	=> '�',  # small o, grave accent
440 oslash	=> '�',  # small o, slash
441 otilde	=> '�',  # small o, tilde
442 ouml	=> '�',  # small o, dieresis or umlaut mark
443 szlig	=> '�',  # small sharp s, German (sz ligature)
444 thorn	=> '�',  # small thorn, Icelandic
445 uacute	=> '�',  # small u, acute accent
446 ucirc	=> '�',  # small u, circumflex accent
447 ugrave	=> '�',  # small u, grave accent
448 uuml	=> '�',  # small u, dieresis or umlaut mark
449 yacute	=> '�',  # small y, acute accent
450 yuml	=> '�',  # small y, dieresis or umlaut mark
451
452 # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96)
453 copy   => '�',  # copyright sign
454 reg    => '�',  # registered sign
455 nbsp   => "\240", # non breaking space
456
457 # Additional ISO-8859/1 entities listed in rfc1866 (section 14)
458 iexcl  => '�',
459 cent   => '�',
460 pound  => '�',
461 curren => '�',
462 yen    => '�',
463 brvbar => '�',
464 sect   => '�',
465 uml    => '�',
466 ordf   => '�',
467 laquo  => '�',
468'not'   => '�',    # not is a keyword in perl
469 shy    => '�',
470 macr   => '�',
471 deg    => '�',
472 plusmn => '�',
473 sup1   => '�',
474 sup2   => '�',
475 sup3   => '�',
476 acute  => '�',
477 micro  => '�',
478 para   => '�',
479 middot => '�',
480 cedil  => '�',
481 ordm   => '�',
482 raquo  => '�',
483 frac14 => '�',
484 frac12 => '�',
485 frac34 => '�',
486 iquest => '�',
487'times' => '�',    # times is a keyword in perl
488 divide => '�',
489
490# some POD special entities
491 verbar => '|',
492 sol => '/'
493);
494
495##---------------------------------------------------------------------------
496
497##---------------------------------
498## Function definitions begin here
499##---------------------------------
500
501sub podchecker( $ ; $ % ) {
502    my ($infile, $outfile, %options) = @_;
503    local $_;
504
505    ## Set defaults
506    $infile  ||= \*STDIN;
507    $outfile ||= \*STDERR;
508
509    ## Now create a pod checker
510    my $checker = new Pod::Checker(%options);
511
512    ## Now check the pod document for errors
513    $checker->parse_from_file($infile, $outfile);
514
515    ## Return the number of errors found
516    return $checker->num_errors();
517}
518
519##---------------------------------------------------------------------------
520
521##-------------------------------
522## Method definitions begin here
523##-------------------------------
524
525##################################
526
527=over 4
528
529=item C<Pod::Checker-E<gt>new( %options )>
530
531Return a reference to a new Pod::Checker object that inherits from
532Pod::Parser and is used for calling the required methods later. The
533following options are recognized:
534
535C<-warnings =E<gt> num>
536  Print warnings if C<num> is true. The higher the value of C<num>,
537the more warnings are printed. Currently there are only levels 1 and 2.
538
539C<-quiet =E<gt> num>
540  If C<num> is true, do not print any errors/warnings. This is useful
541when Pod::Checker is used to munge POD code into plain text from within
542POD formatters.
543
544=cut
545
546## sub new {
547##     my $this = shift;
548##     my $class = ref($this) || $this;
549##     my %params = @_;
550##     my $self = {%params};
551##     bless $self, $class;
552##     $self->initialize();
553##     return $self;
554## }
555
556sub initialize {
557    my $self = shift;
558    ## Initialize number of errors, and setup an error function to
559    ## increment this number and then print to the designated output.
560    $self->{_NUM_ERRORS} = 0;
561    $self->{_NUM_WARNINGS} = 0;
562    $self->{-quiet} ||= 0;
563    # set the error handling subroutine
564    $self->errorsub($self->{-quiet} ? sub { 1; } : 'poderror');
565    $self->{_commands} = 0; # total number of POD commands encountered
566    $self->{_list_stack} = []; # stack for nested lists
567    $self->{_have_begin} = ''; # stores =begin
568    $self->{_links} = []; # stack for internal hyperlinks
569    $self->{_nodes} = []; # stack for =head/=item nodes
570    $self->{_index} = []; # text in X<>
571    # print warnings?
572    $self->{-warnings} = 1 unless(defined $self->{-warnings});
573    $self->{_current_head1} = ''; # the current =head1 block
574    $self->parseopts(-process_cut_cmd => 1, -warnings => $self->{-warnings});
575}
576
577##################################
578
579=item C<$checker-E<gt>poderror( @args )>
580
581=item C<$checker-E<gt>poderror( {%opts}, @args )>
582
583Internal method for printing errors and warnings. If no options are
584given, simply prints "@_". The following options are recognized and used
585to form the output:
586
587  -msg
588
589A message to print prior to C<@args>.
590
591  -line
592
593The line number the error occurred in.
594
595  -file
596
597The file (name) the error occurred in.
598
599  -severity
600
601The error level, should be 'WARNING' or 'ERROR'.
602
603=cut
604
605# Invoked as $self->poderror( @args ), or $self->poderror( {%opts}, @args )
606sub poderror {
607    my $self = shift;
608    my %opts = (ref $_[0]) ? %{shift()} : ();
609
610    ## Retrieve options
611    chomp( my $msg  = ($opts{-msg} || "")."@_" );
612    my $line = (exists $opts{-line}) ? " at line $opts{-line}" : "";
613    my $file = (exists $opts{-file}) ? " in file $opts{-file}" : "";
614    unless (exists $opts{-severity}) {
615       ## See if can find severity in message prefix
616       $opts{-severity} = $1  if ( $msg =~ s/^\**\s*([A-Z]{3,}):\s+// );
617    }
618    my $severity = (exists $opts{-severity}) ? "*** $opts{-severity}: " : "";
619
620    ## Increment error count and print message "
621    ++($self->{_NUM_ERRORS})
622        if(!%opts || ($opts{-severity} && $opts{-severity} eq 'ERROR'));
623    ++($self->{_NUM_WARNINGS})
624        if(!%opts || ($opts{-severity} && $opts{-severity} eq 'WARNING'));
625    my $out_fh = $self->output_handle() || \*STDERR;
626    print $out_fh ($severity, $msg, $line, $file, "\n")
627      if($self->{-warnings} || !%opts || $opts{-severity} ne 'WARNING');
628}
629
630##################################
631
632=item C<$checker-E<gt>num_errors()>
633
634Set (if argument specified) and retrieve the number of errors found.
635
636=cut
637
638sub num_errors {
639   return (@_ > 1) ? ($_[0]->{_NUM_ERRORS} = $_[1]) : $_[0]->{_NUM_ERRORS};
640}
641
642##################################
643
644=item C<$checker-E<gt>num_warnings()>
645
646Set (if argument specified) and retrieve the number of warnings found.
647
648=cut
649
650sub num_warnings {
651   return (@_ > 1) ? ($_[0]->{_NUM_WARNINGS} = $_[1]) : $_[0]->{_NUM_WARNINGS};
652}
653
654##################################
655
656=item C<$checker-E<gt>name()>
657
658Set (if argument specified) and retrieve the canonical name of POD as
659found in the C<=head1 NAME> section.
660
661=cut
662
663sub name {
664    return (@_ > 1 && $_[1]) ?
665        ($_[0]->{-name} = $_[1]) : $_[0]->{-name};
666}
667
668##################################
669
670=item C<$checker-E<gt>node()>
671
672Add (if argument specified) and retrieve the nodes (as defined by C<=headX>
673and C<=item>) of the current POD. The nodes are returned in the order of
674their occurrence. They consist of plain text, each piece of whitespace is
675collapsed to a single blank.
676
677=cut
678
679sub node {
680    my ($self,$text) = @_;
681    if(defined $text) {
682        $text =~ s/\s+$//s; # strip trailing whitespace
683        $text =~ s/\s+/ /gs; # collapse whitespace
684        # add node, order important!
685        push(@{$self->{_nodes}}, $text);
686        # keep also a uniqueness counter
687        $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s);
688        return $text;
689    }
690    @{$self->{_nodes}};
691}
692
693##################################
694
695=item C<$checker-E<gt>idx()>
696
697Add (if argument specified) and retrieve the index entries (as defined by
698C<XE<lt>E<gt>>) of the current POD. They consist of plain text, each piece
699of whitespace is collapsed to a single blank.
700
701=cut
702
703# set/return index entries of current POD
704sub idx {
705    my ($self,$text) = @_;
706    if(defined $text) {
707        $text =~ s/\s+$//s; # strip trailing whitespace
708        $text =~ s/\s+/ /gs; # collapse whitespace
709        # add node, order important!
710        push(@{$self->{_index}}, $text);
711        # keep also a uniqueness counter
712        $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s);
713        return $text;
714    }
715    @{$self->{_index}};
716}
717
718##################################
719
720=item C<$checker-E<gt>hyperlink()>
721
722Add (if argument specified) and retrieve the hyperlinks (as defined by
723C<LE<lt>E<gt>>) of the current POD. They consist of a 2-item array: line
724number and C<Pod::Hyperlink> object.
725
726=back
727
728=cut
729
730# set/return hyperlinks of the current POD
731sub hyperlink {
732    my $self = shift;
733    if($_[0]) {
734        push(@{$self->{_links}}, $_[0]);
735        return $_[0];
736    }
737    @{$self->{_links}};
738}
739
740## overrides for Pod::Parser
741
742sub end_pod {
743    ## Do some final checks and
744    ## print the number of errors found
745    my $self   = shift;
746    my $infile = $self->input_file();
747    my $out_fh = $self->output_handle();
748
749    if(@{$self->{_list_stack}}) {
750        my $list;
751        while(($list = $self->_close_list('EOF',$infile)) &&
752          $list->indent() ne 'auto') {
753            $self->poderror({ -line => 'EOF', -file => $infile,
754                -severity => 'ERROR', -msg => "=over on line " .
755                $list->start() . " without closing =back" }); #"
756        }
757    }
758
759    # check validity of document internal hyperlinks
760    # first build the node names from the paragraph text
761    my %nodes;
762    foreach($self->node()) {
763        $nodes{$_} = 1;
764        if(/^(\S+)\s+\S/) {
765            # we have more than one word. Use the first as a node, too.
766            # This is used heavily in perlfunc.pod
767            $nodes{$1} ||= 2; # derived node
768        }
769    }
770    foreach($self->idx()) {
771        $nodes{$_} = 3; # index node
772    }
773    foreach($self->hyperlink()) {
774        my ($line,$link) = @$_;
775        # _TODO_ what if there is a link to the page itself by the name,
776        # e.g. in Tk::Pod : L<Tk::Pod/"DESCRIPTION">
777        if($link->node() && !$link->page() && $link->type() ne 'hyperlink') {
778            my $node = $self->_check_ptree($self->parse_text($link->node(),
779                $line), $line, $infile, 'L');
780            if($node && !$nodes{$node}) {
781                $self->poderror({ -line => $line || '', -file => $infile,
782                    -severity => 'ERROR',
783                    -msg => "unresolved internal link '$node'"});
784            }
785        }
786    }
787
788    # check the internal nodes for uniqueness. This pertains to
789    # =headX, =item and X<...>
790    foreach(grep($self->{_unique_nodes}->{$_} > 1,
791      keys %{$self->{_unique_nodes}})) {
792        $self->poderror({ -line => '-', -file => $infile,
793            -severity => 'WARNING',
794            -msg => "multiple occurrence of link target '$_'"});
795    }
796
797    # no POD found here
798    $self->num_errors(-1) if($self->{_commands} == 0);
799}
800
801# check a POD command directive
802sub command {
803    my ($self, $cmd, $paragraph, $line_num, $pod_para) = @_;
804    my ($file, $line) = $pod_para->file_line;
805    ## Check the command syntax
806    my $arg; # this will hold the command argument
807    if (! $VALID_COMMANDS{$cmd}) {
808       $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR',
809                         -msg => "Unknown command '$cmd'" });
810    }
811    else { # found a valid command
812        $self->{_commands}++; # delete this line if below is enabled again
813
814        ##### following check disabled due to strong request
815        #if(!$self->{_commands}++ && $cmd !~ /^head/) {
816        #    $self->poderror({ -line => $line, -file => $file,
817        #         -severity => 'WARNING',
818        #         -msg => "file does not start with =head" });
819        #}
820
821        # check syntax of particular command
822        if($cmd eq 'over') {
823            # check for argument
824            $arg = $self->interpolate_and_check($paragraph, $line,$file);
825            my $indent = 4; # default
826            if($arg && $arg =~ /^\s*(\d+)\s*$/) {
827                $indent = $1;
828            }
829            # start a new list
830            $self->_open_list($indent,$line,$file);
831        }
832        elsif($cmd eq 'item') {
833            # are we in a list?
834            unless(@{$self->{_list_stack}}) {
835                $self->poderror({ -line => $line, -file => $file,
836                     -severity => 'ERROR',
837                     -msg => "=item without previous =over" });
838                # auto-open in case we encounter many more
839                $self->_open_list('auto',$line,$file);
840            }
841            my $list = $self->{_list_stack}->[0];
842            # check whether the previous item had some contents
843            if(defined $self->{_list_item_contents} &&
844              $self->{_list_item_contents} == 0) {
845                $self->poderror({ -line => $line, -file => $file,
846                     -severity => 'WARNING',
847                     -msg => "previous =item has no contents" });
848            }
849            if($list->{_has_par}) {
850                $self->poderror({ -line => $line, -file => $file,
851                     -severity => 'WARNING',
852                     -msg => "preceding non-item paragraph(s)" });
853                delete $list->{_has_par};
854            }
855            # check for argument
856            $arg = $self->interpolate_and_check($paragraph, $line, $file);
857            if($arg && $arg =~ /(\S+)/) {
858                $arg =~ s/[\s\n]+$//;
859                my $type;
860                if($arg =~ /^[*]\s*(\S*.*)/) {
861                  $type = 'bullet';
862                  $self->{_list_item_contents} = $1 ? 1 : 0;
863                  $arg = $1;
864                }
865                elsif($arg =~ /^\d+\.?\s*(\S*)/) {
866                  $type = 'number';
867                  $self->{_list_item_contents} = $1 ? 1 : 0;
868                  $arg = $1;
869                }
870                else {
871                  $type = 'definition';
872                  $self->{_list_item_contents} = 1;
873                }
874                my $first = $list->type();
875                if($first && $first ne $type) {
876                    $self->poderror({ -line => $line, -file => $file,
877                       -severity => 'WARNING',
878                       -msg => "=item type mismatch ('$first' vs. '$type')"});
879                }
880                else { # first item
881                    $list->type($type);
882                }
883            }
884            else {
885                $self->poderror({ -line => $line, -file => $file,
886                     -severity => 'WARNING',
887                     -msg => "No argument for =item" });
888		$arg = ' '; # empty
889                $self->{_list_item_contents} = 0;
890            }
891            # add this item
892            $list->item($arg);
893            # remember this node
894            $self->node($arg);
895        }
896        elsif($cmd eq 'back') {
897            # check if we have an open list
898            unless(@{$self->{_list_stack}}) {
899                $self->poderror({ -line => $line, -file => $file,
900                         -severity => 'ERROR',
901                         -msg => "=back without previous =over" });
902            }
903            else {
904                # check for spurious characters
905                $arg = $self->interpolate_and_check($paragraph, $line,$file);
906                if($arg && $arg =~ /\S/) {
907                    $self->poderror({ -line => $line, -file => $file,
908                         -severity => 'ERROR',
909                         -msg => "Spurious character(s) after =back" });
910                }
911                # close list
912                my $list = $self->_close_list($line,$file);
913                # check for empty lists
914                if(!$list->item() && $self->{-warnings}) {
915                    $self->poderror({ -line => $line, -file => $file,
916                         -severity => 'WARNING',
917                         -msg => "No items in =over (at line " .
918                         $list->start() . ") / =back list"}); #"
919                }
920            }
921        }
922        elsif($cmd =~ /^head(\d+)/) {
923            my $hnum = $1;
924            $self->{"_have_head_$hnum"}++; # count head types
925            if($hnum > 1 && !$self->{"_have_head_".($hnum -1)}) {
926              $self->poderror({ -line => $line, -file => $file,
927                   -severity => 'WARNING',
928                   -msg => "=head$hnum without preceding higher level"});
929            }
930            # check whether the previous =head section had some contents
931            if(defined $self->{_commands_in_head} &&
932              $self->{_commands_in_head} == 0 &&
933              defined $self->{_last_head} &&
934              $self->{_last_head} >= $hnum) {
935                $self->poderror({ -line => $line, -file => $file,
936                     -severity => 'WARNING',
937                     -msg => "empty section in previous paragraph"});
938            }
939            $self->{_commands_in_head} = -1;
940            $self->{_last_head} = $hnum;
941            # check if there is an open list
942            if(@{$self->{_list_stack}}) {
943                my $list;
944                while(($list = $self->_close_list($line,$file)) &&
945                  $list->indent() ne 'auto') {
946                    $self->poderror({ -line => $line, -file => $file,
947                         -severity => 'ERROR',
948                         -msg => "=over on line ". $list->start() .
949                         " without closing =back (at $cmd)" });
950                }
951            }
952            # remember this node
953            $arg = $self->interpolate_and_check($paragraph, $line,$file);
954            $arg =~ s/[\s\n]+$//s;
955            $self->node($arg);
956            unless(length($arg)) {
957                $self->poderror({ -line => $line, -file => $file,
958                     -severity => 'ERROR',
959                     -msg => "empty =$cmd"});
960            }
961            if($cmd eq 'head1') {
962                $self->{_current_head1} = $arg;
963            } else {
964                $self->{_current_head1} = '';
965            }
966        }
967        elsif($cmd eq 'begin') {
968            if($self->{_have_begin}) {
969                # already have a begin
970                $self->poderror({ -line => $line, -file => $file,
971                     -severity => 'ERROR',
972                     -msg => "Nested =begin's (first at line " .
973                     $self->{_have_begin} . ")"});
974            }
975            else {
976                # check for argument
977                $arg = $self->interpolate_and_check($paragraph, $line,$file);
978                unless($arg && $arg =~ /(\S+)/) {
979                    $self->poderror({ -line => $line, -file => $file,
980                         -severity => 'ERROR',
981                         -msg => "No argument for =begin"});
982                }
983                # remember the =begin
984                $self->{_have_begin} = "$line:$1";
985            }
986        }
987        elsif($cmd eq 'end') {
988            if($self->{_have_begin}) {
989                # close the existing =begin
990                $self->{_have_begin} = '';
991                # check for spurious characters
992                $arg = $self->interpolate_and_check($paragraph, $line,$file);
993                # the closing argument is optional
994                #if($arg && $arg =~ /\S/) {
995                #    $self->poderror({ -line => $line, -file => $file,
996                #         -severity => 'WARNING',
997                #         -msg => "Spurious character(s) after =end" });
998                #}
999            }
1000            else {
1001                # don't have a matching =begin
1002                $self->poderror({ -line => $line, -file => $file,
1003                     -severity => 'ERROR',
1004                     -msg => "=end without =begin" });
1005            }
1006        }
1007        elsif($cmd eq 'for') {
1008            unless($paragraph =~ /\s*(\S+)\s*/) {
1009                $self->poderror({ -line => $line, -file => $file,
1010                     -severity => 'ERROR',
1011                     -msg => "=for without formatter specification" });
1012            }
1013            $arg = ''; # do not expand paragraph below
1014        }
1015        elsif($cmd =~ /^(pod|cut)$/) {
1016            # check for argument
1017            $arg = $self->interpolate_and_check($paragraph, $line,$file);
1018            if($arg && $arg =~ /(\S+)/) {
1019                $self->poderror({ -line => $line, -file => $file,
1020                      -severity => 'ERROR',
1021                      -msg => "Spurious text after =$cmd"});
1022            }
1023        }
1024    $self->{_commands_in_head}++;
1025    ## Check the interior sequences in the command-text
1026    $self->interpolate_and_check($paragraph, $line,$file)
1027        unless(defined $arg);
1028    }
1029}
1030
1031sub _open_list
1032{
1033    my ($self,$indent,$line,$file) = @_;
1034    my $list = Pod::List->new(
1035           -indent => $indent,
1036           -start => $line,
1037           -file => $file);
1038    unshift(@{$self->{_list_stack}}, $list);
1039    undef $self->{_list_item_contents};
1040    $list;
1041}
1042
1043sub _close_list
1044{
1045    my ($self,$line,$file) = @_;
1046    my $list = shift(@{$self->{_list_stack}});
1047    if(defined $self->{_list_item_contents} &&
1048      $self->{_list_item_contents} == 0) {
1049        $self->poderror({ -line => $line, -file => $file,
1050            -severity => 'WARNING',
1051            -msg => "previous =item has no contents" });
1052    }
1053    undef $self->{_list_item_contents};
1054    $list;
1055}
1056
1057# process a block of some text
1058sub interpolate_and_check {
1059    my ($self, $paragraph, $line, $file) = @_;
1060    ## Check the interior sequences in the command-text
1061    # and return the text
1062    $self->_check_ptree(
1063        $self->parse_text($paragraph,$line), $line, $file, '');
1064}
1065
1066sub _check_ptree {
1067    my ($self,$ptree,$line,$file,$nestlist) = @_;
1068    local($_);
1069    my $text = '';
1070    # process each node in the parse tree
1071    foreach(@$ptree) {
1072        # regular text chunk
1073        unless(ref) {
1074            # count the unescaped angle brackets
1075            # complain only when warning level is greater than 1
1076            if($self->{-warnings} && $self->{-warnings}>1) {
1077              my $count;
1078              if($count = tr/<>/<>/) {
1079                $self->poderror({ -line => $line, -file => $file,
1080                     -severity => 'WARNING',
1081                     -msg => "$count unescaped <> in paragraph" });
1082                }
1083            }
1084            $text .= $_;
1085            next;
1086        }
1087        # have an interior sequence
1088        my $cmd = $_->cmd_name();
1089        my $contents = $_->parse_tree();
1090        ($file,$line) = $_->file_line();
1091        # check for valid tag
1092        if (! $VALID_SEQUENCES{$cmd}) {
1093            $self->poderror({ -line => $line, -file => $file,
1094                 -severity => 'ERROR',
1095                 -msg => qq(Unknown interior-sequence '$cmd')});
1096            # expand it anyway
1097            $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1098            next;
1099        }
1100        if($nestlist =~ /$cmd/) {
1101            $self->poderror({ -line => $line, -file => $file,
1102                 -severity => 'ERROR',
1103                 -msg => "nested commands $cmd<...$cmd<...>...>"});
1104            # _TODO_ should we add the contents anyway?
1105            # expand it anyway, see below
1106        }
1107        if($cmd eq 'E') {
1108            # preserve entities
1109            if(@$contents > 1 || ref $$contents[0] || $$contents[0] !~ /^\w+$/) {
1110                $self->poderror({ -line => $line, -file => $file,
1111                    -severity => 'ERROR',
1112                    -msg => "garbled entity " . $_->raw_text()});
1113                next;
1114            }
1115            my $ent = $$contents[0];
1116            my $val;
1117            if($ent =~ /^0x[0-9a-f]+$/i) {
1118                # hexadec entity
1119                $val = hex($ent);
1120            }
1121            elsif($ent =~ /^0\d+$/) {
1122                # octal
1123                $val = oct($ent);
1124            }
1125            elsif($ent =~ /^\d+$/) {
1126                # numeric entity
1127                $val = $ent;
1128            }
1129            if(defined $val) {
1130                if($val>0 && $val<256) {
1131                    $text .= chr($val);
1132                }
1133                else {
1134                    $self->poderror({ -line => $line, -file => $file,
1135                        -severity => 'ERROR',
1136                        -msg => "Entity number out of range " . $_->raw_text()});
1137                }
1138            }
1139            elsif($ENTITIES{$ent}) {
1140                # known ISO entity
1141                $text .= $ENTITIES{$ent};
1142            }
1143            else {
1144                $self->poderror({ -line => $line, -file => $file,
1145                    -severity => 'WARNING',
1146                    -msg => "Unknown entity " . $_->raw_text()});
1147                $text .= "E<$ent>";
1148            }
1149        }
1150        elsif($cmd eq 'L') {
1151            # try to parse the hyperlink
1152            my $link = Pod::Hyperlink->new($contents->raw_text());
1153            unless(defined $link) {
1154                $self->poderror({ -line => $line, -file => $file,
1155                    -severity => 'ERROR',
1156                    -msg => "malformed link " . $_->raw_text() ." : $@"});
1157                next;
1158            }
1159            $link->line($line); # remember line
1160            if($self->{-warnings}) {
1161                foreach my $w ($link->warning()) {
1162                    $self->poderror({ -line => $line, -file => $file,
1163                        -severity => 'WARNING',
1164                        -msg => $w });
1165                }
1166            }
1167            # check the link text
1168            $text .= $self->_check_ptree($self->parse_text($link->text(),
1169                $line), $line, $file, "$nestlist$cmd");
1170            # remember link
1171            $self->hyperlink([$line,$link]);
1172        }
1173        elsif($cmd =~ /[BCFIS]/) {
1174            # add the guts
1175            $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1176        }
1177        elsif($cmd eq 'Z') {
1178            if(length($contents->raw_text())) {
1179                $self->poderror({ -line => $line, -file => $file,
1180                    -severity => 'ERROR',
1181                    -msg => "Nonempty Z<>"});
1182            }
1183        }
1184        elsif($cmd eq 'X') {
1185            my $idx = $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1186            if($idx =~ /^\s*$/s) {
1187                $self->poderror({ -line => $line, -file => $file,
1188                    -severity => 'ERROR',
1189                    -msg => "Empty X<>"});
1190            }
1191            else {
1192                # remember this node
1193                $self->idx($idx);
1194            }
1195        }
1196        else {
1197            # not reached
1198            die "internal error";
1199        }
1200    }
1201    $text;
1202}
1203
1204# process a block of verbatim text
1205sub verbatim {
1206    ## Nothing particular to check
1207    my ($self, $paragraph, $line_num, $pod_para) = @_;
1208
1209    $self->_preproc_par($paragraph);
1210
1211    if($self->{_current_head1} eq 'NAME') {
1212        my ($file, $line) = $pod_para->file_line;
1213        $self->poderror({ -line => $line, -file => $file,
1214            -severity => 'WARNING',
1215            -msg => 'Verbatim paragraph in NAME section' });
1216    }
1217}
1218
1219# process a block of regular text
1220sub textblock {
1221    my ($self, $paragraph, $line_num, $pod_para) = @_;
1222    my ($file, $line) = $pod_para->file_line;
1223
1224    $self->_preproc_par($paragraph);
1225
1226    # skip this paragraph if in a =begin block
1227    unless($self->{_have_begin}) {
1228        my $block = $self->interpolate_and_check($paragraph, $line,$file);
1229        if($self->{_current_head1} eq 'NAME') {
1230            if($block =~ /^\s*(\S+?)\s*[,-]/) {
1231                # this is the canonical name
1232                $self->{-name} = $1 unless(defined $self->{-name});
1233            }
1234        }
1235    }
1236}
1237
1238sub _preproc_par
1239{
1240    my $self = shift;
1241    $_[0] =~ s/[\s\n]+$//;
1242    if($_[0]) {
1243        $self->{_commands_in_head}++;
1244        $self->{_list_item_contents}++ if(defined $self->{_list_item_contents});
1245        if(@{$self->{_list_stack}} && !$self->{_list_stack}->[0]->item()) {
1246            $self->{_list_stack}->[0]->{_has_par} = 1;
1247        }
1248    }
1249}
1250
12511;
1252
1253__END__
1254
1255=head1 AUTHOR
1256
1257Please report bugs using L<http://rt.cpan.org>.
1258
1259Brad Appleton E<lt>bradapp@enteract.comE<gt> (initial version),
1260Marek Rouchal E<lt>marekr@cpan.orgE<gt>
1261
1262Based on code for B<Pod::Text::pod2text()> written by
1263Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>
1264
1265=cut
1266
1267