1############################################################################# 2# Pod/Checker.pm -- check pod documents for syntax errors 3# 4# Copyright (C) 1994-2000 by Bradford Appleton. All rights reserved. 5# This file is part of "PodParser". PodParser is free software; 6# you can redistribute it and/or modify it under the same terms 7# as Perl itself. 8############################################################################# 9 10package Pod::Checker; 11 12use vars qw($VERSION); 13$VERSION = 1.41; ## Current version of this package 14require 5.005; ## requires this Perl version or later 15 16use Pod::ParseUtils; ## for hyperlinks and lists 17 18=head1 NAME 19 20Pod::Checker, podchecker() - check pod documents for syntax errors 21 22=head1 SYNOPSIS 23 24 use Pod::Checker; 25 26 $syntax_okay = podchecker($filepath, $outputpath, %options); 27 28 my $checker = new Pod::Checker %options; 29 $checker->parse_from_file($filepath, \*STDERR); 30 31=head1 OPTIONS/ARGUMENTS 32 33C<$filepath> is the input POD to read and C<$outputpath> is 34where to write POD syntax error messages. Either argument may be a scalar 35indicating a file-path, or else a reference to an open filehandle. 36If unspecified, the input-file it defaults to C<\*STDIN>, and 37the output-file defaults to C<\*STDERR>. 38 39=head2 podchecker() 40 41This function can take a hash of options: 42 43=over 4 44 45=item B<-warnings> =E<gt> I<val> 46 47Turn warnings on/off. I<val> is usually 1 for on, but higher values 48trigger additional warnings. See L<"Warnings">. 49 50=back 51 52=head1 DESCRIPTION 53 54B<podchecker> will perform syntax checking of Perl5 POD format documentation. 55 56Curious/ambitious users are welcome to propose additional features they wish 57to see in B<Pod::Checker> and B<podchecker> and verify that the checks are 58consistent with L<perlpod>. 59 60The following checks are currently preformed: 61 62=over 4 63 64=item * 65 66Unknown '=xxxx' commands, unknown 'XE<lt>...E<gt>' interior-sequences, 67and unterminated interior sequences. 68 69=item * 70 71Check for proper balancing of C<=begin> and C<=end>. The contents of such 72a block are generally ignored, i.e. no syntax checks are performed. 73 74=item * 75 76Check for proper nesting and balancing of C<=over>, C<=item> and C<=back>. 77 78=item * 79 80Check for same nested interior-sequences (e.g. 81C<LE<lt>...LE<lt>...E<gt>...E<gt>>). 82 83=item * 84 85Check for malformed or nonexisting entities C<EE<lt>...E<gt>>. 86 87=item * 88 89Check for correct syntax of hyperlinks C<LE<lt>...E<gt>>. See L<perlpod> 90for details. 91 92=item * 93 94Check for unresolved document-internal links. This check may also reveal 95misspelled links that seem to be internal links but should be links 96to something else. 97 98=back 99 100=head1 DIAGNOSTICS 101 102=head2 Errors 103 104=over 4 105 106=item * empty =headn 107 108A heading (C<=head1> or C<=head2>) without any text? That ain't no 109heading! 110 111=item * =over on line I<N> without closing =back 112 113The C<=over> command does not have a corresponding C<=back> before the 114next heading (C<=head1> or C<=head2>) or the end of the file. 115 116=item * =item without previous =over 117 118=item * =back without previous =over 119 120An C<=item> or C<=back> command has been found outside a 121C<=over>/C<=back> block. 122 123=item * No argument for =begin 124 125A C<=begin> command was found that is not followed by the formatter 126specification. 127 128=item * =end without =begin 129 130A standalone C<=end> command was found. 131 132=item * Nested =begin's 133 134There were at least two consecutive C<=begin> commands without 135the corresponding C<=end>. Only one C<=begin> may be active at 136a time. 137 138=item * =for without formatter specification 139 140There is no specification of the formatter after the C<=for> command. 141 142=item * unresolved internal link I<NAME> 143 144The given link to I<NAME> does not have a matching node in the current 145POD. This also happend when a single word node name is not enclosed in 146C<"">. 147 148=item * Unknown command "I<CMD>" 149 150An invalid POD command has been found. Valid are C<=head1>, C<=head2>, 151C<=head3>, C<=head4>, C<=over>, C<=item>, C<=back>, C<=begin>, C<=end>, 152C<=for>, C<=pod>, C<=cut> 153 154=item * Unknown interior-sequence "I<SEQ>" 155 156An invalid markup command has been encountered. Valid are: 157C<BE<lt>E<gt>>, C<CE<lt>E<gt>>, C<EE<lt>E<gt>>, C<FE<lt>E<gt>>, 158C<IE<lt>E<gt>>, C<LE<lt>E<gt>>, C<SE<lt>E<gt>>, C<XE<lt>E<gt>>, 159C<ZE<lt>E<gt>> 160 161=item * nested commands I<CMD>E<lt>...I<CMD>E<lt>...E<gt>...E<gt> 162 163Two nested identical markup commands have been found. Generally this 164does not make sense. 165 166=item * garbled entity I<STRING> 167 168The I<STRING> found cannot be interpreted as a character entity. 169 170=item * Entity number out of range 171 172An entity specified by number (dec, hex, oct) is out of range (1-255). 173 174=item * malformed link LE<lt>E<gt> 175 176The link found cannot be parsed because it does not conform to the 177syntax described in L<perlpod>. 178 179=item * nonempty ZE<lt>E<gt> 180 181The C<ZE<lt>E<gt>> sequence is supposed to be empty. 182 183=item * empty XE<lt>E<gt> 184 185The index entry specified contains nothing but whitespace. 186 187=item * Spurious text after =pod / =cut 188 189The commands C<=pod> and C<=cut> do not take any arguments. 190 191=item * Spurious character(s) after =back 192 193The C<=back> command does not take any arguments. 194 195=back 196 197=head2 Warnings 198 199These may not necessarily cause trouble, but indicate mediocre style. 200 201=over 4 202 203=item * multiple occurrence of link target I<name> 204 205The POD file has some C<=item> and/or C<=head> commands that have 206the same text. Potential hyperlinks to such a text cannot be unique then. 207 208=item * line containing nothing but whitespace in paragraph 209 210There is some whitespace on a seemingly empty line. POD is very sensitive 211to such things, so this is flagged. B<vi> users switch on the B<list> 212option to avoid this problem. 213 214=begin _disabled_ 215 216=item * file does not start with =head 217 218The file starts with a different POD directive than head. 219This is most probably something you do not want. 220 221=end _disabled_ 222 223=item * previous =item has no contents 224 225There is a list C<=item> right above the flagged line that has no 226text contents. You probably want to delete empty items. 227 228=item * preceding non-item paragraph(s) 229 230A list introduced by C<=over> starts with a text or verbatim paragraph, 231but continues with C<=item>s. Move the non-item paragraph out of the 232C<=over>/C<=back> block. 233 234=item * =item type mismatch (I<one> vs. I<two>) 235 236A list started with e.g. a bulletted C<=item> and continued with a 237numbered one. This is obviously inconsistent. For most translators the 238type of the I<first> C<=item> determines the type of the list. 239 240=item * I<N> unescaped C<E<lt>E<gt>> in paragraph 241 242Angle brackets not written as C<E<lt>ltE<gt>> and C<E<lt>gtE<gt>> 243can potentially cause errors as they could be misinterpreted as 244markup commands. This is only printed when the -warnings level is 245greater than 1. 246 247=item * Unknown entity 248 249A character entity was found that does not belong to the standard 250ISO set or the POD specials C<verbar> and C<sol>. 251 252=item * No items in =over 253 254The list opened with C<=over> does not contain any items. 255 256=item * No argument for =item 257 258C<=item> without any parameters is deprecated. It should either be followed 259by C<*> to indicate an unordered list, by a number (optionally followed 260by a dot) to indicate an ordered (numbered) list or simple text for a 261definition list. 262 263=item * empty section in previous paragraph 264 265The previous section (introduced by a C<=head> command) does not contain 266any text. This usually indicates that something is missing. Note: A 267C<=head1> followed immediately by C<=head2> does not trigger this warning. 268 269=item * Verbatim paragraph in NAME section 270 271The NAME section (C<=head1 NAME>) should consist of a single paragraph 272with the script/module name, followed by a dash `-' and a very short 273description of what the thing is good for. 274 275=item * =headI<n> without preceding higher level 276 277For example if there is a C<=head2> in the POD file prior to a 278C<=head1>. 279 280=back 281 282=head2 Hyperlinks 283 284There are some warnings wrt. malformed hyperlinks. 285 286=over 4 287 288=item * ignoring leading/trailing whitespace in link 289 290There is whitespace at the beginning or the end of the contents of 291LE<lt>...E<gt>. 292 293=item * (section) in '$page' deprecated 294 295There is a section detected in the page name of LE<lt>...E<gt>, e.g. 296C<LE<lt>passwd(2)E<gt>>. POD hyperlinks may point to POD documents only. 297Please write C<CE<lt>passwd(2)E<gt>> instead. Some formatters are able 298to expand this to appropriate code. For links to (builtin) functions, 299please say C<LE<lt>perlfunc/mkdirE<gt>>, without (). 300 301=item * alternative text/node '%s' contains non-escaped | or / 302 303The characters C<|> and C</> are special in the LE<lt>...E<gt> context. 304Although the hyperlink parser does its best to determine which "/" is 305text and which is a delimiter in case of doubt, one ought to escape 306these literal characters like this: 307 308 / E<sol> 309 | E<verbar> 310 311=back 312 313=head1 RETURN VALUE 314 315B<podchecker> returns the number of POD syntax errors found or -1 if 316there were no POD commands at all found in the file. 317 318=head1 EXAMPLES 319 320See L</SYNOPSIS> 321 322=head1 INTERFACE 323 324While checking, this module collects document properties, e.g. the nodes 325for hyperlinks (C<=headX>, C<=item>) and index entries (C<XE<lt>E<gt>>). 326POD translators can use this feature to syntax-check and get the nodes in 327a first pass before actually starting to convert. This is expensive in terms 328of execution time, but allows for very robust conversions. 329 330Since PodParser-1.24 the B<Pod::Checker> module uses only the B<poderror> 331method to print errors and warnings. The summary output (e.g. 332"Pod syntax OK") has been dropped from the module and has been included in 333B<podchecker> (the script). This allows users of B<Pod::Checker> to 334control completely the output behaviour. Users of B<podchecker> (the script) 335get the well-known behaviour. 336 337=cut 338 339############################################################################# 340 341use strict; 342#use diagnostics; 343use Carp; 344use Exporter; 345use Pod::Parser; 346 347use vars qw(@ISA @EXPORT); 348@ISA = qw(Pod::Parser); 349@EXPORT = qw(&podchecker); 350 351use vars qw(%VALID_COMMANDS %VALID_SEQUENCES); 352 353my %VALID_COMMANDS = ( 354 'pod' => 1, 355 'cut' => 1, 356 'head1' => 1, 357 'head2' => 1, 358 'head3' => 1, 359 'head4' => 1, 360 'over' => 1, 361 'back' => 1, 362 'item' => 1, 363 'for' => 1, 364 'begin' => 1, 365 'end' => 1, 366); 367 368my %VALID_SEQUENCES = ( 369 'I' => 1, 370 'B' => 1, 371 'S' => 1, 372 'C' => 1, 373 'L' => 1, 374 'F' => 1, 375 'X' => 1, 376 'Z' => 1, 377 'E' => 1, 378); 379 380# stolen from HTML::Entities 381my %ENTITIES = ( 382 # Some normal chars that have special meaning in SGML context 383 amp => '&', # ampersand 384'gt' => '>', # greater than 385'lt' => '<', # less than 386 quot => '"', # double quote 387 388 # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML 389 AElig => '�', # capital AE diphthong (ligature) 390 Aacute => '�', # capital A, acute accent 391 Acirc => '�', # capital A, circumflex accent 392 Agrave => '�', # capital A, grave accent 393 Aring => '�', # capital A, ring 394 Atilde => '�', # capital A, tilde 395 Auml => '�', # capital A, dieresis or umlaut mark 396 Ccedil => '�', # capital C, cedilla 397 ETH => '�', # capital Eth, Icelandic 398 Eacute => '�', # capital E, acute accent 399 Ecirc => '�', # capital E, circumflex accent 400 Egrave => '�', # capital E, grave accent 401 Euml => '�', # capital E, dieresis or umlaut mark 402 Iacute => '�', # capital I, acute accent 403 Icirc => '�', # capital I, circumflex accent 404 Igrave => '�', # capital I, grave accent 405 Iuml => '�', # capital I, dieresis or umlaut mark 406 Ntilde => '�', # capital N, tilde 407 Oacute => '�', # capital O, acute accent 408 Ocirc => '�', # capital O, circumflex accent 409 Ograve => '�', # capital O, grave accent 410 Oslash => '�', # capital O, slash 411 Otilde => '�', # capital O, tilde 412 Ouml => '�', # capital O, dieresis or umlaut mark 413 THORN => '�', # capital THORN, Icelandic 414 Uacute => '�', # capital U, acute accent 415 Ucirc => '�', # capital U, circumflex accent 416 Ugrave => '�', # capital U, grave accent 417 Uuml => '�', # capital U, dieresis or umlaut mark 418 Yacute => '�', # capital Y, acute accent 419 aacute => '�', # small a, acute accent 420 acirc => '�', # small a, circumflex accent 421 aelig => '�', # small ae diphthong (ligature) 422 agrave => '�', # small a, grave accent 423 aring => '�', # small a, ring 424 atilde => '�', # small a, tilde 425 auml => '�', # small a, dieresis or umlaut mark 426 ccedil => '�', # small c, cedilla 427 eacute => '�', # small e, acute accent 428 ecirc => '�', # small e, circumflex accent 429 egrave => '�', # small e, grave accent 430 eth => '�', # small eth, Icelandic 431 euml => '�', # small e, dieresis or umlaut mark 432 iacute => '�', # small i, acute accent 433 icirc => '�', # small i, circumflex accent 434 igrave => '�', # small i, grave accent 435 iuml => '�', # small i, dieresis or umlaut mark 436 ntilde => '�', # small n, tilde 437 oacute => '�', # small o, acute accent 438 ocirc => '�', # small o, circumflex accent 439 ograve => '�', # small o, grave accent 440 oslash => '�', # small o, slash 441 otilde => '�', # small o, tilde 442 ouml => '�', # small o, dieresis or umlaut mark 443 szlig => '�', # small sharp s, German (sz ligature) 444 thorn => '�', # small thorn, Icelandic 445 uacute => '�', # small u, acute accent 446 ucirc => '�', # small u, circumflex accent 447 ugrave => '�', # small u, grave accent 448 uuml => '�', # small u, dieresis or umlaut mark 449 yacute => '�', # small y, acute accent 450 yuml => '�', # small y, dieresis or umlaut mark 451 452 # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96) 453 copy => '�', # copyright sign 454 reg => '�', # registered sign 455 nbsp => "\240", # non breaking space 456 457 # Additional ISO-8859/1 entities listed in rfc1866 (section 14) 458 iexcl => '�', 459 cent => '�', 460 pound => '�', 461 curren => '�', 462 yen => '�', 463 brvbar => '�', 464 sect => '�', 465 uml => '�', 466 ordf => '�', 467 laquo => '�', 468'not' => '�', # not is a keyword in perl 469 shy => '�', 470 macr => '�', 471 deg => '�', 472 plusmn => '�', 473 sup1 => '�', 474 sup2 => '�', 475 sup3 => '�', 476 acute => '�', 477 micro => '�', 478 para => '�', 479 middot => '�', 480 cedil => '�', 481 ordm => '�', 482 raquo => '�', 483 frac14 => '�', 484 frac12 => '�', 485 frac34 => '�', 486 iquest => '�', 487'times' => '�', # times is a keyword in perl 488 divide => '�', 489 490# some POD special entities 491 verbar => '|', 492 sol => '/' 493); 494 495##--------------------------------------------------------------------------- 496 497##--------------------------------- 498## Function definitions begin here 499##--------------------------------- 500 501sub podchecker( $ ; $ % ) { 502 my ($infile, $outfile, %options) = @_; 503 local $_; 504 505 ## Set defaults 506 $infile ||= \*STDIN; 507 $outfile ||= \*STDERR; 508 509 ## Now create a pod checker 510 my $checker = new Pod::Checker(%options); 511 512 ## Now check the pod document for errors 513 $checker->parse_from_file($infile, $outfile); 514 515 ## Return the number of errors found 516 return $checker->num_errors(); 517} 518 519##--------------------------------------------------------------------------- 520 521##------------------------------- 522## Method definitions begin here 523##------------------------------- 524 525################################## 526 527=over 4 528 529=item C<Pod::Checker-E<gt>new( %options )> 530 531Return a reference to a new Pod::Checker object that inherits from 532Pod::Parser and is used for calling the required methods later. The 533following options are recognized: 534 535C<-warnings =E<gt> num> 536 Print warnings if C<num> is true. The higher the value of C<num>, 537the more warnings are printed. Currently there are only levels 1 and 2. 538 539C<-quiet =E<gt> num> 540 If C<num> is true, do not print any errors/warnings. This is useful 541when Pod::Checker is used to munge POD code into plain text from within 542POD formatters. 543 544=cut 545 546## sub new { 547## my $this = shift; 548## my $class = ref($this) || $this; 549## my %params = @_; 550## my $self = {%params}; 551## bless $self, $class; 552## $self->initialize(); 553## return $self; 554## } 555 556sub initialize { 557 my $self = shift; 558 ## Initialize number of errors, and setup an error function to 559 ## increment this number and then print to the designated output. 560 $self->{_NUM_ERRORS} = 0; 561 $self->{_NUM_WARNINGS} = 0; 562 $self->{-quiet} ||= 0; 563 # set the error handling subroutine 564 $self->errorsub($self->{-quiet} ? sub { 1; } : 'poderror'); 565 $self->{_commands} = 0; # total number of POD commands encountered 566 $self->{_list_stack} = []; # stack for nested lists 567 $self->{_have_begin} = ''; # stores =begin 568 $self->{_links} = []; # stack for internal hyperlinks 569 $self->{_nodes} = []; # stack for =head/=item nodes 570 $self->{_index} = []; # text in X<> 571 # print warnings? 572 $self->{-warnings} = 1 unless(defined $self->{-warnings}); 573 $self->{_current_head1} = ''; # the current =head1 block 574 $self->parseopts(-process_cut_cmd => 1, -warnings => $self->{-warnings}); 575} 576 577################################## 578 579=item C<$checker-E<gt>poderror( @args )> 580 581=item C<$checker-E<gt>poderror( {%opts}, @args )> 582 583Internal method for printing errors and warnings. If no options are 584given, simply prints "@_". The following options are recognized and used 585to form the output: 586 587 -msg 588 589A message to print prior to C<@args>. 590 591 -line 592 593The line number the error occurred in. 594 595 -file 596 597The file (name) the error occurred in. 598 599 -severity 600 601The error level, should be 'WARNING' or 'ERROR'. 602 603=cut 604 605# Invoked as $self->poderror( @args ), or $self->poderror( {%opts}, @args ) 606sub poderror { 607 my $self = shift; 608 my %opts = (ref $_[0]) ? %{shift()} : (); 609 610 ## Retrieve options 611 chomp( my $msg = ($opts{-msg} || "")."@_" ); 612 my $line = (exists $opts{-line}) ? " at line $opts{-line}" : ""; 613 my $file = (exists $opts{-file}) ? " in file $opts{-file}" : ""; 614 unless (exists $opts{-severity}) { 615 ## See if can find severity in message prefix 616 $opts{-severity} = $1 if ( $msg =~ s/^\**\s*([A-Z]{3,}):\s+// ); 617 } 618 my $severity = (exists $opts{-severity}) ? "*** $opts{-severity}: " : ""; 619 620 ## Increment error count and print message " 621 ++($self->{_NUM_ERRORS}) 622 if(!%opts || ($opts{-severity} && $opts{-severity} eq 'ERROR')); 623 ++($self->{_NUM_WARNINGS}) 624 if(!%opts || ($opts{-severity} && $opts{-severity} eq 'WARNING')); 625 my $out_fh = $self->output_handle() || \*STDERR; 626 print $out_fh ($severity, $msg, $line, $file, "\n") 627 if($self->{-warnings} || !%opts || $opts{-severity} ne 'WARNING'); 628} 629 630################################## 631 632=item C<$checker-E<gt>num_errors()> 633 634Set (if argument specified) and retrieve the number of errors found. 635 636=cut 637 638sub num_errors { 639 return (@_ > 1) ? ($_[0]->{_NUM_ERRORS} = $_[1]) : $_[0]->{_NUM_ERRORS}; 640} 641 642################################## 643 644=item C<$checker-E<gt>num_warnings()> 645 646Set (if argument specified) and retrieve the number of warnings found. 647 648=cut 649 650sub num_warnings { 651 return (@_ > 1) ? ($_[0]->{_NUM_WARNINGS} = $_[1]) : $_[0]->{_NUM_WARNINGS}; 652} 653 654################################## 655 656=item C<$checker-E<gt>name()> 657 658Set (if argument specified) and retrieve the canonical name of POD as 659found in the C<=head1 NAME> section. 660 661=cut 662 663sub name { 664 return (@_ > 1 && $_[1]) ? 665 ($_[0]->{-name} = $_[1]) : $_[0]->{-name}; 666} 667 668################################## 669 670=item C<$checker-E<gt>node()> 671 672Add (if argument specified) and retrieve the nodes (as defined by C<=headX> 673and C<=item>) of the current POD. The nodes are returned in the order of 674their occurrence. They consist of plain text, each piece of whitespace is 675collapsed to a single blank. 676 677=cut 678 679sub node { 680 my ($self,$text) = @_; 681 if(defined $text) { 682 $text =~ s/\s+$//s; # strip trailing whitespace 683 $text =~ s/\s+/ /gs; # collapse whitespace 684 # add node, order important! 685 push(@{$self->{_nodes}}, $text); 686 # keep also a uniqueness counter 687 $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s); 688 return $text; 689 } 690 @{$self->{_nodes}}; 691} 692 693################################## 694 695=item C<$checker-E<gt>idx()> 696 697Add (if argument specified) and retrieve the index entries (as defined by 698C<XE<lt>E<gt>>) of the current POD. They consist of plain text, each piece 699of whitespace is collapsed to a single blank. 700 701=cut 702 703# set/return index entries of current POD 704sub idx { 705 my ($self,$text) = @_; 706 if(defined $text) { 707 $text =~ s/\s+$//s; # strip trailing whitespace 708 $text =~ s/\s+/ /gs; # collapse whitespace 709 # add node, order important! 710 push(@{$self->{_index}}, $text); 711 # keep also a uniqueness counter 712 $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s); 713 return $text; 714 } 715 @{$self->{_index}}; 716} 717 718################################## 719 720=item C<$checker-E<gt>hyperlink()> 721 722Add (if argument specified) and retrieve the hyperlinks (as defined by 723C<LE<lt>E<gt>>) of the current POD. They consist of a 2-item array: line 724number and C<Pod::Hyperlink> object. 725 726=back 727 728=cut 729 730# set/return hyperlinks of the current POD 731sub hyperlink { 732 my $self = shift; 733 if($_[0]) { 734 push(@{$self->{_links}}, $_[0]); 735 return $_[0]; 736 } 737 @{$self->{_links}}; 738} 739 740## overrides for Pod::Parser 741 742sub end_pod { 743 ## Do some final checks and 744 ## print the number of errors found 745 my $self = shift; 746 my $infile = $self->input_file(); 747 my $out_fh = $self->output_handle(); 748 749 if(@{$self->{_list_stack}}) { 750 my $list; 751 while(($list = $self->_close_list('EOF',$infile)) && 752 $list->indent() ne 'auto') { 753 $self->poderror({ -line => 'EOF', -file => $infile, 754 -severity => 'ERROR', -msg => "=over on line " . 755 $list->start() . " without closing =back" }); #" 756 } 757 } 758 759 # check validity of document internal hyperlinks 760 # first build the node names from the paragraph text 761 my %nodes; 762 foreach($self->node()) { 763 $nodes{$_} = 1; 764 if(/^(\S+)\s+\S/) { 765 # we have more than one word. Use the first as a node, too. 766 # This is used heavily in perlfunc.pod 767 $nodes{$1} ||= 2; # derived node 768 } 769 } 770 foreach($self->idx()) { 771 $nodes{$_} = 3; # index node 772 } 773 foreach($self->hyperlink()) { 774 my ($line,$link) = @$_; 775 # _TODO_ what if there is a link to the page itself by the name, 776 # e.g. in Tk::Pod : L<Tk::Pod/"DESCRIPTION"> 777 if($link->node() && !$link->page() && $link->type() ne 'hyperlink') { 778 my $node = $self->_check_ptree($self->parse_text($link->node(), 779 $line), $line, $infile, 'L'); 780 if($node && !$nodes{$node}) { 781 $self->poderror({ -line => $line || '', -file => $infile, 782 -severity => 'ERROR', 783 -msg => "unresolved internal link '$node'"}); 784 } 785 } 786 } 787 788 # check the internal nodes for uniqueness. This pertains to 789 # =headX, =item and X<...> 790 foreach(grep($self->{_unique_nodes}->{$_} > 1, 791 keys %{$self->{_unique_nodes}})) { 792 $self->poderror({ -line => '-', -file => $infile, 793 -severity => 'WARNING', 794 -msg => "multiple occurrence of link target '$_'"}); 795 } 796 797 # no POD found here 798 $self->num_errors(-1) if($self->{_commands} == 0); 799} 800 801# check a POD command directive 802sub command { 803 my ($self, $cmd, $paragraph, $line_num, $pod_para) = @_; 804 my ($file, $line) = $pod_para->file_line; 805 ## Check the command syntax 806 my $arg; # this will hold the command argument 807 if (! $VALID_COMMANDS{$cmd}) { 808 $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR', 809 -msg => "Unknown command '$cmd'" }); 810 } 811 else { # found a valid command 812 $self->{_commands}++; # delete this line if below is enabled again 813 814 ##### following check disabled due to strong request 815 #if(!$self->{_commands}++ && $cmd !~ /^head/) { 816 # $self->poderror({ -line => $line, -file => $file, 817 # -severity => 'WARNING', 818 # -msg => "file does not start with =head" }); 819 #} 820 821 # check syntax of particular command 822 if($cmd eq 'over') { 823 # check for argument 824 $arg = $self->interpolate_and_check($paragraph, $line,$file); 825 my $indent = 4; # default 826 if($arg && $arg =~ /^\s*(\d+)\s*$/) { 827 $indent = $1; 828 } 829 # start a new list 830 $self->_open_list($indent,$line,$file); 831 } 832 elsif($cmd eq 'item') { 833 # are we in a list? 834 unless(@{$self->{_list_stack}}) { 835 $self->poderror({ -line => $line, -file => $file, 836 -severity => 'ERROR', 837 -msg => "=item without previous =over" }); 838 # auto-open in case we encounter many more 839 $self->_open_list('auto',$line,$file); 840 } 841 my $list = $self->{_list_stack}->[0]; 842 # check whether the previous item had some contents 843 if(defined $self->{_list_item_contents} && 844 $self->{_list_item_contents} == 0) { 845 $self->poderror({ -line => $line, -file => $file, 846 -severity => 'WARNING', 847 -msg => "previous =item has no contents" }); 848 } 849 if($list->{_has_par}) { 850 $self->poderror({ -line => $line, -file => $file, 851 -severity => 'WARNING', 852 -msg => "preceding non-item paragraph(s)" }); 853 delete $list->{_has_par}; 854 } 855 # check for argument 856 $arg = $self->interpolate_and_check($paragraph, $line, $file); 857 if($arg && $arg =~ /(\S+)/) { 858 $arg =~ s/[\s\n]+$//; 859 my $type; 860 if($arg =~ /^[*]\s*(\S*.*)/) { 861 $type = 'bullet'; 862 $self->{_list_item_contents} = $1 ? 1 : 0; 863 $arg = $1; 864 } 865 elsif($arg =~ /^\d+\.?\s*(\S*)/) { 866 $type = 'number'; 867 $self->{_list_item_contents} = $1 ? 1 : 0; 868 $arg = $1; 869 } 870 else { 871 $type = 'definition'; 872 $self->{_list_item_contents} = 1; 873 } 874 my $first = $list->type(); 875 if($first && $first ne $type) { 876 $self->poderror({ -line => $line, -file => $file, 877 -severity => 'WARNING', 878 -msg => "=item type mismatch ('$first' vs. '$type')"}); 879 } 880 else { # first item 881 $list->type($type); 882 } 883 } 884 else { 885 $self->poderror({ -line => $line, -file => $file, 886 -severity => 'WARNING', 887 -msg => "No argument for =item" }); 888 $arg = ' '; # empty 889 $self->{_list_item_contents} = 0; 890 } 891 # add this item 892 $list->item($arg); 893 # remember this node 894 $self->node($arg); 895 } 896 elsif($cmd eq 'back') { 897 # check if we have an open list 898 unless(@{$self->{_list_stack}}) { 899 $self->poderror({ -line => $line, -file => $file, 900 -severity => 'ERROR', 901 -msg => "=back without previous =over" }); 902 } 903 else { 904 # check for spurious characters 905 $arg = $self->interpolate_and_check($paragraph, $line,$file); 906 if($arg && $arg =~ /\S/) { 907 $self->poderror({ -line => $line, -file => $file, 908 -severity => 'ERROR', 909 -msg => "Spurious character(s) after =back" }); 910 } 911 # close list 912 my $list = $self->_close_list($line,$file); 913 # check for empty lists 914 if(!$list->item() && $self->{-warnings}) { 915 $self->poderror({ -line => $line, -file => $file, 916 -severity => 'WARNING', 917 -msg => "No items in =over (at line " . 918 $list->start() . ") / =back list"}); #" 919 } 920 } 921 } 922 elsif($cmd =~ /^head(\d+)/) { 923 my $hnum = $1; 924 $self->{"_have_head_$hnum"}++; # count head types 925 if($hnum > 1 && !$self->{"_have_head_".($hnum -1)}) { 926 $self->poderror({ -line => $line, -file => $file, 927 -severity => 'WARNING', 928 -msg => "=head$hnum without preceding higher level"}); 929 } 930 # check whether the previous =head section had some contents 931 if(defined $self->{_commands_in_head} && 932 $self->{_commands_in_head} == 0 && 933 defined $self->{_last_head} && 934 $self->{_last_head} >= $hnum) { 935 $self->poderror({ -line => $line, -file => $file, 936 -severity => 'WARNING', 937 -msg => "empty section in previous paragraph"}); 938 } 939 $self->{_commands_in_head} = -1; 940 $self->{_last_head} = $hnum; 941 # check if there is an open list 942 if(@{$self->{_list_stack}}) { 943 my $list; 944 while(($list = $self->_close_list($line,$file)) && 945 $list->indent() ne 'auto') { 946 $self->poderror({ -line => $line, -file => $file, 947 -severity => 'ERROR', 948 -msg => "=over on line ". $list->start() . 949 " without closing =back (at $cmd)" }); 950 } 951 } 952 # remember this node 953 $arg = $self->interpolate_and_check($paragraph, $line,$file); 954 $arg =~ s/[\s\n]+$//s; 955 $self->node($arg); 956 unless(length($arg)) { 957 $self->poderror({ -line => $line, -file => $file, 958 -severity => 'ERROR', 959 -msg => "empty =$cmd"}); 960 } 961 if($cmd eq 'head1') { 962 $self->{_current_head1} = $arg; 963 } else { 964 $self->{_current_head1} = ''; 965 } 966 } 967 elsif($cmd eq 'begin') { 968 if($self->{_have_begin}) { 969 # already have a begin 970 $self->poderror({ -line => $line, -file => $file, 971 -severity => 'ERROR', 972 -msg => "Nested =begin's (first at line " . 973 $self->{_have_begin} . ")"}); 974 } 975 else { 976 # check for argument 977 $arg = $self->interpolate_and_check($paragraph, $line,$file); 978 unless($arg && $arg =~ /(\S+)/) { 979 $self->poderror({ -line => $line, -file => $file, 980 -severity => 'ERROR', 981 -msg => "No argument for =begin"}); 982 } 983 # remember the =begin 984 $self->{_have_begin} = "$line:$1"; 985 } 986 } 987 elsif($cmd eq 'end') { 988 if($self->{_have_begin}) { 989 # close the existing =begin 990 $self->{_have_begin} = ''; 991 # check for spurious characters 992 $arg = $self->interpolate_and_check($paragraph, $line,$file); 993 # the closing argument is optional 994 #if($arg && $arg =~ /\S/) { 995 # $self->poderror({ -line => $line, -file => $file, 996 # -severity => 'WARNING', 997 # -msg => "Spurious character(s) after =end" }); 998 #} 999 } 1000 else { 1001 # don't have a matching =begin 1002 $self->poderror({ -line => $line, -file => $file, 1003 -severity => 'ERROR', 1004 -msg => "=end without =begin" }); 1005 } 1006 } 1007 elsif($cmd eq 'for') { 1008 unless($paragraph =~ /\s*(\S+)\s*/) { 1009 $self->poderror({ -line => $line, -file => $file, 1010 -severity => 'ERROR', 1011 -msg => "=for without formatter specification" }); 1012 } 1013 $arg = ''; # do not expand paragraph below 1014 } 1015 elsif($cmd =~ /^(pod|cut)$/) { 1016 # check for argument 1017 $arg = $self->interpolate_and_check($paragraph, $line,$file); 1018 if($arg && $arg =~ /(\S+)/) { 1019 $self->poderror({ -line => $line, -file => $file, 1020 -severity => 'ERROR', 1021 -msg => "Spurious text after =$cmd"}); 1022 } 1023 } 1024 $self->{_commands_in_head}++; 1025 ## Check the interior sequences in the command-text 1026 $self->interpolate_and_check($paragraph, $line,$file) 1027 unless(defined $arg); 1028 } 1029} 1030 1031sub _open_list 1032{ 1033 my ($self,$indent,$line,$file) = @_; 1034 my $list = Pod::List->new( 1035 -indent => $indent, 1036 -start => $line, 1037 -file => $file); 1038 unshift(@{$self->{_list_stack}}, $list); 1039 undef $self->{_list_item_contents}; 1040 $list; 1041} 1042 1043sub _close_list 1044{ 1045 my ($self,$line,$file) = @_; 1046 my $list = shift(@{$self->{_list_stack}}); 1047 if(defined $self->{_list_item_contents} && 1048 $self->{_list_item_contents} == 0) { 1049 $self->poderror({ -line => $line, -file => $file, 1050 -severity => 'WARNING', 1051 -msg => "previous =item has no contents" }); 1052 } 1053 undef $self->{_list_item_contents}; 1054 $list; 1055} 1056 1057# process a block of some text 1058sub interpolate_and_check { 1059 my ($self, $paragraph, $line, $file) = @_; 1060 ## Check the interior sequences in the command-text 1061 # and return the text 1062 $self->_check_ptree( 1063 $self->parse_text($paragraph,$line), $line, $file, ''); 1064} 1065 1066sub _check_ptree { 1067 my ($self,$ptree,$line,$file,$nestlist) = @_; 1068 local($_); 1069 my $text = ''; 1070 # process each node in the parse tree 1071 foreach(@$ptree) { 1072 # regular text chunk 1073 unless(ref) { 1074 # count the unescaped angle brackets 1075 # complain only when warning level is greater than 1 1076 if($self->{-warnings} && $self->{-warnings}>1) { 1077 my $count; 1078 if($count = tr/<>/<>/) { 1079 $self->poderror({ -line => $line, -file => $file, 1080 -severity => 'WARNING', 1081 -msg => "$count unescaped <> in paragraph" }); 1082 } 1083 } 1084 $text .= $_; 1085 next; 1086 } 1087 # have an interior sequence 1088 my $cmd = $_->cmd_name(); 1089 my $contents = $_->parse_tree(); 1090 ($file,$line) = $_->file_line(); 1091 # check for valid tag 1092 if (! $VALID_SEQUENCES{$cmd}) { 1093 $self->poderror({ -line => $line, -file => $file, 1094 -severity => 'ERROR', 1095 -msg => qq(Unknown interior-sequence '$cmd')}); 1096 # expand it anyway 1097 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd"); 1098 next; 1099 } 1100 if($nestlist =~ /$cmd/) { 1101 $self->poderror({ -line => $line, -file => $file, 1102 -severity => 'ERROR', 1103 -msg => "nested commands $cmd<...$cmd<...>...>"}); 1104 # _TODO_ should we add the contents anyway? 1105 # expand it anyway, see below 1106 } 1107 if($cmd eq 'E') { 1108 # preserve entities 1109 if(@$contents > 1 || ref $$contents[0] || $$contents[0] !~ /^\w+$/) { 1110 $self->poderror({ -line => $line, -file => $file, 1111 -severity => 'ERROR', 1112 -msg => "garbled entity " . $_->raw_text()}); 1113 next; 1114 } 1115 my $ent = $$contents[0]; 1116 my $val; 1117 if($ent =~ /^0x[0-9a-f]+$/i) { 1118 # hexadec entity 1119 $val = hex($ent); 1120 } 1121 elsif($ent =~ /^0\d+$/) { 1122 # octal 1123 $val = oct($ent); 1124 } 1125 elsif($ent =~ /^\d+$/) { 1126 # numeric entity 1127 $val = $ent; 1128 } 1129 if(defined $val) { 1130 if($val>0 && $val<256) { 1131 $text .= chr($val); 1132 } 1133 else { 1134 $self->poderror({ -line => $line, -file => $file, 1135 -severity => 'ERROR', 1136 -msg => "Entity number out of range " . $_->raw_text()}); 1137 } 1138 } 1139 elsif($ENTITIES{$ent}) { 1140 # known ISO entity 1141 $text .= $ENTITIES{$ent}; 1142 } 1143 else { 1144 $self->poderror({ -line => $line, -file => $file, 1145 -severity => 'WARNING', 1146 -msg => "Unknown entity " . $_->raw_text()}); 1147 $text .= "E<$ent>"; 1148 } 1149 } 1150 elsif($cmd eq 'L') { 1151 # try to parse the hyperlink 1152 my $link = Pod::Hyperlink->new($contents->raw_text()); 1153 unless(defined $link) { 1154 $self->poderror({ -line => $line, -file => $file, 1155 -severity => 'ERROR', 1156 -msg => "malformed link " . $_->raw_text() ." : $@"}); 1157 next; 1158 } 1159 $link->line($line); # remember line 1160 if($self->{-warnings}) { 1161 foreach my $w ($link->warning()) { 1162 $self->poderror({ -line => $line, -file => $file, 1163 -severity => 'WARNING', 1164 -msg => $w }); 1165 } 1166 } 1167 # check the link text 1168 $text .= $self->_check_ptree($self->parse_text($link->text(), 1169 $line), $line, $file, "$nestlist$cmd"); 1170 # remember link 1171 $self->hyperlink([$line,$link]); 1172 } 1173 elsif($cmd =~ /[BCFIS]/) { 1174 # add the guts 1175 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd"); 1176 } 1177 elsif($cmd eq 'Z') { 1178 if(length($contents->raw_text())) { 1179 $self->poderror({ -line => $line, -file => $file, 1180 -severity => 'ERROR', 1181 -msg => "Nonempty Z<>"}); 1182 } 1183 } 1184 elsif($cmd eq 'X') { 1185 my $idx = $self->_check_ptree($contents, $line, $file, "$nestlist$cmd"); 1186 if($idx =~ /^\s*$/s) { 1187 $self->poderror({ -line => $line, -file => $file, 1188 -severity => 'ERROR', 1189 -msg => "Empty X<>"}); 1190 } 1191 else { 1192 # remember this node 1193 $self->idx($idx); 1194 } 1195 } 1196 else { 1197 # not reached 1198 die "internal error"; 1199 } 1200 } 1201 $text; 1202} 1203 1204# process a block of verbatim text 1205sub verbatim { 1206 ## Nothing particular to check 1207 my ($self, $paragraph, $line_num, $pod_para) = @_; 1208 1209 $self->_preproc_par($paragraph); 1210 1211 if($self->{_current_head1} eq 'NAME') { 1212 my ($file, $line) = $pod_para->file_line; 1213 $self->poderror({ -line => $line, -file => $file, 1214 -severity => 'WARNING', 1215 -msg => 'Verbatim paragraph in NAME section' }); 1216 } 1217} 1218 1219# process a block of regular text 1220sub textblock { 1221 my ($self, $paragraph, $line_num, $pod_para) = @_; 1222 my ($file, $line) = $pod_para->file_line; 1223 1224 $self->_preproc_par($paragraph); 1225 1226 # skip this paragraph if in a =begin block 1227 unless($self->{_have_begin}) { 1228 my $block = $self->interpolate_and_check($paragraph, $line,$file); 1229 if($self->{_current_head1} eq 'NAME') { 1230 if($block =~ /^\s*(\S+?)\s*[,-]/) { 1231 # this is the canonical name 1232 $self->{-name} = $1 unless(defined $self->{-name}); 1233 } 1234 } 1235 } 1236} 1237 1238sub _preproc_par 1239{ 1240 my $self = shift; 1241 $_[0] =~ s/[\s\n]+$//; 1242 if($_[0]) { 1243 $self->{_commands_in_head}++; 1244 $self->{_list_item_contents}++ if(defined $self->{_list_item_contents}); 1245 if(@{$self->{_list_stack}} && !$self->{_list_stack}->[0]->item()) { 1246 $self->{_list_stack}->[0]->{_has_par} = 1; 1247 } 1248 } 1249} 1250 12511; 1252 1253__END__ 1254 1255=head1 AUTHOR 1256 1257Please report bugs using L<http://rt.cpan.org>. 1258 1259Brad Appleton E<lt>bradapp@enteract.comE<gt> (initial version), 1260Marek Rouchal E<lt>marekr@cpan.orgE<gt> 1261 1262Based on code for B<Pod::Text::pod2text()> written by 1263Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> 1264 1265=cut 1266 1267