1############################################################################# 2# Pod/Select.pm -- function to select portions of POD docs 3# 4# Copyright (C) 1996-2000 by Bradford Appleton. All rights reserved. 5# This file is part of "PodParser". PodParser is free software; 6# you can redistribute it and/or modify it under the same terms 7# as Perl itself. 8############################################################################# 9 10package Pod::Select; 11 12use vars qw($VERSION); 13$VERSION = 1.13; ## Current version of this package 14require 5.005; ## requires this Perl version or later 15 16############################################################################# 17 18=head1 NAME 19 20Pod::Select, podselect() - extract selected sections of POD from input 21 22=head1 SYNOPSIS 23 24 use Pod::Select; 25 26 ## Select all the POD sections for each file in @filelist 27 ## and print the result on standard output. 28 podselect(@filelist); 29 30 ## Same as above, but write to tmp.out 31 podselect({-output => "tmp.out"}, @filelist): 32 33 ## Select from the given filelist, only those POD sections that are 34 ## within a 1st level section named any of: NAME, SYNOPSIS, OPTIONS. 35 podselect({-sections => ["NAME|SYNOPSIS", "OPTIONS"]}, @filelist): 36 37 ## Select the "DESCRIPTION" section of the PODs from STDIN and write 38 ## the result to STDERR. 39 podselect({-output => ">&STDERR", -sections => ["DESCRIPTION"]}, \*STDIN); 40 41or 42 43 use Pod::Select; 44 45 ## Create a parser object for selecting POD sections from the input 46 $parser = new Pod::Select(); 47 48 ## Select all the POD sections for each file in @filelist 49 ## and print the result to tmp.out. 50 $parser->parse_from_file("<&STDIN", "tmp.out"); 51 52 ## Select from the given filelist, only those POD sections that are 53 ## within a 1st level section named any of: NAME, SYNOPSIS, OPTIONS. 54 $parser->select("NAME|SYNOPSIS", "OPTIONS"); 55 for (@filelist) { $parser->parse_from_file($_); } 56 57 ## Select the "DESCRIPTION" and "SEE ALSO" sections of the PODs from 58 ## STDIN and write the result to STDERR. 59 $parser->select("DESCRIPTION"); 60 $parser->add_selection("SEE ALSO"); 61 $parser->parse_from_filehandle(\*STDIN, \*STDERR); 62 63=head1 REQUIRES 64 65perl5.005, Pod::Parser, Exporter, Carp 66 67=head1 EXPORTS 68 69podselect() 70 71=head1 DESCRIPTION 72 73B<podselect()> is a function which will extract specified sections of 74pod documentation from an input stream. This ability is provided by the 75B<Pod::Select> module which is a subclass of B<Pod::Parser>. 76B<Pod::Select> provides a method named B<select()> to specify the set of 77POD sections to select for processing/printing. B<podselect()> merely 78creates a B<Pod::Select> object and then invokes the B<podselect()> 79followed by B<parse_from_file()>. 80 81=head1 SECTION SPECIFICATIONS 82 83B<podselect()> and B<Pod::Select::select()> may be given one or more 84"section specifications" to restrict the text processed to only the 85desired set of sections and their corresponding subsections. A section 86specification is a string containing one or more Perl-style regular 87expressions separated by forward slashes ("/"). If you need to use a 88forward slash literally within a section title you can escape it with a 89backslash ("\/"). 90 91The formal syntax of a section specification is: 92 93=over 4 94 95=item * 96 97I<head1-title-regex>/I<head2-title-regex>/... 98 99=back 100 101Any omitted or empty regular expressions will default to ".*". 102Please note that each regular expression given is implicitly 103anchored by adding "^" and "$" to the beginning and end. Also, if a 104given regular expression starts with a "!" character, then the 105expression is I<negated> (so C<!foo> would match anything I<except> 106C<foo>). 107 108Some example section specifications follow. 109 110=over 4 111 112=item * 113 114Match the C<NAME> and C<SYNOPSIS> sections and all of their subsections: 115 116C<NAME|SYNOPSIS> 117 118=item * 119 120Match only the C<Question> and C<Answer> subsections of the C<DESCRIPTION> 121section: 122 123C<DESCRIPTION/Question|Answer> 124 125=item * 126 127Match the C<Comments> subsection of I<all> sections: 128 129C</Comments> 130 131=item * 132 133Match all subsections of C<DESCRIPTION> I<except> for C<Comments>: 134 135C<DESCRIPTION/!Comments> 136 137=item * 138 139Match the C<DESCRIPTION> section but do I<not> match any of its subsections: 140 141C<DESCRIPTION/!.+> 142 143=item * 144 145Match all top level sections but none of their subsections: 146 147C</!.+> 148 149=back 150 151=begin _NOT_IMPLEMENTED_ 152 153=head1 RANGE SPECIFICATIONS 154 155B<podselect()> and B<Pod::Select::select()> may be given one or more 156"range specifications" to restrict the text processed to only the 157desired ranges of paragraphs in the desired set of sections. A range 158specification is a string containing a single Perl-style regular 159expression (a regex), or else two Perl-style regular expressions 160(regexs) separated by a ".." (Perl's "range" operator is ".."). 161The regexs in a range specification are delimited by forward slashes 162("/"). If you need to use a forward slash literally within a regex you 163can escape it with a backslash ("\/"). 164 165The formal syntax of a range specification is: 166 167=over 4 168 169=item * 170 171/I<start-range-regex>/[../I<end-range-regex>/] 172 173=back 174 175Where each the item inside square brackets (the ".." followed by the 176end-range-regex) is optional. Each "range-regex" is of the form: 177 178 =cmd-expr text-expr 179 180Where I<cmd-expr> is intended to match the name of one or more POD 181commands, and I<text-expr> is intended to match the paragraph text for 182the command. If a range-regex is supposed to match a POD command, then 183the first character of the regex (the one after the initial '/') 184absolutely I<must> be a single '=' character; it may not be anything 185else (not even a regex meta-character) if it is supposed to match 186against the name of a POD command. 187 188If no I<=cmd-expr> is given then the text-expr will be matched against 189plain textblocks unless it is preceded by a space, in which case it is 190matched against verbatim text-blocks. If no I<text-expr> is given then 191only the command-portion of the paragraph is matched against. 192 193Note that these two expressions are each implicitly anchored. This 194means that when matching against the command-name, there will be an 195implicit '^' and '$' around the given I<=cmd-expr>; and when matching 196against the paragraph text there will be an implicit '\A' and '\Z' 197around the given I<text-expr>. 198 199Unlike with section-specs, the '!' character does I<not> have any special 200meaning (negation or otherwise) at the beginning of a range-spec! 201 202Some example range specifications follow. 203 204=over 4 205 206=item 207Match all C<=for html> paragraphs: 208 209C</=for html/> 210 211=item 212Match all paragraphs between C<=begin html> and C<=end html> 213(note that this will I<not> work correctly if such sections 214are nested): 215 216C</=begin html/../=end html/> 217 218=item 219Match all paragraphs between the given C<=item> name until the end of the 220current section: 221 222C</=item mine/../=head\d/> 223 224=item 225Match all paragraphs between the given C<=item> until the next item, or 226until the end of the itemized list (note that this will I<not> work as 227desired if the item contains an itemized list nested within it): 228 229C</=item mine/../=(item|back)/> 230 231=back 232 233=end _NOT_IMPLEMENTED_ 234 235=cut 236 237############################################################################# 238 239use strict; 240#use diagnostics; 241use Carp; 242use Pod::Parser 1.04; 243use vars qw(@ISA @EXPORT $MAX_HEADING_LEVEL); 244 245@ISA = qw(Pod::Parser); 246@EXPORT = qw(&podselect); 247 248## Maximum number of heading levels supported for '=headN' directives 249*MAX_HEADING_LEVEL = \3; 250 251############################################################################# 252 253=head1 OBJECT METHODS 254 255The following methods are provided in this module. Each one takes a 256reference to the object itself as an implicit first parameter. 257 258=cut 259 260##--------------------------------------------------------------------------- 261 262## =begin _PRIVATE_ 263## 264## =head1 B<_init_headings()> 265## 266## Initialize the current set of active section headings. 267## 268## =cut 269## 270## =end _PRIVATE_ 271 272use vars qw(%myData @section_headings); 273 274sub _init_headings { 275 my $self = shift; 276 local *myData = $self; 277 278 ## Initialize current section heading titles if necessary 279 unless (defined $myData{_SECTION_HEADINGS}) { 280 local *section_headings = $myData{_SECTION_HEADINGS} = []; 281 for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) { 282 $section_headings[$i] = ''; 283 } 284 } 285} 286 287##--------------------------------------------------------------------------- 288 289=head1 B<curr_headings()> 290 291 ($head1, $head2, $head3, ...) = $parser->curr_headings(); 292 $head1 = $parser->curr_headings(1); 293 294This method returns a list of the currently active section headings and 295subheadings in the document being parsed. The list of headings returned 296corresponds to the most recently parsed paragraph of the input. 297 298If an argument is given, it must correspond to the desired section 299heading number, in which case only the specified section heading is 300returned. If there is no current section heading at the specified 301level, then C<undef> is returned. 302 303=cut 304 305sub curr_headings { 306 my $self = shift; 307 $self->_init_headings() unless (defined $self->{_SECTION_HEADINGS}); 308 my @headings = @{ $self->{_SECTION_HEADINGS} }; 309 return (@_ > 0 and $_[0] =~ /^\d+$/) ? $headings[$_[0] - 1] : @headings; 310} 311 312##--------------------------------------------------------------------------- 313 314=head1 B<select()> 315 316 $parser->select($section_spec1,$section_spec2,...); 317 318This method is used to select the particular sections and subsections of 319POD documentation that are to be printed and/or processed. The existing 320set of selected sections is I<replaced> with the given set of sections. 321See B<add_selection()> for adding to the current set of selected 322sections. 323 324Each of the C<$section_spec> arguments should be a section specification 325as described in L<"SECTION SPECIFICATIONS">. The section specifications 326are parsed by this method and the resulting regular expressions are 327stored in the invoking object. 328 329If no C<$section_spec> arguments are given, then the existing set of 330selected sections is cleared out (which means C<all> sections will be 331processed). 332 333This method should I<not> normally be overridden by subclasses. 334 335=cut 336 337use vars qw(@selected_sections); 338 339sub select { 340 my $self = shift; 341 my @sections = @_; 342 local *myData = $self; 343 local $_; 344 345### NEED TO DISCERN A SECTION-SPEC FROM A RANGE-SPEC (look for m{^/.+/$}?) 346 347 ##--------------------------------------------------------------------- 348 ## The following is a blatant hack for backward compatibility, and for 349 ## implementing add_selection(). If the *first* *argument* is the 350 ## string "+", then the remaining section specifications are *added* 351 ## to the current set of selections; otherwise the given section 352 ## specifications will *replace* the current set of selections. 353 ## 354 ## This should probably be fixed someday, but for the present time, 355 ## it seems incredibly unlikely that "+" would ever correspond to 356 ## a legitimate section heading 357 ##--------------------------------------------------------------------- 358 my $add = ($sections[0] eq "+") ? shift(@sections) : ""; 359 360 ## Reset the set of sections to use 361 unless (@sections > 0) { 362 delete $myData{_SELECTED_SECTIONS} unless ($add); 363 return; 364 } 365 $myData{_SELECTED_SECTIONS} = [] 366 unless ($add && exists $myData{_SELECTED_SECTIONS}); 367 local *selected_sections = $myData{_SELECTED_SECTIONS}; 368 369 ## Compile each spec 370 my $spec; 371 for $spec (@sections) { 372 if ( defined($_ = &_compile_section_spec($spec)) ) { 373 ## Store them in our sections array 374 push(@selected_sections, $_); 375 } 376 else { 377 carp "Ignoring section spec \"$spec\"!\n"; 378 } 379 } 380} 381 382##--------------------------------------------------------------------------- 383 384=head1 B<add_selection()> 385 386 $parser->add_selection($section_spec1,$section_spec2,...); 387 388This method is used to add to the currently selected sections and 389subsections of POD documentation that are to be printed and/or 390processed. See <select()> for replacing the currently selected sections. 391 392Each of the C<$section_spec> arguments should be a section specification 393as described in L<"SECTION SPECIFICATIONS">. The section specifications 394are parsed by this method and the resulting regular expressions are 395stored in the invoking object. 396 397This method should I<not> normally be overridden by subclasses. 398 399=cut 400 401sub add_selection { 402 my $self = shift; 403 $self->select("+", @_); 404} 405 406##--------------------------------------------------------------------------- 407 408=head1 B<clear_selections()> 409 410 $parser->clear_selections(); 411 412This method takes no arguments, it has the exact same effect as invoking 413<select()> with no arguments. 414 415=cut 416 417sub clear_selections { 418 my $self = shift; 419 $self->select(); 420} 421 422##--------------------------------------------------------------------------- 423 424=head1 B<match_section()> 425 426 $boolean = $parser->match_section($heading1,$heading2,...); 427 428Returns a value of true if the given section and subsection heading 429titles match any of the currently selected section specifications in 430effect from prior calls to B<select()> and B<add_selection()> (or if 431there are no explictly selected/deselected sections). 432 433The arguments C<$heading1>, C<$heading2>, etc. are the heading titles of 434the corresponding sections, subsections, etc. to try and match. If 435C<$headingN> is omitted then it defaults to the current corresponding 436section heading title in the input. 437 438This method should I<not> normally be overridden by subclasses. 439 440=cut 441 442sub match_section { 443 my $self = shift; 444 my (@headings) = @_; 445 local *myData = $self; 446 447 ## Return true if no restrictions were explicitly specified 448 my $selections = (exists $myData{_SELECTED_SECTIONS}) 449 ? $myData{_SELECTED_SECTIONS} : undef; 450 return 1 unless ((defined $selections) && (@{$selections} > 0)); 451 452 ## Default any unspecified sections to the current one 453 my @current_headings = $self->curr_headings(); 454 for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) { 455 (defined $headings[$i]) or $headings[$i] = $current_headings[$i]; 456 } 457 458 ## Look for a match against the specified section expressions 459 my ($section_spec, $regex, $negated, $match); 460 for $section_spec ( @{$selections} ) { 461 ##------------------------------------------------------ 462 ## Each portion of this spec must match in order for 463 ## the spec to be matched. So we will start with a 464 ## match-value of 'true' and logically 'and' it with 465 ## the results of matching a given element of the spec. 466 ##------------------------------------------------------ 467 $match = 1; 468 for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) { 469 $regex = $section_spec->[$i]; 470 $negated = ($regex =~ s/^\!//); 471 $match &= ($negated ? ($headings[$i] !~ /${regex}/) 472 : ($headings[$i] =~ /${regex}/)); 473 last unless ($match); 474 } 475 return 1 if ($match); 476 } 477 return 0; ## no match 478} 479 480##--------------------------------------------------------------------------- 481 482=head1 B<is_selected()> 483 484 $boolean = $parser->is_selected($paragraph); 485 486This method is used to determine if the block of text given in 487C<$paragraph> falls within the currently selected set of POD sections 488and subsections to be printed or processed. This method is also 489responsible for keeping track of the current input section and 490subsections. It is assumed that C<$paragraph> is the most recently read 491(but not yet processed) input paragraph. 492 493The value returned will be true if the C<$paragraph> and the rest of the 494text in the same section as C<$paragraph> should be selected (included) 495for processing; otherwise a false value is returned. 496 497=cut 498 499sub is_selected { 500 my ($self, $paragraph) = @_; 501 local $_; 502 local *myData = $self; 503 504 $self->_init_headings() unless (defined $myData{_SECTION_HEADINGS}); 505 506 ## Keep track of current sections levels and headings 507 $_ = $paragraph; 508 if (/^=((?:sub)*)(?:head(?:ing)?|sec(?:tion)?)(\d*)\s+(.*)\s*$/) { 509 ## This is a section heading command 510 my ($level, $heading) = ($2, $3); 511 $level = 1 + (length($1) / 3) if ((! length $level) || (length $1)); 512 ## Reset the current section heading at this level 513 $myData{_SECTION_HEADINGS}->[$level - 1] = $heading; 514 ## Reset subsection headings of this one to empty 515 for (my $i = $level; $i < $MAX_HEADING_LEVEL; ++$i) { 516 $myData{_SECTION_HEADINGS}->[$i] = ''; 517 } 518 } 519 520 return $self->match_section(); 521} 522 523############################################################################# 524 525=head1 EXPORTED FUNCTIONS 526 527The following functions are exported by this module. Please note that 528these are functions (not methods) and therefore C<do not> take an 529implicit first argument. 530 531=cut 532 533##--------------------------------------------------------------------------- 534 535=head1 B<podselect()> 536 537 podselect(\%options,@filelist); 538 539B<podselect> will print the raw (untranslated) POD paragraphs of all 540POD sections in the given input files specified by C<@filelist> 541according to the given options. 542 543If any argument to B<podselect> is a reference to a hash 544(associative array) then the values with the following keys are 545processed as follows: 546 547=over 4 548 549=item B<-output> 550 551A string corresponding to the desired output file (or ">&STDOUT" 552or ">&STDERR"). The default is to use standard output. 553 554=item B<-sections> 555 556A reference to an array of sections specifications (as described in 557L<"SECTION SPECIFICATIONS">) which indicate the desired set of POD 558sections and subsections to be selected from input. If no section 559specifications are given, then all sections of the PODs are used. 560 561=begin _NOT_IMPLEMENTED_ 562 563=item B<-ranges> 564 565A reference to an array of range specifications (as described in 566L<"RANGE SPECIFICATIONS">) which indicate the desired range of POD 567paragraphs to be selected from the desired input sections. If no range 568specifications are given, then all paragraphs of the desired sections 569are used. 570 571=end _NOT_IMPLEMENTED_ 572 573=back 574 575All other arguments should correspond to the names of input files 576containing POD sections. A file name of "-" or "<&STDIN" will 577be interpeted to mean standard input (which is the default if no 578filenames are given). 579 580=cut 581 582sub podselect { 583 my(@argv) = @_; 584 my %defaults = (); 585 my $pod_parser = new Pod::Select(%defaults); 586 my $num_inputs = 0; 587 my $output = ">&STDOUT"; 588 my %opts = (); 589 local $_; 590 for (@argv) { 591 if (ref($_)) { 592 next unless (ref($_) eq 'HASH'); 593 %opts = (%defaults, %{$_}); 594 595 ##------------------------------------------------------------- 596 ## Need this for backward compatibility since we formerly used 597 ## options that were all uppercase words rather than ones that 598 ## looked like Unix command-line options. 599 ## to be uppercase keywords) 600 ##------------------------------------------------------------- 601 %opts = map { 602 my ($key, $val) = (lc $_, $opts{$_}); 603 $key =~ s/^(?=\w)/-/; 604 $key =~ /^-se[cl]/ and $key = '-sections'; 605 #! $key eq '-range' and $key .= 's'; 606 ($key => $val); 607 } (keys %opts); 608 609 ## Process the options 610 (exists $opts{'-output'}) and $output = $opts{'-output'}; 611 612 ## Select the desired sections 613 $pod_parser->select(@{ $opts{'-sections'} }) 614 if ( (defined $opts{'-sections'}) 615 && ((ref $opts{'-sections'}) eq 'ARRAY') ); 616 617 #! ## Select the desired paragraph ranges 618 #! $pod_parser->select(@{ $opts{'-ranges'} }) 619 #! if ( (defined $opts{'-ranges'}) 620 #! && ((ref $opts{'-ranges'}) eq 'ARRAY') ); 621 } 622 else { 623 $pod_parser->parse_from_file($_, $output); 624 ++$num_inputs; 625 } 626 } 627 $pod_parser->parse_from_file("-") unless ($num_inputs > 0); 628} 629 630############################################################################# 631 632=head1 PRIVATE METHODS AND DATA 633 634B<Pod::Select> makes uses a number of internal methods and data fields 635which clients should not need to see or use. For the sake of avoiding 636name collisions with client data and methods, these methods and fields 637are briefly discussed here. Determined hackers may obtain further 638information about them by reading the B<Pod::Select> source code. 639 640Private data fields are stored in the hash-object whose reference is 641returned by the B<new()> constructor for this class. The names of all 642private methods and data-fields used by B<Pod::Select> begin with a 643prefix of "_" and match the regular expression C</^_\w+$/>. 644 645=cut 646 647##--------------------------------------------------------------------------- 648 649=begin _PRIVATE_ 650 651=head1 B<_compile_section_spec()> 652 653 $listref = $parser->_compile_section_spec($section_spec); 654 655This function (note it is a function and I<not> a method) takes a 656section specification (as described in L<"SECTION SPECIFICATIONS">) 657given in C<$section_sepc>, and compiles it into a list of regular 658expressions. If C<$section_spec> has no syntax errors, then a reference 659to the list (array) of corresponding regular expressions is returned; 660otherwise C<undef> is returned and an error message is printed (using 661B<carp>) for each invalid regex. 662 663=end _PRIVATE_ 664 665=cut 666 667sub _compile_section_spec { 668 my ($section_spec) = @_; 669 my (@regexs, $negated); 670 671 ## Compile the spec into a list of regexs 672 local $_ = $section_spec; 673 s|\\\\|\001|g; ## handle escaped backward slashes 674 s|\\/|\002|g; ## handle escaped forward slashes 675 676 ## Parse the regexs for the heading titles 677 @regexs = split('/', $_, $MAX_HEADING_LEVEL); 678 679 ## Set default regex for ommitted levels 680 for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) { 681 $regexs[$i] = '.*' unless ((defined $regexs[$i]) 682 && (length $regexs[$i])); 683 } 684 ## Modify the regexs as needed and validate their syntax 685 my $bad_regexs = 0; 686 for (@regexs) { 687 $_ .= '.+' if ($_ eq '!'); 688 s|\001|\\\\|g; ## restore escaped backward slashes 689 s|\002|\\/|g; ## restore escaped forward slashes 690 $negated = s/^\!//; ## check for negation 691 eval "/$_/"; ## check regex syntax 692 if ($@) { 693 ++$bad_regexs; 694 carp "Bad regular expression /$_/ in \"$section_spec\": $@\n"; 695 } 696 else { 697 ## Add the forward and rear anchors (and put the negator back) 698 $_ = '^' . $_ unless (/^\^/); 699 $_ = $_ . '$' unless (/\$$/); 700 $_ = '!' . $_ if ($negated); 701 } 702 } 703 return (! $bad_regexs) ? [ @regexs ] : undef; 704} 705 706##--------------------------------------------------------------------------- 707 708=begin _PRIVATE_ 709 710=head2 $self->{_SECTION_HEADINGS} 711 712A reference to an array of the current section heading titles for each 713heading level (note that the first heading level title is at index 0). 714 715=end _PRIVATE_ 716 717=cut 718 719##--------------------------------------------------------------------------- 720 721=begin _PRIVATE_ 722 723=head2 $self->{_SELECTED_SECTIONS} 724 725A reference to an array of references to arrays. Each subarray is a list 726of anchored regular expressions (preceded by a "!" if the expression is to 727be negated). The index of the expression in the subarray should correspond 728to the index of the heading title in C<$self-E<gt>{_SECTION_HEADINGS}> 729that it is to be matched against. 730 731=end _PRIVATE_ 732 733=cut 734 735############################################################################# 736 737=head1 SEE ALSO 738 739L<Pod::Parser> 740 741=head1 AUTHOR 742 743Please report bugs using L<http://rt.cpan.org>. 744 745Brad Appleton E<lt>bradapp@enteract.comE<gt> 746 747Based on code for B<pod2text> written by 748Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> 749 750=cut 751 7521; 753 754