1############################################################################# 2# Pod/ParseUtils.pm -- helpers for POD parsing and conversion 3# 4# Copyright (C) 1999-2000 by Marek Rouchal. All rights reserved. 5# This file is part of "PodParser". PodParser is free software; 6# you can redistribute it and/or modify it under the same terms 7# as Perl itself. 8############################################################################# 9 10package Pod::ParseUtils; 11 12use vars qw($VERSION); 13$VERSION = 0.30; ## Current version of this package 14require 5.005; ## requires this Perl version or later 15 16=head1 NAME 17 18Pod::ParseUtils - helpers for POD parsing and conversion 19 20=head1 SYNOPSIS 21 22 use Pod::ParseUtils; 23 24 my $list = new Pod::List; 25 my $link = Pod::Hyperlink->new('Pod::Parser'); 26 27=head1 DESCRIPTION 28 29B<Pod::ParseUtils> contains a few object-oriented helper packages for 30POD parsing and processing (i.e. in POD formatters and translators). 31 32=cut 33 34#----------------------------------------------------------------------------- 35# Pod::List 36# 37# class to hold POD list info (=over, =item, =back) 38#----------------------------------------------------------------------------- 39 40package Pod::List; 41 42use Carp; 43 44=head2 Pod::List 45 46B<Pod::List> can be used to hold information about POD lists 47(written as =over ... =item ... =back) for further processing. 48The following methods are available: 49 50=over 4 51 52=item Pod::List-E<gt>new() 53 54Create a new list object. Properties may be specified through a hash 55reference like this: 56 57 my $list = Pod::List->new({ -start => $., -indent => 4 }); 58 59See the individual methods/properties for details. 60 61=cut 62 63sub new { 64 my $this = shift; 65 my $class = ref($this) || $this; 66 my %params = @_; 67 my $self = {%params}; 68 bless $self, $class; 69 $self->initialize(); 70 return $self; 71} 72 73sub initialize { 74 my $self = shift; 75 $self->{-file} ||= 'unknown'; 76 $self->{-start} ||= 'unknown'; 77 $self->{-indent} ||= 4; # perlpod: "should be the default" 78 $self->{_items} = []; 79 $self->{-type} ||= ''; 80} 81 82=item $list-E<gt>file() 83 84Without argument, retrieves the file name the list is in. This must 85have been set before by either specifying B<-file> in the B<new()> 86method or by calling the B<file()> method with a scalar argument. 87 88=cut 89 90# The POD file name the list appears in 91sub file { 92 return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file}; 93} 94 95=item $list-E<gt>start() 96 97Without argument, retrieves the line number where the list started. 98This must have been set before by either specifying B<-start> in the 99B<new()> method or by calling the B<start()> method with a scalar 100argument. 101 102=cut 103 104# The line in the file the node appears 105sub start { 106 return (@_ > 1) ? ($_[0]->{-start} = $_[1]) : $_[0]->{-start}; 107} 108 109=item $list-E<gt>indent() 110 111Without argument, retrieves the indent level of the list as specified 112in C<=over n>. This must have been set before by either specifying 113B<-indent> in the B<new()> method or by calling the B<indent()> method 114with a scalar argument. 115 116=cut 117 118# indent level 119sub indent { 120 return (@_ > 1) ? ($_[0]->{-indent} = $_[1]) : $_[0]->{-indent}; 121} 122 123=item $list-E<gt>type() 124 125Without argument, retrieves the list type, which can be an arbitrary value, 126e.g. C<OL>, C<UL>, ... when thinking the HTML way. 127This must have been set before by either specifying 128B<-type> in the B<new()> method or by calling the B<type()> method 129with a scalar argument. 130 131=cut 132 133# The type of the list (UL, OL, ...) 134sub type { 135 return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type}; 136} 137 138=item $list-E<gt>rx() 139 140Without argument, retrieves a regular expression for simplifying the 141individual item strings once the list type has been determined. Usage: 142E.g. when converting to HTML, one might strip the leading number in 143an ordered list as C<E<lt>OLE<gt>> already prints numbers itself. 144This must have been set before by either specifying 145B<-rx> in the B<new()> method or by calling the B<rx()> method 146with a scalar argument. 147 148=cut 149 150# The regular expression to simplify the items 151sub rx { 152 return (@_ > 1) ? ($_[0]->{-rx} = $_[1]) : $_[0]->{-rx}; 153} 154 155=item $list-E<gt>item() 156 157Without argument, retrieves the array of the items in this list. 158The items may be represented by any scalar. 159If an argument has been given, it is pushed on the list of items. 160 161=cut 162 163# The individual =items of this list 164sub item { 165 my ($self,$item) = @_; 166 if(defined $item) { 167 push(@{$self->{_items}}, $item); 168 return $item; 169 } 170 else { 171 return @{$self->{_items}}; 172 } 173} 174 175=item $list-E<gt>parent() 176 177Without argument, retrieves information about the parent holding this 178list, which is represented as an arbitrary scalar. 179This must have been set before by either specifying 180B<-parent> in the B<new()> method or by calling the B<parent()> method 181with a scalar argument. 182 183=cut 184 185# possibility for parsers/translators to store information about the 186# lists's parent object 187sub parent { 188 return (@_ > 1) ? ($_[0]->{-parent} = $_[1]) : $_[0]->{-parent}; 189} 190 191=item $list-E<gt>tag() 192 193Without argument, retrieves information about the list tag, which can be 194any scalar. 195This must have been set before by either specifying 196B<-tag> in the B<new()> method or by calling the B<tag()> method 197with a scalar argument. 198 199=back 200 201=cut 202 203# possibility for parsers/translators to store information about the 204# list's object 205sub tag { 206 return (@_ > 1) ? ($_[0]->{-tag} = $_[1]) : $_[0]->{-tag}; 207} 208 209#----------------------------------------------------------------------------- 210# Pod::Hyperlink 211# 212# class to manipulate POD hyperlinks (L<>) 213#----------------------------------------------------------------------------- 214 215package Pod::Hyperlink; 216 217=head2 Pod::Hyperlink 218 219B<Pod::Hyperlink> is a class for manipulation of POD hyperlinks. Usage: 220 221 my $link = Pod::Hyperlink->new('alternative text|page/"section in page"'); 222 223The B<Pod::Hyperlink> class is mainly designed to parse the contents of the 224C<LE<lt>...E<gt>> sequence, providing a simple interface for accessing the 225different parts of a POD hyperlink for further processing. It can also be 226used to construct hyperlinks. 227 228=over 4 229 230=item Pod::Hyperlink-E<gt>new() 231 232The B<new()> method can either be passed a set of key/value pairs or a single 233scalar value, namely the contents of a C<LE<lt>...E<gt>> sequence. An object 234of the class C<Pod::Hyperlink> is returned. The value C<undef> indicates a 235failure, the error message is stored in C<$@>. 236 237=cut 238 239use Carp; 240 241sub new { 242 my $this = shift; 243 my $class = ref($this) || $this; 244 my $self = +{}; 245 bless $self, $class; 246 $self->initialize(); 247 if(defined $_[0]) { 248 if(ref($_[0])) { 249 # called with a list of parameters 250 %$self = %{$_[0]}; 251 $self->_construct_text(); 252 } 253 else { 254 # called with L<> contents 255 return undef unless($self->parse($_[0])); 256 } 257 } 258 return $self; 259} 260 261sub initialize { 262 my $self = shift; 263 $self->{-line} ||= 'undef'; 264 $self->{-file} ||= 'undef'; 265 $self->{-page} ||= ''; 266 $self->{-node} ||= ''; 267 $self->{-alttext} ||= ''; 268 $self->{-type} ||= 'undef'; 269 $self->{_warnings} = []; 270} 271 272=item $link-E<gt>parse($string) 273 274This method can be used to (re)parse a (new) hyperlink, i.e. the contents 275of a C<LE<lt>...E<gt>> sequence. The result is stored in the current object. 276Warnings are stored in the B<warnings> property. 277E.g. sections like C<LE<lt>open(2)E<gt>> are deprecated, as they do not point 278to Perl documents. C<LE<lt>DBI::foo(3p)E<gt>> is wrong as well, the manpage 279section can simply be dropped. 280 281=cut 282 283sub parse { 284 my $self = shift; 285 local($_) = $_[0]; 286 # syntax check the link and extract destination 287 my ($alttext,$page,$node,$type,$quoted) = (undef,'','','',0); 288 289 $self->{_warnings} = []; 290 291 # collapse newlines with whitespace 292 s/\s*\n+\s*/ /g; 293 294 # strip leading/trailing whitespace 295 if(s/^[\s\n]+//) { 296 $self->warning("ignoring leading whitespace in link"); 297 } 298 if(s/[\s\n]+$//) { 299 $self->warning("ignoring trailing whitespace in link"); 300 } 301 unless(length($_)) { 302 _invalid_link("empty link"); 303 return undef; 304 } 305 306 ## Check for different possibilities. This is tedious and error-prone 307 # we match all possibilities (alttext, page, section/item) 308 #warn "DEBUG: link=$_\n"; 309 310 # only page 311 # problem: a lot of people use (), or (1) or the like to indicate 312 # man page sections. But this collides with L<func()> that is supposed 313 # to point to an internal funtion... 314 my $page_rx = '[\w.-]+(?:::[\w.-]+)*(?:[(](?:\d\w*|)[)]|)'; 315 # page name only 316 if(m!^($page_rx)$!o) { 317 $page = $1; 318 $type = 'page'; 319 } 320 # alttext, page and "section" 321 elsif(m!^(.*?)\s*[|]\s*($page_rx)\s*/\s*"(.+)"$!o) { 322 ($alttext, $page, $node) = ($1, $2, $3); 323 $type = 'section'; 324 $quoted = 1; #... therefore | and / are allowed 325 } 326 # alttext and page 327 elsif(m!^(.*?)\s*[|]\s*($page_rx)$!o) { 328 ($alttext, $page) = ($1, $2); 329 $type = 'page'; 330 } 331 # alttext and "section" 332 elsif(m!^(.*?)\s*[|]\s*(?:/\s*|)"(.+)"$!) { 333 ($alttext, $node) = ($1,$2); 334 $type = 'section'; 335 $quoted = 1; 336 } 337 # page and "section" 338 elsif(m!^($page_rx)\s*/\s*"(.+)"$!o) { 339 ($page, $node) = ($1, $2); 340 $type = 'section'; 341 $quoted = 1; 342 } 343 # page and item 344 elsif(m!^($page_rx)\s*/\s*(.+)$!o) { 345 ($page, $node) = ($1, $2); 346 $type = 'item'; 347 } 348 # only "section" 349 elsif(m!^/?"(.+)"$!) { 350 $node = $1; 351 $type = 'section'; 352 $quoted = 1; 353 } 354 # only item 355 elsif(m!^\s*/(.+)$!) { 356 $node = $1; 357 $type = 'item'; 358 } 359 # non-standard: Hyperlink 360 elsif(m!^((?:http|ftp|mailto|news):.+)$!i) { 361 $node = $1; 362 $type = 'hyperlink'; 363 } 364 # alttext, page and item 365 elsif(m!^(.*?)\s*[|]\s*($page_rx)\s*/\s*(.+)$!o) { 366 ($alttext, $page, $node) = ($1, $2, $3); 367 $type = 'item'; 368 } 369 # alttext and item 370 elsif(m!^(.*?)\s*[|]\s*/(.+)$!) { 371 ($alttext, $node) = ($1,$2); 372 } 373 # nonstandard: alttext and hyperlink 374 elsif(m!^(.*?)\s*[|]\s*((?:http|ftp|mailto|news):.+)$!) { 375 ($alttext, $node) = ($1,$2); 376 $type = 'hyperlink'; 377 } 378 # must be an item or a "malformed" section (without "") 379 else { 380 $node = $_; 381 $type = 'item'; 382 } 383 # collapse whitespace in nodes 384 $node =~ s/\s+/ /gs; 385 386 # empty alternative text expands to node name 387 if(defined $alttext) { 388 if(!length($alttext)) { 389 $alttext = $node | $page; 390 } 391 } 392 else { 393 $alttext = ''; 394 } 395 396 if($page =~ /[(]\w*[)]$/) { 397 $self->warning("(section) in '$page' deprecated"); 398 } 399 if(!$quoted && $node =~ m:[|/]:) { 400 $self->warning("node '$node' contains non-escaped | or /"); 401 } 402 if($alttext =~ m:[|/]:) { 403 $self->warning("alternative text '$node' contains non-escaped | or /"); 404 } 405 $self->{-page} = $page; 406 $self->{-node} = $node; 407 $self->{-alttext} = $alttext; 408 #warn "DEBUG: page=$page section=$section item=$item alttext=$alttext\n"; 409 $self->{-type} = $type; 410 $self->_construct_text(); 411 1; 412} 413 414sub _construct_text { 415 my $self = shift; 416 my $alttext = $self->alttext(); 417 my $type = $self->type(); 418 my $section = $self->node(); 419 my $page = $self->page(); 420 my $page_ext = ''; 421 $page =~ s/([(]\w*[)])$// && ($page_ext = $1); 422 if($alttext) { 423 $self->{_text} = $alttext; 424 } 425 elsif($type eq 'hyperlink') { 426 $self->{_text} = $section; 427 } 428 else { 429 $self->{_text} = ($section || '') . 430 (($page && $section) ? ' in ' : '') . 431 "$page$page_ext"; 432 } 433 # for being marked up later 434 # use the non-standard markers P<> and Q<>, so that the resulting 435 # text can be parsed by the translators. It's their job to put 436 # the correct hypertext around the linktext 437 if($alttext) { 438 $self->{_markup} = "Q<$alttext>"; 439 } 440 elsif($type eq 'hyperlink') { 441 $self->{_markup} = "Q<$section>"; 442 } 443 else { 444 $self->{_markup} = (!$section ? '' : "Q<$section>") . 445 ($page ? ($section ? ' in ':'') . "P<$page>$page_ext" : ''); 446 } 447} 448 449=item $link-E<gt>markup($string) 450 451Set/retrieve the textual value of the link. This string contains special 452markers C<PE<lt>E<gt>> and C<QE<lt>E<gt>> that should be expanded by the 453translator's interior sequence expansion engine to the 454formatter-specific code to highlight/activate the hyperlink. The details 455have to be implemented in the translator. 456 457=cut 458 459#' retrieve/set markuped text 460sub markup { 461 return (@_ > 1) ? ($_[0]->{_markup} = $_[1]) : $_[0]->{_markup}; 462} 463 464=item $link-E<gt>text() 465 466This method returns the textual representation of the hyperlink as above, 467but without markers (read only). Depending on the link type this is one of 468the following alternatives (the + and * denote the portions of the text 469that are marked up): 470 471 +perl+ L<perl> 472 *$|* in +perlvar+ L<perlvar/$|> 473 *OPTIONS* in +perldoc+ L<perldoc/"OPTIONS"> 474 *DESCRIPTION* L<"DESCRIPTION"> 475 476=cut 477 478# The complete link's text 479sub text { 480 $_[0]->{_text}; 481} 482 483=item $link-E<gt>warning() 484 485After parsing, this method returns any warnings encountered during the 486parsing process. 487 488=cut 489 490# Set/retrieve warnings 491sub warning { 492 my $self = shift; 493 if(@_) { 494 push(@{$self->{_warnings}}, @_); 495 return @_; 496 } 497 return @{$self->{_warnings}}; 498} 499 500=item $link-E<gt>file() 501 502=item $link-E<gt>line() 503 504Just simple slots for storing information about the line and the file 505the link was encountered in. Has to be filled in manually. 506 507=cut 508 509# The line in the file the link appears 510sub line { 511 return (@_ > 1) ? ($_[0]->{-line} = $_[1]) : $_[0]->{-line}; 512} 513 514# The POD file name the link appears in 515sub file { 516 return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file}; 517} 518 519=item $link-E<gt>page() 520 521This method sets or returns the POD page this link points to. 522 523=cut 524 525# The POD page the link appears on 526sub page { 527 if (@_ > 1) { 528 $_[0]->{-page} = $_[1]; 529 $_[0]->_construct_text(); 530 } 531 $_[0]->{-page}; 532} 533 534=item $link-E<gt>node() 535 536As above, but the destination node text of the link. 537 538=cut 539 540# The link destination 541sub node { 542 if (@_ > 1) { 543 $_[0]->{-node} = $_[1]; 544 $_[0]->_construct_text(); 545 } 546 $_[0]->{-node}; 547} 548 549=item $link-E<gt>alttext() 550 551Sets or returns an alternative text specified in the link. 552 553=cut 554 555# Potential alternative text 556sub alttext { 557 if (@_ > 1) { 558 $_[0]->{-alttext} = $_[1]; 559 $_[0]->_construct_text(); 560 } 561 $_[0]->{-alttext}; 562} 563 564=item $link-E<gt>type() 565 566The node type, either C<section> or C<item>. As an unofficial type, 567there is also C<hyperlink>, derived from e.g. C<LE<lt>http://perl.comE<gt>> 568 569=cut 570 571# The type: item or headn 572sub type { 573 return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type}; 574} 575 576=item $link-E<gt>link() 577 578Returns the link as contents of C<LE<lt>E<gt>>. Reciprocal to B<parse()>. 579 580=back 581 582=cut 583 584# The link itself 585sub link { 586 my $self = shift; 587 my $link = $self->page() || ''; 588 if($self->node()) { 589 my $node = $self->node(); 590 $text =~ s/\|/E<verbar>/g; 591 $text =~ s:/:E<sol>:g; 592 if($self->type() eq 'section') { 593 $link .= ($link ? '/' : '') . '"' . $node . '"'; 594 } 595 elsif($self->type() eq 'hyperlink') { 596 $link = $self->node(); 597 } 598 else { # item 599 $link .= '/' . $node; 600 } 601 } 602 if($self->alttext()) { 603 my $text = $self->alttext(); 604 $text =~ s/\|/E<verbar>/g; 605 $text =~ s:/:E<sol>:g; 606 $link = "$text|$link"; 607 } 608 $link; 609} 610 611sub _invalid_link { 612 my ($msg) = @_; 613 # this sets @_ 614 #eval { die "$msg\n" }; 615 #chomp $@; 616 $@ = $msg; # this seems to work, too! 617 undef; 618} 619 620#----------------------------------------------------------------------------- 621# Pod::Cache 622# 623# class to hold POD page details 624#----------------------------------------------------------------------------- 625 626package Pod::Cache; 627 628=head2 Pod::Cache 629 630B<Pod::Cache> holds information about a set of POD documents, 631especially the nodes for hyperlinks. 632The following methods are available: 633 634=over 4 635 636=item Pod::Cache-E<gt>new() 637 638Create a new cache object. This object can hold an arbitrary number of 639POD documents of class Pod::Cache::Item. 640 641=cut 642 643sub new { 644 my $this = shift; 645 my $class = ref($this) || $this; 646 my $self = []; 647 bless $self, $class; 648 return $self; 649} 650 651=item $cache-E<gt>item() 652 653Add a new item to the cache. Without arguments, this method returns a 654list of all cache elements. 655 656=cut 657 658sub item { 659 my ($self,%param) = @_; 660 if(%param) { 661 my $item = Pod::Cache::Item->new(%param); 662 push(@$self, $item); 663 return $item; 664 } 665 else { 666 return @{$self}; 667 } 668} 669 670=item $cache-E<gt>find_page($name) 671 672Look for a POD document named C<$name> in the cache. Returns the 673reference to the corresponding Pod::Cache::Item object or undef if 674not found. 675 676=back 677 678=cut 679 680sub find_page { 681 my ($self,$page) = @_; 682 foreach(@$self) { 683 if($_->page() eq $page) { 684 return $_; 685 } 686 } 687 undef; 688} 689 690package Pod::Cache::Item; 691 692=head2 Pod::Cache::Item 693 694B<Pod::Cache::Item> holds information about individual POD documents, 695that can be grouped in a Pod::Cache object. 696It is intended to hold information about the hyperlink nodes of POD 697documents. 698The following methods are available: 699 700=over 4 701 702=item Pod::Cache::Item-E<gt>new() 703 704Create a new object. 705 706=cut 707 708sub new { 709 my $this = shift; 710 my $class = ref($this) || $this; 711 my %params = @_; 712 my $self = {%params}; 713 bless $self, $class; 714 $self->initialize(); 715 return $self; 716} 717 718sub initialize { 719 my $self = shift; 720 $self->{-nodes} = [] unless(defined $self->{-nodes}); 721} 722 723=item $cacheitem-E<gt>page() 724 725Set/retrieve the POD document name (e.g. "Pod::Parser"). 726 727=cut 728 729# The POD page 730sub page { 731 return (@_ > 1) ? ($_[0]->{-page} = $_[1]) : $_[0]->{-page}; 732} 733 734=item $cacheitem-E<gt>description() 735 736Set/retrieve the POD short description as found in the C<=head1 NAME> 737section. 738 739=cut 740 741# The POD description, taken out of NAME if present 742sub description { 743 return (@_ > 1) ? ($_[0]->{-description} = $_[1]) : $_[0]->{-description}; 744} 745 746=item $cacheitem-E<gt>path() 747 748Set/retrieve the POD file storage path. 749 750=cut 751 752# The file path 753sub path { 754 return (@_ > 1) ? ($_[0]->{-path} = $_[1]) : $_[0]->{-path}; 755} 756 757=item $cacheitem-E<gt>file() 758 759Set/retrieve the POD file name. 760 761=cut 762 763# The POD file name 764sub file { 765 return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file}; 766} 767 768=item $cacheitem-E<gt>nodes() 769 770Add a node (or a list of nodes) to the document's node list. Note that 771the order is kept, i.e. start with the first node and end with the last. 772If no argument is given, the current list of nodes is returned in the 773same order the nodes have been added. 774A node can be any scalar, but usually is a pair of node string and 775unique id for the C<find_node> method to work correctly. 776 777=cut 778 779# The POD nodes 780sub nodes { 781 my ($self,@nodes) = @_; 782 if(@nodes) { 783 push(@{$self->{-nodes}}, @nodes); 784 return @nodes; 785 } 786 else { 787 return @{$self->{-nodes}}; 788 } 789} 790 791=item $cacheitem-E<gt>find_node($name) 792 793Look for a node or index entry named C<$name> in the object. 794Returns the unique id of the node (i.e. the second element of the array 795stored in the node arry) or undef if not found. 796 797=cut 798 799sub find_node { 800 my ($self,$node) = @_; 801 my @search; 802 push(@search, @{$self->{-nodes}}) if($self->{-nodes}); 803 push(@search, @{$self->{-idx}}) if($self->{-idx}); 804 foreach(@search) { 805 if($_->[0] eq $node) { 806 return $_->[1]; # id 807 } 808 } 809 undef; 810} 811 812=item $cacheitem-E<gt>idx() 813 814Add an index entry (or a list of them) to the document's index list. Note that 815the order is kept, i.e. start with the first node and end with the last. 816If no argument is given, the current list of index entries is returned in the 817same order the entries have been added. 818An index entry can be any scalar, but usually is a pair of string and 819unique id. 820 821=back 822 823=cut 824 825# The POD index entries 826sub idx { 827 my ($self,@idx) = @_; 828 if(@idx) { 829 push(@{$self->{-idx}}, @idx); 830 return @idx; 831 } 832 else { 833 return @{$self->{-idx}}; 834 } 835} 836 837=head1 AUTHOR 838 839Please report bugs using L<http://rt.cpan.org>. 840 841Marek Rouchal E<lt>marekr@cpan.orgE<gt>, borrowing 842a lot of things from L<pod2man> and L<pod2roff> as well as other POD 843processing tools by Tom Christiansen, Brad Appleton and Russ Allbery. 844 845=head1 SEE ALSO 846 847L<pod2man>, L<pod2roff>, L<Pod::Parser>, L<Pod::Checker>, 848L<pod2html> 849 850=cut 851 8521; 853