xref: /openbsd-src/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/PullParser.pm (revision 3d61058aa5c692477b6d18acfbbdb653a9930ff9)
1package Pod::Simple::PullParser;
2use strict;
3our $VERSION = '3.45';
4use Pod::Simple ();
5BEGIN {our @ISA = ('Pod::Simple')}
6
7use Carp ();
8
9use Pod::Simple::PullParserStartToken;
10use Pod::Simple::PullParserEndToken;
11use Pod::Simple::PullParserTextToken;
12
13BEGIN { *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG }
14
15__PACKAGE__->_accessorize(
16  'source_fh',         # the filehandle we're reading from
17  'source_scalar_ref', # the scalarref we're reading from
18  'source_arrayref',   # the arrayref we're reading from
19);
20
21#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
22#
23#  And here is how we implement a pull-parser on top of a push-parser...
24
25sub filter {
26  my($self, $source) = @_;
27  $self = $self->new unless ref $self;
28
29  $source = *STDIN{IO} unless defined $source;
30  $self->set_source($source);
31  $self->output_fh(*STDOUT{IO});
32
33  $self->run; # define run() in a subclass if you want to use filter()!
34  return $self;
35}
36
37# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
38
39sub parse_string_document {
40  my $this = shift;
41  $this->set_source(\ $_[0]);
42  $this->run;
43}
44
45sub parse_file {
46  my($this, $filename) = @_;
47  $this->set_source($filename);
48  $this->run;
49}
50
51# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
52#  In case anyone tries to use them:
53
54sub run {
55  use Carp ();
56  if( __PACKAGE__ eq (ref($_[0]) || $_[0])) { # I'm not being subclassed!
57    Carp::croak "You can call run() only on subclasses of "
58     . __PACKAGE__;
59  } else {
60    Carp::croak join '',
61      "You can't call run() because ",
62      ref($_[0]) || $_[0], " didn't define a run() method";
63  }
64}
65
66sub parse_lines {
67  use Carp ();
68  Carp::croak "Use set_source with ", __PACKAGE__,
69    " and subclasses, not parse_lines";
70}
71
72sub parse_line {
73  use Carp ();
74  Carp::croak "Use set_source with ", __PACKAGE__,
75    " and subclasses, not parse_line";
76}
77
78#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
79
80sub new {
81  my $class = shift;
82  my $self = $class->SUPER::new(@_);
83  die "Couldn't construct for $class" unless $self;
84
85  $self->{'token_buffer'} ||= [];
86  $self->{'start_token_class'} ||= 'Pod::Simple::PullParserStartToken';
87  $self->{'text_token_class'}  ||= 'Pod::Simple::PullParserTextToken';
88  $self->{'end_token_class'}   ||= 'Pod::Simple::PullParserEndToken';
89
90  DEBUG > 1 and print STDERR "New pullparser object: $self\n";
91
92  return $self;
93}
94
95# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
96
97sub get_token {
98  my $self = shift;
99  DEBUG > 1 and print STDERR "\nget_token starting up on $self.\n";
100  DEBUG > 2 and print STDERR " Items in token-buffer (",
101   scalar( @{ $self->{'token_buffer'} } ) ,
102   ") :\n", map(
103     "    " . $_->dump . "\n", @{ $self->{'token_buffer'} }
104   ),
105   @{ $self->{'token_buffer'} } ? '' : '       (no tokens)',
106   "\n"
107  ;
108
109  until( @{ $self->{'token_buffer'} } ) {
110    DEBUG > 3 and print STDERR "I need to get something into my empty token buffer...\n";
111    if($self->{'source_dead'}) {
112      DEBUG and print STDERR "$self 's source is dead.\n";
113      push @{ $self->{'token_buffer'} }, undef;
114    } elsif(exists $self->{'source_fh'}) {
115      my @lines;
116      my $fh = $self->{'source_fh'}
117       || Carp::croak('You have to call set_source before you can call get_token');
118
119      DEBUG and print STDERR "$self 's source is filehandle $fh.\n";
120      # Read those many lines at a time
121      for(my $i = Pod::Simple::MANY_LINES; $i--;) {
122        DEBUG > 3 and print STDERR " Fetching a line from source filehandle $fh...\n";
123        local $/ = $Pod::Simple::NL;
124        push @lines, scalar(<$fh>); # readline
125        DEBUG > 3 and print STDERR "  Line is: ",
126          defined($lines[-1]) ? $lines[-1] : "<undef>\n";
127        unless( defined $lines[-1] ) {
128          DEBUG and print STDERR "That's it for that source fh!  Killing.\n";
129          delete $self->{'source_fh'}; # so it can be GC'd
130          last;
131        }
132         # but pass thru the undef, which will set source_dead to true
133
134        # TODO: look to see if $lines[-1] is =encoding, and if so,
135        # do horribly magic things
136
137      }
138
139      if(DEBUG > 8) {
140        print STDERR "* I've gotten ", scalar(@lines), " lines:\n";
141        foreach my $l (@lines) {
142          if(defined $l) {
143            print STDERR "  line {$l}\n";
144          } else {
145            print STDERR "  line undef\n";
146          }
147        }
148        print STDERR "* end of ", scalar(@lines), " lines\n";
149      }
150
151      $self->SUPER::parse_lines(@lines);
152
153    } elsif(exists $self->{'source_arrayref'}) {
154      DEBUG and print STDERR "$self 's source is arrayref $self->{'source_arrayref'}, with ",
155       scalar(@{$self->{'source_arrayref'}}), " items left in it.\n";
156
157      DEBUG > 3 and print STDERR "  Fetching ", Pod::Simple::MANY_LINES, " lines.\n";
158      $self->SUPER::parse_lines(
159        splice @{ $self->{'source_arrayref'} },
160        0,
161        Pod::Simple::MANY_LINES
162      );
163      unless( @{ $self->{'source_arrayref'} } ) {
164        DEBUG and print STDERR "That's it for that source arrayref!  Killing.\n";
165        $self->SUPER::parse_lines(undef);
166        delete $self->{'source_arrayref'}; # so it can be GC'd
167      }
168       # to make sure that an undef is always sent to signal end-of-stream
169
170    } elsif(exists $self->{'source_scalar_ref'}) {
171
172      DEBUG and print STDERR "$self 's source is scalarref $self->{'source_scalar_ref'}, with ",
173        length(${ $self->{'source_scalar_ref'} }) -
174        (pos(${ $self->{'source_scalar_ref'} }) || 0),
175        " characters left to parse.\n";
176
177      DEBUG > 3 and print STDERR " Fetching a line from source-string...\n";
178      if( ${ $self->{'source_scalar_ref'} } =~
179        m/([^\n\r]*)((?:\r?\n)?)/g
180      ) {
181        #print(">> $1\n"),
182        $self->SUPER::parse_lines($1)
183         if length($1) or length($2)
184          or pos(     ${ $self->{'source_scalar_ref'} })
185           != length( ${ $self->{'source_scalar_ref'} });
186         # I.e., unless it's a zero-length "empty line" at the very
187         #  end of "foo\nbar\n" (i.e., between the \n and the EOS).
188      } else { # that's the end.  Byebye
189        $self->SUPER::parse_lines(undef);
190        delete $self->{'source_scalar_ref'};
191        DEBUG and print STDERR "That's it for that source scalarref!  Killing.\n";
192      }
193
194
195    } else {
196      die "What source??";
197    }
198  }
199  DEBUG and print STDERR "get_token about to return ",
200   Pod::Simple::pretty( @{$self->{'token_buffer'}}
201     ? $self->{'token_buffer'}[-1] : undef
202   ), "\n";
203  return shift @{$self->{'token_buffer'}}; # that's an undef if empty
204}
205
206sub unget_token {
207  my $self = shift;
208  DEBUG and print STDERR "Ungetting ", scalar(@_), " tokens: ",
209   @_ ? "@_\n" : "().\n";
210  foreach my $t (@_) {
211    Carp::croak "Can't unget that, because it's not a token -- it's undef!"
212     unless defined $t;
213    Carp::croak "Can't unget $t, because it's not a token -- it's a string!"
214     unless ref $t;
215    Carp::croak "Can't unget $t, because it's not a token object!"
216     unless UNIVERSAL::can($t, 'type');
217  }
218
219  unshift @{$self->{'token_buffer'}}, @_;
220  DEBUG > 1 and print STDERR "Token buffer now has ",
221   scalar(@{$self->{'token_buffer'}}), " items in it.\n";
222  return;
223}
224
225#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
226
227# $self->{'source_filename'} = $source;
228
229sub set_source {
230  my $self = shift @_;
231  return $self->{'source_fh'} unless @_;
232  Carp::croak("Cannot assign new source to pull parser; create a new instance, instead")
233      if $self->{'source_fh'} || $self->{'source_scalar_ref'} || $self->{'source_arrayref'};
234  my $handle;
235  if(!defined $_[0]) {
236    Carp::croak("Can't use empty-string as a source for set_source");
237  } elsif(ref(\( $_[0] )) eq 'GLOB') {
238    $self->{'source_filename'} = '' . ($handle = $_[0]);
239    DEBUG and print STDERR "$self 's source is glob $_[0]\n";
240    # and fall thru
241  } elsif(ref( $_[0] ) eq 'SCALAR') {
242    $self->{'source_scalar_ref'} = $_[0];
243    DEBUG and print STDERR "$self 's source is scalar ref $_[0]\n";
244    return;
245  } elsif(ref( $_[0] ) eq 'ARRAY') {
246    $self->{'source_arrayref'} = $_[0];
247    DEBUG and print STDERR "$self 's source is array ref $_[0]\n";
248    return;
249  } elsif(ref $_[0]) {
250    $self->{'source_filename'} = '' . ($handle = $_[0]);
251    DEBUG and print STDERR "$self 's source is fh-obj $_[0]\n";
252  } elsif(!length $_[0]) {
253    Carp::croak("Can't use empty-string as a source for set_source");
254  } else {  # It's a filename!
255    DEBUG and print STDERR "$self 's source is filename $_[0]\n";
256    {
257      local *PODSOURCE;
258      open(PODSOURCE, "<$_[0]") || Carp::croak "Can't open $_[0]: $!";
259      $handle = *PODSOURCE{IO};
260    }
261    $self->{'source_filename'} = $_[0];
262    DEBUG and print STDERR "  Its name is $_[0].\n";
263
264    # TODO: file-discipline things here!
265  }
266
267  $self->{'source_fh'} = $handle;
268  DEBUG and print STDERR "  Its handle is $handle\n";
269  return 1;
270}
271
272# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
273
274sub get_title_short {  shift->get_short_title(@_)  } # alias
275
276sub get_short_title {
277  my $title = shift->get_title(@_);
278  $title = $1 if $title =~ m/^(\S{1,60})\s+--?\s+./s;
279    # turn "Foo::Bar -- bars for your foo" into "Foo::Bar"
280  return $title;
281}
282
283sub get_title       { shift->_get_titled_section(
284  'NAME', max_token => 50, desperate => 1, @_)
285}
286sub get_version     { shift->_get_titled_section(
287   'VERSION',
288    max_token => 400,
289    accept_verbatim => 1,
290    max_content_length => 3_000,
291   @_,
292  );
293}
294sub get_description { shift->_get_titled_section(
295   'DESCRIPTION',
296    max_token => 400,
297    max_content_length => 3_000,
298   @_,
299) }
300
301sub get_authors     { shift->get_author(@_) }  # a harmless alias
302
303sub get_author      {
304  my $this = shift;
305  # Max_token is so high because these are
306  #  typically at the end of the document:
307  $this->_get_titled_section('AUTHOR' , max_token => 10_000, @_) ||
308  $this->_get_titled_section('AUTHORS', max_token => 10_000, @_);
309}
310
311#--------------------------------------------------------------------------
312
313sub _get_titled_section {
314  # Based on a get_title originally contributed by Graham Barr
315  my($self, $titlename, %options) = (@_);
316
317  my $max_token            = delete $options{'max_token'};
318  my $desperate_for_title  = delete $options{'desperate'};
319  my $accept_verbatim      = delete $options{'accept_verbatim'};
320  my $max_content_length   = delete $options{'max_content_length'};
321  my $nocase               = delete $options{'nocase'};
322  $max_content_length = 120 unless defined $max_content_length;
323
324  Carp::croak( "Unknown " . ((1 == keys %options) ? "option: " : "options: ")
325    . join " ", map "[$_]", sort keys %options
326  )
327   if keys %options;
328
329  my %content_containers;
330  $content_containers{'Para'} = 1;
331  if($accept_verbatim) {
332    $content_containers{'Verbatim'} = 1;
333    $content_containers{'VerbatimFormatted'} = 1;
334  }
335
336  my $token_count = 0;
337  my $title;
338  my @to_unget;
339  my $state = 0;
340  my $depth = 0;
341
342  Carp::croak "What kind of titlename is \"$titlename\"?!" unless
343   defined $titlename and $titlename =~ m/^[A-Z ]{1,60}$/s; #sanity
344  my $titlename_re = quotemeta($titlename);
345
346  my $head1_text_content;
347  my $para_text_content;
348  my $skipX;
349
350  while(
351    ++$token_count <= ($max_token || 1_000_000)
352    and defined(my $token = $self->get_token)
353  ) {
354    push @to_unget, $token;
355
356    if ($state == 0) { # seeking =head1
357      if( $token->is_start and $token->tagname eq 'head1' ) {
358        DEBUG and print STDERR "  Found head1.  Seeking content...\n";
359        ++$state;
360        $head1_text_content = '';
361      }
362    }
363
364    elsif($state == 1) { # accumulating text until end of head1
365      if( $token->is_text ) {
366          unless ($skipX) {
367            DEBUG and print STDERR "   Adding \"", $token->text, "\" to head1-content.\n";
368            $head1_text_content .= $token->text;
369          }
370      } elsif( $token->is_tagname('X') ) {
371          # We're going to want to ignore X<> stuff.
372          $skipX = $token->is_start;
373          DEBUG and print STDERR +($skipX ? 'Start' : 'End'), 'ing ignoring of X<> tag';
374      } elsif( $token->is_end and $token->tagname eq 'head1' ) {
375        DEBUG and print STDERR "  Found end of head1.  Considering content...\n";
376        $head1_text_content = uc $head1_text_content if $nocase;
377        if($head1_text_content eq $titlename
378          or $head1_text_content =~ m/\($titlename_re\)/s
379          # We accept "=head1 Nomen Modularis (NAME)" for sake of i18n
380        ) {
381          DEBUG and print STDERR "  Yup, it was $titlename.  Seeking next para-content...\n";
382          ++$state;
383        } elsif(
384          $desperate_for_title
385           # if we're so desperate we'll take the first
386           #  =head1's content as a title
387          and $head1_text_content =~ m/\S/
388          and $head1_text_content !~ m/^[ A-Z]+$/s
389          and $head1_text_content !~
390            m/\((?:
391             NAME | TITLE | VERSION | AUTHORS? | DESCRIPTION | SYNOPSIS
392             | COPYRIGHT | LICENSE | NOTES? | FUNCTIONS? | METHODS?
393             | CAVEATS? | BUGS? | SEE\ ALSO | SWITCHES | ENVIRONMENT
394            )\)/sx
395            # avoid accepting things like =head1 Thingy Thongy (DESCRIPTION)
396          and ($max_content_length
397            ? (length($head1_text_content) <= $max_content_length) # sanity
398            : 1)
399        ) {
400          # Looks good; trim it
401          ($title = $head1_text_content) =~ s/\s+$//;
402          DEBUG and print STDERR "  It looks titular: \"$title\".\n\n  Using that.\n";
403          last;
404        } else {
405          --$state;
406          DEBUG and print STDERR "  Didn't look titular ($head1_text_content).\n",
407            "\n  Dropping back to seeking-head1-content mode...\n";
408        }
409      }
410    }
411
412    elsif($state == 2) {
413      # seeking start of para (which must immediately follow)
414      if($token->is_start and $content_containers{ $token->tagname }) {
415        DEBUG and print STDERR "  Found start of Para.  Accumulating content...\n";
416        $para_text_content = '';
417        ++$state;
418      } else {
419        DEBUG and print
420         "  Didn't see an immediately subsequent start-Para.  Reseeking H1\n";
421        $state = 0;
422      }
423    }
424
425    elsif($state == 3) {
426      # accumulating text until end of Para
427      if( $token->is_text ) {
428        DEBUG and print STDERR "   Adding \"", $token->text, "\" to para-content.\n";
429        $para_text_content .= $token->text;
430        # and keep looking
431
432      } elsif( $token->is_end and $content_containers{ $token->tagname } ) {
433        DEBUG and print STDERR "  Found end of Para.  Considering content: ",
434          $para_text_content, "\n";
435
436        if( $para_text_content =~ m/\S/
437          and ($max_content_length
438           ? (length($para_text_content) <= $max_content_length)
439           : 1)
440        ) {
441          # Some minimal sanity constraints, I think.
442          DEBUG and print STDERR "  It looks contentworthy, I guess.  Using it.\n";
443          $title = $para_text_content;
444          last;
445        } else {
446          DEBUG and print STDERR "  Doesn't look at all contentworthy!\n  Giving up.\n";
447          undef $title;
448          last;
449        }
450      }
451    }
452
453    else {
454      die "IMPOSSIBLE STATE $state!\n";  # should never happen
455    }
456
457  }
458
459  # Put it all back!
460  $self->unget_token(@to_unget);
461
462  if(DEBUG) {
463    if(defined $title) { print STDERR "  Returning title <$title>\n" }
464    else { print STDERR "Returning title <>\n" }
465  }
466
467  return '' unless defined $title;
468  $title =~ s/^\s+//;
469  return $title;
470}
471
472#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
473#
474#  Methods that actually do work at parse-time:
475
476sub _handle_element_start {
477  my $self = shift;   # leaving ($element_name, $attr_hash_r)
478  DEBUG > 2 and print STDERR "++ $_[0] (", map("<$_> ", %{$_[1]}), ")\n";
479
480  push @{ $self->{'token_buffer'} },
481       $self->{'start_token_class'}->new(@_);
482  return;
483}
484
485sub _handle_text {
486  my $self = shift;   # leaving ($text)
487  DEBUG > 2 and print STDERR "== $_[0]\n";
488  push @{ $self->{'token_buffer'} },
489       $self->{'text_token_class'}->new(@_);
490  return;
491}
492
493sub _handle_element_end {
494  my $self = shift;   # leaving ($element_name);
495  DEBUG > 2 and print STDERR "-- $_[0]\n";
496  push @{ $self->{'token_buffer'} },
497       $self->{'end_token_class'}->new(@_);
498  return;
499}
500
501#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
502
5031;
504
505
506__END__
507
508=head1 NAME
509
510Pod::Simple::PullParser -- a pull-parser interface to parsing Pod
511
512=head1 SYNOPSIS
513
514 my $parser = SomePodProcessor->new;
515 $parser->set_source( "whatever.pod" );
516 $parser->run;
517
518Or:
519
520 my $parser = SomePodProcessor->new;
521 $parser->set_source( $some_filehandle_object );
522 $parser->run;
523
524Or:
525
526 my $parser = SomePodProcessor->new;
527 $parser->set_source( \$document_source );
528 $parser->run;
529
530Or:
531
532 my $parser = SomePodProcessor->new;
533 $parser->set_source( \@document_lines );
534 $parser->run;
535
536And elsewhere:
537
538 require 5;
539 package SomePodProcessor;
540 use strict;
541 use base qw(Pod::Simple::PullParser);
542
543 sub run {
544   my $self = shift;
545  Token:
546   while(my $token = $self->get_token) {
547     ...process each token...
548   }
549 }
550
551=head1 DESCRIPTION
552
553This class is for using Pod::Simple to build a Pod processor -- but
554one that uses an interface based on a stream of token objects,
555instead of based on events.
556
557This is a subclass of L<Pod::Simple> and inherits all its methods.
558
559A subclass of Pod::Simple::PullParser should define a C<run> method
560that calls C<< $token = $parser->get_token >> to pull tokens.
561
562See the source for Pod::Simple::RTF for an example of a formatter
563that uses Pod::Simple::PullParser.
564
565=head1 METHODS
566
567=over
568
569=item my $token = $parser->get_token
570
571This returns the next token object (which will be of a subclass of
572L<Pod::Simple::PullParserToken>), or undef if the parser-stream has hit
573the end of the document.
574
575=item $parser->unget_token( $token )
576
577=item $parser->unget_token( $token1, $token2, ... )
578
579This restores the token object(s) to the front of the parser stream.
580
581=back
582
583The source has to be set before you can parse anything.  The lowest-level
584way is to call C<set_source>:
585
586=over
587
588=item $parser->set_source( $filename )
589
590=item $parser->set_source( $filehandle_object )
591
592=item $parser->set_source( \$document_source )
593
594=item $parser->set_source( \@document_lines )
595
596=back
597
598Or you can call these methods, which Pod::Simple::PullParser has defined
599to work just like Pod::Simple's same-named methods:
600
601=over
602
603=item $parser->parse_file(...)
604
605=item $parser->parse_string_document(...)
606
607=item $parser->filter(...)
608
609=item $parser->parse_from_file(...)
610
611=back
612
613For those to work, the Pod-processing subclass of
614Pod::Simple::PullParser has to have defined a $parser->run method --
615so it is advised that all Pod::Simple::PullParser subclasses do so.
616See the Synopsis above, or the source for Pod::Simple::RTF.
617
618Authors of formatter subclasses might find these methods useful to
619call on a parser object that you haven't started pulling tokens
620from yet:
621
622=over
623
624=item my $title_string = $parser->get_title
625
626This tries to get the title string out of $parser, by getting some tokens,
627and scanning them for the title, and then ungetting them so that you can
628process the token-stream from the beginning.
629
630For example, suppose you have a document that starts out:
631
632  =head1 NAME
633
634  Hoo::Boy::Wowza -- Stuff B<wow> yeah!
635
636$parser->get_title on that document will return "Hoo::Boy::Wowza --
637Stuff wow yeah!". If the document starts with:
638
639  =head1 Name
640
641  Hoo::Boy::W00t -- Stuff B<w00t> yeah!
642
643Then you'll need to pass the C<nocase> option in order to recognize "Name":
644
645  $parser->get_title(nocase => 1);
646
647In cases where get_title can't find the title, it will return empty-string
648("").
649
650=item my $title_string = $parser->get_short_title
651
652This is just like get_title, except that it returns just the modulename, if
653the title seems to be of the form "SomeModuleName -- description".
654
655For example, suppose you have a document that starts out:
656
657  =head1 NAME
658
659  Hoo::Boy::Wowza -- Stuff B<wow> yeah!
660
661then $parser->get_short_title on that document will return
662"Hoo::Boy::Wowza".
663
664But if the document starts out:
665
666  =head1 NAME
667
668  Hooboy, stuff B<wow> yeah!
669
670then $parser->get_short_title on that document will return "Hooboy,
671stuff wow yeah!". If the document starts with:
672
673  =head1 Name
674
675  Hoo::Boy::W00t -- Stuff B<w00t> yeah!
676
677Then you'll need to pass the C<nocase> option in order to recognize "Name":
678
679  $parser->get_short_title(nocase => 1);
680
681If the title can't be found, then get_short_title returns empty-string
682("").
683
684=item $author_name   = $parser->get_author
685
686This works like get_title except that it returns the contents of the
687"=head1 AUTHOR\n\nParagraph...\n" section, assuming that that section
688isn't terribly long. To recognize a "=head1 Author\n\nParagraph\n"
689section, pass the C<nocase> option:
690
691  $parser->get_author(nocase => 1);
692
693(This method tolerates "AUTHORS" instead of "AUTHOR" too.)
694
695=item $description_name = $parser->get_description
696
697This works like get_title except that it returns the contents of the
698"=head1 DESCRIPTION\n\nParagraph...\n" section, assuming that that section
699isn't terribly long. To recognize a "=head1 Description\n\nParagraph\n"
700section, pass the C<nocase> option:
701
702  $parser->get_description(nocase => 1);
703
704=item $version_block = $parser->get_version
705
706This works like get_title except that it returns the contents of
707the "=head1 VERSION\n\n[BIG BLOCK]\n" block.  Note that this does NOT
708return the module's C<$VERSION>!! To recognize a
709"=head1 Version\n\n[BIG BLOCK]\n" section, pass the C<nocase> option:
710
711  $parser->get_version(nocase => 1);
712
713=back
714
715=head1 NOTE
716
717You don't actually I<have> to define a C<run> method.  If you're
718writing a Pod-formatter class, you should define a C<run> just so
719that users can call C<parse_file> etc, but you don't I<have> to.
720
721And if you're not writing a formatter class, but are instead just
722writing a program that does something simple with a Pod::PullParser
723object (and not an object of a subclass), then there's no reason to
724bother subclassing to add a C<run> method.
725
726=head1 SEE ALSO
727
728L<Pod::Simple>
729
730L<Pod::Simple::PullParserToken> -- and its subclasses
731L<Pod::Simple::PullParserStartToken>,
732L<Pod::Simple::PullParserTextToken>, and
733L<Pod::Simple::PullParserEndToken>.
734
735L<HTML::TokeParser>, which inspired this.
736
737=head1 SUPPORT
738
739Questions or discussion about POD and Pod::Simple should be sent to the
740pod-people@perl.org mail list. Send an empty email to
741pod-people-subscribe@perl.org to subscribe.
742
743This module is managed in an open GitHub repository,
744L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or
745to clone L<https://github.com/perl-pod/pod-simple.git> and send patches!
746
747Patches against Pod::Simple are welcome. Please send bug reports to
748<bug-pod-simple@rt.cpan.org>.
749
750=head1 COPYRIGHT AND DISCLAIMERS
751
752Copyright (c) 2002 Sean M. Burke.
753
754This library is free software; you can redistribute it and/or modify it
755under the same terms as Perl itself.
756
757This program is distributed in the hope that it will be useful, but
758without any warranty; without even the implied warranty of
759merchantability or fitness for a particular purpose.
760
761=head1 AUTHOR
762
763Pod::Simple was created by Sean M. Burke <sburke@cpan.org>.
764But don't bother him, he's retired.
765
766Pod::Simple is maintained by:
767
768=over
769
770=item * Allison Randal C<allison@perl.org>
771
772=item * Hans Dieter Pearcey C<hdp@cpan.org>
773
774=item * David E. Wheeler C<dwheeler@cpan.org>
775
776=back
777
778=cut
779
780JUNK:
781
782sub _old_get_title {  # some witchery in here
783  my $self = $_[0];
784  my $title;
785  my @to_unget;
786
787  while(1) {
788    push @to_unget, $self->get_token;
789    unless(defined $to_unget[-1]) { # whoops, short doc!
790      pop @to_unget;
791      last;
792    }
793
794    DEBUG and print STDERR "-Got token ", $to_unget[-1]->dump, "\n";
795
796    (DEBUG and print STDERR "Too much in the buffer.\n"),
797     last if @to_unget > 25; # sanity
798
799    my $pattern = '';
800    if( #$to_unget[-1]->type eq 'end'
801        #and $to_unget[-1]->tagname eq 'Para'
802        #and
803        ($pattern = join('',
804         map {;
805            ($_->type eq 'start') ? ("<" . $_->tagname .">")
806          : ($_->type eq 'end'  ) ? ("</". $_->tagname .">")
807          : ($_->type eq 'text' ) ? ($_->text =~ m<^([A-Z]+)$>s ? $1 : 'X')
808          : "BLORP"
809         } @to_unget
810       )) =~ m{<head1>NAME</head1><Para>(X|</?[BCIFLS]>)+</Para>$}s
811    ) {
812      # Whee, it fits the pattern
813      DEBUG and print STDERR "Seems to match =head1 NAME pattern.\n";
814      $title = '';
815      foreach my $t (reverse @to_unget) {
816        last if $t->type eq 'start' and $t->tagname eq 'Para';
817        $title = $t->text . $title if $t->type eq 'text';
818      }
819      undef $title if $title =~ m<^\s*$>; # make sure it's contentful!
820      last;
821
822    } elsif ($pattern =~ m{<head(\d)>(.+)</head\d>$}
823      and !( $1 eq '1' and $2 eq 'NAME' )
824    ) {
825      # Well, it fits a fallback pattern
826      DEBUG and print STDERR "Seems to match NAMEless pattern.\n";
827      $title = '';
828      foreach my $t (reverse @to_unget) {
829        last if $t->type eq 'start' and $t->tagname =~ m/^head\d$/s;
830        $title = $t->text . $title if $t->type eq 'text';
831      }
832      undef $title if $title =~ m<^\s*$>; # make sure it's contentful!
833      last;
834
835    } else {
836      DEBUG and $pattern and print STDERR "Leading pattern: $pattern\n";
837    }
838  }
839
840  # Put it all back:
841  $self->unget_token(@to_unget);
842
843  if(DEBUG) {
844    if(defined $title) { print STDERR "  Returning title <$title>\n" }
845    else { print STDERR "Returning title <>\n" }
846  }
847
848  return '' unless defined $title;
849  return $title;
850}
851
852use warnings;
853