xref: /openbsd-src/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/PullParser.pm (revision 50b7afb2c2c0993b0894d4e34bf857cb13ed9c80)
1
2require 5;
3package Pod::Simple::PullParser;
4$VERSION = '3.28';
5use Pod::Simple ();
6BEGIN {@ISA = ('Pod::Simple')}
7
8use strict;
9use Carp ();
10
11use Pod::Simple::PullParserStartToken;
12use Pod::Simple::PullParserEndToken;
13use Pod::Simple::PullParserTextToken;
14
15BEGIN { *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG }
16
17__PACKAGE__->_accessorize(
18  'source_fh',         # the filehandle we're reading from
19  'source_scalar_ref', # the scalarref we're reading from
20  'source_arrayref',   # the arrayref we're reading from
21);
22
23#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
24#
25#  And here is how we implement a pull-parser on top of a push-parser...
26
27sub filter {
28  my($self, $source) = @_;
29  $self = $self->new unless ref $self;
30
31  $source = *STDIN{IO} unless defined $source;
32  $self->set_source($source);
33  $self->output_fh(*STDOUT{IO});
34
35  $self->run; # define run() in a subclass if you want to use filter()!
36  return $self;
37}
38
39# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
40
41sub parse_string_document {
42  my $this = shift;
43  $this->set_source(\ $_[0]);
44  $this->run;
45}
46
47sub parse_file {
48  my($this, $filename) = @_;
49  $this->set_source($filename);
50  $this->run;
51}
52
53# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
54#  In case anyone tries to use them:
55
56sub run {
57  use Carp ();
58  if( __PACKAGE__ eq ref($_[0]) || $_[0]) { # I'm not being subclassed!
59    Carp::croak "You can call run() only on subclasses of "
60     . __PACKAGE__;
61  } else {
62    Carp::croak join '',
63      "You can't call run() because ",
64      ref($_[0]) || $_[0], " didn't define a run() method";
65  }
66}
67
68sub parse_lines {
69  use Carp ();
70  Carp::croak "Use set_source with ", __PACKAGE__,
71    " and subclasses, not parse_lines";
72}
73
74sub parse_line {
75  use Carp ();
76  Carp::croak "Use set_source with ", __PACKAGE__,
77    " and subclasses, not parse_line";
78}
79
80#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
81
82sub new {
83  my $class = shift;
84  my $self = $class->SUPER::new(@_);
85  die "Couldn't construct for $class" unless $self;
86
87  $self->{'token_buffer'} ||= [];
88  $self->{'start_token_class'} ||= 'Pod::Simple::PullParserStartToken';
89  $self->{'text_token_class'}  ||= 'Pod::Simple::PullParserTextToken';
90  $self->{'end_token_class'}   ||= 'Pod::Simple::PullParserEndToken';
91
92  DEBUG > 1 and print "New pullparser object: $self\n";
93
94  return $self;
95}
96
97# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
98
99sub get_token {
100  my $self = shift;
101  DEBUG > 1 and print "\nget_token starting up on $self.\n";
102  DEBUG > 2 and print " Items in token-buffer (",
103   scalar( @{ $self->{'token_buffer'} } ) ,
104   ") :\n", map(
105     "    " . $_->dump . "\n", @{ $self->{'token_buffer'} }
106   ),
107   @{ $self->{'token_buffer'} } ? '' : '       (no tokens)',
108   "\n"
109  ;
110
111  until( @{ $self->{'token_buffer'} } ) {
112    DEBUG > 3 and print "I need to get something into my empty token buffer...\n";
113    if($self->{'source_dead'}) {
114      DEBUG and print "$self 's source is dead.\n";
115      push @{ $self->{'token_buffer'} }, undef;
116    } elsif(exists $self->{'source_fh'}) {
117      my @lines;
118      my $fh = $self->{'source_fh'}
119       || Carp::croak('You have to call set_source before you can call get_token');
120
121      DEBUG and print "$self 's source is filehandle $fh.\n";
122      # Read those many lines at a time
123      for(my $i = Pod::Simple::MANY_LINES; $i--;) {
124        DEBUG > 3 and print " Fetching a line from source filehandle $fh...\n";
125        local $/ = $Pod::Simple::NL;
126        push @lines, scalar(<$fh>); # readline
127        DEBUG > 3 and print "  Line is: ",
128          defined($lines[-1]) ? $lines[-1] : "<undef>\n";
129        unless( defined $lines[-1] ) {
130          DEBUG and print "That's it for that source fh!  Killing.\n";
131          delete $self->{'source_fh'}; # so it can be GC'd
132          last;
133        }
134         # but pass thru the undef, which will set source_dead to true
135
136        # TODO: look to see if $lines[-1] is =encoding, and if so,
137        # do horribly magic things
138
139      }
140
141      if(DEBUG > 8) {
142        print "* I've gotten ", scalar(@lines), " lines:\n";
143        foreach my $l (@lines) {
144          if(defined $l) {
145            print "  line {$l}\n";
146          } else {
147            print "  line undef\n";
148          }
149        }
150        print "* end of ", scalar(@lines), " lines\n";
151      }
152
153      $self->SUPER::parse_lines(@lines);
154
155    } elsif(exists $self->{'source_arrayref'}) {
156      DEBUG and print "$self 's source is arrayref $self->{'source_arrayref'}, with ",
157       scalar(@{$self->{'source_arrayref'}}), " items left in it.\n";
158
159      DEBUG > 3 and print "  Fetching ", Pod::Simple::MANY_LINES, " lines.\n";
160      $self->SUPER::parse_lines(
161        splice @{ $self->{'source_arrayref'} },
162        0,
163        Pod::Simple::MANY_LINES
164      );
165      unless( @{ $self->{'source_arrayref'} } ) {
166        DEBUG and print "That's it for that source arrayref!  Killing.\n";
167        $self->SUPER::parse_lines(undef);
168        delete $self->{'source_arrayref'}; # so it can be GC'd
169      }
170       # to make sure that an undef is always sent to signal end-of-stream
171
172    } elsif(exists $self->{'source_scalar_ref'}) {
173
174      DEBUG and print "$self 's source is scalarref $self->{'source_scalar_ref'}, with ",
175        length(${ $self->{'source_scalar_ref'} }) -
176        (pos(${ $self->{'source_scalar_ref'} }) || 0),
177        " characters left to parse.\n";
178
179      DEBUG > 3 and print " Fetching a line from source-string...\n";
180      if( ${ $self->{'source_scalar_ref'} } =~
181        m/([^\n\r]*)((?:\r?\n)?)/g
182      ) {
183        #print(">> $1\n"),
184        $self->SUPER::parse_lines($1)
185         if length($1) or length($2)
186          or pos(     ${ $self->{'source_scalar_ref'} })
187           != length( ${ $self->{'source_scalar_ref'} });
188         # I.e., unless it's a zero-length "empty line" at the very
189         #  end of "foo\nbar\n" (i.e., between the \n and the EOS).
190      } else { # that's the end.  Byebye
191        $self->SUPER::parse_lines(undef);
192        delete $self->{'source_scalar_ref'};
193        DEBUG and print "That's it for that source scalarref!  Killing.\n";
194      }
195
196
197    } else {
198      die "What source??";
199    }
200  }
201  DEBUG and print "get_token about to return ",
202   Pod::Simple::pretty( @{$self->{'token_buffer'}}
203     ? $self->{'token_buffer'}[-1] : undef
204   ), "\n";
205  return shift @{$self->{'token_buffer'}}; # that's an undef if empty
206}
207
208sub unget_token {
209  my $self = shift;
210  DEBUG and print "Ungetting ", scalar(@_), " tokens: ",
211   @_ ? "@_\n" : "().\n";
212  foreach my $t (@_) {
213    Carp::croak "Can't unget that, because it's not a token -- it's undef!"
214     unless defined $t;
215    Carp::croak "Can't unget $t, because it's not a token -- it's a string!"
216     unless ref $t;
217    Carp::croak "Can't unget $t, because it's not a token object!"
218     unless UNIVERSAL::can($t, 'type');
219  }
220
221  unshift @{$self->{'token_buffer'}}, @_;
222  DEBUG > 1 and print "Token buffer now has ",
223   scalar(@{$self->{'token_buffer'}}), " items in it.\n";
224  return;
225}
226
227#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
228
229# $self->{'source_filename'} = $source;
230
231sub set_source {
232  my $self = shift @_;
233  return $self->{'source_fh'} unless @_;
234  Carp::croak("Cannot assign new source to pull parser; create a new instance, instead")
235      if $self->{'source_fh'} || $self->{'source_scalar_ref'} || $self->{'source_arrayref'};
236  my $handle;
237  if(!defined $_[0]) {
238    Carp::croak("Can't use empty-string as a source for set_source");
239  } elsif(ref(\( $_[0] )) eq 'GLOB') {
240    $self->{'source_filename'} = '' . ($handle = $_[0]);
241    DEBUG and print "$self 's source is glob $_[0]\n";
242    # and fall thru
243  } elsif(ref( $_[0] ) eq 'SCALAR') {
244    $self->{'source_scalar_ref'} = $_[0];
245    DEBUG and print "$self 's source is scalar ref $_[0]\n";
246    return;
247  } elsif(ref( $_[0] ) eq 'ARRAY') {
248    $self->{'source_arrayref'} = $_[0];
249    DEBUG and print "$self 's source is array ref $_[0]\n";
250    return;
251  } elsif(ref $_[0]) {
252    $self->{'source_filename'} = '' . ($handle = $_[0]);
253    DEBUG and print "$self 's source is fh-obj $_[0]\n";
254  } elsif(!length $_[0]) {
255    Carp::croak("Can't use empty-string as a source for set_source");
256  } else {  # It's a filename!
257    DEBUG and print "$self 's source is filename $_[0]\n";
258    {
259      local *PODSOURCE;
260      open(PODSOURCE, "<$_[0]") || Carp::croak "Can't open $_[0]: $!";
261      $handle = *PODSOURCE{IO};
262    }
263    $self->{'source_filename'} = $_[0];
264    DEBUG and print "  Its name is $_[0].\n";
265
266    # TODO: file-discipline things here!
267  }
268
269  $self->{'source_fh'} = $handle;
270  DEBUG and print "  Its handle is $handle\n";
271  return 1;
272}
273
274# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
275
276sub get_title_short {  shift->get_short_title(@_)  } # alias
277
278sub get_short_title {
279  my $title = shift->get_title(@_);
280  $title = $1 if $title =~ m/^(\S{1,60})\s+--?\s+./s;
281    # turn "Foo::Bar -- bars for your foo" into "Foo::Bar"
282  return $title;
283}
284
285sub get_title       { shift->_get_titled_section(
286  'NAME', max_token => 50, desperate => 1, @_)
287}
288sub get_version     { shift->_get_titled_section(
289   'VERSION',
290    max_token => 400,
291    accept_verbatim => 1,
292    max_content_length => 3_000,
293   @_,
294  );
295}
296sub get_description { shift->_get_titled_section(
297   'DESCRIPTION',
298    max_token => 400,
299    max_content_length => 3_000,
300   @_,
301) }
302
303sub get_authors     { shift->get_author(@_) }  # a harmless alias
304
305sub get_author      {
306  my $this = shift;
307  # Max_token is so high because these are
308  #  typically at the end of the document:
309  $this->_get_titled_section('AUTHOR' , max_token => 10_000, @_) ||
310  $this->_get_titled_section('AUTHORS', max_token => 10_000, @_);
311}
312
313#--------------------------------------------------------------------------
314
315sub _get_titled_section {
316  # Based on a get_title originally contributed by Graham Barr
317  my($self, $titlename, %options) = (@_);
318
319  my $max_token            = delete $options{'max_token'};
320  my $desperate_for_title  = delete $options{'desperate'};
321  my $accept_verbatim      = delete $options{'accept_verbatim'};
322  my $max_content_length   = delete $options{'max_content_length'};
323  my $nocase               = delete $options{'nocase'};
324  $max_content_length = 120 unless defined $max_content_length;
325
326  Carp::croak( "Unknown " . ((1 == keys %options) ? "option: " : "options: ")
327    . join " ", map "[$_]", sort keys %options
328  )
329   if keys %options;
330
331  my %content_containers;
332  $content_containers{'Para'} = 1;
333  if($accept_verbatim) {
334    $content_containers{'Verbatim'} = 1;
335    $content_containers{'VerbatimFormatted'} = 1;
336  }
337
338  my $token_count = 0;
339  my $title;
340  my @to_unget;
341  my $state = 0;
342  my $depth = 0;
343
344  Carp::croak "What kind of titlename is \"$titlename\"?!" unless
345   defined $titlename and $titlename =~ m/^[A-Z ]{1,60}$/s; #sanity
346  my $titlename_re = quotemeta($titlename);
347
348  my $head1_text_content;
349  my $para_text_content;
350  my $skipX;
351
352  while(
353    ++$token_count <= ($max_token || 1_000_000)
354    and defined(my $token = $self->get_token)
355  ) {
356    push @to_unget, $token;
357
358    if ($state == 0) { # seeking =head1
359      if( $token->is_start and $token->tagname eq 'head1' ) {
360        DEBUG and print "  Found head1.  Seeking content...\n";
361        ++$state;
362        $head1_text_content = '';
363      }
364    }
365
366    elsif($state == 1) { # accumulating text until end of head1
367      if( $token->is_text ) {
368          unless ($skipX) {
369            DEBUG and print "   Adding \"", $token->text, "\" to head1-content.\n";
370            $head1_text_content .= $token->text;
371          }
372      } elsif( $token->is_tagname('X') ) {
373          # We're going to want to ignore X<> stuff.
374          $skipX = $token->is_start;
375          DEBUG and print +($skipX ? 'Start' : 'End'), 'ing ignoring of X<> tag';
376      } elsif( $token->is_end and $token->tagname eq 'head1' ) {
377        DEBUG and print "  Found end of head1.  Considering content...\n";
378        $head1_text_content = uc $head1_text_content if $nocase;
379        if($head1_text_content eq $titlename
380          or $head1_text_content =~ m/\($titlename_re\)/s
381          # We accept "=head1 Nomen Modularis (NAME)" for sake of i18n
382        ) {
383          DEBUG and print "  Yup, it was $titlename.  Seeking next para-content...\n";
384          ++$state;
385        } elsif(
386          $desperate_for_title
387           # if we're so desperate we'll take the first
388           #  =head1's content as a title
389          and $head1_text_content =~ m/\S/
390          and $head1_text_content !~ m/^[ A-Z]+$/s
391          and $head1_text_content !~
392            m/\((?:
393             NAME | TITLE | VERSION | AUTHORS? | DESCRIPTION | SYNOPSIS
394             | COPYRIGHT | LICENSE | NOTES? | FUNCTIONS? | METHODS?
395             | CAVEATS? | BUGS? | SEE\ ALSO | SWITCHES | ENVIRONMENT
396            )\)/sx
397            # avoid accepting things like =head1 Thingy Thongy (DESCRIPTION)
398          and ($max_content_length
399            ? (length($head1_text_content) <= $max_content_length) # sanity
400            : 1)
401        ) {
402          # Looks good; trim it
403          ($title = $head1_text_content) =~ s/\s+$//;
404          DEBUG and print "  It looks titular: \"$title\".\n\n  Using that.\n";
405          last;
406        } else {
407          --$state;
408          DEBUG and print "  Didn't look titular ($head1_text_content).\n",
409            "\n  Dropping back to seeking-head1-content mode...\n";
410        }
411      }
412    }
413
414    elsif($state == 2) {
415      # seeking start of para (which must immediately follow)
416      if($token->is_start and $content_containers{ $token->tagname }) {
417        DEBUG and print "  Found start of Para.  Accumulating content...\n";
418        $para_text_content = '';
419        ++$state;
420      } else {
421        DEBUG and print
422         "  Didn't see an immediately subsequent start-Para.  Reseeking H1\n";
423        $state = 0;
424      }
425    }
426
427    elsif($state == 3) {
428      # accumulating text until end of Para
429      if( $token->is_text ) {
430        DEBUG and print "   Adding \"", $token->text, "\" to para-content.\n";
431        $para_text_content .= $token->text;
432        # and keep looking
433
434      } elsif( $token->is_end and $content_containers{ $token->tagname } ) {
435        DEBUG and print "  Found end of Para.  Considering content: ",
436          $para_text_content, "\n";
437
438        if( $para_text_content =~ m/\S/
439          and ($max_content_length
440           ? (length($para_text_content) <= $max_content_length)
441           : 1)
442        ) {
443          # Some minimal sanity constraints, I think.
444          DEBUG and print "  It looks contentworthy, I guess.  Using it.\n";
445          $title = $para_text_content;
446          last;
447        } else {
448          DEBUG and print "  Doesn't look at all contentworthy!\n  Giving up.\n";
449          undef $title;
450          last;
451        }
452      }
453    }
454
455    else {
456      die "IMPOSSIBLE STATE $state!\n";  # should never happen
457    }
458
459  }
460
461  # Put it all back!
462  $self->unget_token(@to_unget);
463
464  if(DEBUG) {
465    if(defined $title) { print "  Returning title <$title>\n" }
466    else { print "Returning title <>\n" }
467  }
468
469  return '' unless defined $title;
470  $title =~ s/^\s+//;
471  return $title;
472}
473
474#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
475#
476#  Methods that actually do work at parse-time:
477
478sub _handle_element_start {
479  my $self = shift;   # leaving ($element_name, $attr_hash_r)
480  DEBUG > 2 and print "++ $_[0] (", map("<$_> ", %{$_[1]}), ")\n";
481
482  push @{ $self->{'token_buffer'} },
483       $self->{'start_token_class'}->new(@_);
484  return;
485}
486
487sub _handle_text {
488  my $self = shift;   # leaving ($text)
489  DEBUG > 2 and print "== $_[0]\n";
490  push @{ $self->{'token_buffer'} },
491       $self->{'text_token_class'}->new(@_);
492  return;
493}
494
495sub _handle_element_end {
496  my $self = shift;   # leaving ($element_name);
497  DEBUG > 2 and print "-- $_[0]\n";
498  push @{ $self->{'token_buffer'} },
499       $self->{'end_token_class'}->new(@_);
500  return;
501}
502
503#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
504
5051;
506
507
508__END__
509
510=head1 NAME
511
512Pod::Simple::PullParser -- a pull-parser interface to parsing Pod
513
514=head1 SYNOPSIS
515
516 my $parser = SomePodProcessor->new;
517 $parser->set_source( "whatever.pod" );
518 $parser->run;
519
520Or:
521
522 my $parser = SomePodProcessor->new;
523 $parser->set_source( $some_filehandle_object );
524 $parser->run;
525
526Or:
527
528 my $parser = SomePodProcessor->new;
529 $parser->set_source( \$document_source );
530 $parser->run;
531
532Or:
533
534 my $parser = SomePodProcessor->new;
535 $parser->set_source( \@document_lines );
536 $parser->run;
537
538And elsewhere:
539
540 require 5;
541 package SomePodProcessor;
542 use strict;
543 use base qw(Pod::Simple::PullParser);
544
545 sub run {
546   my $self = shift;
547  Token:
548   while(my $token = $self->get_token) {
549     ...process each token...
550   }
551 }
552
553=head1 DESCRIPTION
554
555This class is for using Pod::Simple to build a Pod processor -- but
556one that uses an interface based on a stream of token objects,
557instead of based on events.
558
559This is a subclass of L<Pod::Simple> and inherits all its methods.
560
561A subclass of Pod::Simple::PullParser should define a C<run> method
562that calls C<< $token = $parser->get_token >> to pull tokens.
563
564See the source for Pod::Simple::RTF for an example of a formatter
565that uses Pod::Simple::PullParser.
566
567=head1 METHODS
568
569=over
570
571=item my $token = $parser->get_token
572
573This returns the next token object (which will be of a subclass of
574L<Pod::Simple::PullParserToken>), or undef if the parser-stream has hit
575the end of the document.
576
577=item $parser->unget_token( $token )
578
579=item $parser->unget_token( $token1, $token2, ... )
580
581This restores the token object(s) to the front of the parser stream.
582
583=back
584
585The source has to be set before you can parse anything.  The lowest-level
586way is to call C<set_source>:
587
588=over
589
590=item $parser->set_source( $filename )
591
592=item $parser->set_source( $filehandle_object )
593
594=item $parser->set_source( \$document_source )
595
596=item $parser->set_source( \@document_lines )
597
598=back
599
600Or you can call these methods, which Pod::Simple::PullParser has defined
601to work just like Pod::Simple's same-named methods:
602
603=over
604
605=item $parser->parse_file(...)
606
607=item $parser->parse_string_document(...)
608
609=item $parser->filter(...)
610
611=item $parser->parse_from_file(...)
612
613=back
614
615For those to work, the Pod-processing subclass of
616Pod::Simple::PullParser has to have defined a $parser->run method --
617so it is advised that all Pod::Simple::PullParser subclasses do so.
618See the Synopsis above, or the source for Pod::Simple::RTF.
619
620Authors of formatter subclasses might find these methods useful to
621call on a parser object that you haven't started pulling tokens
622from yet:
623
624=over
625
626=item my $title_string = $parser->get_title
627
628This tries to get the title string out of $parser, by getting some tokens,
629and scanning them for the title, and then ungetting them so that you can
630process the token-stream from the beginning.
631
632For example, suppose you have a document that starts out:
633
634  =head1 NAME
635
636  Hoo::Boy::Wowza -- Stuff B<wow> yeah!
637
638$parser->get_title on that document will return "Hoo::Boy::Wowza --
639Stuff wow yeah!". If the document starts with:
640
641  =head1 Name
642
643  Hoo::Boy::W00t -- Stuff B<w00t> yeah!
644
645Then you'll need to pass the C<nocase> option in order to recognize "Name":
646
647  $parser->get_title(nocase => 1);
648
649In cases where get_title can't find the title, it will return empty-string
650("").
651
652=item my $title_string = $parser->get_short_title
653
654This is just like get_title, except that it returns just the modulename, if
655the title seems to be of the form "SomeModuleName -- description".
656
657For example, suppose you have a document that starts out:
658
659  =head1 NAME
660
661  Hoo::Boy::Wowza -- Stuff B<wow> yeah!
662
663then $parser->get_short_title on that document will return
664"Hoo::Boy::Wowza".
665
666But if the document starts out:
667
668  =head1 NAME
669
670  Hooboy, stuff B<wow> yeah!
671
672then $parser->get_short_title on that document will return "Hooboy,
673stuff wow yeah!". If the document starts with:
674
675  =head1 Name
676
677  Hoo::Boy::W00t -- Stuff B<w00t> yeah!
678
679Then you'll need to pass the C<nocase> option in order to recognize "Name":
680
681  $parser->get_short_title(nocase => 1);
682
683If the title can't be found, then get_short_title returns empty-string
684("").
685
686=item $author_name   = $parser->get_author
687
688This works like get_title except that it returns the contents of the
689"=head1 AUTHOR\n\nParagraph...\n" section, assuming that that section
690isn't terribly long. To recognize a "=head1 Author\n\nParagraph\n"
691section, pass the C<nocase> otpion:
692
693  $parser->get_author(nocase => 1);
694
695(This method tolerates "AUTHORS" instead of "AUTHOR" too.)
696
697=item $description_name = $parser->get_description
698
699This works like get_title except that it returns the contents of the
700"=head1 DESCRIPTION\n\nParagraph...\n" section, assuming that that section
701isn't terribly long. To recognize a "=head1 Description\n\nParagraph\n"
702section, pass the C<nocase> otpion:
703
704  $parser->get_description(nocase => 1);
705
706=item $version_block = $parser->get_version
707
708This works like get_title except that it returns the contents of
709the "=head1 VERSION\n\n[BIG BLOCK]\n" block.  Note that this does NOT
710return the module's C<$VERSION>!! To recognize a
711"=head1 Version\n\n[BIG BLOCK]\n" section, pass the C<nocase> otpion:
712
713  $parser->get_version(nocase => 1);
714
715=back
716
717=head1 NOTE
718
719You don't actually I<have> to define a C<run> method.  If you're
720writing a Pod-formatter class, you should define a C<run> just so
721that users can call C<parse_file> etc, but you don't I<have> to.
722
723And if you're not writing a formatter class, but are instead just
724writing a program that does something simple with a Pod::PullParser
725object (and not an object of a subclass), then there's no reason to
726bother subclassing to add a C<run> method.
727
728=head1 SEE ALSO
729
730L<Pod::Simple>
731
732L<Pod::Simple::PullParserToken> -- and its subclasses
733L<Pod::Simple::PullParserStartToken>,
734L<Pod::Simple::PullParserTextToken>, and
735L<Pod::Simple::PullParserEndToken>.
736
737L<HTML::TokeParser>, which inspired this.
738
739=head1 SUPPORT
740
741Questions or discussion about POD and Pod::Simple should be sent to the
742pod-people@perl.org mail list. Send an empty email to
743pod-people-subscribe@perl.org to subscribe.
744
745This module is managed in an open GitHub repository,
746L<https://github.com/theory/pod-simple/>. Feel free to fork and contribute, or
747to clone L<git://github.com/theory/pod-simple.git> and send patches!
748
749Patches against Pod::Simple are welcome. Please send bug reports to
750<bug-pod-simple@rt.cpan.org>.
751
752=head1 COPYRIGHT AND DISCLAIMERS
753
754Copyright (c) 2002 Sean M. Burke.
755
756This library is free software; you can redistribute it and/or modify it
757under the same terms as Perl itself.
758
759This program is distributed in the hope that it will be useful, but
760without any warranty; without even the implied warranty of
761merchantability or fitness for a particular purpose.
762
763=head1 AUTHOR
764
765Pod::Simple was created by Sean M. Burke <sburke@cpan.org>.
766But don't bother him, he's retired.
767
768Pod::Simple is maintained by:
769
770=over
771
772=item * Allison Randal C<allison@perl.org>
773
774=item * Hans Dieter Pearcey C<hdp@cpan.org>
775
776=item * David E. Wheeler C<dwheeler@cpan.org>
777
778=back
779
780=cut
781
782JUNK:
783
784sub _old_get_title {  # some witchery in here
785  my $self = $_[0];
786  my $title;
787  my @to_unget;
788
789  while(1) {
790    push @to_unget, $self->get_token;
791    unless(defined $to_unget[-1]) { # whoops, short doc!
792      pop @to_unget;
793      last;
794    }
795
796    DEBUG and print "-Got token ", $to_unget[-1]->dump, "\n";
797
798    (DEBUG and print "Too much in the buffer.\n"),
799     last if @to_unget > 25; # sanity
800
801    my $pattern = '';
802    if( #$to_unget[-1]->type eq 'end'
803        #and $to_unget[-1]->tagname eq 'Para'
804        #and
805        ($pattern = join('',
806         map {;
807            ($_->type eq 'start') ? ("<" . $_->tagname .">")
808          : ($_->type eq 'end'  ) ? ("</". $_->tagname .">")
809          : ($_->type eq 'text' ) ? ($_->text =~ m<^([A-Z]+)$>s ? $1 : 'X')
810          : "BLORP"
811         } @to_unget
812       )) =~ m{<head1>NAME</head1><Para>(X|</?[BCIFLS]>)+</Para>$}s
813    ) {
814      # Whee, it fits the pattern
815      DEBUG and print "Seems to match =head1 NAME pattern.\n";
816      $title = '';
817      foreach my $t (reverse @to_unget) {
818        last if $t->type eq 'start' and $t->tagname eq 'Para';
819        $title = $t->text . $title if $t->type eq 'text';
820      }
821      undef $title if $title =~ m<^\s*$>; # make sure it's contentful!
822      last;
823
824    } elsif ($pattern =~ m{<head(\d)>(.+)</head\d>$}
825      and !( $1 eq '1' and $2 eq 'NAME' )
826    ) {
827      # Well, it fits a fallback pattern
828      DEBUG and print "Seems to match NAMEless pattern.\n";
829      $title = '';
830      foreach my $t (reverse @to_unget) {
831        last if $t->type eq 'start' and $t->tagname =~ m/^head\d$/s;
832        $title = $t->text . $title if $t->type eq 'text';
833      }
834      undef $title if $title =~ m<^\s*$>; # make sure it's contentful!
835      last;
836
837    } else {
838      DEBUG and $pattern and print "Leading pattern: $pattern\n";
839    }
840  }
841
842  # Put it all back:
843  $self->unget_token(@to_unget);
844
845  if(DEBUG) {
846    if(defined $title) { print "  Returning title <$title>\n" }
847    else { print "Returning title <>\n" }
848  }
849
850  return '' unless defined $title;
851  return $title;
852}
853
854