1 2package Pod::Simple::BlackBox; 3# 4# "What's in the box?" "Pain." 5# 6########################################################################### 7# 8# This is where all the scary things happen: parsing lines into 9# paragraphs; and then into directives, verbatims, and then also 10# turning formatting sequences into treelets. 11# 12# Are you really sure you want to read this code? 13# 14#----------------------------------------------------------------------------- 15# 16# The basic work of this module Pod::Simple::BlackBox is doing the dirty work 17# of parsing Pod into treelets (generally one per non-verbatim paragraph), and 18# to call the proper callbacks on the treelets. 19# 20# Every node in a treelet is a ['name', {attrhash}, ...children...] 21 22use integer; # vroom! 23use strict; 24use Carp (); 25use vars qw($VERSION ); 26$VERSION = '3.14'; 27#use constant DEBUG => 7; 28BEGIN { 29 require Pod::Simple; 30 *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG 31} 32 33#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 34 35sub parse_line { shift->parse_lines(@_) } # alias 36 37# - - - Turn back now! Run away! - - - 38 39sub parse_lines { # Usage: $parser->parse_lines(@lines) 40 # an undef means end-of-stream 41 my $self = shift; 42 43 my $code_handler = $self->{'code_handler'}; 44 my $cut_handler = $self->{'cut_handler'}; 45 $self->{'line_count'} ||= 0; 46 47 my $scratch; 48 49 DEBUG > 4 and 50 print "# Parsing starting at line ", $self->{'line_count'}, ".\n"; 51 52 DEBUG > 5 and 53 print "# About to parse lines: ", 54 join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n"; 55 56 my $paras = ($self->{'paras'} ||= []); 57 # paragraph buffer. Because we need to defer processing of =over 58 # directives and verbatim paragraphs. We call _ponder_paragraph_buffer 59 # to process this. 60 61 $self->{'pod_para_count'} ||= 0; 62 63 my $line; 64 foreach my $source_line (@_) { 65 if( $self->{'source_dead'} ) { 66 DEBUG > 4 and print "# Source is dead.\n"; 67 last; 68 } 69 70 unless( defined $source_line ) { 71 DEBUG > 4 and print "# Undef-line seen.\n"; 72 73 push @$paras, ['~end', {'start_line' => $self->{'line_count'}}]; 74 push @$paras, $paras->[-1], $paras->[-1]; 75 # So that it definitely fills the buffer. 76 $self->{'source_dead'} = 1; 77 $self->_ponder_paragraph_buffer; 78 next; 79 } 80 81 82 if( $self->{'line_count'}++ ) { 83 ($line = $source_line) =~ tr/\n\r//d; 84 # If we don't have two vars, we'll end up with that there 85 # tr/// modding the (potentially read-only) original source line! 86 87 } else { 88 DEBUG > 2 and print "First line: [$source_line]\n"; 89 90 if( ($line = $source_line) =~ s/^\xEF\xBB\xBF//s ) { 91 DEBUG and print "UTF-8 BOM seen. Faking a '=encode utf8'.\n"; 92 $self->_handle_encoding_line( "=encode utf8" ); 93 $line =~ tr/\n\r//d; 94 95 } elsif( $line =~ s/^\xFE\xFF//s ) { 96 DEBUG and print "Big-endian UTF-16 BOM seen. Aborting parsing.\n"; 97 $self->scream( 98 $self->{'line_count'}, 99 "UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet." 100 ); 101 splice @_; 102 push @_, undef; 103 next; 104 105 # TODO: implement somehow? 106 107 } elsif( $line =~ s/^\xFF\xFE//s ) { 108 DEBUG and print "Little-endian UTF-16 BOM seen. Aborting parsing.\n"; 109 $self->scream( 110 $self->{'line_count'}, 111 "UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet." 112 ); 113 splice @_; 114 push @_, undef; 115 next; 116 117 # TODO: implement somehow? 118 119 } else { 120 DEBUG > 2 and print "First line is BOM-less.\n"; 121 ($line = $source_line) =~ tr/\n\r//d; 122 } 123 } 124 125 126 DEBUG > 5 and print "# Parsing line: [$line]\n"; 127 128 if(!$self->{'in_pod'}) { 129 if($line =~ m/^=([a-zA-Z]+)/s) { 130 if($1 eq 'cut') { 131 $self->scream( 132 $self->{'line_count'}, 133 "=cut found outside a pod block. Skipping to next block." 134 ); 135 136 ## Before there were errata sections in the world, it was 137 ## least-pessimal to abort processing the file. But now we can 138 ## just barrel on thru (but still not start a pod block). 139 #splice @_; 140 #push @_, undef; 141 142 next; 143 } else { 144 $self->{'in_pod'} = $self->{'start_of_pod_block'} 145 = $self->{'last_was_blank'} = 1; 146 # And fall thru to the pod-mode block further down 147 } 148 } else { 149 DEBUG > 5 and print "# It's a code-line.\n"; 150 $code_handler->(map $_, $line, $self->{'line_count'}, $self) 151 if $code_handler; 152 # Note: this may cause code to be processed out of order relative 153 # to pods, but in order relative to cuts. 154 155 # Note also that we haven't yet applied the transcoding to $line 156 # by time we call $code_handler! 157 158 if( $line =~ m/^#\s*line\s+(\d+)\s*(?:\s"([^"]+)")?\s*$/ ) { 159 # That RE is from perlsyn, section "Plain Old Comments (Not!)", 160 #$fname = $2 if defined $2; 161 #DEBUG > 1 and defined $2 and print "# Setting fname to \"$fname\"\n"; 162 DEBUG > 1 and print "# Setting nextline to $1\n"; 163 $self->{'line_count'} = $1 - 1; 164 } 165 166 next; 167 } 168 } 169 170 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 171 # Else we're in pod mode: 172 173 # Apply any necessary transcoding: 174 $self->{'_transcoder'} && $self->{'_transcoder'}->($line); 175 176 # HERE WE CATCH =encoding EARLY! 177 if( $line =~ m/^=encoding\s+\S+\s*$/s ) { 178 $line = $self->_handle_encoding_line( $line ); 179 } 180 181 if($line =~ m/^=cut/s) { 182 # here ends the pod block, and therefore the previous pod para 183 DEBUG > 1 and print "Noting =cut at line ${$self}{'line_count'}\n"; 184 $self->{'in_pod'} = 0; 185 # ++$self->{'pod_para_count'}; 186 $self->_ponder_paragraph_buffer(); 187 # by now it's safe to consider the previous paragraph as done. 188 $cut_handler->(map $_, $line, $self->{'line_count'}, $self) 189 if $cut_handler; 190 191 # TODO: add to docs: Note: this may cause cuts to be processed out 192 # of order relative to pods, but in order relative to code. 193 194 } elsif($line =~ m/^\s*$/s) { # it's a blank line 195 if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') { 196 DEBUG > 1 and print "Saving blank line at line ${$self}{'line_count'}\n"; 197 push @{$paras->[-1]}, $line; 198 } # otherwise it's not interesting 199 200 if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) { 201 DEBUG > 1 and print "Noting para ends with blank line at ${$self}{'line_count'}\n"; 202 } 203 204 $self->{'last_was_blank'} = 1; 205 206 } elsif($self->{'last_was_blank'}) { # A non-blank line starting a new para... 207 208 if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(?:\s+|$)(.*)/s) { 209 # THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS 210 my $new = [$1, {'start_line' => $self->{'line_count'}}, $2]; 211 # Note that in "=head1 foo", the WS is lost. 212 # Example: ['=head1', {'start_line' => 123}, ' foo'] 213 214 ++$self->{'pod_para_count'}; 215 216 $self->_ponder_paragraph_buffer(); 217 # by now it's safe to consider the previous paragraph as done. 218 219 push @$paras, $new; # the new incipient paragraph 220 DEBUG > 1 and print "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n"; 221 222 } elsif($line =~ m/^\s/s) { 223 224 if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') { 225 DEBUG > 1 and print "Resuming verbatim para at line ${$self}{'line_count'}\n"; 226 push @{$paras->[-1]}, $line; 227 } else { 228 ++$self->{'pod_para_count'}; 229 $self->_ponder_paragraph_buffer(); 230 # by now it's safe to consider the previous paragraph as done. 231 DEBUG > 1 and print "Starting verbatim para at line ${$self}{'line_count'}\n"; 232 push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line]; 233 } 234 } else { 235 ++$self->{'pod_para_count'}; 236 $self->_ponder_paragraph_buffer(); 237 # by now it's safe to consider the previous paragraph as done. 238 push @$paras, ['~Para', {'start_line' => $self->{'line_count'}}, $line]; 239 DEBUG > 1 and print "Starting plain para at line ${$self}{'line_count'}\n"; 240 } 241 $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0; 242 243 } else { 244 # It's a non-blank line /continuing/ the current para 245 if(@$paras) { 246 DEBUG > 2 and print "Line ${$self}{'line_count'} continues current paragraph\n"; 247 push @{$paras->[-1]}, $line; 248 } else { 249 # Unexpected case! 250 die "Continuing a paragraph but \@\$paras is empty?"; 251 } 252 $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0; 253 } 254 255 } # ends the big while loop 256 257 DEBUG > 1 and print(pretty(@$paras), "\n"); 258 return $self; 259} 260 261#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 262 263sub _handle_encoding_line { 264 my($self, $line) = @_; 265 266 # The point of this routine is to set $self->{'_transcoder'} as indicated. 267 268 return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s; 269 DEBUG > 1 and print "Found an encoding line \"=encoding $1\"\n"; 270 271 my $e = $1; 272 my $orig = $e; 273 push @{ $self->{'encoding_command_reqs'} }, "=encoding $orig"; 274 275 my $enc_error; 276 277 # Cf. perldoc Encode and perldoc Encode::Supported 278 279 require Pod::Simple::Transcode; 280 281 if( $self->{'encoding'} ) { 282 my $norm_current = $self->{'encoding'}; 283 my $norm_e = $e; 284 foreach my $that ($norm_current, $norm_e) { 285 $that = lc($that); 286 $that =~ s/[-_]//g; 287 } 288 if($norm_current eq $norm_e) { 289 DEBUG > 1 and print "The '=encoding $orig' line is ", 290 "redundant. ($norm_current eq $norm_e). Ignoring.\n"; 291 $enc_error = ''; 292 # But that doesn't necessarily mean that the earlier one went okay 293 } else { 294 $enc_error = "Encoding is already set to " . $self->{'encoding'}; 295 DEBUG > 1 and print $enc_error; 296 } 297 } elsif ( 298 # OK, let's turn on the encoding 299 do { 300 DEBUG > 1 and print " Setting encoding to $e\n"; 301 $self->{'encoding'} = $e; 302 1; 303 } 304 and $e eq 'HACKRAW' 305 ) { 306 DEBUG and print " Putting in HACKRAW (no-op) encoding mode.\n"; 307 308 } elsif( Pod::Simple::Transcode::->encoding_is_available($e) ) { 309 310 die($enc_error = "WHAT? _transcoder is already set?!") 311 if $self->{'_transcoder'}; # should never happen 312 require Pod::Simple::Transcode; 313 $self->{'_transcoder'} = Pod::Simple::Transcode::->make_transcoder($e); 314 eval { 315 my @x = ('', "abc", "123"); 316 $self->{'_transcoder'}->(@x); 317 }; 318 $@ && die( $enc_error = 319 "Really unexpected error setting up encoding $e: $@\nAborting" 320 ); 321 322 } else { 323 my @supported = Pod::Simple::Transcode::->all_encodings; 324 325 # Note unsupported, and complain 326 DEBUG and print " Encoding [$e] is unsupported.", 327 "\nSupporteds: @supported\n"; 328 my $suggestion = ''; 329 330 # Look for a near match: 331 my $norm = lc($e); 332 $norm =~ tr[-_][]d; 333 my $n; 334 foreach my $enc (@supported) { 335 $n = lc($enc); 336 $n =~ tr[-_][]d; 337 next unless $n eq $norm; 338 $suggestion = " (Maybe \"$e\" should be \"$enc\"?)"; 339 last; 340 } 341 my $encmodver = Pod::Simple::Transcode::->encmodver; 342 $enc_error = join '' => 343 "This document probably does not appear as it should, because its ", 344 "\"=encoding $e\" line calls for an unsupported encoding.", 345 $suggestion, " [$encmodver\'s supported encodings are: @supported]" 346 ; 347 348 $self->scream( $self->{'line_count'}, $enc_error ); 349 } 350 push @{ $self->{'encoding_command_statuses'} }, $enc_error; 351 352 return '=encoding ALREADYDONE'; 353} 354 355# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 356 357sub _handle_encoding_second_level { 358 # By time this is called, the encoding (if well formed) will already 359 # have been acted one. 360 my($self, $para) = @_; 361 my @x = @$para; 362 my $content = join ' ', splice @x, 2; 363 $content =~ s/^\s+//s; 364 $content =~ s/\s+$//s; 365 366 DEBUG > 2 and print "Ogling encoding directive: =encoding $content\n"; 367 368 if($content eq 'ALREADYDONE') { 369 # It's already been handled. Check for errors. 370 if(! $self->{'encoding_command_statuses'} ) { 371 DEBUG > 2 and print " CRAZY ERROR: It wasn't really handled?!\n"; 372 } elsif( $self->{'encoding_command_statuses'}[-1] ) { 373 $self->whine( $para->[1]{'start_line'}, 374 sprintf "Couldn't do %s: %s", 375 $self->{'encoding_command_reqs' }[-1], 376 $self->{'encoding_command_statuses'}[-1], 377 ); 378 } else { 379 DEBUG > 2 and print " (Yup, it was successfully handled already.)\n"; 380 } 381 382 } else { 383 # Otherwise it's a syntax error 384 $self->whine( $para->[1]{'start_line'}, 385 "Invalid =encoding syntax: $content" 386 ); 387 } 388 389 return; 390} 391 392#~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~` 393 394{ 395my $m = -321; # magic line number 396 397sub _gen_errata { 398 my $self = $_[0]; 399 # Return 0 or more fake-o paragraphs explaining the accumulated 400 # errors on this document. 401 402 return() unless $self->{'errata'} and keys %{$self->{'errata'}}; 403 404 my @out; 405 406 foreach my $line (sort {$a <=> $b} keys %{$self->{'errata'}}) { 407 push @out, 408 ['=item', {'start_line' => $m}, "Around line $line:"], 409 map( ['~Para', {'start_line' => $m, '~cooked' => 1}, 410 #['~Top', {'start_line' => $m}, 411 $_ 412 #] 413 ], 414 @{$self->{'errata'}{$line}} 415 ) 416 ; 417 } 418 419 # TODO: report of unknown entities? unrenderable characters? 420 421 unshift @out, 422 ['=head1', {'start_line' => $m, 'errata' => 1}, 'POD ERRORS'], 423 ['~Para', {'start_line' => $m, '~cooked' => 1, 'errata' => 1}, 424 "Hey! ", 425 ['B', {}, 426 'The above document had some coding errors, which are explained below:' 427 ] 428 ], 429 ['=over', {'start_line' => $m, 'errata' => 1}, ''], 430 ; 431 432 push @out, 433 ['=back', {'start_line' => $m, 'errata' => 1}, ''], 434 ; 435 436 DEBUG and print "\n<<\n", pretty(\@out), "\n>>\n\n"; 437 438 return @out; 439} 440 441} 442 443#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 444 445############################################################################## 446## 447## stop reading now stop reading now stop reading now stop reading now stop 448## 449## HERE IT BECOMES REALLY SCARY 450## 451## stop reading now stop reading now stop reading now stop reading now stop 452## 453############################################################################## 454 455sub _ponder_paragraph_buffer { 456 457 # Para-token types as found in the buffer. 458 # ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end, 459 # =over, =back, =item 460 # and the null =pod (to be complained about if over one line) 461 # 462 # "~data" paragraphs are something we generate at this level, depending on 463 # a currently open =over region 464 465 # Events fired: Begin and end for: 466 # directivename (like head1 .. head4), item, extend, 467 # for (from =begin...=end, =for), 468 # over-bullet, over-number, over-text, over-block, 469 # item-bullet, item-number, item-text, 470 # Document, 471 # Data, Para, Verbatim 472 # B, C, longdirname (TODO -- wha?), etc. for all directives 473 # 474 475 my $self = $_[0]; 476 my $paras; 477 return unless @{$paras = $self->{'paras'}}; 478 my $curr_open = ($self->{'curr_open'} ||= []); 479 480 my $scratch; 481 482 DEBUG > 10 and print "# Paragraph buffer: <<", pretty($paras), ">>\n"; 483 484 # We have something in our buffer. So apparently the document has started. 485 unless($self->{'doc_has_started'}) { 486 $self->{'doc_has_started'} = 1; 487 488 my $starting_contentless; 489 $starting_contentless = 490 ( 491 !@$curr_open 492 and @$paras and ! grep $_->[0] ne '~end', @$paras 493 # i.e., if the paras is all ~ends 494 ) 495 ; 496 DEBUG and print "# Starting ", 497 $starting_contentless ? 'contentless' : 'contentful', 498 " document\n" 499 ; 500 501 $self->_handle_element_start( 502 ($scratch = 'Document'), 503 { 504 'start_line' => $paras->[0][1]{'start_line'}, 505 $starting_contentless ? ( 'contentless' => 1 ) : (), 506 }, 507 ); 508 } 509 510 my($para, $para_type); 511 while(@$paras) { 512 last if @$paras == 1 and 513 ( $paras->[0][0] eq '=over' or $paras->[0][0] eq '~Verbatim' 514 or $paras->[0][0] eq '=item' ) 515 ; 516 # Those're the three kinds of paragraphs that require lookahead. 517 # Actually, an "=item Foo" inside an <over type=text> region 518 # and any =item inside an <over type=block> region (rare) 519 # don't require any lookahead, but all others (bullets 520 # and numbers) do. 521 522# TODO: winge about many kinds of directives in non-resolving =for regions? 523# TODO: many? like what? =head1 etc? 524 525 $para = shift @$paras; 526 $para_type = $para->[0]; 527 528 DEBUG > 1 and print "Pondering a $para_type paragraph, given the stack: (", 529 $self->_dump_curr_open(), ")\n"; 530 531 if($para_type eq '=for') { 532 next if $self->_ponder_for($para,$curr_open,$paras); 533 534 } elsif($para_type eq '=begin') { 535 next if $self->_ponder_begin($para,$curr_open,$paras); 536 537 } elsif($para_type eq '=end') { 538 next if $self->_ponder_end($para,$curr_open,$paras); 539 540 } elsif($para_type eq '~end') { # The virtual end-document signal 541 next if $self->_ponder_doc_end($para,$curr_open,$paras); 542 } 543 544 545 # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ 546 #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ 547 if(grep $_->[1]{'~ignore'}, @$curr_open) { 548 DEBUG > 1 and 549 print "Skipping $para_type paragraph because in ignore mode.\n"; 550 next; 551 } 552 #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ 553 # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ 554 555 if($para_type eq '=pod') { 556 $self->_ponder_pod($para,$curr_open,$paras); 557 558 } elsif($para_type eq '=over') { 559 next if $self->_ponder_over($para,$curr_open,$paras); 560 561 } elsif($para_type eq '=back') { 562 next if $self->_ponder_back($para,$curr_open,$paras); 563 564 } else { 565 566 # All non-magical codes!!! 567 568 # Here we start using $para_type for our own twisted purposes, to 569 # mean how it should get treated, not as what the element name 570 # should be. 571 572 DEBUG > 1 and print "Pondering non-magical $para_type\n"; 573 574 my $i; 575 576 # Enforce some =headN discipline 577 if($para_type =~ m/^=head\d$/s 578 and ! $self->{'accept_heads_anywhere'} 579 and @$curr_open 580 and $curr_open->[-1][0] eq '=over' 581 ) { 582 DEBUG > 2 and print "'=$para_type' inside an '=over'!\n"; 583 $self->whine( 584 $para->[1]{'start_line'}, 585 "You forgot a '=back' before '$para_type'" 586 ); 587 unshift @$paras, ['=back', {}, ''], $para; # close the =over 588 next; 589 } 590 591 592 if($para_type eq '=item') { 593 594 my $over; 595 unless(@$curr_open and ($over = $curr_open->[-1])->[0] eq '=over') { 596 $self->whine( 597 $para->[1]{'start_line'}, 598 "'=item' outside of any '=over'" 599 ); 600 unshift @$paras, 601 ['=over', {'start_line' => $para->[1]{'start_line'}}, ''], 602 $para 603 ; 604 next; 605 } 606 607 608 my $over_type = $over->[1]{'~type'}; 609 610 if(!$over_type) { 611 # Shouldn't happen1 612 die "Typeless over in stack, starting at line " 613 . $over->[1]{'start_line'}; 614 615 } elsif($over_type eq 'block') { 616 unless($curr_open->[-1][1]{'~bitched_about'}) { 617 $curr_open->[-1][1]{'~bitched_about'} = 1; 618 $self->whine( 619 $curr_open->[-1][1]{'start_line'}, 620 "You can't have =items (as at line " 621 . $para->[1]{'start_line'} 622 . ") unless the first thing after the =over is an =item" 623 ); 624 } 625 # Just turn it into a paragraph and reconsider it 626 $para->[0] = '~Para'; 627 unshift @$paras, $para; 628 next; 629 630 } elsif($over_type eq 'text') { 631 my $item_type = $self->_get_item_type($para); 632 # That kills the content of the item if it's a number or bullet. 633 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; 634 635 if($item_type eq 'text') { 636 # Nothing special needs doing for 'text' 637 } elsif($item_type eq 'number' or $item_type eq 'bullet') { 638 die "Unknown item type $item_type" 639 unless $item_type eq 'number' or $item_type eq 'bullet'; 640 # Undo our clobbering: 641 push @$para, $para->[1]{'~orig_content'}; 642 delete $para->[1]{'number'}; 643 # Only a PROPER item-number element is allowed 644 # to have a number attribute. 645 } else { 646 die "Unhandled item type $item_type"; # should never happen 647 } 648 649 # =item-text thingies don't need any assimilation, it seems. 650 651 } elsif($over_type eq 'number') { 652 my $item_type = $self->_get_item_type($para); 653 # That kills the content of the item if it's a number or bullet. 654 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; 655 656 my $expected_value = ++ $curr_open->[-1][1]{'~counter'}; 657 658 if($item_type eq 'bullet') { 659 # Hm, it's not numeric. Correct for this. 660 $para->[1]{'number'} = $expected_value; 661 $self->whine( 662 $para->[1]{'start_line'}, 663 "Expected '=item $expected_value'" 664 ); 665 push @$para, $para->[1]{'~orig_content'}; 666 # restore the bullet, blocking the assimilation of next para 667 668 } elsif($item_type eq 'text') { 669 # Hm, it's not numeric. Correct for this. 670 $para->[1]{'number'} = $expected_value; 671 $self->whine( 672 $para->[1]{'start_line'}, 673 "Expected '=item $expected_value'" 674 ); 675 # Text content will still be there and will block next ~Para 676 677 } elsif($item_type ne 'number') { 678 die "Unknown item type $item_type"; # should never happen 679 680 } elsif($expected_value == $para->[1]{'number'}) { 681 DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n"; 682 683 } else { 684 DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'}, 685 " instead of the expected value of $expected_value\n"; 686 $self->whine( 687 $para->[1]{'start_line'}, 688 "You have '=item " . $para->[1]{'number'} . 689 "' instead of the expected '=item $expected_value'" 690 ); 691 $para->[1]{'number'} = $expected_value; # correcting!! 692 } 693 694 if(@$para == 2) { 695 # For the cases where we /didn't/ push to @$para 696 if($paras->[0][0] eq '~Para') { 697 DEBUG and print "Assimilating following ~Para content into $over_type item\n"; 698 push @$para, splice @{shift @$paras},2; 699 } else { 700 DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; 701 push @$para, ''; # Just so it's not contentless 702 } 703 } 704 705 706 } elsif($over_type eq 'bullet') { 707 my $item_type = $self->_get_item_type($para); 708 # That kills the content of the item if it's a number or bullet. 709 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; 710 711 if($item_type eq 'bullet') { 712 # as expected! 713 714 if( $para->[1]{'~_freaky_para_hack'} ) { 715 DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n"; 716 push @$para, delete $para->[1]{'~_freaky_para_hack'}; 717 } 718 719 } elsif($item_type eq 'number') { 720 $self->whine( 721 $para->[1]{'start_line'}, 722 "Expected '=item *'" 723 ); 724 push @$para, $para->[1]{'~orig_content'}; 725 # and block assimilation of the next paragraph 726 delete $para->[1]{'number'}; 727 # Only a PROPER item-number element is allowed 728 # to have a number attribute. 729 } elsif($item_type eq 'text') { 730 $self->whine( 731 $para->[1]{'start_line'}, 732 "Expected '=item *'" 733 ); 734 # But doesn't need processing. But it'll block assimilation 735 # of the next para. 736 } else { 737 die "Unhandled item type $item_type"; # should never happen 738 } 739 740 if(@$para == 2) { 741 # For the cases where we /didn't/ push to @$para 742 if($paras->[0][0] eq '~Para') { 743 DEBUG and print "Assimilating following ~Para content into $over_type item\n"; 744 push @$para, splice @{shift @$paras},2; 745 } else { 746 DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; 747 push @$para, ''; # Just so it's not contentless 748 } 749 } 750 751 } else { 752 die "Unhandled =over type \"$over_type\"?"; 753 # Shouldn't happen! 754 } 755 756 $para_type = 'Plain'; 757 $para->[0] .= '-' . $over_type; 758 # Whew. Now fall thru and process it. 759 760 761 } elsif($para_type eq '=extend') { 762 # Well, might as well implement it here. 763 $self->_ponder_extend($para); 764 next; # and skip 765 } elsif($para_type eq '=encoding') { 766 # Not actually acted on here, but we catch errors here. 767 $self->_handle_encoding_second_level($para); 768 769 next; # and skip 770 } elsif($para_type eq '~Verbatim') { 771 $para->[0] = 'Verbatim'; 772 $para_type = '?Verbatim'; 773 } elsif($para_type eq '~Para') { 774 $para->[0] = 'Para'; 775 $para_type = '?Plain'; 776 } elsif($para_type eq 'Data') { 777 $para->[0] = 'Data'; 778 $para_type = '?Data'; 779 } elsif( $para_type =~ s/^=//s 780 and defined( $para_type = $self->{'accept_directives'}{$para_type} ) 781 ) { 782 DEBUG > 1 and print " Pondering known directive ${$para}[0] as $para_type\n"; 783 } else { 784 # An unknown directive! 785 DEBUG > 1 and printf "Unhandled directive %s (Handled: %s)\n", 786 $para->[0], join(' ', sort keys %{$self->{'accept_directives'}} ) 787 ; 788 $self->whine( 789 $para->[1]{'start_line'}, 790 "Unknown directive: $para->[0]" 791 ); 792 793 # And maybe treat it as text instead of just letting it go? 794 next; 795 } 796 797 if($para_type =~ s/^\?//s) { 798 if(! @$curr_open) { # usual case 799 DEBUG and print "Treating $para_type paragraph as such because stack is empty.\n"; 800 } else { 801 my @fors = grep $_->[0] eq '=for', @$curr_open; 802 DEBUG > 1 and print "Containing fors: ", 803 join(',', map $_->[1]{'target'}, @fors), "\n"; 804 805 if(! @fors) { 806 DEBUG and print "Treating $para_type paragraph as such because stack has no =for's\n"; 807 808 #} elsif(grep $_->[1]{'~resolve'}, @fors) { 809 #} elsif(not grep !$_->[1]{'~resolve'}, @fors) { 810 } elsif( $fors[-1][1]{'~resolve'} ) { 811 # Look to the immediately containing for 812 813 if($para_type eq 'Data') { 814 DEBUG and print "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n"; 815 $para->[0] = 'Para'; 816 $para_type = 'Plain'; 817 } else { 818 DEBUG and print "Treating $para_type paragraph as such because the containing =for ($fors[-1][1]{'target'}) is a resolver\n"; 819 } 820 } else { 821 DEBUG and print "Treating $para_type paragraph as Data because the containing =for ($fors[-1][1]{'target'}) is a non-resolver\n"; 822 $para->[0] = $para_type = 'Data'; 823 } 824 } 825 } 826 827 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 828 if($para_type eq 'Plain') { 829 $self->_ponder_Plain($para); 830 } elsif($para_type eq 'Verbatim') { 831 $self->_ponder_Verbatim($para); 832 } elsif($para_type eq 'Data') { 833 $self->_ponder_Data($para); 834 } else { 835 die "\$para type is $para_type -- how did that happen?"; 836 # Shouldn't happen. 837 } 838 839 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 840 $para->[0] =~ s/^[~=]//s; 841 842 DEBUG and print "\n", pretty($para), "\n"; 843 844 # traverse the treelet (which might well be just one string scalar) 845 $self->{'content_seen'} ||= 1; 846 $self->_traverse_treelet_bit(@$para); 847 } 848 } 849 850 return; 851} 852 853########################################################################### 854# The sub-ponderers... 855 856 857 858sub _ponder_for { 859 my ($self,$para,$curr_open,$paras) = @_; 860 861 # Fake it out as a begin/end 862 my $target; 863 864 if(grep $_->[1]{'~ignore'}, @$curr_open) { 865 DEBUG > 1 and print "Ignoring ignorable =for\n"; 866 return 1; 867 } 868 869 for(my $i = 2; $i < @$para; ++$i) { 870 if($para->[$i] =~ s/^\s*(\S+)\s*//s) { 871 $target = $1; 872 last; 873 } 874 } 875 unless(defined $target) { 876 $self->whine( 877 $para->[1]{'start_line'}, 878 "=for without a target?" 879 ); 880 return 1; 881 } 882 DEBUG > 1 and 883 print "Faking out a =for $target as a =begin $target / =end $target\n"; 884 885 $para->[0] = 'Data'; 886 887 unshift @$paras, 888 ['=begin', 889 {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, 890 $target, 891 ], 892 $para, 893 ['=end', 894 {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, 895 $target, 896 ], 897 ; 898 899 return 1; 900} 901 902sub _ponder_begin { 903 my ($self,$para,$curr_open,$paras) = @_; 904 my $content = join ' ', splice @$para, 2; 905 $content =~ s/^\s+//s; 906 $content =~ s/\s+$//s; 907 unless(length($content)) { 908 $self->whine( 909 $para->[1]{'start_line'}, 910 "=begin without a target?" 911 ); 912 DEBUG and print "Ignoring targetless =begin\n"; 913 return 1; 914 } 915 916 my ($target, $title) = $content =~ m/^(\S+)\s*(.*)$/; 917 $para->[1]{'title'} = $title if ($title); 918 $para->[1]{'target'} = $target; # without any ':' 919 $content = $target; # strip off the title 920 921 $content =~ s/^:!/!:/s; 922 my $neg; # whether this is a negation-match 923 $neg = 1 if $content =~ s/^!//s; 924 my $to_resolve; # whether to process formatting codes 925 $to_resolve = 1 if $content =~ s/^://s; 926 927 my $dont_ignore; # whether this target matches us 928 929 foreach my $target_name ( 930 split(',', $content, -1), 931 $neg ? () : '*' 932 ) { 933 DEBUG > 2 and 934 print " Considering whether =begin $content matches $target_name\n"; 935 next unless $self->{'accept_targets'}{$target_name}; 936 937 DEBUG > 2 and 938 print " It DOES match the acceptable target $target_name!\n"; 939 $to_resolve = 1 940 if $self->{'accept_targets'}{$target_name} eq 'force_resolve'; 941 $dont_ignore = 1; 942 $para->[1]{'target_matching'} = $target_name; 943 last; # stop looking at other target names 944 } 945 946 if($neg) { 947 if( $dont_ignore ) { 948 $dont_ignore = ''; 949 delete $para->[1]{'target_matching'}; 950 DEBUG > 2 and print " But the leading ! means that this is a NON-match!\n"; 951 } else { 952 $dont_ignore = 1; 953 $para->[1]{'target_matching'} = '!'; 954 DEBUG > 2 and print " But the leading ! means that this IS a match!\n"; 955 } 956 } 957 958 $para->[0] = '=for'; # Just what we happen to call these, internally 959 $para->[1]{'~really'} ||= '=begin'; 960 $para->[1]{'~ignore'} = (! $dont_ignore) || 0; 961 $para->[1]{'~resolve'} = $to_resolve || 0; 962 963 DEBUG > 1 and print " Making note to ", $dont_ignore ? 'not ' : '', 964 "ignore contents of this region\n"; 965 DEBUG > 1 and $dont_ignore and print " Making note to treat contents as ", 966 ($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n"; 967 DEBUG > 1 and print " (Stack now: ", $self->_dump_curr_open(), ")\n"; 968 969 push @$curr_open, $para; 970 if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) { 971 DEBUG > 1 and print "Ignoring ignorable =begin\n"; 972 } else { 973 $self->{'content_seen'} ||= 1; 974 $self->_handle_element_start((my $scratch='for'), $para->[1]); 975 } 976 977 return 1; 978} 979 980sub _ponder_end { 981 my ($self,$para,$curr_open,$paras) = @_; 982 my $content = join ' ', splice @$para, 2; 983 $content =~ s/^\s+//s; 984 $content =~ s/\s+$//s; 985 DEBUG and print "Ogling '=end $content' directive\n"; 986 987 unless(length($content)) { 988 $self->whine( 989 $para->[1]{'start_line'}, 990 "'=end' without a target?" . ( 991 ( @$curr_open and $curr_open->[-1][0] eq '=for' ) 992 ? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' ) 993 : '' 994 ) 995 ); 996 DEBUG and print "Ignoring targetless =end\n"; 997 return 1; 998 } 999 1000 unless($content =~ m/^\S+$/) { # i.e., unless it's one word 1001 $self->whine( 1002 $para->[1]{'start_line'}, 1003 "'=end $content' is invalid. (Stack: " 1004 . $self->_dump_curr_open() . ')' 1005 ); 1006 DEBUG and print "Ignoring mistargetted =end $content\n"; 1007 return 1; 1008 } 1009 1010 unless(@$curr_open and $curr_open->[-1][0] eq '=for') { 1011 $self->whine( 1012 $para->[1]{'start_line'}, 1013 "=end $content without matching =begin. (Stack: " 1014 . $self->_dump_curr_open() . ')' 1015 ); 1016 DEBUG and print "Ignoring mistargetted =end $content\n"; 1017 return 1; 1018 } 1019 1020 unless($content eq $curr_open->[-1][1]{'target'}) { 1021 $self->whine( 1022 $para->[1]{'start_line'}, 1023 "=end $content doesn't match =begin " 1024 . $curr_open->[-1][1]{'target'} 1025 . ". (Stack: " 1026 . $self->_dump_curr_open() . ')' 1027 ); 1028 DEBUG and print "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n"; 1029 return 1; 1030 } 1031 1032 # Else it's okay to close... 1033 if(grep $_->[1]{'~ignore'}, @$curr_open) { 1034 DEBUG > 1 and print "Not firing any event for this =end $content because in an ignored region\n"; 1035 # And that may be because of this to-be-closed =for region, or some 1036 # other one, but it doesn't matter. 1037 } else { 1038 $curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'}; 1039 # what's that for? 1040 1041 $self->{'content_seen'} ||= 1; 1042 $self->_handle_element_end( my $scratch = 'for' ); 1043 } 1044 DEBUG > 1 and print "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n"; 1045 pop @$curr_open; 1046 1047 return 1; 1048} 1049 1050sub _ponder_doc_end { 1051 my ($self,$para,$curr_open,$paras) = @_; 1052 if(@$curr_open) { # Deal with things left open 1053 DEBUG and print "Stack is nonempty at end-document: (", 1054 $self->_dump_curr_open(), ")\n"; 1055 1056 DEBUG > 9 and print "Stack: ", pretty($curr_open), "\n"; 1057 unshift @$paras, $self->_closers_for_all_curr_open; 1058 # Make sure there is exactly one ~end in the parastack, at the end: 1059 @$paras = grep $_->[0] ne '~end', @$paras; 1060 push @$paras, $para, $para; 1061 # We need two -- once for the next cycle where we 1062 # generate errata, and then another to be at the end 1063 # when that loop back around to process the errata. 1064 return 1; 1065 1066 } else { 1067 DEBUG and print "Okay, stack is empty now.\n"; 1068 } 1069 1070 # Try generating errata section, if applicable 1071 unless($self->{'~tried_gen_errata'}) { 1072 $self->{'~tried_gen_errata'} = 1; 1073 my @extras = $self->_gen_errata(); 1074 if(@extras) { 1075 unshift @$paras, @extras; 1076 DEBUG and print "Generated errata... relooping...\n"; 1077 return 1; # I.e., loop around again to process these fake-o paragraphs 1078 } 1079 } 1080 1081 splice @$paras; # Well, that's that for this paragraph buffer. 1082 DEBUG and print "Throwing end-document event.\n"; 1083 1084 $self->_handle_element_end( my $scratch = 'Document' ); 1085 return 1; # Hasta la byebye 1086} 1087 1088sub _ponder_pod { 1089 my ($self,$para,$curr_open,$paras) = @_; 1090 $self->whine( 1091 $para->[1]{'start_line'}, 1092 "=pod directives shouldn't be over one line long! Ignoring all " 1093 . (@$para - 2) . " lines of content" 1094 ) if @$para > 3; 1095 # Content is always ignored. 1096 return; 1097} 1098 1099sub _ponder_over { 1100 my ($self,$para,$curr_open,$paras) = @_; 1101 return 1 unless @$paras; 1102 my $list_type; 1103 1104 if($paras->[0][0] eq '=item') { # most common case 1105 $list_type = $self->_get_initial_item_type($paras->[0]); 1106 1107 } elsif($paras->[0][0] eq '=back') { 1108 # Ignore empty lists. TODO: make this an option? 1109 shift @$paras; 1110 return 1; 1111 1112 } elsif($paras->[0][0] eq '~end') { 1113 $self->whine( 1114 $para->[1]{'start_line'}, 1115 "=over is the last thing in the document?!" 1116 ); 1117 return 1; # But feh, ignore it. 1118 } else { 1119 $list_type = 'block'; 1120 } 1121 $para->[1]{'~type'} = $list_type; 1122 push @$curr_open, $para; 1123 # yes, we reuse the paragraph as a stack item 1124 1125 my $content = join ' ', splice @$para, 2; 1126 my $overness; 1127 if($content =~ m/^\s*$/s) { 1128 $para->[1]{'indent'} = 4; 1129 } elsif($content =~ m/^\s*((?:\d*\.)?\d+)\s*$/s) { 1130 no integer; 1131 $para->[1]{'indent'} = $1; 1132 if($1 == 0) { 1133 $self->whine( 1134 $para->[1]{'start_line'}, 1135 "Can't have a 0 in =over $content" 1136 ); 1137 $para->[1]{'indent'} = 4; 1138 } 1139 } else { 1140 $self->whine( 1141 $para->[1]{'start_line'}, 1142 "=over should be: '=over' or '=over positive_number'" 1143 ); 1144 $para->[1]{'indent'} = 4; 1145 } 1146 DEBUG > 1 and print "=over found of type $list_type\n"; 1147 1148 $self->{'content_seen'} ||= 1; 1149 $self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]); 1150 1151 return; 1152} 1153 1154sub _ponder_back { 1155 my ($self,$para,$curr_open,$paras) = @_; 1156 # TODO: fire off </item-number> or </item-bullet> or </item-text> ?? 1157 1158 my $content = join ' ', splice @$para, 2; 1159 if($content =~ m/\S/) { 1160 $self->whine( 1161 $para->[1]{'start_line'}, 1162 "=back doesn't take any parameters, but you said =back $content" 1163 ); 1164 } 1165 1166 if(@$curr_open and $curr_open->[-1][0] eq '=over') { 1167 DEBUG > 1 and print "=back happily closes matching =over\n"; 1168 # Expected case: we're closing the most recently opened thing 1169 #my $over = pop @$curr_open; 1170 $self->{'content_seen'} ||= 1; 1171 $self->_handle_element_end( my $scratch = 1172 'over-' . ( (pop @$curr_open)->[1]{'~type'} ) 1173 ); 1174 } else { 1175 DEBUG > 1 and print "=back found without a matching =over. Stack: (", 1176 join(', ', map $_->[0], @$curr_open), ").\n"; 1177 $self->whine( 1178 $para->[1]{'start_line'}, 1179 '=back without =over' 1180 ); 1181 return 1; # and ignore it 1182 } 1183} 1184 1185sub _ponder_item { 1186 my ($self,$para,$curr_open,$paras) = @_; 1187 my $over; 1188 unless(@$curr_open and ($over = $curr_open->[-1])->[0] eq '=over') { 1189 $self->whine( 1190 $para->[1]{'start_line'}, 1191 "'=item' outside of any '=over'" 1192 ); 1193 unshift @$paras, 1194 ['=over', {'start_line' => $para->[1]{'start_line'}}, ''], 1195 $para 1196 ; 1197 return 1; 1198 } 1199 1200 1201 my $over_type = $over->[1]{'~type'}; 1202 1203 if(!$over_type) { 1204 # Shouldn't happen1 1205 die "Typeless over in stack, starting at line " 1206 . $over->[1]{'start_line'}; 1207 1208 } elsif($over_type eq 'block') { 1209 unless($curr_open->[-1][1]{'~bitched_about'}) { 1210 $curr_open->[-1][1]{'~bitched_about'} = 1; 1211 $self->whine( 1212 $curr_open->[-1][1]{'start_line'}, 1213 "You can't have =items (as at line " 1214 . $para->[1]{'start_line'} 1215 . ") unless the first thing after the =over is an =item" 1216 ); 1217 } 1218 # Just turn it into a paragraph and reconsider it 1219 $para->[0] = '~Para'; 1220 unshift @$paras, $para; 1221 return 1; 1222 1223 } elsif($over_type eq 'text') { 1224 my $item_type = $self->_get_item_type($para); 1225 # That kills the content of the item if it's a number or bullet. 1226 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; 1227 1228 if($item_type eq 'text') { 1229 # Nothing special needs doing for 'text' 1230 } elsif($item_type eq 'number' or $item_type eq 'bullet') { 1231 die "Unknown item type $item_type" 1232 unless $item_type eq 'number' or $item_type eq 'bullet'; 1233 # Undo our clobbering: 1234 push @$para, $para->[1]{'~orig_content'}; 1235 delete $para->[1]{'number'}; 1236 # Only a PROPER item-number element is allowed 1237 # to have a number attribute. 1238 } else { 1239 die "Unhandled item type $item_type"; # should never happen 1240 } 1241 1242 # =item-text thingies don't need any assimilation, it seems. 1243 1244 } elsif($over_type eq 'number') { 1245 my $item_type = $self->_get_item_type($para); 1246 # That kills the content of the item if it's a number or bullet. 1247 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; 1248 1249 my $expected_value = ++ $curr_open->[-1][1]{'~counter'}; 1250 1251 if($item_type eq 'bullet') { 1252 # Hm, it's not numeric. Correct for this. 1253 $para->[1]{'number'} = $expected_value; 1254 $self->whine( 1255 $para->[1]{'start_line'}, 1256 "Expected '=item $expected_value'" 1257 ); 1258 push @$para, $para->[1]{'~orig_content'}; 1259 # restore the bullet, blocking the assimilation of next para 1260 1261 } elsif($item_type eq 'text') { 1262 # Hm, it's not numeric. Correct for this. 1263 $para->[1]{'number'} = $expected_value; 1264 $self->whine( 1265 $para->[1]{'start_line'}, 1266 "Expected '=item $expected_value'" 1267 ); 1268 # Text content will still be there and will block next ~Para 1269 1270 } elsif($item_type ne 'number') { 1271 die "Unknown item type $item_type"; # should never happen 1272 1273 } elsif($expected_value == $para->[1]{'number'}) { 1274 DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n"; 1275 1276 } else { 1277 DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'}, 1278 " instead of the expected value of $expected_value\n"; 1279 $self->whine( 1280 $para->[1]{'start_line'}, 1281 "You have '=item " . $para->[1]{'number'} . 1282 "' instead of the expected '=item $expected_value'" 1283 ); 1284 $para->[1]{'number'} = $expected_value; # correcting!! 1285 } 1286 1287 if(@$para == 2) { 1288 # For the cases where we /didn't/ push to @$para 1289 if($paras->[0][0] eq '~Para') { 1290 DEBUG and print "Assimilating following ~Para content into $over_type item\n"; 1291 push @$para, splice @{shift @$paras},2; 1292 } else { 1293 DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; 1294 push @$para, ''; # Just so it's not contentless 1295 } 1296 } 1297 1298 1299 } elsif($over_type eq 'bullet') { 1300 my $item_type = $self->_get_item_type($para); 1301 # That kills the content of the item if it's a number or bullet. 1302 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; 1303 1304 if($item_type eq 'bullet') { 1305 # as expected! 1306 1307 if( $para->[1]{'~_freaky_para_hack'} ) { 1308 DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n"; 1309 push @$para, delete $para->[1]{'~_freaky_para_hack'}; 1310 } 1311 1312 } elsif($item_type eq 'number') { 1313 $self->whine( 1314 $para->[1]{'start_line'}, 1315 "Expected '=item *'" 1316 ); 1317 push @$para, $para->[1]{'~orig_content'}; 1318 # and block assimilation of the next paragraph 1319 delete $para->[1]{'number'}; 1320 # Only a PROPER item-number element is allowed 1321 # to have a number attribute. 1322 } elsif($item_type eq 'text') { 1323 $self->whine( 1324 $para->[1]{'start_line'}, 1325 "Expected '=item *'" 1326 ); 1327 # But doesn't need processing. But it'll block assimilation 1328 # of the next para. 1329 } else { 1330 die "Unhandled item type $item_type"; # should never happen 1331 } 1332 1333 if(@$para == 2) { 1334 # For the cases where we /didn't/ push to @$para 1335 if($paras->[0][0] eq '~Para') { 1336 DEBUG and print "Assimilating following ~Para content into $over_type item\n"; 1337 push @$para, splice @{shift @$paras},2; 1338 } else { 1339 DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; 1340 push @$para, ''; # Just so it's not contentless 1341 } 1342 } 1343 1344 } else { 1345 die "Unhandled =over type \"$over_type\"?"; 1346 # Shouldn't happen! 1347 } 1348 $para->[0] .= '-' . $over_type; 1349 1350 return; 1351} 1352 1353sub _ponder_Plain { 1354 my ($self,$para) = @_; 1355 DEBUG and print " giving plain treatment...\n"; 1356 unless( @$para == 2 or ( @$para == 3 and $para->[2] eq '' ) 1357 or $para->[1]{'~cooked'} 1358 ) { 1359 push @$para, 1360 @{$self->_make_treelet( 1361 join("\n", splice(@$para, 2)), 1362 $para->[1]{'start_line'} 1363 )}; 1364 } 1365 # Empty paragraphs don't need a treelet for any reason I can see. 1366 # And precooked paragraphs already have a treelet. 1367 return; 1368} 1369 1370sub _ponder_Verbatim { 1371 my ($self,$para) = @_; 1372 DEBUG and print " giving verbatim treatment...\n"; 1373 1374 $para->[1]{'xml:space'} = 'preserve'; 1375 1376 my $indent = $self->strip_verbatim_indent; 1377 if ($indent && ref $indent eq 'CODE') { 1378 my @shifted = (shift @{$para}, shift @{$para}); 1379 $indent = $indent->($para); 1380 unshift @{$para}, @shifted; 1381 } 1382 1383 for(my $i = 2; $i < @$para; $i++) { 1384 foreach my $line ($para->[$i]) { # just for aliasing 1385 # Strip indentation. 1386 $line =~ s/^\E$indent// if $indent 1387 && !($self->{accept_codes} && $self->{accept_codes}{VerbatimFormatted}); 1388 while( $line =~ 1389 # Sort of adapted from Text::Tabs -- yes, it's hardwired in that 1390 # tabs are at every EIGHTH column. For portability, it has to be 1391 # one setting everywhere, and 8th wins. 1392 s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e 1393 ) {} 1394 1395 # TODO: whinge about (or otherwise treat) unindented or overlong lines 1396 1397 } 1398 } 1399 1400 # Now the VerbatimFormatted hoodoo... 1401 if( $self->{'accept_codes'} and 1402 $self->{'accept_codes'}{'VerbatimFormatted'} 1403 ) { 1404 while(@$para > 3 and $para->[-1] !~ m/\S/) { pop @$para } 1405 # Kill any number of terminal newlines 1406 $self->_verbatim_format($para); 1407 } elsif ($self->{'codes_in_verbatim'}) { 1408 push @$para, 1409 @{$self->_make_treelet( 1410 join("\n", splice(@$para, 2)), 1411 $para->[1]{'start_line'}, $para->[1]{'xml:space'} 1412 )}; 1413 $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines 1414 } else { 1415 push @$para, join "\n", splice(@$para, 2) if @$para > 3; 1416 $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines 1417 } 1418 return; 1419} 1420 1421sub _ponder_Data { 1422 my ($self,$para) = @_; 1423 DEBUG and print " giving data treatment...\n"; 1424 $para->[1]{'xml:space'} = 'preserve'; 1425 push @$para, join "\n", splice(@$para, 2) if @$para > 3; 1426 return; 1427} 1428 1429 1430 1431 1432########################################################################### 1433 1434sub _traverse_treelet_bit { # for use only by the routine above 1435 my($self, $name) = splice @_,0,2; 1436 1437 my $scratch; 1438 $self->_handle_element_start(($scratch=$name), shift @_); 1439 1440 foreach my $x (@_) { 1441 if(ref($x)) { 1442 &_traverse_treelet_bit($self, @$x); 1443 } else { 1444 $self->_handle_text($x); 1445 } 1446 } 1447 1448 $self->_handle_element_end($scratch=$name); 1449 return; 1450} 1451 1452#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 1453 1454sub _closers_for_all_curr_open { 1455 my $self = $_[0]; 1456 my @closers; 1457 foreach my $still_open (@{ $self->{'curr_open'} || return }) { 1458 my @copy = @$still_open; 1459 $copy[1] = {%{ $copy[1] }}; 1460 #$copy[1]{'start_line'} = -1; 1461 if($copy[0] eq '=for') { 1462 $copy[0] = '=end'; 1463 } elsif($copy[0] eq '=over') { 1464 $copy[0] = '=back'; 1465 } else { 1466 die "I don't know how to auto-close an open $copy[0] region"; 1467 } 1468 1469 unless( @copy > 2 ) { 1470 push @copy, $copy[1]{'target'}; 1471 $copy[-1] = '' unless defined $copy[-1]; 1472 # since =over's don't have targets 1473 } 1474 1475 DEBUG and print "Queuing up fake-o event: ", pretty(\@copy), "\n"; 1476 unshift @closers, \@copy; 1477 } 1478 return @closers; 1479} 1480 1481#-------------------------------------------------------------------------- 1482 1483sub _verbatim_format { 1484 my($it, $p) = @_; 1485 1486 my $formatting; 1487 1488 for(my $i = 2; $i < @$p; $i++) { # work backwards over the lines 1489 DEBUG and print "_verbatim_format appends a newline to $i: $p->[$i]\n"; 1490 $p->[$i] .= "\n"; 1491 # Unlike with simple Verbatim blocks, we don't end up just doing 1492 # a join("\n", ...) on the contents, so we have to append a 1493 # newline to ever line, and then nix the last one later. 1494 } 1495 1496 if( DEBUG > 4 ) { 1497 print "<<\n"; 1498 for(my $i = $#$p; $i >= 2; $i--) { # work backwards over the lines 1499 print "_verbatim_format $i: $p->[$i]"; 1500 } 1501 print ">>\n"; 1502 } 1503 1504 for(my $i = $#$p; $i > 2; $i--) { 1505 # work backwards over the lines, except the first (#2) 1506 1507 #next unless $p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s 1508 # and $p->[$i-1] !~ m{^#:[ \^\/\%]*\n?$}s; 1509 # look at a formatty line preceding a nonformatty one 1510 DEBUG > 5 and print "Scrutinizing line $i: $$p[$i]\n"; 1511 if($p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s) { 1512 DEBUG > 5 and print " It's a formatty line. ", 1513 "Peeking at previous line ", $i-1, ": $$p[$i-1]: \n"; 1514 1515 if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) { 1516 DEBUG > 5 and print " Previous line is formatty! Skipping this one.\n"; 1517 next; 1518 } else { 1519 DEBUG > 5 and print " Previous line is non-formatty! Yay!\n"; 1520 } 1521 } else { 1522 DEBUG > 5 and print " It's not a formatty line. Ignoring\n"; 1523 next; 1524 } 1525 1526 # A formatty line has to have #: in the first two columns, and uses 1527 # "^" to mean bold, "/" to mean underline, and "%" to mean bold italic. 1528 # Example: 1529 # What do you want? i like pie. [or whatever] 1530 # #:^^^^^^^^^^^^^^^^^ ///////////// 1531 1532 1533 DEBUG > 4 and print "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n"; 1534 1535 $formatting = ' ' . $1; 1536 $formatting =~ s/\s+$//s; # nix trailing whitespace 1537 unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op 1538 splice @$p,$i,1; # remove this line 1539 $i--; # don't consider next line 1540 next; 1541 } 1542 1543 if( length($formatting) >= length($p->[$i-1]) ) { 1544 $formatting = substr($formatting, 0, length($p->[$i-1]) - 1) . ' '; 1545 } else { 1546 $formatting .= ' ' x (length($p->[$i-1]) - length($formatting)); 1547 } 1548 # Make $formatting and the previous line be exactly the same length, 1549 # with $formatting having a " " as the last character. 1550 1551 DEBUG > 4 and print "Formatting <$formatting> on <", $p->[$i-1], ">\n"; 1552 1553 1554 my @new_line; 1555 while( $formatting =~ m{\G(( +)|(\^+)|(\/+)|(\%+))}g ) { 1556 #print "Format matches $1\n"; 1557 1558 if($2) { 1559 #print "SKIPPING <$2>\n"; 1560 push @new_line, 1561 substr($p->[$i-1], pos($formatting)-length($1), length($1)); 1562 } else { 1563 #print "SNARING $+\n"; 1564 push @new_line, [ 1565 ( 1566 $3 ? 'VerbatimB' : 1567 $4 ? 'VerbatimI' : 1568 $5 ? 'VerbatimBI' : die("Should never get called") 1569 ), {}, 1570 substr($p->[$i-1], pos($formatting)-length($1), length($1)) 1571 ]; 1572 #print "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n"; 1573 } 1574 } 1575 my @nixed = 1576 splice @$p, $i-1, 2, @new_line; # replace myself and the next line 1577 DEBUG > 10 and print "Nixed count: ", scalar(@nixed), "\n"; 1578 1579 DEBUG > 6 and print "New version of the above line is these tokens (", 1580 scalar(@new_line), "):", 1581 map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n"; 1582 $i--; # So the next line we scrutinize is the line before the one 1583 # that we just went and formatted 1584 } 1585 1586 $p->[0] = 'VerbatimFormatted'; 1587 1588 # Collapse adjacent text nodes, just for kicks. 1589 for( my $i = 2; $i > $#$p; $i++ ) { # work forwards over the tokens except for the last 1590 if( !ref($p->[$i]) and !ref($p->[$i + 1]) ) { 1591 DEBUG > 5 and print "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n"; 1592 $p->[$i] .= splice @$p, $i+1, 1; # merge 1593 --$i; # and back up 1594 } 1595 } 1596 1597 # Now look for the last text token, and remove the terminal newline 1598 for( my $i = $#$p; $i >= 2; $i-- ) { 1599 # work backwards over the tokens, even the first 1600 if( !ref($p->[$i]) ) { 1601 if($p->[$i] =~ s/\n$//s) { 1602 DEBUG > 5 and print "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n"; 1603 } else { 1604 DEBUG > 5 and print 1605 "No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n"; 1606 } 1607 last; # we only want the next one 1608 } 1609 } 1610 1611 return; 1612} 1613 1614 1615#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 1616 1617 1618sub _treelet_from_formatting_codes { 1619 # Given a paragraph, returns a treelet. Full of scary tokenizing code. 1620 # Like [ '~Top', {'start_line' => $start_line}, 1621 # "I like ", 1622 # [ 'B', {}, "pie" ], 1623 # "!" 1624 # ] 1625 1626 my($self, $para, $start_line, $preserve_space) = @_; 1627 1628 my $treelet = ['~Top', {'start_line' => $start_line},]; 1629 1630 unless ($preserve_space || $self->{'preserve_whitespace'}) { 1631 $para =~ s/\s+/ /g; # collapse and trim all whitespace first. 1632 $para =~ s/ $//; 1633 $para =~ s/^ //; 1634 } 1635 1636 # Only apparent problem the above code is that N<< >> turns into 1637 # N<< >>. But then, word wrapping does that too! So don't do that! 1638 1639 my @stack; 1640 my @lineage = ($treelet); 1641 1642 DEBUG > 4 and print "Paragraph:\n$para\n\n"; 1643 1644 # Here begins our frightening tokenizer RE. The following regex matches 1645 # text in four main parts: 1646 # 1647 # * Start-codes. The first alternative matches C< or C<<, the latter 1648 # followed by some whitespace. $1 will hold the entire start code 1649 # (including any space following a multiple-angle-bracket delimiter), 1650 # and $2 will hold only the additional brackets past the first in a 1651 # multiple-bracket delimiter. length($2) + 1 will be the number of 1652 # closing brackets we have to find. 1653 # 1654 # * Closing brackets. Match some amount of whitespace followed by 1655 # multiple close brackets. The logic to see if this closes anything 1656 # is down below. Note that in order to parse C<< >> correctly, we 1657 # have to use look-behind (?<=\s\s), since the match of the starting 1658 # code will have consumed the whitespace. 1659 # 1660 # * A single closing bracket, to close a simple code like C<>. 1661 # 1662 # * Something that isn't a start or end code. We have to be careful 1663 # about accepting whitespace, since perlpodspec says that any whitespace 1664 # before a multiple-bracket closing delimiter should be ignored. 1665 # 1666 while($para =~ 1667 m/\G 1668 (?: 1669 # Match starting codes, including the whitespace following a 1670 # multiple-delimiter start code. $1 gets the whole start code and 1671 # $2 gets all but one of the <s in the multiple-bracket case. 1672 ([A-Z]<(?:(<+)\s+)?) 1673 | 1674 # Match multiple-bracket end codes. $3 gets the whitespace that 1675 # should be discarded before an end bracket but kept in other cases 1676 # and $4 gets the end brackets themselves. 1677 (\s+|(?<=\s\s))(>{2,}) 1678 | 1679 (\s?>) # $5: simple end-codes 1680 | 1681 ( # $6: stuff containing no start-codes or end-codes 1682 (?: 1683 [^A-Z\s>] 1684 | 1685 (?: 1686 [A-Z](?!<) 1687 ) 1688 | 1689 # whitespace is ok, but we don't want to eat the whitespace before 1690 # a multiple-bracket end code. 1691 # NOTE: we may still have problems with e.g. S<< >> 1692 (?: 1693 \s(?!\s*>{2,}) 1694 ) 1695 )+ 1696 ) 1697 ) 1698 /xgo 1699 ) { 1700 DEBUG > 4 and print "\nParagraphic tokenstack = (@stack)\n"; 1701 if(defined $1) { 1702 if(defined $2) { 1703 DEBUG > 3 and print "Found complex start-text code \"$1\"\n"; 1704 push @stack, length($2) + 1; 1705 # length of the necessary complex end-code string 1706 } else { 1707 DEBUG > 3 and print "Found simple start-text code \"$1\"\n"; 1708 push @stack, 0; # signal that we're looking for simple 1709 } 1710 push @lineage, [ substr($1,0,1), {}, ]; # new node object 1711 push @{ $lineage[-2] }, $lineage[-1]; 1712 1713 } elsif(defined $4) { 1714 DEBUG > 3 and print "Found apparent complex end-text code \"$3$4\"\n"; 1715 # This is where it gets messy... 1716 if(! @stack) { 1717 # We saw " >>>>" but needed nothing. This is ALL just stuff then. 1718 DEBUG > 4 and print " But it's really just stuff.\n"; 1719 push @{ $lineage[-1] }, $3, $4; 1720 next; 1721 } elsif(!$stack[-1]) { 1722 # We saw " >>>>" but needed only ">". Back pos up. 1723 DEBUG > 4 and print " And that's more than we needed to close simple.\n"; 1724 push @{ $lineage[-1] }, $3; # That was a for-real space, too. 1725 pos($para) = pos($para) - length($4) + 1; 1726 } elsif($stack[-1] == length($4)) { 1727 # We found " >>>>", and it was exactly what we needed. Commonest case. 1728 DEBUG > 4 and print " And that's exactly what we needed to close complex.\n"; 1729 } elsif($stack[-1] < length($4)) { 1730 # We saw " >>>>" but needed only " >>". Back pos up. 1731 DEBUG > 4 and print " And that's more than we needed to close complex.\n"; 1732 pos($para) = pos($para) - length($4) + $stack[-1]; 1733 } else { 1734 # We saw " >>>>" but needed " >>>>>>". So this is all just stuff! 1735 DEBUG > 4 and print " But it's really just stuff, because we needed more.\n"; 1736 push @{ $lineage[-1] }, $3, $4; 1737 next; 1738 } 1739 #print "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n"; 1740 1741 push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] }; 1742 # Keep the element from being childless 1743 1744 pop @stack; 1745 pop @lineage; 1746 1747 } elsif(defined $5) { 1748 DEBUG > 3 and print "Found apparent simple end-text code \"$5\"\n"; 1749 1750 if(@stack and ! $stack[-1]) { 1751 # We're indeed expecting a simple end-code 1752 DEBUG > 4 and print " It's indeed an end-code.\n"; 1753 1754 if(length($5) == 2) { # There was a space there: " >" 1755 push @{ $lineage[-1] }, ' '; 1756 } elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element 1757 push @{ $lineage[-1] }, ''; # keep it from being really childless 1758 } 1759 1760 pop @stack; 1761 pop @lineage; 1762 } else { 1763 DEBUG > 4 and print " It's just stuff.\n"; 1764 push @{ $lineage[-1] }, $5; 1765 } 1766 1767 } elsif(defined $6) { 1768 DEBUG > 3 and print "Found stuff \"$6\"\n"; 1769 push @{ $lineage[-1] }, $6; 1770 1771 } else { 1772 # should never ever ever ever happen 1773 DEBUG and print "AYYAYAAAAA at line ", __LINE__, "\n"; 1774 die "SPORK 512512!"; 1775 } 1776 } 1777 1778 if(@stack) { # Uhoh, some sequences weren't closed. 1779 my $x= "..."; 1780 while(@stack) { 1781 push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] }; 1782 # Hmmmmm! 1783 1784 my $code = (pop @lineage)->[0]; 1785 my $ender_length = pop @stack; 1786 if($ender_length) { 1787 --$ender_length; 1788 $x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length); 1789 } else { 1790 $x = $code . "<$x>"; 1791 } 1792 } 1793 DEBUG > 1 and print "Unterminated $x sequence\n"; 1794 $self->whine($start_line, 1795 "Unterminated $x sequence", 1796 ); 1797 } 1798 1799 return $treelet; 1800} 1801 1802#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 1803 1804sub text_content_of_treelet { # method: $parser->text_content_of_treelet($lol) 1805 return stringify_lol($_[1]); 1806} 1807 1808sub stringify_lol { # function: stringify_lol($lol) 1809 my $string_form = ''; 1810 _stringify_lol( $_[0] => \$string_form ); 1811 return $string_form; 1812} 1813 1814sub _stringify_lol { # the real recursor 1815 my($lol, $to) = @_; 1816 for(my $i = 2; $i < @$lol; ++$i) { 1817 if( ref($lol->[$i] || '') and UNIVERSAL::isa($lol->[$i], 'ARRAY') ) { 1818 _stringify_lol( $lol->[$i], $to); # recurse! 1819 } else { 1820 $$to .= $lol->[$i]; 1821 } 1822 } 1823 return; 1824} 1825 1826#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 1827 1828sub _dump_curr_open { # return a string representation of the stack 1829 my $curr_open = $_[0]{'curr_open'}; 1830 1831 return '[empty]' unless @$curr_open; 1832 return join '; ', 1833 map {; 1834 ($_->[0] eq '=for') 1835 ? ( ($_->[1]{'~really'} || '=over') 1836 . ' ' . $_->[1]{'target'}) 1837 : $_->[0] 1838 } 1839 @$curr_open 1840 ; 1841} 1842 1843########################################################################### 1844my %pretty_form = ( 1845 "\a" => '\a', # ding! 1846 "\b" => '\b', # BS 1847 "\e" => '\e', # ESC 1848 "\f" => '\f', # FF 1849 "\t" => '\t', # tab 1850 "\cm" => '\cm', 1851 "\cj" => '\cj', 1852 "\n" => '\n', # probably overrides one of either \cm or \cj 1853 '"' => '\"', 1854 '\\' => '\\\\', 1855 '$' => '\\$', 1856 '@' => '\\@', 1857 '%' => '\\%', 1858 '#' => '\\#', 1859); 1860 1861sub pretty { # adopted from Class::Classless 1862 # Not the most brilliant routine, but passable. 1863 # Don't give it a cyclic data structure! 1864 my @stuff = @_; # copy 1865 my $x; 1866 my $out = 1867 # join ",\n" . 1868 join ", ", 1869 map {; 1870 if(!defined($_)) { 1871 "undef"; 1872 } elsif(ref($_) eq 'ARRAY' or ref($_) eq 'Pod::Simple::LinkSection') { 1873 $x = "[ " . pretty(@$_) . " ]" ; 1874 $x; 1875 } elsif(ref($_) eq 'SCALAR') { 1876 $x = "\\" . pretty($$_) ; 1877 $x; 1878 } elsif(ref($_) eq 'HASH') { 1879 my $hr = $_; 1880 $x = "{" . join(", ", 1881 map(pretty($_) . '=>' . pretty($hr->{$_}), 1882 sort keys %$hr ) ) . "}" ; 1883 $x; 1884 } elsif(!length($_)) { q{''} # empty string 1885 } elsif( 1886 $_ eq '0' # very common case 1887 or( 1888 m/^-?(?:[123456789]\d*|0)(?:\.\d+)?$/s 1889 and $_ ne '-0' # the strange case that that RE lets thru 1890 ) 1891 ) { $_; 1892 } else { 1893 if( chr(65) eq 'A' ) { 1894 s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])> 1895 #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg; 1896 <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg; 1897 } else { 1898 # We're in some crazy non-ASCII world! 1899 s<([^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])> 1900 #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg; 1901 <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg; 1902 } 1903 qq{"$_"}; 1904 } 1905 } @stuff; 1906 # $out =~ s/\n */ /g if length($out) < 75; 1907 return $out; 1908} 1909 1910#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 1911 1912# A rather unsubtle method of blowing away all the state information 1913# from a parser object so it can be reused. Provided as a utility for 1914# backward compatibilty in Pod::Man, etc. but not recommended for 1915# general use. 1916 1917sub reinit { 1918 my $self = shift; 1919 foreach (qw(source_dead source_filename doc_has_started 1920start_of_pod_block content_seen last_was_blank paras curr_open 1921line_count pod_para_count in_pod ~tried_gen_errata errata errors_seen 1922Title)) { 1923 1924 delete $self->{$_}; 1925 } 1926} 1927 1928#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 19291; 1930 1931