1package Pod::Simple::BlackBox; 2# 3# "What's in the box?" "Pain." 4# 5########################################################################### 6# 7# This is where all the scary things happen: parsing lines into 8# paragraphs; and then into directives, verbatims, and then also 9# turning formatting sequences into treelets. 10# 11# Are you really sure you want to read this code? 12# 13#----------------------------------------------------------------------------- 14# 15# The basic work of this module Pod::Simple::BlackBox is doing the dirty work 16# of parsing Pod into treelets (generally one per non-verbatim paragraph), and 17# to call the proper callbacks on the treelets. 18# 19# Every node in a treelet is a ['name', {attrhash}, ...children...] 20 21use integer; # vroom! 22use strict; 23use Carp (); 24use vars qw($VERSION ); 25$VERSION = '3.40'; 26#use constant DEBUG => 7; 27 28sub my_qr ($$) { 29 30 # $1 is a pattern to compile and return. Older perls compile any 31 # syntactically valid property, even if it isn't legal. To cope with 32 # this, return an empty string unless the compiled pattern also 33 # successfully matches $2, which the caller furnishes. 34 35 my ($input_re, $should_match) = @_; 36 # XXX could have a third parameter $shouldnt_match for extra safety 37 38 my $use_utf8 = ($] le 5.006002) ? 'use utf8;' : ""; 39 40 my $re = eval "no warnings; $use_utf8 qr/$input_re/"; 41 #print STDERR __LINE__, ": $input_re: $@\n" if $@; 42 return "" if $@; 43 44 my $matches = eval "no warnings; $use_utf8 '$should_match' =~ /$re/"; 45 #print STDERR __LINE__, ": $input_re: $@\n" if $@; 46 return "" if $@; 47 48 #print STDERR __LINE__, ": SUCCESS: $re\n" if $matches; 49 return $re if $matches; 50 51 #print STDERR __LINE__, ": $re: didn't match\n"; 52 return ""; 53} 54 55BEGIN { 56 require Pod::Simple; 57 *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG 58} 59 60# Matches a character iff the character will have a different meaning 61# if we choose CP1252 vs UTF-8 if there is no =encoding line. 62# This is broken for early Perls on non-ASCII platforms. 63my $non_ascii_re = my_qr('[[:^ascii:]]', "\xB6"); 64$non_ascii_re = qr/[\x80-\xFF]/ unless $non_ascii_re; 65 66# Use patterns understandable by Perl 5.6, if possible 67my $cs_re = my_qr('\p{IsCs}', "\x{D800}"); 68my $cn_re = my_qr('\p{IsCn}', "\x{09E4}"); # <reserved> code point unlikely 69 # to get assigned 70my $rare_blocks_re = my_qr('[\p{InIPAExtensions}\p{InSpacingModifierLetters}]', 71 "\x{250}"); 72$rare_blocks_re = my_qr('[\x{0250}-\x{02FF}]', "\x{250}") unless $rare_blocks_re; 73 74my $script_run_re = eval 'no warnings "experimental::script_run"; 75 qr/(*script_run: ^ .* $ )/x'; 76my $latin_re = my_qr('[\p{IsLatin}\p{IsInherited}\p{IsCommon}]', "\x{100}"); 77unless ($latin_re) { 78 # This was machine generated to be the ranges of the union of the above 79 # three properties, with things that were undefined by Unicode 4.1 filling 80 # gaps. That is the version in use when Perl advanced enough to 81 # successfully compile and execute the above pattern. 82 $latin_re = my_qr('[\x00-\x{02E9}\x{02EC}-\x{0374}\x{037E}\x{0385}\x{0387}\x{0485}\x{0486}\x{0589}\x{060C}\x{061B}\x{061F}\x{0640}\x{064B}-\x{0655}\x{0670}\x{06DD}\x{0951}-\x{0954}\x{0964}\x{0965}\x{0E3F}\x{10FB}\x{16EB}-\x{16ED}\x{1735}\x{1736}\x{1802}\x{1803}\x{1805}\x{1D00}-\x{1D25}\x{1D2C}-\x{1D5C}\x{1D62}-\x{1D65}\x{1D6B}-\x{1D77}\x{1D79}-\x{1DBE}\x{1DC0}-\x{1EF9}\x{2000}-\x{2125}\x{2127}-\x{27FF}\x{2900}-\x{2B13}\x{2E00}-\x{2E1D}\x{2FF0}-\x{3004}\x{3006}\x{3008}-\x{3020}\x{302A}-\x{302D}\x{3030}-\x{3037}\x{303C}-\x{303F}\x{3099}-\x{309C}\x{30A0}\x{30FB}\x{30FC}\x{3190}-\x{319F}\x{31C0}-\x{31CF}\x{3220}-\x{325F}\x{327F}-\x{32CF}\x{3358}-\x{33FF}\x{4DC0}-\x{4DFF}\x{A700}-\x{A716}\x{FB00}-\x{FB06}\x{FD3E}\x{FD3F}\x{FE00}-\x{FE6B}\x{FEFF}-\x{FF65}\x{FF70}\x{FF9E}\x{FF9F}\x{FFE0}-\x{FFFD}\x{10100}-\x{1013F}\x{1D000}-\x{1D1DD}\x{1D300}-\x{1D7FF}]', "\x{100}"); 83} 84 85my $every_char_is_latin_re = my_qr("^(?:$latin_re)*\\z", "A"); 86 87# Latin script code points not in the first release of Unicode 88my $later_latin_re = my_qr('[^\P{IsLatin}\p{IsAge=1.1}]', "\x{1F6}"); 89 90# If this perl doesn't have the Deprecated property, there's only one code 91# point in it that we need be concerned with. 92my $deprecated_re = my_qr('\p{IsDeprecated}', "\x{149}"); 93$deprecated_re = qr/\x{149}/ unless $deprecated_re; 94 95my $utf8_bom; 96if (($] ge 5.007_003)) { 97 $utf8_bom = "\x{FEFF}"; 98 utf8::encode($utf8_bom); 99} else { 100 $utf8_bom = "\xEF\xBB\xBF"; # No EBCDIC BOM detection for early Perls. 101} 102 103# This is used so that the 'content_seen' method doesn't return true on a 104# file that just happens to have a line that matches /^=[a-zA-z]/. Only if 105# there is a valid =foo line will we return that content was seen. 106my $seen_legal_directive = 0; 107 108#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 109 110sub parse_line { shift->parse_lines(@_) } # alias 111 112# - - - Turn back now! Run away! - - - 113 114sub parse_lines { # Usage: $parser->parse_lines(@lines) 115 # an undef means end-of-stream 116 my $self = shift; 117 118 my $code_handler = $self->{'code_handler'}; 119 my $cut_handler = $self->{'cut_handler'}; 120 my $wl_handler = $self->{'whiteline_handler'}; 121 $self->{'line_count'} ||= 0; 122 123 my $scratch; 124 125 DEBUG > 4 and 126 print STDERR "# Parsing starting at line ", $self->{'line_count'}, ".\n"; 127 128 DEBUG > 5 and 129 print STDERR "# About to parse lines: ", 130 join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n"; 131 132 my $paras = ($self->{'paras'} ||= []); 133 # paragraph buffer. Because we need to defer processing of =over 134 # directives and verbatim paragraphs. We call _ponder_paragraph_buffer 135 # to process this. 136 137 $self->{'pod_para_count'} ||= 0; 138 139 # An attempt to match the pod portions of a line. This is not fool proof, 140 # but is good enough to serve as part of the heuristic for guessing the pod 141 # encoding if not specified. 142 my $format_codes = join "", '[', grep { / ^ [A-Za-z] $/x } 143 keys %{$self->{accept_codes}}; 144 $format_codes .= ']'; 145 my $pod_chars_re = qr/ ^ = [A-Za-z]+ | $format_codes < /x; 146 147 my $line; 148 foreach my $source_line (@_) { 149 if( $self->{'source_dead'} ) { 150 DEBUG > 4 and print STDERR "# Source is dead.\n"; 151 last; 152 } 153 154 unless( defined $source_line ) { 155 DEBUG > 4 and print STDERR "# Undef-line seen.\n"; 156 157 push @$paras, ['~end', {'start_line' => $self->{'line_count'}}]; 158 push @$paras, $paras->[-1], $paras->[-1]; 159 # So that it definitely fills the buffer. 160 $self->{'source_dead'} = 1; 161 $self->_ponder_paragraph_buffer; 162 next; 163 } 164 165 166 if( $self->{'line_count'}++ ) { 167 ($line = $source_line) =~ tr/\n\r//d; 168 # If we don't have two vars, we'll end up with that there 169 # tr/// modding the (potentially read-only) original source line! 170 171 } else { 172 DEBUG > 2 and print STDERR "First line: [$source_line]\n"; 173 174 if( ($line = $source_line) =~ s/^$utf8_bom//s ) { 175 DEBUG and print STDERR "UTF-8 BOM seen. Faking a '=encoding utf8'.\n"; 176 $self->_handle_encoding_line( "=encoding utf8" ); 177 delete $self->{'_processed_encoding'}; 178 $line =~ tr/\n\r//d; 179 180 } elsif( $line =~ s/^\xFE\xFF//s ) { 181 DEBUG and print STDERR "Big-endian UTF-16 BOM seen. Aborting parsing.\n"; 182 $self->scream( 183 $self->{'line_count'}, 184 "UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet." 185 ); 186 splice @_; 187 push @_, undef; 188 next; 189 190 # TODO: implement somehow? 191 192 } elsif( $line =~ s/^\xFF\xFE//s ) { 193 DEBUG and print STDERR "Little-endian UTF-16 BOM seen. Aborting parsing.\n"; 194 $self->scream( 195 $self->{'line_count'}, 196 "UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet." 197 ); 198 splice @_; 199 push @_, undef; 200 next; 201 202 # TODO: implement somehow? 203 204 } else { 205 DEBUG > 2 and print STDERR "First line is BOM-less.\n"; 206 ($line = $source_line) =~ tr/\n\r//d; 207 } 208 } 209 210 if(!$self->{'parse_characters'} && !$self->{'encoding'} 211 && ($self->{'in_pod'} || $line =~ /^=/s) 212 && $line =~ /$non_ascii_re/ 213 ) { 214 215 my $encoding; 216 217 # No =encoding line, and we are at the first pod line in the input that 218 # contains a non-ascii byte, that is, one whose meaning varies depending 219 # on whether the file is encoded in UTF-8 or CP1252, which are the two 220 # possibilities permitted by the pod spec. (ASCII is assumed if the 221 # file only contains ASCII bytes.) In order to process this line, we 222 # need to figure out what encoding we will use for the file. 223 # 224 # Strictly speaking ISO 8859-1 (Latin 1) refers to the code points 225 # 160-255, but it is used here, as it often colloquially is, to refer to 226 # the complete set of code points 0-255, including ASCII (0-127), the C1 227 # controls (128-159), and strict Latin 1 (160-255). 228 # 229 # CP1252 is effectively a superset of Latin 1, because it differs only 230 # from colloquial 8859-1 in the C1 controls, which are very unlikely to 231 # actually be present in 8859-1 files, so can be used for other purposes 232 # without conflict. CP 1252 uses most of them for graphic characters. 233 # 234 # Note that all ASCII-range bytes represent their corresponding code 235 # points in both CP1252 and UTF-8. In ASCII platform UTF-8, all other 236 # code points require multiple (non-ASCII) bytes to represent. (A 237 # separate paragraph for EBCDIC is below.) The multi-byte 238 # representation is quite structured. If we find an isolated byte that 239 # would require multiple bytes to represent in UTF-8, we know that the 240 # encoding is not UTF-8. If we find a sequence of bytes that violates 241 # the UTF-8 structure, we also can presume the encoding isn't UTF-8, and 242 # hence must be 1252. 243 # 244 # But there are ambiguous cases where we could guess wrong. If so, the 245 # user will end up having to supply an =encoding line. We use all 246 # readily available information to improve our chances of guessing 247 # right. The odds of something not being UTF-8, but still passing a 248 # UTF-8 validity test go down very rapidly with increasing length of the 249 # sequence. Therefore we look at all non-ascii sequences on the line. 250 # If any of the sequences can't be UTF-8, we quit there and choose 251 # CP1252. If all could be UTF-8, we see if any of the code points 252 # represented are unlikely to be in pod. If so, we guess CP1252. If 253 # not, we check if the line is all in the same script; if not guess 254 # CP1252; otherwise UTF-8. For perls that don't have convenient script 255 # run testing, see if there is both Latin and non-Latin. If so, CP1252, 256 # otherwise UTF-8. 257 # 258 # On EBCDIC platforms, the situation is somewhat different. In 259 # UTF-EBCDIC, not only do ASCII-range bytes represent their code points, 260 # but so do the bytes that are for the C1 controls. Recall that these 261 # correspond to the unused portion of 8859-1 that 1252 mostly takes 262 # over. That means that there are fewer code points that are 263 # represented by multi-bytes. But, note that the these controls are 264 # very unlikely to be in pod text. So if we encounter one of them, it 265 # means that it is quite likely CP1252 and not UTF-8. The net result is 266 # the same code below is used for both platforms. 267 # 268 # XXX probably if the line has E<foo> that evaluates to illegal CP1252, 269 # then it is UTF-8. But we haven't processed E<> yet. 270 271 goto set_1252 if $] lt 5.006_000; # No UTF-8 on very early perls 272 273 my $copy; 274 275 no warnings 'utf8'; 276 277 if ($] ge 5.007_003) { 278 $copy = $line; 279 280 # On perls that have this function, we can use it to easily see if the 281 # sequence is valid UTF-8 or not; if valid it turns on the UTF-8 flag 282 # needed below for script run detection 283 goto set_1252 if ! utf8::decode($copy); 284 } 285 elsif (ord("A") != 65) { # Early EBCDIC, assume UTF-8. What's a windows 286 # code page doing here anyway? 287 goto set_utf8; 288 } 289 else { # ASCII, no decode(): do it ourselves using the fundamental 290 # characteristics of UTF-8 291 use if $] le 5.006002, 'utf8'; 292 293 my $char_ord; 294 my $needed; # How many continuation bytes to gobble up 295 296 # Initialize the translated line with a dummy character that will be 297 # deleted after everything else is done. This dummy makes sure that 298 # $copy will be in UTF-8. Doing it now avoids the bugs in early perls 299 # with upgrading in the middle 300 $copy = chr(0x100); 301 302 # Parse through the line 303 for (my $i = 0; $i < length $line; $i++) { 304 my $byte = substr($line, $i, 1); 305 306 # ASCII bytes are trivially dealt with 307 if ($byte !~ $non_ascii_re) { 308 $copy .= $byte; 309 next; 310 } 311 312 my $b_ord = ord $byte; 313 314 # Now figure out what this code point would be if the input is 315 # actually in UTF-8. If, in the process, we discover that it isn't 316 # well-formed UTF-8, we guess CP1252. 317 # 318 # Start the process. If it is UTF-8, we are at the first, start 319 # byte, of a multi-byte sequence. We look at this byte to figure 320 # out how many continuation bytes are needed, and to initialize the 321 # code point accumulator with the data from this byte. 322 # 323 # Normally the minimum continuation byte is 0x80, but in certain 324 # instances the minimum is a higher number. So the code below 325 # overrides this for those instances. 326 my $min_cont = 0x80; 327 328 if ($b_ord < 0xC2) { # A start byte < C2 is malformed 329 goto set_1252; 330 } 331 elsif ($b_ord <= 0xDF) { 332 $needed = 1; 333 $char_ord = $b_ord & 0x1F; 334 } 335 elsif ($b_ord <= 0xEF) { 336 $min_cont = 0xA0 if $b_ord == 0xE0; 337 $needed = 2; 338 $char_ord = $b_ord & (0x1F >> 1); 339 } 340 elsif ($b_ord <= 0xF4) { 341 $min_cont = 0x90 if $b_ord == 0xF0; 342 $needed = 3; 343 $char_ord = $b_ord & (0x1F >> 2); 344 } 345 else { # F4 is the highest start byte for legal Unicode; higher is 346 # unlikely to be in pod. 347 goto set_1252; 348 } 349 350 # ? not enough continuation bytes available 351 goto set_1252 if $i + $needed >= length $line; 352 353 # Accumulate the ordinal of the character from the remaining 354 # (continuation) bytes. 355 while ($needed-- > 0) { 356 my $cont = substr($line, ++$i, 1); 357 $b_ord = ord $cont; 358 goto set_1252 if $b_ord < $min_cont || $b_ord > 0xBF; 359 360 # In all cases, any next continuation bytes all have the same 361 # minimum legal value 362 $min_cont = 0x80; 363 364 # Accumulate this byte's contribution to the code point 365 $char_ord <<= 6; 366 $char_ord |= ($b_ord & 0x3F); 367 } 368 369 # Here, the sequence that formed this code point was valid UTF-8, 370 # so add the completed character to the output 371 $copy .= chr $char_ord; 372 } # End of loop through line 373 374 # Delete the dummy first character 375 $copy = substr($copy, 1); 376 } 377 378 # Here, $copy is legal UTF-8. 379 380 # If it can't be legal CP1252, no need to look further. (These bytes 381 # aren't valid in CP1252.) This test could have been placed higher in 382 # the code, but it seemed wrong to set the encoding to UTF-8 without 383 # making sure that the very first instance is well-formed. But what if 384 # it isn't legal CP1252 either? We have to choose one or the other, and 385 # It seems safer to favor the single-byte encoding over the multi-byte. 386 goto set_utf8 if ord("A") == 65 && $line =~ /[\x81\x8D\x8F\x90\x9D]/; 387 388 # The C1 controls are not likely to appear in pod 389 goto set_1252 if ord("A") == 65 && $copy =~ /[\x80-\x9F]/; 390 391 # Nor are surrogates nor unassigned, nor deprecated. 392 DEBUG > 8 and print STDERR __LINE__, ": $copy: surrogate\n" if $copy =~ $cs_re; 393 goto set_1252 if $cs_re && $copy =~ $cs_re; 394 DEBUG > 8 and print STDERR __LINE__, ": $copy: unassigned\n" if $cn_re && $copy =~ $cn_re; 395 goto set_1252 if $cn_re && $copy =~ $cn_re; 396 DEBUG > 8 and print STDERR __LINE__, ": $copy: deprecated\n" if $copy =~ $deprecated_re; 397 goto set_1252 if $copy =~ $deprecated_re; 398 399 # Nor are rare code points. But this is hard to determine. khw 400 # believes that IPA characters and the modifier letters are unlikely to 401 # be in pod (and certainly very unlikely to be the in the first line in 402 # the pod containing non-ASCII) 403 DEBUG > 8 and print STDERR __LINE__, ": $copy: rare\n" if $copy =~ $rare_blocks_re; 404 goto set_1252 if $rare_blocks_re && $copy =~ $rare_blocks_re; 405 406 # The first Unicode version included essentially every Latin character 407 # in modern usage. So, a Latin character not in the first release will 408 # unlikely be in pod. 409 DEBUG > 8 and print STDERR __LINE__, ": $copy: later_latin\n" if $later_latin_re && $copy =~ $later_latin_re; 410 goto set_1252 if $later_latin_re && $copy =~ $later_latin_re; 411 412 # On perls that handle script runs, if the UTF-8 interpretation yields 413 # a single script, we guess UTF-8, otherwise just having a mixture of 414 # scripts is suspicious, so guess CP1252. We first strip off, as best 415 # we can, the ASCII characters that look like they are pod directives, 416 # as these would always show as mixed with non-Latin text. 417 $copy =~ s/$pod_chars_re//g; 418 419 if ($script_run_re) { 420 goto set_utf8 if $copy =~ $script_run_re; 421 DEBUG > 8 and print STDERR __LINE__, ": not script run\n"; 422 goto set_1252; 423 } 424 425 # Even without script runs, but on recent enough perls and Unicodes, we 426 # can check if there is a mixture of both Latin and non-Latin. Again, 427 # having a mixture of scripts is suspicious, so assume CP1252 428 429 # If it's all non-Latin, there is no CP1252, as that is Latin 430 # characters and punct, etc. 431 DEBUG > 8 and print STDERR __LINE__, ": $copy: not latin\n" if $copy !~ $latin_re; 432 goto set_utf8 if $copy !~ $latin_re; 433 434 DEBUG > 8 and print STDERR __LINE__, ": $copy: all latin\n" if $copy =~ $every_char_is_latin_re; 435 goto set_utf8 if $copy =~ $every_char_is_latin_re; 436 437 DEBUG > 8 and print STDERR __LINE__, ": $copy: mixed\n"; 438 439 set_1252: 440 DEBUG > 9 and print STDERR __LINE__, ": $copy: is 1252\n"; 441 $encoding = 'CP1252'; 442 goto done_set; 443 444 set_utf8: 445 DEBUG > 9 and print STDERR __LINE__, ": $copy: is UTF-8\n"; 446 $encoding = 'UTF-8'; 447 448 done_set: 449 $self->_handle_encoding_line( "=encoding $encoding" ); 450 delete $self->{'_processed_encoding'}; 451 $self->{'_transcoder'} && $self->{'_transcoder'}->($line); 452 453 my ($word) = $line =~ /(\S*$non_ascii_re\S*)/; 454 455 $self->whine( 456 $self->{'line_count'}, 457 "Non-ASCII character seen before =encoding in '$word'. Assuming $encoding" 458 ); 459 } 460 461 DEBUG > 5 and print STDERR "# Parsing line: [$line]\n"; 462 463 if(!$self->{'in_pod'}) { 464 if($line =~ m/^=([a-zA-Z][a-zA-Z0-9]*)(?:\s|$)/s) { 465 if($1 eq 'cut') { 466 $self->scream( 467 $self->{'line_count'}, 468 "=cut found outside a pod block. Skipping to next block." 469 ); 470 471 ## Before there were errata sections in the world, it was 472 ## least-pessimal to abort processing the file. But now we can 473 ## just barrel on thru (but still not start a pod block). 474 #splice @_; 475 #push @_, undef; 476 477 next; 478 } else { 479 $self->{'in_pod'} = $self->{'start_of_pod_block'} 480 = $self->{'last_was_blank'} = 1; 481 # And fall thru to the pod-mode block further down 482 } 483 } else { 484 DEBUG > 5 and print STDERR "# It's a code-line.\n"; 485 $code_handler->(map $_, $line, $self->{'line_count'}, $self) 486 if $code_handler; 487 # Note: this may cause code to be processed out of order relative 488 # to pods, but in order relative to cuts. 489 490 # Note also that we haven't yet applied the transcoding to $line 491 # by time we call $code_handler! 492 493 if( $line =~ m/^#\s*line\s+(\d+)\s*(?:\s"([^"]+)")?\s*$/ ) { 494 # That RE is from perlsyn, section "Plain Old Comments (Not!)", 495 #$fname = $2 if defined $2; 496 #DEBUG > 1 and defined $2 and print STDERR "# Setting fname to \"$fname\"\n"; 497 DEBUG > 1 and print STDERR "# Setting nextline to $1\n"; 498 $self->{'line_count'} = $1 - 1; 499 } 500 501 next; 502 } 503 } 504 505 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 506 # Else we're in pod mode: 507 508 # Apply any necessary transcoding: 509 $self->{'_transcoder'} && $self->{'_transcoder'}->($line); 510 511 # HERE WE CATCH =encoding EARLY! 512 if( $line =~ m/^=encoding\s+\S+\s*$/s ) { 513 next if $self->parse_characters; # Ignore this line 514 $line = $self->_handle_encoding_line( $line ); 515 } 516 517 if($line =~ m/^=cut/s) { 518 # here ends the pod block, and therefore the previous pod para 519 DEBUG > 1 and print STDERR "Noting =cut at line ${$self}{'line_count'}\n"; 520 $self->{'in_pod'} = 0; 521 # ++$self->{'pod_para_count'}; 522 $self->_ponder_paragraph_buffer(); 523 # by now it's safe to consider the previous paragraph as done. 524 DEBUG > 6 and print STDERR "Processing any cut handler, line ${$self}{'line_count'}\n"; 525 $cut_handler->(map $_, $line, $self->{'line_count'}, $self) 526 if $cut_handler; 527 528 # TODO: add to docs: Note: this may cause cuts to be processed out 529 # of order relative to pods, but in order relative to code. 530 531 } elsif($line =~ m/^(\s*)$/s) { # it's a blank line 532 if (defined $1 and $1 =~ /[^\S\r\n]/) { # it's a white line 533 $wl_handler->(map $_, $line, $self->{'line_count'}, $self) 534 if $wl_handler; 535 } 536 537 if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') { 538 DEBUG > 1 and print STDERR "Saving blank line at line ${$self}{'line_count'}\n"; 539 push @{$paras->[-1]}, $line; 540 } # otherwise it's not interesting 541 542 if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) { 543 DEBUG > 1 and print STDERR "Noting para ends with blank line at ${$self}{'line_count'}\n"; 544 } 545 546 $self->{'last_was_blank'} = 1; 547 548 } elsif($self->{'last_was_blank'}) { # A non-blank line starting a new para... 549 550 if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(\s+|$)(.*)/s) { 551 # THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS 552 my $new = [$1, {'start_line' => $self->{'line_count'}}, $3]; 553 $new->[1]{'~orig_spacer'} = $2 if $2 && $2 ne " "; 554 # Note that in "=head1 foo", the WS is lost. 555 # Example: ['=head1', {'start_line' => 123}, ' foo'] 556 557 ++$self->{'pod_para_count'}; 558 559 $self->_ponder_paragraph_buffer(); 560 # by now it's safe to consider the previous paragraph as done. 561 562 push @$paras, $new; # the new incipient paragraph 563 DEBUG > 1 and print STDERR "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n"; 564 565 } elsif($line =~ m/^\s/s) { 566 567 if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') { 568 DEBUG > 1 and print STDERR "Resuming verbatim para at line ${$self}{'line_count'}\n"; 569 push @{$paras->[-1]}, $line; 570 } else { 571 ++$self->{'pod_para_count'}; 572 $self->_ponder_paragraph_buffer(); 573 # by now it's safe to consider the previous paragraph as done. 574 DEBUG > 1 and print STDERR "Starting verbatim para at line ${$self}{'line_count'}\n"; 575 push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line]; 576 } 577 } else { 578 ++$self->{'pod_para_count'}; 579 $self->_ponder_paragraph_buffer(); 580 # by now it's safe to consider the previous paragraph as done. 581 push @$paras, ['~Para', {'start_line' => $self->{'line_count'}}, $line]; 582 DEBUG > 1 and print STDERR "Starting plain para at line ${$self}{'line_count'}\n"; 583 } 584 $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0; 585 586 } else { 587 # It's a non-blank line /continuing/ the current para 588 if(@$paras) { 589 DEBUG > 2 and print STDERR "Line ${$self}{'line_count'} continues current paragraph\n"; 590 push @{$paras->[-1]}, $line; 591 } else { 592 # Unexpected case! 593 die "Continuing a paragraph but \@\$paras is empty?"; 594 } 595 $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0; 596 } 597 598 } # ends the big while loop 599 600 DEBUG > 1 and print STDERR (pretty(@$paras), "\n"); 601 return $self; 602} 603 604#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 605 606sub _handle_encoding_line { 607 my($self, $line) = @_; 608 609 return if $self->parse_characters; 610 611 # The point of this routine is to set $self->{'_transcoder'} as indicated. 612 613 return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s; 614 DEBUG > 1 and print STDERR "Found an encoding line \"=encoding $1\"\n"; 615 616 my $e = $1; 617 my $orig = $e; 618 push @{ $self->{'encoding_command_reqs'} }, "=encoding $orig"; 619 620 my $enc_error; 621 622 # Cf. perldoc Encode and perldoc Encode::Supported 623 624 require Pod::Simple::Transcode; 625 626 if( $self->{'encoding'} ) { 627 my $norm_current = $self->{'encoding'}; 628 my $norm_e = $e; 629 foreach my $that ($norm_current, $norm_e) { 630 $that = lc($that); 631 $that =~ s/[-_]//g; 632 } 633 if($norm_current eq $norm_e) { 634 DEBUG > 1 and print STDERR "The '=encoding $orig' line is ", 635 "redundant. ($norm_current eq $norm_e). Ignoring.\n"; 636 $enc_error = ''; 637 # But that doesn't necessarily mean that the earlier one went okay 638 } else { 639 $enc_error = "Encoding is already set to " . $self->{'encoding'}; 640 DEBUG > 1 and print STDERR $enc_error; 641 } 642 } elsif ( 643 # OK, let's turn on the encoding 644 do { 645 DEBUG > 1 and print STDERR " Setting encoding to $e\n"; 646 $self->{'encoding'} = $e; 647 1; 648 } 649 and $e eq 'HACKRAW' 650 ) { 651 DEBUG and print STDERR " Putting in HACKRAW (no-op) encoding mode.\n"; 652 653 } elsif( Pod::Simple::Transcode::->encoding_is_available($e) ) { 654 655 die($enc_error = "WHAT? _transcoder is already set?!") 656 if $self->{'_transcoder'}; # should never happen 657 require Pod::Simple::Transcode; 658 $self->{'_transcoder'} = Pod::Simple::Transcode::->make_transcoder($e); 659 eval { 660 my @x = ('', "abc", "123"); 661 $self->{'_transcoder'}->(@x); 662 }; 663 $@ && die( $enc_error = 664 "Really unexpected error setting up encoding $e: $@\nAborting" 665 ); 666 $self->{'detected_encoding'} = $e; 667 668 } else { 669 my @supported = Pod::Simple::Transcode::->all_encodings; 670 671 # Note unsupported, and complain 672 DEBUG and print STDERR " Encoding [$e] is unsupported.", 673 "\nSupporteds: @supported\n"; 674 my $suggestion = ''; 675 676 # Look for a near match: 677 my $norm = lc($e); 678 $norm =~ tr[-_][]d; 679 my $n; 680 foreach my $enc (@supported) { 681 $n = lc($enc); 682 $n =~ tr[-_][]d; 683 next unless $n eq $norm; 684 $suggestion = " (Maybe \"$e\" should be \"$enc\"?)"; 685 last; 686 } 687 my $encmodver = Pod::Simple::Transcode::->encmodver; 688 $enc_error = join '' => 689 "This document probably does not appear as it should, because its ", 690 "\"=encoding $e\" line calls for an unsupported encoding.", 691 $suggestion, " [$encmodver\'s supported encodings are: @supported]" 692 ; 693 694 $self->scream( $self->{'line_count'}, $enc_error ); 695 } 696 push @{ $self->{'encoding_command_statuses'} }, $enc_error; 697 if (defined($self->{'_processed_encoding'})) { 698 # Double declaration. 699 $self->scream( $self->{'line_count'}, 'Cannot have multiple =encoding directives'); 700 } 701 $self->{'_processed_encoding'} = $orig; 702 703 return $line; 704} 705 706# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 707 708sub _handle_encoding_second_level { 709 # By time this is called, the encoding (if well formed) will already 710 # have been acted on. 711 my($self, $para) = @_; 712 my @x = @$para; 713 my $content = join ' ', splice @x, 2; 714 $content =~ s/^\s+//s; 715 $content =~ s/\s+$//s; 716 717 DEBUG > 2 and print STDERR "Ogling encoding directive: =encoding $content\n"; 718 719 if (defined($self->{'_processed_encoding'})) { 720 #if($content ne $self->{'_processed_encoding'}) { 721 # Could it happen? 722 #} 723 delete $self->{'_processed_encoding'}; 724 # It's already been handled. Check for errors. 725 if(! $self->{'encoding_command_statuses'} ) { 726 DEBUG > 2 and print STDERR " CRAZY ERROR: It wasn't really handled?!\n"; 727 } elsif( $self->{'encoding_command_statuses'}[-1] ) { 728 $self->whine( $para->[1]{'start_line'}, 729 sprintf "Couldn't do %s: %s", 730 $self->{'encoding_command_reqs' }[-1], 731 $self->{'encoding_command_statuses'}[-1], 732 ); 733 } else { 734 DEBUG > 2 and print STDERR " (Yup, it was successfully handled already.)\n"; 735 } 736 737 } else { 738 # Otherwise it's a syntax error 739 $self->whine( $para->[1]{'start_line'}, 740 "Invalid =encoding syntax: $content" 741 ); 742 } 743 744 return; 745} 746 747#~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~` 748 749{ 750my $m = -321; # magic line number 751 752sub _gen_errata { 753 my $self = $_[0]; 754 # Return 0 or more fake-o paragraphs explaining the accumulated 755 # errors on this document. 756 757 return() unless $self->{'errata'} and keys %{$self->{'errata'}}; 758 759 my @out; 760 761 foreach my $line (sort {$a <=> $b} keys %{$self->{'errata'}}) { 762 push @out, 763 ['=item', {'start_line' => $m}, "Around line $line:"], 764 map( ['~Para', {'start_line' => $m, '~cooked' => 1}, 765 #['~Top', {'start_line' => $m}, 766 $_ 767 #] 768 ], 769 @{$self->{'errata'}{$line}} 770 ) 771 ; 772 } 773 774 # TODO: report of unknown entities? unrenderable characters? 775 776 unshift @out, 777 ['=head1', {'start_line' => $m, 'errata' => 1}, 'POD ERRORS'], 778 ['~Para', {'start_line' => $m, '~cooked' => 1, 'errata' => 1}, 779 "Hey! ", 780 ['B', {}, 781 'The above document had some coding errors, which are explained below:' 782 ] 783 ], 784 ['=over', {'start_line' => $m, 'errata' => 1}, ''], 785 ; 786 787 push @out, 788 ['=back', {'start_line' => $m, 'errata' => 1}, ''], 789 ; 790 791 DEBUG and print STDERR "\n<<\n", pretty(\@out), "\n>>\n\n"; 792 793 return @out; 794} 795 796} 797 798#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 799 800############################################################################## 801## 802## stop reading now stop reading now stop reading now stop reading now stop 803## 804## HERE IT BECOMES REALLY SCARY 805## 806## stop reading now stop reading now stop reading now stop reading now stop 807## 808############################################################################## 809 810sub _ponder_paragraph_buffer { 811 812 # Para-token types as found in the buffer. 813 # ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end, 814 # =over, =back, =item 815 # and the null =pod (to be complained about if over one line) 816 # 817 # "~data" paragraphs are something we generate at this level, depending on 818 # a currently open =over region 819 820 # Events fired: Begin and end for: 821 # directivename (like head1 .. head4), item, extend, 822 # for (from =begin...=end, =for), 823 # over-bullet, over-number, over-text, over-block, 824 # item-bullet, item-number, item-text, 825 # Document, 826 # Data, Para, Verbatim 827 # B, C, longdirname (TODO -- wha?), etc. for all directives 828 # 829 830 my $self = $_[0]; 831 my $paras; 832 return unless @{$paras = $self->{'paras'}}; 833 my $curr_open = ($self->{'curr_open'} ||= []); 834 835 my $scratch; 836 837 DEBUG > 10 and print STDERR "# Paragraph buffer: <<", pretty($paras), ">>\n"; 838 839 # We have something in our buffer. So apparently the document has started. 840 unless($self->{'doc_has_started'}) { 841 $self->{'doc_has_started'} = 1; 842 843 my $starting_contentless; 844 $starting_contentless = 845 ( 846 !@$curr_open 847 and @$paras and ! grep $_->[0] ne '~end', @$paras 848 # i.e., if the paras is all ~ends 849 ) 850 ; 851 DEBUG and print STDERR "# Starting ", 852 $starting_contentless ? 'contentless' : 'contentful', 853 " document\n" 854 ; 855 856 $self->_handle_element_start( 857 ($scratch = 'Document'), 858 { 859 'start_line' => $paras->[0][1]{'start_line'}, 860 $starting_contentless ? ( 'contentless' => 1 ) : (), 861 }, 862 ); 863 } 864 865 my($para, $para_type); 866 while(@$paras) { 867 868 # If a directive, assume it's legal; subtract below if found not to be 869 $seen_legal_directive++ if $paras->[0][0] =~ /^=/; 870 871 last if @$paras == 1 872 and ( $paras->[0][0] eq '=over' 873 or $paras->[0][0] eq '=item' 874 or ($paras->[0][0] eq '~Verbatim' and $self->{'in_pod'})); 875 # Those're the three kinds of paragraphs that require lookahead. 876 # Actually, an "=item Foo" inside an <over type=text> region 877 # and any =item inside an <over type=block> region (rare) 878 # don't require any lookahead, but all others (bullets 879 # and numbers) do. 880 # The verbatim is different from the other two, because those might be 881 # like: 882 # 883 # =item 884 # ... 885 # =cut 886 # ... 887 # =item 888 # 889 # The =cut here finishes the paragraph but doesn't terminate the =over 890 # they should be in. (khw apologizes that he didn't comment at the time 891 # why the 'in_pod' works, and no longer remembers why, and doesn't think 892 # it is currently worth the effort to re-figure it out.) 893 894# TODO: whinge about many kinds of directives in non-resolving =for regions? 895# TODO: many? like what? =head1 etc? 896 897 $para = shift @$paras; 898 $para_type = $para->[0]; 899 900 DEBUG > 1 and print STDERR "Pondering a $para_type paragraph, given the stack: (", 901 $self->_dump_curr_open(), ")\n"; 902 903 if($para_type eq '=for') { 904 next if $self->_ponder_for($para,$curr_open,$paras); 905 906 } elsif($para_type eq '=begin') { 907 next if $self->_ponder_begin($para,$curr_open,$paras); 908 909 } elsif($para_type eq '=end') { 910 next if $self->_ponder_end($para,$curr_open,$paras); 911 912 } elsif($para_type eq '~end') { # The virtual end-document signal 913 next if $self->_ponder_doc_end($para,$curr_open,$paras); 914 } 915 916 917 # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ 918 #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ 919 if(grep $_->[1]{'~ignore'}, @$curr_open) { 920 DEBUG > 1 and 921 print STDERR "Skipping $para_type paragraph because in ignore mode.\n"; 922 next; 923 } 924 #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ 925 # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ 926 927 if($para_type eq '=pod') { 928 $self->_ponder_pod($para,$curr_open,$paras); 929 930 } elsif($para_type eq '=over') { 931 next if $self->_ponder_over($para,$curr_open,$paras); 932 933 } elsif($para_type eq '=back') { 934 next if $self->_ponder_back($para,$curr_open,$paras); 935 936 } else { 937 938 # All non-magical codes!!! 939 940 # Here we start using $para_type for our own twisted purposes, to 941 # mean how it should get treated, not as what the element name 942 # should be. 943 944 DEBUG > 1 and print STDERR "Pondering non-magical $para_type\n"; 945 946 my $i; 947 948 # Enforce some =headN discipline 949 if($para_type =~ m/^=head\d$/s 950 and ! $self->{'accept_heads_anywhere'} 951 and @$curr_open 952 and $curr_open->[-1][0] eq '=over' 953 ) { 954 DEBUG > 2 and print STDERR "'=$para_type' inside an '=over'!\n"; 955 $self->whine( 956 $para->[1]{'start_line'}, 957 "You forgot a '=back' before '$para_type'" 958 ); 959 unshift @$paras, ['=back', {}, ''], $para; # close the =over 960 next; 961 } 962 963 964 if($para_type eq '=item') { 965 966 my $over; 967 unless(@$curr_open and 968 $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) { 969 $self->whine( 970 $para->[1]{'start_line'}, 971 "'=item' outside of any '=over'" 972 ); 973 unshift @$paras, 974 ['=over', {'start_line' => $para->[1]{'start_line'}}, ''], 975 $para 976 ; 977 next; 978 } 979 980 981 my $over_type = $over->[1]{'~type'}; 982 983 if(!$over_type) { 984 # Shouldn't happen1 985 die "Typeless over in stack, starting at line " 986 . $over->[1]{'start_line'}; 987 988 } elsif($over_type eq 'block') { 989 unless($curr_open->[-1][1]{'~bitched_about'}) { 990 $curr_open->[-1][1]{'~bitched_about'} = 1; 991 $self->whine( 992 $curr_open->[-1][1]{'start_line'}, 993 "You can't have =items (as at line " 994 . $para->[1]{'start_line'} 995 . ") unless the first thing after the =over is an =item" 996 ); 997 } 998 # Just turn it into a paragraph and reconsider it 999 $para->[0] = '~Para'; 1000 unshift @$paras, $para; 1001 next; 1002 1003 } elsif($over_type eq 'text') { 1004 my $item_type = $self->_get_item_type($para); 1005 # That kills the content of the item if it's a number or bullet. 1006 DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; 1007 1008 if($item_type eq 'text') { 1009 # Nothing special needs doing for 'text' 1010 } elsif($item_type eq 'number' or $item_type eq 'bullet') { 1011 $self->whine( 1012 $para->[1]{'start_line'}, 1013 "Expected text after =item, not a $item_type" 1014 ); 1015 # Undo our clobbering: 1016 push @$para, $para->[1]{'~orig_content'}; 1017 delete $para->[1]{'number'}; 1018 # Only a PROPER item-number element is allowed 1019 # to have a number attribute. 1020 } else { 1021 die "Unhandled item type $item_type"; # should never happen 1022 } 1023 1024 # =item-text thingies don't need any assimilation, it seems. 1025 1026 } elsif($over_type eq 'number') { 1027 my $item_type = $self->_get_item_type($para); 1028 # That kills the content of the item if it's a number or bullet. 1029 DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; 1030 1031 my $expected_value = ++ $curr_open->[-1][1]{'~counter'}; 1032 1033 if($item_type eq 'bullet') { 1034 # Hm, it's not numeric. Correct for this. 1035 $para->[1]{'number'} = $expected_value; 1036 $self->whine( 1037 $para->[1]{'start_line'}, 1038 "Expected '=item $expected_value'" 1039 ); 1040 push @$para, $para->[1]{'~orig_content'}; 1041 # restore the bullet, blocking the assimilation of next para 1042 1043 } elsif($item_type eq 'text') { 1044 # Hm, it's not numeric. Correct for this. 1045 $para->[1]{'number'} = $expected_value; 1046 $self->whine( 1047 $para->[1]{'start_line'}, 1048 "Expected '=item $expected_value'" 1049 ); 1050 # Text content will still be there and will block next ~Para 1051 1052 } elsif($item_type ne 'number') { 1053 die "Unknown item type $item_type"; # should never happen 1054 1055 } elsif($expected_value == $para->[1]{'number'}) { 1056 DEBUG > 1 and print STDERR " Numeric item has the expected value of $expected_value\n"; 1057 1058 } else { 1059 DEBUG > 1 and print STDERR " Numeric item has ", $para->[1]{'number'}, 1060 " instead of the expected value of $expected_value\n"; 1061 $self->whine( 1062 $para->[1]{'start_line'}, 1063 "You have '=item " . $para->[1]{'number'} . 1064 "' instead of the expected '=item $expected_value'" 1065 ); 1066 $para->[1]{'number'} = $expected_value; # correcting!! 1067 } 1068 1069 if(@$para == 2) { 1070 # For the cases where we /didn't/ push to @$para 1071 if($paras->[0][0] eq '~Para') { 1072 DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n"; 1073 push @$para, splice @{shift @$paras},2; 1074 } else { 1075 DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n"; 1076 push @$para, ''; # Just so it's not contentless 1077 } 1078 } 1079 1080 1081 } elsif($over_type eq 'bullet') { 1082 my $item_type = $self->_get_item_type($para); 1083 # That kills the content of the item if it's a number or bullet. 1084 DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; 1085 1086 if($item_type eq 'bullet') { 1087 # as expected! 1088 1089 if( $para->[1]{'~_freaky_para_hack'} ) { 1090 DEBUG and print STDERR "Accomodating '=item * Foo' tolerance hack.\n"; 1091 push @$para, $para->[1]{'~_freaky_para_hack'}; 1092 } 1093 1094 } elsif($item_type eq 'number') { 1095 $self->whine( 1096 $para->[1]{'start_line'}, 1097 "Expected '=item *'" 1098 ); 1099 push @$para, $para->[1]{'~orig_content'}; 1100 # and block assimilation of the next paragraph 1101 delete $para->[1]{'number'}; 1102 # Only a PROPER item-number element is allowed 1103 # to have a number attribute. 1104 } elsif($item_type eq 'text') { 1105 $self->whine( 1106 $para->[1]{'start_line'}, 1107 "Expected '=item *'" 1108 ); 1109 # But doesn't need processing. But it'll block assimilation 1110 # of the next para. 1111 } else { 1112 die "Unhandled item type $item_type"; # should never happen 1113 } 1114 1115 if(@$para == 2) { 1116 # For the cases where we /didn't/ push to @$para 1117 if($paras->[0][0] eq '~Para') { 1118 DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n"; 1119 push @$para, splice @{shift @$paras},2; 1120 } else { 1121 DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n"; 1122 push @$para, ''; # Just so it's not contentless 1123 } 1124 } 1125 1126 } else { 1127 die "Unhandled =over type \"$over_type\"?"; 1128 # Shouldn't happen! 1129 } 1130 1131 $para_type = 'Plain'; 1132 $para->[0] .= '-' . $over_type; 1133 # Whew. Now fall thru and process it. 1134 1135 1136 } elsif($para_type eq '=extend') { 1137 # Well, might as well implement it here. 1138 $self->_ponder_extend($para); 1139 next; # and skip 1140 } elsif($para_type eq '=encoding') { 1141 # Not actually acted on here, but we catch errors here. 1142 $self->_handle_encoding_second_level($para); 1143 next unless $self->keep_encoding_directive; 1144 $para_type = 'Plain'; 1145 } elsif($para_type eq '~Verbatim') { 1146 $para->[0] = 'Verbatim'; 1147 $para_type = '?Verbatim'; 1148 } elsif($para_type eq '~Para') { 1149 $para->[0] = 'Para'; 1150 $para_type = '?Plain'; 1151 } elsif($para_type eq 'Data') { 1152 $para->[0] = 'Data'; 1153 $para_type = '?Data'; 1154 } elsif( $para_type =~ s/^=//s 1155 and defined( $para_type = $self->{'accept_directives'}{$para_type} ) 1156 ) { 1157 DEBUG > 1 and print STDERR " Pondering known directive ${$para}[0] as $para_type\n"; 1158 } else { 1159 # An unknown directive! 1160 $seen_legal_directive--; 1161 DEBUG > 1 and printf STDERR "Unhandled directive %s (Handled: %s)\n", 1162 $para->[0], join(' ', sort keys %{$self->{'accept_directives'}} ) 1163 ; 1164 $self->whine( 1165 $para->[1]{'start_line'}, 1166 "Unknown directive: $para->[0]" 1167 ); 1168 1169 # And maybe treat it as text instead of just letting it go? 1170 next; 1171 } 1172 1173 if($para_type =~ s/^\?//s) { 1174 if(! @$curr_open) { # usual case 1175 DEBUG and print STDERR "Treating $para_type paragraph as such because stack is empty.\n"; 1176 } else { 1177 my @fors = grep $_->[0] eq '=for', @$curr_open; 1178 DEBUG > 1 and print STDERR "Containing fors: ", 1179 join(',', map $_->[1]{'target'}, @fors), "\n"; 1180 1181 if(! @fors) { 1182 DEBUG and print STDERR "Treating $para_type paragraph as such because stack has no =for's\n"; 1183 1184 #} elsif(grep $_->[1]{'~resolve'}, @fors) { 1185 #} elsif(not grep !$_->[1]{'~resolve'}, @fors) { 1186 } elsif( $fors[-1][1]{'~resolve'} ) { 1187 # Look to the immediately containing for 1188 1189 if($para_type eq 'Data') { 1190 DEBUG and print STDERR "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n"; 1191 $para->[0] = 'Para'; 1192 $para_type = 'Plain'; 1193 } else { 1194 DEBUG and print STDERR "Treating $para_type paragraph as such because the containing =for ($fors[-1][1]{'target'}) is a resolver\n"; 1195 } 1196 } else { 1197 DEBUG and print STDERR "Treating $para_type paragraph as Data because the containing =for ($fors[-1][1]{'target'}) is a non-resolver\n"; 1198 $para->[0] = $para_type = 'Data'; 1199 } 1200 } 1201 } 1202 1203 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1204 if($para_type eq 'Plain') { 1205 $self->_ponder_Plain($para); 1206 } elsif($para_type eq 'Verbatim') { 1207 $self->_ponder_Verbatim($para); 1208 } elsif($para_type eq 'Data') { 1209 $self->_ponder_Data($para); 1210 } else { 1211 die "\$para type is $para_type -- how did that happen?"; 1212 # Shouldn't happen. 1213 } 1214 1215 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1216 $para->[0] =~ s/^[~=]//s; 1217 1218 DEBUG and print STDERR "\n", pretty($para), "\n"; 1219 1220 # traverse the treelet (which might well be just one string scalar) 1221 $self->{'content_seen'} ||= 1 if $seen_legal_directive 1222 && ! $self->{'~tried_gen_errata'}; 1223 $self->_traverse_treelet_bit(@$para); 1224 } 1225 } 1226 1227 return; 1228} 1229 1230########################################################################### 1231# The sub-ponderers... 1232 1233 1234 1235sub _ponder_for { 1236 my ($self,$para,$curr_open,$paras) = @_; 1237 1238 # Fake it out as a begin/end 1239 my $target; 1240 1241 if(grep $_->[1]{'~ignore'}, @$curr_open) { 1242 DEBUG > 1 and print STDERR "Ignoring ignorable =for\n"; 1243 return 1; 1244 } 1245 1246 for(my $i = 2; $i < @$para; ++$i) { 1247 if($para->[$i] =~ s/^\s*(\S+)\s*//s) { 1248 $target = $1; 1249 last; 1250 } 1251 } 1252 unless(defined $target) { 1253 $self->whine( 1254 $para->[1]{'start_line'}, 1255 "=for without a target?" 1256 ); 1257 return 1; 1258 } 1259 DEBUG > 1 and 1260 print STDERR "Faking out a =for $target as a =begin $target / =end $target\n"; 1261 1262 $para->[0] = 'Data'; 1263 1264 unshift @$paras, 1265 ['=begin', 1266 {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, 1267 $target, 1268 ], 1269 $para, 1270 ['=end', 1271 {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, 1272 $target, 1273 ], 1274 ; 1275 1276 return 1; 1277} 1278 1279sub _ponder_begin { 1280 my ($self,$para,$curr_open,$paras) = @_; 1281 my $content = join ' ', splice @$para, 2; 1282 $content =~ s/^\s+//s; 1283 $content =~ s/\s+$//s; 1284 unless(length($content)) { 1285 $self->whine( 1286 $para->[1]{'start_line'}, 1287 "=begin without a target?" 1288 ); 1289 DEBUG and print STDERR "Ignoring targetless =begin\n"; 1290 return 1; 1291 } 1292 1293 my ($target, $title) = $content =~ m/^(\S+)\s*(.*)$/; 1294 $para->[1]{'title'} = $title if ($title); 1295 $para->[1]{'target'} = $target; # without any ':' 1296 $content = $target; # strip off the title 1297 1298 $content =~ s/^:!/!:/s; 1299 my $neg; # whether this is a negation-match 1300 $neg = 1 if $content =~ s/^!//s; 1301 my $to_resolve; # whether to process formatting codes 1302 $to_resolve = 1 if $content =~ s/^://s; 1303 1304 my $dont_ignore; # whether this target matches us 1305 1306 foreach my $target_name ( 1307 split(',', $content, -1), 1308 $neg ? () : '*' 1309 ) { 1310 DEBUG > 2 and 1311 print STDERR " Considering whether =begin $content matches $target_name\n"; 1312 next unless $self->{'accept_targets'}{$target_name}; 1313 1314 DEBUG > 2 and 1315 print STDERR " It DOES match the acceptable target $target_name!\n"; 1316 $to_resolve = 1 1317 if $self->{'accept_targets'}{$target_name} eq 'force_resolve'; 1318 $dont_ignore = 1; 1319 $para->[1]{'target_matching'} = $target_name; 1320 last; # stop looking at other target names 1321 } 1322 1323 if($neg) { 1324 if( $dont_ignore ) { 1325 $dont_ignore = ''; 1326 delete $para->[1]{'target_matching'}; 1327 DEBUG > 2 and print STDERR " But the leading ! means that this is a NON-match!\n"; 1328 } else { 1329 $dont_ignore = 1; 1330 $para->[1]{'target_matching'} = '!'; 1331 DEBUG > 2 and print STDERR " But the leading ! means that this IS a match!\n"; 1332 } 1333 } 1334 1335 $para->[0] = '=for'; # Just what we happen to call these, internally 1336 $para->[1]{'~really'} ||= '=begin'; 1337 $para->[1]{'~ignore'} = (! $dont_ignore) || 0; 1338 $para->[1]{'~resolve'} = $to_resolve || 0; 1339 1340 DEBUG > 1 and print STDERR " Making note to ", $dont_ignore ? 'not ' : '', 1341 "ignore contents of this region\n"; 1342 DEBUG > 1 and $dont_ignore and print STDERR " Making note to treat contents as ", 1343 ($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n"; 1344 DEBUG > 1 and print STDERR " (Stack now: ", $self->_dump_curr_open(), ")\n"; 1345 1346 push @$curr_open, $para; 1347 if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) { 1348 DEBUG > 1 and print STDERR "Ignoring ignorable =begin\n"; 1349 } else { 1350 $self->{'content_seen'} ||= 1 unless $self->{'~tried_gen_errata'}; 1351 $self->_handle_element_start((my $scratch='for'), $para->[1]); 1352 } 1353 1354 return 1; 1355} 1356 1357sub _ponder_end { 1358 my ($self,$para,$curr_open,$paras) = @_; 1359 my $content = join ' ', splice @$para, 2; 1360 $content =~ s/^\s+//s; 1361 $content =~ s/\s+$//s; 1362 DEBUG and print STDERR "Ogling '=end $content' directive\n"; 1363 1364 unless(length($content)) { 1365 $self->whine( 1366 $para->[1]{'start_line'}, 1367 "'=end' without a target?" . ( 1368 ( @$curr_open and $curr_open->[-1][0] eq '=for' ) 1369 ? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' ) 1370 : '' 1371 ) 1372 ); 1373 DEBUG and print STDERR "Ignoring targetless =end\n"; 1374 return 1; 1375 } 1376 1377 unless($content =~ m/^\S+$/) { # i.e., unless it's one word 1378 $self->whine( 1379 $para->[1]{'start_line'}, 1380 "'=end $content' is invalid. (Stack: " 1381 . $self->_dump_curr_open() . ')' 1382 ); 1383 DEBUG and print STDERR "Ignoring mistargetted =end $content\n"; 1384 return 1; 1385 } 1386 1387 unless(@$curr_open and $curr_open->[-1][0] eq '=for') { 1388 $self->whine( 1389 $para->[1]{'start_line'}, 1390 "=end $content without matching =begin. (Stack: " 1391 . $self->_dump_curr_open() . ')' 1392 ); 1393 DEBUG and print STDERR "Ignoring mistargetted =end $content\n"; 1394 return 1; 1395 } 1396 1397 unless($content eq $curr_open->[-1][1]{'target'}) { 1398 $self->whine( 1399 $para->[1]{'start_line'}, 1400 "=end $content doesn't match =begin " 1401 . $curr_open->[-1][1]{'target'} 1402 . ". (Stack: " 1403 . $self->_dump_curr_open() . ')' 1404 ); 1405 DEBUG and print STDERR "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n"; 1406 return 1; 1407 } 1408 1409 # Else it's okay to close... 1410 if(grep $_->[1]{'~ignore'}, @$curr_open) { 1411 DEBUG > 1 and print STDERR "Not firing any event for this =end $content because in an ignored region\n"; 1412 # And that may be because of this to-be-closed =for region, or some 1413 # other one, but it doesn't matter. 1414 } else { 1415 $curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'}; 1416 # what's that for? 1417 1418 $self->{'content_seen'} ||= 1 unless $self->{'~tried_gen_errata'}; 1419 $self->_handle_element_end( my $scratch = 'for', $para->[1]); 1420 } 1421 DEBUG > 1 and print STDERR "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n"; 1422 pop @$curr_open; 1423 1424 return 1; 1425} 1426 1427sub _ponder_doc_end { 1428 my ($self,$para,$curr_open,$paras) = @_; 1429 if(@$curr_open) { # Deal with things left open 1430 DEBUG and print STDERR "Stack is nonempty at end-document: (", 1431 $self->_dump_curr_open(), ")\n"; 1432 1433 DEBUG > 9 and print STDERR "Stack: ", pretty($curr_open), "\n"; 1434 unshift @$paras, $self->_closers_for_all_curr_open; 1435 # Make sure there is exactly one ~end in the parastack, at the end: 1436 @$paras = grep $_->[0] ne '~end', @$paras; 1437 push @$paras, $para, $para; 1438 # We need two -- once for the next cycle where we 1439 # generate errata, and then another to be at the end 1440 # when that loop back around to process the errata. 1441 return 1; 1442 1443 } else { 1444 DEBUG and print STDERR "Okay, stack is empty now.\n"; 1445 } 1446 1447 # Try generating errata section, if applicable 1448 unless($self->{'~tried_gen_errata'}) { 1449 $self->{'~tried_gen_errata'} = 1; 1450 my @extras = $self->_gen_errata(); 1451 if(@extras) { 1452 unshift @$paras, @extras; 1453 DEBUG and print STDERR "Generated errata... relooping...\n"; 1454 return 1; # I.e., loop around again to process these fake-o paragraphs 1455 } 1456 } 1457 1458 splice @$paras; # Well, that's that for this paragraph buffer. 1459 DEBUG and print STDERR "Throwing end-document event.\n"; 1460 1461 $self->_handle_element_end( my $scratch = 'Document' ); 1462 return 1; # Hasta la byebye 1463} 1464 1465sub _ponder_pod { 1466 my ($self,$para,$curr_open,$paras) = @_; 1467 $self->whine( 1468 $para->[1]{'start_line'}, 1469 "=pod directives shouldn't be over one line long! Ignoring all " 1470 . (@$para - 2) . " lines of content" 1471 ) if @$para > 3; 1472 1473 # Content ignored unless 'pod_handler' is set 1474 if (my $pod_handler = $self->{'pod_handler'}) { 1475 my ($line_num, $line) = map $_, $para->[1]{'start_line'}, $para->[2]; 1476 $line = $line eq '' ? "=pod" : "=pod $line"; # imitate cut_handler output 1477 $pod_handler->($line, $line_num, $self); 1478 } 1479 1480 # The surrounding methods set content_seen, so let us remain consistent. 1481 # I do not know why it was not here before -- should it not be here? 1482 # $self->{'content_seen'} ||= 1 unless $self->{'~tried_gen_errata'}; 1483 1484 return; 1485} 1486 1487sub _ponder_over { 1488 my ($self,$para,$curr_open,$paras) = @_; 1489 return 1 unless @$paras; 1490 my $list_type; 1491 1492 if($paras->[0][0] eq '=item') { # most common case 1493 $list_type = $self->_get_initial_item_type($paras->[0]); 1494 1495 } elsif($paras->[0][0] eq '=back') { 1496 # Ignore empty lists by default 1497 if ($self->{'parse_empty_lists'}) { 1498 $list_type = 'empty'; 1499 } else { 1500 shift @$paras; 1501 return 1; 1502 } 1503 } elsif($paras->[0][0] eq '~end') { 1504 $self->whine( 1505 $para->[1]{'start_line'}, 1506 "=over is the last thing in the document?!" 1507 ); 1508 return 1; # But feh, ignore it. 1509 } else { 1510 $list_type = 'block'; 1511 } 1512 $para->[1]{'~type'} = $list_type; 1513 push @$curr_open, $para; 1514 # yes, we reuse the paragraph as a stack item 1515 1516 my $content = join ' ', splice @$para, 2; 1517 $para->[1]{'~orig_content'} = $content; 1518 my $overness; 1519 if($content =~ m/^\s*$/s) { 1520 $para->[1]{'indent'} = 4; 1521 } elsif($content =~ m/^\s*((?:\d*\.)?\d+)\s*$/s) { 1522 no integer; 1523 $para->[1]{'indent'} = $1; 1524 if($1 == 0) { 1525 $self->whine( 1526 $para->[1]{'start_line'}, 1527 "Can't have a 0 in =over $content" 1528 ); 1529 $para->[1]{'indent'} = 4; 1530 } 1531 } else { 1532 $self->whine( 1533 $para->[1]{'start_line'}, 1534 "=over should be: '=over' or '=over positive_number'" 1535 ); 1536 $para->[1]{'indent'} = 4; 1537 } 1538 DEBUG > 1 and print STDERR "=over found of type $list_type\n"; 1539 1540 $self->{'content_seen'} ||= 1 unless $self->{'~tried_gen_errata'}; 1541 $self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]); 1542 1543 return; 1544} 1545 1546sub _ponder_back { 1547 my ($self,$para,$curr_open,$paras) = @_; 1548 # TODO: fire off </item-number> or </item-bullet> or </item-text> ?? 1549 1550 my $content = join ' ', splice @$para, 2; 1551 if($content =~ m/\S/) { 1552 $self->whine( 1553 $para->[1]{'start_line'}, 1554 "=back doesn't take any parameters, but you said =back $content" 1555 ); 1556 } 1557 1558 if(@$curr_open and $curr_open->[-1][0] eq '=over') { 1559 DEBUG > 1 and print STDERR "=back happily closes matching =over\n"; 1560 # Expected case: we're closing the most recently opened thing 1561 #my $over = pop @$curr_open; 1562 $self->{'content_seen'} ||= 1 unless $self->{'~tried_gen_errata'}; 1563 $self->_handle_element_end( my $scratch = 1564 'over-' . ( (pop @$curr_open)->[1]{'~type'} ), $para->[1] 1565 ); 1566 } else { 1567 DEBUG > 1 and print STDERR "=back found without a matching =over. Stack: (", 1568 join(', ', map $_->[0], @$curr_open), ").\n"; 1569 $self->whine( 1570 $para->[1]{'start_line'}, 1571 '=back without =over' 1572 ); 1573 return 1; # and ignore it 1574 } 1575} 1576 1577sub _ponder_item { 1578 my ($self,$para,$curr_open,$paras) = @_; 1579 my $over; 1580 unless(@$curr_open and 1581 $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) { 1582 $self->whine( 1583 $para->[1]{'start_line'}, 1584 "'=item' outside of any '=over'" 1585 ); 1586 unshift @$paras, 1587 ['=over', {'start_line' => $para->[1]{'start_line'}}, ''], 1588 $para 1589 ; 1590 return 1; 1591 } 1592 1593 1594 my $over_type = $over->[1]{'~type'}; 1595 1596 if(!$over_type) { 1597 # Shouldn't happen1 1598 die "Typeless over in stack, starting at line " 1599 . $over->[1]{'start_line'}; 1600 1601 } elsif($over_type eq 'block') { 1602 unless($curr_open->[-1][1]{'~bitched_about'}) { 1603 $curr_open->[-1][1]{'~bitched_about'} = 1; 1604 $self->whine( 1605 $curr_open->[-1][1]{'start_line'}, 1606 "You can't have =items (as at line " 1607 . $para->[1]{'start_line'} 1608 . ") unless the first thing after the =over is an =item" 1609 ); 1610 } 1611 # Just turn it into a paragraph and reconsider it 1612 $para->[0] = '~Para'; 1613 unshift @$paras, $para; 1614 return 1; 1615 1616 } elsif($over_type eq 'text') { 1617 my $item_type = $self->_get_item_type($para); 1618 # That kills the content of the item if it's a number or bullet. 1619 DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; 1620 1621 if($item_type eq 'text') { 1622 # Nothing special needs doing for 'text' 1623 } elsif($item_type eq 'number' or $item_type eq 'bullet') { 1624 $self->whine( 1625 $para->[1]{'start_line'}, 1626 "Expected text after =item, not a $item_type" 1627 ); 1628 # Undo our clobbering: 1629 push @$para, $para->[1]{'~orig_content'}; 1630 delete $para->[1]{'number'}; 1631 # Only a PROPER item-number element is allowed 1632 # to have a number attribute. 1633 } else { 1634 die "Unhandled item type $item_type"; # should never happen 1635 } 1636 1637 # =item-text thingies don't need any assimilation, it seems. 1638 1639 } elsif($over_type eq 'number') { 1640 my $item_type = $self->_get_item_type($para); 1641 # That kills the content of the item if it's a number or bullet. 1642 DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; 1643 1644 my $expected_value = ++ $curr_open->[-1][1]{'~counter'}; 1645 1646 if($item_type eq 'bullet') { 1647 # Hm, it's not numeric. Correct for this. 1648 $para->[1]{'number'} = $expected_value; 1649 $self->whine( 1650 $para->[1]{'start_line'}, 1651 "Expected '=item $expected_value'" 1652 ); 1653 push @$para, $para->[1]{'~orig_content'}; 1654 # restore the bullet, blocking the assimilation of next para 1655 1656 } elsif($item_type eq 'text') { 1657 # Hm, it's not numeric. Correct for this. 1658 $para->[1]{'number'} = $expected_value; 1659 $self->whine( 1660 $para->[1]{'start_line'}, 1661 "Expected '=item $expected_value'" 1662 ); 1663 # Text content will still be there and will block next ~Para 1664 1665 } elsif($item_type ne 'number') { 1666 die "Unknown item type $item_type"; # should never happen 1667 1668 } elsif($expected_value == $para->[1]{'number'}) { 1669 DEBUG > 1 and print STDERR " Numeric item has the expected value of $expected_value\n"; 1670 1671 } else { 1672 DEBUG > 1 and print STDERR " Numeric item has ", $para->[1]{'number'}, 1673 " instead of the expected value of $expected_value\n"; 1674 $self->whine( 1675 $para->[1]{'start_line'}, 1676 "You have '=item " . $para->[1]{'number'} . 1677 "' instead of the expected '=item $expected_value'" 1678 ); 1679 $para->[1]{'number'} = $expected_value; # correcting!! 1680 } 1681 1682 if(@$para == 2) { 1683 # For the cases where we /didn't/ push to @$para 1684 if($paras->[0][0] eq '~Para') { 1685 DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n"; 1686 push @$para, splice @{shift @$paras},2; 1687 } else { 1688 DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n"; 1689 push @$para, ''; # Just so it's not contentless 1690 } 1691 } 1692 1693 1694 } elsif($over_type eq 'bullet') { 1695 my $item_type = $self->_get_item_type($para); 1696 # That kills the content of the item if it's a number or bullet. 1697 DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; 1698 1699 if($item_type eq 'bullet') { 1700 # as expected! 1701 1702 if( $para->[1]{'~_freaky_para_hack'} ) { 1703 DEBUG and print STDERR "Accomodating '=item * Foo' tolerance hack.\n"; 1704 push @$para, $para->[1]{'~_freaky_para_hack'}; 1705 } 1706 1707 } elsif($item_type eq 'number') { 1708 $self->whine( 1709 $para->[1]{'start_line'}, 1710 "Expected '=item *'" 1711 ); 1712 push @$para, $para->[1]{'~orig_content'}; 1713 # and block assimilation of the next paragraph 1714 delete $para->[1]{'number'}; 1715 # Only a PROPER item-number element is allowed 1716 # to have a number attribute. 1717 } elsif($item_type eq 'text') { 1718 $self->whine( 1719 $para->[1]{'start_line'}, 1720 "Expected '=item *'" 1721 ); 1722 # But doesn't need processing. But it'll block assimilation 1723 # of the next para. 1724 } else { 1725 die "Unhandled item type $item_type"; # should never happen 1726 } 1727 1728 if(@$para == 2) { 1729 # For the cases where we /didn't/ push to @$para 1730 if($paras->[0][0] eq '~Para') { 1731 DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n"; 1732 push @$para, splice @{shift @$paras},2; 1733 } else { 1734 DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n"; 1735 push @$para, ''; # Just so it's not contentless 1736 } 1737 } 1738 1739 } else { 1740 die "Unhandled =over type \"$over_type\"?"; 1741 # Shouldn't happen! 1742 } 1743 $para->[0] .= '-' . $over_type; 1744 1745 return; 1746} 1747 1748sub _ponder_Plain { 1749 my ($self,$para) = @_; 1750 DEBUG and print STDERR " giving plain treatment...\n"; 1751 unless( @$para == 2 or ( @$para == 3 and $para->[2] eq '' ) 1752 or $para->[1]{'~cooked'} 1753 ) { 1754 push @$para, 1755 @{$self->_make_treelet( 1756 join("\n", splice(@$para, 2)), 1757 $para->[1]{'start_line'} 1758 )}; 1759 } 1760 # Empty paragraphs don't need a treelet for any reason I can see. 1761 # And precooked paragraphs already have a treelet. 1762 return; 1763} 1764 1765sub _ponder_Verbatim { 1766 my ($self,$para) = @_; 1767 DEBUG and print STDERR " giving verbatim treatment...\n"; 1768 1769 $para->[1]{'xml:space'} = 'preserve'; 1770 1771 unless ($self->{'_output_is_for_JustPod'}) { 1772 # Fix illegal settings for expand_verbatim_tabs() 1773 # This is because this module doesn't do input error checking, but khw 1774 # doesn't want to add yet another instance of that. 1775 $self->expand_verbatim_tabs(8) 1776 if ! defined $self->expand_verbatim_tabs() 1777 || $self->expand_verbatim_tabs() =~ /\D/; 1778 1779 my $indent = $self->strip_verbatim_indent; 1780 if ($indent && ref $indent eq 'CODE') { 1781 my @shifted = (shift @{$para}, shift @{$para}); 1782 $indent = $indent->($para); 1783 unshift @{$para}, @shifted; 1784 } 1785 1786 for(my $i = 2; $i < @$para; $i++) { 1787 foreach my $line ($para->[$i]) { # just for aliasing 1788 # Strip indentation. 1789 $line =~ s/^\Q$indent// if $indent; 1790 next unless $self->expand_verbatim_tabs; 1791 1792 # This is commented out because of github issue #85, and the 1793 # current maintainers don't know why it was there in the first 1794 # place. 1795 #&& !($self->{accept_codes} && $self->{accept_codes}{VerbatimFormatted}); 1796 while( $line =~ 1797 # Sort of adapted from Text::Tabs. 1798 s/^([^\t]*)(\t+)/$1.(" " x ((length($2) 1799 * $self->expand_verbatim_tabs) 1800 -(length($1)&7)))/e 1801 ) {} 1802 1803 # TODO: whinge about (or otherwise treat) unindented or overlong lines 1804 1805 } 1806 } 1807 } 1808 1809 # Now the VerbatimFormatted hoodoo... 1810 if( $self->{'accept_codes'} and 1811 $self->{'accept_codes'}{'VerbatimFormatted'} 1812 ) { 1813 while(@$para > 3 and $para->[-1] !~ m/\S/) { pop @$para } 1814 # Kill any number of terminal newlines 1815 $self->_verbatim_format($para); 1816 } elsif ($self->{'codes_in_verbatim'}) { 1817 push @$para, 1818 @{$self->_make_treelet( 1819 join("\n", splice(@$para, 2)), 1820 $para->[1]{'start_line'}, $para->[1]{'xml:space'} 1821 )}; 1822 $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines 1823 } else { 1824 push @$para, join "\n", splice(@$para, 2) if @$para > 3; 1825 $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines 1826 } 1827 return; 1828} 1829 1830sub _ponder_Data { 1831 my ($self,$para) = @_; 1832 DEBUG and print STDERR " giving data treatment...\n"; 1833 $para->[1]{'xml:space'} = 'preserve'; 1834 push @$para, join "\n", splice(@$para, 2) if @$para > 3; 1835 return; 1836} 1837 1838 1839 1840 1841########################################################################### 1842 1843sub _traverse_treelet_bit { # for use only by the routine above 1844 my($self, $name) = splice @_,0,2; 1845 1846 my $scratch; 1847 $self->_handle_element_start(($scratch=$name), shift @_); 1848 1849 while (@_) { 1850 my $x = shift; 1851 if (ref($x)) { 1852 &_traverse_treelet_bit($self, @$x); 1853 } else { 1854 $x .= shift while @_ && !ref($_[0]); 1855 $self->_handle_text($x); 1856 } 1857 } 1858 1859 $self->_handle_element_end($scratch=$name); 1860 return; 1861} 1862 1863#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 1864 1865sub _closers_for_all_curr_open { 1866 my $self = $_[0]; 1867 my @closers; 1868 foreach my $still_open (@{ $self->{'curr_open'} || return }) { 1869 my @copy = @$still_open; 1870 $copy[1] = {%{ $copy[1] }}; 1871 #$copy[1]{'start_line'} = -1; 1872 if($copy[0] eq '=for') { 1873 $copy[0] = '=end'; 1874 } elsif($copy[0] eq '=over') { 1875 $self->whine( 1876 $still_open->[1]{start_line} , 1877 "=over without closing =back" 1878 ); 1879 1880 $copy[0] = '=back'; 1881 } else { 1882 die "I don't know how to auto-close an open $copy[0] region"; 1883 } 1884 1885 unless( @copy > 2 ) { 1886 push @copy, $copy[1]{'target'}; 1887 $copy[-1] = '' unless defined $copy[-1]; 1888 # since =over's don't have targets 1889 } 1890 1891 $copy[1]{'fake-closer'} = 1; 1892 1893 DEBUG and print STDERR "Queuing up fake-o event: ", pretty(\@copy), "\n"; 1894 unshift @closers, \@copy; 1895 } 1896 return @closers; 1897} 1898 1899#-------------------------------------------------------------------------- 1900 1901sub _verbatim_format { 1902 my($it, $p) = @_; 1903 1904 my $formatting; 1905 1906 for(my $i = 2; $i < @$p; $i++) { # work backwards over the lines 1907 DEBUG and print STDERR "_verbatim_format appends a newline to $i: $p->[$i]\n"; 1908 $p->[$i] .= "\n"; 1909 # Unlike with simple Verbatim blocks, we don't end up just doing 1910 # a join("\n", ...) on the contents, so we have to append a 1911 # newline to every line, and then nix the last one later. 1912 } 1913 1914 if( DEBUG > 4 ) { 1915 print STDERR "<<\n"; 1916 for(my $i = $#$p; $i >= 2; $i--) { # work backwards over the lines 1917 print STDERR "_verbatim_format $i: $p->[$i]"; 1918 } 1919 print STDERR ">>\n"; 1920 } 1921 1922 for(my $i = $#$p; $i > 2; $i--) { 1923 # work backwards over the lines, except the first (#2) 1924 1925 #next unless $p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s 1926 # and $p->[$i-1] !~ m{^#:[ \^\/\%]*\n?$}s; 1927 # look at a formatty line preceding a nonformatty one 1928 DEBUG > 5 and print STDERR "Scrutinizing line $i: $$p[$i]\n"; 1929 if($p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s) { 1930 DEBUG > 5 and print STDERR " It's a formatty line. ", 1931 "Peeking at previous line ", $i-1, ": $$p[$i-1]: \n"; 1932 1933 if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) { 1934 DEBUG > 5 and print STDERR " Previous line is formatty! Skipping this one.\n"; 1935 next; 1936 } else { 1937 DEBUG > 5 and print STDERR " Previous line is non-formatty! Yay!\n"; 1938 } 1939 } else { 1940 DEBUG > 5 and print STDERR " It's not a formatty line. Ignoring\n"; 1941 next; 1942 } 1943 1944 # A formatty line has to have #: in the first two columns, and uses 1945 # "^" to mean bold, "/" to mean underline, and "%" to mean bold italic. 1946 # Example: 1947 # What do you want? i like pie. [or whatever] 1948 # #:^^^^^^^^^^^^^^^^^ ///////////// 1949 1950 1951 DEBUG > 4 and print STDERR "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n"; 1952 1953 $formatting = ' ' . $1; 1954 $formatting =~ s/\s+$//s; # nix trailing whitespace 1955 unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op 1956 splice @$p,$i,1; # remove this line 1957 $i--; # don't consider next line 1958 next; 1959 } 1960 1961 if( length($formatting) >= length($p->[$i-1]) ) { 1962 $formatting = substr($formatting, 0, length($p->[$i-1]) - 1) . ' '; 1963 } else { 1964 $formatting .= ' ' x (length($p->[$i-1]) - length($formatting)); 1965 } 1966 # Make $formatting and the previous line be exactly the same length, 1967 # with $formatting having a " " as the last character. 1968 1969 DEBUG > 4 and print STDERR "Formatting <$formatting> on <", $p->[$i-1], ">\n"; 1970 1971 1972 my @new_line; 1973 while( $formatting =~ m{\G(( +)|(\^+)|(\/+)|(\%+))}g ) { 1974 #print STDERR "Format matches $1\n"; 1975 1976 if($2) { 1977 #print STDERR "SKIPPING <$2>\n"; 1978 push @new_line, 1979 substr($p->[$i-1], pos($formatting)-length($1), length($1)); 1980 } else { 1981 #print STDERR "SNARING $+\n"; 1982 push @new_line, [ 1983 ( 1984 $3 ? 'VerbatimB' : 1985 $4 ? 'VerbatimI' : 1986 $5 ? 'VerbatimBI' : die("Should never get called") 1987 ), {}, 1988 substr($p->[$i-1], pos($formatting)-length($1), length($1)) 1989 ]; 1990 #print STDERR "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n"; 1991 } 1992 } 1993 my @nixed = 1994 splice @$p, $i-1, 2, @new_line; # replace myself and the next line 1995 DEBUG > 10 and print STDERR "Nixed count: ", scalar(@nixed), "\n"; 1996 1997 DEBUG > 6 and print STDERR "New version of the above line is these tokens (", 1998 scalar(@new_line), "):", 1999 map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n"; 2000 $i--; # So the next line we scrutinize is the line before the one 2001 # that we just went and formatted 2002 } 2003 2004 $p->[0] = 'VerbatimFormatted'; 2005 2006 # Collapse adjacent text nodes, just for kicks. 2007 for( my $i = 2; $i > $#$p; $i++ ) { # work forwards over the tokens except for the last 2008 if( !ref($p->[$i]) and !ref($p->[$i + 1]) ) { 2009 DEBUG > 5 and print STDERR "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n"; 2010 $p->[$i] .= splice @$p, $i+1, 1; # merge 2011 --$i; # and back up 2012 } 2013 } 2014 2015 # Now look for the last text token, and remove the terminal newline 2016 for( my $i = $#$p; $i >= 2; $i-- ) { 2017 # work backwards over the tokens, even the first 2018 if( !ref($p->[$i]) ) { 2019 if($p->[$i] =~ s/\n$//s) { 2020 DEBUG > 5 and print STDERR "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n"; 2021 } else { 2022 DEBUG > 5 and print STDERR 2023 "No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n"; 2024 } 2025 last; # we only want the next one 2026 } 2027 } 2028 2029 return; 2030} 2031 2032 2033#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 2034 2035 2036sub _treelet_from_formatting_codes { 2037 # Given a paragraph, returns a treelet. Full of scary tokenizing code. 2038 # Like [ '~Top', {'start_line' => $start_line}, 2039 # "I like ", 2040 # [ 'B', {}, "pie" ], 2041 # "!" 2042 # ] 2043 # This illustrates the general format of a treelet. It is an array: 2044 # [0] is a scalar indicating its type. In the example above, the 2045 # types are '~Top' and 'B' 2046 # [1] is a hash of various flags about it, possibly empty 2047 # [2] - [N] are an ordered list of the subcomponents of the treelet. 2048 # Scalars are literal text, refs are sub-treelets, to 2049 # arbitrary levels. Stringifying a treelet will recursively 2050 # stringify the sub-treelets, concatentating everything 2051 # together to form the exact text of the treelet. 2052 2053 my($self, $para, $start_line, $preserve_space) = @_; 2054 2055 my $treelet = ['~Top', {'start_line' => $start_line},]; 2056 2057 unless ($preserve_space || $self->{'preserve_whitespace'}) { 2058 $para =~ s/\s+/ /g; # collapse and trim all whitespace first. 2059 $para =~ s/ $//; 2060 $para =~ s/^ //; 2061 } 2062 2063 # Only apparent problem the above code is that N<< >> turns into 2064 # N<< >>. But then, word wrapping does that too! So don't do that! 2065 2066 2067 # As a Start-code is encountered, the number of opening bracket '<' 2068 # characters minus 1 is pushed onto @stack (so 0 means a single bracket, 2069 # etc). When closing brackets are found in the text, at least this number 2070 # (plus the 1) will be required to mean the Start-code is terminated. When 2071 # those are found, @stack is popped. 2072 my @stack; 2073 2074 my @lineage = ($treelet); 2075 my $raw = ''; # raw content of L<> fcode before splitting/processing 2076 # XXX 'raw' is not 100% accurate: all surrounding whitespace is condensed 2077 # into just 1 ' '. Is this the regex's doing or 'raw's? Answer is it's 2078 # the 'collapse and trim all whitespace first' lines just above. 2079 my $inL = 0; 2080 2081 DEBUG > 4 and print STDERR "Paragraph:\n$para\n\n"; 2082 2083 # Here begins our frightening tokenizer RE. The following regex matches 2084 # text in four main parts: 2085 # 2086 # * Start-codes. The first alternative matches C< or C<<, the latter 2087 # followed by some whitespace. $1 will hold the entire start code 2088 # (including any space following a multiple-angle-bracket delimiter), 2089 # and $2 will hold only the additional brackets past the first in a 2090 # multiple-bracket delimiter. length($2) + 1 will be the number of 2091 # closing brackets we have to find. 2092 # 2093 # * Closing brackets. Match some amount of whitespace followed by 2094 # multiple close brackets. The logic to see if this closes anything 2095 # is down below. Note that in order to parse C<< >> correctly, we 2096 # have to use look-behind (?<=\s\s), since the match of the starting 2097 # code will have consumed the whitespace. 2098 # 2099 # * A single closing bracket, to close a simple code like C<>. 2100 # 2101 # * Something that isn't a start or end code. We have to be careful 2102 # about accepting whitespace, since perlpodspec says that any whitespace 2103 # before a multiple-bracket closing delimiter should be ignored. 2104 # 2105 while($para =~ 2106 m/\G 2107 (?: 2108 # Match starting codes, including the whitespace following a 2109 # multiple-delimiter start code. $1 gets the whole start code and 2110 # $2 gets all but one of the <s in the multiple-bracket case. 2111 ([A-Z]<(?:(<+)\s+)?) 2112 | 2113 # Match multiple-bracket end codes. $3 gets the whitespace that 2114 # should be discarded before an end bracket but kept in other cases 2115 # and $4 gets the end brackets themselves. ($3 can be empty if the 2116 # construct is empty, like C<< >>, and all the white-space has been 2117 # gobbled up already, considered to be space after the opening 2118 # bracket. In this case we use look-behind to verify that there are 2119 # at least 2 spaces in a row before the ">".) 2120 (\s+|(?<=\s\s))(>{2,}) 2121 | 2122 (\s?>) # $5: simple end-codes 2123 | 2124 ( # $6: stuff containing no start-codes or end-codes 2125 (?: 2126 [^A-Z\s>] 2127 | 2128 (?: 2129 [A-Z](?!<) 2130 ) 2131 | 2132 # whitespace is ok, but we don't want to eat the whitespace before 2133 # a multiple-bracket end code. 2134 # NOTE: we may still have problems with e.g. S<< >> 2135 (?: 2136 \s(?!\s*>{2,}) 2137 ) 2138 )+ 2139 ) 2140 ) 2141 /xgo 2142 ) { 2143 DEBUG > 4 and print STDERR "\nParagraphic tokenstack = (@stack)\n"; 2144 if(defined $1) { 2145 my $bracket_count; # How many '<<<' in a row this has. Needed for 2146 # Pod::Simple::JustPod 2147 if(defined $2) { 2148 DEBUG > 3 and print STDERR "Found complex start-text code \"$1\"\n"; 2149 $bracket_count = length($2) + 1; 2150 push @stack, $bracket_count; # length of the necessary complex 2151 # end-code string 2152 } else { 2153 DEBUG > 3 and print STDERR "Found simple start-text code \"$1\"\n"; 2154 push @stack, 0; # signal that we're looking for simple 2155 $bracket_count = 1; 2156 } 2157 my $code = substr($1,0,1); 2158 if ('L' eq $code) { 2159 if ($inL) { 2160 $raw .= $1; 2161 $self->scream( $start_line, 2162 'Nested L<> are illegal. Pretending inner one is ' 2163 . 'X<...> so can continue looking for other errors.'); 2164 $code = "X"; 2165 } 2166 else { 2167 $raw = ""; # reset raw content accumulator 2168 $inL = @stack; 2169 } 2170 } else { 2171 $raw .= $1 if $inL; 2172 } 2173 push @lineage, [ $code, {}, ]; # new node object 2174 2175 # Tell Pod::Simple::JustPod how many brackets there were, but to save 2176 # space, not in the most usual case of there was just 1. It can be 2177 # inferred by the absence of this element. Similarly, if there is more 2178 # than one bracket, extract the white space between the final bracket 2179 # and the real beginning of the interior. Save that if it isn't just a 2180 # single space 2181 if ($self->{'_output_is_for_JustPod'} && $bracket_count > 1) { 2182 $lineage[-1][1]{'~bracket_count'} = $bracket_count; 2183 my $lspacer = substr($1, 1 + $bracket_count); 2184 $lineage[-1][1]{'~lspacer'} = $lspacer if $lspacer ne " "; 2185 } 2186 push @{ $lineage[-2] }, $lineage[-1]; 2187 } elsif(defined $4) { 2188 DEBUG > 3 and print STDERR "Found apparent complex end-text code \"$3$4\"\n"; 2189 # This is where it gets messy... 2190 if(! @stack) { 2191 # We saw " >>>>" but needed nothing. This is ALL just stuff then. 2192 DEBUG > 4 and print STDERR " But it's really just stuff.\n"; 2193 push @{ $lineage[-1] }, $3, $4; 2194 next; 2195 } elsif(!$stack[-1]) { 2196 # We saw " >>>>" but needed only ">". Back pos up. 2197 DEBUG > 4 and print STDERR " And that's more than we needed to close simple.\n"; 2198 push @{ $lineage[-1] }, $3; # That was a for-real space, too. 2199 pos($para) = pos($para) - length($4) + 1; 2200 } elsif($stack[-1] == length($4)) { 2201 # We found " >>>>", and it was exactly what we needed. Commonest case. 2202 DEBUG > 4 and print STDERR " And that's exactly what we needed to close complex.\n"; 2203 } elsif($stack[-1] < length($4)) { 2204 # We saw " >>>>" but needed only " >>". Back pos up. 2205 DEBUG > 4 and print STDERR " And that's more than we needed to close complex.\n"; 2206 pos($para) = pos($para) - length($4) + $stack[-1]; 2207 } else { 2208 # We saw " >>>>" but needed " >>>>>>". So this is all just stuff! 2209 DEBUG > 4 and print STDERR " But it's really just stuff, because we needed more.\n"; 2210 push @{ $lineage[-1] }, $3, $4; 2211 next; 2212 } 2213 #print STDERR "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n"; 2214 2215 if ($3 ne " " && $self->{'_output_is_for_JustPod'}) { 2216 if ($3 ne "") { 2217 $lineage[-1][1]{'~rspacer'} = $3; 2218 } 2219 elsif ($lineage[-1][1]{'~lspacer'} eq " ") { 2220 2221 # Here we had something like C<< >> which was a false positive 2222 delete $lineage[-1][1]{'~lspacer'}; 2223 } 2224 else { 2225 $lineage[-1][1]{'~rspacer'} 2226 = substr($lineage[-1][1]{'~lspacer'}, -1, 1); 2227 chop $lineage[-1][1]{'~lspacer'}; 2228 } 2229 } 2230 2231 push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] }; 2232 # Keep the element from being childless 2233 2234 if ($inL == @stack) { 2235 $lineage[-1][1]{'raw'} = $raw; 2236 $inL = 0; 2237 } 2238 2239 pop @stack; 2240 pop @lineage; 2241 2242 $raw .= $3.$4 if $inL; 2243 2244 } elsif(defined $5) { 2245 DEBUG > 3 and print STDERR "Found apparent simple end-text code \"$5\"\n"; 2246 2247 if(@stack and ! $stack[-1]) { 2248 # We're indeed expecting a simple end-code 2249 DEBUG > 4 and print STDERR " It's indeed an end-code.\n"; 2250 2251 if(length($5) == 2) { # There was a space there: " >" 2252 push @{ $lineage[-1] }, ' '; 2253 } elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element 2254 push @{ $lineage[-1] }, ''; # keep it from being really childless 2255 } 2256 2257 if ($inL == @stack) { 2258 $lineage[-1][1]{'raw'} = $raw; 2259 $inL = 0; 2260 } 2261 2262 pop @stack; 2263 pop @lineage; 2264 } else { 2265 DEBUG > 4 and print STDERR " It's just stuff.\n"; 2266 push @{ $lineage[-1] }, $5; 2267 } 2268 2269 $raw .= $5 if $inL; 2270 2271 } elsif(defined $6) { 2272 DEBUG > 3 and print STDERR "Found stuff \"$6\"\n"; 2273 push @{ $lineage[-1] }, $6; 2274 $raw .= $6 if $inL; 2275 # XXX does not capture multiplace whitespaces -- 'raw' ends up with 2276 # at most 1 leading/trailing whitespace, why not all of it? 2277 # Answer, because we deliberately trimmed it above 2278 2279 } else { 2280 # should never ever ever ever happen 2281 DEBUG and print STDERR "AYYAYAAAAA at line ", __LINE__, "\n"; 2282 die "SPORK 512512!"; 2283 } 2284 } 2285 2286 if(@stack) { # Uhoh, some sequences weren't closed. 2287 my $x= "..."; 2288 while(@stack) { 2289 push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] }; 2290 # Hmmmmm! 2291 2292 my $code = (pop @lineage)->[0]; 2293 my $ender_length = pop @stack; 2294 if($ender_length) { 2295 --$ender_length; 2296 $x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length); 2297 } else { 2298 $x = $code . "<$x>"; 2299 } 2300 } 2301 DEBUG > 1 and print STDERR "Unterminated $x sequence\n"; 2302 $self->whine($start_line, 2303 "Unterminated $x sequence", 2304 ); 2305 } 2306 2307 return $treelet; 2308} 2309 2310#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 2311 2312sub text_content_of_treelet { # method: $parser->text_content_of_treelet($lol) 2313 return stringify_lol($_[1]); 2314} 2315 2316sub stringify_lol { # function: stringify_lol($lol) 2317 my $string_form = ''; 2318 _stringify_lol( $_[0] => \$string_form ); 2319 return $string_form; 2320} 2321 2322sub _stringify_lol { # the real recursor 2323 my($lol, $to) = @_; 2324 for(my $i = 2; $i < @$lol; ++$i) { 2325 if( ref($lol->[$i] || '') and UNIVERSAL::isa($lol->[$i], 'ARRAY') ) { 2326 _stringify_lol( $lol->[$i], $to); # recurse! 2327 } else { 2328 $$to .= $lol->[$i]; 2329 } 2330 } 2331 return; 2332} 2333 2334#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 2335 2336sub _dump_curr_open { # return a string representation of the stack 2337 my $curr_open = $_[0]{'curr_open'}; 2338 2339 return '[empty]' unless @$curr_open; 2340 return join '; ', 2341 map {; 2342 ($_->[0] eq '=for') 2343 ? ( ($_->[1]{'~really'} || '=over') 2344 . ' ' . $_->[1]{'target'}) 2345 : $_->[0] 2346 } 2347 @$curr_open 2348 ; 2349} 2350 2351########################################################################### 2352my %pretty_form = ( 2353 "\a" => '\a', # ding! 2354 "\b" => '\b', # BS 2355 "\e" => '\e', # ESC 2356 "\f" => '\f', # FF 2357 "\t" => '\t', # tab 2358 "\cm" => '\cm', 2359 "\cj" => '\cj', 2360 "\n" => '\n', # probably overrides one of either \cm or \cj 2361 '"' => '\"', 2362 '\\' => '\\\\', 2363 '$' => '\\$', 2364 '@' => '\\@', 2365 '%' => '\\%', 2366 '#' => '\\#', 2367); 2368 2369sub pretty { # adopted from Class::Classless 2370 # Not the most brilliant routine, but passable. 2371 # Don't give it a cyclic data structure! 2372 my @stuff = @_; # copy 2373 my $x; 2374 my $out = 2375 # join ",\n" . 2376 join ", ", 2377 map {; 2378 if(!defined($_)) { 2379 "undef"; 2380 } elsif(ref($_) eq 'ARRAY' or ref($_) eq 'Pod::Simple::LinkSection') { 2381 $x = "[ " . pretty(@$_) . " ]" ; 2382 $x; 2383 } elsif(ref($_) eq 'SCALAR') { 2384 $x = "\\" . pretty($$_) ; 2385 $x; 2386 } elsif(ref($_) eq 'HASH') { 2387 my $hr = $_; 2388 $x = "{" . join(", ", 2389 map(pretty($_) . '=>' . pretty($hr->{$_}), 2390 sort keys %$hr ) ) . "}" ; 2391 $x; 2392 } elsif(!length($_)) { q{''} # empty string 2393 } elsif( 2394 $_ eq '0' # very common case 2395 or( 2396 m/^-?(?:[123456789]\d*|0)(?:\.\d+)?$/s 2397 and $_ ne '-0' # the strange case that RE lets thru 2398 ) 2399 ) { $_; 2400 } else { 2401 # Yes, explicitly name every character desired. There are shorcuts one 2402 # could make, but I (Karl Williamson) was afraid that some Perl 2403 # releases would have bugs in some of them. For example [A-Z] works 2404 # even on EBCDIC platforms to match exactly the 26 uppercase English 2405 # letters, but I don't know if it has always worked without bugs. It 2406 # seemed safest just to list the characters. 2407 # s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])> 2408 s<([^ !"#'()*+,\-./0123456789:;\<=\>?ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]^_`abcdefghijklmnopqrstuvwxyz{|}~])> 2409 <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg; 2410 #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg; 2411 qq{"$_"}; 2412 } 2413 } @stuff; 2414 # $out =~ s/\n */ /g if length($out) < 75; 2415 return $out; 2416} 2417 2418#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 2419 2420# A rather unsubtle method of blowing away all the state information 2421# from a parser object so it can be reused. Provided as a utility for 2422# backward compatibility in Pod::Man, etc. but not recommended for 2423# general use. 2424 2425sub reinit { 2426 my $self = shift; 2427 foreach (qw(source_dead source_filename doc_has_started 2428start_of_pod_block content_seen last_was_blank paras curr_open 2429line_count pod_para_count in_pod ~tried_gen_errata all_errata errata errors_seen 2430Title)) { 2431 2432 delete $self->{$_}; 2433 } 2434} 2435 2436#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 24371; 2438 2439