package re;

# pragma for controlling the regexp engine
#
# This file implements the compile-time side of "use re ..." / "no re ...":
# import() and unimport() both funnel into bits(), which walks the list of
# subpragmas and updates $^H, %^H and ${^RE_DEBUG_FLAGS} accordingly.
use strict;
use warnings;

our $VERSION   = "0.47";
our @ISA       = qw(Exporter);
our @EXPORT_OK = qw{
    is_regexp regexp_pattern
    regname regnames regnames_count
    regmust optimization
};
our %EXPORT_OK = map { $_ => 1 } @EXPORT_OK;

# Subpragmas that are plain bits in $^H; bits() returns the OR of the ones
# it was asked about, and import()/unimport() set or clear them.
my %bitmask = (
    taint => 0x00100000,    # HINT_RE_TAINT
    eval  => 0x00200000,    # HINT_RE_EVAL
);

my $flags_hint  = 0x02000000;    # HINT_RE_FLAGS
my $PMMOD_SHIFT = 0;

# Values used by the '/flags' and 'strict' subpragmas.  The first group are
# bit flags accumulated in $^H{reflags}; the "special cases" are not bits
# but charset identifiers, stored on their own in $^H{reflags_charset}.
my %reflags = (
    m      => 1 << ($PMMOD_SHIFT + 0),
    s      => 1 << ($PMMOD_SHIFT + 1),
    i      => 1 << ($PMMOD_SHIFT + 2),
    x      => 1 << ($PMMOD_SHIFT + 3),
    xx     => 1 << ($PMMOD_SHIFT + 4),
    n      => 1 << ($PMMOD_SHIFT + 5),
    p      => 1 << ($PMMOD_SHIFT + 6),
    strict => 1 << ($PMMOD_SHIFT + 10),

    # special cases:
    d  => 0,
    l  => 1,
    u  => 2,
    a  => 3,
    aa => 4,
);

# setcolor()
#
# Populate $ENV{PERL_RE_COLORS} with a tab-separated list of terminal
# escape sequences, one per termcap property named in $ENV{PERL_RE_TC}
# (default 'md,me,so,se,us,ue').  Any failure (Term::Cap missing, no usable
# terminal entry, ...) is deliberately ignored; in that case a plain-text
# fallback is installed unless PERL_RE_COLORS already holds a true value.
sub setcolor {
    my $ok = eval {    # Ignore errors
        require Term::Cap;

        # Passing OSPEED explicitly avoids a warning from Term::Cap.
        my $terminal = Term::Cap->Tgetent({ OSPEED => 9600 });
        my $props    = $ENV{PERL_RE_TC} || 'md,me,so,se,us,ue';
        my @props    = split /,/, $props;
        my $colors   = join "\t", map { $terminal->Tputs($_, 1) } @props;

        $colors =~ s/\0//g;
        $ENV{PERL_RE_COLORS} = $colors;
        1;
    };
    if (!$ok) {
        $ENV{PERL_RE_COLORS} ||= qq'\t\t> <\t> <\t\t';
    }

    return;
}

# Names accepted after "use re 'Debug'", mapped to the bits they set in
# ${^RE_DEBUG_FLAGS}.  COMPILE, EXECUTE and EXTRA are group masks.
my %flags = (
    COMPILE  => 0x0000FF,
    PARSE    => 0x000001,
    OPTIMISE => 0x000002,
    TRIEC    => 0x000004,
    DUMP     => 0x000008,
    FLAGS    => 0x000010,
    TEST     => 0x000020,

    EXECUTE => 0x00FF00,
    INTUIT  => 0x000100,
    MATCH   => 0x000200,
    TRIEE   => 0x000400,

    EXTRA             => 0x3FF0000,
    TRIEM             => 0x0010000,
    STATE             => 0x0080000,
    OPTIMISEM         => 0x0100000,
    STACK             => 0x0280000,    # includes the STATE bit
    BUFFERS           => 0x0400000,
    GPOS              => 0x0800000,
    DUMP_PRE_OPTIMIZE => 0x1000000,
    WILDCARD          => 0x2000000,
);
$flags{ALL} = -1 & ~( $flags{BUFFERS}
                    | $flags{DUMP_PRE_OPTIMIZE}
                    | $flags{WILDCARD}
                    );
$flags{All}   = $flags{all} = $flags{DUMP} | $flags{EXECUTE};
$flags{Extra} = $flags{EXECUTE} | $flags{COMPILE} | $flags{GPOS};
$flags{More}  = $flags{MORE} =
    $flags{All} | $flags{TRIEC} | $flags{TRIEM} | $flags{STATE};
$flags{State} = $flags{DUMP} | $flags{EXECUTE} | $flags{STATE};
$flags{TRIE}  = $flags{DUMP} | $flags{EXECUTE} | $flags{TRIEC};

if (defined &DynaLoader::boot_DynaLoader) {
    require XSLoader;
    XSLoader::load();
}
# else we're miniperl
# We need to work for miniperl, because the XS toolchain uses Text::Wrap, which
# uses re 'taint'.

# _load_unload($on)
#
# Install (true $on) or remove (false $on) the debugging regexp engine for
# the scope currently being compiled, by setting or deleting $^H{regcomp}.
sub _load_unload {
    my ($on) = @_;
    if ($on) {
        # We call install() every time, as if we didn't, we wouldn't
        # "see" any changes to the color environment var since
        # the last time it was called.

        # install() returns an integer, which if cast properly
        # in C resolves to a structure containing the regexp
        # hooks. Setting it to a random integer will guarantee
        # segfaults.
        # NOTE(review): install() is not defined in this file; presumably
        # it is supplied by the XS code loaded above.
        $^H{regcomp} = install();
    }
    else {
        delete $^H{regcomp};
    }
}

# bits($on, @subpragmas)
#
# Workhorse behind import() ($on true) and unimport() ($on false).
#
# Returns the OR of the %bitmask values ('taint', 'eval') that were named,
# for the caller to set or clear in $^H.  Every other subpragma is applied
# here as a side effect:
#   Debug/Debugcolor  - adjust ${^RE_DEBUG_FLAGS} from the remaining
#                       arguments and (un)install the debugging engine
#   debug/debugcolor  - default debug flags, (un)install the engine
#   exportable names  - exported into the caller via Exporter
#   strict            - toggles the strict bit in $^H{reflags} and the
#                       'regexp' warnings category
#   /flags            - default pattern modifiers in $^H{reflags} and
#                       $^H{reflags_charset}
# Unknown names produce a Carp::carp() warning, never an exception.
#
# Called with $on false and no subpragmas ("no re;"), everything is turned
# off.
#
# This is intentionally one long sub rather than several helpers:
# export_to_level(2, ...), warnings::enabled(), warnings::warnif() and
# Carp::carp() all depend on the call stack as seen from here.
sub bits {
    my ($on, @args) = @_;
    my $bits            = 0;
    my $turning_all_off = !@args && !$on;
    my $seen_Debug      = 0;
    my $seen_debug      = 0;
    if ($turning_all_off) {

        # Pretend we were called with certain parameters, which are best
        # dealt with that way.
        push @args, keys %bitmask;    # taint and eval
        push @args, 'strict';
    }

    # Process each subpragma parameter
  ARG:
    foreach my $idx (0 .. $#args) {
        my $s = $args[$idx];
        if ($s eq 'Debug' or $s eq 'Debugcolor') {
            if (!$seen_Debug) {
                $seen_Debug = 1;

                # Reset to nothing, and then add what follows.  $seen_Debug
                # allows, though unlikely someone would do it, more than one
                # Debug and flags in the arguments
                ${^RE_DEBUG_FLAGS} = 0;
            }
            setcolor() if $s =~ /color/i;

            # Everything after 'Debug' is taken to be a debug flag name.
            for my $flag_name (@args[ $idx + 1 .. $#args ]) {
                if ($flags{$flag_name}) {
                    if ($on) {
                        ${^RE_DEBUG_FLAGS} |= $flags{$flag_name};
                    }
                    else {
                        ${^RE_DEBUG_FLAGS} &= ~$flags{$flag_name};
                    }
                }
                else {
                    require Carp;
                    Carp::carp("Unknown \"re\" Debug flag '$flag_name', possible flags: ",
                               join(", ", sort keys %flags));
                }
            }

            # When turning flags off, keep the debugging engine installed
            # for as long as any flag is still set.
            _load_unload($on ? 1 : ${^RE_DEBUG_FLAGS});
            last ARG;
        }
        elsif ($s eq 'debug' or $s eq 'debugcolor') {

            # These default flags should be kept in sync with the same values
            # in regcomp.h
            ${^RE_DEBUG_FLAGS} = $flags{'EXECUTE'} | $flags{'DUMP'};
            setcolor() if $s =~ /color/i;
            _load_unload($on);
            $seen_debug = 1;
        }
        elsif (exists $bitmask{$s}) {
            $bits |= $bitmask{$s};
        }
        elsif ($EXPORT_OK{$s}) {
            require Exporter;
            re->export_to_level(2, 're', $s);
        }
        elsif ($s eq 'strict') {
            if ($on) {
                $^H{reflags} |= $reflags{$s};
                warnings::warnif('experimental::re_strict',
                                 "\"use re 'strict'\" is experimental");

                # Turn on warnings if not already done.
                if (!warnings::enabled('regexp')) {
                    require warnings;
                    warnings->import('regexp');

                    # Remember that it was us who enabled them.
                    $^H{re_strict} = 1;
                }
            }
            else {
                $^H{reflags} &= ~$reflags{$s} if $^H{reflags};

                # Turn off warnings if we turned them on, and forget that
                # we did: otherwise a later "no re 'strict'" or "no re" in
                # the same scope would also switch off 'regexp' warnings
                # that the user has since enabled themselves.
                if ($^H{re_strict}) {
                    warnings->unimport('regexp');
                    delete $^H{re_strict};
                }
            }
            if ($^H{reflags}) {
                $^H |= $flags_hint;
            }
            else {
                $^H &= ~$flags_hint;
            }
        }
        elsif ($s =~ s/^\///) {
            my $reflags = $^H{reflags} || 0;
            my $seen_charset;
            my $x_count = 0;

            # Walk the modifier string one character at a time.
            while ($s =~ m/( . )/gx) {
                my $flag = $1;
                if ($flag =~ /[adul]/) {

                    # The 'a' may be repeated; hide this from the rest of the
                    # code by counting and getting rid of all of them, then
                    # changing to 'aa' if there is a repeat.
                    if ($flag eq 'a') {
                        my $sav_pos = pos $s;
                        my $a_count = $s =~ s/a//g;
                        pos $s = $sav_pos - 1;    # -1 because got rid of the 'a'
                        if ($a_count > 2) {
                            require Carp;
                            Carp::carp(
                                qq 'The "a" flag may only appear a maximum of twice'
                            );
                        }
                        elsif ($a_count == 2) {
                            $flag = 'aa';
                        }
                    }
                    if ($on) {
                        if ($seen_charset) {
                            require Carp;
                            if ($seen_charset ne $flag) {
                                Carp::carp(
                                    qq 'The "$seen_charset" and "$flag" flags '
                                   .qq 'are exclusive'
                                );
                            }
                            else {
                                Carp::carp(
                                    qq 'The "$seen_charset" flag may not appear '
                                   .qq 'twice'
                                );
                            }
                        }
                        $^H{reflags_charset} = $reflags{$flag};
                        $seen_charset = $flag;
                    }
                    else {
                        # Only cancel the charset if it is the one in effect.
                        delete $^H{reflags_charset}
                            if defined $^H{reflags_charset}
                            && $^H{reflags_charset} == $reflags{$flag};
                    }
                }
                elsif (exists $reflags{$flag}) {
                    if ($flag eq 'x') {
                        $x_count++;
                        if ($x_count > 2) {
                            require Carp;
                            Carp::carp(
                                qq 'The "x" flag may only appear a maximum of twice'
                            );
                        }
                        elsif ($x_count == 2) {
                            $flag = 'xx';    # First time through got the /x
                        }
                    }

                    if ($on) {
                        $reflags |= $reflags{$flag};
                    }
                    else {
                        $reflags &= ~$reflags{$flag};
                    }
                }
                else {
                    require Carp;
                    Carp::carp(
                        qq'Unknown regular expression flag "$flag"'
                    );

                    # Abandon this argument; $^H{reflags} is left untouched.
                    next ARG;
                }
            }

            # Publish the accumulated flags, and keep HINT_RE_FLAGS set
            # exactly when there is something for it to announce.
            $^H{reflags} = $reflags;
            if ($reflags or defined $^H{reflags_charset}) {
                $^H |= $flags_hint;
            }
            else {
                $^H &= ~$flags_hint;
            }
        }
        else {
            require Carp;
            if ($seen_debug && defined $flags{$s}) {
                Carp::carp("Use \"Debug\" not \"debug\", to list debug types"
                           . " in \"re\".  \"$s\" ignored");
            }
            else {
                Carp::carp("Unknown \"re\" subpragma '$s' (known ones are: ",
                           join(', ', map {qq('$_')} 'debug', 'debugcolor', sort keys %bitmask),
                           ")");
            }
        }
    }

    if ($turning_all_off) {
        _load_unload(0);
        $^H{reflags}         = 0;
        $^H{reflags_charset} = 0;
        $^H &= ~$flags_hint;
    }

    return $bits;
}

# "use re LIST": apply the subpragmas and set the returned hint bits.
sub import {
    shift;
    $^H |= bits(1, @_);
}

# "no re LIST": apply the subpragmas and clear the returned hint bits.
sub unimport {
    shift;
    $^H &= ~ bits(0, @_);
}

1;

__END__

=head1 NAME

re - Perl pragma to alter regular expression behaviour

=head1 SYNOPSIS

    use re 'taint';
    ($x) = ($^X =~ /^(.*)$/s);     # $x is tainted here

    $pat = '(?{ $foo = 1 })';
    use re 'eval';
    /foo${pat}bar/;                # won't fail (when not under -T
                                   # switch)

    {
        no re 'taint';             # the default
        ($x) = ($^X =~ /^(.*)$/s); # $x is not tainted here

        no re 'eval';              # the default
        /foo${pat}bar/;            # disallowed (with or without -T
                                   # switch)
    }

    use re 'strict';               # Raise warnings for more conditions

    use re '/ix';
    "FOO" =~ / foo /;              # /ix implied
    no re '/x';
    "FOO" =~ /foo/;                # just /i implied

    use re 'debug';                # output debugging info during
    /^(.*)$/s;                     # compile and run time


    use re 'debugcolor';           # same as 'debug', but with colored
                                   # output
    ...

    use re qw(Debug All);          # Same as "use re 'debug'", but you
                                   # can use "Debug" with things other
                                   # than 'All'
    use re qw(Debug More);         # 'All' plus output more details
    no re qw(Debug ALL);           # Turn off (almost) all re debugging
                                   # in this scope

    use re qw(is_regexp regexp_pattern); # import utility functions
    my ($pat,$mods)=regexp_pattern(qr/foo/i);
    if (is_regexp($obj)) {
        print "Got regexp: ",
            scalar regexp_pattern($obj); # just as perl would stringify
    }                                    # it but no hassle with blessed
                                         # re's.

(We use $^X in these examples because it's tainted by default.)

=head1 DESCRIPTION

=head2 'taint' mode

When C<use re 'taint'> is in effect, and a tainted string is the target
of a regexp, the regexp memories (or values returned by the m// operator
in list context) are tainted. This feature is useful when regexp operations
on tainted data aren't meant to extract safe substrings, but to perform
other transformations.

=head2 'eval' mode

When C<use re 'eval'> is in effect, a regexp is allowed to contain
C<(?{ ... })> zero-width assertions and C<(??{ ... })> postponed
subexpressions that are derived from variable interpolation, rather than
appearing literally within the regexp. That is normally disallowed, since
it is a potential security risk. Note that this pragma is ignored when the
regular expression is obtained from tainted data, i.e. evaluation is always
disallowed with tainted regular expressions. See L<perlre/(?{ code })>
and L<perlre/(??{ code })>.

For the purpose of this pragma, interpolation of precompiled regular
expressions (i.e., the result of C<qr//>) is I<not> considered variable
interpolation. Thus:

    /foo${pat}bar/

I<is> allowed if $pat is a precompiled regular expression, even
if $pat contains C<(?{ ... })> assertions or C<(??{ ... })> subexpressions.

=head2 'strict' mode

Note that this is an experimental feature which may be changed or removed in a
future Perl release.

When C<use re 'strict'> is in effect, stricter checks are applied than
otherwise when compiling regular expression patterns. These may cause more
warnings to be raised than otherwise, and more things to be fatal instead of
just warnings. The purpose of this is to find and report at compile time some
things, which may be legal, but have a reasonable possibility of not being the
programmer's actual intent. This automatically turns on the C<"regexp">
warnings category (if not already on) within its scope.

An example of something that is caught under C<'strict'>, but not
otherwise, is the pattern

    qr/\xABC/

The C<"\x"> construct without curly braces should be followed by exactly two
hex digits; this one is followed by three. This currently evaluates as
equivalent to

    qr/\x{AB}C/

that is, the character whose code point value is C<0xAB>, followed by the
letter C<C>. But since C<C> is a hex digit, there is a reasonable chance
that the intent was

    qr/\x{ABC}/

that is, the single character at C<0xABC>. Under C<'strict'> it is an error to
not follow C<\x> with exactly two hex digits. When not under C<'strict'> a
warning is generated if there is only one hex digit, and no warning is raised
if there are more than two.

It is expected that what exactly C<'strict'> does will evolve over time as we
gain experience with it. This means that programs that compile under it in
today's Perl may not compile, or may have more or fewer warnings, in future
Perls. There are no backwards compatibility promises with regard to it. Also
there are already proposals for an alternate syntax for enabling it. For
these reasons, using it will raise a C<experimental::re_strict> class warning,
unless that category is turned off.

Note that if a pattern compiled within C<'strict'> is recompiled, say by
interpolating into another pattern, outside of C<'strict'>, it is not checked
again for strictness. This is because if it works under strict it must work
under non-strict.

=head2 '/flags' mode

When C<use re '/I<flags>'> is specified, the given I<flags> are automatically
added to every regular expression till the end of the lexical scope.
I<flags> can be any combination of
C<'a'>,
C<'aa'>,
C<'d'>,
C<'i'>,
C<'l'>,
C<'m'>,
C<'n'>,
C<'p'>,
C<'s'>,
C<'u'>,
C<'x'>,
and/or
C<'xx'>.

C<no re '/I<flags>'> will turn off the effect of C<use re '/I<flags>'> for the
given flags.

For example, if you want all your regular expressions to have /msxx on by
default, simply put

    use re '/msxx';

at the top of your code.

The character set C</adul> flags cancel each other out. So, in this example,

    use re "/u";
    "ss" =~ /\xdf/;
    use re "/d";
    "ss" =~ /\xdf/;

the second C<use re> does an implicit C<no re '/u'>.

Similarly,

    use re "/xx";   # Doubled-x
    ...
    use re "/x";    # Single x from here on
    ...

Turning on one of the character set flags with C<use re> takes precedence over the
C<locale> pragma and the 'unicode_strings' C<feature>, for regular
expressions. Turning off one of these flags when it is active reverts to
the behaviour specified by whatever other pragmata are in scope. For
example:

    use feature "unicode_strings";
    no re "/u"; # does nothing
    use re "/l";
    no re "/l"; # reverts to unicode_strings behaviour

Default flags are applied wherever a pattern is compiled, with the exception
of the C</x> flag, which is not applied to patterns compiled from string arguments
to C<split>. Thus C<use re "/x";> does not affect the behaviour of C<split " "> but
B<does> affect the behaviour of C<split / />.

=head2 'debug' mode

When C<use re 'debug'> is in effect, perl emits debugging messages when
compiling and using regular expressions. The output is the same as that
obtained by running a C<-DDEBUGGING>-enabled perl interpreter with the
B<-Dr> switch. It may be quite voluminous depending on the complexity
of the match. Using C<debugcolor> instead of C<debug> enables a
form of output that can be used to get a colorful display on terminals
that understand termcap color sequences.
Set C<$ENV{PERL_RE_TC}> to a 517comma-separated list of C<termcap> properties to use for highlighting 518strings on/off, pre-point part on/off. 519See L<perldebug/"Debugging Regular Expressions"> for additional info. 520 521B<NOTE> that the exact format of the C<debug> mode is B<NOT> considered 522to be an officially supported API of Perl. It is intended for debugging 523only and may change as the core development team deems appropriate 524without notice or deprecation in any release of Perl, major or minor. 525Any documentation of the output is purely advisory. 526 527As of 5.9.5 the directive C<use re 'debug'> and its equivalents are 528lexically scoped, as the other directives are. However they have both 529compile-time and run-time effects. 530 531See L<perlmodlib/Pragmatic Modules>. 532 533=head2 'Debug' mode 534 535Similarly C<use re 'Debug'> produces debugging output, the difference 536being that it allows the fine tuning of what debugging output will be 537emitted. Options are divided into three groups, those related to 538compilation, those related to execution and those related to special 539purposes. 540 541B<NOTE> that the options provided under the C<Debug> mode and the exact 542format of the output they create is B<NOT> considered to be an 543officially supported API of Perl. It is intended for debugging only and 544may change as the core development team deems appropriate without notice 545or deprecation in any release of Perl, major or minor. Any documentation 546of the format or options available is advisory only and is subject to 547change without notice. 548 549The options are as follows: 550 551=over 4 552 553=item Compile related options 554 555=over 4 556 557=item COMPILE 558 559Turns on all non-extra compile related debug options. 560 561=item PARSE 562 563Turns on debug output related to the process of parsing the pattern. 564 565=item OPTIMISE 566 567Enables output related to the optimisation phase of compilation. 

=item TRIEC

Detailed info about trie compilation.

=item DUMP

Dump the final program out after it is compiled and optimised.

=item FLAGS

Dump the flags associated with the program.

=item TEST

Print output intended for testing the internals of the compile process.

=back

=item Execute related options

=over 4

=item EXECUTE

Turns on all non-extra execute related debug options.

=item MATCH

Turns on debugging of the main matching loop.

=item TRIEE

Extra debugging of how tries execute.

=item INTUIT

Enable debugging of start-point optimisations.

=back

=item Extra debugging options

=over 4

=item EXTRA

Turns on all "extra" debugging options.

=item BUFFERS

Enable debugging the capture group storage during match. Warning,
this can potentially produce extremely large output.

=item TRIEM

Enable enhanced TRIE debugging. Enhances both TRIEE
and TRIEC.

=item STATE

Enable debugging of states in the engine.

=item STACK

Enable debugging of the recursion stack in the engine. Enabling
or disabling this option automatically does the same for debugging
states as well. The output from this can be quite large.

=item GPOS

Enable debugging of the \G modifier.

=item OPTIMISEM

Enable enhanced optimisation debugging and start-point optimisations.
Probably not useful except when debugging the regexp engine itself.

=item DUMP_PRE_OPTIMIZE

Enable the dumping of the compiled pattern before the optimization phase.

=item WILDCARD

When Perl encounters a wildcard subpattern, (see L<perlunicode/Wildcards in
Property Values>), it suspends compilation of the main pattern, compiles the
subpattern, and then matches that against all legal possibilities to determine
the actual code points the subpattern matches.
After that it adds these to 656the main pattern, and continues its compilation. 657 658You may very well want to see how your subpattern gets compiled, but it is 659likely of less use to you to see how Perl matches that against all the legal 660possibilities, as that is under control of Perl, not you. Therefore, the 661debugging information of the compilation portion is as specified by the other 662options, but the debugging output of the matching portion is normally 663suppressed. 664 665You can use the WILDCARD option to enable the debugging output of this 666subpattern matching. Careful! This can lead to voluminous outputs, and it 667may not make much sense to you what and why Perl is doing what it is. 668But it may be helpful to you to see why things aren't going the way you 669expect. 670 671Note that this option alone doesn't cause any debugging information to be 672output. What it does is stop the normal suppression of execution-related 673debugging information during the matching portion of the compilation of 674wildcards. You also have to specify which execution debugging information you 675want, such as by also including the EXECUTE option. 676 677=back 678 679=item Other useful flags 680 681These are useful shortcuts to save on the typing. 682 683=over 4 684 685=item ALL 686 687Enable all options at once except BUFFERS, WILDCARD, and DUMP_PRE_OPTIMIZE. 688(To get every single option without exception, use both ALL and EXTRA, or 689starting in 5.30 on a C<-DDEBUGGING>-enabled perl interpreter, use 690the B<-Drv> command-line switches.) 691 692=item All 693 694Enable DUMP and all non-extra execute options. Equivalent to: 695 696 use re 'debug'; 697 698=item MORE 699 700=item More 701 702Enable the options enabled by "All", plus STATE, TRIEC, and TRIEM. 703 704=back 705 706=back 707 708As of 5.9.5 the directive C<use re 'debug'> and its equivalents are 709lexically scoped, as are the other directives. 
However they have both 710compile-time and run-time effects. 711 712=head2 Exportable Functions 713 714As of perl 5.9.5, the C<re> module contains a number of utility functions that 715may be optionally exported into the caller's namespace. They are listed below. 716 717=over 4 718 719=item is_regexp($ref) 720 721Returns true if the argument is a compiled regular expression as returned 722by C<qr//>, false if it is not. 723 724This function will not be confused by overloading or blessing. In 725internals terms, this extracts the regexp pointer out of the 726PERL_MAGIC_qr structure so it cannot be fooled. 727 728=item regexp_pattern($ref) 729 730If the argument is a compiled regular expression as returned by C<qr//>, 731then this function returns the pattern. 732 733In list context it returns a two element list, the first element 734containing the pattern and the second containing the modifiers used when 735the pattern was compiled. 736 737 my ($pat, $mods) = regexp_pattern($ref); 738 739In scalar context it returns the same as perl would when stringifying a raw 740C<qr//> with the same pattern inside. If the argument is not a compiled 741reference then this routine returns false but defined in scalar context, 742and the empty list in list context. Thus the following 743 744 if (regexp_pattern($ref) eq '(?^i:foo)') 745 746will be warning free regardless of what $ref actually is. 747 748Like C<is_regexp> this function will not be confused by overloading 749or blessing of the object. 750 751=item regname($name,$all) 752 753Returns the contents of a named buffer of the last successful match. If 754$all is true, then returns an array ref containing one entry per buffer, 755otherwise returns the first defined buffer. 756 757=item regnames($all) 758 759Returns a list of all of the named buffers defined in the last successful 760match. If $all is true, then it returns all names defined, if not it returns 761only names which were involved in the match. 
762 763=item regnames_count() 764 765Returns the number of distinct names defined in the pattern used 766for the last successful match. 767 768B<Note:> this result is always the actual number of distinct 769named buffers defined, it may not actually match that which is 770returned by C<regnames()> and related routines when those routines 771have not been called with the $all parameter set. 772 773=item regmust($ref) 774 775If the argument is a compiled regular expression as returned by C<qr//>, 776then this function returns what the optimiser considers to be the longest 777anchored fixed string and longest floating fixed string in the pattern. 778 779A I<fixed string> is defined as being a substring that must appear for the 780pattern to match. An I<anchored fixed string> is a fixed string that must 781appear at a particular offset from the beginning of the match. A I<floating 782fixed string> is defined as a fixed string that can appear at any point in 783a range of positions relative to the start of the match. For example, 784 785 my $qr = qr/here .* there/x; 786 my ($anchored, $floating) = regmust($qr); 787 print "anchored:'$anchored'\nfloating:'$floating'\n"; 788 789results in 790 791 anchored:'here' 792 floating:'there' 793 794Because the C<here> is before the C<.*> in the pattern, its position 795can be determined exactly. That's not true, however, for the C<there>; 796it could appear at any point after where the anchored string appeared. 797Perl uses both for its optimisations, preferring the longer, or, if they are 798equal, the floating. 799 800B<NOTE:> This may not necessarily be the definitive longest anchored and 801floating string. This will be what the optimiser of the Perl that you 802are using thinks is the longest. If you believe that the result is wrong 803please report it via the L<perlbug> utility. 
804 805=item optimization($ref) 806 807If the argument is a compiled regular expression as returned by C<qr//>, 808then this function returns a hashref of the optimization information 809discovered at compile time, so we can write tests around it. If any 810other argument is given, returns C<undef>. 811 812The hash contents are expected to change from time to time as we develop 813new ways to optimize - no assumption of stability should be made, not 814even between minor versions of perl. 815 816For the current version, the hash will have the following contents: 817 818=over 4 819 820=item minlen 821 822An integer, the least number of characters in any string that can match. 823 824=item minlenret 825 826An integer, the least number of characters that can be in C<$&> after a 827match. (Consider eg C< /ns(?=\d)/ >.) 828 829=item gofs 830 831An integer, the number of characters before C<pos()> to start match at. 832 833=item noscan 834 835A boolean, C<TRUE> to indicate that any anchored/floating substrings 836found should not be used. (CHECKME: apparently this is set for an 837anchored pattern with no floating substring, but never used.) 838 839=item isall 840 841A boolean, C<TRUE> to indicate that the optimizer information is all 842that the regular expression contains, and thus one does not need to 843enter the regexp runtime engine at all. 844 845=item anchor SBOL 846 847A boolean, C<TRUE> if the pattern is anchored to start of string. 848 849=item anchor MBOL 850 851A boolean, C<TRUE> if the pattern is anchored to any start of line 852within the string. 853 854=item anchor GPOS 855 856A boolean, C<TRUE> if the pattern is anchored to the end of the previous 857match. 858 859=item skip 860 861A boolean, C<TRUE> if the start class can match only the first of a run. 862 863=item implicit 864 865A boolean, C<TRUE> if a C</.*/> has been turned implicitly into a C</^.*/>. 
866 867=item anchored/floating 868 869A byte string representing an anchored or floating substring respectively 870that any match must contain, or undef if no such substring was found, or 871if the substring would require utf8 to represent. 872 873=item anchored utf8/floating utf8 874 875A utf8 string representing an anchored or floating substring respectively 876that any match must contain, or undef if no such substring was found, or 877if the substring contains only 7-bit ASCII characters. 878 879=item anchored min offset/floating min offset 880 881An integer, the first offset in characters from a match location at which 882we should look for the corresponding substring. 883 884=item anchored max offset/floating max offset 885 886An integer, the last offset in characters from a match location at which 887we should look for the corresponding substring. 888 889Ignored for anchored, so may be 0 or same as min. 890 891=item anchored end shift/floating end shift 892 893FIXME: not sure what this is, something to do with lookbehind. regcomp.c 894says: 895 When the final pattern is compiled and the data is moved from the 896 scan_data_t structure into the regexp structure the information 897 about lookbehind is factored in, with the information that would 898 have been lost precalculated in the end_shift field for the 899 associated string. 900 901=item checking 902 903A constant string, one of "anchored", "floating" or "none" to indicate 904which substring (if any) should be checked for first. 905 906=item stclass 907 908A string representation of a character class ("start class") that must 909be the first character of any match. 910 911TODO: explain the representations. 912 913=back 914 915=back 916 917=head1 SEE ALSO 918 919L<perlmodlib/Pragmatic Modules>. 920 921=cut 922