xref: /openbsd-src/gnu/usr.bin/perl/t/re/opt.t (revision 256a93a44f36679bee503f12e49566c2183f6181)
1#!./perl
2#
3# ex: set ts=8 sts=4 sw=4 et:
4#
5# Here we test for optimizations in the regexp engine.
6# We try to distinguish between "nice to have" optimizations and those
7# we consider essential: failure of the latter should be considered bugs,
8# while failure of the former should at worst be TODO.
9#
10# Format of data lines is tab-separated: pattern, minlen, anchored, floating,
11# other-options, comment.
12# - pattern will be subject to string eval as "qr{$pattern}".
13# - minlen is a non-negative integer.
14# - anchored/floating are of the form "u23:45+string". If initial "u" is
15#   present we expect a utf8 substring, else a byte substring; subsequent
16#   digits are the min offset; optional /:\d+/ is the max offset (not
17#   supported for anchored; assumed undef if not present for floating);
18#   subsequent '-' or '+' indicates if this is the substring being checked;
19#   "string" is the substring to expect. Use "-" for the whole entry to
20#   indicate no substring of this type.
21# - other-options is a comma-separated list of bare flags or option=value
22#   strings. Those with an initial "T" mark the corresponding test TODO.
23#   Booleans (noscan, isall, skip, implicit, anchor SBOL, anchor MBOL,
24#   anchor GPOS) are expected false if not mentioned, expected true if
25#   supplied as bare flags. stclass may be supplied as a pattern match
26#   as eg "stclass=~^ANYOF".
27# - as a special-case, minlenret is expected to be the same as minlen
28#   unless specified in other-options.
29#
30
31use strict;
32use warnings;
33use 5.010;
34
35$| = 1;
36
37BEGIN {
38    chdir 't' if -d 't';
39    require './test.pl';
40    set_up_inc('../lib');
41    skip_all_if_miniperl("no dynamic loading on miniperl, no re::optimization");
42}
43
44no warnings qw{ experimental };
45use feature qw{ refaliasing declared_refs };
46our \$TODO = \$::TODO;
47
48use re ();
49
50while (<DATA>) {
51    chomp;
52    if (m{^\s*(?:#|\z)}) {
53        # skip blank/comment lines
54        next;
55    }
56    my($pat, $minlen, $anchored, $floating, $other, $comment) = split /\t/;
57    my %todo;
58    my %opt = map {
59        my($k, $v) = split /=/, $_, 2;
60        ($k =~ s/^T//) ? do { $todo{$k} = $v; () } : ($k => $v);
61    } split /,/, $other // '';
62    $comment = (defined $comment && length $comment)
63        ? "$pat ($comment):"
64        : "$pat:";
65
66    my $o = re::optimization(eval "qr{$pat}");
67    ok($o, "$comment compiled ok");
68
69    my $skip = $o ? undef : "could not get info for qr{$pat}";
70    my $test = 0;
71
72    my($got, $expect) = ($o->{minlen}, $minlen);
73    if (exists $todo{minlen}) {
74        ++$test;
75        $skip || ok($got >= $expect, "$comment minlen $got >= $expect");
76        my $todo = $todo{minlen};
77        local $TODO = 1;
78        $skip || is($got, $todo, "$comment minlen $got = $todo");
79    } else {
80        ++$test;
81        $skip || is($got, $expect, "$comment minlen $got = $expect");
82    }
83
84    ($got, $expect) = ($o->{minlenret}, $opt{minlenret} // $minlen);
85    if (exists $todo{minlenret}) {
86        ++$test;
87        $skip || ok($got >= $expect, "$comment minlenret $got >= $expect");
88        my $todo = $todo{minlenret};
89        local $TODO = 1;
90        $skip || is($got, $todo, "$comment minlenret $got = $todo");
91    } else {
92        ++$test;
93        $skip || is($got, $expect, "$comment minlenret $got = $expect");
94    }
95
96    my($autf, $aoff, $acheck, $astr) = ($anchored =~ m{
97        ^ (u?) (\d*) ([-+]) (.*) \z
98    }sx) or die "Can't parse anchored test '$anchored'";
99    if ($autf eq 'u') {
100        ++$test;
101        $skip || is($o->{anchored}, undef, "$comment no anchored");
102        ++$test;
103        local $TODO = 1 if exists $todo{'anchored utf8'};
104        $skip || is($o->{'anchored utf8'}, $astr, "$comment got anchored utf8");
105    } elsif (length $astr) {
106        ++$test;
107        $skip || is($o->{anchored_utf8}, undef, "$comment no anchored utf8");
108        ++$test;
109        local $TODO = 1 if exists $todo{anchored};
110        $skip || is($o->{anchored}, $astr, "$comment got anchored");
111    } else {
112        ++$test;
113        $skip || is($o->{anchored}, undef, "$comment no anchored");
114        ++$test;
115        $skip || is($o->{anchored_utf8}, undef, "$comment no anchored utf8");
116    }
117    # skip offset checks if we failed to find a string
118    my $local_skip = (
119        !$skip && !defined($o->{anchored} // $o->{anchored_utf8})
120    ) ? 'no anchored string' : undef;
121    if (length $aoff) {
122        ++$test;
123        SKIP: {
124            skip($local_skip) if $local_skip;
125            local $TODO = 1 if exists $todo{'anchored min offset'};
126            $skip || is($o->{'anchored min offset'}, $aoff,
127                    "$comment anchored min offset");
128        }
129        # we don't care about anchored max: it may be set same as min or 0
130    }
131
132    my($futf, $fmin, $fmax, $fcheck, $fstr) = ($floating =~ m{
133        ^ (u?) (\d*) (?: : (\d*) )? ([-+]) (.*) \z
134    }sx) or die "Can't parse floating test '$floating'";
135    if ($futf eq 'u') {
136        ++$test;
137        $skip || is($o->{floating}, undef, "$comment no floating");
138        ++$test;
139        local $TODO = 1 if exists $todo{'floating utf8'};
140        $skip || is($o->{'floating utf8'}, $fstr, "$comment got floating utf8");
141    } elsif (length $fstr) {
142        ++$test;
143        $skip || is($o->{floating_utf8}, undef, "$comment no floating utf8");
144        ++$test;
145        local $TODO = 1 if exists $todo{floating};
146        $skip || is($o->{floating}, $fstr, "$comment got floating");
147    } else {
148        ++$test;
149        $skip || is($o->{floating}, undef, "$comment no floating");
150        ++$test;
151        $skip || is($o->{floating_utf8}, undef, "$comment no floating utf8");
152    }
153    # skip offset checks if we failed to find a string
154    $local_skip = (
155        !$skip && !defined($o->{floating} // $o->{floating_utf8})
156    ) ? 'no floating string' : undef;
157    if (length $fmin) {
158        ++$test;
159        SKIP: {
160            skip($local_skip) if $local_skip;
161            local $TODO = 1 if exists $todo{'floating min offset'};
162            $skip || is($o->{'floating min offset'}, $fmin,
163                    "$comment floating min offset");
164        }
165    }
166    if (defined $fmax) {
167        ++$test;
168        SKIP: {
169            skip($local_skip) if $local_skip;
170            local $TODO = 1 if exists $todo{'floating max offset'};
171            $skip || is($o->{'floating max offset'}, $fmax,
172                    "$comment floating max offset");
173        }
174    }
175
176    my $check = ($acheck eq '+') ? 'anchored'
177            : ($fcheck eq '+') ? 'floating'
178            : ($acheck eq '-') ? undef
179            : 'none';
180    $local_skip = (
181        !$skip && $check && (
182            ($check eq 'anchored'
183                    && !defined($o->{anchored} // $o->{anchored_utf8}))
184            || ($check eq 'floating'
185                    && !defined($o->{floating} // $o->{floating_utf8}))
186        )
187    ) ? "$check not found" : undef;
188    if (defined $check) {
189        ++$test;
190        SKIP: {
191            skip($local_skip) if $local_skip;
192            local $TODO = 1 if exists $todo{checking};
193            $skip || is($o->{checking}, $check, "$comment checking $check");
194        }
195    }
196
197    # booleans
198    for (qw{ noscan isall skip implicit },
199        'anchor SBOL', 'anchor MBOL', 'anchor GPOS'
200    ) {
201        my $got = $o->{$_};
202        my $expect = exists($opt{$_}) ? ($opt{$_} // 1) : 0;
203        ++$test;
204        local $TODO = 1 if exists $todo{"T$_"};
205        $skip || is($got, $expect ? 1 : 0, "$comment $_");
206    }
207
208    # integer
209    for (qw{ gofs }) {
210        my $got = $o->{$_};
211        my $expect = $opt{$_} // 0;
212        ++$test;
213        local $TODO = 1 if exists $todo{"T$_"};
214        $skip || is($got, $expect || 0, "$comment $_");
215    }
216
217    # string
218    for (qw{ stclass }) {
219        my $got = $o->{$_};
220        my $expect = $opt{$_};
221        my $qr = (defined($expect) && ($expect =~ s{^~}{})) ? 1 : 0;
222        ++$test;
223        local $TODO = 1 if exists $todo{"T$_"};
224        $skip || ($qr
225            ? like($got, qr{$expect}, "$comment $_")
226            : is($got, $expect, "$comment $_")
227        );
228    }
229
230    skip($skip, $test) if $skip;
231}
232done_testing();
233__END__
234(?:)	0	-	-	Tisall
235
236# various forms of anchored substring
237abc	3	0+abc	-	isall
238.{10}abc	13	10+abc	-	-
239(?i:)abc	3	0+abc	-	isall
240a(?:)bc	3	0+abc	-	isall
241a()bc	3	0+abc	-	-
242a(?i:)bc	3	0+abc	-	isall
243a(b)c	3	0+abc	-	-
244a((?i:b))c	3	0+abc	-	Tanchored
245a[bB]c	3	0+abc	-	Tanchored
246(?=abc)	0	0+abc	-	Tanchored,Tminlen=3,minlenret=0
247abc|abc	3	0+abc	-	isall
248abcd|abce	4	0+abc	-	-
249acde|bcde	4	1+cde	-	Tanchored,stclass=~[ab]
250acdef|bcdeg	5	1+cde	-	Tanchored,stclass=~[ab]
251
252# same as above, floating
253.?abc	3	-	0:1+abc	-
254.?.{10}abc	13	-	10:11+abc	-
255.?(?i:)abc	3	-	0:1+abc	-
256.?a(?:)bc	3	-	0:1+abc	-
257.?a()bc	3	-	0:1+abc	-
258.?a(?i:)bc	3	-	0:1+abc	-
259.?a(b)c	3	-	0+abc	-
260.?a((?i:b))c	3	-	0+abc	Tfloating
261.?a[bB]c	3	-	0:1+abc	Tfloating
262.?(?=abc)	0	-	0:1+abc	Tfloating,Tminlen=3,minlenret=0
263.?(?:abc|abc)	3	-	0:1+abc	-
264.?(?:abcd|abce)	4	-	0:1+abc	-
265.?(?:acde|bcde)	4	-	1:2+cde	Tfloating
266.?(?:acdef|bcdeg)	5	-	1:2+cde	Tfloating
267
268a(b){2,3}c	4	-abb	1+bbc
269a(b|bb)c	3	-ab	1-bc	Tfloating,Tfloating min offset
270a(b|bb){2}c	4	-abb	1-bbc	Tanchored,Tfloating,Tfloating min offset
271
272abc(*COMMIT)xyz	6	0+abc	-	-
273abc(*ACCEPT)xyz	3	0+abc	-	-
274# Must not have stclass=[x]
275(*ACCEPT)xyz	0	-	-	-
276(a(*ACCEPT)){2}	1	0+a	-	-
277