xref: /openbsd-src/gnu/usr.bin/perl/t/re/opt.t (revision 256a93a44f36679bee503f12e49566c2183f6181)
1*256a93a4Safresh1#!./perl
2*256a93a4Safresh1#
3*256a93a4Safresh1# ex: set ts=8 sts=4 sw=4 et:
4*256a93a4Safresh1#
5*256a93a4Safresh1# Here we test for optimizations in the regexp engine.
6*256a93a4Safresh1# We try to distinguish between "nice to have" optimizations and those
7*256a93a4Safresh1# we consider essential: failure of the latter should be considered bugs,
8*256a93a4Safresh1# while failure of the former should at worst be TODO.
9*256a93a4Safresh1#
10*256a93a4Safresh1# Format of data lines is tab-separated: pattern, minlen, anchored, floating,
11*256a93a4Safresh1# other-options, comment.
12*256a93a4Safresh1# - pattern will be subject to string eval as "qr{$pattern}".
13*256a93a4Safresh1# - minlen is a non-negative integer.
14*256a93a4Safresh1# - anchored/floating are of the form "u23:45+string". If initial "u" is
15*256a93a4Safresh1#   present we expect a utf8 substring, else a byte substring; subsequent
16*256a93a4Safresh1#   digits are the min offset; optional /:\d+/ is the max offset (not
17*256a93a4Safresh1#   supported for anchored; assumed undef if not present for floating);
18*256a93a4Safresh1#   subsequent '-' or '+' indicates if this is the substring being checked;
19*256a93a4Safresh1#   "string" is the substring to expect. Use "-" for the whole entry to
20*256a93a4Safresh1#   indicate no substring of this type.
21*256a93a4Safresh1# - other-options is a comma-separated list of bare flags or option=value
22*256a93a4Safresh1#   strings. Those with an initial "T" mark the corresponding test TODO.
23*256a93a4Safresh1#   Booleans (noscan, isall, skip, implicit, anchor SBOL, anchor MBOL,
24*256a93a4Safresh1#   anchor GPOS) are expected false if not mentioned, expected true if
25*256a93a4Safresh1#   supplied as bare flags. stclass may be supplied as a pattern match
26*256a93a4Safresh1#   as eg "stclass=~^ANYOF".
27*256a93a4Safresh1# - as a special-case, minlenret is expected to be the same as minlen
28*256a93a4Safresh1#   unless specified in other-options.
29*256a93a4Safresh1#
30*256a93a4Safresh1
31*256a93a4Safresh1use strict;
32*256a93a4Safresh1use warnings;
33*256a93a4Safresh1use 5.010;
34*256a93a4Safresh1
35*256a93a4Safresh1$| = 1;
36*256a93a4Safresh1
37*256a93a4Safresh1BEGIN {
38*256a93a4Safresh1    chdir 't' if -d 't';
39*256a93a4Safresh1    require './test.pl';
40*256a93a4Safresh1    set_up_inc('../lib');
41*256a93a4Safresh1    skip_all_if_miniperl("no dynamic loading on miniperl, no re::optimization");
42*256a93a4Safresh1}
43*256a93a4Safresh1
44*256a93a4Safresh1no warnings qw{ experimental };
45*256a93a4Safresh1use feature qw{ refaliasing declared_refs };
46*256a93a4Safresh1our \$TODO = \$::TODO;
47*256a93a4Safresh1
48*256a93a4Safresh1use re ();
49*256a93a4Safresh1
50*256a93a4Safresh1while (<DATA>) {
51*256a93a4Safresh1    chomp;
52*256a93a4Safresh1    if (m{^\s*(?:#|\z)}) {
53*256a93a4Safresh1        # skip blank/comment lines
54*256a93a4Safresh1        next;
55*256a93a4Safresh1    }
56*256a93a4Safresh1    my($pat, $minlen, $anchored, $floating, $other, $comment) = split /\t/;
57*256a93a4Safresh1    my %todo;
58*256a93a4Safresh1    my %opt = map {
59*256a93a4Safresh1        my($k, $v) = split /=/, $_, 2;
60*256a93a4Safresh1        ($k =~ s/^T//) ? do { $todo{$k} = $v; () } : ($k => $v);
61*256a93a4Safresh1    } split /,/, $other // '';
62*256a93a4Safresh1    $comment = (defined $comment && length $comment)
63*256a93a4Safresh1        ? "$pat ($comment):"
64*256a93a4Safresh1        : "$pat:";
65*256a93a4Safresh1
66*256a93a4Safresh1    my $o = re::optimization(eval "qr{$pat}");
67*256a93a4Safresh1    ok($o, "$comment compiled ok");
68*256a93a4Safresh1
69*256a93a4Safresh1    my $skip = $o ? undef : "could not get info for qr{$pat}";
70*256a93a4Safresh1    my $test = 0;
71*256a93a4Safresh1
72*256a93a4Safresh1    my($got, $expect) = ($o->{minlen}, $minlen);
73*256a93a4Safresh1    if (exists $todo{minlen}) {
74*256a93a4Safresh1        ++$test;
75*256a93a4Safresh1        $skip || ok($got >= $expect, "$comment minlen $got >= $expect");
76*256a93a4Safresh1        my $todo = $todo{minlen};
77*256a93a4Safresh1        local $TODO = 1;
78*256a93a4Safresh1        $skip || is($got, $todo, "$comment minlen $got = $todo");
79*256a93a4Safresh1    } else {
80*256a93a4Safresh1        ++$test;
81*256a93a4Safresh1        $skip || is($got, $expect, "$comment minlen $got = $expect");
82*256a93a4Safresh1    }
83*256a93a4Safresh1
84*256a93a4Safresh1    ($got, $expect) = ($o->{minlenret}, $opt{minlenret} // $minlen);
85*256a93a4Safresh1    if (exists $todo{minlenret}) {
86*256a93a4Safresh1        ++$test;
87*256a93a4Safresh1        $skip || ok($got >= $expect, "$comment minlenret $got >= $expect");
88*256a93a4Safresh1        my $todo = $todo{minlenret};
89*256a93a4Safresh1        local $TODO = 1;
90*256a93a4Safresh1        $skip || is($got, $todo, "$comment minlenret $got = $todo");
91*256a93a4Safresh1    } else {
92*256a93a4Safresh1        ++$test;
93*256a93a4Safresh1        $skip || is($got, $expect, "$comment minlenret $got = $expect");
94*256a93a4Safresh1    }
95*256a93a4Safresh1
96*256a93a4Safresh1    my($autf, $aoff, $acheck, $astr) = ($anchored =~ m{
97*256a93a4Safresh1        ^ (u?) (\d*) ([-+]) (.*) \z
98*256a93a4Safresh1    }sx) or die "Can't parse anchored test '$anchored'";
99*256a93a4Safresh1    if ($autf eq 'u') {
100*256a93a4Safresh1        ++$test;
101*256a93a4Safresh1        $skip || is($o->{anchored}, undef, "$comment no anchored");
102*256a93a4Safresh1        ++$test;
103*256a93a4Safresh1        local $TODO = 1 if exists $todo{'anchored utf8'};
104*256a93a4Safresh1        $skip || is($o->{'anchored utf8'}, $astr, "$comment got anchored utf8");
105*256a93a4Safresh1    } elsif (length $astr) {
106*256a93a4Safresh1        ++$test;
107*256a93a4Safresh1        $skip || is($o->{anchored_utf8}, undef, "$comment no anchored utf8");
108*256a93a4Safresh1        ++$test;
109*256a93a4Safresh1        local $TODO = 1 if exists $todo{anchored};
110*256a93a4Safresh1        $skip || is($o->{anchored}, $astr, "$comment got anchored");
111*256a93a4Safresh1    } else {
112*256a93a4Safresh1        ++$test;
113*256a93a4Safresh1        $skip || is($o->{anchored}, undef, "$comment no anchored");
114*256a93a4Safresh1        ++$test;
115*256a93a4Safresh1        $skip || is($o->{anchored_utf8}, undef, "$comment no anchored utf8");
116*256a93a4Safresh1    }
117*256a93a4Safresh1    # skip offset checks if we failed to find a string
118*256a93a4Safresh1    my $local_skip = (
119*256a93a4Safresh1        !$skip && !defined($o->{anchored} // $o->{anchored_utf8})
120*256a93a4Safresh1    ) ? 'no anchored string' : undef;
121*256a93a4Safresh1    if (length $aoff) {
122*256a93a4Safresh1        ++$test;
123*256a93a4Safresh1        SKIP: {
124*256a93a4Safresh1            skip($local_skip) if $local_skip;
125*256a93a4Safresh1            local $TODO = 1 if exists $todo{'anchored min offset'};
126*256a93a4Safresh1            $skip || is($o->{'anchored min offset'}, $aoff,
127*256a93a4Safresh1                    "$comment anchored min offset");
128*256a93a4Safresh1        }
129*256a93a4Safresh1        # we don't care about anchored max: it may be set same as min or 0
130*256a93a4Safresh1    }
131*256a93a4Safresh1
132*256a93a4Safresh1    my($futf, $fmin, $fmax, $fcheck, $fstr) = ($floating =~ m{
133*256a93a4Safresh1        ^ (u?) (\d*) (?: : (\d*) )? ([-+]) (.*) \z
134*256a93a4Safresh1    }sx) or die "Can't parse floating test '$floating'";
135*256a93a4Safresh1    if ($futf eq 'u') {
136*256a93a4Safresh1        ++$test;
137*256a93a4Safresh1        $skip || is($o->{floating}, undef, "$comment no floating");
138*256a93a4Safresh1        ++$test;
139*256a93a4Safresh1        local $TODO = 1 if exists $todo{'floating utf8'};
140*256a93a4Safresh1        $skip || is($o->{'floating utf8'}, $fstr, "$comment got floating utf8");
141*256a93a4Safresh1    } elsif (length $fstr) {
142*256a93a4Safresh1        ++$test;
143*256a93a4Safresh1        $skip || is($o->{floating_utf8}, undef, "$comment no floating utf8");
144*256a93a4Safresh1        ++$test;
145*256a93a4Safresh1        local $TODO = 1 if exists $todo{floating};
146*256a93a4Safresh1        $skip || is($o->{floating}, $fstr, "$comment got floating");
147*256a93a4Safresh1    } else {
148*256a93a4Safresh1        ++$test;
149*256a93a4Safresh1        $skip || is($o->{floating}, undef, "$comment no floating");
150*256a93a4Safresh1        ++$test;
151*256a93a4Safresh1        $skip || is($o->{floating_utf8}, undef, "$comment no floating utf8");
152*256a93a4Safresh1    }
153*256a93a4Safresh1    # skip offset checks if we failed to find a string
154*256a93a4Safresh1    $local_skip = (
155*256a93a4Safresh1        !$skip && !defined($o->{floating} // $o->{floating_utf8})
156*256a93a4Safresh1    ) ? 'no floating string' : undef;
157*256a93a4Safresh1    if (length $fmin) {
158*256a93a4Safresh1        ++$test;
159*256a93a4Safresh1        SKIP: {
160*256a93a4Safresh1            skip($local_skip) if $local_skip;
161*256a93a4Safresh1            local $TODO = 1 if exists $todo{'floating min offset'};
162*256a93a4Safresh1            $skip || is($o->{'floating min offset'}, $fmin,
163*256a93a4Safresh1                    "$comment floating min offset");
164*256a93a4Safresh1        }
165*256a93a4Safresh1    }
166*256a93a4Safresh1    if (defined $fmax) {
167*256a93a4Safresh1        ++$test;
168*256a93a4Safresh1        SKIP: {
169*256a93a4Safresh1            skip($local_skip) if $local_skip;
170*256a93a4Safresh1            local $TODO = 1 if exists $todo{'floating max offset'};
171*256a93a4Safresh1            $skip || is($o->{'floating max offset'}, $fmax,
172*256a93a4Safresh1                    "$comment floating max offset");
173*256a93a4Safresh1        }
174*256a93a4Safresh1    }
175*256a93a4Safresh1
176*256a93a4Safresh1    my $check = ($acheck eq '+') ? 'anchored'
177*256a93a4Safresh1            : ($fcheck eq '+') ? 'floating'
178*256a93a4Safresh1            : ($acheck eq '-') ? undef
179*256a93a4Safresh1            : 'none';
180*256a93a4Safresh1    $local_skip = (
181*256a93a4Safresh1        !$skip && $check && (
182*256a93a4Safresh1            ($check eq 'anchored'
183*256a93a4Safresh1                    && !defined($o->{anchored} // $o->{anchored_utf8}))
184*256a93a4Safresh1            || ($check eq 'floating'
185*256a93a4Safresh1                    && !defined($o->{floating} // $o->{floating_utf8}))
186*256a93a4Safresh1        )
187*256a93a4Safresh1    ) ? "$check not found" : undef;
188*256a93a4Safresh1    if (defined $check) {
189*256a93a4Safresh1        ++$test;
190*256a93a4Safresh1        SKIP: {
191*256a93a4Safresh1            skip($local_skip) if $local_skip;
192*256a93a4Safresh1            local $TODO = 1 if exists $todo{checking};
193*256a93a4Safresh1            $skip || is($o->{checking}, $check, "$comment checking $check");
194*256a93a4Safresh1        }
195*256a93a4Safresh1    }
196*256a93a4Safresh1
197*256a93a4Safresh1    # booleans
198*256a93a4Safresh1    for (qw{ noscan isall skip implicit },
199*256a93a4Safresh1        'anchor SBOL', 'anchor MBOL', 'anchor GPOS'
200*256a93a4Safresh1    ) {
201*256a93a4Safresh1        my $got = $o->{$_};
202*256a93a4Safresh1        my $expect = exists($opt{$_}) ? ($opt{$_} // 1) : 0;
203*256a93a4Safresh1        ++$test;
204*256a93a4Safresh1        local $TODO = 1 if exists $todo{"T$_"};
205*256a93a4Safresh1        $skip || is($got, $expect ? 1 : 0, "$comment $_");
206*256a93a4Safresh1    }
207*256a93a4Safresh1
208*256a93a4Safresh1    # integer
209*256a93a4Safresh1    for (qw{ gofs }) {
210*256a93a4Safresh1        my $got = $o->{$_};
211*256a93a4Safresh1        my $expect = $opt{$_} // 0;
212*256a93a4Safresh1        ++$test;
213*256a93a4Safresh1        local $TODO = 1 if exists $todo{"T$_"};
214*256a93a4Safresh1        $skip || is($got, $expect || 0, "$comment $_");
215*256a93a4Safresh1    }
216*256a93a4Safresh1
217*256a93a4Safresh1    # string
218*256a93a4Safresh1    for (qw{ stclass }) {
219*256a93a4Safresh1        my $got = $o->{$_};
220*256a93a4Safresh1        my $expect = $opt{$_};
221*256a93a4Safresh1        my $qr = (defined($expect) && ($expect =~ s{^~}{})) ? 1 : 0;
222*256a93a4Safresh1        ++$test;
223*256a93a4Safresh1        local $TODO = 1 if exists $todo{"T$_"};
224*256a93a4Safresh1        $skip || ($qr
225*256a93a4Safresh1            ? like($got, qr{$expect}, "$comment $_")
226*256a93a4Safresh1            : is($got, $expect, "$comment $_")
227*256a93a4Safresh1        );
228*256a93a4Safresh1    }
229*256a93a4Safresh1
230*256a93a4Safresh1    skip($skip, $test) if $skip;
231*256a93a4Safresh1}
232*256a93a4Safresh1done_testing();
233*256a93a4Safresh1__END__
234*256a93a4Safresh1(?:)	0	-	-	Tisall
235*256a93a4Safresh1
236*256a93a4Safresh1# various forms of anchored substring
237*256a93a4Safresh1abc	3	0+abc	-	isall
238*256a93a4Safresh1.{10}abc	13	10+abc	-	-
239*256a93a4Safresh1(?i:)abc	3	0+abc	-	isall
240*256a93a4Safresh1a(?:)bc	3	0+abc	-	isall
241*256a93a4Safresh1a()bc	3	0+abc	-	-
242*256a93a4Safresh1a(?i:)bc	3	0+abc	-	isall
243*256a93a4Safresh1a(b)c	3	0+abc	-	-
244*256a93a4Safresh1a((?i:b))c	3	0+abc	-	Tanchored
245*256a93a4Safresh1a[bB]c	3	0+abc	-	Tanchored
246*256a93a4Safresh1(?=abc)	0	0+abc	-	Tanchored,Tminlen=3,minlenret=0
247*256a93a4Safresh1abc|abc	3	0+abc	-	isall
248*256a93a4Safresh1abcd|abce	4	0+abc	-	-
249*256a93a4Safresh1acde|bcde	4	1+cde	-	Tanchored,stclass=~[ab]
250*256a93a4Safresh1acdef|bcdeg	5	1+cde	-	Tanchored,stclass=~[ab]
251*256a93a4Safresh1
252*256a93a4Safresh1# same as above, floating
253*256a93a4Safresh1.?abc	3	-	0:1+abc	-
254*256a93a4Safresh1.?.{10}abc	13	-	10:11+abc	-
255*256a93a4Safresh1.?(?i:)abc	3	-	0:1+abc	-
256*256a93a4Safresh1.?a(?:)bc	3	-	0:1+abc	-
257*256a93a4Safresh1.?a()bc	3	-	0:1+abc	-
258*256a93a4Safresh1.?a(?i:)bc	3	-	0:1+abc	-
259*256a93a4Safresh1.?a(b)c	3	-	0+abc	-
260*256a93a4Safresh1.?a((?i:b))c	3	-	0+abc	Tfloating
261*256a93a4Safresh1.?a[bB]c	3	-	0:1+abc	Tfloating
262*256a93a4Safresh1.?(?=abc)	0	-	0:1+abc	Tfloating,Tminlen=3,minlenret=0
263*256a93a4Safresh1.?(?:abc|abc)	3	-	0:1+abc	-
264*256a93a4Safresh1.?(?:abcd|abce)	4	-	0:1+abc	-
265*256a93a4Safresh1.?(?:acde|bcde)	4	-	1:2+cde	Tfloating
266*256a93a4Safresh1.?(?:acdef|bcdeg)	5	-	1:2+cde	Tfloating
267*256a93a4Safresh1
268*256a93a4Safresh1a(b){2,3}c	4	-abb	1+bbc
269*256a93a4Safresh1a(b|bb)c	3	-ab	1-bc	Tfloating,Tfloating min offset
270*256a93a4Safresh1a(b|bb){2}c	4	-abb	1-bbc	Tanchored,Tfloating,Tfloating min offset
271*256a93a4Safresh1
272*256a93a4Safresh1abc(*COMMIT)xyz	6	0+abc	-	-
273*256a93a4Safresh1abc(*ACCEPT)xyz	3	0+abc	-	-
274*256a93a4Safresh1# Must not have stclass=[x]
275*256a93a4Safresh1(*ACCEPT)xyz	0	-	-	-
276*256a93a4Safresh1(a(*ACCEPT)){2}	1	0+a	-	-
277