xref: /openbsd-src/gnu/usr.bin/perl/t/re/fold_grind.pl (revision 5486feefcc8cb79b19e014ab332cc5dfd05b3b33)
1f3efcd01Safresh1# Grind out a lot of combinatoric tests for folding.
2f3efcd01Safresh1# It uses various charset modifiers, passed in via $::TEST_CHUNK.  The caller
3f3efcd01Safresh1# will also have set the locale to use if /l is the modifier.
4f3efcd01Safresh1#   L is a pseudo-modifier that indicates to use the modifier /l instead, and
5f3efcd01Safresh1#     the locale set by the caller is known to be UTF-8,
6f3efcd01Safresh1#   T is a pseudo-modifier that indicates to use the pseudo modifier /L
7f3efcd01Safresh1#     instead, and the locale set by the caller is known to be Turkic UTF-8,
8f3efcd01Safresh1
9f3efcd01Safresh1binmode STDOUT, ":utf8";
10f3efcd01Safresh1
11f3efcd01Safresh1BEGIN {
12f3efcd01Safresh1    chdir 't' if -d 't';
13f3efcd01Safresh1    require './test.pl';
14f3efcd01Safresh1    set_up_inc('../lib');
15*5486feefSafresh1    require Config; Config->import;
16f3efcd01Safresh1    skip_all_if_miniperl("no dynamic loading on miniperl, no Encode nor POSIX");
17f3efcd01Safresh1    if ($^O eq 'dec_osf') {
18f3efcd01Safresh1      skip_all("$^O cannot handle this test");
19f3efcd01Safresh1    }
20f3efcd01Safresh1
21f2a19305Safresh1    watchdog(5 * 60);
22f3efcd01Safresh1    require './loc_tools.pl';
23f3efcd01Safresh1}
24f3efcd01Safresh1
25f3efcd01Safresh1use charnames ":full";
26f3efcd01Safresh1
27f3efcd01Safresh1my $DEBUG = 0;  # Outputs extra information for debugging this .t
28f3efcd01Safresh1
29f3efcd01Safresh1use strict;
30f3efcd01Safresh1use warnings;
31f3efcd01Safresh1no warnings 'locale';   # Plenty of these would otherwise get generated
32f3efcd01Safresh1use Encode;
33f3efcd01Safresh1use POSIX;
34f3efcd01Safresh1
35f3efcd01Safresh1my $charset = $::TEST_CHUNK;
36f3efcd01Safresh1my $use_turkic_rules = 0;
37f3efcd01Safresh1
38f3efcd01Safresh1if ($charset eq 'T') {
39f3efcd01Safresh1    $charset = 'L';
40f3efcd01Safresh1    $use_turkic_rules = 1;
41f3efcd01Safresh1}
42f3efcd01Safresh1
43f2a19305Safresh1my $has_LC_CTYPE = is_category_valid('LC_CTYPE');
44f2a19305Safresh1
45f3efcd01Safresh1# Special-cased characters in the .c's that we want to make sure get tested.
46f3efcd01Safresh1my %be_sure_to_test = (
47f3efcd01Safresh1        chr utf8::unicode_to_native(0xDF) => 1, # LATIN_SMALL_LETTER_SHARP_S
48256a93a4Safresh1
49256a93a4Safresh1        # This is included because the uppercase occupies more bytes, but the
50256a93a4Safresh1        # first two bytes of their representations differ only in one bit,
51256a93a4Safresh1        # that could lead the code looking for shortcuts astray; you can't do
52256a93a4Safresh1        # certain shortcuts if the lengths differ
53256a93a4Safresh1        "\x{29E}" => 1, # LATIN SMALL LETTER TURNED K
54256a93a4Safresh1
55f3efcd01Safresh1        "\x{390}" => 1, # GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA_AND_TONOS
56f3efcd01Safresh1        "\x{3B0}" => 1, # GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA_AND_TONOS
57256a93a4Safresh1
58256a93a4Safresh1        # This is included because the uppercase and lowercase differ by only
59256a93a4Safresh1        # a single bit and it is in the first of the two byte representations.
60256a93a4Safresh1        # This showed that a previous way was erroneous of calculating if
61256a93a4Safresh1        # initial substrings were closely-related bit-wise.
62256a93a4Safresh1        "\x{3CC}" => 1, # GREEK SMALL LETTER OMICRON WITH TONOS
63256a93a4Safresh1
64256a93a4Safresh1        "\x{1E9E}" => 1, # LATIN_CAPITAL_LETTER_SHARP_S
65f3efcd01Safresh1        "\x{1FD3}" => 1, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
66f3efcd01Safresh1        "\x{1FE3}" => 1, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
67256a93a4Safresh1
68256a93a4Safresh1        # These are included because they are adjacent and fold to the same
69256a93a4Safresh1        # result, U+01C6.  This has tripped up the code in the past that
70256a93a4Safresh1        # wrongly thought that sequential code points must fold to sequential
71256a93a4Safresh1        # code points
72256a93a4Safresh1        "\x{01C4}" => 1, # LATIN CAPITAL LETTER DZ WITH CARON
73256a93a4Safresh1        "\x{01C5}" => 1, # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
74256a93a4Safresh1
75f3efcd01Safresh1        "I" => 1,
76f3efcd01Safresh1);
77f3efcd01Safresh1
78f3efcd01Safresh1# Tests both unicode and not, so make sure not implicitly testing unicode
79f3efcd01Safresh1no feature 'unicode_strings';
80f3efcd01Safresh1
81f3efcd01Safresh1# Case-insensitive matching is a large and complicated issue.  Perl does not
82f3efcd01Safresh1# implement it fully, properly.  For example, it doesn't include normalization
83f3efcd01Safresh1# as part of the equation.  To test every conceivable combination is clearly
84f3efcd01Safresh1# impossible; these tests are mostly drawn from visual inspection of the code
85f3efcd01Safresh1# and experience, trying to exercise all areas.
86f3efcd01Safresh1
87f3efcd01Safresh1# There are three basic ranges of characters that Perl may treat differently:
88f3efcd01Safresh1# 1) Invariants under utf8 which on ASCII-ish machines are ASCII, and are
89f3efcd01Safresh1#    referred to here as ASCII.  On EBCDIC machines, the non-ASCII invariants
90f3efcd01Safresh1#    are all controls that fold to themselves.
91f3efcd01Safresh1my $ASCII = 1;
92f3efcd01Safresh1
93f3efcd01Safresh1# 2) Other characters that fit into a byte but are different in utf8 than not;
94f3efcd01Safresh1#    here referred to, taking some liberties, as Latin1.
95f3efcd01Safresh1my $Latin1 = 2;
96f3efcd01Safresh1
97f3efcd01Safresh1# 3) Characters that won't fit in a byte; here referred to as Unicode
98f3efcd01Safresh1my $Unicode = 3;
99f3efcd01Safresh1
100f3efcd01Safresh1# Within these basic groups are equivalence classes that testing any character
101f3efcd01Safresh1# in is likely to lead to the same results as any other character.  This is
102f3efcd01Safresh1# used to cut down the number of tests needed, unless PERL_RUN_SLOW_TESTS is
103f3efcd01Safresh1# set.
104f3efcd01Safresh1my $skip_apparently_redundant = ! $ENV{PERL_RUN_SLOW_TESTS};
105f3efcd01Safresh1
106f3efcd01Safresh1# Additionally parts of this test run a lot of subtests, outputting the
107f3efcd01Safresh1# resulting TAP can be expensive so the tests are summarised internally. The
108f3efcd01Safresh1# PERL_DEBUG_FULL_TEST environment variable can be set to produce the full
109f3efcd01Safresh1# output for debugging purposes.
110f3efcd01Safresh1
111f3efcd01Safresh1sub range_type {
112f3efcd01Safresh1    my $ord = ord shift;
113f3efcd01Safresh1
114f3efcd01Safresh1    return $ASCII if utf8::native_to_unicode($ord) < 128;
115f3efcd01Safresh1    return $Latin1 if $ord < 256;
116f3efcd01Safresh1    return $Unicode;
117f3efcd01Safresh1}
118f3efcd01Safresh1
119f3efcd01Safresh1sub numerically {
120f3efcd01Safresh1    return $a <=> $b
121f3efcd01Safresh1}
122f3efcd01Safresh1
123f3efcd01Safresh1my $list_all_tests = $ENV{PERL_DEBUG_FULL_TEST} || $DEBUG;
124f3efcd01Safresh1$| = 1 if $list_all_tests;
125f3efcd01Safresh1
126f3efcd01Safresh1# Significant time is saved by not outputting each test but grouping the
127f3efcd01Safresh1# output into subtests
128f3efcd01Safresh1my $okays;          # Number of ok's in current subtest
129f3efcd01Safresh1my $this_iteration; # Number of possible tests in current subtest
130f3efcd01Safresh1my $count = 0;      # Number of subtests = number of total tests
131f3efcd01Safresh1
132f3efcd01Safresh1sub run_test($$$$) {
133f3efcd01Safresh1    my ($test, $todo, $do_we_output_locale_name, $debug) = @_;
134f3efcd01Safresh1
135f3efcd01Safresh1    $debug = "" unless $DEBUG;
136f3efcd01Safresh1    my $res = eval $test;
137f3efcd01Safresh1
138f3efcd01Safresh1    if ($do_we_output_locale_name) {
139f3efcd01Safresh1        $do_we_output_locale_name = 'setlocale(LC_CTYPE, "'
140f3efcd01Safresh1                         .  POSIX::setlocale(&POSIX::LC_CTYPE)
141f3efcd01Safresh1                         . '"); ';
142f3efcd01Safresh1    }
143f3efcd01Safresh1    if (!$res || $list_all_tests) {
144f3efcd01Safresh1      # Failed or debug; output the result
145f3efcd01Safresh1      $count++;
146f3efcd01Safresh1      ok($res, "$do_we_output_locale_name$test; $debug");
147f3efcd01Safresh1    } else {
148f3efcd01Safresh1      # Just count the test as passed
149f3efcd01Safresh1      $okays++;
150f3efcd01Safresh1    }
151f3efcd01Safresh1    $this_iteration++;
152f3efcd01Safresh1}
153f3efcd01Safresh1
154f3efcd01Safresh1my %has_test_by_participants;   # Makes sure has tests for each range and each
155f3efcd01Safresh1                                # number of characters that fold to the same
156f3efcd01Safresh1                                # thing
157f3efcd01Safresh1my %has_test_by_byte_count; # Makes sure has tests for each combination of
158f3efcd01Safresh1                            # n bytes folds to m bytes
159f3efcd01Safresh1
160f3efcd01Safresh1my %tests; # The set of tests we expect to pass.
161f3efcd01Safresh1# Each key is a code point that folds to something else.
162f3efcd01Safresh1# Each value is a list of things that the key folds to.  If the 'thing' is a
163f3efcd01Safresh1# single code point, it is that ordinal.  If it is a multi-char fold, it is an
164f3efcd01Safresh1# ordered list of the code points in that fold.  Here's an example for 'S':
165f3efcd01Safresh1#  '83' => [ 115, 383 ]
166f3efcd01Safresh1#
167f3efcd01Safresh1# And one for a multi-char fold: \xDF
168f3efcd01Safresh1#  223 => [
169f3efcd01Safresh1#            [  # 'ss'
170f3efcd01Safresh1#                83,
171f3efcd01Safresh1#                83
172f3efcd01Safresh1#            ],
173f3efcd01Safresh1#            [  # 'SS'
174f3efcd01Safresh1#                115,
175f3efcd01Safresh1#                115
176f3efcd01Safresh1#            ],
177f3efcd01Safresh1#            [  # LATIN SMALL LETTER LONG S
178f3efcd01Safresh1#                383,
179f3efcd01Safresh1#                383
180f3efcd01Safresh1#            ],
181f3efcd01Safresh1#          7838 # LATIN_CAPITAL_LETTER_SHARP_S
182f3efcd01Safresh1#        ],
183f3efcd01Safresh1
184f3efcd01Safresh1my %neg_tests;  # Same format, but we expect these tests to fail
185f3efcd01Safresh1
186f3efcd01Safresh1my %folds; # keys are code points that fold; values are either 0 or 1 which
187f3efcd01Safresh1           # in turn are keys with their values each a list of code points the
188f3efcd01Safresh1           # code point key folds to.  The folds under 1 are the ones that are
189f3efcd01Safresh1           # valid in this run; the ones under 0 are ones valid under other
190f3efcd01Safresh1           # circumstances.
191f3efcd01Safresh1
192f3efcd01Safresh1my %inverse_folds;  # keys are strings of the folded-to; then come a layer of
193f3efcd01Safresh1                    # 0 or 1, like %folds.  The lowest values are lists of
194f3efcd01Safresh1                    # characters that fold to them
195f3efcd01Safresh1
196f3efcd01Safresh1# Here's a portion of an %inverse_folds in a run where Turkic folds are not
197f3efcd01Safresh1# legal, so \x{130} doesn't fold to 'i' in this run.
198f3efcd01Safresh1#         'h' => {
199f3efcd01Safresh1#                  '1' => [
200f3efcd01Safresh1#                           'H'
201f3efcd01Safresh1#                         ]
202f3efcd01Safresh1#                },
203f3efcd01Safresh1#         "h\x{331}" => {
204f3efcd01Safresh1#                         '1' => [
205f3efcd01Safresh1#                                  "\x{1e96}"
206f3efcd01Safresh1#                                ]
207f3efcd01Safresh1#                       },
208f3efcd01Safresh1#         'i' => {
209f3efcd01Safresh1#                  '0' => [
210f3efcd01Safresh1#                           "\x{130}"
211f3efcd01Safresh1#                         ],
212f3efcd01Safresh1#                  '1' => [
213f3efcd01Safresh1#                           'I'
214f3efcd01Safresh1#                         ]
215f3efcd01Safresh1#                },
216f3efcd01Safresh1#         "i\x{307}" => {
217f3efcd01Safresh1#                         '1' => [
218f3efcd01Safresh1#                                  "\x{130}"
219f3efcd01Safresh1#                                ]
220f3efcd01Safresh1#                       },
221f3efcd01Safresh1#         'j' => {
222f3efcd01Safresh1#                  '1' => [
223f3efcd01Safresh1#                           'J'
224f3efcd01Safresh1#                         ]
225f3efcd01Safresh1#                },
226f3efcd01Safresh1
227f3efcd01Safresh1sub add_test($$@) {
228f3efcd01Safresh1    my ($tests_ref, $to, @from) = @_;
229f3efcd01Safresh1
230f3efcd01Safresh1    # Called to cause the input to be tested by adding to $%tests_ref.  @from
231f3efcd01Safresh1    # is the list of characters that fold to the string $to.  @from should be
232f3efcd01Safresh1    # sorted so the lowest code point is first....
233f3efcd01Safresh1    # The input is in string form; %tests uses code points, so have to
234f3efcd01Safresh1    # convert.
235f3efcd01Safresh1
236f3efcd01Safresh1    my $to_chars = length $to;
237f3efcd01Safresh1    my @test_to;        # List of tests for $to
238f3efcd01Safresh1
239f3efcd01Safresh1    if ($to_chars == 1) {
240f3efcd01Safresh1        @test_to = ord $to;
241f3efcd01Safresh1    }
242f3efcd01Safresh1    else {
243f3efcd01Safresh1        push @test_to, [ map { ord $_ } split "", $to ];
244f3efcd01Safresh1
245f3efcd01Safresh1        # For multi-char folds, we also test that things that can fold to each
246f3efcd01Safresh1        # individual character in the fold also work.  If we were testing
247f3efcd01Safresh1        # comprehensively, we would try every combination of upper and lower
248f3efcd01Safresh1        # case in the fold, but it will have to suffice to avoid running
249f3efcd01Safresh1        # forever to make sure that each thing that folds to these is tested
250f3efcd01Safresh1        # at least once.  Because of complement matching ([^...]), we need to
251f3efcd01Safresh1        # do both the folded, and the folded-from.
252f3efcd01Safresh1        # We first look at each character in the multi-char fold, and save how
253f3efcd01Safresh1        # many characters fold to it; and also the maximum number of such
254f3efcd01Safresh1        # folds
255f3efcd01Safresh1        my @folds_to_count;     # 0th char in fold is index 0 ...
256f3efcd01Safresh1        my $max_folds_to = 0;
257f3efcd01Safresh1
258f3efcd01Safresh1        for (my $i = 0; $i < $to_chars; $i++) {
259f3efcd01Safresh1            my $to_char = substr($to, $i, 1);
260f3efcd01Safresh1            if (exists $inverse_folds{$to_char}{1}) {
261f3efcd01Safresh1                $folds_to_count[$i] = scalar @{$inverse_folds{$to_char}{1}};
262f3efcd01Safresh1                $max_folds_to = $folds_to_count[$i] if $max_folds_to < $folds_to_count[$i];
263f3efcd01Safresh1            }
264f3efcd01Safresh1            else {
265f3efcd01Safresh1                $folds_to_count[$i] = 0;
266f3efcd01Safresh1            }
267f3efcd01Safresh1        }
268f3efcd01Safresh1
269f3efcd01Safresh1        # We will need to generate as many tests as the maximum number of
270f3efcd01Safresh1        # folds, so that each fold will have at least one test.
271f3efcd01Safresh1        # For example, consider character X which folds to the three character
272f3efcd01Safresh1        # string 'xyz'.  If 2 things fold to x (X and x), 4 to y (Y, Y'
273f3efcd01Safresh1        # (Y-prime), Y'' (Y-prime-prime), and y), and 1 thing to z (itself), 4
274f3efcd01Safresh1        # tests will be generated:
275f3efcd01Safresh1        #   xyz
276f3efcd01Safresh1        #   XYz
277f3efcd01Safresh1        #   xY'z
278f3efcd01Safresh1        #   xY''z
279f3efcd01Safresh1        for (my $i = 0; $i < $max_folds_to; $i++) {
280f3efcd01Safresh1            my @this_test_to;   # Assemble a single test
281f3efcd01Safresh1
282f3efcd01Safresh1            # For each character in the multi-char fold ...
283f3efcd01Safresh1            for (my $j = 0; $j < $to_chars; $j++) {
284f3efcd01Safresh1                my $this_char = substr($to, $j, 1);
285f3efcd01Safresh1
286f3efcd01Safresh1                # Use its corresponding inverse fold, if available.
287f3efcd01Safresh1                if (   $i < $folds_to_count[$j]
288f3efcd01Safresh1                    && exists $inverse_folds{$this_char}{1})
289f3efcd01Safresh1                  {
290f3efcd01Safresh1                    push @this_test_to, ord $inverse_folds{$this_char}{1}[$i];
291f3efcd01Safresh1                }
292f3efcd01Safresh1                else {  # Or else itself.
293f3efcd01Safresh1                    push @this_test_to, ord $this_char;
294f3efcd01Safresh1                }
295f3efcd01Safresh1            }
296f3efcd01Safresh1
297f3efcd01Safresh1            # Add this test to the list
298f3efcd01Safresh1            push @test_to, [ @this_test_to ];
299f3efcd01Safresh1        }
300f3efcd01Safresh1
301f3efcd01Safresh1        # Here, have assembled all the tests for the multi-char fold.  Sort so
302f3efcd01Safresh1        # lowest code points are first for consistency and aesthetics in
303f3efcd01Safresh1        # output.  We know there are at least two characters in the fold, but
304f3efcd01Safresh1        # I haven't bothered to worry about sorting on an optional third
305f3efcd01Safresh1        # character if the first two are identical.
306f3efcd01Safresh1        @test_to = sort { ($a->[0] == $b->[0])
307f3efcd01Safresh1                           ? $a->[1] <=> $b->[1]
308f3efcd01Safresh1                           : $a->[0] <=> $b->[0]
309f3efcd01Safresh1                        } @test_to;
310f3efcd01Safresh1    }
311f3efcd01Safresh1
312f3efcd01Safresh1
313f3efcd01Safresh1    # This test is from n bytes to m bytes.  Record that so won't try to add
314f3efcd01Safresh1    # another test that does the same.
315f3efcd01Safresh1    use bytes;
316f3efcd01Safresh1    my $to_bytes = length $to;
317f3efcd01Safresh1    foreach my $from_map (@from) {
318f3efcd01Safresh1        $has_test_by_byte_count{length $from_map}{$to_bytes} = $to;
319f3efcd01Safresh1    }
320f3efcd01Safresh1    no bytes;
321f3efcd01Safresh1
322f3efcd01Safresh1    my $ord_smallest_from = ord shift @from;
323f3efcd01Safresh1    if (exists $tests_ref->{$ord_smallest_from}) {
324f3efcd01Safresh1        die "There are already tests for $ord_smallest_from"
325f3efcd01Safresh1    };
326f3efcd01Safresh1
327f3efcd01Safresh1    # Add in the fold tests,
328f3efcd01Safresh1    push @{$tests_ref->{$ord_smallest_from}}, @test_to;
329f3efcd01Safresh1
330f3efcd01Safresh1    # Then any remaining froms in the equivalence class.
331f3efcd01Safresh1    push @{$tests_ref->{$ord_smallest_from}}, map { ord $_ } @from;
332f3efcd01Safresh1}
333f3efcd01Safresh1
334f3efcd01Safresh1# Get the Unicode rules and construct inverse mappings from them
335f3efcd01Safresh1
336f3efcd01Safresh1use Unicode::UCD;
337f3efcd01Safresh1my $file="../lib/unicore/CaseFolding.txt";
338f3efcd01Safresh1
339f3efcd01Safresh1# Use the Unicode data file if we are on an ASCII platform (which its data is
340f3efcd01Safresh1# for), and it is in the modern format (starting in Unicode 3.1.0) and it is
341f3efcd01Safresh1# available.  This avoids being affected by potential bugs introduced by other
342f3efcd01Safresh1# layers of Perl
343f3efcd01Safresh1if ($::IS_ASCII
344f3efcd01Safresh1    && pack("C*", split /\./, Unicode::UCD::UnicodeVersion()) ge v3.1.0
345f3efcd01Safresh1    && open my $fh, "<", $file)
346f3efcd01Safresh1{
347f3efcd01Safresh1    # We process the file in reverse order because its easier to see the T
348f3efcd01Safresh1    # entry first and then know that the next line we process is the
349f3efcd01Safresh1    # corresponding one for non-T.
350f3efcd01Safresh1    my @rules = <$fh>;
351f3efcd01Safresh1    my $prev_was_turkic = 0;
352f3efcd01Safresh1    while (defined ($_ = pop @rules)) {
353f3efcd01Safresh1        chomp;
354f3efcd01Safresh1
355f3efcd01Safresh1        # Lines look like (though without the initial '#')
356f3efcd01Safresh1        #0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
357f3efcd01Safresh1
358f3efcd01Safresh1        # Get rid of comments, ignore blank or comment-only lines
359f3efcd01Safresh1        my $line = $_ =~ s/ (?: \s* \# .* )? $ //rx;
360f3efcd01Safresh1        next unless length $line;
361f3efcd01Safresh1        my ($hex_from, $fold_type, @hex_folded) = split /[\s;]+/, $line;
362f3efcd01Safresh1
363f3efcd01Safresh1        next if $fold_type eq 'S';  # If Unicode's tables are correct, the F
364f3efcd01Safresh1                                    # should be a superset of S
365f3efcd01Safresh1        next if $fold_type eq 'I';  # Perl doesn't do old Turkish folding
366f3efcd01Safresh1
367f3efcd01Safresh1        my $test_type;
368f3efcd01Safresh1        if ($fold_type eq 'T') {
369f3efcd01Safresh1            $test_type = 0 + $use_turkic_rules;
370f3efcd01Safresh1            $prev_was_turkic = 1;
371f3efcd01Safresh1        }
372f3efcd01Safresh1        elsif ($prev_was_turkic) {
373f3efcd01Safresh1            $test_type = 0 + ! $use_turkic_rules;
374f3efcd01Safresh1            $prev_was_turkic = 0;
375f3efcd01Safresh1        }
376f3efcd01Safresh1        else {
377f3efcd01Safresh1            $test_type = 1;
378f3efcd01Safresh1            $prev_was_turkic = 0;
379f3efcd01Safresh1        }
380f3efcd01Safresh1
381f3efcd01Safresh1        my $from = hex $hex_from;
382f3efcd01Safresh1        my @to = map { hex $_ } @hex_folded;
383f3efcd01Safresh1        push @{$folds{$from}{$test_type}}, @to;
384f3efcd01Safresh1
385f3efcd01Safresh1        my $folded_str = pack ("U0U*", @to);
386f3efcd01Safresh1        push @{$inverse_folds{$folded_str}{$test_type}}, chr $from;
387f3efcd01Safresh1    }
388f3efcd01Safresh1}
389f3efcd01Safresh1else {  # Here, can't use the .txt file: read the Unicode rules file and
390f3efcd01Safresh1        # construct inverse mappings from it
391f3efcd01Safresh1
392f3efcd01Safresh1    skip_all "Don't know how to generate turkic rules on this platform"
393f3efcd01Safresh1                                                            if $use_turkic_rules;
394f3efcd01Safresh1    my ($invlist_ref, $invmap_ref, undef, $default)
395f3efcd01Safresh1                                    = Unicode::UCD::prop_invmap('Case_Folding');
396f3efcd01Safresh1    for my $i (0 .. @$invlist_ref - 1 - 1) {
397f3efcd01Safresh1        next if $invmap_ref->[$i] == $default;
398f3efcd01Safresh1
399f3efcd01Safresh1        # Make into an array if not so already, so can treat uniformly below
400f3efcd01Safresh1        $invmap_ref->[$i] = [ $invmap_ref->[$i] ] if ! ref $invmap_ref->[$i];
401f3efcd01Safresh1
402f3efcd01Safresh1        # Each subsequent element of the range requires adjustment of +1 from
403f3efcd01Safresh1        # the previous element
404f3efcd01Safresh1        my $adjust = -1;
405f3efcd01Safresh1        for my $j ($invlist_ref->[$i] .. $invlist_ref->[$i+1] -1) {
406f3efcd01Safresh1            $adjust++;
407f3efcd01Safresh1            my @to = map { $_ + $adjust } @{$invmap_ref->[$i]};
408f3efcd01Safresh1            push @{$folds{$j}{1}}, @to;
409f3efcd01Safresh1            my $folded_str = join "", map { chr } @to;
410f3efcd01Safresh1            utf8::upgrade($folded_str);
411f3efcd01Safresh1            #note (sprintf "%d: %04X: %s", __LINE__, $j, join " ",
412f3efcd01Safresh1            #    map { sprintf "%04X", $_  + $adjust } @{$invmap_ref->[$i]});
413f3efcd01Safresh1            push @{$inverse_folds{$folded_str}{1}}, chr $j;
414f3efcd01Safresh1        }
415f3efcd01Safresh1    }
416f3efcd01Safresh1}
417f3efcd01Safresh1
418f3efcd01Safresh1# Analyze the data and generate tests to get adequate test coverage.  We sort
419f3efcd01Safresh1# things so that smallest code points are done first.
420f3efcd01Safresh1foreach my $to (sort { $a cmp $b } keys %inverse_folds)
421f3efcd01Safresh1{
422f3efcd01Safresh1TO:
423f3efcd01Safresh1  foreach my $tests_ref (\%tests, \%neg_tests) {
424f3efcd01Safresh1    my $test_type = ($tests_ref == \%tests) ? 1 : 0;
425f3efcd01Safresh1
426f3efcd01Safresh1    next unless exists $inverse_folds{$to}{$test_type};
427f3efcd01Safresh1
428f3efcd01Safresh1    # Within each fold, sort so that the smallest code points are done first
429f3efcd01Safresh1    @{$inverse_folds{$to}{$test_type}} = sort { $a cmp $b } @{$inverse_folds{$to}{$test_type}};
430f3efcd01Safresh1    my @from = @{$inverse_folds{$to}{$test_type}};
431f3efcd01Safresh1
432f3efcd01Safresh1    # Just add it to the tests if doing complete coverage
433f3efcd01Safresh1    if (! $skip_apparently_redundant) {
434f3efcd01Safresh1        add_test($tests_ref, $to, @from);
435f3efcd01Safresh1        next TO;
436f3efcd01Safresh1    }
437f3efcd01Safresh1
438f3efcd01Safresh1    my $to_chars = length $to;
439f3efcd01Safresh1    my $to_range_type = range_type(substr($to, 0, 1));
440f3efcd01Safresh1
441f3efcd01Safresh1    # If this is required to be tested, do so.  We check for these first, as
442f3efcd01Safresh1    # they will take up slots of byte-to-byte combinations that we otherwise
443f3efcd01Safresh1    # would have to have other tests to get.
444f3efcd01Safresh1    foreach my $from_map (@from) {
445f3efcd01Safresh1        if (exists $be_sure_to_test{$from_map}) {
446f3efcd01Safresh1            add_test($tests_ref, $to, @from);
447f3efcd01Safresh1            next TO;
448f3efcd01Safresh1        }
449f3efcd01Safresh1    }
450f3efcd01Safresh1
451f3efcd01Safresh1    # If the fold contains heterogeneous range types, is suspect and should be
452f3efcd01Safresh1    # tested.
453f3efcd01Safresh1    if ($to_chars > 1) {
454f3efcd01Safresh1        foreach my $char (split "", $to) {
455f3efcd01Safresh1            if (range_type($char) != $to_range_type) {
456f3efcd01Safresh1                add_test($tests_ref, $to, @from);
457f3efcd01Safresh1                next TO;
458f3efcd01Safresh1            }
459f3efcd01Safresh1        }
460f3efcd01Safresh1    }
461f3efcd01Safresh1
462f3efcd01Safresh1    # If the mapping crosses range types, is suspect and should be tested
463f3efcd01Safresh1    foreach my $from_map (@from) {
464f3efcd01Safresh1        if (range_type($from_map) != $to_range_type) {
465f3efcd01Safresh1            add_test($tests_ref, $to, @from);
466f3efcd01Safresh1            next TO;
467f3efcd01Safresh1        }
468f3efcd01Safresh1    }
469f3efcd01Safresh1
470f3efcd01Safresh1    # Here, all components of the mapping are in the same range type.  For
471f3efcd01Safresh1    # single character folds, we test one case in each range type that has 2
472f3efcd01Safresh1    # particpants, 3 particpants, etc.
473f3efcd01Safresh1    if ($to_chars == 1) {
474f3efcd01Safresh1        if (! exists $has_test_by_participants{scalar @from}{$to_range_type}) {
475f3efcd01Safresh1            add_test($tests_ref, $to, @from);
476f3efcd01Safresh1            $has_test_by_participants{scalar @from}{$to_range_type} = $to;
477f3efcd01Safresh1            next TO;
478f3efcd01Safresh1        }
479f3efcd01Safresh1    }
480f3efcd01Safresh1
481f3efcd01Safresh1    # We also test all combinations of mappings from m to n bytes.  This is
482f3efcd01Safresh1    # because the regex optimizer cares.  (Don't bother worrying about that
483f3efcd01Safresh1    # Latin1 chars will occupy a different number of bytes under utf8, as
484f3efcd01Safresh1    # there are plenty of other cases that catch these byte numbers.)
485f3efcd01Safresh1    use bytes;
486f3efcd01Safresh1    my $to_bytes = length $to;
487f3efcd01Safresh1    foreach my $from_map (@from) {
488f3efcd01Safresh1        if (! exists $has_test_by_byte_count{length $from_map}{$to_bytes}) {
489f3efcd01Safresh1            add_test($tests_ref, $to, @from);
490f3efcd01Safresh1            next TO;
491f3efcd01Safresh1        }
492f3efcd01Safresh1    }
493f3efcd01Safresh1  }
494f3efcd01Safresh1}
495f3efcd01Safresh1
496f3efcd01Safresh1# For each range type, test additionally a character that folds to itself
497f3efcd01Safresh1add_test(\%tests, ":", ":");
498f3efcd01Safresh1add_test(\%tests, chr utf8::unicode_to_native(0xF7), chr utf8::unicode_to_native(0xF7));
499f3efcd01Safresh1add_test(\%tests, chr 0x2C7, chr 0x2C7);
500f3efcd01Safresh1
501f3efcd01Safresh1# To cut down on the number of tests
502f3efcd01Safresh1my $has_tested_aa_above_latin1;
503f3efcd01Safresh1my $has_tested_latin1_aa;
504f3efcd01Safresh1my $has_tested_ascii_aa;
505f3efcd01Safresh1my $has_tested_l_above_latin1;
506f3efcd01Safresh1my $has_tested_above_latin1_l;
507f3efcd01Safresh1my $has_tested_ascii_l;
508f3efcd01Safresh1my $has_tested_above_latin1_d;
509f3efcd01Safresh1my $has_tested_ascii_d;
510f3efcd01Safresh1my $has_tested_non_latin1_d;
511f3efcd01Safresh1my $has_tested_above_latin1_a;
512f3efcd01Safresh1my $has_tested_ascii_a;
513f3efcd01Safresh1my $has_tested_non_latin1_a;
514f3efcd01Safresh1
515f3efcd01Safresh1# For use by pairs() in generating combinations
516f3efcd01Safresh1sub prefix {
517f3efcd01Safresh1    my $p = shift;
518f3efcd01Safresh1    map [ $p, $_ ], @_
519f3efcd01Safresh1}
520f3efcd01Safresh1
521f3efcd01Safresh1# Returns all ordered combinations of pairs of elements from the input array.
522f3efcd01Safresh1# It doesn't return pairs like (a, a), (b, b).  Change the slice to an array
523f3efcd01Safresh1# to do that.  This was just to have fewer tests.
524f3efcd01Safresh1sub pairs (@) {
525f3efcd01Safresh1    #print STDERR __LINE__, ": ", join(" XXX ", map { sprintf "%04X", $_ } @_), "\n";
526f3efcd01Safresh1    map { prefix $_[$_], @_[0..$_-1, $_+1..$#_] } 0..$#_
527f3efcd01Safresh1}
528f3efcd01Safresh1
529f3efcd01Safresh1# Finally ready to do the tests
530f3efcd01Safresh1foreach my $tests_ref (\%neg_tests, \%tests) {
531f3efcd01Safresh1foreach my $test (sort { numerically } keys %{$tests_ref}) {
532f3efcd01Safresh1
533f3efcd01Safresh1  my $previous_target;
534f3efcd01Safresh1  my $previous_pattern;
535f3efcd01Safresh1  my @pairs = pairs(sort numerically $test, @{$tests_ref->{$test}});
536f3efcd01Safresh1
537f3efcd01Safresh1  # Each fold can be viewed as a closure of all the characters that
538f3efcd01Safresh1  # participate in it.  Look at each possible pairing from a closure, with the
539f3efcd01Safresh1  # first member of the pair the target string to match against, and the
540f3efcd01Safresh1  # second member forming the pattern.  Thus each fold member gets tested as
541f3efcd01Safresh1  # the string, and the pattern with every other member in the opposite role.
542f3efcd01Safresh1  while (my $pair = shift @pairs) {
543f3efcd01Safresh1    my ($target, $pattern) = @$pair;
544f3efcd01Safresh1
545f3efcd01Safresh1    # When testing a char that doesn't fold, we can get the same
546f3efcd01Safresh1    # permutation twice; so skip all but the first.
547f3efcd01Safresh1    next if $previous_target
548f3efcd01Safresh1            && $previous_target == $target
549f3efcd01Safresh1            && $previous_pattern == $pattern;
550f3efcd01Safresh1    ($previous_target, $previous_pattern) = ($target, $pattern);
551f3efcd01Safresh1
552f3efcd01Safresh1    # Each side may be either a single char or a string.  Extract each into an
553f3efcd01Safresh1    # array (perhaps of length 1)
554f3efcd01Safresh1    my @target, my @pattern;
555f3efcd01Safresh1    @target = (ref $target) ? @$target : $target;
556f3efcd01Safresh1    @pattern = (ref $pattern) ? @$pattern : $pattern;
557f3efcd01Safresh1
558f3efcd01Safresh1    # We are testing just folds to/from a single character.  If our pairs
559f3efcd01Safresh1    # happens to generate multi/multi, skip.
560f3efcd01Safresh1    next if @target > 1 && @pattern > 1;
561f3efcd01Safresh1
562f3efcd01Safresh1    # Get in hex form.
563f3efcd01Safresh1    my @x_target = map { sprintf "\\x{%04X}", $_ } @target;
564f3efcd01Safresh1    my @x_pattern = map { sprintf "\\x{%04X}", $_ } @pattern;
565f3efcd01Safresh1
566f3efcd01Safresh1    my $target_above_latin1 = grep { $_ > 255 } @target;
567f3efcd01Safresh1    my $pattern_above_latin1 = grep { $_ > 255 } @pattern;
568f3efcd01Safresh1    my $target_has_ascii = grep { utf8::native_to_unicode($_) < 128 } @target;
569f3efcd01Safresh1    my $pattern_has_ascii = grep { utf8::native_to_unicode($_) < 128 } @pattern;
570f3efcd01Safresh1    my $target_only_ascii = ! grep { utf8::native_to_unicode($_) > 127 } @target;
571f3efcd01Safresh1    my $pattern_only_ascii = ! grep { utf8::native_to_unicode($_) > 127 } @pattern;
572f3efcd01Safresh1    my $target_has_latin1 = grep { $_ < 256 } @target;
573f3efcd01Safresh1    my $target_has_upper_latin1
574f3efcd01Safresh1                = grep { $_ < 256 && utf8::native_to_unicode($_) > 127 } @target;
575f3efcd01Safresh1    my $pattern_has_upper_latin1
576f3efcd01Safresh1                = grep { $_ < 256 && utf8::native_to_unicode($_) > 127 } @pattern;
577f3efcd01Safresh1    my $pattern_has_latin1 = grep { $_ < 256 } @pattern;
578f3efcd01Safresh1    my $is_self = @target == 1 && @pattern == 1 && $target[0] == $pattern[0];
579f3efcd01Safresh1
580f3efcd01Safresh1    # We don't test multi-char folding into other multi-chars.  We are testing
581f3efcd01Safresh1    # a code point that folds to or from other characters.  Find the single
582f3efcd01Safresh1    # code point for diagnostic purposes.  (If both are single, choose the
583f3efcd01Safresh1    # target string)
584f3efcd01Safresh1    my $ord = @target == 1 ? $target[0] : $pattern[0];
585f3efcd01Safresh1    my $progress = sprintf "%04X: \"%s\" and /%s/",
586f3efcd01Safresh1                            $test,
587f3efcd01Safresh1                            join("", @x_target),
588f3efcd01Safresh1                            join("", @x_pattern);
589f3efcd01Safresh1    #note $progress;
590f3efcd01Safresh1
591f3efcd01Safresh1    # Now grind out tests, using various combinations.
592f3efcd01Safresh1    {
593f3efcd01Safresh1      my $charset_mod = lc $charset;
594f2a19305Safresh1      my $current_locale = ($has_LC_CTYPE)
595f2a19305Safresh1                           ? setlocale(&POSIX::LC_CTYPE)
596f2a19305Safresh1                           : 'C';
597f3efcd01Safresh1      $current_locale = 'C locale' if $current_locale eq 'C';
598f3efcd01Safresh1      $okays = 0;
599f3efcd01Safresh1      $this_iteration = 0;
600f3efcd01Safresh1
601f3efcd01Safresh1      # To cut down somewhat on the enormous quantity of tests this currently
602f3efcd01Safresh1      # runs, skip some for some of the character sets whose results aren't
603f3efcd01Safresh1      # likely to differ from others.  But run all tests on the code points
604f3efcd01Safresh1      # that don't fold, plus one other set in each range group.
605f3efcd01Safresh1      if (! $is_self) {
606f3efcd01Safresh1
607f3efcd01Safresh1        # /aa should only affect things with folds in the ASCII range.  But, try
608f3efcd01Safresh1        # it on one set in the other ranges just to make sure it doesn't break
609f3efcd01Safresh1        # them.
610f3efcd01Safresh1        if ($charset eq 'aa') {
611f3efcd01Safresh1
612f3efcd01Safresh1          # It may be that this $pair of code points to test are both
613f3efcd01Safresh1          # non-ascii, but if either of them actually fold to ascii, that is
614f3efcd01Safresh1          # suspect and should be tested.  So for /aa, use whether their folds
615f3efcd01Safresh1          # are ascii or not
616f3efcd01Safresh1          my $target_has_ascii = $target_has_ascii;
617f3efcd01Safresh1          my $pattern_has_ascii = $pattern_has_ascii;
618f3efcd01Safresh1          if (! $target_has_ascii) {
619f3efcd01Safresh1            foreach my $cp (@target) {
620f3efcd01Safresh1              if (exists $folds{$cp}{1}
621f3efcd01Safresh1                  && grep { utf8::native_to_unicode($_) < 128 } @{$folds{$cp}{1}} )
622f3efcd01Safresh1              {
623f3efcd01Safresh1                  $target_has_ascii = 1;
624f3efcd01Safresh1                  last;
625f3efcd01Safresh1              }
626f3efcd01Safresh1            }
627f3efcd01Safresh1          }
628f3efcd01Safresh1          if (! $pattern_has_ascii) {
629f3efcd01Safresh1            foreach my $cp (@pattern) {
630f3efcd01Safresh1              if (exists $folds{$cp}{1}
631f3efcd01Safresh1                  && grep { utf8::native_to_unicode($_) < 128 } @{$folds{$cp}}{1} )
632f3efcd01Safresh1              {
633f3efcd01Safresh1                  $pattern_has_ascii = 1;
634f3efcd01Safresh1                  last;
635f3efcd01Safresh1              }
636f3efcd01Safresh1            }
637f3efcd01Safresh1          }
638f3efcd01Safresh1
639f3efcd01Safresh1          if (! $target_has_ascii && ! $pattern_has_ascii) {
640f3efcd01Safresh1            if ($target_above_latin1 || $pattern_above_latin1) {
641f3efcd01Safresh1              next if defined $has_tested_aa_above_latin1
642f3efcd01Safresh1                      && $has_tested_aa_above_latin1 != $test;
643f3efcd01Safresh1              $has_tested_aa_above_latin1 = $test;
644f3efcd01Safresh1            }
645f3efcd01Safresh1            next if defined $has_tested_latin1_aa
646f3efcd01Safresh1                    && $has_tested_latin1_aa != $test;
647f3efcd01Safresh1            $has_tested_latin1_aa = $test;
648f3efcd01Safresh1          }
649f3efcd01Safresh1          elsif ($target_only_ascii && $pattern_only_ascii) {
650f3efcd01Safresh1
651f3efcd01Safresh1              # And, except for one set just to make sure, skip tests
652f3efcd01Safresh1              # where both elements in the pair are ASCII.  If one works for
653f3efcd01Safresh1              # aa, the others are likely too.  This skips tests where the
654f3efcd01Safresh1              # fold is from non-ASCII to ASCII, but this part of the test
655f3efcd01Safresh1              # is just about the ASCII components.
656f3efcd01Safresh1              next if defined $has_tested_ascii_l
657f3efcd01Safresh1                      && $has_tested_ascii_l != $test;
658f3efcd01Safresh1              $has_tested_ascii_l = $test;
659f3efcd01Safresh1          }
660f3efcd01Safresh1        }
661f3efcd01Safresh1        elsif ($charset eq 'l') {
662f3efcd01Safresh1
663f3efcd01Safresh1          # For l, don't need to test beyond one set those things that are
664f3efcd01Safresh1          # all above latin1, because unlikely to have different successes
665f3efcd01Safresh1          # than /u.  But, for the same reason as described in the /aa above,
666f3efcd01Safresh1          # it is suspect and should be tested, if either of the folds are to
667f3efcd01Safresh1          # latin1.
668f3efcd01Safresh1          my $target_has_latin1 = $target_has_latin1;
669f3efcd01Safresh1          my $pattern_has_latin1 = $pattern_has_latin1;
670f3efcd01Safresh1          if (! $target_has_latin1) {
671f3efcd01Safresh1            foreach my $cp (@target) {
672f3efcd01Safresh1              if (exists $folds{$cp}{1}
673f3efcd01Safresh1                  && grep { $_ < 256 } @{$folds{$cp}{1}} )
674f3efcd01Safresh1              {
675f3efcd01Safresh1                $target_has_latin1 = 1;
676f3efcd01Safresh1                last;
677f3efcd01Safresh1              }
678f3efcd01Safresh1            }
679f3efcd01Safresh1          }
680f3efcd01Safresh1          if (! $pattern_has_latin1) {
681f3efcd01Safresh1            foreach my $cp (@pattern) {
682f3efcd01Safresh1              if (exists $folds{$cp}{1}
683f3efcd01Safresh1                  && grep { $_ < 256 } @{$folds{$cp}{1}} )
684f3efcd01Safresh1              {
685f3efcd01Safresh1                $pattern_has_latin1 = 1;
686f3efcd01Safresh1                last;
687f3efcd01Safresh1              }
688f3efcd01Safresh1            }
689f3efcd01Safresh1          }
690f3efcd01Safresh1          if (! $target_has_latin1 && ! $pattern_has_latin1) {
691f3efcd01Safresh1            next if defined $has_tested_above_latin1_l
692f3efcd01Safresh1                    && $has_tested_above_latin1_l != $test;
693f3efcd01Safresh1            $has_tested_above_latin1_l = $test;
694f3efcd01Safresh1          }
695f3efcd01Safresh1          elsif ($target_only_ascii && $pattern_only_ascii) {
696f3efcd01Safresh1
697f3efcd01Safresh1              # And, except for one set just to make sure, skip tests
698f3efcd01Safresh1              # where both elements in the pair are ASCII.  This is
699f3efcd01Safresh1              # essentially the same reasoning as above for /aa.
700f3efcd01Safresh1              next if defined $has_tested_ascii_l
701f3efcd01Safresh1                      && $has_tested_ascii_l != $test;
702f3efcd01Safresh1              $has_tested_ascii_l = $test;
703f3efcd01Safresh1          }
704f3efcd01Safresh1        }
705f3efcd01Safresh1        elsif ($charset eq 'd') {
706f3efcd01Safresh1          # Similarly for d.  Beyond one test (besides self) each, we  don't
707f3efcd01Safresh1          # test pairs that are both ascii; or both above latin1, or are
708f3efcd01Safresh1          # combinations of ascii and above latin1.
709f3efcd01Safresh1          if (! $target_has_upper_latin1 && ! $pattern_has_upper_latin1) {
710f3efcd01Safresh1            if ($target_has_ascii && $pattern_has_ascii) {
711f3efcd01Safresh1              next if defined $has_tested_ascii_d
712f3efcd01Safresh1                      && $has_tested_ascii_d != $test;
713f3efcd01Safresh1              $has_tested_ascii_d = $test
714f3efcd01Safresh1            }
715f3efcd01Safresh1            elsif (! $target_has_latin1 && ! $pattern_has_latin1) {
716f3efcd01Safresh1              next if defined $has_tested_above_latin1_d
717f3efcd01Safresh1                      && $has_tested_above_latin1_d != $test;
718f3efcd01Safresh1              $has_tested_above_latin1_d = $test;
719f3efcd01Safresh1            }
720f3efcd01Safresh1            else {
721f3efcd01Safresh1              next if defined $has_tested_non_latin1_d
722f3efcd01Safresh1                      && $has_tested_non_latin1_d != $test;
723f3efcd01Safresh1              $has_tested_non_latin1_d = $test;
724f3efcd01Safresh1            }
725f3efcd01Safresh1          }
726f3efcd01Safresh1        }
727f3efcd01Safresh1        elsif ($charset eq 'a') {
728f3efcd01Safresh1          # Similarly for a.  This should match identically to /u, so wasn't
729f3efcd01Safresh1          # tested at all until a bug was found that was thereby missed.
730f3efcd01Safresh1          # As a compromise, beyond one test (besides self) each, we  don't
731f3efcd01Safresh1          # test pairs that are both ascii; or both above latin1, or are
732f3efcd01Safresh1          # combinations of ascii and above latin1.
733f3efcd01Safresh1          if (! $target_has_upper_latin1 && ! $pattern_has_upper_latin1) {
734f3efcd01Safresh1            if ($target_has_ascii && $pattern_has_ascii) {
735f3efcd01Safresh1              next if defined $has_tested_ascii_a
736f3efcd01Safresh1                      && $has_tested_ascii_a != $test;
737f3efcd01Safresh1              $has_tested_ascii_a = $test
738f3efcd01Safresh1            }
739f3efcd01Safresh1            elsif (! $target_has_latin1 && ! $pattern_has_latin1) {
740f3efcd01Safresh1              next if defined $has_tested_above_latin1_a
741f3efcd01Safresh1                      && $has_tested_above_latin1_a != $test;
742f3efcd01Safresh1              $has_tested_above_latin1_a = $test;
743f3efcd01Safresh1            }
744f3efcd01Safresh1            else {
745f3efcd01Safresh1              next if defined $has_tested_non_latin1_a
746f3efcd01Safresh1                      && $has_tested_non_latin1_a != $test;
747f3efcd01Safresh1              $has_tested_non_latin1_a = $test;
748f3efcd01Safresh1            }
749f3efcd01Safresh1          }
750f3efcd01Safresh1        }
751f3efcd01Safresh1      }
752f3efcd01Safresh1
753f3efcd01Safresh1      foreach my $utf8_target (0, 1) {    # Both utf8 and not, for
754f3efcd01Safresh1                                          # code points < 256
755f3efcd01Safresh1        my $upgrade_target = "";
756f3efcd01Safresh1
757f3efcd01Safresh1        # These must already be in utf8 because the string to match has
758f3efcd01Safresh1        # something above latin1.  So impossible to test if to not to be in
759f3efcd01Safresh1        # utf8; and otherwise, no upgrade is needed.
760f3efcd01Safresh1        next if $target_above_latin1 && ! $utf8_target;
761f3efcd01Safresh1        $upgrade_target = ' utf8::upgrade($c);' if ! $target_above_latin1 && $utf8_target;
762f3efcd01Safresh1
763f3efcd01Safresh1        foreach my $utf8_pattern (0, 1) {
764f3efcd01Safresh1          next if $pattern_above_latin1 && ! $utf8_pattern;
765f3efcd01Safresh1
766f3efcd01Safresh1          # Our testing of 'l' uses the POSIX locale, which is ASCII-only
767f3efcd01Safresh1          my $uni_semantics = $charset ne 'l' && (    $utf8_target
768f3efcd01Safresh1                                                  ||  $charset eq 'u'
769f3efcd01Safresh1                                                  ||  $charset eq 'L'
770f3efcd01Safresh1                                                  || ($charset eq 'd' && $utf8_pattern)
771f3efcd01Safresh1                                                  ||  $charset =~ /a/);
772f3efcd01Safresh1          my $upgrade_pattern = "";
773f2a19305Safresh1          $upgrade_pattern = ' utf8::upgrade($rhs);'
774f2a19305Safresh1            if ! $pattern_above_latin1 && $utf8_pattern;
775f3efcd01Safresh1
776f3efcd01Safresh1          my $lhs = join "", @x_target;
777f3efcd01Safresh1          my $lhs_str = eval qq{"$lhs"}; fail($@) if $@;
778f3efcd01Safresh1          my @rhs = @x_pattern;
779f3efcd01Safresh1          my $rhs = join "", @rhs;
780f3efcd01Safresh1
781f3efcd01Safresh1          # Unicode created a folding rule that partially emulates what
782f3efcd01Safresh1          # happens in a Turkish locale, by using combining characters.  The
783f3efcd01Safresh1          # result is close enough to what really should happen, that it can
784f3efcd01Safresh1          # pass many of the tests, but not all.  So, if we have a rule that
785f3efcd01Safresh1          # is expecting failure, it may pass instead.  The code in the block
786f3efcd01Safresh1          # below is good enough for skipping the tests, and khw tried to make
787f3efcd01Safresh1          # it general, but should the rules be revised (unlikely at this
788f3efcd01Safresh1          # point), this might need to be tweaked.
789f3efcd01Safresh1          if ($tests_ref == \%neg_tests) {
790f3efcd01Safresh1            my ($shorter_ref, $longer_ref);
791f3efcd01Safresh1
792f3efcd01Safresh1            # Convert the $rhs to a string, like we already did for the lhs
793f3efcd01Safresh1            my $rhs_str = eval qq{"$rhs"}; fail($@) if $@;
794f3efcd01Safresh1
795f3efcd01Safresh1            # If the lengths of the two sides are equal, we don't want to do
796f3efcd01Safresh1            # this; this is only to bypass the combining characters affecting
797f3efcd01Safresh1            # things
798f3efcd01Safresh1            if (length $lhs_str != length $rhs_str) {
799f3efcd01Safresh1
800f3efcd01Safresh1              # Find the shorter and longer of the pair
801f3efcd01Safresh1              if (length $lhs_str < length $rhs_str) {
802f3efcd01Safresh1                  $shorter_ref = \$lhs_str;
803f3efcd01Safresh1                  $longer_ref = \$rhs_str;
804f3efcd01Safresh1              }
805f3efcd01Safresh1              else {
806f3efcd01Safresh1                  $shorter_ref = \$rhs_str;
807f3efcd01Safresh1                  $longer_ref = \$lhs_str;
808f3efcd01Safresh1              }
809f3efcd01Safresh1
810f3efcd01Safresh1              # If the shorter string is entirely contained in the longer, we
811f3efcd01Safresh1              # have generated a test that is likely to succeed, and the
812f3efcd01Safresh1              # reasons it would fail have nothing to do with folding.  But we
813f3efcd01Safresh1              # are expecting it to fail, and so our test is invalid.  Skip
814f3efcd01Safresh1              # it.
815f3efcd01Safresh1              next if index($$longer_ref, $$shorter_ref) >= 0;
816f3efcd01Safresh1
817f3efcd01Safresh1
818f3efcd01Safresh1              # The above eliminates about half the failure cases.  This gets
819f3efcd01Safresh1              # the rest.  If the shorter string is a single character and has
820f3efcd01Safresh1              # a fold legal in this run to a character that is in the longer
821f3efcd01Safresh1              # string, it is also likely to succeed under /i.  So again our
822f3efcd01Safresh1              # computed test is bogus.
823f3efcd01Safresh1              if (   length $$shorter_ref == 1
824f3efcd01Safresh1                  && exists $folds{ord $$shorter_ref}{1})
825f3efcd01Safresh1              {
826f3efcd01Safresh1                my @folded_to = @{$folds{ord $$shorter_ref}{1}};
827f3efcd01Safresh1                next if   @folded_to == 1
828f3efcd01Safresh1                       && index($$longer_ref, chr $folded_to[0]) >= 0;
829f3efcd01Safresh1              }
830f3efcd01Safresh1            }
831f3efcd01Safresh1          }
832f3efcd01Safresh1
833f3efcd01Safresh1          my $should_fail = (! $uni_semantics && $ord < 256 && ! $is_self && utf8::native_to_unicode($ord) >= 128)
834f3efcd01Safresh1                            || ($charset eq 'aa' && $target_has_ascii != $pattern_has_ascii)
835f3efcd01Safresh1                            || ($charset eq 'l' && $target_has_latin1 != $pattern_has_latin1)
836f3efcd01Safresh1                            || $tests_ref == \%neg_tests;
837f3efcd01Safresh1
838f3efcd01Safresh1          # Do simple tests of referencing capture buffers, named and
839f3efcd01Safresh1          # numbered.
840f3efcd01Safresh1          my $op = '=~';
841f3efcd01Safresh1          $op = '!~' if $should_fail;
842f3efcd01Safresh1
843f3efcd01Safresh1          my $todo = 0;  # No longer any todo's
844f2a19305Safresh1          my $eval = "my \$c = \"$lhs$rhs\"; my \$rhs = \"$rhs\"; "
845f2a19305Safresh1                   . $upgrade_pattern
846f2a19305Safresh1                   . " my \$p = qr/(?$charset_mod:^(\$rhs)\\1\$)/i;"
847f2a19305Safresh1                   . "$upgrade_target \$c $op \$p";
848f3efcd01Safresh1          run_test($eval, $todo, ($charset_mod eq 'l'), "");
849f3efcd01Safresh1
850f2a19305Safresh1          $eval = "my \$c = \"$lhs$rhs\"; my \$rhs = \"$rhs\"; "
851f2a19305Safresh1                . $upgrade_pattern
852f2a19305Safresh1                . " my \$p = qr/(?$charset_mod:^(?<grind>\$rhs)\\k<grind>\$)/i;"
853f2a19305Safresh1                . "$upgrade_target \$c $op \$p";
854f3efcd01Safresh1          run_test($eval, $todo, ($charset_mod eq 'l'), "");
855f3efcd01Safresh1
856f3efcd01Safresh1          if ($lhs ne $rhs) {
857f2a19305Safresh1            $eval = "my \$c = \"$rhs$lhs\"; my \$rhs = \"$rhs\"; "
858f2a19305Safresh1                  . $upgrade_pattern
859f2a19305Safresh1                  . " my \$p = qr/(?$charset_mod:^(\$rhs)\\1\$)/i;"
860f2a19305Safresh1                  . "$upgrade_target \$c $op \$p";
861f3efcd01Safresh1            run_test($eval, "", ($charset_mod eq 'l'), "");
862f3efcd01Safresh1
863f2a19305Safresh1            $eval = "my \$c = \"$rhs$lhs\"; my \$rhs = \"$rhs\"; "
864f2a19305Safresh1                  . $upgrade_pattern
865f2a19305Safresh1                  . " my \$p = qr/(?$charset_mod:^(?<grind>\$rhs)\\k<grind>\$)/i;"
866f2a19305Safresh1                  . "$upgrade_target \$c $op \$p";
867f3efcd01Safresh1            run_test($eval, "", ($charset_mod eq 'l'), "");
868f3efcd01Safresh1          }
869f3efcd01Safresh1
870f3efcd01Safresh1          # See if works on what could be a simple trie.
871f3efcd01Safresh1          my $alternate;
872f3efcd01Safresh1          {
873f3efcd01Safresh1            # Keep the alternate | branch the same length as the tested one so
874f3efcd01Safresh1            # that it's length doesn't influence things
875f3efcd01Safresh1            my $evaled = eval "\"$rhs\"";   # Convert e.g. \x{foo} into its
876f3efcd01Safresh1                                            # chr equivalent
877f3efcd01Safresh1            use bytes;
878f3efcd01Safresh1            $alternate = 'q' x length $evaled;
879f3efcd01Safresh1          }
880f2a19305Safresh1          $eval = "my \$c = \"$lhs\"; my \$rhs = \"$rhs\"; "
881f2a19305Safresh1                . $upgrade_pattern
882f2a19305Safresh1                . " my \$p = qr/\$rhs|$alternate/i$charset_mod;"
883f2a19305Safresh1                . "$upgrade_target \$c $op \$p";
884f3efcd01Safresh1          run_test($eval, "", ($charset_mod eq 'l'), "");
885f3efcd01Safresh1
886f3efcd01Safresh1          # Check that works when the folded character follows something that
887f3efcd01Safresh1          # is quantified.  This test knows the regex code internals to the
888f3efcd01Safresh1          # extent that it knows this is a potential problem, and that there
889f3efcd01Safresh1          # are three different types of quantifiers generated: 1) The thing
890f3efcd01Safresh1          # being quantified matches a single character; 2) it matches more
891f3efcd01Safresh1          # than one character, but is fixed width; 3) it can match a variable
892f3efcd01Safresh1          # number of characters.  (It doesn't know that case 3 shouldn't
893f3efcd01Safresh1          # matter, since it doesn't do anything special for the character
894f3efcd01Safresh1          # following the quantifier; nor that some of the different
895f3efcd01Safresh1          # quantifiers execute the same underlying code, as these tests are
896f3efcd01Safresh1          # quick, and this insulates these tests from changes in the
897f3efcd01Safresh1          # implementation.)
898f3efcd01Safresh1          for my $quantifier ('?', '??', '*', '*?', '+', '+?', '{1,2}', '{1,2}?') {
899f2a19305Safresh1            $eval = "my \$c = \"_$lhs\"; my \$rhs = \"$rhs\"; $upgrade_pattern "
900f2a19305Safresh1                  . "my \$p = qr/(?$charset_mod:.$quantifier\$rhs)/i;"
901f2a19305Safresh1                  . "$upgrade_target \$c $op \$p";
902f3efcd01Safresh1            run_test($eval, "", ($charset_mod eq 'l'), "");
903f2a19305Safresh1            $eval = "my \$c = \"__$lhs\"; my \$rhs = \"$rhs\"; $upgrade_pattern "
904f2a19305Safresh1                  . "my \$p = qr/(?$charset_mod:(?:..)$quantifier\$rhs)/i;"
905f2a19305Safresh1                  . "$upgrade_target \$c $op \$p";
906f3efcd01Safresh1            run_test($eval, "", ($charset_mod eq 'l'), "");
907f2a19305Safresh1            $eval = "my \$c = \"__$lhs\"; my \$rhs = \"$rhs\"; $upgrade_pattern "
908f2a19305Safresh1                  . "my \$p = qr/(?$charset_mod:(?:.|\\R)$quantifier\$rhs)/i;"
909f2a19305Safresh1                  . "$upgrade_target \$c $op \$p";
910f3efcd01Safresh1            run_test($eval, "", ($charset_mod eq 'l'), "");
911f3efcd01Safresh1          }
912f3efcd01Safresh1
913f3efcd01Safresh1          foreach my $bracketed (0, 1) {   # Put rhs in [...], or not
914f3efcd01Safresh1            next if $bracketed && @pattern != 1;    # bracketed makes these
915f3efcd01Safresh1                                                    # or's instead of a sequence
916f3efcd01Safresh1            foreach my $optimize_bracketed (0, 1) {
917f3efcd01Safresh1              next if $optimize_bracketed && ! $bracketed;
918f3efcd01Safresh1              foreach my $inverted (0,1) {
919f3efcd01Safresh1                  next if $inverted && ! $bracketed;  # inversion only valid
920f3efcd01Safresh1                                                      # in [^...]
921f3efcd01Safresh1                  next if $inverted && @target != 1;  # [perl #89750] multi-char
922f3efcd01Safresh1                                                      # not valid in [^...]
923f3efcd01Safresh1
924f3efcd01Safresh1                # In some cases, add an extra character that doesn't fold, and
925f3efcd01Safresh1                # looks ok in the output.
926f3efcd01Safresh1                my $extra_char = "_";
927f3efcd01Safresh1                foreach my $prepend ("", $extra_char) {
928f3efcd01Safresh1                  foreach my $append ("", $extra_char) {
929f3efcd01Safresh1
930f3efcd01Safresh1                    # Assemble the rhs.  Put each character in a separate
931f3efcd01Safresh1                    # bracketed if using charclasses.  This creates a stress on
932f3efcd01Safresh1                    # the code to span a match across multiple elements
933f3efcd01Safresh1                    my $rhs = "";
934f3efcd01Safresh1                    foreach my $rhs_char (@rhs) {
935f3efcd01Safresh1                        $rhs .= '[' if $bracketed;
936f3efcd01Safresh1                        $rhs .= '^' if $inverted;
937f3efcd01Safresh1                        $rhs .=  $rhs_char;
938f3efcd01Safresh1
939f3efcd01Safresh1                        # Add a character to the class, so class doesn't get
940f3efcd01Safresh1                        # optimized out, unless we are testing that optimization
941f3efcd01Safresh1                        $rhs .= '_' if $optimize_bracketed;
942f3efcd01Safresh1                        $rhs .= ']' if $bracketed;
943f3efcd01Safresh1                    }
944f3efcd01Safresh1
945f3efcd01Safresh1                    # Add one of: no capturing parens
946f3efcd01Safresh1                    #             a single set
947f3efcd01Safresh1                    #             a nested set
948f3efcd01Safresh1                    # Use quantifiers and extra variable width matches inside
949f3efcd01Safresh1                    # them to keep some optimizations from happening
950f3efcd01Safresh1                    foreach my $parend (0, 1, 2) {
951f3efcd01Safresh1                      my $interior = (! $parend)
952f3efcd01Safresh1                                      ? $rhs
953f3efcd01Safresh1                                      : ($parend == 1)
954f3efcd01Safresh1                                          ? "(${rhs},?)"
955f3efcd01Safresh1                                          : "((${rhs})+,?)";
956f3efcd01Safresh1                      foreach my $quantifier ("", '?', '*', '+', '{1,3}') {
957f3efcd01Safresh1
958f3efcd01Safresh1                        # Perhaps should be TODOs, as are unimplemented, but
959f3efcd01Safresh1                        # maybe will never be implemented
960f3efcd01Safresh1                        next if @pattern != 1 && $quantifier;
961f3efcd01Safresh1
962f3efcd01Safresh1                        # A ? or * quantifier normally causes the thing to be
963f3efcd01Safresh1                        # able to match a null string
964f3efcd01Safresh1                        my $quantifier_can_match_null = $quantifier eq '?'
965f3efcd01Safresh1                                                     || $quantifier eq '*';
966f3efcd01Safresh1
967f3efcd01Safresh1                        # But since we only quantify the last character in a
968f3efcd01Safresh1                        # multiple fold, the other characters will have width,
969f3efcd01Safresh1                        # except if we are quantifying the whole rhs
970f3efcd01Safresh1                        my $can_match_null = $quantifier_can_match_null
971f3efcd01Safresh1                                             && (@rhs == 1 || $parend);
972f3efcd01Safresh1
973f3efcd01Safresh1                        foreach my $l_anchor ("", '^') { # '\A' didn't change
974f3efcd01Safresh1                                                         # result)
975f3efcd01Safresh1                          foreach my $r_anchor ("", '$') { # '\Z', '\z' didn't
976f3efcd01Safresh1                                                           # change result)
977f3efcd01Safresh1                            # The folded part can match the null string if it
978f3efcd01Safresh1                            # isn't required to have width, and there's not
979f3efcd01Safresh1                            # something on one or both sides that force it to.
980f3efcd01Safresh1                            my $both_sides = ($l_anchor && $r_anchor)
981f3efcd01Safresh1                                              || ($l_anchor && $append)
982f3efcd01Safresh1                                              || ($r_anchor && $prepend)
983f3efcd01Safresh1                                              || ($prepend && $append);
984f3efcd01Safresh1                            my $must_match = ! $can_match_null || $both_sides;
985f3efcd01Safresh1                            # for performance, but doing this missed many failures
986f3efcd01Safresh1                            #next unless $must_match;
987f2a19305Safresh1                            my $quantified = "(?$charset_mod:$l_anchor$prepend"
988f2a19305Safresh1                                           . "$interior${quantifier}$append$r_anchor)";
989f3efcd01Safresh1                            my $op;
990f3efcd01Safresh1                            if ($must_match && $should_fail)  {
991f3efcd01Safresh1                                $op = 0;
992f3efcd01Safresh1                            } else {
993f3efcd01Safresh1                                $op = 1;
994f3efcd01Safresh1                            }
995f3efcd01Safresh1                            $op = ! $op if $must_match && $inverted;
996f3efcd01Safresh1
997f3efcd01Safresh1                            if ($inverted && @target > 1) {
998f3efcd01Safresh1                              # When doing an inverted match against a
999f3efcd01Safresh1                              # multi-char target, and there is not something on
1000f3efcd01Safresh1                              # the left to anchor the match, if it shouldn't
1001f3efcd01Safresh1                              # succeed, skip, as what will happen (when working
1002f3efcd01Safresh1                              # correctly) is that it will match the first
1003f3efcd01Safresh1                              # position correctly, and then be inverted to not
1004f3efcd01Safresh1                              # match; then it will go to the second position
1005f3efcd01Safresh1                              # where it won't match, but get inverted to match,
1006f3efcd01Safresh1                              # and hence succeeding.
1007f3efcd01Safresh1                              next if ! ($l_anchor || $prepend) && ! $op;
1008f3efcd01Safresh1
1009f3efcd01Safresh1                              # Can't ever match for latin1 code points non-uni
1010f3efcd01Safresh1                              # semantics that have a inverted multi-char fold
1011f3efcd01Safresh1                              # when there is something on both sides and the
1012f3efcd01Safresh1                              # quantifier isn't such as to span the required
1013f3efcd01Safresh1                              # width, which is 2 or 3.
1014f3efcd01Safresh1                              $op = 0 if $ord < 255
1015f3efcd01Safresh1                                        && ! $uni_semantics
1016f3efcd01Safresh1                                        && $both_sides
1017f3efcd01Safresh1                                        && ( ! $quantifier || $quantifier eq '?')
1018f3efcd01Safresh1                                        && $parend < 2;
1019f3efcd01Safresh1
1020f3efcd01Safresh1                              # Similarly can't ever match when inverting a
1021f3efcd01Safresh1                              # multi-char fold for /aa and the quantifier
1022f3efcd01Safresh1                              # isn't sufficient to allow it to span to both
1023f3efcd01Safresh1                              # sides.
1024f3efcd01Safresh1                              $op = 0 if $target_has_ascii
1025f3efcd01Safresh1                                         && $charset eq 'aa'
1026f3efcd01Safresh1                                         && $both_sides
1027f3efcd01Safresh1                                         && ( ! $quantifier || $quantifier eq '?')
1028f3efcd01Safresh1                                         && $parend < 2;
1029f3efcd01Safresh1
1030f3efcd01Safresh1                              # Or for /l
1031f3efcd01Safresh1                              $op = 0 if $target_has_latin1 && $charset eq 'l'
1032f3efcd01Safresh1                                      && $both_sides
1033f3efcd01Safresh1                                      && ( ! $quantifier || $quantifier eq '?')
1034f3efcd01Safresh1                                      && $parend < 2;
1035f3efcd01Safresh1                            }
1036f3efcd01Safresh1
1037f3efcd01Safresh1
1038f3efcd01Safresh1                            my $desc = "";
1039f3efcd01Safresh1                            if ($charset_mod eq 'l') {
1040f3efcd01Safresh1                                $desc .= 'setlocale(LC_CTYPE, "'
1041f3efcd01Safresh1                                        . POSIX::setlocale(&POSIX::LC_CTYPE)
1042f3efcd01Safresh1                                        . '"); '
1043f3efcd01Safresh1                            }
1044f3efcd01Safresh1                            $desc .= "my \$c = \"$prepend$lhs$append\"; "
1045f2a19305Safresh1                                    . "my \$rhs = \"\"; $upgrade_pattern"
1046f2a19305Safresh1                                    . "my \$p = qr/$quantified\$rhs/i;"
1047f2a19305Safresh1                                    . "$upgrade_target "
1048f3efcd01Safresh1                                    . "\$c " . ($op ? "=~" : "!~") . " \$p; ";
1049f3efcd01Safresh1                            if ($DEBUG) {
1050f3efcd01Safresh1                              $desc .= (
1051f3efcd01Safresh1                              "; uni_semantics=$uni_semantics, "
1052f3efcd01Safresh1                              . "should_fail=$should_fail, "
1053f3efcd01Safresh1                              . "bracketed=$bracketed, "
1054f3efcd01Safresh1                              . "prepend=$prepend, "
1055f3efcd01Safresh1                              . "append=$append, "
1056f3efcd01Safresh1                              . "parend=$parend, "
1057f3efcd01Safresh1                              . "quantifier=$quantifier, "
1058f3efcd01Safresh1                              . "l_anchor=$l_anchor, "
1059f3efcd01Safresh1                              . "r_anchor=$r_anchor; "
1060f3efcd01Safresh1                              . "pattern_above_latin1=$pattern_above_latin1; "
1061f3efcd01Safresh1                              . "utf8_pattern=$utf8_pattern"
1062f3efcd01Safresh1                              );
1063f3efcd01Safresh1                            }
1064f3efcd01Safresh1
1065f3efcd01Safresh1                            my $c = "$prepend$lhs_str$append";
1066f2a19305Safresh1                            my $p = "$quantified"; # string copy deliberate
1067f3efcd01Safresh1                            utf8::upgrade($c) if length($upgrade_target);
1068f3efcd01Safresh1                            utf8::upgrade($p) if length($upgrade_pattern);
1069f2a19305Safresh1                            $p = qr/$p/i;
1070f3efcd01Safresh1                            my $res = $op ? ($c =~ $p): ($c !~ $p);
1071f3efcd01Safresh1
1072f3efcd01Safresh1                            if (!$res || $list_all_tests) {
1073f3efcd01Safresh1                              # Failed or debug; output the result
1074f3efcd01Safresh1                              $count++;
1075f3efcd01Safresh1                              ok($res, "test $count - $desc");
1076f3efcd01Safresh1                            } else {
1077f3efcd01Safresh1                              # Just count the test as passed
1078f3efcd01Safresh1                              $okays++;
1079f3efcd01Safresh1                            }
1080f3efcd01Safresh1                            $this_iteration++;
1081f3efcd01Safresh1                          }
1082f3efcd01Safresh1                        }
1083f3efcd01Safresh1                      }
1084f3efcd01Safresh1                    }
1085f3efcd01Safresh1                  }
1086f3efcd01Safresh1                }
1087f3efcd01Safresh1              }
1088f3efcd01Safresh1            }
1089f3efcd01Safresh1          }
1090f3efcd01Safresh1        }
1091f3efcd01Safresh1      }
1092f3efcd01Safresh1      unless($list_all_tests) {
1093f3efcd01Safresh1        $count++;
1094f3efcd01Safresh1        is $okays, $this_iteration, "$okays subtests ok for"
1095f3efcd01Safresh1          . " /$charset_mod"
1096f3efcd01Safresh1          . (($charset_mod eq 'l') ? " ($current_locale)" : "")
1097f3efcd01Safresh1          . ', target="' . join("", @x_target) . '",'
1098f3efcd01Safresh1          . ' pat="' . join("", @x_pattern) . '"';
1099f3efcd01Safresh1      }
1100f3efcd01Safresh1    }
1101f3efcd01Safresh1  }
1102f3efcd01Safresh1}
1103f3efcd01Safresh1}
1104f3efcd01Safresh1
1105f3efcd01Safresh1plan($count);
1106f3efcd01Safresh1
1107f3efcd01Safresh11
1108