xref: /openbsd-src/gnu/usr.bin/perl/t/re/regexp_unicode_prop.t (revision 56d68f1e19ff848c889ecfa71d3a06340ff64892)
1b39c5158Smillert#!./perl
2b39c5158Smillert#
3b39c5158Smillert# Tests that have to do with checking whether characters have (or not have)
4b39c5158Smillert# certain Unicode properties; belong (or not belong) to blocks, scripts, etc.
5*56d68f1eSafresh1# including user-defined properties
6b39c5158Smillert#
7b39c5158Smillert
8b39c5158Smillertuse strict;
9b39c5158Smillertuse warnings;
10b46d8ef2Safresh1use v5.16;
11b46d8ef2Safresh1use utf8;
12b46d8ef2Safresh1
13b46d8ef2Safresh1# To verify that messages containing the expansions work on UTF-8
14b46d8ef2Safresh1my $utf8_comment;
15b46d8ef2Safresh1
16b46d8ef2Safresh1my @warnings;
17b46d8ef2Safresh1local $SIG {__WARN__} = sub {push @warnings, "@_"};
18b39c5158Smillert
19898184e3SsthenBEGIN {
20b8851fccSafresh1    chdir 't' if -d 't';
21898184e3Ssthen    require './test.pl';
22898184e3Ssthen    skip_all_if_miniperl("no dynamic loading on miniperl, no File::Spec (used by charnames)");
23898184e3Ssthen}
24b39c5158Smillert
25b39c5158Smillertsub run_tests;
26b39c5158Smillert
27b46d8ef2Safresh1sub get_str_name($) {
28b46d8ef2Safresh1    my $char = shift;
29b46d8ef2Safresh1
30b46d8ef2Safresh1    my ($str, $name);
31b46d8ef2Safresh1
32b46d8ef2Safresh1    if ($char =~ /^\\/) {
33b46d8ef2Safresh1        $str  = eval qq ["$char"];
34b46d8ef2Safresh1        $name =      qq ["$char"];
35b46d8ef2Safresh1    }
36b46d8ef2Safresh1    elsif ($char =~ /^0x([0-9A-Fa-f]+)$/) {
37b46d8ef2Safresh1        $str  =  chr hex $1;
38b46d8ef2Safresh1        $name = "chr ($char)";
39b46d8ef2Safresh1    }
40b46d8ef2Safresh1    else {
41b46d8ef2Safresh1        $str  =      $char;
42b46d8ef2Safresh1        $name = qq ["$char"];
43b46d8ef2Safresh1    }
44b46d8ef2Safresh1
45b46d8ef2Safresh1    return ($str, $name);
46b46d8ef2Safresh1}
47b46d8ef2Safresh1
48b39c5158Smillert#
49b39c5158Smillert# This is the data to test.
50b39c5158Smillert#
51b39c5158Smillert# This is a hash; keys are the property to test.
52b39c5158Smillert# Values are arrays containing characters to test. The characters can
53b39c5158Smillert# have the following formats:
54b39c5158Smillert#   '\N{CHARACTER NAME}'  -  Use character with that name
55b39c5158Smillert#   '\x{1234}'            -  Use character with that hex escape
56b39c5158Smillert#   '0x1234'              -  Use chr() to get that character
57b39c5158Smillert#   "a"                   -  Character to use
58b39c5158Smillert#
59b39c5158Smillert# If a character entry starts with ! the character does not belong to the class
60b39c5158Smillert#
61b39c5158Smillert# If the class is just single letter, we use both \pL and \p{L}
62b39c5158Smillert#
63b39c5158Smillert
64b39c5158Smillertuse charnames ':full';
65b39c5158Smillert
66b39c5158Smillertmy @CLASSES = (
67b39c5158Smillert    L                         => ["a", "A"],
68b39c5158Smillert    Ll                        => ["b", "!B"],
69b39c5158Smillert    Lu                        => ["!c", "C"],
70b39c5158Smillert    IsLl                      => ["d", "!D"],
71b39c5158Smillert    IsLu                      => ["!e", "E"],
72b39c5158Smillert    LC                        => ["f", "!1"],
73b39c5158Smillert   'L&'                       => ["g", "!2"],
74b39c5158Smillert   'Lowercase Letter'         => ["h", "!H"],
75b39c5158Smillert
76b39c5158Smillert    Common                    => ["!i", "3"],
77b39c5158Smillert    Inherited                 => ["!j", '\x{300}'],
78b39c5158Smillert
79b39c5158Smillert    InBasicLatin              => ['\N{LATIN CAPITAL LETTER A}'],
80b39c5158Smillert    InLatin1Supplement        => ['\N{LATIN CAPITAL LETTER A WITH GRAVE}'],
81b39c5158Smillert    InLatinExtendedA          => ['\N{LATIN CAPITAL LETTER A WITH MACRON}'],
82b39c5158Smillert    InLatinExtendedB          => ['\N{LATIN SMALL LETTER B WITH STROKE}'],
83b39c5158Smillert    InKatakana                => ['\N{KATAKANA LETTER SMALL A}'],
84b39c5158Smillert    IsLatin                   => ["0x100", "0x212b"],
85b39c5158Smillert    IsHebrew                  => ["0x5d0", "0xfb4f"],
86b39c5158Smillert    IsGreek                   => ["0x37a", "0x386", "!0x387", "0x388",
87b39c5158Smillert                                  "0x38a", "!0x38b", "0x38c"],
88b39c5158Smillert    HangulSyllables           => ['\x{AC00}'],
89b39c5158Smillert   'Script=Latin'             => ['\x{0100}'],
90b39c5158Smillert   'Block=LatinExtendedA'     => ['\x{0100}'],
91b39c5158Smillert   'Category=UppercaseLetter' => ['\x{0100}'],
92b39c5158Smillert
93b39c5158Smillert    #
94b39c5158Smillert    # It's ok to repeat class names.
95b39c5158Smillert    #
96b39c5158Smillert    InLatin1Supplement        =>
97b8851fccSafresh1                            ['!\N{U+7f}',  '\N{U+80}',  '\N{U+ff}', '!\x{100}'],
98b39c5158Smillert    InLatinExtendedA          =>
99b8851fccSafresh1                            ['!\N{U+7f}', '!\N{U+80}', '!\N{U+ff}',  '\x{100}'],
100b39c5158Smillert
101b39c5158Smillert    #
102b39c5158Smillert    # Properties are case-insensitive, and may have whitespace,
103b39c5158Smillert    # dashes and underscores.
104b39c5158Smillert    #
105b8851fccSafresh1   'in-latin1_SUPPLEMENT'     => ['\N{U+80}',
106b39c5158Smillert                                  '\N{LATIN SMALL LETTER Y WITH DIAERESIS}'],
107b39c5158Smillert   '  ^  In Latin 1 Supplement  '
108b8851fccSafresh1                              => ['!\N{U+80}', '\N{COFFIN}'],
109b8851fccSafresh1   'latin-1   supplement'     => ['\N{U+80}', "0xDF"],
110b39c5158Smillert
111b39c5158Smillert);
112b39c5158Smillert
113b46d8ef2Safresh1my @USER_DEFINED_PROPERTIES;
114b46d8ef2Safresh1my @USER_CASELESS_PROPERTIES;
115b46d8ef2Safresh1my @USER_ERROR_PROPERTIES;
116b46d8ef2Safresh1my @DEFERRED;
117b46d8ef2Safresh1my $overflow;
118b46d8ef2Safresh1BEGIN {
119b46d8ef2Safresh1    $utf8_comment = "#\N{U+30CD}";
120b46d8ef2Safresh1
121b46d8ef2Safresh1    use Config;
122b46d8ef2Safresh1    $overflow = $Config{uvsize} < 8 ? "80000000" : "80000000000000000";
123b46d8ef2Safresh1
124b46d8ef2Safresh1    # We defined these at compile time, so that the subroutines that they
125b46d8ef2Safresh1    # refer to aren't known, so that we can test properties not known until
126b46d8ef2Safresh1    # runtime
127b46d8ef2Safresh1
128b46d8ef2Safresh1    @USER_DEFINED_PROPERTIES = (
129b39c5158Smillert        #
130b39c5158Smillert        # User defined properties
131b39c5158Smillert        #
132b39c5158Smillert        InKana1                   => ['\x{3040}', '!\x{303F}'],
133b39c5158Smillert        InKana2                   => ['\x{3040}', '!\x{303F}'],
134b39c5158Smillert        InKana3                   => ['\x{3041}', '!\x{3040}'],
135b39c5158Smillert        InNotKana                 => ['\x{3040}', '!\x{3041}'],
136b39c5158Smillert        InConsonant               => ['d',        '!e'],
137b39c5158Smillert        IsSyriac1                 => ['\x{0712}', '!\x{072F}'],
138898184e3Ssthen        IsSyriac1KanaMark         => ['\x{309A}', '!\x{3090}'],
139898184e3Ssthen        IsSyriac1KanaMark         => ['\x{0730}', '!\x{0712}'],
140898184e3Ssthen        '# User-defined character properties may lack \n at the end',
141b39c5158Smillert        InGreekSmall              => ['\N{GREEK SMALL LETTER PI}',
142b39c5158Smillert                                        '\N{GREEK SMALL LETTER FINAL SIGMA}'],
143b39c5158Smillert        InGreekCapital            => ['\N{GREEK CAPITAL LETTER PI}', '!\x{03A2}'],
144b39c5158Smillert        Dash                      => ['-'],
145b39c5158Smillert        ASCII_Hex_Digit           => ['!-', 'A'],
146898184e3Ssthen        IsAsciiHexAndDash         => ['-', 'A'],
147*56d68f1eSafresh1        InLatin1                  => ['\x{0100}', '!\x{00FF}'],
148898184e3Ssthen    );
149898184e3Ssthen
150b46d8ef2Safresh1    @USER_CASELESS_PROPERTIES = (
151898184e3Ssthen        #
152b46d8ef2Safresh1        # User defined properties which differ depending on /i.  Second entry
153b46d8ef2Safresh1        # is false normally, true under /i
154898184e3Ssthen        #
155898184e3Ssthen        'IsMyUpper'                => ["M", "!m" ],
156b46d8ef2Safresh1        'pkg1::pkg2::IsMyLower'    => ["a", "!A" ],
157b39c5158Smillert    );
158b39c5158Smillert
159b46d8ef2Safresh1    @USER_ERROR_PROPERTIES = (
160b46d8ef2Safresh1        'IsOverflow'    => qr/Code point too large in (?#
161b46d8ef2Safresh1                             )"0\t$overflow$utf8_comment" in expansion of (?#
162b46d8ef2Safresh1                             )main::IsOverflow/,
163b46d8ef2Safresh1        'InRecursedA'   => qr/Infinite recursion in user-defined property (?#
164b46d8ef2Safresh1                             )"main::InRecursedA" in expansion of (?#
165b46d8ef2Safresh1                             )main::InRecursedC in expansion of (?#
166b46d8ef2Safresh1                             )main::InRecursedB in expansion of (?#
167b46d8ef2Safresh1                             )main::InRecursedA/,
168b46d8ef2Safresh1        'IsRangeReversed' => qr/Illegal range in "200 100$utf8_comment" in (?#
169b46d8ef2Safresh1                               )expansion of main::IsRangeReversed/,
170b46d8ef2Safresh1        'IsNonHex'        => qr/Can't find Unicode property definition (?#
171b46d8ef2Safresh1                               )"BEEF CAGED" in expansion of main::IsNonHex/,
172b46d8ef2Safresh1
173b46d8ef2Safresh1        # Could have \n, hence /s
174b46d8ef2Safresh1        'IsDeath'        => qr/Died.* in expansion of main::IsDeath/s,
175b46d8ef2Safresh1    );
176b46d8ef2Safresh1
177b46d8ef2Safresh1    # Now create a list of properties whose definitions won't be known at
178b46d8ef2Safresh1    # runtime.  The qr// below thus will have forward references to them, and
179b46d8ef2Safresh1    # when matched at runtime will not know what's in the property definition
180b46d8ef2Safresh1    my @DEFERRABLE_USER_DEFINED_PROPERTIES;
181b46d8ef2Safresh1    push @DEFERRABLE_USER_DEFINED_PROPERTIES, @USER_DEFINED_PROPERTIES;
182b46d8ef2Safresh1    push @DEFERRABLE_USER_DEFINED_PROPERTIES, @USER_CASELESS_PROPERTIES;
183b46d8ef2Safresh1    unshift @DEFERRABLE_USER_DEFINED_PROPERTIES, @USER_ERROR_PROPERTIES;
184b46d8ef2Safresh1    for (my $i = 0; $i < @DEFERRABLE_USER_DEFINED_PROPERTIES; $i+=2) {
185b46d8ef2Safresh1        my $property = $DEFERRABLE_USER_DEFINED_PROPERTIES[$i];
186b46d8ef2Safresh1        if ($property =~ / ^ \# /x) {
187b46d8ef2Safresh1            $i++;
188b46d8ef2Safresh1            redo;
189b46d8ef2Safresh1        }
190b46d8ef2Safresh1
191b46d8ef2Safresh1        # Only do this for the properties in the list that are user-defined
192b46d8ef2Safresh1        next if ($property !~ / ( ^ | :: ) I[ns] /x);
193b46d8ef2Safresh1
194b46d8ef2Safresh1        push @DEFERRED, qr/\p{$property}/,
195b46d8ef2Safresh1                        $DEFERRABLE_USER_DEFINED_PROPERTIES[$i+1];
196b46d8ef2Safresh1    }
197b46d8ef2Safresh1}
198b46d8ef2Safresh1
199b39c5158Smillert#
200b39c5158Smillert# From the short properties we populate POSIX-like classes.
201b39c5158Smillert#
202b39c5158Smillertmy %SHORT_PROPERTIES = (
203b39c5158Smillert    'Ll'  => ['m', '\N{CYRILLIC SMALL LETTER A}'],
204b39c5158Smillert    'Lu'  => ['M', '\N{GREEK CAPITAL LETTER ALPHA}'],
205b39c5158Smillert    'Lo'  => ['\N{HIRAGANA LETTER SMALL A}'],
206b39c5158Smillert    # is also in other alphabetic
207b39c5158Smillert    'Mn'  => ['\N{HEBREW POINT RAFE}'],
208b39c5158Smillert    'Nd'  => ["0", '\N{ARABIC-INDIC DIGIT ZERO}'],
209b39c5158Smillert    'Pc'  => ["_"],
210b39c5158Smillert    'Po'  => ["!"],
211b39c5158Smillert    'Zs'  => [" "],
212b39c5158Smillert    'Cc'  => ['\x{00}'],
213b39c5158Smillert);
214b39c5158Smillert
215b39c5158Smillert#
216b39c5158Smillert# Illegal properties
217b39c5158Smillert#
218898184e3Ssthenmy @ILLEGAL_PROPERTIES =
219898184e3Ssthen    qw[q qrst f foo isfoo infoo ISfoo INfoo Is::foo In::foo];
220b39c5158Smillert
221b39c5158Smillertmy %d;
222b39c5158Smillert
223b39c5158Smillertwhile (my ($class, $chars) = each %SHORT_PROPERTIES) {
224b39c5158Smillert    push @{$d {IsAlpha}} => map {$class =~ /^[LM]/   ? $_ : "!$_"} @$chars;
225b39c5158Smillert    push @{$d {IsAlnum}} => map {$class =~ /^[LMN]./ ? $_ : "!$_"} @$chars;
226b39c5158Smillert    push @{$d {IsASCII}} => map {length ($_) == 1 || $_ eq '\x{00}'
227b39c5158Smillert                                                     ? $_ : "!$_"} @$chars;
228b39c5158Smillert    push @{$d {IsCntrl}} => map {$class =~ /^C/      ? $_ : "!$_"} @$chars;
229b39c5158Smillert    push @{$d {IsBlank}} => map {$class =~ /^Z[lps]/ ? $_ : "!$_"} @$chars;
230b39c5158Smillert    push @{$d {IsDigit}} => map {$class =~ /^Nd$/    ? $_ : "!$_"} @$chars;
231b39c5158Smillert    push @{$d {IsGraph}} => map {$class =~ /^([LMNPS]|Co)/
232b39c5158Smillert                                                     ? $_ : "!$_"} @$chars;
233b39c5158Smillert    push @{$d {IsPrint}} => map {$class =~ /^([LMNPS]|Co|Zs)/
234b39c5158Smillert                                                     ? $_ : "!$_"} @$chars;
235b39c5158Smillert    push @{$d {IsLower}} => map {$class =~ /^Ll$/    ? $_ : "!$_"} @$chars;
236b39c5158Smillert    push @{$d {IsUpper}} => map {$class =~ /^L[ut]/  ? $_ : "!$_"} @$chars;
237b39c5158Smillert    push @{$d {IsPunct}} => map {$class =~ /^P/      ? $_ : "!$_"} @$chars;
238b39c5158Smillert    push @{$d {IsWord}}  => map {$class =~ /^[LMN]/ || $_ eq "_"
239b39c5158Smillert                                                     ? $_ : "!$_"} @$chars;
240b39c5158Smillert    push @{$d {IsSpace}} => map {$class =~ /^Z/ ||
241b8851fccSafresh1                                 length ($_) == 1 && utf8::native_to_unicode(ord ($_)) >= 0x09
242b8851fccSafresh1                                                  && utf8::native_to_unicode(ord ($_)) <= 0x0D
243b39c5158Smillert                                                     ? $_ : "!$_"} @$chars;
244b39c5158Smillert}
245b39c5158Smillert
246b39c5158Smillertpush @CLASSES => "# Short properties"        => %SHORT_PROPERTIES,
247b39c5158Smillert                 "# POSIX like properties"   => %d,
248*56d68f1eSafresh1                 "# User defined properties" => @USER_DEFINED_PROPERTIES;
249b39c5158Smillert
250b39c5158Smillert
251b39c5158Smillert#
252b39c5158Smillert# Calculate the number of tests.
253b39c5158Smillert#
254b39c5158Smillertmy $count = 0;
255b39c5158Smillertfor (my $i = 0; $i < @CLASSES; $i += 2) {
256b39c5158Smillert    $i ++, redo if $CLASSES [$i] =~ /^\h*#\h*(.*)/;
257898184e3Ssthen    $count += 2 * (length $CLASSES [$i] == 1 ? 4 : 2) * @{$CLASSES [$i + 1]};
258b39c5158Smillert}
259898184e3Ssthen$count += 4 * @ILLEGAL_PROPERTIES;
260898184e3Ssthen$count += 4 * grep {length $_ == 1} @ILLEGAL_PROPERTIES;
261898184e3Ssthen$count += 8 * @USER_CASELESS_PROPERTIES;
262b46d8ef2Safresh1$count += 1 * (@DEFERRED - @USER_ERROR_PROPERTIES) / 2;
263b46d8ef2Safresh1$count += 1 * @USER_ERROR_PROPERTIES;
264b46d8ef2Safresh1$count += 1;    # one bad apple
265b46d8ef2Safresh1$count += 1;    # No warnings generated
266b39c5158Smillert
267898184e3Ssthenplan(tests => $count);
268b39c5158Smillert
269b39c5158Smillertrun_tests unless caller ();
270b39c5158Smillert
271b39c5158Smillertsub match {
272898184e3Ssthen    my ($char, $match, $nomatch, $caseless) = @_;
273898184e3Ssthen    $caseless = "" unless defined $caseless;
274898184e3Ssthen    $caseless = 'i' if $caseless;
275b39c5158Smillert
276b46d8ef2Safresh1    my ($str, $name) = get_str_name($char);
277b39c5158Smillert
278898184e3Ssthen    undef $@;
279898184e3Ssthen    my $pat = "qr/$match/$caseless";
280898184e3Ssthen    my $match_pat = eval $pat;
281b46d8ef2Safresh1    if (is($@, '', "$pat compiled correctly to a regexp: $@")) {
282898184e3Ssthen        like($str, $match_pat, "$name correctly matched");
283b46d8ef2Safresh1    }
284898184e3Ssthen
285898184e3Ssthen    undef $@;
286898184e3Ssthen    $pat = "qr/$nomatch/$caseless";
287898184e3Ssthen    my $nomatch_pat = eval $pat;
288b46d8ef2Safresh1    if (is($@, '', "$pat compiled correctly to a regexp: $@")) {
289898184e3Ssthen        unlike($str, $nomatch_pat, "$name correctly did not match");
290b39c5158Smillert    }
291b46d8ef2Safresh1}
292b39c5158Smillert
293b39c5158Smillertsub run_tests {
294b39c5158Smillert
295b46d8ef2Safresh1    for (my $i = 0; $i < @DEFERRED; $i+=2) {
296b46d8ef2Safresh1        if (ref $DEFERRED[$i+1] eq 'ARRAY') {
297b46d8ef2Safresh1            my ($str, $name) = get_str_name($DEFERRED[$i+1][0]);
298b46d8ef2Safresh1            like($str, $DEFERRED[$i],
299b46d8ef2Safresh1                "$name correctly matched $DEFERRED[$i] (defn. not known until runtime)");
300b46d8ef2Safresh1        }
301b46d8ef2Safresh1        else {  # Single entry rhs indicates a property that is an error
302b46d8ef2Safresh1            undef $@;
303b46d8ef2Safresh1
304b46d8ef2Safresh1            # Using block eval causes the pattern to not be recompiled, so it
305b46d8ef2Safresh1            # retains its deferred status until this is executed.
306b46d8ef2Safresh1            eval { 'A' =~ $DEFERRED[$i] };
307b46d8ef2Safresh1            like($@, $DEFERRED[$i+1],
308b46d8ef2Safresh1                                "$DEFERRED[$i] gave correct failure message (defn. not known until runtime)");
309b46d8ef2Safresh1        }
310b46d8ef2Safresh1    }
311b46d8ef2Safresh1
312b39c5158Smillert    while (@CLASSES) {
313b39c5158Smillert        my $class = shift @CLASSES;
314b39c5158Smillert        if ($class =~ /^\h*#\h*(.*)/) {
315b39c5158Smillert            print "# $1\n";
316b39c5158Smillert            next;
317b39c5158Smillert        }
318b39c5158Smillert        last unless @CLASSES;
319b39c5158Smillert        my $chars   = shift @CLASSES;
320b39c5158Smillert        my @in      =                       grep {!/^!./} @$chars;
321b39c5158Smillert        my @out     = map {s/^!(?=.)//; $_} grep { /^!./} @$chars;
322b39c5158Smillert        my $in_pat  = eval qq ['\\p{$class}'];
323b39c5158Smillert        my $out_pat = eval qq ['\\P{$class}'];
324b39c5158Smillert
325b39c5158Smillert        match $_, $in_pat,  $out_pat for @in;
326b39c5158Smillert        match $_, $out_pat, $in_pat  for @out;
327b39c5158Smillert
328898184e3Ssthen        if (1 == length $class) {   # Repeat without braces if name length 1
329b39c5158Smillert            my $in_pat  = eval qq ['\\p$class'];
330b39c5158Smillert            my $out_pat = eval qq ['\\P$class'];
331b39c5158Smillert
332b39c5158Smillert            match $_, $in_pat,  $out_pat for @in;
333b39c5158Smillert            match $_, $out_pat, $in_pat  for @out;
334b39c5158Smillert        }
335b39c5158Smillert    }
336b39c5158Smillert
337b39c5158Smillert
338b39c5158Smillert    print "# Illegal properties\n";
339b39c5158Smillert    foreach my $p (@ILLEGAL_PROPERTIES) {
340b8851fccSafresh1        my $pat;
341b8851fccSafresh1        if ($p =~ /::/) {
342b8851fccSafresh1            $pat = qr /^Illegal user-defined property name/;
343b8851fccSafresh1        }
344b8851fccSafresh1        else {
345b8851fccSafresh1            $pat = qr /^Can't find Unicode property definition/;
346b8851fccSafresh1        }
347b8851fccSafresh1
348b39c5158Smillert        undef $@;
349b39c5158Smillert        my $r = eval "'a' =~ /\\p{$p}/; 1";
350898184e3Ssthen        is($r, undef, "Unknown Unicode property \\p{$p}");
351898184e3Ssthen        like($@, $pat, "Unknown Unicode property \\p{$p}");
352b39c5158Smillert        undef $@;
353b39c5158Smillert        my $s = eval "'a' =~ /\\P{$p}/; 1";
354898184e3Ssthen        is($s, undef, "Unknown Unicode property \\p{$p}");
355898184e3Ssthen        like($@, $pat, "Unknown Unicode property \\p{$p}");
356b39c5158Smillert        if (length $p == 1) {
357b39c5158Smillert            undef $@;
358b39c5158Smillert            my $r = eval "'a' =~ /\\p$p/; 1";
359898184e3Ssthen            is($r, undef, "Unknown Unicode property \\p$p");
360898184e3Ssthen            like($@, $pat, "Unknown Unicode property \\p$p");
361b39c5158Smillert            undef $@;
362b39c5158Smillert            my $s = eval "'a' =~ /\\P$p/; 1";
363898184e3Ssthen            is($r, undef, "Unknown Unicode property \\P$p");
364898184e3Ssthen            like($@, $pat, "Unknown Unicode property \\P$p");
365b39c5158Smillert        }
366b39c5158Smillert    }
367898184e3Ssthen
368898184e3Ssthen    print "# User-defined properties with /i differences\n";
369b8851fccSafresh1    while (my $class = shift @USER_CASELESS_PROPERTIES) {
370898184e3Ssthen        my $chars_ref = shift @USER_CASELESS_PROPERTIES;
371898184e3Ssthen        my @in      =                       grep {!/^!./} @$chars_ref;
372898184e3Ssthen        my @out     = map {s/^!(?=.)//; $_} grep { /^!./} @$chars_ref;
373898184e3Ssthen        my $in_pat  = eval qq ['\\p{$class}'];
374898184e3Ssthen        my $out_pat = eval qq ['\\P{$class}'];
375898184e3Ssthen
376b46d8ef2Safresh1        # Verify that adding /i does change the out set to match.
377b46d8ef2Safresh1        match $_, $in_pat,  $out_pat, 'i' for @out;
378898184e3Ssthen
379898184e3Ssthen        # Verify that adding /i doesn't change the in set.
380898184e3Ssthen        match $_, $in_pat,  $out_pat, 'i' for @in;
381898184e3Ssthen
382b46d8ef2Safresh1        # Verify works as regularly for not /i
383b46d8ef2Safresh1        match $_, $in_pat,  $out_pat for @in;
384b46d8ef2Safresh1        match $_, $out_pat, $in_pat  for @out;
385b46d8ef2Safresh1    }
386b46d8ef2Safresh1
387b46d8ef2Safresh1    print "# User-defined properties with errors in their definition\n";
388b46d8ef2Safresh1    while (my $error_property = shift @USER_ERROR_PROPERTIES) {
389b46d8ef2Safresh1        my $error_re = shift @USER_ERROR_PROPERTIES;
390b46d8ef2Safresh1
391b46d8ef2Safresh1        undef $@;
392b46d8ef2Safresh1        eval { 'A' =~ /\p{$error_property}/; };
393b46d8ef2Safresh1        like($@, $error_re, "$error_property gave correct failure message");
394898184e3Ssthen    }
395b39c5158Smillert}
396b39c5158Smillert
397b39c5158Smillert
398b39c5158Smillert#
399b39c5158Smillert# User defined properties
400b39c5158Smillert#
401b39c5158Smillert
402b39c5158Smillertsub InKana1 {<<'--'}
403b46d8ef2Safresh13040    309F            # A comment; next line has trailing spaces
404b39c5158Smillert30A0    30FF
405b39c5158Smillert--
406b39c5158Smillert
407b39c5158Smillertsub InKana2 {<<'--'}
408b39c5158Smillert+utf8::InHiragana
409b39c5158Smillert+utf8::InKatakana
410b39c5158Smillert--
411b39c5158Smillert
412b39c5158Smillertsub InKana3 {<<'--'}
413b46d8ef2Safresh1# First line comment
414b39c5158Smillert+utf8::InHiragana
415b46d8ef2Safresh1# Full line comment
416b39c5158Smillert+utf8::InKatakana
417b39c5158Smillert-utf8::IsCn
418b39c5158Smillert--
419b39c5158Smillert
420b39c5158Smillertsub InNotKana {<<'--'}
421b46d8ef2Safresh1!utf8::InHiragana       # A comment; next line has trailing spaces
422b39c5158Smillert-utf8::InKatakana
423b39c5158Smillert+utf8::IsCn
424b46d8ef2Safresh1# Final line comment
425b39c5158Smillert--
426b39c5158Smillert
427b8851fccSafresh1sub InConsonant {
428b8851fccSafresh1
429b8851fccSafresh1    my $return = "+utf8::Lowercase\n&utf8::ASCII\n";
430b8851fccSafresh1    $return .= sprintf("-%X\n", ord "a");
431b8851fccSafresh1    $return .= sprintf("-%X\n", ord "e");
432b8851fccSafresh1    $return .= sprintf("-%X\n", ord "i");
433b8851fccSafresh1    $return .= sprintf("-%X\n", ord "o");
434b8851fccSafresh1    $return .= sprintf("-%X\n", ord "u");
435b8851fccSafresh1    return $return;
436b8851fccSafresh1}
437b39c5158Smillert
438b39c5158Smillertsub IsSyriac1 {<<'--'}
439b39c5158Smillert0712    072C
440b39c5158Smillert0730    074A
441b39c5158Smillert--
442b39c5158Smillert
443b46d8ef2Safresh1sub InRecursedA {
444b46d8ef2Safresh1    return "+main::InRecursedB\n";
445b46d8ef2Safresh1}
446b46d8ef2Safresh1
447b46d8ef2Safresh1sub InRecursedB {
448b46d8ef2Safresh1    return "+main::InRecursedC\n";
449b46d8ef2Safresh1}
450b46d8ef2Safresh1
451b46d8ef2Safresh1sub InRecursedC {
452b46d8ef2Safresh1    return "+main::InRecursedA\n";
453b46d8ef2Safresh1}
454b46d8ef2Safresh1
455b39c5158Smillertsub InGreekSmall   {return "03B1\t03C9"}
456b39c5158Smillertsub InGreekCapital {return "0391\t03A9\n-03A2"}
457b39c5158Smillert
458898184e3Ssthensub IsAsciiHexAndDash {<<'--'}
459b39c5158Smillert+utf8::ASCII_Hex_Digit
460b39c5158Smillert+utf8::Dash
461b39c5158Smillert--
462b39c5158Smillert
4639f11ffb7Safresh1sub InLatin1 {
4649f11ffb7Safresh1    return "0100\t10FFFF";
4659f11ffb7Safresh1}
4669f11ffb7Safresh1
467898184e3Ssthensub IsMyUpper {
468b46d8ef2Safresh1    use feature 'state';
469b46d8ef2Safresh1
470b46d8ef2Safresh1    state $cased_count = 0;
471b46d8ef2Safresh1    state $caseless_count = 0;
472b46d8ef2Safresh1    my $ret= "+utf8::";
473b46d8ef2Safresh1
474898184e3Ssthen    my $caseless = shift;
475b46d8ef2Safresh1    if($caseless) {
476b46d8ef2Safresh1        die "Called twice" if $caseless_count;
477b46d8ef2Safresh1        $caseless_count++;
478b46d8ef2Safresh1        $ret .= 'Alphabetic'
479b46d8ef2Safresh1    }
480b46d8ef2Safresh1    else {
481b46d8ef2Safresh1        die "Called twice" if $cased_count;
482b46d8ef2Safresh1        $cased_count++;
483b46d8ef2Safresh1        $ret .= 'Uppercase';
484898184e3Ssthen    }
485b8851fccSafresh1
486b46d8ef2Safresh1    return $ret . "\n&utf8::ASCII";
487b46d8ef2Safresh1}
488b8851fccSafresh1
489b46d8ef2Safresh1sub pkg1::pkg2::IsMyLower {
490b8851fccSafresh1    my $caseless = shift;
491b8851fccSafresh1    return "+utf8::"
492b8851fccSafresh1        . (($caseless)
493b8851fccSafresh1            ? 'Alphabetic'
494b8851fccSafresh1            : 'Lowercase')
495b8851fccSafresh1        . "\n&utf8::ASCII";
496898184e3Ssthen}
497b8851fccSafresh1
498b46d8ef2Safresh1sub IsRangeReversed {
499b46d8ef2Safresh1    return "200 100$utf8_comment";
500b46d8ef2Safresh1}
501b8851fccSafresh1
502b46d8ef2Safresh1sub IsNonHex {
503b46d8ef2Safresh1    return "BEEF CAGED$utf8_comment";
504b46d8ef2Safresh1}
505b46d8ef2Safresh1
506b46d8ef2Safresh1sub IsDeath {
507b46d8ef2Safresh1    die;
508898184e3Ssthen}
509898184e3Ssthen
510898184e3Ssthen# Verify that can use user-defined properties inside another one
511898184e3Ssthensub IsSyriac1KanaMark {<<'--'}
512898184e3Ssthen+main::IsSyriac1
513898184e3Ssthen+main::InKana3
514898184e3Ssthen&utf8::IsMark
515898184e3Ssthen--
516898184e3Ssthen
517898184e3Ssthen# fake user-defined properties; these subs shouldn't be called, because
518898184e3Ssthen# their names don't start with In or Is
519898184e3Ssthen
520898184e3Ssthensub f       { die }
521898184e3Ssthensub foo     { die }
522898184e3Ssthensub isfoo   { die }
523898184e3Ssthensub infoo   { die }
524898184e3Ssthensub ISfoo   { die }
525898184e3Ssthensub INfoo   { die }
526898184e3Ssthensub Is::foo { die }
527898184e3Ssthensub In::foo { die }
528b46d8ef2Safresh1
529b46d8ef2Safresh1sub IsOverflow {
530b46d8ef2Safresh1    return "0\t$overflow$utf8_comment";
531b46d8ef2Safresh1}
532b46d8ef2Safresh1
533b46d8ef2Safresh1fresh_perl_like(<<'EOP', qr/Can't find Unicode property definition "F000\\tF010" in expansion of InOneBadApple/, {}, "Just one component bad");
534b46d8ef2Safresh1# Extra backslash converts tab to backslash-t
535b46d8ef2Safresh1sub InOneBadApple { return "0100\t0110\n10000\t10010\nF000\\tF010\n0400\t0410" }
536b46d8ef2Safresh1qr/\p{InOneBadApple}/;
537b46d8ef2Safresh1EOP
538b46d8ef2Safresh1
539b46d8ef2Safresh1if (! is(@warnings, 0, "No warnings were generated")) {
540b46d8ef2Safresh1    diag join "\n", @warnings, "\n";
541b46d8ef2Safresh1}
542b46d8ef2Safresh1
543b46d8ef2Safresh11;
544b39c5158Smillert__END__
545