1b39c5158Smillert#!./perl 2b39c5158Smillert# 3b39c5158Smillert# Tests that have to do with checking whether characters have (or not have) 4b39c5158Smillert# certain Unicode properties; belong (or not belong) to blocks, scripts, etc. 5*56d68f1eSafresh1# including user-defined properties 6b39c5158Smillert# 7b39c5158Smillert 8b39c5158Smillertuse strict; 9b39c5158Smillertuse warnings; 10b46d8ef2Safresh1use v5.16; 11b46d8ef2Safresh1use utf8; 12b46d8ef2Safresh1 13b46d8ef2Safresh1# To verify that messages containing the expansions work on UTF-8 14b46d8ef2Safresh1my $utf8_comment; 15b46d8ef2Safresh1 16b46d8ef2Safresh1my @warnings; 17b46d8ef2Safresh1local $SIG {__WARN__} = sub {push @warnings, "@_"}; 18b39c5158Smillert 19898184e3SsthenBEGIN { 20b8851fccSafresh1 chdir 't' if -d 't'; 21898184e3Ssthen require './test.pl'; 22898184e3Ssthen skip_all_if_miniperl("no dynamic loading on miniperl, no File::Spec (used by charnames)"); 23898184e3Ssthen} 24b39c5158Smillert 25b39c5158Smillertsub run_tests; 26b39c5158Smillert 27b46d8ef2Safresh1sub get_str_name($) { 28b46d8ef2Safresh1 my $char = shift; 29b46d8ef2Safresh1 30b46d8ef2Safresh1 my ($str, $name); 31b46d8ef2Safresh1 32b46d8ef2Safresh1 if ($char =~ /^\\/) { 33b46d8ef2Safresh1 $str = eval qq ["$char"]; 34b46d8ef2Safresh1 $name = qq ["$char"]; 35b46d8ef2Safresh1 } 36b46d8ef2Safresh1 elsif ($char =~ /^0x([0-9A-Fa-f]+)$/) { 37b46d8ef2Safresh1 $str = chr hex $1; 38b46d8ef2Safresh1 $name = "chr ($char)"; 39b46d8ef2Safresh1 } 40b46d8ef2Safresh1 else { 41b46d8ef2Safresh1 $str = $char; 42b46d8ef2Safresh1 $name = qq ["$char"]; 43b46d8ef2Safresh1 } 44b46d8ef2Safresh1 45b46d8ef2Safresh1 return ($str, $name); 46b46d8ef2Safresh1} 47b46d8ef2Safresh1 48b39c5158Smillert# 49b39c5158Smillert# This is the data to test. 50b39c5158Smillert# 51b39c5158Smillert# This is a hash; keys are the property to test. 52b39c5158Smillert# Values are arrays containing characters to test. The characters can 53b39c5158Smillert# have the following formats: 54b39c5158Smillert# '\N{CHARACTER NAME}' - Use character with that name 55b39c5158Smillert# '\x{1234}' - Use character with that hex escape 56b39c5158Smillert# '0x1234' - Use chr() to get that character 57b39c5158Smillert# "a" - Character to use 58b39c5158Smillert# 59b39c5158Smillert# If a character entry starts with ! the character does not belong to the class 60b39c5158Smillert# 61b39c5158Smillert# If the class is just single letter, we use both \pL and \p{L} 62b39c5158Smillert# 63b39c5158Smillert 64b39c5158Smillertuse charnames ':full'; 65b39c5158Smillert 66b39c5158Smillertmy @CLASSES = ( 67b39c5158Smillert L => ["a", "A"], 68b39c5158Smillert Ll => ["b", "!B"], 69b39c5158Smillert Lu => ["!c", "C"], 70b39c5158Smillert IsLl => ["d", "!D"], 71b39c5158Smillert IsLu => ["!e", "E"], 72b39c5158Smillert LC => ["f", "!1"], 73b39c5158Smillert 'L&' => ["g", "!2"], 74b39c5158Smillert 'Lowercase Letter' => ["h", "!H"], 75b39c5158Smillert 76b39c5158Smillert Common => ["!i", "3"], 77b39c5158Smillert Inherited => ["!j", '\x{300}'], 78b39c5158Smillert 79b39c5158Smillert InBasicLatin => ['\N{LATIN CAPITAL LETTER A}'], 80b39c5158Smillert InLatin1Supplement => ['\N{LATIN CAPITAL LETTER A WITH GRAVE}'], 81b39c5158Smillert InLatinExtendedA => ['\N{LATIN CAPITAL LETTER A WITH MACRON}'], 82b39c5158Smillert InLatinExtendedB => ['\N{LATIN SMALL LETTER B WITH STROKE}'], 83b39c5158Smillert InKatakana => ['\N{KATAKANA LETTER SMALL A}'], 84b39c5158Smillert IsLatin => ["0x100", "0x212b"], 85b39c5158Smillert IsHebrew => ["0x5d0", "0xfb4f"], 86b39c5158Smillert IsGreek => ["0x37a", "0x386", "!0x387", "0x388", 87b39c5158Smillert "0x38a", "!0x38b", "0x38c"], 88b39c5158Smillert HangulSyllables => ['\x{AC00}'], 89b39c5158Smillert 'Script=Latin' => ['\x{0100}'], 90b39c5158Smillert 'Block=LatinExtendedA' => ['\x{0100}'], 91b39c5158Smillert 'Category=UppercaseLetter' => ['\x{0100}'], 92b39c5158Smillert 93b39c5158Smillert # 94b39c5158Smillert # It's ok to repeat class names. 95b39c5158Smillert # 96b39c5158Smillert InLatin1Supplement => 97b8851fccSafresh1 ['!\N{U+7f}', '\N{U+80}', '\N{U+ff}', '!\x{100}'], 98b39c5158Smillert InLatinExtendedA => 99b8851fccSafresh1 ['!\N{U+7f}', '!\N{U+80}', '!\N{U+ff}', '\x{100}'], 100b39c5158Smillert 101b39c5158Smillert # 102b39c5158Smillert # Properties are case-insensitive, and may have whitespace, 103b39c5158Smillert # dashes and underscores. 104b39c5158Smillert # 105b8851fccSafresh1 'in-latin1_SUPPLEMENT' => ['\N{U+80}', 106b39c5158Smillert '\N{LATIN SMALL LETTER Y WITH DIAERESIS}'], 107b39c5158Smillert ' ^ In Latin 1 Supplement ' 108b8851fccSafresh1 => ['!\N{U+80}', '\N{COFFIN}'], 109b8851fccSafresh1 'latin-1 supplement' => ['\N{U+80}', "0xDF"], 110b39c5158Smillert 111b39c5158Smillert); 112b39c5158Smillert 113b46d8ef2Safresh1my @USER_DEFINED_PROPERTIES; 114b46d8ef2Safresh1my @USER_CASELESS_PROPERTIES; 115b46d8ef2Safresh1my @USER_ERROR_PROPERTIES; 116b46d8ef2Safresh1my @DEFERRED; 117b46d8ef2Safresh1my $overflow; 118b46d8ef2Safresh1BEGIN { 119b46d8ef2Safresh1 $utf8_comment = "#\N{U+30CD}"; 120b46d8ef2Safresh1 121b46d8ef2Safresh1 use Config; 122b46d8ef2Safresh1 $overflow = $Config{uvsize} < 8 ? "80000000" : "80000000000000000"; 123b46d8ef2Safresh1 124b46d8ef2Safresh1 # We defined these at compile time, so that the subroutines that they 125b46d8ef2Safresh1 # refer to aren't known, so that we can test properties not known until 126b46d8ef2Safresh1 # runtime 127b46d8ef2Safresh1 128b46d8ef2Safresh1 @USER_DEFINED_PROPERTIES = ( 129b39c5158Smillert # 130b39c5158Smillert # User defined properties 131b39c5158Smillert # 132b39c5158Smillert InKana1 => ['\x{3040}', '!\x{303F}'], 133b39c5158Smillert InKana2 => ['\x{3040}', '!\x{303F}'], 134b39c5158Smillert InKana3 => ['\x{3041}', '!\x{3040}'], 135b39c5158Smillert InNotKana => ['\x{3040}', '!\x{3041}'], 136b39c5158Smillert InConsonant => ['d', '!e'], 137b39c5158Smillert IsSyriac1 => ['\x{0712}', '!\x{072F}'], 138898184e3Ssthen IsSyriac1KanaMark => ['\x{309A}', '!\x{3090}'], 139898184e3Ssthen IsSyriac1KanaMark => ['\x{0730}', '!\x{0712}'], 140898184e3Ssthen '# User-defined character properties may lack \n at the end', 141b39c5158Smillert InGreekSmall => ['\N{GREEK SMALL LETTER PI}', 142b39c5158Smillert '\N{GREEK SMALL LETTER FINAL SIGMA}'], 143b39c5158Smillert InGreekCapital => ['\N{GREEK CAPITAL LETTER PI}', '!\x{03A2}'], 144b39c5158Smillert Dash => ['-'], 145b39c5158Smillert ASCII_Hex_Digit => ['!-', 'A'], 146898184e3Ssthen IsAsciiHexAndDash => ['-', 'A'], 147*56d68f1eSafresh1 InLatin1 => ['\x{0100}', '!\x{00FF}'], 148898184e3Ssthen ); 149898184e3Ssthen 150b46d8ef2Safresh1 @USER_CASELESS_PROPERTIES = ( 151898184e3Ssthen # 152b46d8ef2Safresh1 # User defined properties which differ depending on /i. Second entry 153b46d8ef2Safresh1 # is false normally, true under /i 154898184e3Ssthen # 155898184e3Ssthen 'IsMyUpper' => ["M", "!m" ], 156b46d8ef2Safresh1 'pkg1::pkg2::IsMyLower' => ["a", "!A" ], 157b39c5158Smillert ); 158b39c5158Smillert 159b46d8ef2Safresh1 @USER_ERROR_PROPERTIES = ( 160b46d8ef2Safresh1 'IsOverflow' => qr/Code point too large in (?# 161b46d8ef2Safresh1 )"0\t$overflow$utf8_comment" in expansion of (?# 162b46d8ef2Safresh1 )main::IsOverflow/, 163b46d8ef2Safresh1 'InRecursedA' => qr/Infinite recursion in user-defined property (?# 164b46d8ef2Safresh1 )"main::InRecursedA" in expansion of (?# 165b46d8ef2Safresh1 )main::InRecursedC in expansion of (?# 166b46d8ef2Safresh1 )main::InRecursedB in expansion of (?# 167b46d8ef2Safresh1 )main::InRecursedA/, 168b46d8ef2Safresh1 'IsRangeReversed' => qr/Illegal range in "200 100$utf8_comment" in (?# 169b46d8ef2Safresh1 )expansion of main::IsRangeReversed/, 170b46d8ef2Safresh1 'IsNonHex' => qr/Can't find Unicode property definition (?# 171b46d8ef2Safresh1 )"BEEF CAGED" in expansion of main::IsNonHex/, 172b46d8ef2Safresh1 173b46d8ef2Safresh1 # Could have \n, hence /s 174b46d8ef2Safresh1 'IsDeath' => qr/Died.* in expansion of main::IsDeath/s, 175b46d8ef2Safresh1 ); 176b46d8ef2Safresh1 177b46d8ef2Safresh1 # Now create a list of properties whose definitions won't be known at 178b46d8ef2Safresh1 # runtime. The qr// below thus will have forward references to them, and 179b46d8ef2Safresh1 # when matched at runtime will not know what's in the property definition 180b46d8ef2Safresh1 my @DEFERRABLE_USER_DEFINED_PROPERTIES; 181b46d8ef2Safresh1 push @DEFERRABLE_USER_DEFINED_PROPERTIES, @USER_DEFINED_PROPERTIES; 182b46d8ef2Safresh1 push @DEFERRABLE_USER_DEFINED_PROPERTIES, @USER_CASELESS_PROPERTIES; 183b46d8ef2Safresh1 unshift @DEFERRABLE_USER_DEFINED_PROPERTIES, @USER_ERROR_PROPERTIES; 184b46d8ef2Safresh1 for (my $i = 0; $i < @DEFERRABLE_USER_DEFINED_PROPERTIES; $i+=2) { 185b46d8ef2Safresh1 my $property = $DEFERRABLE_USER_DEFINED_PROPERTIES[$i]; 186b46d8ef2Safresh1 if ($property =~ / ^ \# /x) { 187b46d8ef2Safresh1 $i++; 188b46d8ef2Safresh1 redo; 189b46d8ef2Safresh1 } 190b46d8ef2Safresh1 191b46d8ef2Safresh1 # Only do this for the properties in the list that are user-defined 192b46d8ef2Safresh1 next if ($property !~ / ( ^ | :: ) I[ns] /x); 193b46d8ef2Safresh1 194b46d8ef2Safresh1 push @DEFERRED, qr/\p{$property}/, 195b46d8ef2Safresh1 $DEFERRABLE_USER_DEFINED_PROPERTIES[$i+1]; 196b46d8ef2Safresh1 } 197b46d8ef2Safresh1} 198b46d8ef2Safresh1 199b39c5158Smillert# 200b39c5158Smillert# From the short properties we populate POSIX-like classes. 201b39c5158Smillert# 202b39c5158Smillertmy %SHORT_PROPERTIES = ( 203b39c5158Smillert 'Ll' => ['m', '\N{CYRILLIC SMALL LETTER A}'], 204b39c5158Smillert 'Lu' => ['M', '\N{GREEK CAPITAL LETTER ALPHA}'], 205b39c5158Smillert 'Lo' => ['\N{HIRAGANA LETTER SMALL A}'], 206b39c5158Smillert # is also in other alphabetic 207b39c5158Smillert 'Mn' => ['\N{HEBREW POINT RAFE}'], 208b39c5158Smillert 'Nd' => ["0", '\N{ARABIC-INDIC DIGIT ZERO}'], 209b39c5158Smillert 'Pc' => ["_"], 210b39c5158Smillert 'Po' => ["!"], 211b39c5158Smillert 'Zs' => [" "], 212b39c5158Smillert 'Cc' => ['\x{00}'], 213b39c5158Smillert); 214b39c5158Smillert 215b39c5158Smillert# 216b39c5158Smillert# Illegal properties 217b39c5158Smillert# 218898184e3Ssthenmy @ILLEGAL_PROPERTIES = 219898184e3Ssthen qw[q qrst f foo isfoo infoo ISfoo INfoo Is::foo In::foo]; 220b39c5158Smillert 221b39c5158Smillertmy %d; 222b39c5158Smillert 223b39c5158Smillertwhile (my ($class, $chars) = each %SHORT_PROPERTIES) { 224b39c5158Smillert push @{$d {IsAlpha}} => map {$class =~ /^[LM]/ ? $_ : "!$_"} @$chars; 225b39c5158Smillert push @{$d {IsAlnum}} => map {$class =~ /^[LMN]./ ? $_ : "!$_"} @$chars; 226b39c5158Smillert push @{$d {IsASCII}} => map {length ($_) == 1 || $_ eq '\x{00}' 227b39c5158Smillert ? $_ : "!$_"} @$chars; 228b39c5158Smillert push @{$d {IsCntrl}} => map {$class =~ /^C/ ? $_ : "!$_"} @$chars; 229b39c5158Smillert push @{$d {IsBlank}} => map {$class =~ /^Z[lps]/ ? $_ : "!$_"} @$chars; 230b39c5158Smillert push @{$d {IsDigit}} => map {$class =~ /^Nd$/ ? $_ : "!$_"} @$chars; 231b39c5158Smillert push @{$d {IsGraph}} => map {$class =~ /^([LMNPS]|Co)/ 232b39c5158Smillert ? $_ : "!$_"} @$chars; 233b39c5158Smillert push @{$d {IsPrint}} => map {$class =~ /^([LMNPS]|Co|Zs)/ 234b39c5158Smillert ? $_ : "!$_"} @$chars; 235b39c5158Smillert push @{$d {IsLower}} => map {$class =~ /^Ll$/ ? $_ : "!$_"} @$chars; 236b39c5158Smillert push @{$d {IsUpper}} => map {$class =~ /^L[ut]/ ? $_ : "!$_"} @$chars; 237b39c5158Smillert push @{$d {IsPunct}} => map {$class =~ /^P/ ? $_ : "!$_"} @$chars; 238b39c5158Smillert push @{$d {IsWord}} => map {$class =~ /^[LMN]/ || $_ eq "_" 239b39c5158Smillert ? $_ : "!$_"} @$chars; 240b39c5158Smillert push @{$d {IsSpace}} => map {$class =~ /^Z/ || 241b8851fccSafresh1 length ($_) == 1 && utf8::native_to_unicode(ord ($_)) >= 0x09 242b8851fccSafresh1 && utf8::native_to_unicode(ord ($_)) <= 0x0D 243b39c5158Smillert ? $_ : "!$_"} @$chars; 244b39c5158Smillert} 245b39c5158Smillert 246b39c5158Smillertpush @CLASSES => "# Short properties" => %SHORT_PROPERTIES, 247b39c5158Smillert "# POSIX like properties" => %d, 248*56d68f1eSafresh1 "# User defined properties" => @USER_DEFINED_PROPERTIES; 249b39c5158Smillert 250b39c5158Smillert 251b39c5158Smillert# 252b39c5158Smillert# Calculate the number of tests. 253b39c5158Smillert# 254b39c5158Smillertmy $count = 0; 255b39c5158Smillertfor (my $i = 0; $i < @CLASSES; $i += 2) { 256b39c5158Smillert $i ++, redo if $CLASSES [$i] =~ /^\h*#\h*(.*)/; 257898184e3Ssthen $count += 2 * (length $CLASSES [$i] == 1 ? 4 : 2) * @{$CLASSES [$i + 1]}; 258b39c5158Smillert} 259898184e3Ssthen$count += 4 * @ILLEGAL_PROPERTIES; 260898184e3Ssthen$count += 4 * grep {length $_ == 1} @ILLEGAL_PROPERTIES; 261898184e3Ssthen$count += 8 * @USER_CASELESS_PROPERTIES; 262b46d8ef2Safresh1$count += 1 * (@DEFERRED - @USER_ERROR_PROPERTIES) / 2; 263b46d8ef2Safresh1$count += 1 * @USER_ERROR_PROPERTIES; 264b46d8ef2Safresh1$count += 1; # one bad apple 265b46d8ef2Safresh1$count += 1; # No warnings generated 266b39c5158Smillert 267898184e3Ssthenplan(tests => $count); 268b39c5158Smillert 269b39c5158Smillertrun_tests unless caller (); 270b39c5158Smillert 271b39c5158Smillertsub match { 272898184e3Ssthen my ($char, $match, $nomatch, $caseless) = @_; 273898184e3Ssthen $caseless = "" unless defined $caseless; 274898184e3Ssthen $caseless = 'i' if $caseless; 275b39c5158Smillert 276b46d8ef2Safresh1 my ($str, $name) = get_str_name($char); 277b39c5158Smillert 278898184e3Ssthen undef $@; 279898184e3Ssthen my $pat = "qr/$match/$caseless"; 280898184e3Ssthen my $match_pat = eval $pat; 281b46d8ef2Safresh1 if (is($@, '', "$pat compiled correctly to a regexp: $@")) { 282898184e3Ssthen like($str, $match_pat, "$name correctly matched"); 283b46d8ef2Safresh1 } 284898184e3Ssthen 285898184e3Ssthen undef $@; 286898184e3Ssthen $pat = "qr/$nomatch/$caseless"; 287898184e3Ssthen my $nomatch_pat = eval $pat; 288b46d8ef2Safresh1 if (is($@, '', "$pat compiled correctly to a regexp: $@")) { 289898184e3Ssthen unlike($str, $nomatch_pat, "$name correctly did not match"); 290b39c5158Smillert } 291b46d8ef2Safresh1} 292b39c5158Smillert 293b39c5158Smillertsub run_tests { 294b39c5158Smillert 295b46d8ef2Safresh1 for (my $i = 0; $i < @DEFERRED; $i+=2) { 296b46d8ef2Safresh1 if (ref $DEFERRED[$i+1] eq 'ARRAY') { 297b46d8ef2Safresh1 my ($str, $name) = get_str_name($DEFERRED[$i+1][0]); 298b46d8ef2Safresh1 like($str, $DEFERRED[$i], 299b46d8ef2Safresh1 "$name correctly matched $DEFERRED[$i] (defn. not known until runtime)"); 300b46d8ef2Safresh1 } 301b46d8ef2Safresh1 else { # Single entry rhs indicates a property that is an error 302b46d8ef2Safresh1 undef $@; 303b46d8ef2Safresh1 304b46d8ef2Safresh1 # Using block eval causes the pattern to not be recompiled, so it 305b46d8ef2Safresh1 # retains its deferred status until this is executed. 306b46d8ef2Safresh1 eval { 'A' =~ $DEFERRED[$i] }; 307b46d8ef2Safresh1 like($@, $DEFERRED[$i+1], 308b46d8ef2Safresh1 "$DEFERRED[$i] gave correct failure message (defn. not known until runtime)"); 309b46d8ef2Safresh1 } 310b46d8ef2Safresh1 } 311b46d8ef2Safresh1 312b39c5158Smillert while (@CLASSES) { 313b39c5158Smillert my $class = shift @CLASSES; 314b39c5158Smillert if ($class =~ /^\h*#\h*(.*)/) { 315b39c5158Smillert print "# $1\n"; 316b39c5158Smillert next; 317b39c5158Smillert } 318b39c5158Smillert last unless @CLASSES; 319b39c5158Smillert my $chars = shift @CLASSES; 320b39c5158Smillert my @in = grep {!/^!./} @$chars; 321b39c5158Smillert my @out = map {s/^!(?=.)//; $_} grep { /^!./} @$chars; 322b39c5158Smillert my $in_pat = eval qq ['\\p{$class}']; 323b39c5158Smillert my $out_pat = eval qq ['\\P{$class}']; 324b39c5158Smillert 325b39c5158Smillert match $_, $in_pat, $out_pat for @in; 326b39c5158Smillert match $_, $out_pat, $in_pat for @out; 327b39c5158Smillert 328898184e3Ssthen if (1 == length $class) { # Repeat without braces if name length 1 329b39c5158Smillert my $in_pat = eval qq ['\\p$class']; 330b39c5158Smillert my $out_pat = eval qq ['\\P$class']; 331b39c5158Smillert 332b39c5158Smillert match $_, $in_pat, $out_pat for @in; 333b39c5158Smillert match $_, $out_pat, $in_pat for @out; 334b39c5158Smillert } 335b39c5158Smillert } 336b39c5158Smillert 337b39c5158Smillert 338b39c5158Smillert print "# Illegal properties\n"; 339b39c5158Smillert foreach my $p (@ILLEGAL_PROPERTIES) { 340b8851fccSafresh1 my $pat; 341b8851fccSafresh1 if ($p =~ /::/) { 342b8851fccSafresh1 $pat = qr /^Illegal user-defined property name/; 343b8851fccSafresh1 } 344b8851fccSafresh1 else { 345b8851fccSafresh1 $pat = qr /^Can't find Unicode property definition/; 346b8851fccSafresh1 } 347b8851fccSafresh1 348b39c5158Smillert undef $@; 349b39c5158Smillert my $r = eval "'a' =~ /\\p{$p}/; 1"; 350898184e3Ssthen is($r, undef, "Unknown Unicode property \\p{$p}"); 351898184e3Ssthen like($@, $pat, "Unknown Unicode property \\p{$p}"); 352b39c5158Smillert undef $@; 353b39c5158Smillert my $s = eval "'a' =~ /\\P{$p}/; 1"; 354898184e3Ssthen is($s, undef, "Unknown Unicode property \\p{$p}"); 355898184e3Ssthen like($@, $pat, "Unknown Unicode property \\p{$p}"); 356b39c5158Smillert if (length $p == 1) { 357b39c5158Smillert undef $@; 358b39c5158Smillert my $r = eval "'a' =~ /\\p$p/; 1"; 359898184e3Ssthen is($r, undef, "Unknown Unicode property \\p$p"); 360898184e3Ssthen like($@, $pat, "Unknown Unicode property \\p$p"); 361b39c5158Smillert undef $@; 362b39c5158Smillert my $s = eval "'a' =~ /\\P$p/; 1"; 363898184e3Ssthen is($r, undef, "Unknown Unicode property \\P$p"); 364898184e3Ssthen like($@, $pat, "Unknown Unicode property \\P$p"); 365b39c5158Smillert } 366b39c5158Smillert } 367898184e3Ssthen 368898184e3Ssthen print "# User-defined properties with /i differences\n"; 369b8851fccSafresh1 while (my $class = shift @USER_CASELESS_PROPERTIES) { 370898184e3Ssthen my $chars_ref = shift @USER_CASELESS_PROPERTIES; 371898184e3Ssthen my @in = grep {!/^!./} @$chars_ref; 372898184e3Ssthen my @out = map {s/^!(?=.)//; $_} grep { /^!./} @$chars_ref; 373898184e3Ssthen my $in_pat = eval qq ['\\p{$class}']; 374898184e3Ssthen my $out_pat = eval qq ['\\P{$class}']; 375898184e3Ssthen 376b46d8ef2Safresh1 # Verify that adding /i does change the out set to match. 377b46d8ef2Safresh1 match $_, $in_pat, $out_pat, 'i' for @out; 378898184e3Ssthen 379898184e3Ssthen # Verify that adding /i doesn't change the in set. 380898184e3Ssthen match $_, $in_pat, $out_pat, 'i' for @in; 381898184e3Ssthen 382b46d8ef2Safresh1 # Verify works as regularly for not /i 383b46d8ef2Safresh1 match $_, $in_pat, $out_pat for @in; 384b46d8ef2Safresh1 match $_, $out_pat, $in_pat for @out; 385b46d8ef2Safresh1 } 386b46d8ef2Safresh1 387b46d8ef2Safresh1 print "# User-defined properties with errors in their definition\n"; 388b46d8ef2Safresh1 while (my $error_property = shift @USER_ERROR_PROPERTIES) { 389b46d8ef2Safresh1 my $error_re = shift @USER_ERROR_PROPERTIES; 390b46d8ef2Safresh1 391b46d8ef2Safresh1 undef $@; 392b46d8ef2Safresh1 eval { 'A' =~ /\p{$error_property}/; }; 393b46d8ef2Safresh1 like($@, $error_re, "$error_property gave correct failure message"); 394898184e3Ssthen } 395b39c5158Smillert} 396b39c5158Smillert 397b39c5158Smillert 398b39c5158Smillert# 399b39c5158Smillert# User defined properties 400b39c5158Smillert# 401b39c5158Smillert 402b39c5158Smillertsub InKana1 {<<'--'} 403b46d8ef2Safresh13040 309F # A comment; next line has trailing spaces 404b39c5158Smillert30A0 30FF 405b39c5158Smillert-- 406b39c5158Smillert 407b39c5158Smillertsub InKana2 {<<'--'} 408b39c5158Smillert+utf8::InHiragana 409b39c5158Smillert+utf8::InKatakana 410b39c5158Smillert-- 411b39c5158Smillert 412b39c5158Smillertsub InKana3 {<<'--'} 413b46d8ef2Safresh1# First line comment 414b39c5158Smillert+utf8::InHiragana 415b46d8ef2Safresh1# Full line comment 416b39c5158Smillert+utf8::InKatakana 417b39c5158Smillert-utf8::IsCn 418b39c5158Smillert-- 419b39c5158Smillert 420b39c5158Smillertsub InNotKana {<<'--'} 421b46d8ef2Safresh1!utf8::InHiragana # A comment; next line has trailing spaces 422b39c5158Smillert-utf8::InKatakana 423b39c5158Smillert+utf8::IsCn 424b46d8ef2Safresh1# Final line comment 425b39c5158Smillert-- 426b39c5158Smillert 427b8851fccSafresh1sub InConsonant { 428b8851fccSafresh1 429b8851fccSafresh1 my $return = "+utf8::Lowercase\n&utf8::ASCII\n"; 430b8851fccSafresh1 $return .= sprintf("-%X\n", ord "a"); 431b8851fccSafresh1 $return .= sprintf("-%X\n", ord "e"); 432b8851fccSafresh1 $return .= sprintf("-%X\n", ord "i"); 433b8851fccSafresh1 $return .= sprintf("-%X\n", ord "o"); 434b8851fccSafresh1 $return .= sprintf("-%X\n", ord "u"); 435b8851fccSafresh1 return $return; 436b8851fccSafresh1} 437b39c5158Smillert 438b39c5158Smillertsub IsSyriac1 {<<'--'} 439b39c5158Smillert0712 072C 440b39c5158Smillert0730 074A 441b39c5158Smillert-- 442b39c5158Smillert 443b46d8ef2Safresh1sub InRecursedA { 444b46d8ef2Safresh1 return "+main::InRecursedB\n"; 445b46d8ef2Safresh1} 446b46d8ef2Safresh1 447b46d8ef2Safresh1sub InRecursedB { 448b46d8ef2Safresh1 return "+main::InRecursedC\n"; 449b46d8ef2Safresh1} 450b46d8ef2Safresh1 451b46d8ef2Safresh1sub InRecursedC { 452b46d8ef2Safresh1 return "+main::InRecursedA\n"; 453b46d8ef2Safresh1} 454b46d8ef2Safresh1 455b39c5158Smillertsub InGreekSmall {return "03B1\t03C9"} 456b39c5158Smillertsub InGreekCapital {return "0391\t03A9\n-03A2"} 457b39c5158Smillert 458898184e3Ssthensub IsAsciiHexAndDash {<<'--'} 459b39c5158Smillert+utf8::ASCII_Hex_Digit 460b39c5158Smillert+utf8::Dash 461b39c5158Smillert-- 462b39c5158Smillert 4639f11ffb7Safresh1sub InLatin1 { 4649f11ffb7Safresh1 return "0100\t10FFFF"; 4659f11ffb7Safresh1} 4669f11ffb7Safresh1 467898184e3Ssthensub IsMyUpper { 468b46d8ef2Safresh1 use feature 'state'; 469b46d8ef2Safresh1 470b46d8ef2Safresh1 state $cased_count = 0; 471b46d8ef2Safresh1 state $caseless_count = 0; 472b46d8ef2Safresh1 my $ret= "+utf8::"; 473b46d8ef2Safresh1 474898184e3Ssthen my $caseless = shift; 475b46d8ef2Safresh1 if($caseless) { 476b46d8ef2Safresh1 die "Called twice" if $caseless_count; 477b46d8ef2Safresh1 $caseless_count++; 478b46d8ef2Safresh1 $ret .= 'Alphabetic' 479b46d8ef2Safresh1 } 480b46d8ef2Safresh1 else { 481b46d8ef2Safresh1 die "Called twice" if $cased_count; 482b46d8ef2Safresh1 $cased_count++; 483b46d8ef2Safresh1 $ret .= 'Uppercase'; 484898184e3Ssthen } 485b8851fccSafresh1 486b46d8ef2Safresh1 return $ret . "\n&utf8::ASCII"; 487b46d8ef2Safresh1} 488b8851fccSafresh1 489b46d8ef2Safresh1sub pkg1::pkg2::IsMyLower { 490b8851fccSafresh1 my $caseless = shift; 491b8851fccSafresh1 return "+utf8::" 492b8851fccSafresh1 . (($caseless) 493b8851fccSafresh1 ? 'Alphabetic' 494b8851fccSafresh1 : 'Lowercase') 495b8851fccSafresh1 . "\n&utf8::ASCII"; 496898184e3Ssthen} 497b8851fccSafresh1 498b46d8ef2Safresh1sub IsRangeReversed { 499b46d8ef2Safresh1 return "200 100$utf8_comment"; 500b46d8ef2Safresh1} 501b8851fccSafresh1 502b46d8ef2Safresh1sub IsNonHex { 503b46d8ef2Safresh1 return "BEEF CAGED$utf8_comment"; 504b46d8ef2Safresh1} 505b46d8ef2Safresh1 506b46d8ef2Safresh1sub IsDeath { 507b46d8ef2Safresh1 die; 508898184e3Ssthen} 509898184e3Ssthen 510898184e3Ssthen# Verify that can use user-defined properties inside another one 511898184e3Ssthensub IsSyriac1KanaMark {<<'--'} 512898184e3Ssthen+main::IsSyriac1 513898184e3Ssthen+main::InKana3 514898184e3Ssthen&utf8::IsMark 515898184e3Ssthen-- 516898184e3Ssthen 517898184e3Ssthen# fake user-defined properties; these subs shouldn't be called, because 518898184e3Ssthen# their names don't start with In or Is 519898184e3Ssthen 520898184e3Ssthensub f { die } 521898184e3Ssthensub foo { die } 522898184e3Ssthensub isfoo { die } 523898184e3Ssthensub infoo { die } 524898184e3Ssthensub ISfoo { die } 525898184e3Ssthensub INfoo { die } 526898184e3Ssthensub Is::foo { die } 527898184e3Ssthensub In::foo { die } 528b46d8ef2Safresh1 529b46d8ef2Safresh1sub IsOverflow { 530b46d8ef2Safresh1 return "0\t$overflow$utf8_comment"; 531b46d8ef2Safresh1} 532b46d8ef2Safresh1 533b46d8ef2Safresh1fresh_perl_like(<<'EOP', qr/Can't find Unicode property definition "F000\\tF010" in expansion of InOneBadApple/, {}, "Just one component bad"); 534b46d8ef2Safresh1# Extra backslash converts tab to backslash-t 535b46d8ef2Safresh1sub InOneBadApple { return "0100\t0110\n10000\t10010\nF000\\tF010\n0400\t0410" } 536b46d8ef2Safresh1qr/\p{InOneBadApple}/; 537b46d8ef2Safresh1EOP 538b46d8ef2Safresh1 539b46d8ef2Safresh1if (! is(@warnings, 0, "No warnings were generated")) { 540b46d8ef2Safresh1 diag join "\n", @warnings, "\n"; 541b46d8ef2Safresh1} 542b46d8ef2Safresh1 543b46d8ef2Safresh11; 544b39c5158Smillert__END__ 545