1BEGIN { 2 chdir 't' if -d 't'; 3 @INC = qw(../lib .); 4 require "test.pl"; 5} 6 7plan tests => 4784; 8 9sub MyUniClass { 10 <<END; 110030 004F 12END 13} 14 15sub Other::Class { 16 <<END; 170040 005F 18END 19} 20 21sub A::B::Intersection { 22 <<END; 23+main::MyUniClass 24&Other::Class 25END 26} 27 28sub test_regexp ($$) { 29 # test that given string consists of N-1 chars matching $qr1, and 1 30 # char matching $qr2 31 my ($str, $blk) = @_; 32 33 # constructing these objects here makes the last test loop go much faster 34 my $qr1 = qr/(\p{$blk}+)/; 35 if ($str =~ $qr1) { 36 is($1, substr($str, 0, -1)); # all except last char 37 } 38 else { 39 fail('first N-1 chars did not match'); 40 } 41 42 my $qr2 = qr/(\P{$blk}+)/; 43 if ($str =~ $qr2) { 44 is($1, substr($str, -1)); # only last char 45 } 46 else { 47 fail('last char did not match'); 48 } 49} 50 51use strict; 52 53my $str; 54 55if (ord('A') == 193) { 56 $str = join "", map chr($_), 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61, 0xF0 .. 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F, 0x7C, 0xC1 .. 0xC9, 0xD1 .. 0xD9, 0xE2 .. 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D, 0x79, 0x81 .. 0x89, 0x91 .. 0x96; # IBM-1047 57} else { 58 $str = join "", map chr($_), 0x20 .. 0x6F; 59} 60 61# make sure it finds built-in class 62is(($str =~ /(\p{Letter}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'); 63is(($str =~ /(\p{l}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'); 64 65# make sure it finds user-defined class 66is(($str =~ /(\p{MyUniClass}+)/)[0], '0123456789:;<=>?@ABCDEFGHIJKLMNO'); 67 68# make sure it finds class in other package 69is(($str =~ /(\p{Other::Class}+)/)[0], '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_'); 70 71# make sure it finds class in other OTHER package 72is(($str =~ /(\p{A::B::Intersection}+)/)[0], '@ABCDEFGHIJKLMNO'); 73 74# all of these should look in lib/unicore/bc/AL.pl 75$str = "\x{070D}\x{070E}\x{070F}\x{0710}\x{0711}"; 76is(($str =~ /(\P{BidiClass: ArabicLetter}+)/)[0], "\x{070E}\x{070F}"); 77is(($str =~ /(\P{BidiClass: AL}+)/)[0], "\x{070E}\x{070F}"); 78is(($str =~ /(\P{BC :ArabicLetter}+)/)[0], "\x{070E}\x{070F}"); 79is(($str =~ /(\P{bc=AL}+)/)[0], "\x{070E}\x{070F}"); 80 81# make sure InGreek works 82$str = "[\x{038B}\x{038C}\x{038D}]"; 83 84is(($str =~ /(\p{InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}"); 85is(($str =~ /(\p{Script:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}"); 86is(($str =~ /(\p{Script=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}"); 87is(($str =~ /(\p{sc:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}"); 88is(($str =~ /(\p{sc=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}"); 89 90use File::Spec; 91my $updir = File::Spec->updir; 92 93# the %utf8::... hashes are already in existence 94# because utf8_pva.pl was run by utf8_heavy.pl 95 96*utf8::PropertyAlias = *utf8::PropertyAlias; # thwart a warning 97 98no warnings 'utf8'; # we do not want warnings about surrogates etc 99 100sub char_range { 101 my ($h1, $h2) = @_; 102 103 my $str; 104 105 if (ord('A') == 193 && $h1 < 256) { 106 my $h3 = ($h2 || $h1) + 1; 107 if ($h3 - $h1 == 1) { 108 $str = join "", pack 'U*', $h1 .. $h3; # Using pack since chr doesn't generate Unicode chars for value < 256. 109 } elsif ($h3 - $h1 > 1) { 110 for (my $i = $h1; $i <= $h3; $i++) { 111 $str = join "", $str, pack 'U*', $i; 112 } 113 } 114 } else { 115 $str = join "", map chr, $h1 .. (($h2 || $h1) + 1); 116 } 117 118 return $str; 119} 120 121# non-General Category and non-Script 122while (my ($abbrev, $files) = each %utf8::PVA_abbr_map) { 123 my $prop_name = $utf8::PropertyAlias{$abbrev}; 124 next unless $prop_name; 125 next if $abbrev eq "gc_sc"; 126 127 for (sort keys %$files) { 128 my $filename = File::Spec->catfile( 129 $updir => lib => unicore => lib => $abbrev => "$files->{$_}.pl" 130 ); 131 132 next unless -e $filename; 133 my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1]; 134 135 my $str = char_range($h1, $h2); 136 137 for my $p ($prop_name, $abbrev) { 138 for my $c ($files->{$_}, $_) { 139 is($str =~ /(\p{$p: $c}+)/ && $1, substr($str, 0, -1)); 140 is($str =~ /(\P{$p= $c}+)/ && $1, substr($str, -1)); 141 } 142 } 143 } 144} 145 146# General Category and Script 147for my $p ('gc', 'sc') { 148 while (my ($abbr) = each %{ $utf8::PropValueAlias{$p} }) { 149 my $filename = File::Spec->catfile( 150 $updir => lib => unicore => lib => gc_sc => "$utf8::PVA_abbr_map{gc_sc}{$abbr}.pl" 151 ); 152 153 next unless -e $filename; 154 my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1]; 155 156 my $str = char_range($h1, $h2); 157 158 for my $x ($p, { gc => 'General Category', sc => 'Script' }->{$p}) { 159 for my $y ($abbr, $utf8::PropValueAlias{$p}{$abbr}, $utf8::PVA_abbr_map{gc_sc}{$abbr}) { 160 is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1)); 161 is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1)); 162 SKIP: { 163 skip("surrogate", 1) if $abbr eq 'cs'; 164 test_regexp ($str, $y); 165 } 166 } 167 } 168 } 169} 170 171# test extra properties (ASCII_Hex_Digit, Bidi_Control, etc.) 172SKIP: 173{ 174 skip "Can't reliably derive class names from file names", 576 if $^O eq 'VMS'; 175 176 # On case tolerant filesystems, Cf.pl will cause a -e test for cf.pl to 177 # return true. Try to work around this by reading the filenames explicitly 178 # to get a case sensitive test. N.B. This will fail if filename case is 179 # not preserved because you might go looking for a class name of CF or cf 180 # when you really want Cf. Storing case sensitive data in filenames is 181 # simply not portable. 182 183 my %files; 184 185 my $dirname = File::Spec->catdir($updir => lib => unicore => lib => 'gc_sc'); 186 opendir D, $dirname or die $!; 187 @files{readdir(D)} = (); 188 closedir D; 189 190 for (keys %utf8::PA_reverse) { 191 my $leafname = "$utf8::PA_reverse{$_}.pl"; 192 next unless exists $files{$leafname}; 193 194 my $filename = File::Spec->catfile($dirname, $leafname); 195 196 my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1]; 197 198 my $str = char_range($h1, $h2); 199 200 for my $x ('gc', 'General Category') { 201 print "# $filename $x $_, $utf8::PA_reverse{$_}\n"; 202 for my $y ($_, $utf8::PA_reverse{$_}) { 203 is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1)); 204 is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1)); 205 test_regexp ($str, $y); 206 } 207 } 208 } 209} 210 211# test the blocks (InFoobar) 212for (grep $utf8::Canonical{$_} =~ /^In/, keys %utf8::Canonical) { 213 my $filename = File::Spec->catfile( 214 $updir => lib => unicore => lib => gc_sc => "$utf8::Canonical{$_}.pl" 215 ); 216 217 next unless -e $filename; 218 219 print "# In$_ $filename\n"; 220 221 my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1]; 222 223 my $str = char_range($h1, $h2); 224 225 my $blk = $_; 226 227 SKIP: { 228 skip($blk, 2) if $blk =~ /surrogates/i; 229 test_regexp ($str, $blk); 230 $blk =~ s/^In/Block:/; 231 test_regexp ($str, $blk); 232 } 233} 234 235