1#!perl -w 2BEGIN { 3 if (ord("A") == 193) { 4 print "1..0 # Skip: EBCDIC\n"; 5 exit 0; 6 } 7 chdir 't' if -d 't'; 8 @INC = '../lib'; 9 @INC = "::lib" if $^O eq 'MacOS'; # module parses @INC itself 10 require Config; import Config; 11 if ($Config{'extensions'} !~ /\bStorable\b/) { 12 print "1..0 # Skip: Storable was not built; Unicode::UCD uses Storable\n"; 13 exit 0; 14 } 15} 16 17use strict; 18use Unicode::UCD; 19use Test::More; 20 21BEGIN { plan tests => 194 }; 22 23use Unicode::UCD 'charinfo'; 24 25my $charinfo; 26 27$charinfo = charinfo(0x41); 28 29is($charinfo->{code}, '0041', 'LATIN CAPITAL LETTER A'); 30is($charinfo->{name}, 'LATIN CAPITAL LETTER A'); 31is($charinfo->{category}, 'Lu'); 32is($charinfo->{combining}, '0'); 33is($charinfo->{bidi}, 'L'); 34is($charinfo->{decomposition}, ''); 35is($charinfo->{decimal}, ''); 36is($charinfo->{digit}, ''); 37is($charinfo->{numeric}, ''); 38is($charinfo->{mirrored}, 'N'); 39is($charinfo->{unicode10}, ''); 40is($charinfo->{comment}, ''); 41is($charinfo->{upper}, ''); 42is($charinfo->{lower}, '0061'); 43is($charinfo->{title}, ''); 44is($charinfo->{block}, 'Basic Latin'); 45is($charinfo->{script}, 'Latin'); 46 47$charinfo = charinfo(0x100); 48 49is($charinfo->{code}, '0100', 'LATIN CAPITAL LETTER A WITH MACRON'); 50is($charinfo->{name}, 'LATIN CAPITAL LETTER A WITH MACRON'); 51is($charinfo->{category}, 'Lu'); 52is($charinfo->{combining}, '0'); 53is($charinfo->{bidi}, 'L'); 54is($charinfo->{decomposition}, '0041 0304'); 55is($charinfo->{decimal}, ''); 56is($charinfo->{digit}, ''); 57is($charinfo->{numeric}, ''); 58is($charinfo->{mirrored}, 'N'); 59is($charinfo->{unicode10}, 'LATIN CAPITAL LETTER A MACRON'); 60is($charinfo->{comment}, ''); 61is($charinfo->{upper}, ''); 62is($charinfo->{lower}, '0101'); 63is($charinfo->{title}, ''); 64is($charinfo->{block}, 'Latin Extended-A'); 65is($charinfo->{script}, 'Latin'); 66 67# 0x0590 is in the Hebrew block but unused. 68 69$charinfo = charinfo(0x590); 70 71is($charinfo->{code}, undef, '0x0590 - unused Hebrew'); 72is($charinfo->{name}, undef); 73is($charinfo->{category}, undef); 74is($charinfo->{combining}, undef); 75is($charinfo->{bidi}, undef); 76is($charinfo->{decomposition}, undef); 77is($charinfo->{decimal}, undef); 78is($charinfo->{digit}, undef); 79is($charinfo->{numeric}, undef); 80is($charinfo->{mirrored}, undef); 81is($charinfo->{unicode10}, undef); 82is($charinfo->{comment}, undef); 83is($charinfo->{upper}, undef); 84is($charinfo->{lower}, undef); 85is($charinfo->{title}, undef); 86is($charinfo->{block}, undef); 87is($charinfo->{script}, undef); 88 89# 0x05d0 is in the Hebrew block and used. 90 91$charinfo = charinfo(0x5d0); 92 93is($charinfo->{code}, '05D0', '05D0 - used Hebrew'); 94is($charinfo->{name}, 'HEBREW LETTER ALEF'); 95is($charinfo->{category}, 'Lo'); 96is($charinfo->{combining}, '0'); 97is($charinfo->{bidi}, 'R'); 98is($charinfo->{decomposition}, ''); 99is($charinfo->{decimal}, ''); 100is($charinfo->{digit}, ''); 101is($charinfo->{numeric}, ''); 102is($charinfo->{mirrored}, 'N'); 103is($charinfo->{unicode10}, ''); 104is($charinfo->{comment}, ''); 105is($charinfo->{upper}, ''); 106is($charinfo->{lower}, ''); 107is($charinfo->{title}, ''); 108is($charinfo->{block}, 'Hebrew'); 109is($charinfo->{script}, 'Hebrew'); 110 111# An open syllable in Hangul. 112 113$charinfo = charinfo(0xAC00); 114 115is($charinfo->{code}, 'AC00', 'HANGUL SYLLABLE-AC00'); 116is($charinfo->{name}, 'HANGUL SYLLABLE-AC00'); 117is($charinfo->{category}, 'Lo'); 118is($charinfo->{combining}, '0'); 119is($charinfo->{bidi}, 'L'); 120is($charinfo->{decomposition}, undef); 121is($charinfo->{decimal}, ''); 122is($charinfo->{digit}, ''); 123is($charinfo->{numeric}, ''); 124is($charinfo->{mirrored}, 'N'); 125is($charinfo->{unicode10}, ''); 126is($charinfo->{comment}, ''); 127is($charinfo->{upper}, ''); 128is($charinfo->{lower}, ''); 129is($charinfo->{title}, ''); 130is($charinfo->{block}, 'Hangul Syllables'); 131is($charinfo->{script}, 'Hangul'); 132 133# A closed syllable in Hangul. 134 135$charinfo = charinfo(0xAE00); 136 137is($charinfo->{code}, 'AE00', 'HANGUL SYLLABLE-AE00'); 138is($charinfo->{name}, 'HANGUL SYLLABLE-AE00'); 139is($charinfo->{category}, 'Lo'); 140is($charinfo->{combining}, '0'); 141is($charinfo->{bidi}, 'L'); 142is($charinfo->{decomposition}, undef); 143is($charinfo->{decimal}, ''); 144is($charinfo->{digit}, ''); 145is($charinfo->{numeric}, ''); 146is($charinfo->{mirrored}, 'N'); 147is($charinfo->{unicode10}, ''); 148is($charinfo->{comment}, ''); 149is($charinfo->{upper}, ''); 150is($charinfo->{lower}, ''); 151is($charinfo->{title}, ''); 152is($charinfo->{block}, 'Hangul Syllables'); 153is($charinfo->{script}, 'Hangul'); 154 155$charinfo = charinfo(0x1D400); 156 157is($charinfo->{code}, '1D400', 'MATHEMATICAL BOLD CAPITAL A'); 158is($charinfo->{name}, 'MATHEMATICAL BOLD CAPITAL A'); 159is($charinfo->{category}, 'Lu'); 160is($charinfo->{combining}, '0'); 161is($charinfo->{bidi}, 'L'); 162is($charinfo->{decomposition}, '<font> 0041'); 163is($charinfo->{decimal}, ''); 164is($charinfo->{digit}, ''); 165is($charinfo->{numeric}, ''); 166is($charinfo->{mirrored}, 'N'); 167is($charinfo->{unicode10}, ''); 168is($charinfo->{comment}, ''); 169is($charinfo->{upper}, ''); 170is($charinfo->{lower}, ''); 171is($charinfo->{title}, ''); 172is($charinfo->{block}, 'Mathematical Alphanumeric Symbols'); 173is($charinfo->{script}, 'Common'); 174 175use Unicode::UCD qw(charblock charscript); 176 177# 0x0590 is in the Hebrew block but unused. 178 179is(charblock(0x590), 'Hebrew', '0x0590 - Hebrew unused charblock'); 180is(charscript(0x590), undef, '0x0590 - Hebrew unused charscript'); 181 182$charinfo = charinfo(0xbe); 183 184is($charinfo->{code}, '00BE', 'VULGAR FRACTION THREE QUARTERS'); 185is($charinfo->{name}, 'VULGAR FRACTION THREE QUARTERS'); 186is($charinfo->{category}, 'No'); 187is($charinfo->{combining}, '0'); 188is($charinfo->{bidi}, 'ON'); 189is($charinfo->{decomposition}, '<fraction> 0033 2044 0034'); 190is($charinfo->{decimal}, ''); 191is($charinfo->{digit}, ''); 192is($charinfo->{numeric}, '3/4'); 193is($charinfo->{mirrored}, 'N'); 194is($charinfo->{unicode10}, 'FRACTION THREE QUARTERS'); 195is($charinfo->{comment}, ''); 196is($charinfo->{upper}, ''); 197is($charinfo->{lower}, ''); 198is($charinfo->{title}, ''); 199is($charinfo->{block}, 'Latin-1 Supplement'); 200is($charinfo->{script}, 'Common'); 201 202use Unicode::UCD qw(charblocks charscripts); 203 204my $charblocks = charblocks(); 205 206ok(exists $charblocks->{Thai}, 'Thai charblock exists'); 207is($charblocks->{Thai}->[0]->[0], hex('0e00')); 208ok(!exists $charblocks->{PigLatin}, 'PigLatin charblock does not exist'); 209 210my $charscripts = charscripts(); 211 212ok(exists $charscripts->{Armenian}, 'Armenian charscript exists'); 213is($charscripts->{Armenian}->[0]->[0], hex('0531')); 214ok(!exists $charscripts->{PigLatin}, 'PigLatin charscript does not exist'); 215 216my $charscript; 217 218$charscript = charscript("12ab"); 219is($charscript, 'Ethiopic', 'Ethiopic charscript'); 220 221$charscript = charscript("0x12ab"); 222is($charscript, 'Ethiopic'); 223 224$charscript = charscript("U+12ab"); 225is($charscript, 'Ethiopic'); 226 227my $ranges; 228 229$ranges = charscript('Ogham'); 230is($ranges->[1]->[0], hex('1681'), 'Ogham charscript'); 231is($ranges->[1]->[1], hex('169a')); 232 233use Unicode::UCD qw(charinrange); 234 235$ranges = charscript('Cherokee'); 236ok(!charinrange($ranges, "139f"), 'Cherokee charscript'); 237ok( charinrange($ranges, "13a0")); 238ok( charinrange($ranges, "13f4")); 239ok(!charinrange($ranges, "13f5")); 240 241use Unicode::UCD qw(general_categories); 242 243my $gc = general_categories(); 244 245ok(exists $gc->{L}, 'has L'); 246is($gc->{L}, 'Letter', 'L is Letter'); 247is($gc->{Lu}, 'UppercaseLetter', 'Lu is UppercaseLetter'); 248 249use Unicode::UCD qw(bidi_types); 250 251my $bt = bidi_types(); 252 253ok(exists $bt->{L}, 'has L'); 254is($bt->{L}, 'Left-to-Right', 'L is Left-to-Right'); 255is($bt->{AL}, 'Right-to-Left Arabic', 'AL is Right-to-Left Arabic'); 256 257is(Unicode::UCD::UnicodeVersion, '5.0.0', 'UnicodeVersion'); 258 259use Unicode::UCD qw(compexcl); 260 261ok(!compexcl(0x0100), 'compexcl'); 262ok( compexcl(0x0958)); 263 264use Unicode::UCD qw(casefold); 265 266my $casefold; 267 268$casefold = casefold(0x41); 269 270ok($casefold->{code} eq '0041' && 271 $casefold->{status} eq 'C' && 272 $casefold->{mapping} eq '0061', 'casefold 0x41'); 273 274$casefold = casefold(0xdf); 275 276ok($casefold->{code} eq '00DF' && 277 $casefold->{status} eq 'F' && 278 $casefold->{mapping} eq '0073 0073', 'casefold 0xDF'); 279 280ok(!casefold(0x20)); 281 282use Unicode::UCD qw(casespec); 283 284my $casespec; 285 286ok(!casespec(0x41)); 287 288$casespec = casespec(0xdf); 289 290ok($casespec->{code} eq '00DF' && 291 $casespec->{lower} eq '00DF' && 292 $casespec->{title} eq '0053 0073' && 293 $casespec->{upper} eq '0053 0053' && 294 !defined $casespec->{condition}, 'casespec 0xDF'); 295 296$casespec = casespec(0x307); 297 298ok($casespec->{az}->{code} eq '0307' && 299 !defined $casespec->{az}->{lower} && 300 $casespec->{az}->{title} eq '0307' && 301 $casespec->{az}->{upper} eq '0307' && 302 $casespec->{az}->{condition} eq 'az After_I', 303 'casespec 0x307'); 304 305# perl #7305 UnicodeCD::compexcl is weird 306 307for (1) {my $a=compexcl $_} 308ok(1, 'compexcl read-only $_: perl #7305'); 309grep {compexcl $_} %{{1=>2}}; 310ok(1, 'compexcl read-only hash: perl #7305'); 311 312is(Unicode::UCD::_getcode('123'), 123, "_getcode(123)"); 313is(Unicode::UCD::_getcode('0123'), 0x123, "_getcode(0123)"); 314is(Unicode::UCD::_getcode('0x123'), 0x123, "_getcode(0x123)"); 315is(Unicode::UCD::_getcode('0X123'), 0x123, "_getcode(0X123)"); 316is(Unicode::UCD::_getcode('U+123'), 0x123, "_getcode(U+123)"); 317is(Unicode::UCD::_getcode('u+123'), 0x123, "_getcode(u+123)"); 318is(Unicode::UCD::_getcode('U+1234'), 0x1234, "_getcode(U+1234)"); 319is(Unicode::UCD::_getcode('U+12345'), 0x12345, "_getcode(U+12345)"); 320is(Unicode::UCD::_getcode('123x'), undef, "_getcode(123x)"); 321is(Unicode::UCD::_getcode('x123'), undef, "_getcode(x123)"); 322is(Unicode::UCD::_getcode('0x123x'), undef, "_getcode(x123)"); 323is(Unicode::UCD::_getcode('U+123x'), undef, "_getcode(x123)"); 324 325{ 326 my $r1 = charscript('Latin'); 327 my $n1 = @$r1; 328 is($n1, 35, "number of ranges in Latin script (Unicode 5.0.0)"); 329 shift @$r1 while @$r1; 330 my $r2 = charscript('Latin'); 331 is(@$r2, $n1, "modifying results should not mess up internal caches"); 332} 333 334{ 335 is(charinfo(0xdeadbeef), undef, "[perl #23273] warnings in Unicode::UCD"); 336} 337 338use Unicode::UCD qw(namedseq); 339 340is(namedseq("KATAKANA LETTER AINU P"), "\x{31F7}\x{309A}", "namedseq"); 341is(namedseq("KATAKANA LETTER AINU Q"), undef); 342is(namedseq(), undef); 343is(namedseq(qw(foo bar)), undef); 344my @ns = namedseq("KATAKANA LETTER AINU P"); 345is(scalar @ns, 2); 346is($ns[0], 0x31F7); 347is($ns[1], 0x309A); 348my %ns = namedseq(); 349is($ns{"KATAKANA LETTER AINU P"}, "\x{31F7}\x{309A}"); 350@ns = namedseq(42); 351is(@ns, 0); 352 353