# Tests for Unicode::UCD: charinfo, charblock(s), charscript(s), charinrange,
# UnicodeVersion, compexcl, casefold, casespec and the private _getcode.

BEGIN {
    # Skip the whole file on EBCDIC platforms (where ord("A") is 193).
    if (ord("A") == 193) {
        print "1..0 # Skip: EBCDIC\n";
        exit 0;
    }
    chdir 't' if -d 't';
    @INC = '../lib';
    @INC = "::lib" if $^O eq 'MacOS'; # module parses @INC itself
}

use strict;
use Unicode::UCD;
use Test::More;

# 8 charinfo groups x 17 fields = 136 tests, plus 43 individual tests below.
BEGIN { plan tests => 179 };

use Unicode::UCD 'charinfo';

# The charinfo() hash keys, in the order they are checked for each code point.
my @CHARINFO_FIELDS = qw(
    code name category combining bidi decomposition decimal digit numeric
    mirrored unicode10 comment upper lower title block script
);

# Look up $code with charinfo() and run one is() per entry of
# @CHARINFO_FIELDS, comparing the returned field against $expected{field}.
# A missing or undef entry in %expected means the field must be undef.
# The 'code' check is reported under the bare $label; every other check is
# reported as "$label - <field>".
sub charinfo_is {
    my ($code, $label, %expected) = @_;

    # Report failures at the caller's line rather than inside this helper.
    local $Test::Builder::Level = $Test::Builder::Level + 1;

    my $info = charinfo($code);
    for my $field (@CHARINFO_FIELDS) {
        my $description = $field eq 'code' ? $label : "$label - $field";
        is($info->{$field}, $expected{$field}, $description);
    }
    return;
}

charinfo_is(0x41, 'LATIN CAPITAL LETTER A',
    code          => '0041',
    name          => 'LATIN CAPITAL LETTER A',
    category      => 'Lu',
    combining     => '0',
    bidi          => 'L',
    decomposition => '',
    decimal       => '',
    digit         => '',
    numeric       => '',
    mirrored      => 'N',
    unicode10     => '',
    comment       => '',
    upper         => '',
    lower         => '0061',
    title         => '',
    block         => 'Basic Latin',
    script        => 'Latin',
);

charinfo_is(0x100, 'LATIN CAPITAL LETTER A WITH MACRON',
    code          => '0100',
    name          => 'LATIN CAPITAL LETTER A WITH MACRON',
    category      => 'Lu',
    combining     => '0',
    bidi          => 'L',
    decomposition => '0041 0304',
    decimal       => '',
    digit         => '',
    numeric       => '',
    mirrored      => 'N',
    unicode10     => 'LATIN CAPITAL LETTER A MACRON',
    comment       => '',
    upper         => '',
    lower         => '0101',
    title         => '',
    block         => 'Latin Extended-A',
    script        => 'Latin',
);

# 0x0590 is in the Hebrew block but unused: every field must be undef.

charinfo_is(0x590, '0x0590 - unused Hebrew',
    map { ($_ => undef) } @CHARINFO_FIELDS);

# 0x05d0 is in the Hebrew block and used.

charinfo_is(0x5d0, '05D0 - used Hebrew',
    code          => '05D0',
    name          => 'HEBREW LETTER ALEF',
    category      => 'Lo',
    combining     => '0',
    bidi          => 'R',
    decomposition => '',
    decimal       => '',
    digit         => '',
    numeric       => '',
    mirrored      => 'N',
    unicode10     => '',
    comment       => '',
    upper         => '',
    lower         => '',
    title         => '',
    block         => 'Hebrew',
    script        => 'Hebrew',
);

# An open syllable in Hangul.

charinfo_is(0xAC00, 'HANGUL SYLLABLE-AC00',
    code          => 'AC00',
    name          => 'HANGUL SYLLABLE-AC00',
    category      => 'Lo',
    combining     => '0',
    bidi          => 'L',
    decomposition => undef,
    decimal       => '',
    digit         => '',
    numeric       => '',
    mirrored      => 'N',
    unicode10     => '',
    comment       => '',
    upper         => '',
    lower         => '',
    title         => '',
    block         => 'Hangul Syllables',
    script        => 'Hangul',
);

# A closed syllable in Hangul.

charinfo_is(0xAE00, 'HANGUL SYLLABLE-AE00',
    code          => 'AE00',
    name          => 'HANGUL SYLLABLE-AE00',
    category      => 'Lo',
    combining     => '0',
    bidi          => 'L',
    decomposition => undef,
    decimal       => '',
    digit         => '',
    numeric       => '',
    mirrored      => 'N',
    unicode10     => '',
    comment       => '',
    upper         => '',
    lower         => '',
    title         => '',
    block         => 'Hangul Syllables',
    script        => 'Hangul',
);

charinfo_is(0x1D400, 'MATHEMATICAL BOLD CAPITAL A',
    code          => '1D400',
    name          => 'MATHEMATICAL BOLD CAPITAL A',
    category      => 'Lu',
    combining     => '0',
    bidi          => 'L',
    decomposition => '<font> 0041',
    decimal       => '',
    digit         => '',
    numeric       => '',
    mirrored      => 'N',
    unicode10     => '',
    comment       => '',
    upper         => '',
    lower         => '',
    title         => '',
    block         => 'Mathematical Alphanumeric Symbols',
    script        => 'Common',
);

use Unicode::UCD qw(charblock charscript);

# 0x0590 is in the Hebrew block but unused.

is(charblock(0x590),  'Hebrew', '0x0590 - Hebrew unused charblock');
is(charscript(0x590), undef,    '0x0590 - Hebrew unused charscript');

charinfo_is(0xbe, 'VULGAR FRACTION THREE QUARTERS',
    code          => '00BE',
    name          => 'VULGAR FRACTION THREE QUARTERS',
    category      => 'No',
    combining     => '0',
    bidi          => 'ON',
    decomposition => '<fraction> 0033 2044 0034',
    decimal       => '',
    digit         => '',
    numeric       => '3/4',
    mirrored      => 'N',
    unicode10     => 'FRACTION THREE QUARTERS',
    comment       => '',
    upper         => '',
    lower         => '',
    title         => '',
    block         => 'Latin-1 Supplement',
    script        => 'Common',
);

use Unicode::UCD qw(charblocks charscripts);

my $charblocks = charblocks();

ok(exists $charblocks->{Thai}, 'Thai charblock exists');
is($charblocks->{Thai}->[0]->[0], hex('0e00'));
ok(!exists $charblocks->{PigLatin}, 'PigLatin charblock does not exist');

my $charscripts = charscripts();

ok(exists $charscripts->{Armenian}, 'Armenian charscript exists');
is($charscripts->{Armenian}->[0]->[0], hex('0531'));
ok(!exists $charscripts->{PigLatin}, 'PigLatin charscript does not exist');

# charscript() is called with a bare hex string, a 0x-prefixed one and a
# U+-prefixed one.

my $charscript;

$charscript = charscript("12ab");
is($charscript, 'Ethiopic', 'Ethiopic charscript');

$charscript = charscript("0x12ab");
is($charscript, 'Ethiopic');

$charscript = charscript("U+12ab");
is($charscript, 'Ethiopic');

# Called with a script name, charscript() returns a reference to a list of
# ranges; each range's first two elements are checked as start and end.

my $ranges;

$ranges = charscript('Ogham');
is($ranges->[1]->[0], hex('1681'), 'Ogham charscript');
is($ranges->[1]->[1], hex('169a'));

use Unicode::UCD qw(charinrange);

$ranges = charscript('Cherokee');
ok(!charinrange($ranges, "139f"), 'Cherokee charscript');
ok( charinrange($ranges, "13a0"));
ok( charinrange($ranges, "13f4"));
ok(!charinrange($ranges, "13f5"));

is(Unicode::UCD::UnicodeVersion(), '4.0.1', 'UnicodeVersion');

use Unicode::UCD qw(compexcl);

ok(!compexcl(0x0100), 'compexcl');
ok( compexcl(0x0958));

use Unicode::UCD qw(casefold);

my $casefold;

$casefold = casefold(0x41);

ok($casefold->{code}    eq '0041' &&
   $casefold->{status}  eq 'C'    &&
   $casefold->{mapping} eq '0061', 'casefold 0x41');

$casefold = casefold(0xdf);

ok($casefold->{code}    eq '00DF' &&
   $casefold->{status}  eq 'F'    &&
   $casefold->{mapping} eq '0073 0073', 'casefold 0xDF');

ok(!casefold(0x20));

use Unicode::UCD qw(casespec);

my $casespec;

ok(!casespec(0x41));

$casespec = casespec(0xdf);

# The condition must be genuinely undefined.  "eq undef" would stringify
# undef to '' and therefore also accept an empty-string condition.
ok($casespec->{code}  eq '00DF'      &&
   $casespec->{lower} eq '00DF'      &&
   $casespec->{title} eq '0053 0073' &&
   $casespec->{upper} eq '0053 0053' &&
   !defined $casespec->{condition}, 'casespec 0xDF');

$casespec = casespec(0x307);

ok($casespec->{az}->{code}      eq '0307' &&
   $casespec->{az}->{lower}     eq ''     &&
   $casespec->{az}->{title}     eq '0307' &&
   $casespec->{az}->{upper}     eq '0307' &&
   $casespec->{az}->{condition} eq 'az After_I',
   'casespec 0x307');

# perl #7305 UnicodeCD::compexcl is weird
# Both calls below pass compexcl() an aliased $_; the ok(1) after each one
# is only reached if the call did not die.

for (1) { my $ignored = compexcl($_) }
ok(1, 'compexcl read-only $_: perl #7305');
# The void-context grep over a hash is kept as-is to reproduce the report.
grep {compexcl $_} %{{1=>2}};
ok(1, 'compexcl read-only hash: perl #7305');

is(Unicode::UCD::_getcode('123'),     123,     "_getcode(123)");
is(Unicode::UCD::_getcode('0123'),    0x123,   "_getcode(0123)");
is(Unicode::UCD::_getcode('0x123'),   0x123,   "_getcode(0x123)");
is(Unicode::UCD::_getcode('0X123'),   0x123,   "_getcode(0X123)");
is(Unicode::UCD::_getcode('U+123'),   0x123,   "_getcode(U+123)");
is(Unicode::UCD::_getcode('u+123'),   0x123,   "_getcode(u+123)");
is(Unicode::UCD::_getcode('U+1234'),  0x1234,  "_getcode(U+1234)");
is(Unicode::UCD::_getcode('U+12345'), 0x12345, "_getcode(U+12345)");
is(Unicode::UCD::_getcode('123x'),    undef,   "_getcode(123x)");
is(Unicode::UCD::_getcode('x123'),    undef,   "_getcode(x123)");
is(Unicode::UCD::_getcode('0x123x'),  undef,   "_getcode(0x123x)");
is(Unicode::UCD::_getcode('U+123x'),  undef,   "_getcode(U+123x)");

{
    # Emptying the returned range list must not affect what a later
    # charscript('Latin') call returns.
    my $r1 = charscript('Latin');
    my $n1 = @$r1;
    is($n1, 26, "26 ranges in Latin script (Unicode 4.0.0)");
    shift @$r1 while @$r1;
    my $r2 = charscript('Latin');
    is(scalar @$r2, $n1, "modifying results should not mess up internal caches");
}

{
    is(charinfo(0xdeadbeef), undef, "[perl #23273] warnings in Unicode::UCD");
}