1 2BEGIN { 3 unless ('A' eq pack('U', 0x41)) { 4 print "1..0 # Unicode::Collate cannot pack a Unicode code point\n"; 5 exit 0; 6 } 7 unless (0x41 == unpack('U', 'A')) { 8 print "1..0 # Unicode::Collate cannot get a Unicode code point\n"; 9 exit 0; 10 } 11 if ($ENV{PERL_CORE}) { 12 chdir('t') if -d 't'; 13 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); 14 } 15} 16 17use strict; 18use warnings; 19BEGIN { $| = 1; print "1..118\n"; } 20my $count = 0; 21sub ok ($;$) { 22 my $p = my $r = shift; 23 if (@_) { 24 my $x = shift; 25 $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; 26 } 27 print $p ? "ok" : "not ok", ' ', ++$count, "\n"; 28} 29 30use Unicode::Collate; 31 32ok(1); 33 34######################### 35 36our $kjeEntry = <<'ENTRIES'; 370301 ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT 380334 ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY 39043A ; [.0D31.0020.0002.043A] # CYRILLIC SMALL LETTER KA 40041A ; [.0D31.0020.0008.041A] # CYRILLIC CAPITAL LETTER KA 41045C ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE 42043A 0301 ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE 43040C ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE 44041A 0301 ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE 45ENTRIES 46 47our $aaEntry = <<'ENTRIES'; 480304 ; [.0000.005A.0002.0304] # COMBINING MACRON (cc = 230) 49030A ; [.0000.0043.0002.030A] # COMBINING RING ABOVE (cc = 230) 500327 ; [.0000.0055.0002.0327] # COMBINING CEDILLA (cc = 202) 51031A ; [.0000.006B.0002.031A] # COMBINING LEFT ANGLE ABOVE (cc = 232) 520061 ; [.0A15.0020.0002.0061] # LATIN SMALL LETTER A 530041 ; [.0A15.0020.0008.0041] # LATIN CAPITAL LETTER A 54007A ; [.0C13.0020.0002.007A] # LATIN SMALL LETTER Z 55005A ; [.0C13.0020.0008.005A] # LATIN CAPITAL LETTER Z 5600E5 ; [.0C25.0020.0002.00E5] # LATIN SMALL LETTER A WITH RING ABOVE; QQCM 5700C5 ; [.0C25.0020.0008.00C5] # LATIN CAPITAL LETTER A WITH RING ABOVE; QQCM 580061 030A ; [.0C25.0020.0002.0061] # LATIN SMALL LETTER A WITH RING ABOVE 590041 030A ; [.0C25.0020.0008.0041] # LATIN CAPITAL LETTER A WITH RING ABOVE 60ENTRIES 61 62######################### 63 64my $kjeNoN = Unicode::Collate->new( 65 level => 1, 66 table => undef, 67 normalization => undef, 68 entry => $kjeEntry, 69); 70 71ok($kjeNoN->lt("\x{43A}", "\x{43A}\x{301}")); 72ok($kjeNoN->gt("\x{45C}", "\x{43A}\x{334}\x{301}")); 73ok($kjeNoN->eq("\x{43A}", "\x{43A}\x{334}\x{301}")); 74ok($kjeNoN->eq("\x{45C}", "\x{43A}\x{301}\x{334}")); 75 76# 5 77 78our %sortkeys; 79 80$sortkeys{'KAac'} = $kjeNoN->viewSortKey("\x{43A}\x{301}"); 81$sortkeys{'KAta'} = $kjeNoN->viewSortKey("\x{43A}\x{334}\x{301}"); 82$sortkeys{'KAat'} = $kjeNoN->viewSortKey("\x{43A}\x{301}\x{334}"); 83 84eval { require Unicode::Normalize }; 85if (!$@) { 86 my $kjeNFD = Unicode::Collate->new( 87 level => 1, 88 table => undef, 89 entry => $kjeEntry, 90 ); 91 92ok($kjeNFD->lt("\x{43A}", "\x{43A}\x{301}")); 93ok($kjeNFD->eq("\x{45C}", "\x{43A}\x{334}\x{301}")); 94ok($kjeNFD->lt("\x{43A}", "\x{43A}\x{334}\x{301}")); 95ok($kjeNFD->eq("\x{45C}", "\x{43A}\x{301}\x{334}")); 96# 9 97 98 my $aaNFD = Unicode::Collate->new( 99 level => 1, 100 table => undef, 101 entry => $aaEntry, 102 ); 103 104ok($aaNFD->lt("Z", "A\x{30A}\x{304}")); 105ok($aaNFD->eq("A", "A\x{304}\x{30A}")); 106ok($aaNFD->eq(pack('U', 0xE5), "A\x{30A}\x{304}")); 107ok($aaNFD->eq("A\x{304}", "A\x{304}\x{30A}")); 108ok($aaNFD->lt("Z", "A\x{327}\x{30A}")); 109ok($aaNFD->lt("Z", "A\x{30A}\x{327}")); 110ok($aaNFD->lt("Z", "A\x{31A}\x{30A}")); 111ok($aaNFD->lt("Z", "A\x{30A}\x{31A}")); 112# 17 113 114 my $aaPre = Unicode::Collate->new( 115 level => 1, 116 normalization => "prenormalized", 117 table => undef, 118 entry => $aaEntry, 119 ); 120 121ok($aaPre->lt("Z", "A\x{30A}\x{304}")); 122ok($aaPre->eq("A", "A\x{304}\x{30A}")); 123ok($aaPre->eq(pack('U', 0xE5), "A\x{30A}\x{304}")); 124ok($aaPre->eq("A\x{304}", "A\x{304}\x{30A}")); 125ok($aaPre->lt("Z", "A\x{327}\x{30A}")); 126ok($aaPre->lt("Z", "A\x{30A}\x{327}")); 127ok($aaPre->lt("Z", "A\x{31A}\x{30A}")); 128ok($aaPre->lt("Z", "A\x{30A}\x{31A}")); 129# 25 130} else { 131 ok(1) for 1..20; 132} 133 134# again: loading Unicode::Normalize should not affect $kjeNoN. 135ok($kjeNoN->lt("\x{43A}", "\x{43A}\x{301}")); 136ok($kjeNoN->gt("\x{45C}", "\x{43A}\x{334}\x{301}")); 137ok($kjeNoN->eq("\x{43A}", "\x{43A}\x{334}\x{301}")); 138ok($kjeNoN->eq("\x{45C}", "\x{43A}\x{301}\x{334}")); 139 140ok($sortkeys{'KAac'}, $kjeNoN->viewSortKey("\x{43A}\x{301}")); 141ok($sortkeys{'KAta'}, $kjeNoN->viewSortKey("\x{43A}\x{334}\x{301}")); 142ok($sortkeys{'KAat'}, $kjeNoN->viewSortKey("\x{43A}\x{301}\x{334}")); 143 144# 32 145 146my $aaNoN = Unicode::Collate->new( 147 level => 1, 148 table => undef, 149 entry => $aaEntry, 150 normalization => undef, 151); 152 153ok($aaNoN->lt("Z", "A\x{30A}\x{304}")); 154ok($aaNoN->eq("A", "A\x{304}\x{30A}")); 155ok($aaNoN->eq(pack('U', 0xE5), "A\x{30A}\x{304}")); 156ok($aaNoN->eq("A\x{304}", "A\x{304}\x{30A}")); 157ok($aaNoN->eq("A", "A\x{327}\x{30A}")); 158ok($aaNoN->lt("Z", "A\x{30A}\x{327}")); 159ok($aaNoN->eq("A", "A\x{31A}\x{30A}")); 160ok($aaNoN->lt("Z", "A\x{30A}\x{31A}")); 161 162# 40 163 164# suppress contractions (not affected) 165 166my $kjeSup = Unicode::Collate->new( 167 level => 1, 168 table => undef, 169 normalization => undef, 170 entry => $kjeEntry, 171 suppress => [0x400..0x45F], 172); 173 174ok($kjeSup->lt("\x{43A}", "\x{43A}\x{301}")); 175ok($kjeSup->eq("\x{45C}", "\x{43A}\x{301}")); 176ok($kjeSup->lt("\x{41A}", "\x{41A}\x{301}")); 177ok($kjeSup->eq("\x{40C}", "\x{41A}\x{301}")); 178 179# 44 180 181our $tibetanEntry = <<'ENTRIES'; 1820000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429) 1830FB2 ; [.205B.0020.0002.0FB2] # TIBETAN SUBJOINED LETTER RA 1840FB3 ; [.205E.0020.0002.0FB3] # TIBETAN SUBJOINED LETTER LA 1850F71 ; [.206D.0020.0002.0F71] # TIBETAN VOWEL SIGN AA 1860F72 ; [.206E.0020.0002.0F72] # TIBETAN VOWEL SIGN I 1870F73 ; [.206F.0020.0002.0F73] # TIBETAN VOWEL SIGN II 1880F71 0F72 ; [.206F.0020.0002.0F73] # TIBETAN VOWEL SIGN II 1890F80 ; [.2070.0020.0002.0F80] # TIBETAN VOWEL SIGN REVERSED I 1900F81 ; [.2071.0020.0002.0F81] # TIBETAN VOWEL SIGN REVERSED II 1910F71 0F80 ; [.2071.0020.0002.0F81] # TIBETAN VOWEL SIGN REVERSED II 1920F74 ; [.2072.0020.0002.0F74] # TIBETAN VOWEL SIGN U 1930F75 ; [.2073.0020.0002.0F75] # TIBETAN VOWEL SIGN UU 1940F71 0F74 ; [.2073.0020.0002.0F75] # TIBETAN VOWEL SIGN UU 1950F76 ; [.2074.0020.0002.0F76] # TIBETAN VOWEL SIGN VOCALIC R 1960FB2 0F80 ; [.2074.0020.0002.0F76] # TIBETAN VOWEL SIGN VOCALIC R 1970F77 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR 1980FB2 0F81 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR 1990FB2 0F71 0F80 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR 2000F78 ; [.2076.0020.0002.0F78] # TIBETAN VOWEL SIGN VOCALIC L 2010FB3 0F80 ; [.2076.0020.0002.0F78] # TIBETAN VOWEL SIGN VOCALIC L 2020F79 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL 2030FB3 0F81 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL 2040FB3 0F71 0F80 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL 205ENTRIES 206 207# ccc(0F71) = 129 208# ccc(0F80) = 130 209# 0F76 = 0FB2 0F80 210# 0F78 = 0FB3 0F80 211# 0F81 = 0F71 0F80 212# 0F77 = <compat> 0FB2 0F81 = 0FB2 0F71 0F80 = 0F76 0F71 213# 0F79 = <compat> 0FB3 0F81 = 0FB3 0F71 0F80 = 0F78 0F71 214 215eval { require Unicode::Normalize }; 216if (!$@) { 217 my $tibNFD = Unicode::Collate->new( 218 table => undef, 219 entry => $tibetanEntry, 220 UCA_Version => 24, 221 ); 222 223 # VOCALIC RR 224 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F81}")); 225 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F81}\x{334}")); 226 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F81}\0\x{334}")); 227 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{334}\x{F71}")); 228 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{F71}\x{334}")); 229 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{F71}\0\x{334}")); 230 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F71}\x{F80}")); 231 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{334}\x{F80}")); 232 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{F80}\x{334}")); 233 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{F80}\0\x{334}")); 234 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F80}\x{F71}")); 235 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{334}\x{F71}")); 236 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{F71}\x{334}")); 237 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{F71}\0\x{334}")); 238# 58 239 240 # VOCALIC LL 241 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F81}")); 242 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F81}\x{334}")); 243 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F81}\0\x{334}")); 244 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{334}\x{F71}")); 245 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{F71}\x{334}")); 246 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{F71}\0\x{334}")); 247 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F71}\x{F80}")); 248 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{334}\x{F80}")); 249 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{F80}\x{334}")); 250 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{F80}\0\x{334}")); 251 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F80}\x{F71}")); 252 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{334}\x{F71}")); 253 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{F71}\x{334}")); 254 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{F71}\0\x{334}")); 255# 72 256 257 my $a1 = "\x{FB2}\x{334}\x{F81}"; 258 my $b1 = "\x{F77}\0\x{334}"; 259 my $a2 = "\x{FB2}\x{334}\x{F81}"; 260 my $b2 = "\x{FB2}\x{F80}\0\x{334}\x{F71}"; 261 262 for my $v (qw/20 22 24 26 28/) { 263 my $tib = Unicode::Collate->new( 264 table => undef, 265 entry => $tibetanEntry, 266 UCA_Version => $v, 267 ); 268 my $long = 22 <= $v && $v <= 24; 269 ok($tib->cmp($a1, $b1), $long ? 0 : -1); 270 ok($tib->cmp($a2, $b2), $long ? 1 : 0); 271 272 $tib->change(long_contraction => 0); 273 ok($tib->cmp($a1, $b1), -1); 274 ok($tib->cmp($a2, $b2), 0); 275 276 $tib->change(long_contraction => 1); 277 ok($tib->cmp($a1, $b1), 0); 278 ok($tib->cmp($a2, $b2), 1); 279 } 280# 102 281 282 # UCA_Version => 22 283 ok($tibNFD->cmp($a1, $b1), 0); 284 ok($tibNFD->cmp($a2, $b2), 1); 285 286 $tibNFD->change(UCA_Version => 26); # not affect long_contraction 287 ok($tibNFD->cmp($a1, $b1), 0); 288 ok($tibNFD->cmp($a2, $b2), 1); 289# 106 290 291 my $discontNFD = Unicode::Collate->new( 292 table => undef, 293 UCA_Version => 22, 294 entry => <<'ENTRIES', 2950000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429) 2960301 ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT 2970300 ; [.0000.0035.0002.0300] # COMBINING GRAVE ACCENT 2980327 ; [.0000.0055.0002.0327] # COMBINING CEDILLA 2990334 ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY 3000041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A 3010041 0327 0301 ; [.0102.0020.0008.0041] 3020041 0300 ; [.0103.0020.0008.0041] 303ENTRIES 304 ); 305 306 ok($discontNFD->eq("A\x{334}\x{327}\x{301}", "A\x{327}\x{301}\0\x{334}")); 307 ok($discontNFD->eq("A\x{327}\x{300}", "A\x{300}\0\x{327}")); 308 309 $discontNFD->change(long_contraction => 0); 310 ok($discontNFD->lt("A\x{334}\x{327}\x{301}", "A\x{327}\x{301}\0\x{334}")); 311 ok($discontNFD->eq("A\x{334}\x{327}\x{301}", "A\0\x{327}\x{301}\x{334}")); 312 ok($discontNFD->eq("A\x{327}\x{300}", "A\x{300}\0\x{327}")); 313 314 $discontNFD->change(level => 1); 315 ok($discontNFD->gt("A\x{327}\x{300}", "A\x{327}\0\x{300}")); 316 317 # discontiguous 318 ok($discontNFD->lt("A\x{334}\x{327}\x{301}", "A\x{327}\x{301}\0\x{334}")); 319 ok($discontNFD->lt("A\x{334}\x{327}\x{301}", "A\x{300}")); 320 ok($discontNFD->eq("A\x{334}\x{327}\x{301}", "A")); 321 322 # contiguous 323 ok($discontNFD->eq("A\x{327}\x{301}", "A\x{327}\x{301}\0\x{334}")); 324 ok($discontNFD->lt("A\x{327}\x{301}", "A\x{300}")); 325 ok($discontNFD->gt("A\x{327}\x{301}", "A")); 326} else { 327 ok(1) for 1..74; 328} 329# 118 330