1 2BEGIN { 3 unless ("A" eq pack('U', 0x41)) { 4 print "1..0 # Unicode::Collate " . 5 "cannot stringify a Unicode code point\n"; 6 exit 0; 7 } 8 if ($ENV{PERL_CORE}) { 9 chdir('t') if -d 't'; 10 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); 11 } 12} 13 14use strict; 15use warnings; 16BEGIN { $| = 1; print "1..74\n"; } 17my $count = 0; 18sub ok ($;$) { 19 my $p = my $r = shift; 20 if (@_) { 21 my $x = shift; 22 $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; 23 } 24 print $p ? "ok" : "not ok", ' ', ++$count, "\n"; 25} 26 27use Unicode::Collate; 28 29ok(1); 30 31######################### 32 33our $kjeEntry = <<'ENTRIES'; 340301 ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT 350334 ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY 36043A ; [.0D31.0020.0002.043A] # CYRILLIC SMALL LETTER KA 37041A ; [.0D31.0020.0008.041A] # CYRILLIC CAPITAL LETTER KA 38045C ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE 39043A 0301 ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE 40040C ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE 41041A 0301 ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE 42ENTRIES 43 44our $aaEntry = <<'ENTRIES'; 450304 ; [.0000.005A.0002.0304] # COMBINING MACRON (cc = 230) 46030A ; [.0000.0043.0002.030A] # COMBINING RING ABOVE (cc = 230) 470327 ; [.0000.0055.0002.0327] # COMBINING CEDILLA (cc = 202) 48031A ; [.0000.006B.0002.031A] # COMBINING LEFT ANGLE ABOVE (cc = 232) 490061 ; [.0A15.0020.0002.0061] # LATIN SMALL LETTER A 500041 ; [.0A15.0020.0008.0041] # LATIN CAPITAL LETTER A 51007A ; [.0C13.0020.0002.007A] # LATIN SMALL LETTER Z 52005A ; [.0C13.0020.0008.005A] # LATIN CAPITAL LETTER Z 5300E5 ; [.0C25.0020.0002.00E5] # LATIN SMALL LETTER A WITH RING ABOVE; QQCM 5400C5 ; [.0C25.0020.0008.00C5] # LATIN CAPITAL LETTER A WITH RING ABOVE; QQCM 550061 030A ; [.0C25.0020.0002.0061] # LATIN SMALL LETTER A WITH RING ABOVE 560041 030A ; [.0C25.0020.0008.0041] # LATIN CAPITAL LETTER A WITH RING ABOVE 57ENTRIES 58 59######################### 60 61my $kjeNoN = Unicode::Collate->new( 62 level => 1, 63 table => undef, 64 normalization => undef, 65 entry => $kjeEntry, 66); 67 68ok($kjeNoN->lt("\x{43A}", "\x{43A}\x{301}")); 69ok($kjeNoN->gt("\x{45C}", "\x{43A}\x{334}\x{301}")); 70ok($kjeNoN->eq("\x{43A}", "\x{43A}\x{334}\x{301}")); 71ok($kjeNoN->eq("\x{45C}", "\x{43A}\x{301}\x{334}")); 72 73# 5 74 75our %sortkeys; 76 77$sortkeys{'KAac'} = $kjeNoN->viewSortKey("\x{43A}\x{301}"); 78$sortkeys{'KAta'} = $kjeNoN->viewSortKey("\x{43A}\x{334}\x{301}"); 79$sortkeys{'KAat'} = $kjeNoN->viewSortKey("\x{43A}\x{301}\x{334}"); 80 81eval { require Unicode::Normalize }; 82if (!$@) { 83 my $kjeNFD = Unicode::Collate->new( 84 level => 1, 85 table => undef, 86 entry => $kjeEntry, 87 ); 88 89ok($kjeNFD->lt("\x{43A}", "\x{43A}\x{301}")); 90ok($kjeNFD->eq("\x{45C}", "\x{43A}\x{334}\x{301}")); 91ok($kjeNFD->lt("\x{43A}", "\x{43A}\x{334}\x{301}")); 92ok($kjeNFD->eq("\x{45C}", "\x{43A}\x{301}\x{334}")); 93# 9 94 95 my $aaNFD = Unicode::Collate->new( 96 level => 1, 97 table => undef, 98 entry => $aaEntry, 99 ); 100 101ok($aaNFD->lt("Z", "A\x{30A}\x{304}")); 102ok($aaNFD->eq("A", "A\x{304}\x{30A}")); 103ok($aaNFD->eq(pack('U', 0xE5), "A\x{30A}\x{304}")); 104ok($aaNFD->eq("A\x{304}", "A\x{304}\x{30A}")); 105ok($aaNFD->lt("Z", "A\x{327}\x{30A}")); 106ok($aaNFD->lt("Z", "A\x{30A}\x{327}")); 107ok($aaNFD->lt("Z", "A\x{31A}\x{30A}")); 108ok($aaNFD->lt("Z", "A\x{30A}\x{31A}")); 109# 17 110 111 my $aaPre = Unicode::Collate->new( 112 level => 1, 113 normalization => "prenormalized", 114 table => undef, 115 entry => $aaEntry, 116 ); 117 118ok($aaPre->lt("Z", "A\x{30A}\x{304}")); 119ok($aaPre->eq("A", "A\x{304}\x{30A}")); 120ok($aaPre->eq(pack('U', 0xE5), "A\x{30A}\x{304}")); 121ok($aaPre->eq("A\x{304}", "A\x{304}\x{30A}")); 122ok($aaPre->lt("Z", "A\x{327}\x{30A}")); 123ok($aaPre->lt("Z", "A\x{30A}\x{327}")); 124ok($aaPre->lt("Z", "A\x{31A}\x{30A}")); 125ok($aaPre->lt("Z", "A\x{30A}\x{31A}")); 126# 25 127} else { 128 ok(1) for 1..20; 129} 130 131# again: loading Unicode::Normalize should not affect $kjeNoN. 132ok($kjeNoN->lt("\x{43A}", "\x{43A}\x{301}")); 133ok($kjeNoN->gt("\x{45C}", "\x{43A}\x{334}\x{301}")); 134ok($kjeNoN->eq("\x{43A}", "\x{43A}\x{334}\x{301}")); 135ok($kjeNoN->eq("\x{45C}", "\x{43A}\x{301}\x{334}")); 136 137ok($sortkeys{'KAac'}, $kjeNoN->viewSortKey("\x{43A}\x{301}")); 138ok($sortkeys{'KAta'}, $kjeNoN->viewSortKey("\x{43A}\x{334}\x{301}")); 139ok($sortkeys{'KAat'}, $kjeNoN->viewSortKey("\x{43A}\x{301}\x{334}")); 140 141# 32 142 143my $aaNoN = Unicode::Collate->new( 144 level => 1, 145 table => undef, 146 entry => $aaEntry, 147 normalization => undef, 148); 149 150ok($aaNoN->lt("Z", "A\x{30A}\x{304}")); 151ok($aaNoN->eq("A", "A\x{304}\x{30A}")); 152ok($aaNoN->eq(pack('U', 0xE5), "A\x{30A}\x{304}")); 153ok($aaNoN->eq("A\x{304}", "A\x{304}\x{30A}")); 154ok($aaNoN->eq("A", "A\x{327}\x{30A}")); 155ok($aaNoN->lt("Z", "A\x{30A}\x{327}")); 156ok($aaNoN->eq("A", "A\x{31A}\x{30A}")); 157ok($aaNoN->lt("Z", "A\x{30A}\x{31A}")); 158 159# 40 160 161# suppress contractions (not affected) 162 163my $kjeSup = Unicode::Collate->new( 164 level => 1, 165 table => undef, 166 normalization => undef, 167 entry => $kjeEntry, 168 suppress => [0x400..0x45F], 169); 170 171ok($kjeSup->lt("\x{43A}", "\x{43A}\x{301}")); 172ok($kjeSup->eq("\x{45C}", "\x{43A}\x{301}")); 173ok($kjeSup->lt("\x{41A}", "\x{41A}\x{301}")); 174ok($kjeSup->eq("\x{40C}", "\x{41A}\x{301}")); 175 176# 44 177 178our $tibetanEntry = <<'ENTRIES'; 1790000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429) 1800F71 ; [.206D.0020.0002.0F71] # TIBETAN VOWEL SIGN AA 1810F72 ; [.206E.0020.0002.0F72] # TIBETAN VOWEL SIGN I 1820F73 ; [.206F.0020.0002.0F73] # TIBETAN VOWEL SIGN II 1830F71 0F72 ; [.206F.0020.0002.0F73] # TIBETAN VOWEL SIGN II 1840F80 ; [.2070.0020.0002.0F80] # TIBETAN VOWEL SIGN REVERSED I 1850F81 ; [.2071.0020.0002.0F81] # TIBETAN VOWEL SIGN REVERSED II 1860F71 0F80 ; [.2071.0020.0002.0F81] # TIBETAN VOWEL SIGN REVERSED II 1870F74 ; [.2072.0020.0002.0F74] # TIBETAN VOWEL SIGN U 1880F75 ; [.2073.0020.0002.0F75] # TIBETAN VOWEL SIGN UU 1890F71 0F74 ; [.2073.0020.0002.0F75] # TIBETAN VOWEL SIGN UU 1900F76 ; [.2074.0020.0002.0F76] # TIBETAN VOWEL SIGN VOCALIC R 1910FB2 0F80 ; [.2074.0020.0002.0F76] # TIBETAN VOWEL SIGN VOCALIC R 1920F77 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR 1930FB2 0F81 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR 1940FB2 0F71 0F80 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR 1950F78 ; [.2076.0020.0002.0F78] # TIBETAN VOWEL SIGN VOCALIC L 1960FB3 0F80 ; [.2076.0020.0002.0F78] # TIBETAN VOWEL SIGN VOCALIC L 1970F79 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL 1980FB3 0F81 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL 1990FB3 0F71 0F80 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL 200ENTRIES 201 202# ccc(0F71) = 129 203# ccc(0F80) = 130 204# 0F76 = 0FB2 0F80 205# 0F78 = 0FB3 0F80 206# 0F81 = 0F71 0F80 207# 0F77 = <compat> 0FB2 0F81 = 0FB2 0F71 0F80 = 0F76 0F71 208# 0F79 = <compat> 0FB3 0F81 = 0FB3 0F71 0F80 = 0F78 0F71 209 210eval { require Unicode::Normalize }; 211if (!$@) { 212 my $tibNFD = Unicode::Collate->new( 213 table => undef, 214 entry => $tibetanEntry, 215 ); 216 217 # VOCALIC RR 218 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F81}")); 219 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F81}\x{334}")); 220 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F81}\0\x{334}")); 221 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{334}\x{F71}")); 222 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{F71}\x{334}")); 223 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{F71}\0\x{334}")); 224 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F71}\x{F80}")); 225 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{334}\x{F80}")); 226 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{F80}\x{334}")); 227 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{F80}\0\x{334}")); 228 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F80}\x{F71}")); 229 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{334}\x{F71}")); 230 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{F71}\x{334}")); 231 ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{F71}\0\x{334}")); 232# 58 233 234 # VOCALIC LL 235 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F81}")); 236 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F81}\x{334}")); 237 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F81}\0\x{334}")); 238 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{334}\x{F71}")); 239 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{F71}\x{334}")); 240 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{F71}\0\x{334}")); 241 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F71}\x{F80}")); 242 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{334}\x{F80}")); 243 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{F80}\x{334}")); 244 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{F80}\0\x{334}")); 245 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F80}\x{F71}")); 246 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{334}\x{F71}")); 247 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{F71}\x{334}")); 248 ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{F71}\0\x{334}")); 249# 72 250 251 my $discontNFD = Unicode::Collate->new( 252 table => undef, 253 entry => <<'ENTRIES', 2540000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429) 2550301 ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT 2560300 ; [.0000.0035.0002.0300] # COMBINING GRAVE ACCENT 2570327 ; [.0000.0055.0002.0327] # COMBINING CEDILLA 2580334 ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY 2590041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A 2600041 0327 0301 ; [.0102.0020.0008.0041] 2610041 0300 ; [.0103.0020.0008.0041] 262ENTRIES 263 ); 264 265 ok($discontNFD->eq("A\x{327}\x{301}\0\x{334}", "A\x{334}\x{327}\x{301}")); 266 ok($discontNFD->eq("A\x{300}\0\x{327}", "A\x{327}\x{300}")); 267} else { 268 ok(1) for 1..30; 269} 270# 74 271