# Test script for Unicode::Collate's hangul_terminator option.
#
# Collators built with and without hangul_terminator are compared on
# conjoining-jamo sequences, precomposed hangul syllables, and tailored
# entries for the circled hangul syllables U+326E..U+327B.

BEGIN {
    # Emit an empty plan (1..0) and stop when pack('U', 0x41) does not
    # yield the character "A".
    if (pack('U', 0x41) ne "A") {
        print "1..0 # Unicode::Collate cannot stringify a Unicode code point\n";
        exit 0;
    }
}

BEGIN {
    # When run as part of the perl core build, work from t/ and use the
    # in-tree library directory.
    if ($ENV{PERL_CORE}) {
        if (-d 't') {
            chdir('t');
        }
        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use Test;
BEGIN { plan tests => 58 };

use strict;
use warnings;
use Unicode::Collate;

#########################

# Returns the tailoring text for the circled hangul syllables
# U+326E..U+327B, one entry per line.  Every entry carries two collation
# elements (a per-syllable primary weight followed by the shared 188D
# weight); $extra is inserted verbatim after them, so it can be '' or an
# additional collation element such as '[.10.0.0.0]'.
sub circled_entries {
    my ($extra) = @_;

    # code point, first primary weight, name used in the entry comment
    my @rows = (
        [ '326E', '1831', 'GA' ],
        [ '326F', '1833', 'NA' ],
        [ '3270', '1834', 'DA' ],
        [ '3271', '1836', 'RA' ],
        [ '3272', '1837', 'MA' ],
        [ '3273', '1838', 'BA' ],
        [ '3274', '183A', 'SA' ],
        [ '3275', '183C', 'A'  ],
        [ '3276', '183D', 'JA' ],
        [ '3277', '183F', 'CA' ],
        [ '3278', '1840', 'KA' ],
        [ '3279', '1841', 'TA' ],
        [ '327A', '1842', 'PA' ],
        [ '327B', '1843', 'HA' ],
    );

    return join '', map {
        my ($cp, $lead, $name) = @$_;
        sprintf(
            "%s ; [.%s.0020.0006.%s][.188D.0020.0006.%s]%s # c.h.s. %s\n",
            $cp, $lead, $cp, $cp, $extra, $name
        );
    } @rows;
}

# Runs one ok() per (case, collator) pair, case by case.
#
#   $collators - array ref of Unicode::Collate objects
#   @cases     - array refs of [ $lhs, $rhs, @relations ], where
#                $relations[$i] is the comparison method name ('lt',
#                'gt' or 'eq') expected to hold on $collators->[$i]
sub check_relations {
    my ($collators, @cases) = @_;

    for my $case (@cases) {
        my ($lhs, $rhs, @relations) = @$case;
        for my $idx (0 .. $#relations) {
            my $relation = $relations[$idx];
            ok($collators->[$idx]->$relation($lhs, $rhs));
        }
    }
    return;
}

#########################

ok(1);

# Constructor arguments shared by every collator in this file.
my %base = (
    level         => 1,
    table         => 'keys.txt',
    normalization => undef,
);

# a standard collator (3.1.1)
my $Collator = Unicode::Collate->new(
    %base,
    entry => circled_entries(''),
);

# Same tailoring, but with hangul_terminator set.
my $hangul = Unicode::Collate->new(
    %base,
    hangul_terminator => 16,
    entry             => circled_entries(''),
);

ok(ref($hangul), "Unicode::Collate");

#########################

# Strings reused below: /GA/ as L+V jamo, /GAG/ and /DAG/ as L+V+T jamo.
my $GA  = "\x{1100}\x{1161}";
my $GAG = "\x{1100}\x{1161}\x{11A8}";
my $DAG = "\x{1103}\x{1161}\x{11A8}";

# Columns: left string, right string, relation expected from $Collator,
# relation expected from $hangul.
check_relations(
    [ $Collator, $hangul ],

    # LVV vs LVX: /GAA/ vs /GA/.latinA
    [ $GA . "\x{1161}", $GA . "A", 'gt', 'gt' ],

    # LVV vs LVX: /GAA/ vs /GA/.hiraganaA
    [ $GA . "\x{1161}", $GA . "\x{3042}", 'lt', 'gt' ],

    # LVV vs LVX: /GAA/ vs /GA/.hanja
    [ $GA . "\x{1161}", $GA . "\x{4E00}", 'lt', 'gt' ],

    # LVL vs LVT: /GA/./G/ vs /GAG/
    [ $GA . "\x{1100}", $GAG, 'lt', 'lt' ],

    # LVT vs LVX: /GAG/ vs /GA/.latinA
    [ $GAG, $GA . "A", 'gt', 'gt' ],

    # LVT vs LVX: /GAG/ vs /GA/.hiraganaA
    [ $GAG, $GA . "\x{3042}", 'lt', 'gt' ],

    # LVT vs LVX: /GAG/ vs /GA/.hanja
    [ $GAG, $GA . "\x{4E00}", 'lt', 'gt' ],

    # LV vs Syl(LV): /GA/ vs /[GA]/
    [ $GA, "\x{AC00}", 'eq', 'eq' ],

    # LVT vs Syl(LV)T: /GAG/ vs /[GA]G/
    [ $GAG, "\x{AC00}\x{11A8}", 'eq', 'eq' ],

    # LVT vs Syl(LVT): /GAG/ vs /[GAG]/
    [ $GAG, "\x{AC01}", 'eq', 'eq' ],

    # LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
    [ $GA . "\x{11A9}", "\x{AC02}", 'eq', 'eq' ],

    # LVT vs Syl(LVT) once more: same pair as two cases above
    [ $GAG, "\x{AC01}", 'eq', 'eq' ],
);

#########################

# Like $hangul, but every circled-syllable entry gets a third collation
# element, [.10.0.0.0].
my $hangcirc = Unicode::Collate->new(
    %base,
    hangul_terminator => 16,
    entry             => circled_entries('[.10.0.0.0]'),
);

# Columns: left, right, then the relation expected from $Collator,
# $hangul and $hangcirc respectively.
check_relations(
    [ $Collator, $hangul, $hangcirc ],

    # LV vs Circled Syl(LV): /GA/ vs /(GA)/
    [ $GA, "\x{326E}", 'eq', 'gt', 'eq' ],

    # LV vs Circled Syl(LV): followed by latin A
    [ $GA . "A", "\x{326E}A", 'eq', 'lt', 'eq' ],

    # LV vs Circled Syl(LV): followed by hiragana A
    [ $GA . "\x{3042}", "\x{326E}\x{3042}", 'eq', 'lt', 'eq' ],

    # LV vs Circled Syl(LV): followed by hanja
    [ $GA . "\x{4E00}", "\x{326E}\x{4E00}", 'eq', 'lt', 'eq' ],
);

#########################

# checks contraction in LVT:
# weights of these contractions may be non-sense.

my $hangcont = Unicode::Collate->new(
    %base,
    hangul_terminator => 16,
    entry             => <<'ENTRIES',
1100 1161 ; [.1831.0020.0002.1100][.188D.0020.0002.1161] # KIYEOK+A
1161 11A8 ; [.188D.0020.0002.1161][.18CF.0020.0002.11A8] # A+KIYEOK
ENTRIES
);

# Columns: left, right, relation expected from $Collator and $hangcont.
check_relations(
    [ $Collator, $hangcont ],

    # cont<LV> vs Syl(LV): /<GA>/ vs /[GA]/
    [ $GA, "\x{AC00}", 'eq', 'eq' ],

    # cont<LV>.T vs Syl(LV).T: /<GA>G/ vs /[GA]G/
    [ $GAG, "\x{AC00}\x{11A8}", 'eq', 'eq' ],

    # cont<LV>.T vs Syl(LVT): /<GA>G/ vs /[GAG]/
    [ $GAG, "\x{AC01}", 'eq', 'eq' ],

    # L.cont<VT> vs Syl(LV).T: /D<AG>/ vs /[DA]G/
    [ $DAG, "\x{B2E4}\x{11A8}", 'eq', 'eq' ],

    # L.cont<VT> vs Syl(LVT): /D<AG>/ vs /[DAG]/
    [ $DAG, "\x{B2E5}", 'eq', 'eq' ],
);

#####

# Toggle hangul_terminator on the standard collator via change().
# Columns: left, right, relation expected with hangul_terminator => 16,
# relation expected with hangul_terminator => 0.
my @toggle_cases = (
    [ $GAG,             $GA . "\x{4E00}",   'gt', 'lt' ],
    [ $GA,              "\x{326E}",         'gt', 'eq' ],
    [ $GA . "A",        "\x{326E}A",        'lt', 'eq' ],
    [ $GA . "\x{3042}", "\x{326E}\x{3042}", 'lt', 'eq' ],
    [ $GA . "\x{4E00}", "\x{326E}\x{4E00}", 'lt', 'eq' ],
);

$Collator->change(hangul_terminator => 16);

for my $case (@toggle_cases) {
    my ($lhs, $rhs, $with_terminator) = @$case;
    ok($Collator->$with_terminator($lhs, $rhs));
}

$Collator->change(hangul_terminator => 0);

for my $case (@toggle_cases) {
    my ($lhs, $rhs, undef, $without_terminator) = @$case;
    ok($Collator->$without_terminator($lhs, $rhs));
}

1;
__END__