BEGIN {
    # Emit a TAP "skip all" plan and stop when pack('U') does not yield
    # the character for the given code point on this perl.
    if ("A" ne pack('U', 0x41)) {
        print "1..0 # Unicode::Collate " .
              "cannot stringify a Unicode code point\n";
        exit 0;
    }

    # Under PERL_CORE, move into t/ when it exists and point @INC at lib.
    if ($ENV{PERL_CORE}) {
        chdir('t') if -d 't';
        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use Test;
BEGIN { plan tests => 72 };

use strict;
use warnings;
use Unicode::Collate;

#########################
# Collators under test
#########################

ok(1);

# A standard collator (3.1.1), driven by the keys.txt table.
my $Collator = Unicode::Collate->new(
    normalization => undef,
    table         => 'keys.txt',
);

# Constructor options shared by every collator that is tailored purely
# through an inline 'entry' table.
my @entry_only = (
    level         => 3,
    table         => undef,
    normalization => undef,
);

# Tailoring for hangul sorting,
# cf. http://std.dkuug.dk/JTC1/SC22/WG20/docs/documents.html
#     http://std.dkuug.dk/JTC1/SC22/WG20/docs/n1051-hangulsort.pdf
my $hangul_entries = <<'ENTRIES';
0061 ; [.0A15.0020.0002] # LATIN SMALL LETTER A
0041 ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A
#1161 ; [.1800.0020.0002] # <comment> initial jungseong A
#1163 ; [.1801.0020.0002] # <comment> initial jungseong YA
1100 ; [.1831.0020.0002] # choseong KIYEOK
1100 1161 ; [.1831.0020.0002][.1800.0020.0002] # G-A
1100 1163 ; [.1831.0020.0002][.1801.0020.0002] # G-YA
1101 ; [.1831.0020.0002][.1831.0020.0002] # choseong SSANGKIYEOK
1101 1161 ; [.1831.0020.0002][.1831.0020.0002][.1800.0020.0002] # GG-A
1101 1163 ; [.1831.0020.0002][.1831.0020.0002][.1801.0020.0002] # GG-YA
1102 ; [.1833.0020.0002] # choseong NIEUN
1102 1161 ; [.1833.0020.0002][.1800.0020.0002] # N-A
1102 1163 ; [.1833.0020.0002][.1801.0020.0002] # N-YA
3042 ; [.1921.0020.000E] # HIRAGANA LETTER A
11A8 ; [.FE10.0020.0002] # jongseong KIYEOK
11A9 ; [.FE10.0020.0002][.FE10.0020.0002] # jongseong SSANGKIYEOK
1161 ; [.FE20.0020.0002] # jungseong A <non-initial>
1163 ; [.FE21.0020.0002] # jungseong YA <non-initial>
ENTRIES

my $hangul = Unicode::Collate->new(
    @entry_only,
    entry => $hangul_entries,
);

ok(ref($hangul), "Unicode::Collate");

# Tailoring ordered as: Term < Jongseong < Jungseong < Choseong
my $trailwt_entries = <<'ENTRIES';
0061 ; [.0A15.0020.0002] # LATIN SMALL LETTER A
0041 ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A
11A8 ; [.1801.0020.0002] # HANGUL JONGSEONG KIYEOK
11A9 ; [.1801.0020.0002][.1801.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK
1161 ; [.1831.0020.0002] # HANGUL JUNGSEONG A
1163 ; [.1832.0020.0002] # HANGUL JUNGSEONG YA
1100 ; [.1861.0020.0002] # HANGUL CHOSEONG KIYEOK
1101 ; [.1861.0020.0002][.1861.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK
1102 ; [.1862.0020.0002] # HANGUL CHOSEONG NIEUN
3042 ; [.1921.0020.000E] # HIRAGANA LETTER A
ENTRIES

my $trailwt = Unicode::Collate->new(
    @entry_only,
    hangul_terminator => 16,
    entry             => $trailwt_entries,
);

#########################
# Jamo / syllable comparisons
#########################

# Each row: [ left, right, method for $Collator, for $hangul, for $trailwt ].
# Every row produces three tests, run in that collator order.
my @syllable_cases = (
    # L(simp)L(simp) vs L(comp): /GGA/
    [ "\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}",         qw(lt eq eq) ],

    # L(simp) vs L(simp)L(simp): /GA/ vs /GGA/
    [ "\x{1100}\x{1161}",         "\x{1100}\x{1100}\x{1161}", qw(gt lt lt) ],

    # T(simp)T(simp) vs T(comp): /AGG/
    [ "\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}",         qw(lt eq eq) ],

    # T(simp) vs T(simp)T(simp): /AG/ vs /AGG/
    [ "\x{1161}\x{11A8}",         "\x{1161}\x{11A8}\x{11A8}", qw(lt lt lt) ],

    # LV vs LLV: /GA/ vs /GNA/
    [ "\x{1100}\x{1161}",         "\x{1100}\x{1102}\x{1161}", qw(gt lt lt) ],

    # LVV vs LVX: /GAA/ vs /GA/.latinA
    [ "\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A",        qw(gt gt gt) ],

    # LVV vs LVX: /GAA/ vs /GA/.hiraganaA
    [ "\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}", qw(lt gt gt) ],

    # LVV vs LVX: /GAA/ vs /GA/.hanja
    [ "\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}", qw(lt gt gt) ],

    # LVL vs LVT: /GA/./G/ vs /GAG/
    [ "\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}", qw(lt lt lt) ],

    # LVT vs LVX: /GAG/ vs /GA/.latinA
    [ "\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A",        qw(gt gt gt) ],

    # LVT vs LVX: /GAG/ vs /GA/.hiraganaA
    [ "\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}", qw(lt gt gt) ],

    # LVT vs LVX: /GAG/ vs /GA/.hanja
    [ "\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}", qw(lt gt gt) ],

    # LVT vs LVV: /GAG/ vs /GAA/
    [ "\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}", qw(gt lt lt) ],

    # LVL vs LVV: /GA/./G/ vs /GAA/
    [ "\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}", qw(lt lt lt) ],

    # LV vs Syl(LV): /GA/ vs /[GA]/
    [ "\x{1100}\x{1161}",         "\x{AC00}",                 qw(eq eq eq) ],

    # LVT vs Syl(LV)T: /GAG/ vs /[GA]G/
    [ "\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}",         qw(eq eq eq) ],

    # LVT vs Syl(LVT): /GAG/ vs /[GAG]/
    [ "\x{1100}\x{1161}\x{11A8}", "\x{AC01}",                 qw(eq eq eq) ],

    # LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
    [ "\x{1100}\x{1161}\x{11A9}", "\x{AC02}",                 qw(eq eq eq) ],

    # LVTT vs Syl(LVT).T: /GAGG/ vs /[GAG]G/
    [ "\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}",         qw(gt eq eq) ],

    # LLVT vs L.Syl(LVT): /GGAG/ vs /G[GAG]/
    [ "\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}",         qw(gt eq eq) ],
);

for my $case (@syllable_cases) {
    my ($left, $right, $std_op, $hangul_op, $trailwt_op) = @$case;

    ok($Collator->$std_op($left, $right));
    ok($hangul  ->$hangul_op($left, $right));
    ok($trailwt ->$trailwt_op($left, $right));
}

#########################
# Contractions inside LVT
#########################

# The weights given to these contractions may be nonsensical; only the
# contraction matching itself is being exercised.
my $hangcont_entries = <<'ENTRIES';
1100 ; [.1831.0020.0002] # HANGUL CHOSEONG KIYEOK
1101 ; [.1832.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK
1161 ; [.188D.0020.0002] # HANGUL JUNGSEONG A
1162 ; [.188E.0020.0002] # HANGUL JUNGSEONG AE
1163 ; [.188F.0020.0002] # HANGUL JUNGSEONG YA
11A8 ; [.18CF.0020.0002] # HANGUL JONGSEONG KIYEOK
11A9 ; [.18D0.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK
1161 11A9 ; [.0000.0000.0000] # A-GG <contraction>
1100 1163 11A8 ; [.1000.0020.0002] # G-YA-G <contraction> eq. U+AC39
ENTRIES

my $hangcont = Unicode::Collate->new(
    @entry_only,
    entry => $hangcont_entries,
);

# Each row: [ left, right, method for $Collator, method for $hangcont ].
# Every row produces two tests, run in that collator order.
my @contraction_cases = (
    # contracted into VT
    [ "\x{1101}",                 "\x{1101}\x{1161}\x{11A9}", qw(lt eq) ],

    # not contracted into LVT but into VT
    [ "\x{1100}",                 "\x{1100}\x{1161}\x{11A9}", qw(lt eq) ],

    # contracted into LVT
    [ "\x{1100}\x{1163}\x{11A8}", "\x{1100}",                 qw(gt lt) ],

    # LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
    [ "\x{1100}\x{1161}\x{11A9}", "\x{AC02}",                 qw(eq eq) ],

    # LVT vs Syl(LVT): /GYAG/ vs /[GYAG]/
    [ "\x{1100}\x{1163}\x{11A8}", "\x{AC39}",                 qw(eq eq) ],
);

for my $case (@contraction_cases) {
    my ($left, $right, $std_op, $hangcont_op) = @$case;

    ok($Collator->$std_op($left, $right));
    ok($hangcont->$hangcont_op($left, $right));
}

1;
__END__