1 2BEGIN { 3 if ($ENV{PERL_CORE}) { 4 chdir('t') if -d 't'; 5 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); 6 } 7} 8 9use strict; 10use warnings; 11BEGIN { $| = 1; print "1..72\n"; } 12my $count = 0; 13sub ok ($;$) { 14 my $p = my $r = shift; 15 if (@_) { 16 my $x = shift; 17 $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; 18 } 19 print $p ? "ok" : "not ok", ' ', ++$count, "\n"; 20} 21 22use Unicode::Collate; 23 24ok(1); 25 26sub _pack_U { Unicode::Collate::pack_U(@_) } 27sub _unpack_U { Unicode::Collate::unpack_U(@_) } 28 29######################### 30 31# a standard collator (3.1.1) 32my $Collator = Unicode::Collate->new( 33 table => 'keys.txt', 34 normalization => undef, 35); 36 37 38# a collator for hangul sorting, 39# cf. http://std.dkuug.dk/JTC1/SC22/WG20/docs/documents.html 40# http://std.dkuug.dk/JTC1/SC22/WG20/docs/n1051-hangulsort.pdf 41my $hangul = Unicode::Collate->new( 42 level => 3, 43 table => undef, 44 normalization => undef, 45 46 entry => <<'ENTRIES', 470061 ; [.0A15.0020.0002] # LATIN SMALL LETTER A 480041 ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A 49#1161 ; [.1800.0020.0002] # <comment> initial jungseong A 50#1163 ; [.1801.0020.0002] # <comment> initial jungseong YA 511100 ; [.1831.0020.0002] # choseong KIYEOK 521100 1161 ; [.1831.0020.0002][.1800.0020.0002] # G-A 531100 1163 ; [.1831.0020.0002][.1801.0020.0002] # G-YA 541101 ; [.1831.0020.0002][.1831.0020.0002] # choseong SSANGKIYEOK 551101 1161 ; [.1831.0020.0002][.1831.0020.0002][.1800.0020.0002] # GG-A 561101 1163 ; [.1831.0020.0002][.1831.0020.0002][.1801.0020.0002] # GG-YA 571102 ; [.1833.0020.0002] # choseong NIEUN 581102 1161 ; [.1833.0020.0002][.1800.0020.0002] # N-A 591102 1163 ; [.1833.0020.0002][.1801.0020.0002] # N-YA 603042 ; [.1921.0020.000E] # HIRAGANA LETTER A 6111A8 ; [.FE10.0020.0002] # jongseong KIYEOK 6211A9 ; [.FE10.0020.0002][.FE10.0020.0002] # jongseong SSANGKIYEOK 631161 ; [.FE20.0020.0002] # jungseong A <non-initial> 641163 ; [.FE21.0020.0002] # jungseong YA <non-initial> 65ENTRIES 66); 67 68ok(ref $hangul, "Unicode::Collate"); 69 70my $trailwt = Unicode::Collate->new( 71 level => 3, 72 table => undef, 73 normalization => undef, 74 hangul_terminator => 16, 75 76 entry => <<'ENTRIES', # Term < Jongseong < Jungseong < Choseong 770061 ; [.0A15.0020.0002] # LATIN SMALL LETTER A 780041 ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A 7911A8 ; [.1801.0020.0002] # HANGUL JONGSEONG KIYEOK 8011A9 ; [.1801.0020.0002][.1801.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK 811161 ; [.1831.0020.0002] # HANGUL JUNGSEONG A 821163 ; [.1832.0020.0002] # HANGUL JUNGSEONG YA 831100 ; [.1861.0020.0002] # HANGUL CHOSEONG KIYEOK 841101 ; [.1861.0020.0002][.1861.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK 851102 ; [.1862.0020.0002] # HANGUL CHOSEONG NIEUN 863042 ; [.1921.0020.000E] # HIRAGANA LETTER A 87ENTRIES 88); 89 90######################### 91 92# L(simp)L(simp) vs L(comp): /GGA/ 93ok($Collator->lt("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}")); 94ok($hangul ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}")); 95ok($trailwt ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}")); 96 97# L(simp) vs L(simp)L(simp): /GA/ vs /GGA/ 98ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}")); 99ok($hangul ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}")); 100ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}")); 101 102# T(simp)T(simp) vs T(comp): /AGG/ 103ok($Collator->lt("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}")); 104ok($hangul ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}")); 105ok($trailwt ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}")); 106 107# T(simp) vs T(simp)T(simp): /AG/ vs /AGG/ 108ok($Collator->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}")); 109ok($hangul ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}")); 110ok($trailwt ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}")); 111 112# LV vs LLV: /GA/ vs /GNA/ 113ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}")); 114ok($hangul ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}")); 115ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}")); 116 117# LVX vs LVV: /GAA/ vs /GA/.latinA 118ok($Collator->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A")); 119ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A")); 120ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A")); 121 122# LVX vs LVV: /GAA/ vs /GA/.hiraganaA 123ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}")); 124ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}")); 125ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}")); 126 127# LVX vs LVV: /GAA/ vs /GA/.hanja 128ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}")); 129ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}")); 130ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}")); 131 132# LVL vs LVT: /GA/./G/ vs /GAG/ 133ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}")); 134ok($hangul ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}")); 135ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}")); 136 137# LVT vs LVX: /GAG/ vs /GA/.latinA 138ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A")); 139ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A")); 140ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A")); 141 142# LVT vs LVX: /GAG/ vs /GA/.hiraganaA 143ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}")); 144ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}")); 145ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}")); 146 147# LVT vs LVX: /GAG/ vs /GA/.hanja 148ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); 149ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); 150ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); 151 152# LVT vs LVV: /GAG/ vs /GAA/ 153ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}")); 154ok($hangul ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}")); 155ok($trailwt ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}")); 156 157# LVL vs LVV: /GA/./G/ vs /GAA/ 158ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}")); 159ok($hangul ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}")); 160ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}")); 161 162# LV vs Syl(LV): /GA/ vs /[GA]/ 163ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}")); 164ok($hangul ->eq("\x{1100}\x{1161}", "\x{AC00}")); 165ok($trailwt ->eq("\x{1100}\x{1161}", "\x{AC00}")); 166 167# LVT vs Syl(LV)T: /GAG/ vs /[GA]G/ 168ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); 169ok($hangul ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); 170ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); 171 172# LVT vs Syl(LVT): /GAG/ vs /[GAG]/ 173ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); 174ok($hangul ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); 175ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); 176 177# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/ 178ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); 179ok($hangul ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); 180ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); 181 182# LVTT vs Syl(LVT).T: /GAGG/ vs /[GAG]G/ 183ok($Collator->gt("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}")); 184ok($hangul ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}")); 185ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}")); 186 187# LLVT vs L.Syl(LVT): /GGAG/ vs /G[GAG]/ 188ok($Collator->gt("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}")); 189ok($hangul ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}")); 190ok($trailwt ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}")); 191 192######################### 193 194# checks contraction in LVT: 195# weights of these contractions may be non-sense. 196 197my $hangcont = Unicode::Collate->new( 198 level => 3, 199 table => undef, 200 normalization => undef, 201 entry => <<'ENTRIES', 2021100 ; [.1831.0020.0002] # HANGUL CHOSEONG KIYEOK 2031101 ; [.1832.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK 2041161 ; [.188D.0020.0002] # HANGUL JUNGSEONG A 2051162 ; [.188E.0020.0002] # HANGUL JUNGSEONG AE 2061163 ; [.188F.0020.0002] # HANGUL JUNGSEONG YA 20711A8 ; [.18CF.0020.0002] # HANGUL JONGSEONG KIYEOK 20811A9 ; [.18D0.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK 2091161 11A9 ; [.0000.0000.0000] # A-GG <contraction> 2101100 1163 11A8 ; [.1000.0020.0002] # G-YA-G <contraction> eq. U+AC39 211ENTRIES 212); 213 214# contracted into VT 215ok($Collator->lt("\x{1101}", "\x{1101}\x{1161}\x{11A9}")); 216ok($hangcont->eq("\x{1101}", "\x{1101}\x{1161}\x{11A9}")); 217 218# not contracted into LVT but into VT 219ok($Collator->lt("\x{1100}", "\x{1100}\x{1161}\x{11A9}")); 220ok($hangcont->eq("\x{1100}", "\x{1100}\x{1161}\x{11A9}")); 221 222# contracted into LVT 223ok($Collator->gt("\x{1100}\x{1163}\x{11A8}", "\x{1100}")); 224ok($hangcont->lt("\x{1100}\x{1163}\x{11A8}", "\x{1100}")); 225 226# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/ 227ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); 228ok($hangcont->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); 229 230# LVT vs Syl(LVT): /GYAG/ vs /[GYAG]/ 231ok($Collator->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}")); 232ok($hangcont->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}")); 233 2341; 235__END__ 236