
# Hangul collation tests: compares conjoining-jamo sequences and
# precomposed syllables under three differently configured collators.
# Output is hand-rolled TAP (plan line first, then one "ok"/"not ok" line
# per assertion).

BEGIN {
    # When PERL_CORE is set in the environment, run from t/ (if present)
    # and replace @INC with the lib directory next to it.
    if ($ENV{PERL_CORE}) {
        chdir('t') if -d 't';
        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use strict;
use warnings;

# Unbuffer the selected output handle and emit the plan before anything else.
BEGIN { $| = 1; print "1..72\n"; }

# Number of result lines printed so far; ok() pre-increments it.
my $count = 0;

# ok(GOT)           - passes iff GOT is true.
# ok(GOT, EXPECTED) - passes iff both are undef, or both are defined and
#                     compare equal with 'eq'.
# Prints "ok N" or "not ok N" to the currently selected handle.
sub ok ($;$) {
    my $p = my $r = shift;
    if (@_) {
        my $x = shift;
        $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
    }
    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
}

use Unicode::Collate;

ok(1);

# Thin wrappers around the module's pack_U/unpack_U helpers.
# Neither is called anywhere in this file.
sub _pack_U   { Unicode::Collate::pack_U(@_) }
sub _unpack_U { Unicode::Collate::unpack_U(@_) }

#########################

# a standard collator (3.1.1)
my $Collator = Unicode::Collate->new(
    table => 'keys.txt',
    normalization => undef,
);


# a collator for hangul sorting,
# cf. http://std.dkuug.dk/JTC1/SC22/WG20/docs/documents.html
#     http://std.dkuug.dk/JTC1/SC22/WG20/docs/n1051-hangulsort.pdf
# No table file is loaded; all weights come from the inline entries below.
my $hangul = Unicode::Collate->new(
    level => 3,
    table => undef,
    normalization => undef,

    entry => <<'ENTRIES',
0061 ; [.0A15.0020.0002] # LATIN SMALL LETTER A
0041 ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A
#1161 ; [.1800.0020.0002] # <comment> initial jungseong A
#1163 ; [.1801.0020.0002] # <comment> initial jungseong YA
1100 ; [.1831.0020.0002] # choseong KIYEOK
1100 1161 ; [.1831.0020.0002][.1800.0020.0002] # G-A
1100 1163 ; [.1831.0020.0002][.1801.0020.0002] # G-YA
1101 ; [.1831.0020.0002][.1831.0020.0002] # choseong SSANGKIYEOK
1101 1161 ; [.1831.0020.0002][.1831.0020.0002][.1800.0020.0002] # GG-A
1101 1163 ; [.1831.0020.0002][.1831.0020.0002][.1801.0020.0002] # GG-YA
1102 ; [.1833.0020.0002] # choseong NIEUN
1102 1161 ; [.1833.0020.0002][.1800.0020.0002] # N-A
1102 1163 ; [.1833.0020.0002][.1801.0020.0002] # N-YA
3042 ; [.1921.0020.000E] # HIRAGANA LETTER A
11A8 ; [.FE10.0020.0002] # jongseong KIYEOK
11A9 ; [.FE10.0020.0002][.FE10.0020.0002] # jongseong SSANGKIYEOK
1161 ; [.FE20.0020.0002] # jungseong A <non-initial>
1163 ; [.FE21.0020.0002] # jungseong YA <non-initial>
ENTRIES
);

ok(ref $hangul, "Unicode::Collate");

# Same idea, but using the hangul_terminator option instead of
# choseong+jungseong contractions; again only inline entries are used.
my $trailwt = Unicode::Collate->new(
    level => 3,
    table => undef,
    normalization => undef,
    hangul_terminator => 16,

    entry => <<'ENTRIES', # Term < Jongseong < Jungseong < Choseong
0061 ; [.0A15.0020.0002] # LATIN SMALL LETTER A
0041 ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A
11A8 ; [.1801.0020.0002] # HANGUL JONGSEONG KIYEOK
11A9 ; [.1801.0020.0002][.1801.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK
1161 ; [.1831.0020.0002] # HANGUL JUNGSEONG A
1163 ; [.1832.0020.0002] # HANGUL JUNGSEONG YA
1100 ; [.1861.0020.0002] # HANGUL CHOSEONG KIYEOK
1101 ; [.1861.0020.0002][.1861.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK
1102 ; [.1862.0020.0002] # HANGUL CHOSEONG NIEUN
3042 ; [.1921.0020.000E] # HIRAGANA LETTER A
ENTRIES
);

#########################

# Each group below runs the same comparison under $Collator, $hangul and
# $trailwt, in that order.

# L(simp)L(simp) vs L(comp): /GGA/
ok($Collator->lt("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}"));
ok($hangul  ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}"));
ok($trailwt ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}"));

# L(simp) vs L(simp)L(simp): /GA/ vs /GGA/
ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}"));
ok($hangul  ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}"));
ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}"));

# T(simp)T(simp) vs T(comp): /AGG/
ok($Collator->lt("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}"));
ok($hangul  ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}"));
ok($trailwt ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}"));

# T(simp) vs T(simp)T(simp): /AG/ vs /AGG/
ok($Collator->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}"));
ok($hangul  ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}"));
ok($trailwt ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}"));

# LV vs LLV: /GA/ vs /GNA/
ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}"));
ok($hangul  ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}"));
ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}"));

# LVX vs LVV: /GAA/ vs /GA/.latinA
ok($Collator->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));

# LVX vs LVV: /GAA/ vs /GA/.hiraganaA
ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));

# LVX vs LVV: /GAA/ vs /GA/.hanja
ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));

# LVL vs LVT: /GA/./G/ vs /GAG/
ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
ok($hangul  ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));

# LVT vs LVX: /GAG/ vs /GA/.latinA
ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));

# LVT vs LVX: /GAG/ vs /GA/.hiraganaA
ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));

# LVT vs LVX: /GAG/ vs /GA/.hanja
ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));

# LVT vs LVV: /GAG/ vs /GAA/
ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}"));
ok($hangul  ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}"));
ok($trailwt ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}"));

# LVL vs LVV: /GA/./G/ vs /GAA/
ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}"));
ok($hangul  ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}"));
ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}"));

# LV vs Syl(LV): /GA/ vs /[GA]/
ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}"));
ok($hangul  ->eq("\x{1100}\x{1161}", "\x{AC00}"));
# $trailwt: the jamo sequence L+V must equal the precomposed syllable U+AC00.
ok($trailwt ->eq("\x{1100}\x{1161}", "\x{AC00}"));

# LVT vs Syl(LV)T: /GAG/ vs /[GA]G/
ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
ok($hangul  ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));

# LVT vs Syl(LVT): /GAG/ vs /[GAG]/
ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
ok($hangul  ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));

# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
ok($hangul  ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));

# LVTT vs Syl(LVT).T: /GAGG/ vs /[GAG]G/
ok($Collator->gt("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}"));
ok($hangul  ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}"));
ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}"));

# LLVT vs L.Syl(LVT): /GGAG/ vs /G[GAG]/
ok($Collator->gt("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}"));
ok($hangul  ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}"));
ok($trailwt ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}"));

#########################

# Checks contractions inside an LVT sequence.
# The weights given to these contractions may be nonsense; only the
# contraction matching itself is under test.

# Collator with two jamo contractions defined on top of plain
# single-jamo weights; no table file is loaded.
#   1161 11A9      (V+T)   gets an all-zero collation element.
#   1100 1163 11A8 (L+V+T) gets primary weight 1000, which is lower than
#                          the 1831 assigned to U+1100 on its own.
my $hangcont = Unicode::Collate->new(
    level => 3,
    table => undef,
    normalization => undef,
    entry => <<'ENTRIES',
1100 ; [.1831.0020.0002] # HANGUL CHOSEONG KIYEOK
1101 ; [.1832.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK
1161 ; [.188D.0020.0002] # HANGUL JUNGSEONG A
1162 ; [.188E.0020.0002] # HANGUL JUNGSEONG AE
1163 ; [.188F.0020.0002] # HANGUL JUNGSEONG YA
11A8 ; [.18CF.0020.0002] # HANGUL JONGSEONG KIYEOK
11A9 ; [.18D0.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK
1161 11A9 ; [.0000.0000.0000] # A-GG <contraction>
1100 1163 11A8 ; [.1000.0020.0002] # G-YA-G <contraction> eq. U+AC39
ENTRIES
);

# contracted into VT
ok($Collator->lt("\x{1101}", "\x{1101}\x{1161}\x{11A9}"));
ok($hangcont->eq("\x{1101}", "\x{1101}\x{1161}\x{11A9}"));

# not contracted into LVT but into VT
ok($Collator->lt("\x{1100}", "\x{1100}\x{1161}\x{11A9}"));
ok($hangcont->eq("\x{1100}", "\x{1100}\x{1161}\x{11A9}"));

# contracted into LVT
ok($Collator->gt("\x{1100}\x{1163}\x{11A8}", "\x{1100}"));
ok($hangcont->lt("\x{1100}\x{1163}\x{11A8}", "\x{1100}"));

# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
ok($hangcont->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));

# LVT vs Syl(LVT): /GYAG/ vs /[GYAG]/
ok($Collator->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}"));
ok($hangcont->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}"));

1;
__END__