1 2BEGIN { 3 unless ("A" eq pack('U', 0x41)) { 4 print "1..0 # Unicode::Collate " . 5 "cannot stringify a Unicode code point\n"; 6 exit 0; 7 } 8 if ($ENV{PERL_CORE}) { 9 chdir('t') if -d 't'; 10 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); 11 } 12} 13 14use strict; 15use warnings; 16BEGIN { $| = 1; print "1..72\n"; } 17my $count = 0; 18sub ok ($;$) { 19 my $p = my $r = shift; 20 if (@_) { 21 my $x = shift; 22 $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; 23 } 24 print $p ? "ok" : "not ok", ' ', ++$count, "\n"; 25} 26 27use Unicode::Collate; 28 29ok(1); 30 31######################### 32 33# a standard collator (3.1.1) 34my $Collator = Unicode::Collate->new( 35 table => 'keys.txt', 36 normalization => undef, 37); 38 39 40# a collator for hangul sorting, 41# cf. http://std.dkuug.dk/JTC1/SC22/WG20/docs/documents.html 42# http://std.dkuug.dk/JTC1/SC22/WG20/docs/n1051-hangulsort.pdf 43my $hangul = Unicode::Collate->new( 44 level => 3, 45 table => undef, 46 normalization => undef, 47 48 entry => <<'ENTRIES', 490061 ; [.0A15.0020.0002] # LATIN SMALL LETTER A 500041 ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A 51#1161 ; [.1800.0020.0002] # <comment> initial jungseong A 52#1163 ; [.1801.0020.0002] # <comment> initial jungseong YA 531100 ; [.1831.0020.0002] # choseong KIYEOK 541100 1161 ; [.1831.0020.0002][.1800.0020.0002] # G-A 551100 1163 ; [.1831.0020.0002][.1801.0020.0002] # G-YA 561101 ; [.1831.0020.0002][.1831.0020.0002] # choseong SSANGKIYEOK 571101 1161 ; [.1831.0020.0002][.1831.0020.0002][.1800.0020.0002] # GG-A 581101 1163 ; [.1831.0020.0002][.1831.0020.0002][.1801.0020.0002] # GG-YA 591102 ; [.1833.0020.0002] # choseong NIEUN 601102 1161 ; [.1833.0020.0002][.1800.0020.0002] # N-A 611102 1163 ; [.1833.0020.0002][.1801.0020.0002] # N-YA 623042 ; [.1921.0020.000E] # HIRAGANA LETTER A 6311A8 ; [.FE10.0020.0002] # jongseong KIYEOK 6411A9 ; [.FE10.0020.0002][.FE10.0020.0002] # jongseong SSANGKIYEOK 651161 ; [.FE20.0020.0002] # jungseong A <non-initial> 661163 ; [.FE21.0020.0002] # jungseong YA <non-initial> 67ENTRIES 68); 69 70ok(ref $hangul, "Unicode::Collate"); 71 72my $trailwt = Unicode::Collate->new( 73 level => 3, 74 table => undef, 75 normalization => undef, 76 hangul_terminator => 16, 77 78 entry => <<'ENTRIES', # Term < Jongseong < Jungseong < Choseong 790061 ; [.0A15.0020.0002] # LATIN SMALL LETTER A 800041 ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A 8111A8 ; [.1801.0020.0002] # HANGUL JONGSEONG KIYEOK 8211A9 ; [.1801.0020.0002][.1801.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK 831161 ; [.1831.0020.0002] # HANGUL JUNGSEONG A 841163 ; [.1832.0020.0002] # HANGUL JUNGSEONG YA 851100 ; [.1861.0020.0002] # HANGUL CHOSEONG KIYEOK 861101 ; [.1861.0020.0002][.1861.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK 871102 ; [.1862.0020.0002] # HANGUL CHOSEONG NIEUN 883042 ; [.1921.0020.000E] # HIRAGANA LETTER A 89ENTRIES 90); 91 92######################### 93 94# L(simp)L(simp) vs L(comp): /GGA/ 95ok($Collator->lt("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}")); 96ok($hangul ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}")); 97ok($trailwt ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}")); 98 99# L(simp) vs L(simp)L(simp): /GA/ vs /GGA/ 100ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}")); 101ok($hangul ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}")); 102ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}")); 103 104# T(simp)T(simp) vs T(comp): /AGG/ 105ok($Collator->lt("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}")); 106ok($hangul ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}")); 107ok($trailwt ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}")); 108 109# T(simp) vs T(simp)T(simp): /AG/ vs /AGG/ 110ok($Collator->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}")); 111ok($hangul ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}")); 112ok($trailwt ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}")); 113 114# LV vs LLV: /GA/ vs /GNA/ 115ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}")); 116ok($hangul ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}")); 117ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}")); 118 119# LVX vs LVV: /GAA/ vs /GA/.latinA 120ok($Collator->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A")); 121ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A")); 122ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A")); 123 124# LVX vs LVV: /GAA/ vs /GA/.hiraganaA 125ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}")); 126ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}")); 127ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}")); 128 129# LVX vs LVV: /GAA/ vs /GA/.hanja 130ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}")); 131ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}")); 132ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}")); 133 134# LVL vs LVT: /GA/./G/ vs /GAG/ 135ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}")); 136ok($hangul ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}")); 137ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}")); 138 139# LVT vs LVX: /GAG/ vs /GA/.latinA 140ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A")); 141ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A")); 142ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A")); 143 144# LVT vs LVX: /GAG/ vs /GA/.hiraganaA 145ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}")); 146ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}")); 147ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}")); 148 149# LVT vs LVX: /GAG/ vs /GA/.hanja 150ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); 151ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); 152ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); 153 154# LVT vs LVV: /GAG/ vs /GAA/ 155ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}")); 156ok($hangul ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}")); 157ok($trailwt ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}")); 158 159# LVL vs LVV: /GA/./G/ vs /GAA/ 160ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}")); 161ok($hangul ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}")); 162ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}")); 163 164# LV vs Syl(LV): /GA/ vs /[GA]/ 165ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}")); 166ok($hangul ->eq("\x{1100}\x{1161}", "\x{AC00}")); 167ok($trailwt ->eq("\x{1100}\x{1161}", "\x{AC00}")); 168 169# LVT vs Syl(LV)T: /GAG/ vs /[GA]G/ 170ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); 171ok($hangul ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); 172ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); 173 174# LVT vs Syl(LVT): /GAG/ vs /[GAG]/ 175ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); 176ok($hangul ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); 177ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); 178 179# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/ 180ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); 181ok($hangul ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); 182ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); 183 184# LVTT vs Syl(LVT).T: /GAGG/ vs /[GAG]G/ 185ok($Collator->gt("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}")); 186ok($hangul ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}")); 187ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}")); 188 189# LLVT vs L.Syl(LVT): /GGAG/ vs /G[GAG]/ 190ok($Collator->gt("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}")); 191ok($hangul ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}")); 192ok($trailwt ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}")); 193 194######################### 195 196# checks contraction in LVT: 197# weights of these contractions may be non-sense. 198 199my $hangcont = Unicode::Collate->new( 200 level => 3, 201 table => undef, 202 normalization => undef, 203 entry => <<'ENTRIES', 2041100 ; [.1831.0020.0002] # HANGUL CHOSEONG KIYEOK 2051101 ; [.1832.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK 2061161 ; [.188D.0020.0002] # HANGUL JUNGSEONG A 2071162 ; [.188E.0020.0002] # HANGUL JUNGSEONG AE 2081163 ; [.188F.0020.0002] # HANGUL JUNGSEONG YA 20911A8 ; [.18CF.0020.0002] # HANGUL JONGSEONG KIYEOK 21011A9 ; [.18D0.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK 2111161 11A9 ; [.0000.0000.0000] # A-GG <contraction> 2121100 1163 11A8 ; [.1000.0020.0002] # G-YA-G <contraction> eq. U+AC39 213ENTRIES 214); 215 216# contracted into VT 217ok($Collator->lt("\x{1101}", "\x{1101}\x{1161}\x{11A9}")); 218ok($hangcont->eq("\x{1101}", "\x{1101}\x{1161}\x{11A9}")); 219 220# not contracted into LVT but into VT 221ok($Collator->lt("\x{1100}", "\x{1100}\x{1161}\x{11A9}")); 222ok($hangcont->eq("\x{1100}", "\x{1100}\x{1161}\x{11A9}")); 223 224# contracted into LVT 225ok($Collator->gt("\x{1100}\x{1163}\x{11A8}", "\x{1100}")); 226ok($hangcont->lt("\x{1100}\x{1163}\x{11A8}", "\x{1100}")); 227 228# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/ 229ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); 230ok($hangcont->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); 231 232# LVT vs Syl(LVT): /GYAG/ vs /[GYAG]/ 233ok($Collator->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}")); 234ok($hangcont->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}")); 235 2361; 237__END__ 238