1BEGIN { 2 unless ("A" eq pack('U', 0x41)) { 3 print "1..0 # Unicode::Collate " . 4 "cannot stringify a Unicode code point\n"; 5 exit 0; 6 } 7} 8 9BEGIN { 10 if ($ENV{PERL_CORE}) { 11 chdir('t') if -d 't'; 12 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); 13 } 14} 15 16use Test; 17BEGIN { plan tests => 72 }; 18 19use strict; 20use warnings; 21use Unicode::Collate; 22 23use vars qw($IsEBCDIC); 24$IsEBCDIC = ord("A") != 0x41; 25 26######################### 27 28ok(1); 29 30# a standard collator (3.1.1) 31my $Collator = Unicode::Collate->new( 32 table => 'keys.txt', 33 normalization => undef, 34); 35 36 37# a collator for hangul sorting, 38# cf. http://std.dkuug.dk/JTC1/SC22/WG20/docs/documents.html 39# http://std.dkuug.dk/JTC1/SC22/WG20/docs/n1051-hangulsort.pdf 40my $hangul = Unicode::Collate->new( 41 level => 3, 42 table => undef, 43 normalization => undef, 44 45 entry => <<'ENTRIES', 460061 ; [.0A15.0020.0002] # LATIN SMALL LETTER A 470041 ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A 48#1161 ; [.1800.0020.0002] # <comment> initial jungseong A 49#1163 ; [.1801.0020.0002] # <comment> initial jungseong YA 501100 ; [.1831.0020.0002] # choseong KIYEOK 511100 1161 ; [.1831.0020.0002][.1800.0020.0002] # G-A 521100 1163 ; [.1831.0020.0002][.1801.0020.0002] # G-YA 531101 ; [.1831.0020.0002][.1831.0020.0002] # choseong SSANGKIYEOK 541101 1161 ; [.1831.0020.0002][.1831.0020.0002][.1800.0020.0002] # GG-A 551101 1163 ; [.1831.0020.0002][.1831.0020.0002][.1801.0020.0002] # GG-YA 561102 ; [.1833.0020.0002] # choseong NIEUN 571102 1161 ; [.1833.0020.0002][.1800.0020.0002] # N-A 581102 1163 ; [.1833.0020.0002][.1801.0020.0002] # N-YA 593042 ; [.1921.0020.000E] # HIRAGANA LETTER A 6011A8 ; [.FE10.0020.0002] # jongseong KIYEOK 6111A9 ; [.FE10.0020.0002][.FE10.0020.0002] # jongseong SSANGKIYEOK 621161 ; [.FE20.0020.0002] # jungseong A <non-initial> 631163 ; [.FE21.0020.0002] # jungseong YA <non-initial> 64ENTRIES 65); 66 67ok(ref $hangul, "Unicode::Collate"); 68 69my $trailwt = Unicode::Collate->new( 70 level => 3, 71 table => undef, 72 normalization => undef, 73 hangul_terminator => 16, 74 75 entry => <<'ENTRIES', # Term < Jongseong < Jungseong < Choseong 760061 ; [.0A15.0020.0002] # LATIN SMALL LETTER A 770041 ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A 7811A8 ; [.1801.0020.0002] # HANGUL JONGSEONG KIYEOK 7911A9 ; [.1801.0020.0002][.1801.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK 801161 ; [.1831.0020.0002] # HANGUL JUNGSEONG A 811163 ; [.1832.0020.0002] # HANGUL JUNGSEONG YA 821100 ; [.1861.0020.0002] # HANGUL CHOSEONG KIYEOK 831101 ; [.1861.0020.0002][.1861.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK 841102 ; [.1862.0020.0002] # HANGUL CHOSEONG NIEUN 853042 ; [.1921.0020.000E] # HIRAGANA LETTER A 86ENTRIES 87); 88 89######################### 90 91# L(simp)L(simp) vs L(comp): /GGA/ 92ok($Collator->lt("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}")); 93ok($hangul ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}")); 94ok($trailwt ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}")); 95 96# L(simp) vs L(simp)L(simp): /GA/ vs /GGA/ 97ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}")); 98ok($hangul ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}")); 99ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}")); 100 101# T(simp)T(simp) vs T(comp): /AGG/ 102ok($Collator->lt("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}")); 103ok($hangul ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}")); 104ok($trailwt ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}")); 105 106# T(simp) vs T(simp)T(simp): /AG/ vs /AGG/ 107ok($Collator->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}")); 108ok($hangul ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}")); 109ok($trailwt ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}")); 110 111# LV vs LLV: /GA/ vs /GNA/ 112ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}")); 113ok($hangul ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}")); 114ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}")); 115 116# LVX vs LVV: /GAA/ vs /GA/.latinA 117ok($Collator->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A")); 118ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A")); 119ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A")); 120 121# LVX vs LVV: /GAA/ vs /GA/.hiraganaA 122ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}")); 123ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}")); 124ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}")); 125 126# LVX vs LVV: /GAA/ vs /GA/.hanja 127ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}")); 128ok($hangul ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}")); 129ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}")); 130 131# LVL vs LVT: /GA/./G/ vs /GAG/ 132ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}")); 133ok($hangul ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}")); 134ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}")); 135 136# LVT vs LVX: /GAG/ vs /GA/.latinA 137ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A")); 138ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A")); 139ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A")); 140 141# LVT vs LVX: /GAG/ vs /GA/.hiraganaA 142ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}")); 143ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}")); 144ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}")); 145 146# LVT vs LVX: /GAG/ vs /GA/.hanja 147ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); 148ok($hangul ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); 149ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}")); 150 151# LVT vs LVV: /GAG/ vs /GAA/ 152ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}")); 153ok($hangul ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}")); 154ok($trailwt ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}")); 155 156# LVL vs LVV: /GA/./G/ vs /GAA/ 157ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}")); 158ok($hangul ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}")); 159ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}")); 160 161# LV vs Syl(LV): /GA/ vs /[GA]/ 162ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}")); 163ok($hangul ->eq("\x{1100}\x{1161}", "\x{AC00}")); 164ok($trailwt ->eq("\x{1100}\x{1161}", "\x{AC00}")); 165 166# LVT vs Syl(LV)T: /GAG/ vs /[GA]G/ 167ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); 168ok($hangul ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); 169ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}")); 170 171# LVT vs Syl(LVT): /GAG/ vs /[GAG]/ 172ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); 173ok($hangul ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); 174ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}")); 175 176# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/ 177ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); 178ok($hangul ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); 179ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); 180 181# LVTT vs Syl(LVT).T: /GAGG/ vs /[GAG]G/ 182ok($Collator->gt("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}")); 183ok($hangul ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}")); 184ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}")); 185 186# LLVT vs L.Syl(LVT): /GGAG/ vs /G[GAG]/ 187ok($Collator->gt("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}")); 188ok($hangul ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}")); 189ok($trailwt ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}")); 190 191######################### 192 193# checks contraction in LVT: 194# weights of these contractions may be non-sense. 195 196my $hangcont = Unicode::Collate->new( 197 level => 3, 198 table => undef, 199 normalization => undef, 200 entry => <<'ENTRIES', 2011100 ; [.1831.0020.0002] # HANGUL CHOSEONG KIYEOK 2021101 ; [.1832.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK 2031161 ; [.188D.0020.0002] # HANGUL JUNGSEONG A 2041162 ; [.188E.0020.0002] # HANGUL JUNGSEONG AE 2051163 ; [.188F.0020.0002] # HANGUL JUNGSEONG YA 20611A8 ; [.18CF.0020.0002] # HANGUL JONGSEONG KIYEOK 20711A9 ; [.18D0.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK 2081161 11A9 ; [.0000.0000.0000] # A-GG <contraction> 2091100 1163 11A8 ; [.1000.0020.0002] # G-YA-G <contraction> eq. U+AC39 210ENTRIES 211); 212 213# contracted into VT 214ok($Collator->lt("\x{1101}", "\x{1101}\x{1161}\x{11A9}")); 215ok($hangcont->eq("\x{1101}", "\x{1101}\x{1161}\x{11A9}")); 216 217# not contracted into LVT but into VT 218ok($Collator->lt("\x{1100}", "\x{1100}\x{1161}\x{11A9}")); 219ok($hangcont->eq("\x{1100}", "\x{1100}\x{1161}\x{11A9}")); 220 221# contracted into LVT 222ok($Collator->gt("\x{1100}\x{1163}\x{11A8}", "\x{1100}")); 223ok($hangcont->lt("\x{1100}\x{1163}\x{11A8}", "\x{1100}")); 224 225# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/ 226ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); 227ok($hangcont->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}")); 228 229# LVT vs Syl(LVT): /GYAG/ vs /[GYAG]/ 230ok($Collator->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}")); 231ok($hangcont->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}")); 232 2331; 234__END__ 235