1BEGIN {
2    unless ("A" eq pack('U', 0x41)) {
3	print "1..0 # Unicode::Collate " .
4	    "cannot stringify a Unicode code point\n";
5	exit 0;
6    }
7}
8
9BEGIN {
10    if ($ENV{PERL_CORE}) {
11        chdir('t') if -d 't';
12        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
13    }
14}
15
16use Test;
17BEGIN { plan tests => 72 };
18
19use strict;
20use warnings;
21use Unicode::Collate;
22
23use vars qw($IsEBCDIC);
24$IsEBCDIC = ord("A") != 0x41;
25
26#########################
27
28ok(1);
29
30# a standard collator (3.1.1)
31my $Collator = Unicode::Collate->new(
32  table => 'keys.txt',
33  normalization => undef,
34);
35
36
37# a collator for hangul sorting,
38# cf. http://std.dkuug.dk/JTC1/SC22/WG20/docs/documents.html
39#     http://std.dkuug.dk/JTC1/SC22/WG20/docs/n1051-hangulsort.pdf
40my $hangul = Unicode::Collate->new(
41  level => 3,
42  table => undef,
43  normalization => undef,
44
45  entry => <<'ENTRIES',
460061      ; [.0A15.0020.0002] # LATIN SMALL LETTER A
470041      ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A
48#1161     ; [.1800.0020.0002] # <comment> initial jungseong A
49#1163     ; [.1801.0020.0002] # <comment> initial jungseong YA
501100      ; [.1831.0020.0002] # choseong KIYEOK
511100 1161 ; [.1831.0020.0002][.1800.0020.0002] # G-A
521100 1163 ; [.1831.0020.0002][.1801.0020.0002] # G-YA
531101      ; [.1831.0020.0002][.1831.0020.0002] # choseong SSANGKIYEOK
541101 1161 ; [.1831.0020.0002][.1831.0020.0002][.1800.0020.0002] # GG-A
551101 1163 ; [.1831.0020.0002][.1831.0020.0002][.1801.0020.0002] # GG-YA
561102      ; [.1833.0020.0002] # choseong NIEUN
571102 1161 ; [.1833.0020.0002][.1800.0020.0002] # N-A
581102 1163 ; [.1833.0020.0002][.1801.0020.0002] # N-YA
593042      ; [.1921.0020.000E] # HIRAGANA LETTER A
6011A8      ; [.FE10.0020.0002] # jongseong KIYEOK
6111A9      ; [.FE10.0020.0002][.FE10.0020.0002] # jongseong SSANGKIYEOK
621161      ; [.FE20.0020.0002] # jungseong A <non-initial>
631163      ; [.FE21.0020.0002] # jungseong YA <non-initial>
64ENTRIES
65);
66
67ok(ref $hangul, "Unicode::Collate");
68
69my $trailwt = Unicode::Collate->new(
70  level => 3,
71  table => undef,
72  normalization => undef,
73  hangul_terminator => 16,
74
75  entry => <<'ENTRIES', # Term < Jongseong < Jungseong < Choseong
760061  ; [.0A15.0020.0002] # LATIN SMALL LETTER A
770041  ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A
7811A8  ; [.1801.0020.0002] # HANGUL JONGSEONG KIYEOK
7911A9  ; [.1801.0020.0002][.1801.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK
801161  ; [.1831.0020.0002] # HANGUL JUNGSEONG A
811163  ; [.1832.0020.0002] # HANGUL JUNGSEONG YA
821100  ; [.1861.0020.0002] # HANGUL CHOSEONG KIYEOK
831101  ; [.1861.0020.0002][.1861.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK
841102  ; [.1862.0020.0002] # HANGUL CHOSEONG NIEUN
853042  ; [.1921.0020.000E] # HIRAGANA LETTER A
86ENTRIES
87);
88
89#########################
90
91# L(simp)L(simp) vs L(comp): /GGA/
92ok($Collator->lt("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}"));
93ok($hangul  ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}"));
94ok($trailwt ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}"));
95
96# L(simp) vs L(simp)L(simp): /GA/ vs /GGA/
97ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}"));
98ok($hangul  ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}"));
99ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}"));
100
101# T(simp)T(simp) vs T(comp): /AGG/
102ok($Collator->lt("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}"));
103ok($hangul  ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}"));
104ok($trailwt ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}"));
105
106# T(simp) vs T(simp)T(simp): /AG/ vs /AGG/
107ok($Collator->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}"));
108ok($hangul  ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}"));
109ok($trailwt ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}"));
110
111# LV vs LLV: /GA/ vs /GNA/
112ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}"));
113ok($hangul  ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}"));
114ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}"));
115
116# LVX vs LVV: /GAA/ vs /GA/.latinA
117ok($Collator->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
118ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
119ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
120
121# LVX vs LVV: /GAA/ vs /GA/.hiraganaA
122ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
123ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
124ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
125
126# LVX vs LVV: /GAA/ vs /GA/.hanja
127ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
128ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
129ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
130
131# LVL vs LVT: /GA/./G/ vs /GAG/
132ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
133ok($hangul  ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
134ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
135
136# LVT vs LVX: /GAG/ vs /GA/.latinA
137ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
138ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
139ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
140
141# LVT vs LVX: /GAG/ vs /GA/.hiraganaA
142ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
143ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
144ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
145
146# LVT vs LVX: /GAG/ vs /GA/.hanja
147ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
148ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
149ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
150
151# LVT vs LVV: /GAG/ vs /GAA/
152ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}"));
153ok($hangul  ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}"));
154ok($trailwt ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}"));
155
156# LVL vs LVV: /GA/./G/ vs /GAA/
157ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}"));
158ok($hangul  ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}"));
159ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}"));
160
161# LV vs Syl(LV): /GA/ vs /[GA]/
162ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}"));
163ok($hangul  ->eq("\x{1100}\x{1161}", "\x{AC00}"));
164ok($trailwt ->eq("\x{1100}\x{1161}", "\x{AC00}"));
165
166# LVT vs Syl(LV)T: /GAG/ vs /[GA]G/
167ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
168ok($hangul  ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
169ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
170
171# LVT vs Syl(LVT): /GAG/ vs /[GAG]/
172ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
173ok($hangul  ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
174ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
175
176# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
177ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
178ok($hangul  ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
179ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
180
181# LVTT vs Syl(LVT).T: /GAGG/ vs /[GAG]G/
182ok($Collator->gt("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}"));
183ok($hangul  ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}"));
184ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}"));
185
186# LLVT vs L.Syl(LVT): /GGAG/ vs /G[GAG]/
187ok($Collator->gt("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}"));
188ok($hangul  ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}"));
189ok($trailwt ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}"));
190
191#########################
192
193# checks contraction in LVT:
194# weights of these contractions may be non-sense.
195
196my $hangcont = Unicode::Collate->new(
197  level => 3,
198  table => undef,
199  normalization => undef,
200  entry => <<'ENTRIES',
2011100  ; [.1831.0020.0002] # HANGUL CHOSEONG KIYEOK
2021101  ; [.1832.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK
2031161  ; [.188D.0020.0002] # HANGUL JUNGSEONG A
2041162  ; [.188E.0020.0002] # HANGUL JUNGSEONG AE
2051163  ; [.188F.0020.0002] # HANGUL JUNGSEONG YA
20611A8  ; [.18CF.0020.0002] # HANGUL JONGSEONG KIYEOK
20711A9  ; [.18D0.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK
2081161 11A9 ; [.0000.0000.0000] # A-GG <contraction>
2091100 1163 11A8 ; [.1000.0020.0002] # G-YA-G <contraction> eq. U+AC39
210ENTRIES
211);
212
213# contracted into VT
214ok($Collator->lt("\x{1101}", "\x{1101}\x{1161}\x{11A9}"));
215ok($hangcont->eq("\x{1101}", "\x{1101}\x{1161}\x{11A9}"));
216
217# not contracted into LVT but into VT
218ok($Collator->lt("\x{1100}", "\x{1100}\x{1161}\x{11A9}"));
219ok($hangcont->eq("\x{1100}", "\x{1100}\x{1161}\x{11A9}"));
220
221# contracted into LVT
222ok($Collator->gt("\x{1100}\x{1163}\x{11A8}", "\x{1100}"));
223ok($hangcont->lt("\x{1100}\x{1163}\x{11A8}", "\x{1100}"));
224
225# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
226ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
227ok($hangcont->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
228
229# LVT vs Syl(LVT): /GYAG/ vs /[GYAG]/
230ok($Collator->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}"));
231ok($hangcont->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}"));
232
2331;
234__END__
235