xref: /openbsd-src/gnu/usr.bin/perl/cpan/Unicode-Collate/t/hangul.t (revision 256a93a44f36679bee503f12e49566c2183f6181)
1
2BEGIN {
3    if ($ENV{PERL_CORE}) {
4	chdir('t') if -d 't';
5	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
6    }
7}
8
9use strict;
10use warnings;
11BEGIN { $| = 1; print "1..72\n"; }
12my $count = 0;
13sub ok ($;$) {
14    my $p = my $r = shift;
15    if (@_) {
16	my $x = shift;
17	$p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
18    }
19    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
20}
21
22use Unicode::Collate;
23
24ok(1);
25
26sub _pack_U   { Unicode::Collate::pack_U(@_) }
27sub _unpack_U { Unicode::Collate::unpack_U(@_) }
28
29#########################
30
31# a standard collator (3.1.1)
32my $Collator = Unicode::Collate->new(
33  table => 'keys.txt',
34  normalization => undef,
35);
36
37
38# a collator for hangul sorting,
39# cf. http://std.dkuug.dk/JTC1/SC22/WG20/docs/documents.html
40#     http://std.dkuug.dk/JTC1/SC22/WG20/docs/n1051-hangulsort.pdf
41my $hangul = Unicode::Collate->new(
42  level => 3,
43  table => undef,
44  normalization => undef,
45
46  entry => <<'ENTRIES',
470061      ; [.0A15.0020.0002] # LATIN SMALL LETTER A
480041      ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A
49#1161     ; [.1800.0020.0002] # <comment> initial jungseong A
50#1163     ; [.1801.0020.0002] # <comment> initial jungseong YA
511100      ; [.1831.0020.0002] # choseong KIYEOK
521100 1161 ; [.1831.0020.0002][.1800.0020.0002] # G-A
531100 1163 ; [.1831.0020.0002][.1801.0020.0002] # G-YA
541101      ; [.1831.0020.0002][.1831.0020.0002] # choseong SSANGKIYEOK
551101 1161 ; [.1831.0020.0002][.1831.0020.0002][.1800.0020.0002] # GG-A
561101 1163 ; [.1831.0020.0002][.1831.0020.0002][.1801.0020.0002] # GG-YA
571102      ; [.1833.0020.0002] # choseong NIEUN
581102 1161 ; [.1833.0020.0002][.1800.0020.0002] # N-A
591102 1163 ; [.1833.0020.0002][.1801.0020.0002] # N-YA
603042      ; [.1921.0020.000E] # HIRAGANA LETTER A
6111A8      ; [.FE10.0020.0002] # jongseong KIYEOK
6211A9      ; [.FE10.0020.0002][.FE10.0020.0002] # jongseong SSANGKIYEOK
631161      ; [.FE20.0020.0002] # jungseong A <non-initial>
641163      ; [.FE21.0020.0002] # jungseong YA <non-initial>
65ENTRIES
66);
67
68ok(ref $hangul, "Unicode::Collate");
69
70my $trailwt = Unicode::Collate->new(
71  level => 3,
72  table => undef,
73  normalization => undef,
74  hangul_terminator => 16,
75
76  entry => <<'ENTRIES', # Term < Jongseong < Jungseong < Choseong
770061  ; [.0A15.0020.0002] # LATIN SMALL LETTER A
780041  ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A
7911A8  ; [.1801.0020.0002] # HANGUL JONGSEONG KIYEOK
8011A9  ; [.1801.0020.0002][.1801.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK
811161  ; [.1831.0020.0002] # HANGUL JUNGSEONG A
821163  ; [.1832.0020.0002] # HANGUL JUNGSEONG YA
831100  ; [.1861.0020.0002] # HANGUL CHOSEONG KIYEOK
841101  ; [.1861.0020.0002][.1861.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK
851102  ; [.1862.0020.0002] # HANGUL CHOSEONG NIEUN
863042  ; [.1921.0020.000E] # HIRAGANA LETTER A
87ENTRIES
88);
89
90#########################
91
92# L(simp)L(simp) vs L(comp): /GGA/
93ok($Collator->lt("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}"));
94ok($hangul  ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}"));
95ok($trailwt ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}"));
96
97# L(simp) vs L(simp)L(simp): /GA/ vs /GGA/
98ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}"));
99ok($hangul  ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}"));
100ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}"));
101
102# T(simp)T(simp) vs T(comp): /AGG/
103ok($Collator->lt("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}"));
104ok($hangul  ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}"));
105ok($trailwt ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}"));
106
107# T(simp) vs T(simp)T(simp): /AG/ vs /AGG/
108ok($Collator->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}"));
109ok($hangul  ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}"));
110ok($trailwt ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}"));
111
112# LV vs LLV: /GA/ vs /GNA/
113ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}"));
114ok($hangul  ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}"));
115ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}"));
116
117# LVX vs LVV: /GAA/ vs /GA/.latinA
118ok($Collator->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
119ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
120ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
121
122# LVX vs LVV: /GAA/ vs /GA/.hiraganaA
123ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
124ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
125ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
126
127# LVX vs LVV: /GAA/ vs /GA/.hanja
128ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
129ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
130ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
131
132# LVL vs LVT: /GA/./G/ vs /GAG/
133ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
134ok($hangul  ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
135ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
136
137# LVT vs LVX: /GAG/ vs /GA/.latinA
138ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
139ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
140ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
141
142# LVT vs LVX: /GAG/ vs /GA/.hiraganaA
143ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
144ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
145ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
146
147# LVT vs LVX: /GAG/ vs /GA/.hanja
148ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
149ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
150ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
151
152# LVT vs LVV: /GAG/ vs /GAA/
153ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}"));
154ok($hangul  ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}"));
155ok($trailwt ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}"));
156
157# LVL vs LVV: /GA/./G/ vs /GAA/
158ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}"));
159ok($hangul  ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}"));
160ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}"));
161
162# LV vs Syl(LV): /GA/ vs /[GA]/
163ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}"));
164ok($hangul  ->eq("\x{1100}\x{1161}", "\x{AC00}"));
165ok($trailwt ->eq("\x{1100}\x{1161}", "\x{AC00}"));
166
167# LVT vs Syl(LV)T: /GAG/ vs /[GA]G/
168ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
169ok($hangul  ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
170ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
171
172# LVT vs Syl(LVT): /GAG/ vs /[GAG]/
173ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
174ok($hangul  ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
175ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
176
177# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
178ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
179ok($hangul  ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
180ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
181
182# LVTT vs Syl(LVT).T: /GAGG/ vs /[GAG]G/
183ok($Collator->gt("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}"));
184ok($hangul  ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}"));
185ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}"));
186
187# LLVT vs L.Syl(LVT): /GGAG/ vs /G[GAG]/
188ok($Collator->gt("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}"));
189ok($hangul  ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}"));
190ok($trailwt ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}"));
191
192#########################
193
194# checks contraction in LVT:
195# weights of these contractions may be non-sense.
196
197my $hangcont = Unicode::Collate->new(
198  level => 3,
199  table => undef,
200  normalization => undef,
201  entry => <<'ENTRIES',
2021100  ; [.1831.0020.0002] # HANGUL CHOSEONG KIYEOK
2031101  ; [.1832.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK
2041161  ; [.188D.0020.0002] # HANGUL JUNGSEONG A
2051162  ; [.188E.0020.0002] # HANGUL JUNGSEONG AE
2061163  ; [.188F.0020.0002] # HANGUL JUNGSEONG YA
20711A8  ; [.18CF.0020.0002] # HANGUL JONGSEONG KIYEOK
20811A9  ; [.18D0.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK
2091161 11A9 ; [.0000.0000.0000] # A-GG <contraction>
2101100 1163 11A8 ; [.1000.0020.0002] # G-YA-G <contraction> eq. U+AC39
211ENTRIES
212);
213
214# contracted into VT
215ok($Collator->lt("\x{1101}", "\x{1101}\x{1161}\x{11A9}"));
216ok($hangcont->eq("\x{1101}", "\x{1101}\x{1161}\x{11A9}"));
217
218# not contracted into LVT but into VT
219ok($Collator->lt("\x{1100}", "\x{1100}\x{1161}\x{11A9}"));
220ok($hangcont->eq("\x{1100}", "\x{1100}\x{1161}\x{11A9}"));
221
222# contracted into LVT
223ok($Collator->gt("\x{1100}\x{1163}\x{11A8}", "\x{1100}"));
224ok($hangcont->lt("\x{1100}\x{1163}\x{11A8}", "\x{1100}"));
225
226# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
227ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
228ok($hangcont->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
229
230# LVT vs Syl(LVT): /GYAG/ vs /[GYAG]/
231ok($Collator->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}"));
232ok($hangcont->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}"));
233
2341;
235__END__
236