# xref: /openbsd-src/gnu/usr.bin/perl/cpan/Unicode-Collate/t/hangul.t (revision 256a93a44f36679bee503f12e49566c2183f6181)

# When the PERL_CORE environment variable is set, change into the 't'
# directory (if one exists) and replace @INC with the relative library
# path: '::lib' when $^O is 'MacOS', '../lib' otherwise.
BEGIN {
    if ($ENV{PERL_CORE}) {
        chdir('t') if -d 't';
        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use strict;
use warnings;
# Turn on autoflush for the selected output handle and print the TAP plan
# at compile time: this script emits 72 numbered results.
BEGIN { $| = 1; print "1..72\n"; }
# Running number of the most recently printed test result.
my $count = 0;

# ok($got)            - passes when $got is true.
# ok($got, $expected) - passes when both values are undef, or when both are
#                       defined and string-equal ($got eq $expected).
#
# Prints one TAP line, "ok N" or "not ok N", where N is the incremented
# counter.  The ($;$) prototype is kept on purpose: it makes each argument
# expression evaluate in scalar context at the call site.
sub ok ($;$) {
    my $p = my $r = shift;
    if (@_) {
        my $x = shift;
        # undef only matches undef; otherwise compare as strings.
        $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
    }
    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
}

use Unicode::Collate;

# Test 1: reached only if the module above loaded without dying.
ok(1);

# Thin wrappers that forward their arguments unchanged to the
# Unicode::Collate::pack_U / Unicode::Collate::unpack_U functions.
sub _pack_U   { Unicode::Collate::pack_U(@_) }
sub _unpack_U { Unicode::Collate::unpack_U(@_) }

#########################

# a standard collator (3.1.1)
# Uses the 'keys.txt' weight table; no normalization form is requested.
my $Collator = Unicode::Collate->new(
  table => 'keys.txt',
  normalization => undef,
);


# a collator for hangul sorting,
# cf. http://std.dkuug.dk/JTC1/SC22/WG20/docs/documents.html
#     http://std.dkuug.dk/JTC1/SC22/WG20/docs/n1051-hangulsort.pdf
# No table file is loaded; all weights come from the inline entries below,
# which include contractions for choseong + jungseong pairs.
my $hangul = Unicode::Collate->new(
  level => 3,
  table => undef,
  normalization => undef,

  entry => <<'ENTRIES',
0061      ; [.0A15.0020.0002] # LATIN SMALL LETTER A
0041      ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A
#1161     ; [.1800.0020.0002] # <comment> initial jungseong A
#1163     ; [.1801.0020.0002] # <comment> initial jungseong YA
1100      ; [.1831.0020.0002] # choseong KIYEOK
1100 1161 ; [.1831.0020.0002][.1800.0020.0002] # G-A
1100 1163 ; [.1831.0020.0002][.1801.0020.0002] # G-YA
1101      ; [.1831.0020.0002][.1831.0020.0002] # choseong SSANGKIYEOK
1101 1161 ; [.1831.0020.0002][.1831.0020.0002][.1800.0020.0002] # GG-A
1101 1163 ; [.1831.0020.0002][.1831.0020.0002][.1801.0020.0002] # GG-YA
1102      ; [.1833.0020.0002] # choseong NIEUN
1102 1161 ; [.1833.0020.0002][.1800.0020.0002] # N-A
1102 1163 ; [.1833.0020.0002][.1801.0020.0002] # N-YA
3042      ; [.1921.0020.000E] # HIRAGANA LETTER A
11A8      ; [.FE10.0020.0002] # jongseong KIYEOK
11A9      ; [.FE10.0020.0002][.FE10.0020.0002] # jongseong SSANGKIYEOK
1161      ; [.FE20.0020.0002] # jungseong A <non-initial>
1163      ; [.FE21.0020.0002] # jungseong YA <non-initial>
ENTRIES
);

# The constructor must have returned a Unicode::Collate object.
ok(ref $hangul, "Unicode::Collate");

# A collator built only from inline entries, with hangul_terminator set to 16.
# The entry weights are ordered Jongseong < Jungseong < Choseong.
my $trailwt = Unicode::Collate->new(
  level => 3,
  table => undef,
  normalization => undef,
  hangul_terminator => 16,

  entry => <<'ENTRIES', # Term < Jongseong < Jungseong < Choseong
0061  ; [.0A15.0020.0002] # LATIN SMALL LETTER A
0041  ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A
11A8  ; [.1801.0020.0002] # HANGUL JONGSEONG KIYEOK
11A9  ; [.1801.0020.0002][.1801.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK
1161  ; [.1831.0020.0002] # HANGUL JUNGSEONG A
1163  ; [.1832.0020.0002] # HANGUL JUNGSEONG YA
1100  ; [.1861.0020.0002] # HANGUL CHOSEONG KIYEOK
1101  ; [.1861.0020.0002][.1861.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK
1102  ; [.1862.0020.0002] # HANGUL CHOSEONG NIEUN
3042  ; [.1921.0020.000E] # HIRAGANA LETTER A
ENTRIES
);

#########################

# Each group below runs the same pair of strings through the three
# collators ($Collator, $hangul, $trailwt) and checks the expected ordering.
# L = choseong, V = jungseong, T = jongseong, X = non-jamo, Syl = syllable.

# L(simp)L(simp) vs L(comp): /GGA/
ok($Collator->lt("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}"));
ok($hangul  ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}"));
ok($trailwt ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}"));

# L(simp) vs L(simp)L(simp): /GA/ vs /GGA/
ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}"));
ok($hangul  ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}"));
ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}"));

# T(simp)T(simp) vs T(comp): /AGG/
ok($Collator->lt("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}"));
ok($hangul  ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}"));
ok($trailwt ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}"));

# T(simp) vs T(simp)T(simp): /AG/ vs /AGG/
ok($Collator->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}"));
ok($hangul  ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}"));
ok($trailwt ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}"));

# LV vs LLV: /GA/ vs /GNA/
ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}"));
ok($hangul  ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}"));
ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}"));

# LVV vs LVX: /GAA/ vs /GA/.latinA
ok($Collator->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));

# LVV vs LVX: /GAA/ vs /GA/.hiraganaA
ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));

# LVV vs LVX: /GAA/ vs /GA/.hanja
ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));

# LVL vs LVT: /GA/./G/ vs /GAG/
ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
ok($hangul  ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));

# LVT vs LVX: /GAG/ vs /GA/.latinA
ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));

# LVT vs LVX: /GAG/ vs /GA/.hiraganaA
ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));

# LVT vs LVX: /GAG/ vs /GA/.hanja
ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));

# LVT vs LVV: /GAG/ vs /GAA/
ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}"));
ok($hangul  ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}"));
ok($trailwt ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}"));

# LVL vs LVV: /GA/./G/ vs /GAA/
ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}"));
ok($hangul  ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}"));
ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}"));

# LV vs Syl(LV): /GA/ vs /[GA]/
ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}"));
ok($hangul  ->eq("\x{1100}\x{1161}", "\x{AC00}"));
ok($trailwt ->eq("\x{1100}\x{1161}", "\x{AC00}"));

# LVT vs Syl(LV)T: /GAG/ vs /[GA]G/
ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
ok($hangul  ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));

# LVT vs Syl(LVT): /GAG/ vs /[GAG]/
ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
ok($hangul  ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));

# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
ok($hangul  ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));

# LVTT vs Syl(LVT).T: /GAGG/ vs /[GAG]G/
ok($Collator->gt("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}"));
ok($hangul  ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}"));
ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}"));

# LLVT vs L.Syl(LVT): /GGAG/ vs /G[GAG]/
ok($Collator->gt("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}"));
ok($hangul  ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}"));
ok($trailwt ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}"));

#########################

# checks contraction in LVT:
# the weights of these contractions may be nonsense.

# Collator built only from inline entries; the last two entries are
# contractions (a VT pair with all-zero weights, and an LVT triple).
my $hangcont = Unicode::Collate->new(
  level => 3,
  table => undef,
  normalization => undef,
  entry => <<'ENTRIES',
1100  ; [.1831.0020.0002] # HANGUL CHOSEONG KIYEOK
1101  ; [.1832.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK
1161  ; [.188D.0020.0002] # HANGUL JUNGSEONG A
1162  ; [.188E.0020.0002] # HANGUL JUNGSEONG AE
1163  ; [.188F.0020.0002] # HANGUL JUNGSEONG YA
11A8  ; [.18CF.0020.0002] # HANGUL JONGSEONG KIYEOK
11A9  ; [.18D0.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK
1161 11A9 ; [.0000.0000.0000] # A-GG <contraction>
1100 1163 11A8 ; [.1000.0020.0002] # G-YA-G <contraction> eq. U+AC39
ENTRIES
);

# Each pair below checks the same strings against the standard collator
# and against $hangcont, whose inline table defines the contractions.

# contracted into VT
ok($Collator->lt("\x{1101}", "\x{1101}\x{1161}\x{11A9}"));
ok($hangcont->eq("\x{1101}", "\x{1101}\x{1161}\x{11A9}"));

# not contracted into LVT but into VT
ok($Collator->lt("\x{1100}", "\x{1100}\x{1161}\x{11A9}"));
ok($hangcont->eq("\x{1100}", "\x{1100}\x{1161}\x{11A9}"));

# contracted into LVT
ok($Collator->gt("\x{1100}\x{1163}\x{11A8}", "\x{1100}"));
ok($hangcont->lt("\x{1100}\x{1163}\x{11A8}", "\x{1100}"));

# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
ok($hangcont->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));

# LVT vs Syl(LVT): /GYAG/ vs /[GYAG]/
ok($Collator->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}"));
ok($hangcont->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}"));

1;
__END__