xref: /openbsd-src/gnu/usr.bin/perl/cpan/Unicode-Collate/t/hangul.t (revision 50b7afb2c2c0993b0894d4e34bf857cb13ed9c80)
1
2BEGIN {
3    unless ("A" eq pack('U', 0x41)) {
4	print "1..0 # Unicode::Collate " .
5	    "cannot stringify a Unicode code point\n";
6	exit 0;
7    }
8    if ($ENV{PERL_CORE}) {
9	chdir('t') if -d 't';
10	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
11    }
12}
13
14use strict;
15use warnings;
16BEGIN { $| = 1; print "1..72\n"; }
17my $count = 0;
18sub ok ($;$) {
19    my $p = my $r = shift;
20    if (@_) {
21	my $x = shift;
22	$p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
23    }
24    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
25}
26
27use Unicode::Collate;
28
29ok(1);
30
31#########################
32
33# a standard collator (3.1.1)
34my $Collator = Unicode::Collate->new(
35  table => 'keys.txt',
36  normalization => undef,
37);
38
39
40# a collator for hangul sorting,
41# cf. http://std.dkuug.dk/JTC1/SC22/WG20/docs/documents.html
42#     http://std.dkuug.dk/JTC1/SC22/WG20/docs/n1051-hangulsort.pdf
43my $hangul = Unicode::Collate->new(
44  level => 3,
45  table => undef,
46  normalization => undef,
47
48  entry => <<'ENTRIES',
490061      ; [.0A15.0020.0002] # LATIN SMALL LETTER A
500041      ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A
51#1161     ; [.1800.0020.0002] # <comment> initial jungseong A
52#1163     ; [.1801.0020.0002] # <comment> initial jungseong YA
531100      ; [.1831.0020.0002] # choseong KIYEOK
541100 1161 ; [.1831.0020.0002][.1800.0020.0002] # G-A
551100 1163 ; [.1831.0020.0002][.1801.0020.0002] # G-YA
561101      ; [.1831.0020.0002][.1831.0020.0002] # choseong SSANGKIYEOK
571101 1161 ; [.1831.0020.0002][.1831.0020.0002][.1800.0020.0002] # GG-A
581101 1163 ; [.1831.0020.0002][.1831.0020.0002][.1801.0020.0002] # GG-YA
591102      ; [.1833.0020.0002] # choseong NIEUN
601102 1161 ; [.1833.0020.0002][.1800.0020.0002] # N-A
611102 1163 ; [.1833.0020.0002][.1801.0020.0002] # N-YA
623042      ; [.1921.0020.000E] # HIRAGANA LETTER A
6311A8      ; [.FE10.0020.0002] # jongseong KIYEOK
6411A9      ; [.FE10.0020.0002][.FE10.0020.0002] # jongseong SSANGKIYEOK
651161      ; [.FE20.0020.0002] # jungseong A <non-initial>
661163      ; [.FE21.0020.0002] # jungseong YA <non-initial>
67ENTRIES
68);
69
70ok(ref $hangul, "Unicode::Collate");
71
72my $trailwt = Unicode::Collate->new(
73  level => 3,
74  table => undef,
75  normalization => undef,
76  hangul_terminator => 16,
77
78  entry => <<'ENTRIES', # Term < Jongseong < Jungseong < Choseong
790061  ; [.0A15.0020.0002] # LATIN SMALL LETTER A
800041  ; [.0A15.0020.0008] # LATIN CAPITAL LETTER A
8111A8  ; [.1801.0020.0002] # HANGUL JONGSEONG KIYEOK
8211A9  ; [.1801.0020.0002][.1801.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK
831161  ; [.1831.0020.0002] # HANGUL JUNGSEONG A
841163  ; [.1832.0020.0002] # HANGUL JUNGSEONG YA
851100  ; [.1861.0020.0002] # HANGUL CHOSEONG KIYEOK
861101  ; [.1861.0020.0002][.1861.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK
871102  ; [.1862.0020.0002] # HANGUL CHOSEONG NIEUN
883042  ; [.1921.0020.000E] # HIRAGANA LETTER A
89ENTRIES
90);
91
92#########################
93
94# L(simp)L(simp) vs L(comp): /GGA/
95ok($Collator->lt("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}"));
96ok($hangul  ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}"));
97ok($trailwt ->eq("\x{1100}\x{1100}\x{1161}", "\x{1101}\x{1161}"));
98
99# L(simp) vs L(simp)L(simp): /GA/ vs /GGA/
100ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}"));
101ok($hangul  ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}"));
102ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1100}\x{1161}"));
103
104# T(simp)T(simp) vs T(comp): /AGG/
105ok($Collator->lt("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}"));
106ok($hangul  ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}"));
107ok($trailwt ->eq("\x{1161}\x{11A8}\x{11A8}", "\x{1161}\x{11A9}"));
108
109# T(simp) vs T(simp)T(simp): /AG/ vs /AGG/
110ok($Collator->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}"));
111ok($hangul  ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}"));
112ok($trailwt ->lt("\x{1161}\x{11A8}", "\x{1161}\x{11A8}\x{11A8}"));
113
114# LV vs LLV: /GA/ vs /GNA/
115ok($Collator->gt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}"));
116ok($hangul  ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}"));
117ok($trailwt ->lt("\x{1100}\x{1161}", "\x{1100}\x{1102}\x{1161}"));
118
119# LVX vs LVV: /GAA/ vs /GA/.latinA
120ok($Collator->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
121ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
122ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}A"));
123
124# LVX vs LVV: /GAA/ vs /GA/.hiraganaA
125ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
126ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
127ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{3042}"));
128
129# LVX vs LVV: /GAA/ vs /GA/.hanja
130ok($Collator->lt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
131ok($hangul  ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
132ok($trailwt ->gt("\x{1100}\x{1161}\x{1161}", "\x{1100}\x{1161}\x{4E00}"));
133
134# LVL vs LVT: /GA/./G/ vs /GAG/
135ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
136ok($hangul  ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
137ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{11A8}"));
138
139# LVT vs LVX: /GAG/ vs /GA/.latinA
140ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
141ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
142ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}A"));
143
144# LVT vs LVX: /GAG/ vs /GA/.hiraganaA
145ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
146ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
147ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{3042}"));
148
149# LVT vs LVX: /GAG/ vs /GA/.hanja
150ok($Collator->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
151ok($hangul  ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
152ok($trailwt ->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{4E00}"));
153
154# LVT vs LVV: /GAG/ vs /GAA/
155ok($Collator->gt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}"));
156ok($hangul  ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}"));
157ok($trailwt ->lt("\x{1100}\x{1161}\x{11A8}", "\x{1100}\x{1161}\x{1161}"));
158
159# LVL vs LVV: /GA/./G/ vs /GAA/
160ok($Collator->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}"));
161ok($hangul  ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}"));
162ok($trailwt ->lt("\x{1100}\x{1161}\x{1100}", "\x{1100}\x{1161}\x{1161}"));
163
164# LV vs Syl(LV): /GA/ vs /[GA]/
165ok($Collator->eq("\x{1100}\x{1161}", "\x{AC00}"));
166ok($hangul  ->eq("\x{1100}\x{1161}", "\x{AC00}"));
167ok($trailwt ->eq("\x{1100}\x{1161}", "\x{AC00}"));
168
169# LVT vs Syl(LV)T: /GAG/ vs /[GA]G/
170ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
171ok($hangul  ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
172ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC00}\x{11A8}"));
173
174# LVT vs Syl(LVT): /GAG/ vs /[GAG]/
175ok($Collator->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
176ok($hangul  ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
177ok($trailwt ->eq("\x{1100}\x{1161}\x{11A8}", "\x{AC01}"));
178
179# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
180ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
181ok($hangul  ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
182ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
183
184# LVTT vs Syl(LVT).T: /GAGG/ vs /[GAG]G/
185ok($Collator->gt("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}"));
186ok($hangul  ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}"));
187ok($trailwt ->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC01}\x{11A8}"));
188
189# LLVT vs L.Syl(LVT): /GGAG/ vs /G[GAG]/
190ok($Collator->gt("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}"));
191ok($hangul  ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}"));
192ok($trailwt ->eq("\x{1101}\x{1161}\x{11A8}", "\x{1100}\x{AC01}"));
193
194#########################
195
196# checks contraction in LVT:
197# weights of these contractions may be non-sense.
198
199my $hangcont = Unicode::Collate->new(
200  level => 3,
201  table => undef,
202  normalization => undef,
203  entry => <<'ENTRIES',
2041100  ; [.1831.0020.0002] # HANGUL CHOSEONG KIYEOK
2051101  ; [.1832.0020.0002] # HANGUL CHOSEONG SSANGKIYEOK
2061161  ; [.188D.0020.0002] # HANGUL JUNGSEONG A
2071162  ; [.188E.0020.0002] # HANGUL JUNGSEONG AE
2081163  ; [.188F.0020.0002] # HANGUL JUNGSEONG YA
20911A8  ; [.18CF.0020.0002] # HANGUL JONGSEONG KIYEOK
21011A9  ; [.18D0.0020.0002] # HANGUL JONGSEONG SSANGKIYEOK
2111161 11A9 ; [.0000.0000.0000] # A-GG <contraction>
2121100 1163 11A8 ; [.1000.0020.0002] # G-YA-G <contraction> eq. U+AC39
213ENTRIES
214);
215
216# contracted into VT
217ok($Collator->lt("\x{1101}", "\x{1101}\x{1161}\x{11A9}"));
218ok($hangcont->eq("\x{1101}", "\x{1101}\x{1161}\x{11A9}"));
219
220# not contracted into LVT but into VT
221ok($Collator->lt("\x{1100}", "\x{1100}\x{1161}\x{11A9}"));
222ok($hangcont->eq("\x{1100}", "\x{1100}\x{1161}\x{11A9}"));
223
224# contracted into LVT
225ok($Collator->gt("\x{1100}\x{1163}\x{11A8}", "\x{1100}"));
226ok($hangcont->lt("\x{1100}\x{1163}\x{11A8}", "\x{1100}"));
227
228# LVTT vs Syl(LVTT): /GAGG/ vs /[GAGG]/
229ok($Collator->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
230ok($hangcont->eq("\x{1100}\x{1161}\x{11A9}", "\x{AC02}"));
231
232# LVT vs Syl(LVT): /GYAG/ vs /[GYAG]/
233ok($Collator->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}"));
234ok($hangcont->eq("\x{1100}\x{1163}\x{11A8}", "\x{AC39}"));
235
2361;
237__END__
238