xref: /openbsd-src/gnu/usr.bin/perl/cpan/Unicode-Collate/t/contract.t (revision 5a38ef86d0b61900239c7913d24a05e7b88a58f0)
1
2BEGIN {
3    unless ('A' eq pack('U', 0x41)) {
4	print "1..0 # Unicode::Collate cannot pack a Unicode code point\n";
5	exit 0;
6    }
7    unless (0x41 == unpack('U', 'A')) {
8	print "1..0 # Unicode::Collate cannot get a Unicode code point\n";
9	exit 0;
10    }
11    if ($ENV{PERL_CORE}) {
12	chdir('t') if -d 't';
13	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
14    }
15}
16
17use strict;
18use warnings;
19BEGIN { $| = 1; print "1..118\n"; }
20my $count = 0;
21sub ok ($;$) {
22    my $p = my $r = shift;
23    if (@_) {
24	my $x = shift;
25	$p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
26    }
27    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
28}
29
30use Unicode::Collate;
31
32ok(1);
33
34#########################
35
36our $kjeEntry = <<'ENTRIES';
370301  ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT
380334  ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY
39043A  ; [.0D31.0020.0002.043A] # CYRILLIC SMALL LETTER KA
40041A  ; [.0D31.0020.0008.041A] # CYRILLIC CAPITAL LETTER KA
41045C  ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE
42043A 0301 ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE
43040C  ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE
44041A 0301 ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE
45ENTRIES
46
47our $aaEntry = <<'ENTRIES';
480304  ; [.0000.005A.0002.0304] # COMBINING MACRON (cc = 230)
49030A  ; [.0000.0043.0002.030A] # COMBINING RING ABOVE (cc = 230)
500327  ; [.0000.0055.0002.0327] # COMBINING CEDILLA (cc = 202)
51031A  ; [.0000.006B.0002.031A] # COMBINING LEFT ANGLE ABOVE (cc = 232)
520061  ; [.0A15.0020.0002.0061] # LATIN SMALL LETTER A
530041  ; [.0A15.0020.0008.0041] # LATIN CAPITAL LETTER A
54007A  ; [.0C13.0020.0002.007A] # LATIN SMALL LETTER Z
55005A  ; [.0C13.0020.0008.005A] # LATIN CAPITAL LETTER Z
5600E5  ; [.0C25.0020.0002.00E5] # LATIN SMALL LETTER A WITH RING ABOVE; QQCM
5700C5  ; [.0C25.0020.0008.00C5] # LATIN CAPITAL LETTER A WITH RING ABOVE; QQCM
580061 030A ; [.0C25.0020.0002.0061] # LATIN SMALL LETTER A WITH RING ABOVE
590041 030A ; [.0C25.0020.0008.0041] # LATIN CAPITAL LETTER A WITH RING ABOVE
60ENTRIES
61
62#########################
63
64my $kjeNoN = Unicode::Collate->new(
65    level => 1,
66    table => undef,
67    normalization => undef,
68    entry => $kjeEntry,
69);
70
71ok($kjeNoN->lt("\x{43A}", "\x{43A}\x{301}"));
72ok($kjeNoN->gt("\x{45C}", "\x{43A}\x{334}\x{301}"));
73ok($kjeNoN->eq("\x{43A}", "\x{43A}\x{334}\x{301}"));
74ok($kjeNoN->eq("\x{45C}", "\x{43A}\x{301}\x{334}"));
75
76# 5
77
78our %sortkeys;
79
80$sortkeys{'KAac'} = $kjeNoN->viewSortKey("\x{43A}\x{301}");
81$sortkeys{'KAta'} = $kjeNoN->viewSortKey("\x{43A}\x{334}\x{301}");
82$sortkeys{'KAat'} = $kjeNoN->viewSortKey("\x{43A}\x{301}\x{334}");
83
84eval { require Unicode::Normalize };
85if (!$@) {
86    my $kjeNFD = Unicode::Collate->new(
87	level => 1,
88	table => undef,
89	entry => $kjeEntry,
90    );
91
92ok($kjeNFD->lt("\x{43A}", "\x{43A}\x{301}"));
93ok($kjeNFD->eq("\x{45C}", "\x{43A}\x{334}\x{301}"));
94ok($kjeNFD->lt("\x{43A}", "\x{43A}\x{334}\x{301}"));
95ok($kjeNFD->eq("\x{45C}", "\x{43A}\x{301}\x{334}"));
96# 9
97
98    my $aaNFD = Unicode::Collate->new(
99	level => 1,
100	table => undef,
101	entry => $aaEntry,
102    );
103
104ok($aaNFD->lt("Z", "A\x{30A}\x{304}"));
105ok($aaNFD->eq("A", "A\x{304}\x{30A}"));
106ok($aaNFD->eq(pack('U', 0xE5), "A\x{30A}\x{304}"));
107ok($aaNFD->eq("A\x{304}", "A\x{304}\x{30A}"));
108ok($aaNFD->lt("Z", "A\x{327}\x{30A}"));
109ok($aaNFD->lt("Z", "A\x{30A}\x{327}"));
110ok($aaNFD->lt("Z", "A\x{31A}\x{30A}"));
111ok($aaNFD->lt("Z", "A\x{30A}\x{31A}"));
112# 17
113
114    my $aaPre = Unicode::Collate->new(
115	level => 1,
116	normalization => "prenormalized",
117	table => undef,
118	entry => $aaEntry,
119    );
120
121ok($aaPre->lt("Z", "A\x{30A}\x{304}"));
122ok($aaPre->eq("A", "A\x{304}\x{30A}"));
123ok($aaPre->eq(pack('U', 0xE5), "A\x{30A}\x{304}"));
124ok($aaPre->eq("A\x{304}", "A\x{304}\x{30A}"));
125ok($aaPre->lt("Z", "A\x{327}\x{30A}"));
126ok($aaPre->lt("Z", "A\x{30A}\x{327}"));
127ok($aaPre->lt("Z", "A\x{31A}\x{30A}"));
128ok($aaPre->lt("Z", "A\x{30A}\x{31A}"));
129# 25
130} else {
131    ok(1) for 1..20;
132}
133
134# again: loading Unicode::Normalize should not affect $kjeNoN.
135ok($kjeNoN->lt("\x{43A}", "\x{43A}\x{301}"));
136ok($kjeNoN->gt("\x{45C}", "\x{43A}\x{334}\x{301}"));
137ok($kjeNoN->eq("\x{43A}", "\x{43A}\x{334}\x{301}"));
138ok($kjeNoN->eq("\x{45C}", "\x{43A}\x{301}\x{334}"));
139
140ok($sortkeys{'KAac'}, $kjeNoN->viewSortKey("\x{43A}\x{301}"));
141ok($sortkeys{'KAta'}, $kjeNoN->viewSortKey("\x{43A}\x{334}\x{301}"));
142ok($sortkeys{'KAat'}, $kjeNoN->viewSortKey("\x{43A}\x{301}\x{334}"));
143
144# 32
145
146my $aaNoN = Unicode::Collate->new(
147    level => 1,
148    table => undef,
149    entry => $aaEntry,
150    normalization => undef,
151);
152
153ok($aaNoN->lt("Z", "A\x{30A}\x{304}"));
154ok($aaNoN->eq("A", "A\x{304}\x{30A}"));
155ok($aaNoN->eq(pack('U', 0xE5), "A\x{30A}\x{304}"));
156ok($aaNoN->eq("A\x{304}", "A\x{304}\x{30A}"));
157ok($aaNoN->eq("A", "A\x{327}\x{30A}"));
158ok($aaNoN->lt("Z", "A\x{30A}\x{327}"));
159ok($aaNoN->eq("A", "A\x{31A}\x{30A}"));
160ok($aaNoN->lt("Z", "A\x{30A}\x{31A}"));
161
162# 40
163
164# suppress contractions (not affected)
165
166my $kjeSup = Unicode::Collate->new(
167    level => 1,
168    table => undef,
169    normalization => undef,
170    entry => $kjeEntry,
171    suppress => [0x400..0x45F],
172);
173
174ok($kjeSup->lt("\x{43A}", "\x{43A}\x{301}"));
175ok($kjeSup->eq("\x{45C}", "\x{43A}\x{301}"));
176ok($kjeSup->lt("\x{41A}", "\x{41A}\x{301}"));
177ok($kjeSup->eq("\x{40C}", "\x{41A}\x{301}"));
178
179# 44
180
181our $tibetanEntry = <<'ENTRIES';
1820000           ; [.0000.0000.0000.0000] # [0000] NULL (in 6429)
1830FB2           ; [.205B.0020.0002.0FB2] # TIBETAN SUBJOINED LETTER RA
1840FB3           ; [.205E.0020.0002.0FB3] # TIBETAN SUBJOINED LETTER LA
1850F71           ; [.206D.0020.0002.0F71] # TIBETAN VOWEL SIGN AA
1860F72           ; [.206E.0020.0002.0F72] # TIBETAN VOWEL SIGN I
1870F73           ; [.206F.0020.0002.0F73] # TIBETAN VOWEL SIGN II
1880F71 0F72      ; [.206F.0020.0002.0F73] # TIBETAN VOWEL SIGN II
1890F80           ; [.2070.0020.0002.0F80] # TIBETAN VOWEL SIGN REVERSED I
1900F81           ; [.2071.0020.0002.0F81] # TIBETAN VOWEL SIGN REVERSED II
1910F71 0F80      ; [.2071.0020.0002.0F81] # TIBETAN VOWEL SIGN REVERSED II
1920F74           ; [.2072.0020.0002.0F74] # TIBETAN VOWEL SIGN U
1930F75           ; [.2073.0020.0002.0F75] # TIBETAN VOWEL SIGN UU
1940F71 0F74      ; [.2073.0020.0002.0F75] # TIBETAN VOWEL SIGN UU
1950F76           ; [.2074.0020.0002.0F76] # TIBETAN VOWEL SIGN VOCALIC R
1960FB2 0F80      ; [.2074.0020.0002.0F76] # TIBETAN VOWEL SIGN VOCALIC R
1970F77           ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR
1980FB2 0F81      ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR
1990FB2 0F71 0F80 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR
2000F78           ; [.2076.0020.0002.0F78] # TIBETAN VOWEL SIGN VOCALIC L
2010FB3 0F80      ; [.2076.0020.0002.0F78] # TIBETAN VOWEL SIGN VOCALIC L
2020F79           ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL
2030FB3 0F81      ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL
2040FB3 0F71 0F80 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL
205ENTRIES
206
207# ccc(0F71) = 129
208# ccc(0F80) = 130
209# 0F76 = 0FB2 0F80
210# 0F78 = 0FB3 0F80
211# 0F81 = 0F71 0F80
212# 0F77 = <compat> 0FB2 0F81 = 0FB2 0F71 0F80 = 0F76 0F71
213# 0F79 = <compat> 0FB3 0F81 = 0FB3 0F71 0F80 = 0F78 0F71
214
215eval { require Unicode::Normalize };
216if (!$@) {
217    my $tibNFD = Unicode::Collate->new(
218	table => undef,
219	entry => $tibetanEntry,
220	UCA_Version => 24,
221    );
222
223    # VOCALIC RR
224    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F81}"));
225    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F81}\x{334}"));
226    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F81}\0\x{334}"));
227    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{334}\x{F71}"));
228    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{F71}\x{334}"));
229    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{F71}\0\x{334}"));
230    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F71}\x{F80}"));
231    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{334}\x{F80}"));
232    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{F80}\x{334}"));
233    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{F80}\0\x{334}"));
234    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F80}\x{F71}"));
235    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{334}\x{F71}"));
236    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{F71}\x{334}"));
237    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{F71}\0\x{334}"));
238# 58
239
240    # VOCALIC LL
241    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F81}"));
242    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F81}\x{334}"));
243    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F81}\0\x{334}"));
244    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{334}\x{F71}"));
245    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{F71}\x{334}"));
246    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{F71}\0\x{334}"));
247    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F71}\x{F80}"));
248    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{334}\x{F80}"));
249    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{F80}\x{334}"));
250    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{F80}\0\x{334}"));
251    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F80}\x{F71}"));
252    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{334}\x{F71}"));
253    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{F71}\x{334}"));
254    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{F71}\0\x{334}"));
255# 72
256
257    my $a1 = "\x{FB2}\x{334}\x{F81}";
258    my $b1 = "\x{F77}\0\x{334}";
259    my $a2 = "\x{FB2}\x{334}\x{F81}";
260    my $b2 = "\x{FB2}\x{F80}\0\x{334}\x{F71}";
261
262    for my $v (qw/20 22 24 26 28/) {
263	my $tib = Unicode::Collate->new(
264	    table => undef,
265	    entry => $tibetanEntry,
266	    UCA_Version => $v,
267	);
268	my $long = 22 <= $v && $v <= 24;
269	ok($tib->cmp($a1, $b1), $long ? 0 : -1);
270	ok($tib->cmp($a2, $b2), $long ? 1 : 0);
271
272	$tib->change(long_contraction => 0);
273	ok($tib->cmp($a1, $b1), -1);
274	ok($tib->cmp($a2, $b2),  0);
275
276	$tib->change(long_contraction => 1);
277	ok($tib->cmp($a1, $b1), 0);
278	ok($tib->cmp($a2, $b2), 1);
279    }
280# 102
281
282    # UCA_Version => 22
283    ok($tibNFD->cmp($a1, $b1), 0);
284    ok($tibNFD->cmp($a2, $b2), 1);
285
286    $tibNFD->change(UCA_Version => 26); # not affect long_contraction
287    ok($tibNFD->cmp($a1, $b1), 0);
288    ok($tibNFD->cmp($a2, $b2), 1);
289# 106
290
291    my $discontNFD = Unicode::Collate->new(
292	table => undef,
293	UCA_Version => 22,
294	entry => <<'ENTRIES',
2950000  ; [.0000.0000.0000.0000] # [0000] NULL (in 6429)
2960301  ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT
2970300  ; [.0000.0035.0002.0300] # COMBINING GRAVE ACCENT
2980327  ; [.0000.0055.0002.0327] # COMBINING CEDILLA
2990334  ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY
3000041  ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A
3010041 0327 0301 ; [.0102.0020.0008.0041]
3020041 0300 ; [.0103.0020.0008.0041]
303ENTRIES
304    );
305
306    ok($discontNFD->eq("A\x{334}\x{327}\x{301}", "A\x{327}\x{301}\0\x{334}"));
307    ok($discontNFD->eq("A\x{327}\x{300}",        "A\x{300}\0\x{327}"));
308
309    $discontNFD->change(long_contraction => 0);
310    ok($discontNFD->lt("A\x{334}\x{327}\x{301}", "A\x{327}\x{301}\0\x{334}"));
311    ok($discontNFD->eq("A\x{334}\x{327}\x{301}", "A\0\x{327}\x{301}\x{334}"));
312    ok($discontNFD->eq("A\x{327}\x{300}",        "A\x{300}\0\x{327}"));
313
314    $discontNFD->change(level => 1);
315    ok($discontNFD->gt("A\x{327}\x{300}", "A\x{327}\0\x{300}"));
316
317    # discontiguous
318    ok($discontNFD->lt("A\x{334}\x{327}\x{301}", "A\x{327}\x{301}\0\x{334}"));
319    ok($discontNFD->lt("A\x{334}\x{327}\x{301}", "A\x{300}"));
320    ok($discontNFD->eq("A\x{334}\x{327}\x{301}", "A"));
321
322    # contiguous
323    ok($discontNFD->eq("A\x{327}\x{301}", "A\x{327}\x{301}\0\x{334}"));
324    ok($discontNFD->lt("A\x{327}\x{301}", "A\x{300}"));
325    ok($discontNFD->gt("A\x{327}\x{301}", "A"));
326} else {
327    ok(1) for 1..74;
328}
329# 118
330