xref: /openbsd-src/gnu/usr.bin/perl/cpan/Unicode-Collate/t/contract.t (revision 50b7afb2c2c0993b0894d4e34bf857cb13ed9c80)
1
2BEGIN {
3    unless ("A" eq pack('U', 0x41)) {
4	print "1..0 # Unicode::Collate " .
5	    "cannot stringify a Unicode code point\n";
6	exit 0;
7    }
8    if ($ENV{PERL_CORE}) {
9	chdir('t') if -d 't';
10	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
11    }
12}
13
14use strict;
15use warnings;
16BEGIN { $| = 1; print "1..74\n"; }
17my $count = 0;
18sub ok ($;$) {
19    my $p = my $r = shift;
20    if (@_) {
21	my $x = shift;
22	$p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
23    }
24    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
25}
26
27use Unicode::Collate;
28
29ok(1);
30
31#########################
32
33our $kjeEntry = <<'ENTRIES';
340301  ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT
350334  ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY
36043A  ; [.0D31.0020.0002.043A] # CYRILLIC SMALL LETTER KA
37041A  ; [.0D31.0020.0008.041A] # CYRILLIC CAPITAL LETTER KA
38045C  ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE
39043A 0301 ; [.0DA1.0020.0002.045C] # CYRILLIC SMALL LETTER KJE
40040C  ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE
41041A 0301 ; [.0DA1.0020.0008.040C] # CYRILLIC CAPITAL LETTER KJE
42ENTRIES
43
44our $aaEntry = <<'ENTRIES';
450304  ; [.0000.005A.0002.0304] # COMBINING MACRON (cc = 230)
46030A  ; [.0000.0043.0002.030A] # COMBINING RING ABOVE (cc = 230)
470327  ; [.0000.0055.0002.0327] # COMBINING CEDILLA (cc = 202)
48031A  ; [.0000.006B.0002.031A] # COMBINING LEFT ANGLE ABOVE (cc = 232)
490061  ; [.0A15.0020.0002.0061] # LATIN SMALL LETTER A
500041  ; [.0A15.0020.0008.0041] # LATIN CAPITAL LETTER A
51007A  ; [.0C13.0020.0002.007A] # LATIN SMALL LETTER Z
52005A  ; [.0C13.0020.0008.005A] # LATIN CAPITAL LETTER Z
5300E5  ; [.0C25.0020.0002.00E5] # LATIN SMALL LETTER A WITH RING ABOVE; QQCM
5400C5  ; [.0C25.0020.0008.00C5] # LATIN CAPITAL LETTER A WITH RING ABOVE; QQCM
550061 030A ; [.0C25.0020.0002.0061] # LATIN SMALL LETTER A WITH RING ABOVE
560041 030A ; [.0C25.0020.0008.0041] # LATIN CAPITAL LETTER A WITH RING ABOVE
57ENTRIES
58
59#########################
60
61my $kjeNoN = Unicode::Collate->new(
62    level => 1,
63    table => undef,
64    normalization => undef,
65    entry => $kjeEntry,
66);
67
68ok($kjeNoN->lt("\x{43A}", "\x{43A}\x{301}"));
69ok($kjeNoN->gt("\x{45C}", "\x{43A}\x{334}\x{301}"));
70ok($kjeNoN->eq("\x{43A}", "\x{43A}\x{334}\x{301}"));
71ok($kjeNoN->eq("\x{45C}", "\x{43A}\x{301}\x{334}"));
72
73# 5
74
75our %sortkeys;
76
77$sortkeys{'KAac'} = $kjeNoN->viewSortKey("\x{43A}\x{301}");
78$sortkeys{'KAta'} = $kjeNoN->viewSortKey("\x{43A}\x{334}\x{301}");
79$sortkeys{'KAat'} = $kjeNoN->viewSortKey("\x{43A}\x{301}\x{334}");
80
81eval { require Unicode::Normalize };
82if (!$@) {
83    my $kjeNFD = Unicode::Collate->new(
84	level => 1,
85	table => undef,
86	entry => $kjeEntry,
87    );
88
89ok($kjeNFD->lt("\x{43A}", "\x{43A}\x{301}"));
90ok($kjeNFD->eq("\x{45C}", "\x{43A}\x{334}\x{301}"));
91ok($kjeNFD->lt("\x{43A}", "\x{43A}\x{334}\x{301}"));
92ok($kjeNFD->eq("\x{45C}", "\x{43A}\x{301}\x{334}"));
93# 9
94
95    my $aaNFD = Unicode::Collate->new(
96	level => 1,
97	table => undef,
98	entry => $aaEntry,
99    );
100
101ok($aaNFD->lt("Z", "A\x{30A}\x{304}"));
102ok($aaNFD->eq("A", "A\x{304}\x{30A}"));
103ok($aaNFD->eq(pack('U', 0xE5), "A\x{30A}\x{304}"));
104ok($aaNFD->eq("A\x{304}", "A\x{304}\x{30A}"));
105ok($aaNFD->lt("Z", "A\x{327}\x{30A}"));
106ok($aaNFD->lt("Z", "A\x{30A}\x{327}"));
107ok($aaNFD->lt("Z", "A\x{31A}\x{30A}"));
108ok($aaNFD->lt("Z", "A\x{30A}\x{31A}"));
109# 17
110
111    my $aaPre = Unicode::Collate->new(
112	level => 1,
113	normalization => "prenormalized",
114	table => undef,
115	entry => $aaEntry,
116    );
117
118ok($aaPre->lt("Z", "A\x{30A}\x{304}"));
119ok($aaPre->eq("A", "A\x{304}\x{30A}"));
120ok($aaPre->eq(pack('U', 0xE5), "A\x{30A}\x{304}"));
121ok($aaPre->eq("A\x{304}", "A\x{304}\x{30A}"));
122ok($aaPre->lt("Z", "A\x{327}\x{30A}"));
123ok($aaPre->lt("Z", "A\x{30A}\x{327}"));
124ok($aaPre->lt("Z", "A\x{31A}\x{30A}"));
125ok($aaPre->lt("Z", "A\x{30A}\x{31A}"));
126# 25
127} else {
128    ok(1) for 1..20;
129}
130
131# again: loading Unicode::Normalize should not affect $kjeNoN.
132ok($kjeNoN->lt("\x{43A}", "\x{43A}\x{301}"));
133ok($kjeNoN->gt("\x{45C}", "\x{43A}\x{334}\x{301}"));
134ok($kjeNoN->eq("\x{43A}", "\x{43A}\x{334}\x{301}"));
135ok($kjeNoN->eq("\x{45C}", "\x{43A}\x{301}\x{334}"));
136
137ok($sortkeys{'KAac'}, $kjeNoN->viewSortKey("\x{43A}\x{301}"));
138ok($sortkeys{'KAta'}, $kjeNoN->viewSortKey("\x{43A}\x{334}\x{301}"));
139ok($sortkeys{'KAat'}, $kjeNoN->viewSortKey("\x{43A}\x{301}\x{334}"));
140
141# 32
142
143my $aaNoN = Unicode::Collate->new(
144    level => 1,
145    table => undef,
146    entry => $aaEntry,
147    normalization => undef,
148);
149
150ok($aaNoN->lt("Z", "A\x{30A}\x{304}"));
151ok($aaNoN->eq("A", "A\x{304}\x{30A}"));
152ok($aaNoN->eq(pack('U', 0xE5), "A\x{30A}\x{304}"));
153ok($aaNoN->eq("A\x{304}", "A\x{304}\x{30A}"));
154ok($aaNoN->eq("A", "A\x{327}\x{30A}"));
155ok($aaNoN->lt("Z", "A\x{30A}\x{327}"));
156ok($aaNoN->eq("A", "A\x{31A}\x{30A}"));
157ok($aaNoN->lt("Z", "A\x{30A}\x{31A}"));
158
159# 40
160
161# suppress contractions (not affected)
162
163my $kjeSup = Unicode::Collate->new(
164    level => 1,
165    table => undef,
166    normalization => undef,
167    entry => $kjeEntry,
168    suppress => [0x400..0x45F],
169);
170
171ok($kjeSup->lt("\x{43A}", "\x{43A}\x{301}"));
172ok($kjeSup->eq("\x{45C}", "\x{43A}\x{301}"));
173ok($kjeSup->lt("\x{41A}", "\x{41A}\x{301}"));
174ok($kjeSup->eq("\x{40C}", "\x{41A}\x{301}"));
175
176# 44
177
178our $tibetanEntry = <<'ENTRIES';
1790000  ; [.0000.0000.0000.0000] # [0000] NULL (in 6429)
1800F71           ; [.206D.0020.0002.0F71] # TIBETAN VOWEL SIGN AA
1810F72           ; [.206E.0020.0002.0F72] # TIBETAN VOWEL SIGN I
1820F73           ; [.206F.0020.0002.0F73] # TIBETAN VOWEL SIGN II
1830F71 0F72      ; [.206F.0020.0002.0F73] # TIBETAN VOWEL SIGN II
1840F80           ; [.2070.0020.0002.0F80] # TIBETAN VOWEL SIGN REVERSED I
1850F81           ; [.2071.0020.0002.0F81] # TIBETAN VOWEL SIGN REVERSED II
1860F71 0F80      ; [.2071.0020.0002.0F81] # TIBETAN VOWEL SIGN REVERSED II
1870F74           ; [.2072.0020.0002.0F74] # TIBETAN VOWEL SIGN U
1880F75           ; [.2073.0020.0002.0F75] # TIBETAN VOWEL SIGN UU
1890F71 0F74      ; [.2073.0020.0002.0F75] # TIBETAN VOWEL SIGN UU
1900F76           ; [.2074.0020.0002.0F76] # TIBETAN VOWEL SIGN VOCALIC R
1910FB2 0F80      ; [.2074.0020.0002.0F76] # TIBETAN VOWEL SIGN VOCALIC R
1920F77           ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR
1930FB2 0F81      ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR
1940FB2 0F71 0F80 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR
1950F78           ; [.2076.0020.0002.0F78] # TIBETAN VOWEL SIGN VOCALIC L
1960FB3 0F80      ; [.2076.0020.0002.0F78] # TIBETAN VOWEL SIGN VOCALIC L
1970F79           ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL
1980FB3 0F81      ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL
1990FB3 0F71 0F80 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL
200ENTRIES
201
202# ccc(0F71) = 129
203# ccc(0F80) = 130
204# 0F76 = 0FB2 0F80
205# 0F78 = 0FB3 0F80
206# 0F81 = 0F71 0F80
207# 0F77 = <compat> 0FB2 0F81 = 0FB2 0F71 0F80 = 0F76 0F71
208# 0F79 = <compat> 0FB3 0F81 = 0FB3 0F71 0F80 = 0F78 0F71
209
210eval { require Unicode::Normalize };
211if (!$@) {
212    my $tibNFD = Unicode::Collate->new(
213	table => undef,
214	entry => $tibetanEntry,
215    );
216
217    # VOCALIC RR
218    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F81}"));
219    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F81}\x{334}"));
220    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F81}\0\x{334}"));
221    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{334}\x{F71}"));
222    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{F71}\x{334}"));
223    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{F71}\0\x{334}"));
224    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F71}\x{F80}"));
225    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{334}\x{F80}"));
226    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{F80}\x{334}"));
227    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{F80}\0\x{334}"));
228    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F80}\x{F71}"));
229    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{334}\x{F71}"));
230    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{F71}\x{334}"));
231    ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{F71}\0\x{334}"));
232# 58
233
234    # VOCALIC LL
235    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F81}"));
236    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F81}\x{334}"));
237    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F81}\0\x{334}"));
238    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{334}\x{F71}"));
239    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{F71}\x{334}"));
240    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{F71}\0\x{334}"));
241    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F71}\x{F80}"));
242    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{334}\x{F80}"));
243    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{F80}\x{334}"));
244    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{F80}\0\x{334}"));
245    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F80}\x{F71}"));
246    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{334}\x{F71}"));
247    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{F71}\x{334}"));
248    ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{F71}\0\x{334}"));
249# 72
250
251    my $discontNFD = Unicode::Collate->new(
252	table => undef,
253	entry => <<'ENTRIES',
2540000  ; [.0000.0000.0000.0000] # [0000] NULL (in 6429)
2550301  ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT
2560300  ; [.0000.0035.0002.0300] # COMBINING GRAVE ACCENT
2570327  ; [.0000.0055.0002.0327] # COMBINING CEDILLA
2580334  ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY
2590041  ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A
2600041 0327 0301 ; [.0102.0020.0008.0041]
2610041 0300 ; [.0103.0020.0008.0041]
262ENTRIES
263    );
264
265    ok($discontNFD->eq("A\x{327}\x{301}\0\x{334}", "A\x{334}\x{327}\x{301}"));
266    ok($discontNFD->eq("A\x{300}\0\x{327}",        "A\x{327}\x{300}"));
267} else {
268    ok(1) for 1..30;
269}
270# 74
271