xref: /openbsd-src/gnu/usr.bin/perl/cpan/Unicode-Collate/t/illegal.t (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1
2BEGIN {
3    unless ('A' eq pack('U', 0x41)) {
4	print "1..0 # Unicode::Collate cannot pack a Unicode code point\n";
5	exit 0;
6    }
7    unless (0x41 == unpack('U', 'A')) {
8	print "1..0 # Unicode::Collate cannot get a Unicode code point\n";
9	exit 0;
10    }
11    if ($ENV{PERL_CORE}) {
12	chdir('t') if -d 't';
13	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
14    }
15}
16
17
18BEGIN {
19    use Unicode::Collate;
20
21    unless (exists &Unicode::Collate::bootstrap or 5.008 <= $]) {
22	print "1..0 # skipped: XSUB, or Perl 5.8.0 or later".
23		" needed for this test\n";
24	print $@;
25	exit;
26    }
27}
28
29use strict;
30use warnings;
31BEGIN { $| = 1; print "1..136\n"; } # 81 + 5 x @Versions
32my $count = 0;
33sub ok ($;$) {
34    my $p = my $r = shift;
35    if (@_) {
36	my $x = shift;
37	$p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
38    }
39    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
40}
41
42ok(1);
43
44#########################
45
46no warnings 'utf8';
47
48# NULL is tailorable but illegal code points are not.
49# illegal code points should be always ingored
50# (cf. UCA, 7.1.1 Illegal code points).
51
52my $entry = <<'ENTRIES';
530000  ; [.0020.0000.0000.0000] # [0000] NULL
540001  ; [.0021.0000.0000.0001] # [0001] START OF HEADING
55FFFE  ; [.0022.0000.0000.FFFE] # <noncharacter-FFFE> (invalid)
56FFFF  ; [.0023.0000.0000.FFFF] # <noncharacter-FFFF> (invalid)
57D800  ; [.0024.0000.0000.D800] # <surrogate-D800> (invalid)
58DFFF  ; [.0025.0000.0000.DFFF] # <surrogate-DFFF> (invalid)
59FDD0  ; [.0026.0000.0000.FDD0] # <noncharacter-FDD0> (invalid)
60FDEF  ; [.0027.0000.0000.FDEF] # <noncharacter-FDEF> (invalid)
610002  ; [.0030.0000.0000.0002] # [0002] START OF TEXT
6210FFFF; [.0040.0000.0000.10FFFF] # <noncharacter-10FFFF> (invalid)
63110000; [.0041.0000.0000.110000] # <out-of-range 110000> (invalid)
640041  ; [.1000.0020.0008.0041] # latin A
650041 0000 ; [.1100.0020.0008.0041] # latin A + NULL
660041 FFFF ; [.1200.0020.0008.0041] # latin A + FFFF (invalid)
67ENTRIES
68
69##################
70
71my $illeg = Unicode::Collate->new(
72  entry => $entry,
73  level => 1,
74  table => undef,
75  normalization => undef,
76  UCA_Version => 20,
77);
78
79# 2..12
80ok($illeg->lt("", "\x00"));
81ok($illeg->lt("", "\x01"));
82ok($illeg->eq("", "\x{FFFE}"));
83ok($illeg->eq("", "\x{FFFF}"));
84ok($illeg->eq("", "\x{D800}"));
85ok($illeg->eq("", "\x{DFFF}"));
86ok($illeg->eq("", "\x{FDD0}"));
87ok($illeg->eq("", "\x{FDEF}"));
88ok($illeg->lt("", "\x02"));
89ok($illeg->eq("", "\x{10FFFF}"));
90ok($illeg->eq("", "\x{110000}"));
91
92# 13..22
93ok($illeg->lt("\x00", "\x01"));
94ok($illeg->lt("\x01", "\x02"));
95ok($illeg->ne("\0", "\x{D800}"));
96ok($illeg->ne("\0", "\x{DFFF}"));
97ok($illeg->ne("\0", "\x{FDD0}"));
98ok($illeg->ne("\0", "\x{FDEF}"));
99ok($illeg->ne("\0", "\x{FFFE}"));
100ok($illeg->ne("\0", "\x{FFFF}"));
101ok($illeg->ne("\0", "\x{10FFFF}"));
102ok($illeg->ne("\0", "\x{110000}"));
103
104# 23..26
105ok($illeg->eq("A",   "A\x{FFFF}"));
106ok($illeg->gt("A\0", "A\x{FFFF}"));
107ok($illeg->lt("A",  "A\0"));
108ok($illeg->lt("AA", "A\0"));
109
110##################
111
112my $nonch = Unicode::Collate->new(
113  entry => $entry,
114  level => 1,
115  table => undef,
116  normalization => undef,
117  UCA_Version => 22,
118);
119
120# 27..37
121ok($nonch->lt("", "\x00"));
122ok($nonch->lt("", "\x01"));
123ok($nonch->lt("", "\x{FFFE}"));
124ok($nonch->lt("", "\x{FFFF}"));
125ok($nonch->lt("", "\x{D800}"));
126ok($nonch->lt("", "\x{DFFF}"));
127ok($nonch->lt("", "\x{FDD0}"));
128ok($nonch->lt("", "\x{FDEF}"));
129ok($nonch->lt("", "\x02"));
130ok($nonch->lt("", "\x{10FFFF}"));
131ok($nonch->lt("", "\x{110000}"));
132
133# 38..47
134ok($nonch->lt("\x00",     "\x01"));
135ok($nonch->lt("\x01",     "\x{FFFE}"));
136ok($nonch->lt("\x{FFFE}", "\x{FFFF}"));
137ok($nonch->lt("\x{FFFF}", "\x{D800}"));
138ok($nonch->lt("\x{D800}", "\x{DFFF}"));
139ok($nonch->lt("\x{DFFF}", "\x{FDD0}"));
140ok($nonch->lt("\x{FDD0}", "\x{FDEF}"));
141ok($nonch->lt("\x{FDEF}", "\x02"));
142ok($nonch->lt("\x02",     "\x{10FFFF}"));
143ok($nonch->lt("\x{10FFFF}", "\x{110000}"));
144
145# 48..51
146ok($nonch->lt("A",   "A\x{FFFF}"));
147ok($nonch->lt("A\0", "A\x{FFFF}"));
148ok($nonch->lt("A",  "A\0"));
149ok($nonch->lt("AA", "A\0"));
150
151##################
152
153my $Collator = Unicode::Collate->new(
154  table => 'keys.txt',
155  level => 1,
156  normalization => undef,
157  UCA_Version => 8,
158);
159
160my @ret = (
161    "Pe\x{300}\x{301}",
162    "Pe\x{300}\0\0\x{301}",
163    "Pe\x{DA00}\x{301}\x{DFFF}",
164    "Pe\x{FFFF}\x{301}",
165    "Pe\x{110000}\x{301}",
166    "Pe\x{300}\x{d801}\x{301}",
167    "Pe\x{300}\x{ffff}\x{301}",
168    "Pe\x{300}\x{110000}\x{301}",
169    "Pe\x{D9ab}\x{DFFF}",
170    "Pe\x{FFFF}",
171    "Pe\x{110000}",
172    "Pe\x{300}\x{D800}\x{DFFF}",
173    "Pe\x{300}\x{FFFF}",
174    "Pe\x{300}\x{110000}",
175);
176
177# 52..65
178for my $ret (@ret) {
179    my $str = $ret."rl";
180    my($match) = $Collator->match($str, "pe");
181    ok($match eq $ret);
182}
183
184##################
185
186my $out = Unicode::Collate->new(
187    level => 1,
188    table => undef,
189    normalization => undef,
190    overrideOut => sub { 0xFFFD },
191);
192
193my @Versions = (8, 9, 11, 14, 16, 18, 20, 22, 24, 26, 28);
194
195for my $v (@Versions) {
196    $out->change(UCA_Version => $v);
197    ok($out->cmp('',           "\x{10FFFF}") == ($v >= 22 ? -1 : 0));
198    ok($out->cmp('',           "\x{110000}") == ($v >= 22 ? -1 : 0));
199    ok($out->cmp('ABC',        "\x{110000}") == ($v >= 22 ? -1 : 1));
200    ok($out->cmp("\x{10FFFD}", "\x{110000}") == ($v >= 22 ? -1 : 1));
201    ok($out->cmp("\x{11FFFD}", "\x{110000}") == ($v >= 22 ?  0 : 0));
202}
203
204# x+66..x+77
205ok($out->lt('ABC',      "\x{123456}"));
206ok($out->lt("\x{FFFD}", "\x{123456}"));
207
208$out->change(overrideOut => sub {()});
209
210ok($out->eq('',         "\x{123456}"));
211ok($out->gt('ABC',      "\x{123456}"));
212ok($out->gt("\x{FFFD}", "\x{123456}"));
213
214$out->change(overrideOut => undef);
215ok($out->lt('',         "\x{123456}"));
216ok($out->eq("\x{FFFD}", "\x{123456}"));
217
218$out->change(overrideOut => sub { 0xFFFD });
219
220ok($out->lt('',         "\x{123456}"));
221ok($out->lt('ABC',      "\x{123456}"));
222ok($out->lt("\x{FFFD}", "\x{123456}"));
223
224$out->change(overrideOut => 0);
225ok($out->lt('',         "\x{123456}"));
226ok($out->eq("\x{FFFD}", "\x{123456}"));
227
228$out->change(overrideOut => sub { undef });
229ok($out->lt('',         "\x{123456}"));
230ok($out->eq("\x{FFFD}", "\x{123456}"));
231ok($out->eq("\x{FFFD}", "\x{21FFFFF}"));
232ok($out->eq("\x{FFFD}", "\x{2200000}"));
233
234