# Source: /openbsd-src/gnu/usr.bin/perl/cpan/Unicode-Collate/t/illegal.t (revision 91f110e064cd7c194e59e019b83bb7496c1c84d4)

# Pre-flight checks, executed at compile time before anything else runs.
BEGIN {
    # Skip the whole file (TAP plan "1..0") when pack('U', 0x41) does not
    # produce the string "A" on this perl.
    unless ("A" eq pack('U', 0x41)) {
        print "1..0 # Unicode::Collate " .
            "cannot stringify a Unicode code point\n";
        exit 0;
    }
    # When PERL_CORE is set, change into 't' (if that directory exists)
    # and replace @INC with the relative library directory.
    if ($ENV{PERL_CORE}) {
        chdir('t') if -d 't';
        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}


# Load the module under test at compile time. Skip the file (TAP plan
# "1..0") unless the sub Unicode::Collate::bootstrap exists or the
# running perl is 5.8.0 or later.
BEGIN {
    use Unicode::Collate;

    unless (exists &Unicode::Collate::bootstrap or 5.008 <= $]) {
        print "1..0 # skipped: XSUB, or Perl 5.8.0 or later".
            " needed for this test\n";
        print $@;
        exit;
    }
}

use strict;
use warnings;

# Turn on autoflush for the selected output handle and announce the
# plan (65 tests) at compile time.
BEGIN { $| = 1; print "1..65\n"; }

# Number of the most recently reported test.
my $count = 0;

# Minimal TAP reporter.
#   ok(RESULT)         passes when RESULT is true.
#   ok(GOT, EXPECTED)  passes when both are undef, or when both are
#                      defined and string-equal ('eq').
# Prints "ok N" or "not ok N", where N is the incremented test counter,
# and returns the result of that print.
sub ok ($;$) {
    my $got  = shift;
    my $pass = $got;
    if (@_) {
        my $expected = shift;
        # An undef on exactly one side is a failure; it is never passed
        # to 'eq', so no "uninitialized" warning is raised.
        $pass = !defined $expected ? !defined $got
              : !defined $got      ? 0
              :                      $got eq $expected;
    }
    print $pass ? "ok" : "not ok", ' ', ++$count, "\n";
}

# 1: sanity check that the script got this far.
ok(1);

#########################

# Disable the 'utf8' warnings category for the rest of the file
# (presumably because the string literals used by the tests contain
# surrogates, noncharacters and an out-of-range code point).
no warnings 'utf8';

# NULL is tailorable but illegal code points are not.
# illegal code points should be always ignored
# (cf. UCA, 7.1.1 Illegal code points).

# Collation entries passed to the collators via the 'entry' option.
# Each line has the form "code point(s) ; [collation element] # comment".
# The last two lines define two-code-point sequences starting with 0041.
my $entry = <<'ENTRIES';
0000  ; [.0020.0000.0000.0000] # [0000] NULL
0001  ; [.0021.0000.0000.0001] # [0001] START OF HEADING
FFFE  ; [.0022.0000.0000.FFFE] # <noncharacter-FFFE> (invalid)
FFFF  ; [.0023.0000.0000.FFFF] # <noncharacter-FFFF> (invalid)
D800  ; [.0024.0000.0000.D800] # <surrogate-D800> (invalid)
DFFF  ; [.0025.0000.0000.DFFF] # <surrogate-DFFF> (invalid)
FDD0  ; [.0026.0000.0000.FDD0] # <noncharacter-FDD0> (invalid)
FDEF  ; [.0027.0000.0000.FDEF] # <noncharacter-FDEF> (invalid)
0002  ; [.0030.0000.0000.0002] # [0002] START OF TEXT
10FFFF; [.0040.0000.0000.10FFFF] # <noncharacter-10FFFF> (invalid)
110000; [.0041.0000.0000.110000] # <out-of-range 110000> (invalid)
0041  ; [.1000.0020.0008.0041] # latin A
0041 0000 ; [.1100.0020.0008.0041] # latin A + NULL
0041 FFFF ; [.1200.0020.0008.0041] # latin A + FFFF (invalid)
ENTRIES

##################

# Collator built only from $entry (table => undef), compared at level 1,
# without normalization, with UCA_Version 20.
my $illeg = Unicode::Collate->new(
  entry => $entry,
  level => 1,
  table => undef,
  normalization => undef,
  UCA_Version => 20,
);

# 2..12
# Expected: U+0000, U+0001 and U+0002 sort after the empty string, while
# the noncharacters, surrogates and the out-of-range 110000 compare equal
# to the empty string.
ok($illeg->lt("", "\x00"));
ok($illeg->lt("", "\x01"));
ok($illeg->eq("", "\x{FFFE}"));
ok($illeg->eq("", "\x{FFFF}"));
ok($illeg->eq("", "\x{D800}"));
ok($illeg->eq("", "\x{DFFF}"));
ok($illeg->eq("", "\x{FDD0}"));
ok($illeg->eq("", "\x{FDEF}"));
ok($illeg->lt("", "\x02"));
ok($illeg->eq("", "\x{10FFFF}"));
ok($illeg->eq("", "\x{110000}"));

# 13..22
# Expected: the control characters keep their relative order, and NULL
# never compares equal to any of the illegal code points.
ok($illeg->lt("\x00", "\x01"));
ok($illeg->lt("\x01", "\x02"));
ok($illeg->ne("\0", "\x{D800}"));
ok($illeg->ne("\0", "\x{DFFF}"));
ok($illeg->ne("\0", "\x{FDD0}"));
ok($illeg->ne("\0", "\x{FDEF}"));
ok($illeg->ne("\0", "\x{FFFE}"));
ok($illeg->ne("\0", "\x{FFFF}"));
ok($illeg->ne("\0", "\x{10FFFF}"));
ok($illeg->ne("\0", "\x{110000}"));

# 23..26
# Expected: "A" followed by U+FFFF compares equal to plain "A", whereas
# "A" followed by NULL sorts after "A", "A\x{FFFF}" and "AA".
ok($illeg->eq("A",   "A\x{FFFF}"));
ok($illeg->gt("A\0", "A\x{FFFF}"));
ok($illeg->lt("A",  "A\0"));
ok($illeg->lt("AA", "A\0"));

##################

# Same construction as the UCA_Version 20 collator, but with
# UCA_Version 22.
my $nonch = Unicode::Collate->new(
  entry => $entry,
  level => 1,
  table => undef,
  normalization => undef,
  UCA_Version => 22,
);

# 27..37
# Expected: every code point listed in $entry up to 10FFFF now sorts
# after the empty string; only the out-of-range 110000 still compares
# equal to it.
ok($nonch->lt("", "\x00"));
ok($nonch->lt("", "\x01"));
ok($nonch->lt("", "\x{FFFE}"));
ok($nonch->lt("", "\x{FFFF}"));
ok($nonch->lt("", "\x{D800}"));
ok($nonch->lt("", "\x{DFFF}"));
ok($nonch->lt("", "\x{FDD0}"));
ok($nonch->lt("", "\x{FDEF}"));
ok($nonch->lt("", "\x02"));
ok($nonch->lt("", "\x{10FFFF}"));
ok($nonch->eq("", "\x{110000}"));

# 38..47
# Expected: the code points sort in the order of the primary weights
# assigned in $entry (0020 < 0021 < ... < 0040), and 10FFFF sorts after
# 110000.
ok($nonch->lt("\x00",     "\x01"));
ok($nonch->lt("\x01",     "\x{FFFE}"));
ok($nonch->lt("\x{FFFE}", "\x{FFFF}"));
ok($nonch->lt("\x{FFFF}", "\x{D800}"));
ok($nonch->lt("\x{D800}", "\x{DFFF}"));
ok($nonch->lt("\x{DFFF}", "\x{FDD0}"));
ok($nonch->lt("\x{FDD0}", "\x{FDEF}"));
ok($nonch->lt("\x{FDEF}", "\x02"));
ok($nonch->lt("\x02",     "\x{10FFFF}"));
ok($nonch->gt("\x{10FFFF}", "\x{110000}"));

# 48..51
# Expected: "A\x{FFFF}" sorts after both "A" and "A\0", and "A\0" sorts
# after "A" and "AA".
ok($nonch->lt("A",   "A\x{FFFF}"));
ok($nonch->lt("A\0", "A\x{FFFF}"));
ok($nonch->lt("A",  "A\0"));
ok($nonch->lt("AA", "A\0"));

##################

# Collator loaded from the 'keys.txt' table, compared at level 1,
# without normalization, with UCA_Version 8.
my $Collator = Unicode::Collate->new(
  table => 'keys.txt',
  level => 1,
  normalization => undef,
  UCA_Version => 8,
);

# Strings made of "Pe" followed by combining marks, NULs, surrogates,
# noncharacters and/or the out-of-range code point 110000. Each one is
# the exact substring that match() is expected to return below.
my @ret = (
    "Pe\x{300}\x{301}",
    "Pe\x{300}\0\0\x{301}",
    "Pe\x{DA00}\x{301}\x{DFFF}",
    "Pe\x{FFFF}\x{301}",
    "Pe\x{110000}\x{301}",
    "Pe\x{300}\x{d801}\x{301}",
    "Pe\x{300}\x{ffff}\x{301}",
    "Pe\x{300}\x{110000}\x{301}",
    "Pe\x{D9ab}\x{DFFF}",
    "Pe\x{FFFF}",
    "Pe\x{110000}",
    "Pe\x{300}\x{D800}\x{DFFF}",
    "Pe\x{300}\x{FFFF}",
    "Pe\x{300}\x{110000}",
);

# 52..65
# Searching for "pe" in each string with "rl" appended must return the
# whole prefix, i.e. everything before "rl".
for my $ret (@ret) {
    my $str = $ret."rl";
    my($match) = $Collator->match($str, "pe");
    # Two-argument ok() compares with 'eq' but treats an undef $match as
    # a plain failure instead of raising an "uninitialized" warning.
    ok($match, $ret);
}

181