1
BEGIN {
    # Skip the whole test file (TAP plan "1..0") when pack('U') cannot turn
    # the code point 0x41 into the one-character string "A".
    if (pack('U', 0x41) ne "A") {
        my $skip_plan = "1..0 # Unicode::Collate " .
            "cannot stringify a Unicode code point\n";
        print $skip_plan;
        exit 0;
    }
}
9
BEGIN {
    # With PERL_CORE set (presumably by the core test harness -- confirm),
    # run from the 't' directory if there is one and replace @INC with the
    # single platform-specific relative lib directory.
    if ($ENV{PERL_CORE}) {
        -d 't' and chdir('t');
        my $lib_dir = ($^O eq 'MacOS') ? '::lib' : '../lib';
        @INC = ($lib_dir);
    }
}
16
17use Test;
18use strict;
19use warnings;
20
BEGIN {
    use Unicode::Collate;

    # The tests run when at least one of these holds: the module's
    # bootstrap sub exists, or the running Perl is 5.8.0 or newer.
    my $has_xsub       = exists &Unicode::Collate::bootstrap;
    my $perl_5_8_or_up = $] >= 5.008;

    # Neither holds: announce an empty plan and stop before any test runs.
    if (!$has_xsub && !$perl_5_8_or_up) {
        my $skip_plan = "1..0 # skipped: XSUB, or Perl 5.8.0 or later" .
            " needed for this test\n";
        print $skip_plan;
        print $@;
        exit;
    }
}
31
# Declare the number of tests at compile time.
BEGIN { plan(tests => 40) }

# Test 1: passes unconditionally once the setup above has completed.
ok(1);
35
36#########################
37
38no warnings 'utf8';
39
# NULL is tailorable, but illegal code points are not.
# Illegal code points should always be ignored
# (cf. UCA, 7.1.1 Illegal code points).
43
# Tailoring entries for the collator below.  Besides ordinary entries for
# NULL, U+0001, U+0002 and "A", the table deliberately assigns weights to
# surrogates, noncharacters and an out-of-range code point, and defines two
# contractions: "A" + NULL and "A" + U+FFFF.
my $illegal_entries = <<'ENTRIES';
0000  ; [.0020.0000.0000.0000] # [0000] NULL
0001  ; [.0021.0000.0000.0001] # [0001] START OF HEADING
FFFE  ; [.0022.0000.0000.FFFE] # <noncharacter-FFFE> (invalid)
FFFF  ; [.0023.0000.0000.FFFF] # <noncharacter-FFFF> (invalid)
D800  ; [.0024.0000.0000.D800] # <surrogate-D800> (invalid)
DFFF  ; [.0025.0000.0000.DFFF] # <surrogate-DFFF> (invalid)
FDD0  ; [.0026.0000.0000.FDD0] # <noncharacter-FDD0> (invalid)
FDEF  ; [.0027.0000.0000.FDEF] # <noncharacter-FDEF> (invalid)
0002  ; [.0030.0000.0000.0002] # [0002] START OF TEXT
10FFFF; [.0040.0000.0000.10FFFF] # <noncharacter-10FFFF> (invalid)
110000; [.0041.0000.0000.110000] # <out-of-range 110000> (invalid)
0041  ; [.1000.0020.0008.0041] # latin A
0041 0000 ; [.1100.0020.0008.0041] # latin A + NULL
0041 FFFF ; [.1200.0020.0008.0041] # latin A + FFFF (invalid)
ENTRIES

# Level-1 collator built only from the entries above: no table file is
# loaded and normalization is switched off.
my $illeg = Unicode::Collate->new(
    entry         => $illegal_entries,
    level         => 1,
    table         => undef,
    normalization => undef,
);
65
# Tests 2..12: the empty string against a single code point.  Code points
# with an ordinary entry must sort after "" (lt); the ones marked invalid
# in the entries table must collate equal to "" (eq), i.e. be ignored.
for my $case (
    [ 'lt', "\x00"       ],
    [ 'lt', "\x01"       ],
    [ 'eq', "\x{FFFE}"   ],
    [ 'eq', "\x{FFFF}"   ],
    [ 'eq', "\x{D800}"   ],
    [ 'eq', "\x{DFFF}"   ],
    [ 'eq', "\x{FDD0}"   ],
    [ 'eq', "\x{FDEF}"   ],
    [ 'lt', "\x02"       ],
    [ 'eq', "\x{10FFFF}" ],
    [ 'eq', "\x{110000}" ],
) {
    my ($relation, $char) = @$case;
    ok($illeg->$relation("", $char));
}

# Tests 13..22: the tailored control characters keep their relative order,
# and NULL never collates equal to any of the invalid code points.
ok($illeg->lt("\x00", "\x01"));
ok($illeg->lt("\x01", "\x02"));
for my $invalid (
    "\x{D800}",
    "\x{DFFF}",
    "\x{FDD0}",
    "\x{FDEF}",
    "\x{FFFE}",
    "\x{FFFF}",
    "\x{10FFFF}",
    "\x{110000}",
) {
    ok($illeg->ne("\0", $invalid));
}

# Tests 23..26: contractions.  "A" + U+FFFF must behave like a plain "A",
# whereas the "A" + NULL contraction sorts after both "A" and "AA".
for my $case (
    [ 'eq', "A",   "A\x{FFFF}" ],
    [ 'gt', "A\0", "A\x{FFFF}" ],
    [ 'lt', "A",   "A\0"       ],
    [ 'lt', "AA",  "A\0"       ],
) {
    my ($relation, $left, $right) = @$case;
    ok($illeg->$relation($left, $right));
}
96
97##################
98
# Level-1 collator backed by the 'keys.txt' table, normalization disabled.
my $Collator = Unicode::Collate->new(
    table         => 'keys.txt',
    level         => 1,
    normalization => undef,
);

# Substring searched for in every haystack below.
my $needle = "pe";

# Tests 27..40: each entry is the substring that match() is expected to
# return.  The haystack is that substring followed by "rl", so the match
# must run from "Pe" through the combining marks, NULs, surrogates,
# noncharacters and out-of-range code points that follow it, and stop
# just before "rl".
my @expected_matches = (
    "Pe\x{300}\x{301}",
    "Pe\x{300}\0\0\x{301}",
    "Pe\x{DA00}\x{301}\x{DFFF}",
    "Pe\x{FFFF}\x{301}",
    "Pe\x{110000}\x{301}",
    "Pe\x{300}\x{d801}\x{301}",
    "Pe\x{300}\x{ffff}\x{301}",
    "Pe\x{300}\x{110000}\x{301}",
    "Pe\x{D9ab}\x{DFFF}",
    "Pe\x{FFFF}",
    "Pe\x{110000}",
    "Pe\x{300}\x{D800}\x{DFFF}",
    "Pe\x{300}\x{FFFF}",
    "Pe\x{300}\x{110000}",
);

for my $expected (@expected_matches) {
    my $haystack = $expected . "rl";

    # List context: keep only the first value returned by match().
    my ($found) = $Collator->match($haystack, $needle);
    ok($found, $expected);
}
179
180
181