xref: /openbsd-src/gnu/usr.bin/perl/cpan/Unicode-Collate/t/illegal.t (revision eac174f2741a08d8deb8aae59a7f778ef9b5d770)
1b39c5158Smillert
2b39c5158SmillertBEGIN {
3*eac174f2Safresh1    unless (5.008 <= $]) {
4*eac174f2Safresh1	print "1..0 # skipped: Perl 5.8.0 or later needed for this test\n";
5*eac174f2Safresh1	print $@;
6*eac174f2Safresh1	exit;
7b39c5158Smillert    }
8b39c5158Smillert    if ($ENV{PERL_CORE}) {
9b39c5158Smillert	chdir('t') if -d 't';
10b39c5158Smillert	@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
11b39c5158Smillert    }
12b39c5158Smillert}
13b39c5158Smillert
14898184e3Ssthenuse strict;
15898184e3Ssthenuse warnings;
16*eac174f2Safresh1BEGIN { $| = 1; print "1..176\n"; } # 81 + 5 x @Versions
17898184e3Ssthenmy $count = 0;
18898184e3Ssthensub ok ($;$) {
19898184e3Ssthen    my $p = my $r = shift;
20898184e3Ssthen    if (@_) {
21898184e3Ssthen	my $x = shift;
22898184e3Ssthen	$p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
23898184e3Ssthen    }
24898184e3Ssthen    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
25898184e3Ssthen}
26b39c5158Smillert
27*eac174f2Safresh1use Unicode::Collate;
28*eac174f2Safresh1
29b39c5158Smillertok(1);
30b39c5158Smillert
31*eac174f2Safresh1sub _pack_U   { Unicode::Collate::pack_U(@_) }
32*eac174f2Safresh1sub _unpack_U { Unicode::Collate::unpack_U(@_) }
33*eac174f2Safresh1
34b39c5158Smillert#########################
35b39c5158Smillert
36b39c5158Smillertno warnings 'utf8';
37b39c5158Smillert
38b39c5158Smillert# NULL is tailorable but illegal code points are not.
39b39c5158Smillert# illegal code points should be always ingored
40b39c5158Smillert# (cf. UCA, 7.1.1 Illegal code points).
41b39c5158Smillert
42898184e3Ssthenmy $entry = <<'ENTRIES';
43b39c5158Smillert0000  ; [.0020.0000.0000.0000] # [0000] NULL
44b39c5158Smillert0001  ; [.0021.0000.0000.0001] # [0001] START OF HEADING
45b39c5158SmillertFFFE  ; [.0022.0000.0000.FFFE] # <noncharacter-FFFE> (invalid)
46b39c5158SmillertFFFF  ; [.0023.0000.0000.FFFF] # <noncharacter-FFFF> (invalid)
47b39c5158SmillertD800  ; [.0024.0000.0000.D800] # <surrogate-D800> (invalid)
48b39c5158SmillertDFFF  ; [.0025.0000.0000.DFFF] # <surrogate-DFFF> (invalid)
49b39c5158SmillertFDD0  ; [.0026.0000.0000.FDD0] # <noncharacter-FDD0> (invalid)
50b39c5158SmillertFDEF  ; [.0027.0000.0000.FDEF] # <noncharacter-FDEF> (invalid)
51b39c5158Smillert0002  ; [.0030.0000.0000.0002] # [0002] START OF TEXT
52b39c5158Smillert10FFFF; [.0040.0000.0000.10FFFF] # <noncharacter-10FFFF> (invalid)
53b39c5158Smillert110000; [.0041.0000.0000.110000] # <out-of-range 110000> (invalid)
54b39c5158Smillert0041  ; [.1000.0020.0008.0041] # latin A
55b39c5158Smillert0041 0000 ; [.1100.0020.0008.0041] # latin A + NULL
56b39c5158Smillert0041 FFFF ; [.1200.0020.0008.0041] # latin A + FFFF (invalid)
57b39c5158SmillertENTRIES
58898184e3Ssthen
59898184e3Ssthen##################
60898184e3Ssthen
61898184e3Ssthenmy $illeg = Unicode::Collate->new(
62898184e3Ssthen  entry => $entry,
63b39c5158Smillert  level => 1,
64b39c5158Smillert  table => undef,
65b39c5158Smillert  normalization => undef,
66898184e3Ssthen  UCA_Version => 20,
67b39c5158Smillert);
68b39c5158Smillert
69b39c5158Smillert# 2..12
70b39c5158Smillertok($illeg->lt("", "\x00"));
71b39c5158Smillertok($illeg->lt("", "\x01"));
72b39c5158Smillertok($illeg->eq("", "\x{FFFE}"));
73b39c5158Smillertok($illeg->eq("", "\x{FFFF}"));
74b39c5158Smillertok($illeg->eq("", "\x{D800}"));
75b39c5158Smillertok($illeg->eq("", "\x{DFFF}"));
76b39c5158Smillertok($illeg->eq("", "\x{FDD0}"));
77b39c5158Smillertok($illeg->eq("", "\x{FDEF}"));
78b39c5158Smillertok($illeg->lt("", "\x02"));
79b39c5158Smillertok($illeg->eq("", "\x{10FFFF}"));
80b39c5158Smillertok($illeg->eq("", "\x{110000}"));
81b39c5158Smillert
82b39c5158Smillert# 13..22
83b39c5158Smillertok($illeg->lt("\x00", "\x01"));
84b39c5158Smillertok($illeg->lt("\x01", "\x02"));
85b39c5158Smillertok($illeg->ne("\0", "\x{D800}"));
86b39c5158Smillertok($illeg->ne("\0", "\x{DFFF}"));
87b39c5158Smillertok($illeg->ne("\0", "\x{FDD0}"));
88b39c5158Smillertok($illeg->ne("\0", "\x{FDEF}"));
89b39c5158Smillertok($illeg->ne("\0", "\x{FFFE}"));
90b39c5158Smillertok($illeg->ne("\0", "\x{FFFF}"));
91b39c5158Smillertok($illeg->ne("\0", "\x{10FFFF}"));
92b39c5158Smillertok($illeg->ne("\0", "\x{110000}"));
93b39c5158Smillert
94b39c5158Smillert# 23..26
95b39c5158Smillertok($illeg->eq("A",   "A\x{FFFF}"));
96b39c5158Smillertok($illeg->gt("A\0", "A\x{FFFF}"));
97b39c5158Smillertok($illeg->lt("A",  "A\0"));
98b39c5158Smillertok($illeg->lt("AA", "A\0"));
99b39c5158Smillert
100b39c5158Smillert##################
101b39c5158Smillert
102898184e3Ssthenmy $nonch = Unicode::Collate->new(
103898184e3Ssthen  entry => $entry,
104898184e3Ssthen  level => 1,
105898184e3Ssthen  table => undef,
106898184e3Ssthen  normalization => undef,
107898184e3Ssthen  UCA_Version => 22,
108898184e3Ssthen);
109898184e3Ssthen
110898184e3Ssthen# 27..37
111898184e3Ssthenok($nonch->lt("", "\x00"));
112898184e3Ssthenok($nonch->lt("", "\x01"));
113898184e3Ssthenok($nonch->lt("", "\x{FFFE}"));
114898184e3Ssthenok($nonch->lt("", "\x{FFFF}"));
115898184e3Ssthenok($nonch->lt("", "\x{D800}"));
116898184e3Ssthenok($nonch->lt("", "\x{DFFF}"));
117898184e3Ssthenok($nonch->lt("", "\x{FDD0}"));
118898184e3Ssthenok($nonch->lt("", "\x{FDEF}"));
119898184e3Ssthenok($nonch->lt("", "\x02"));
120898184e3Ssthenok($nonch->lt("", "\x{10FFFF}"));
1216fb12b70Safresh1ok($nonch->lt("", "\x{110000}"));
122898184e3Ssthen
123898184e3Ssthen# 38..47
124898184e3Ssthenok($nonch->lt("\x00",     "\x01"));
125898184e3Ssthenok($nonch->lt("\x01",     "\x{FFFE}"));
126898184e3Ssthenok($nonch->lt("\x{FFFE}", "\x{FFFF}"));
127898184e3Ssthenok($nonch->lt("\x{FFFF}", "\x{D800}"));
128898184e3Ssthenok($nonch->lt("\x{D800}", "\x{DFFF}"));
129898184e3Ssthenok($nonch->lt("\x{DFFF}", "\x{FDD0}"));
130898184e3Ssthenok($nonch->lt("\x{FDD0}", "\x{FDEF}"));
131898184e3Ssthenok($nonch->lt("\x{FDEF}", "\x02"));
132898184e3Ssthenok($nonch->lt("\x02",     "\x{10FFFF}"));
1336fb12b70Safresh1ok($nonch->lt("\x{10FFFF}", "\x{110000}"));
134898184e3Ssthen
135898184e3Ssthen# 48..51
136898184e3Ssthenok($nonch->lt("A",   "A\x{FFFF}"));
137898184e3Ssthenok($nonch->lt("A\0", "A\x{FFFF}"));
138898184e3Ssthenok($nonch->lt("A",  "A\0"));
139898184e3Ssthenok($nonch->lt("AA", "A\0"));
140898184e3Ssthen
141898184e3Ssthen##################
142b39c5158Smillert
143b39c5158Smillertmy $Collator = Unicode::Collate->new(
144b39c5158Smillert  table => 'keys.txt',
145b39c5158Smillert  level => 1,
146b39c5158Smillert  normalization => undef,
147898184e3Ssthen  UCA_Version => 8,
148b39c5158Smillert);
149b39c5158Smillert
150898184e3Ssthenmy @ret = (
151898184e3Ssthen    "Pe\x{300}\x{301}",
152898184e3Ssthen    "Pe\x{300}\0\0\x{301}",
153898184e3Ssthen    "Pe\x{DA00}\x{301}\x{DFFF}",
154898184e3Ssthen    "Pe\x{FFFF}\x{301}",
155898184e3Ssthen    "Pe\x{110000}\x{301}",
156898184e3Ssthen    "Pe\x{300}\x{d801}\x{301}",
157898184e3Ssthen    "Pe\x{300}\x{ffff}\x{301}",
158898184e3Ssthen    "Pe\x{300}\x{110000}\x{301}",
159898184e3Ssthen    "Pe\x{D9ab}\x{DFFF}",
160898184e3Ssthen    "Pe\x{FFFF}",
161898184e3Ssthen    "Pe\x{110000}",
162898184e3Ssthen    "Pe\x{300}\x{D800}\x{DFFF}",
163898184e3Ssthen    "Pe\x{300}\x{FFFF}",
164898184e3Ssthen    "Pe\x{300}\x{110000}",
165898184e3Ssthen);
166b39c5158Smillert
167898184e3Ssthen# 52..65
168898184e3Ssthenfor my $ret (@ret) {
169898184e3Ssthen    my $str = $ret."rl";
170898184e3Ssthen    my($match) = $Collator->match($str, "pe");
171898184e3Ssthen    ok($match eq $ret);
172898184e3Ssthen}
173b39c5158Smillert
1746fb12b70Safresh1##################
1756fb12b70Safresh1
1766fb12b70Safresh1my $out = Unicode::Collate->new(
1776fb12b70Safresh1    level => 1,
1786fb12b70Safresh1    table => undef,
1796fb12b70Safresh1    normalization => undef,
1806fb12b70Safresh1    overrideOut => sub { 0xFFFD },
1816fb12b70Safresh1);
1826fb12b70Safresh1
183*eac174f2Safresh1my @Versions = ( 8,  9, 11, 14, 16, 18, 20, 22, 24, 26,
184*eac174f2Safresh1		28, 30, 32, 34, 36, 38, 40, 41, 43);
1856fb12b70Safresh1
1866fb12b70Safresh1for my $v (@Versions) {
1876fb12b70Safresh1    $out->change(UCA_Version => $v);
1886fb12b70Safresh1    ok($out->cmp('',           "\x{10FFFF}") == ($v >= 22 ? -1 : 0));
1896fb12b70Safresh1    ok($out->cmp('',           "\x{110000}") == ($v >= 22 ? -1 : 0));
1906fb12b70Safresh1    ok($out->cmp('ABC',        "\x{110000}") == ($v >= 22 ? -1 : 1));
1916fb12b70Safresh1    ok($out->cmp("\x{10FFFD}", "\x{110000}") == ($v >= 22 ? -1 : 1));
1926fb12b70Safresh1    ok($out->cmp("\x{11FFFD}", "\x{110000}") == ($v >= 22 ?  0 : 0));
1936fb12b70Safresh1}
1946fb12b70Safresh1
1956fb12b70Safresh1# x+66..x+77
1966fb12b70Safresh1ok($out->lt('ABC',      "\x{123456}"));
1976fb12b70Safresh1ok($out->lt("\x{FFFD}", "\x{123456}"));
1986fb12b70Safresh1
1996fb12b70Safresh1$out->change(overrideOut => sub {()});
2006fb12b70Safresh1
2016fb12b70Safresh1ok($out->eq('',         "\x{123456}"));
2026fb12b70Safresh1ok($out->gt('ABC',      "\x{123456}"));
2036fb12b70Safresh1ok($out->gt("\x{FFFD}", "\x{123456}"));
2046fb12b70Safresh1
2056fb12b70Safresh1$out->change(overrideOut => undef);
2066fb12b70Safresh1ok($out->lt('',         "\x{123456}"));
2076fb12b70Safresh1ok($out->eq("\x{FFFD}", "\x{123456}"));
2086fb12b70Safresh1
2096fb12b70Safresh1$out->change(overrideOut => sub { 0xFFFD });
2106fb12b70Safresh1
2116fb12b70Safresh1ok($out->lt('',         "\x{123456}"));
2126fb12b70Safresh1ok($out->lt('ABC',      "\x{123456}"));
2136fb12b70Safresh1ok($out->lt("\x{FFFD}", "\x{123456}"));
2146fb12b70Safresh1
2156fb12b70Safresh1$out->change(overrideOut => 0);
2166fb12b70Safresh1ok($out->lt('',         "\x{123456}"));
2176fb12b70Safresh1ok($out->eq("\x{FFFD}", "\x{123456}"));
2186fb12b70Safresh1
2196fb12b70Safresh1$out->change(overrideOut => sub { undef });
2206fb12b70Safresh1ok($out->lt('',         "\x{123456}"));
2216fb12b70Safresh1ok($out->eq("\x{FFFD}", "\x{123456}"));
2226fb12b70Safresh1ok($out->eq("\x{FFFD}", "\x{21FFFFF}"));
2236fb12b70Safresh1ok($out->eq("\x{FFFD}", "\x{2200000}"));
2246fb12b70Safresh1
225