1 2BEGIN { 3 unless ('A' eq pack('U', 0x41)) { 4 print "1..0 # Unicode::Collate cannot pack a Unicode code point\n"; 5 exit 0; 6 } 7 unless (0x41 == unpack('U', 'A')) { 8 print "1..0 # Unicode::Collate cannot get a Unicode code point\n"; 9 exit 0; 10 } 11 if ($ENV{PERL_CORE}) { 12 chdir('t') if -d 't'; 13 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); 14 } 15} 16 17 18BEGIN { 19 use Unicode::Collate; 20 21 unless (exists &Unicode::Collate::bootstrap or 5.008 <= $]) { 22 print "1..0 # skipped: XSUB, or Perl 5.8.0 or later". 23 " needed for this test\n"; 24 print $@; 25 exit; 26 } 27} 28 29use strict; 30use warnings; 31BEGIN { $| = 1; print "1..136\n"; } # 81 + 5 x @Versions 32my $count = 0; 33sub ok ($;$) { 34 my $p = my $r = shift; 35 if (@_) { 36 my $x = shift; 37 $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; 38 } 39 print $p ? "ok" : "not ok", ' ', ++$count, "\n"; 40} 41 42ok(1); 43 44######################### 45 46no warnings 'utf8'; 47 48# NULL is tailorable but illegal code points are not. 49# illegal code points should be always ingored 50# (cf. UCA, 7.1.1 Illegal code points). 51 52my $entry = <<'ENTRIES'; 530000 ; [.0020.0000.0000.0000] # [0000] NULL 540001 ; [.0021.0000.0000.0001] # [0001] START OF HEADING 55FFFE ; [.0022.0000.0000.FFFE] # <noncharacter-FFFE> (invalid) 56FFFF ; [.0023.0000.0000.FFFF] # <noncharacter-FFFF> (invalid) 57D800 ; [.0024.0000.0000.D800] # <surrogate-D800> (invalid) 58DFFF ; [.0025.0000.0000.DFFF] # <surrogate-DFFF> (invalid) 59FDD0 ; [.0026.0000.0000.FDD0] # <noncharacter-FDD0> (invalid) 60FDEF ; [.0027.0000.0000.FDEF] # <noncharacter-FDEF> (invalid) 610002 ; [.0030.0000.0000.0002] # [0002] START OF TEXT 6210FFFF; [.0040.0000.0000.10FFFF] # <noncharacter-10FFFF> (invalid) 63110000; [.0041.0000.0000.110000] # <out-of-range 110000> (invalid) 640041 ; [.1000.0020.0008.0041] # latin A 650041 0000 ; [.1100.0020.0008.0041] # latin A + NULL 660041 FFFF ; [.1200.0020.0008.0041] # latin A + FFFF (invalid) 67ENTRIES 68 69################## 70 71my $illeg = Unicode::Collate->new( 72 entry => $entry, 73 level => 1, 74 table => undef, 75 normalization => undef, 76 UCA_Version => 20, 77); 78 79# 2..12 80ok($illeg->lt("", "\x00")); 81ok($illeg->lt("", "\x01")); 82ok($illeg->eq("", "\x{FFFE}")); 83ok($illeg->eq("", "\x{FFFF}")); 84ok($illeg->eq("", "\x{D800}")); 85ok($illeg->eq("", "\x{DFFF}")); 86ok($illeg->eq("", "\x{FDD0}")); 87ok($illeg->eq("", "\x{FDEF}")); 88ok($illeg->lt("", "\x02")); 89ok($illeg->eq("", "\x{10FFFF}")); 90ok($illeg->eq("", "\x{110000}")); 91 92# 13..22 93ok($illeg->lt("\x00", "\x01")); 94ok($illeg->lt("\x01", "\x02")); 95ok($illeg->ne("\0", "\x{D800}")); 96ok($illeg->ne("\0", "\x{DFFF}")); 97ok($illeg->ne("\0", "\x{FDD0}")); 98ok($illeg->ne("\0", "\x{FDEF}")); 99ok($illeg->ne("\0", "\x{FFFE}")); 100ok($illeg->ne("\0", "\x{FFFF}")); 101ok($illeg->ne("\0", "\x{10FFFF}")); 102ok($illeg->ne("\0", "\x{110000}")); 103 104# 23..26 105ok($illeg->eq("A", "A\x{FFFF}")); 106ok($illeg->gt("A\0", "A\x{FFFF}")); 107ok($illeg->lt("A", "A\0")); 108ok($illeg->lt("AA", "A\0")); 109 110################## 111 112my $nonch = Unicode::Collate->new( 113 entry => $entry, 114 level => 1, 115 table => undef, 116 normalization => undef, 117 UCA_Version => 22, 118); 119 120# 27..37 121ok($nonch->lt("", "\x00")); 122ok($nonch->lt("", "\x01")); 123ok($nonch->lt("", "\x{FFFE}")); 124ok($nonch->lt("", "\x{FFFF}")); 125ok($nonch->lt("", "\x{D800}")); 126ok($nonch->lt("", "\x{DFFF}")); 127ok($nonch->lt("", "\x{FDD0}")); 128ok($nonch->lt("", "\x{FDEF}")); 129ok($nonch->lt("", "\x02")); 130ok($nonch->lt("", "\x{10FFFF}")); 131ok($nonch->lt("", "\x{110000}")); 132 133# 38..47 134ok($nonch->lt("\x00", "\x01")); 135ok($nonch->lt("\x01", "\x{FFFE}")); 136ok($nonch->lt("\x{FFFE}", "\x{FFFF}")); 137ok($nonch->lt("\x{FFFF}", "\x{D800}")); 138ok($nonch->lt("\x{D800}", "\x{DFFF}")); 139ok($nonch->lt("\x{DFFF}", "\x{FDD0}")); 140ok($nonch->lt("\x{FDD0}", "\x{FDEF}")); 141ok($nonch->lt("\x{FDEF}", "\x02")); 142ok($nonch->lt("\x02", "\x{10FFFF}")); 143ok($nonch->lt("\x{10FFFF}", "\x{110000}")); 144 145# 48..51 146ok($nonch->lt("A", "A\x{FFFF}")); 147ok($nonch->lt("A\0", "A\x{FFFF}")); 148ok($nonch->lt("A", "A\0")); 149ok($nonch->lt("AA", "A\0")); 150 151################## 152 153my $Collator = Unicode::Collate->new( 154 table => 'keys.txt', 155 level => 1, 156 normalization => undef, 157 UCA_Version => 8, 158); 159 160my @ret = ( 161 "Pe\x{300}\x{301}", 162 "Pe\x{300}\0\0\x{301}", 163 "Pe\x{DA00}\x{301}\x{DFFF}", 164 "Pe\x{FFFF}\x{301}", 165 "Pe\x{110000}\x{301}", 166 "Pe\x{300}\x{d801}\x{301}", 167 "Pe\x{300}\x{ffff}\x{301}", 168 "Pe\x{300}\x{110000}\x{301}", 169 "Pe\x{D9ab}\x{DFFF}", 170 "Pe\x{FFFF}", 171 "Pe\x{110000}", 172 "Pe\x{300}\x{D800}\x{DFFF}", 173 "Pe\x{300}\x{FFFF}", 174 "Pe\x{300}\x{110000}", 175); 176 177# 52..65 178for my $ret (@ret) { 179 my $str = $ret."rl"; 180 my($match) = $Collator->match($str, "pe"); 181 ok($match eq $ret); 182} 183 184################## 185 186my $out = Unicode::Collate->new( 187 level => 1, 188 table => undef, 189 normalization => undef, 190 overrideOut => sub { 0xFFFD }, 191); 192 193my @Versions = (8, 9, 11, 14, 16, 18, 20, 22, 24, 26, 28); 194 195for my $v (@Versions) { 196 $out->change(UCA_Version => $v); 197 ok($out->cmp('', "\x{10FFFF}") == ($v >= 22 ? -1 : 0)); 198 ok($out->cmp('', "\x{110000}") == ($v >= 22 ? -1 : 0)); 199 ok($out->cmp('ABC', "\x{110000}") == ($v >= 22 ? -1 : 1)); 200 ok($out->cmp("\x{10FFFD}", "\x{110000}") == ($v >= 22 ? -1 : 1)); 201 ok($out->cmp("\x{11FFFD}", "\x{110000}") == ($v >= 22 ? 0 : 0)); 202} 203 204# x+66..x+77 205ok($out->lt('ABC', "\x{123456}")); 206ok($out->lt("\x{FFFD}", "\x{123456}")); 207 208$out->change(overrideOut => sub {()}); 209 210ok($out->eq('', "\x{123456}")); 211ok($out->gt('ABC', "\x{123456}")); 212ok($out->gt("\x{FFFD}", "\x{123456}")); 213 214$out->change(overrideOut => undef); 215ok($out->lt('', "\x{123456}")); 216ok($out->eq("\x{FFFD}", "\x{123456}")); 217 218$out->change(overrideOut => sub { 0xFFFD }); 219 220ok($out->lt('', "\x{123456}")); 221ok($out->lt('ABC', "\x{123456}")); 222ok($out->lt("\x{FFFD}", "\x{123456}")); 223 224$out->change(overrideOut => 0); 225ok($out->lt('', "\x{123456}")); 226ok($out->eq("\x{FFFD}", "\x{123456}")); 227 228$out->change(overrideOut => sub { undef }); 229ok($out->lt('', "\x{123456}")); 230ok($out->eq("\x{FFFD}", "\x{123456}")); 231ok($out->eq("\x{FFFD}", "\x{21FFFFF}")); 232ok($out->eq("\x{FFFD}", "\x{2200000}")); 233 234