# Test script for Unicode::Collate: collation of NULL, control characters,
# noncharacters, surrogates and out-of-range code points, first with a small
# inline table under two UCA versions and then via match() with 'keys.txt'.
# Output is plain TAP: a "1..65" plan followed by 65 "ok"/"not ok" lines.

BEGIN {
    # Skip the whole script if this perl cannot build "A" from code point 0x41.
    unless ("A" eq pack('U', 0x41)) {
        print "1..0 # Unicode::Collate " .
            "cannot stringify a Unicode code point\n";
        exit 0;
    }
    # When run as part of the perl core test suite, use the in-tree library.
    if ($ENV{PERL_CORE}) {
        chdir('t') if -d 't';
        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}


BEGIN {
    use Unicode::Collate;

    # Requires either the XS bootstrap or Perl 5.8.0 and later.
    unless (exists &Unicode::Collate::bootstrap or 5.008 <= $]) {
        print "1..0 # skipped: XSUB, or Perl 5.8.0 or later".
            " needed for this test\n";
        print $@;
        exit;
    }
}

use strict;
use warnings;
BEGIN { $| = 1; print "1..65\n"; }
my $count = 0;

# ok($got)            - passes when $got is true.
# ok($got, $expected) - passes when both are undef, or both are defined and
#                       string-equal; a defined/undef mix fails.
# Prints one numbered TAP line per call and bumps the shared $count.
sub ok ($;$) {
    my $p = my $r = shift;
    if (@_) {
        my $x = shift;
        $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
    }
    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
}

ok(1);

#########################

no warnings 'utf8';

# NULL is tailorable but illegal code points are not.
# illegal code points should be always ignored
# (cf. UCA, 7.1.1 Illegal code points).

my $entry = <<'ENTRIES';
0000 ; [.0020.0000.0000.0000] # [0000] NULL
0001 ; [.0021.0000.0000.0001] # [0001] START OF HEADING
FFFE ; [.0022.0000.0000.FFFE] # <noncharacter-FFFE> (invalid)
FFFF ; [.0023.0000.0000.FFFF] # <noncharacter-FFFF> (invalid)
D800 ; [.0024.0000.0000.D800] # <surrogate-D800> (invalid)
DFFF ; [.0025.0000.0000.DFFF] # <surrogate-DFFF> (invalid)
FDD0 ; [.0026.0000.0000.FDD0] # <noncharacter-FDD0> (invalid)
FDEF ; [.0027.0000.0000.FDEF] # <noncharacter-FDEF> (invalid)
0002 ; [.0030.0000.0000.0002] # [0002] START OF TEXT
10FFFF; [.0040.0000.0000.10FFFF] # <noncharacter-10FFFF> (invalid)
110000; [.0041.0000.0000.110000] # <out-of-range 110000> (invalid)
0041 ; [.1000.0020.0008.0041] # latin A
0041 0000 ; [.1100.0020.0008.0041] # latin A + NULL
0041 FFFF ; [.1200.0020.0008.0041] # latin A + FFFF (invalid)
ENTRIES

##################

# UCA_Version 20: the assertions below expect the entries for noncharacters,
# surrogates and out-of-range code points to have no effect (they compare
# equal to the empty string), while NULL and the controls are tailored.
my $illeg = Unicode::Collate->new(
    entry => $entry,
    level => 1,
    table => undef,
    normalization => undef,
    UCA_Version => 20,
);

# 2..12
ok($illeg->lt("", "\x00"));
ok($illeg->lt("", "\x01"));
ok($illeg->eq("", "\x{FFFE}"));
ok($illeg->eq("", "\x{FFFF}"));
ok($illeg->eq("", "\x{D800}"));
ok($illeg->eq("", "\x{DFFF}"));
ok($illeg->eq("", "\x{FDD0}"));
ok($illeg->eq("", "\x{FDEF}"));
ok($illeg->lt("", "\x02"));
ok($illeg->eq("", "\x{10FFFF}"));
ok($illeg->eq("", "\x{110000}"));

# 13..22
ok($illeg->lt("\x00", "\x01"));
ok($illeg->lt("\x01", "\x02"));
ok($illeg->ne("\0", "\x{D800}"));
ok($illeg->ne("\0", "\x{DFFF}"));
ok($illeg->ne("\0", "\x{FDD0}"));
ok($illeg->ne("\0", "\x{FDEF}"));
ok($illeg->ne("\0", "\x{FFFE}"));
ok($illeg->ne("\0", "\x{FFFF}"));
ok($illeg->ne("\0", "\x{10FFFF}"));
ok($illeg->ne("\0", "\x{110000}"));

# 23..26
ok($illeg->eq("A", "A\x{FFFF}"));
ok($illeg->gt("A\0", "A\x{FFFF}"));
ok($illeg->lt("A", "A\0"));
ok($illeg->lt("AA", "A\0"));

##################

# UCA_Version 22: the same table, but here the assertions expect the entries
# for noncharacters and surrogates to take effect and sort in table order;
# only U+110000 is still expected to compare equal to the empty string.
my $nonch = Unicode::Collate->new(
    entry => $entry,
    level => 1,
    table => undef,
    normalization => undef,
    UCA_Version => 22,
);

# 27..37
ok($nonch->lt("", "\x00"));
ok($nonch->lt("", "\x01"));
ok($nonch->lt("", "\x{FFFE}"));
ok($nonch->lt("", "\x{FFFF}"));
ok($nonch->lt("", "\x{D800}"));
ok($nonch->lt("", "\x{DFFF}"));
ok($nonch->lt("", "\x{FDD0}"));
ok($nonch->lt("", "\x{FDEF}"));
ok($nonch->lt("", "\x02"));
ok($nonch->lt("", "\x{10FFFF}"));
ok($nonch->eq("", "\x{110000}"));

# 38..47
ok($nonch->lt("\x00", "\x01"));
ok($nonch->lt("\x01", "\x{FFFE}"));
ok($nonch->lt("\x{FFFE}", "\x{FFFF}"));
ok($nonch->lt("\x{FFFF}", "\x{D800}"));
ok($nonch->lt("\x{D800}", "\x{DFFF}"));
ok($nonch->lt("\x{DFFF}", "\x{FDD0}"));
ok($nonch->lt("\x{FDD0}", "\x{FDEF}"));
ok($nonch->lt("\x{FDEF}", "\x02"));
ok($nonch->lt("\x02", "\x{10FFFF}"));
ok($nonch->gt("\x{10FFFF}", "\x{110000}"));

# 48..51
ok($nonch->lt("A", "A\x{FFFF}"));
ok($nonch->lt("A\0", "A\x{FFFF}"));
ok($nonch->lt("A", "A\0"));
ok($nonch->lt("AA", "A\0"));

##################

my $Collator = Unicode::Collate->new(
    table => 'keys.txt',
    level => 1,
    normalization => undef,
    UCA_Version => 8,
);

# Each string, once "rl" is appended, is searched for "pe"; the matched
# portion is expected to be the whole string given here.
my @ret = (
    "Pe\x{300}\x{301}",
    "Pe\x{300}\0\0\x{301}",
    "Pe\x{DA00}\x{301}\x{DFFF}",
    "Pe\x{FFFF}\x{301}",
    "Pe\x{110000}\x{301}",
    "Pe\x{300}\x{d801}\x{301}",
    "Pe\x{300}\x{ffff}\x{301}",
    "Pe\x{300}\x{110000}\x{301}",
    "Pe\x{D9ab}\x{DFFF}",
    "Pe\x{FFFF}",
    "Pe\x{110000}",
    "Pe\x{300}\x{D800}\x{DFFF}",
    "Pe\x{300}\x{FFFF}",
    "Pe\x{300}\x{110000}",
);

# 52..65
for my $ret (@ret) {
    my $str = $ret."rl";
    my($match) = $Collator->match($str, "pe");
    # Two-argument ok() compares as strings and treats an undef $match as a
    # plain failure, without an "uninitialized value" warning.
    ok($match, $ret);
}