# Tests how Unicode::Collate treats NULL, control characters and illegal
# code points (surrogates, noncharacters, code points above U+10FFFF),
# both when comparing strings and when searching with match().

# Skip the whole file unless pack('U', 0x41) yields "A"; the skip message
# below explains that a Unicode code point cannot be stringified otherwise.
BEGIN {
    unless ("A" eq pack('U', 0x41)) {
        print "1..0 # Unicode::Collate " .
            "cannot stringify a Unicode code point\n";
        exit 0;
    }
}

# When PERL_CORE is set, run from the t/ directory (if present) and point
# @INC at the library directory relative to it.
BEGIN {
    if ($ENV{PERL_CORE}) {
        chdir('t') if -d 't';
        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use Test;
use strict;
use warnings;

# Skip unless Unicode::Collate provides a bootstrap sub (XSUB) or the
# running perl is 5.8.0 or later.
BEGIN {
    use Unicode::Collate;

    unless (exists &Unicode::Collate::bootstrap or 5.008 <= $]) {
        print "1..0 # skipped: XSUB, or Perl 5.8.0 or later".
            " needed for this test\n";
        print $@;
        exit;
    }
}

# 1 load check + 11 + 10 + 4 comparison tests + 14 match() tests.
BEGIN { plan tests => 40 };

ok(1);

#########################

# The strings below deliberately contain surrogates, noncharacters and
# code points beyond U+10FFFF.
no warnings 'utf8';

# NULL is tailorable but illegal code points are not.
# illegal code points should be always ignored
# (cf. UCA, 7.1.1 Illegal code points).

# The table gives every listed code point a primary weight, including the
# illegal ones, so the tests can check which entries actually take effect.
my $illeg = Unicode::Collate->new(
  entry => <<'ENTRIES',
0000 ; [.0020.0000.0000.0000] # [0000] NULL
0001 ; [.0021.0000.0000.0001] # [0001] START OF HEADING
FFFE ; [.0022.0000.0000.FFFE] # <noncharacter-FFFE> (invalid)
FFFF ; [.0023.0000.0000.FFFF] # <noncharacter-FFFF> (invalid)
D800 ; [.0024.0000.0000.D800] # <surrogate-D800> (invalid)
DFFF ; [.0025.0000.0000.DFFF] # <surrogate-DFFF> (invalid)
FDD0 ; [.0026.0000.0000.FDD0] # <noncharacter-FDD0> (invalid)
FDEF ; [.0027.0000.0000.FDEF] # <noncharacter-FDEF> (invalid)
0002 ; [.0030.0000.0000.0002] # [0002] START OF TEXT
10FFFF; [.0040.0000.0000.10FFFF] # <noncharacter-10FFFF> (invalid)
110000; [.0041.0000.0000.110000] # <out-of-range 110000> (invalid)
0041 ; [.1000.0020.0008.0041] # latin A
0041 0000 ; [.1100.0020.0008.0041] # latin A + NULL
0041 FFFF ; [.1200.0020.0008.0041] # latin A + FFFF (invalid)
ENTRIES
  level => 1,
  table => undef,
  normalization => undef,
);

# 2..12
# Tailored NULL/controls sort after the empty string; illegal code points
# compare equal to it.
ok($illeg->lt("", "\x00"));
ok($illeg->lt("", "\x01"));
ok($illeg->eq("", "\x{FFFE}"));
ok($illeg->eq("", "\x{FFFF}"));
ok($illeg->eq("", "\x{D800}"));
ok($illeg->eq("", "\x{DFFF}"));
ok($illeg->eq("", "\x{FDD0}"));
ok($illeg->eq("", "\x{FDEF}"));
ok($illeg->lt("", "\x02"));
ok($illeg->eq("", "\x{10FFFF}"));
ok($illeg->eq("", "\x{110000}"));

# 13..22
# NULL and the controls are ordered among themselves, and NULL differs
# from every illegal code point.
ok($illeg->lt("\x00", "\x01"));
ok($illeg->lt("\x01", "\x02"));
ok($illeg->ne("\0", "\x{D800}"));
ok($illeg->ne("\0", "\x{DFFF}"));
ok($illeg->ne("\0", "\x{FDD0}"));
ok($illeg->ne("\0", "\x{FDEF}"));
ok($illeg->ne("\0", "\x{FFFE}"));
ok($illeg->ne("\0", "\x{FFFF}"));
ok($illeg->ne("\0", "\x{10FFFF}"));
ok($illeg->ne("\0", "\x{110000}"));

# 23..26
# The "A + NULL" contraction takes effect; the "A + FFFF" one does not.
ok($illeg->eq("A",   "A\x{FFFF}"));
ok($illeg->gt("A\0", "A\x{FFFF}"));
ok($illeg->lt("A",   "A\0"));
ok($illeg->lt("AA",  "A\0"));

##################

my $Collator = Unicode::Collate->new(
  table => 'keys.txt',
  level => 1,
  normalization => undef,
);

my $sub = "pe";

# Each entry is the substring match() is expected to return when $sub is
# searched for in that entry followed by "rl": the match extends past "Pe"
# over the trailing \x{300}/\x{301}, NULs and illegal code points, and
# stops before "rl".
my @expected = (
    "Pe\x{300}\x{301}",
    "Pe\x{300}\0\0\x{301}",
    "Pe\x{DA00}\x{301}\x{DFFF}",
    "Pe\x{FFFF}\x{301}",
    "Pe\x{110000}\x{301}",
    "Pe\x{300}\x{d801}\x{301}",
    "Pe\x{300}\x{ffff}\x{301}",
    "Pe\x{300}\x{110000}\x{301}",
    "Pe\x{D9ab}\x{DFFF}",
    "Pe\x{FFFF}",
    "Pe\x{110000}",
    "Pe\x{300}\x{D800}\x{DFFF}",
    "Pe\x{300}\x{FFFF}",
    "Pe\x{300}\x{110000}",
);

# 27..40
for my $ret (@expected) {
    my $str = $ret . "rl";
    my ($match) = $Collator->match($str, $sub);

    # All of these tests share one source line, so on failure report the
    # code points of the input to identify the failing case.
    my $diag = "input: " . join(' ', map { sprintf '%04X', ord } split //, $str);
    ok($match, $ret, $diag);
}