# Checks how Unicode::Collate handles illegal code points (surrogates,
# noncharacters and out-of-range values) alongside NULL and other
# ordinary control characters.

BEGIN {
    # Skip the whole file if pack('U') cannot produce the character "A".
    unless ("A" eq pack('U', 0x41)) {
        print "1..0 # Unicode::Collate " .
            "cannot stringify a Unicode code point\n";
        exit 0;
    }
    # Inside the Perl core tree: run from t/ and load modules from the
    # core lib directory.
    if ($ENV{PERL_CORE}) {
        chdir('t') if -d 't';
        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use Test;
use strict;
use warnings;

BEGIN {
    use Unicode::Collate;

    # Skip unless the XS bootstrap sub exists or perl is 5.8.0 or later.
    unless (exists &Unicode::Collate::bootstrap or 5.008 <= $]) {
        print "1..0 # skipped: XSUB, or Perl 5.8.0 or later".
            " needed for this test\n";
        print $@;
        exit;
    }
}

BEGIN { plan tests => 40 };

ok(1);

#########################

# NOTE(review): presumably silences warnings triggered by the surrogate and
# noncharacter escapes in the string literals below -- confirm.
no warnings 'utf8';

# NULL is tailorable but illegal code points are not.
# illegal code points should be always ignored
# (cf. UCA, 7.1.1 Illegal code points).

# Collator built only from the inline entries (no table file), compared at
# level 1.  The table deliberately lists entries for illegal code points so
# the tests can verify that those entries have no effect.
my $illeg = Unicode::Collate->new(
  entry => <<'ENTRIES',
0000 ; [.0020.0000.0000.0000] # [0000] NULL
0001 ; [.0021.0000.0000.0001] # [0001] START OF HEADING
FFFE ; [.0022.0000.0000.FFFE] # <noncharacter-FFFE> (invalid)
FFFF ; [.0023.0000.0000.FFFF] # <noncharacter-FFFF> (invalid)
D800 ; [.0024.0000.0000.D800] # <surrogate-D800> (invalid)
DFFF ; [.0025.0000.0000.DFFF] # <surrogate-DFFF> (invalid)
FDD0 ; [.0026.0000.0000.FDD0] # <noncharacter-FDD0> (invalid)
FDEF ; [.0027.0000.0000.FDEF] # <noncharacter-FDEF> (invalid)
0002 ; [.0030.0000.0000.0002] # [0002] START OF TEXT
10FFFF; [.0040.0000.0000.10FFFF] # <noncharacter-10FFFF> (invalid)
110000; [.0041.0000.0000.110000] # <out-of-range 110000> (invalid)
0041 ; [.1000.0020.0008.0041] # latin A
0041 0000 ; [.1100.0020.0008.0041] # latin A + NULL
0041 FFFF ; [.1200.0020.0008.0041] # latin A + FFFF (invalid)
ENTRIES
  level => 1,
  table => undef,
  normalization => undef,
);

# 2..12
# U+0000 and U+0001 are expected to sort after the empty string, while each
# illegal code point is expected to compare equal to the empty string.
ok($illeg->lt("", "\x00"));
ok($illeg->lt("", "\x01"));
ok($illeg->eq("", "\x{FFFE}"));
ok($illeg->eq("", "\x{FFFF}"));
ok($illeg->eq("", "\x{D800}"));
ok($illeg->eq("", "\x{DFFF}"));
ok($illeg->eq("", "\x{FDD0}"));
# (2..12 continued)
# U+0002 is expected to sort after the empty string; the remaining illegal
# code points are expected to compare equal to it.
ok($illeg->eq("", "\x{FDEF}"));
ok($illeg->lt("", "\x02"));
ok($illeg->eq("", "\x{10FFFF}"));
ok($illeg->eq("", "\x{110000}"));

# 13..22
# The tailored control characters keep their relative order, and NULL is
# expected to differ from every illegal code point.
ok($illeg->lt("\x00", "\x01"));
ok($illeg->lt("\x01", "\x02"));
ok($illeg->ne("\0", "\x{D800}"));
ok($illeg->ne("\0", "\x{DFFF}"));
ok($illeg->ne("\0", "\x{FDD0}"));
ok($illeg->ne("\0", "\x{FDEF}"));
ok($illeg->ne("\0", "\x{FFFE}"));
ok($illeg->ne("\0", "\x{FFFF}"));
ok($illeg->ne("\0", "\x{10FFFF}"));
ok($illeg->ne("\0", "\x{110000}"));

# 23..26
# "A" followed by U+FFFF is expected to equal plain "A", whereas "A"
# followed by NULL is expected to sort after "A", "A\x{FFFF}" and "AA".
ok($illeg->eq("A", "A\x{FFFF}"));
ok($illeg->gt("A\0", "A\x{FFFF}"));
ok($illeg->lt("A", "A\0"));
ok($illeg->lt("AA", "A\0"));

##################

my($match, $str, $sub, $ret);

# Level-1 collator loaded from the keys.txt table, without normalization;
# used for all match() tests below.
my $Collator = Unicode::Collate->new(
  table => 'keys.txt',
  level => 1,
  normalization => undef,
);

# Substring searched for in every match() test.
$sub = "pe";

# 27..34
# Each test calls match() in list context and compares the first returned
# element with $ret: "Pe" plus the code points that follow it, up to but
# excluding the final "rl".

$str = "Pe\x{300}\x{301}rl";
$ret = "Pe\x{300}\x{301}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);

$str = "Pe\x{300}\0\0\x{301}rl";
$ret = "Pe\x{300}\0\0\x{301}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);

$str = "Pe\x{DA00}\x{301}\x{DFFF}rl";
$ret = "Pe\x{DA00}\x{301}\x{DFFF}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);

$str = "Pe\x{FFFF}\x{301}rl";
$ret = "Pe\x{FFFF}\x{301}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);

$str = "Pe\x{110000}\x{301}rl";
$ret = "Pe\x{110000}\x{301}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);

$str = "Pe\x{300}\x{d801}\x{301}rl";
$ret = "Pe\x{300}\x{d801}\x{301}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);

$str = "Pe\x{300}\x{ffff}\x{301}rl";
$ret = "Pe\x{300}\x{ffff}\x{301}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);

$str = "Pe\x{300}\x{110000}\x{301}rl";
$ret = "Pe\x{300}\x{110000}\x{301}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);

# 35..40
# In these cases the illegal code points (surrogates, U+FFFF, U+110000) come
# last before "rl", either directly after "Pe" or after U+0300.  The
# expected match ($ret) still includes them.

$str = "Pe\x{D9ab}\x{DFFF}rl";
$ret = "Pe\x{D9ab}\x{DFFF}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);

$str = "Pe\x{FFFF}rl";
$ret = "Pe\x{FFFF}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);

$str = "Pe\x{110000}rl";
$ret = "Pe\x{110000}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);

$str = "Pe\x{300}\x{D800}\x{DFFF}rl";
$ret = "Pe\x{300}\x{D800}\x{DFFF}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);

$str = "Pe\x{300}\x{FFFF}rl";
$ret = "Pe\x{300}\x{FFFF}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);

$str = "Pe\x{300}\x{110000}rl";
$ret = "Pe\x{300}\x{110000}";
($match) = $Collator->match($str, $sub);
ok($match, $ret);
