1b39c5158Smillert 2b39c5158SmillertBEGIN { 3*eac174f2Safresh1 unless (5.008 <= $]) { 4*eac174f2Safresh1 print "1..0 # skipped: Perl 5.8.0 or later needed for this test\n"; 5*eac174f2Safresh1 print $@; 6*eac174f2Safresh1 exit; 7b39c5158Smillert } 8b39c5158Smillert if ($ENV{PERL_CORE}) { 9b39c5158Smillert chdir('t') if -d 't'; 10b39c5158Smillert @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); 11b39c5158Smillert } 12b39c5158Smillert} 13b39c5158Smillert 14898184e3Ssthenuse strict; 15898184e3Ssthenuse warnings; 16*eac174f2Safresh1BEGIN { $| = 1; print "1..176\n"; } # 81 + 5 x @Versions 17898184e3Ssthenmy $count = 0; 18898184e3Ssthensub ok ($;$) { 19898184e3Ssthen my $p = my $r = shift; 20898184e3Ssthen if (@_) { 21898184e3Ssthen my $x = shift; 22898184e3Ssthen $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; 23898184e3Ssthen } 24898184e3Ssthen print $p ? "ok" : "not ok", ' ', ++$count, "\n"; 25898184e3Ssthen} 26b39c5158Smillert 27*eac174f2Safresh1use Unicode::Collate; 28*eac174f2Safresh1 29b39c5158Smillertok(1); 30b39c5158Smillert 31*eac174f2Safresh1sub _pack_U { Unicode::Collate::pack_U(@_) } 32*eac174f2Safresh1sub _unpack_U { Unicode::Collate::unpack_U(@_) } 33*eac174f2Safresh1 34b39c5158Smillert######################### 35b39c5158Smillert 36b39c5158Smillertno warnings 'utf8'; 37b39c5158Smillert 38b39c5158Smillert# NULL is tailorable but illegal code points are not. 39b39c5158Smillert# illegal code points should be always ingored 40b39c5158Smillert# (cf. UCA, 7.1.1 Illegal code points). 41b39c5158Smillert 42898184e3Ssthenmy $entry = <<'ENTRIES'; 43b39c5158Smillert0000 ; [.0020.0000.0000.0000] # [0000] NULL 44b39c5158Smillert0001 ; [.0021.0000.0000.0001] # [0001] START OF HEADING 45b39c5158SmillertFFFE ; [.0022.0000.0000.FFFE] # <noncharacter-FFFE> (invalid) 46b39c5158SmillertFFFF ; [.0023.0000.0000.FFFF] # <noncharacter-FFFF> (invalid) 47b39c5158SmillertD800 ; [.0024.0000.0000.D800] # <surrogate-D800> (invalid) 48b39c5158SmillertDFFF ; [.0025.0000.0000.DFFF] # <surrogate-DFFF> (invalid) 49b39c5158SmillertFDD0 ; [.0026.0000.0000.FDD0] # <noncharacter-FDD0> (invalid) 50b39c5158SmillertFDEF ; [.0027.0000.0000.FDEF] # <noncharacter-FDEF> (invalid) 51b39c5158Smillert0002 ; [.0030.0000.0000.0002] # [0002] START OF TEXT 52b39c5158Smillert10FFFF; [.0040.0000.0000.10FFFF] # <noncharacter-10FFFF> (invalid) 53b39c5158Smillert110000; [.0041.0000.0000.110000] # <out-of-range 110000> (invalid) 54b39c5158Smillert0041 ; [.1000.0020.0008.0041] # latin A 55b39c5158Smillert0041 0000 ; [.1100.0020.0008.0041] # latin A + NULL 56b39c5158Smillert0041 FFFF ; [.1200.0020.0008.0041] # latin A + FFFF (invalid) 57b39c5158SmillertENTRIES 58898184e3Ssthen 59898184e3Ssthen################## 60898184e3Ssthen 61898184e3Ssthenmy $illeg = Unicode::Collate->new( 62898184e3Ssthen entry => $entry, 63b39c5158Smillert level => 1, 64b39c5158Smillert table => undef, 65b39c5158Smillert normalization => undef, 66898184e3Ssthen UCA_Version => 20, 67b39c5158Smillert); 68b39c5158Smillert 69b39c5158Smillert# 2..12 70b39c5158Smillertok($illeg->lt("", "\x00")); 71b39c5158Smillertok($illeg->lt("", "\x01")); 72b39c5158Smillertok($illeg->eq("", "\x{FFFE}")); 73b39c5158Smillertok($illeg->eq("", "\x{FFFF}")); 74b39c5158Smillertok($illeg->eq("", "\x{D800}")); 75b39c5158Smillertok($illeg->eq("", "\x{DFFF}")); 76b39c5158Smillertok($illeg->eq("", "\x{FDD0}")); 77b39c5158Smillertok($illeg->eq("", "\x{FDEF}")); 78b39c5158Smillertok($illeg->lt("", "\x02")); 79b39c5158Smillertok($illeg->eq("", "\x{10FFFF}")); 80b39c5158Smillertok($illeg->eq("", "\x{110000}")); 81b39c5158Smillert 82b39c5158Smillert# 13..22 83b39c5158Smillertok($illeg->lt("\x00", "\x01")); 84b39c5158Smillertok($illeg->lt("\x01", "\x02")); 85b39c5158Smillertok($illeg->ne("\0", "\x{D800}")); 86b39c5158Smillertok($illeg->ne("\0", "\x{DFFF}")); 87b39c5158Smillertok($illeg->ne("\0", "\x{FDD0}")); 88b39c5158Smillertok($illeg->ne("\0", "\x{FDEF}")); 89b39c5158Smillertok($illeg->ne("\0", "\x{FFFE}")); 90b39c5158Smillertok($illeg->ne("\0", "\x{FFFF}")); 91b39c5158Smillertok($illeg->ne("\0", "\x{10FFFF}")); 92b39c5158Smillertok($illeg->ne("\0", "\x{110000}")); 93b39c5158Smillert 94b39c5158Smillert# 23..26 95b39c5158Smillertok($illeg->eq("A", "A\x{FFFF}")); 96b39c5158Smillertok($illeg->gt("A\0", "A\x{FFFF}")); 97b39c5158Smillertok($illeg->lt("A", "A\0")); 98b39c5158Smillertok($illeg->lt("AA", "A\0")); 99b39c5158Smillert 100b39c5158Smillert################## 101b39c5158Smillert 102898184e3Ssthenmy $nonch = Unicode::Collate->new( 103898184e3Ssthen entry => $entry, 104898184e3Ssthen level => 1, 105898184e3Ssthen table => undef, 106898184e3Ssthen normalization => undef, 107898184e3Ssthen UCA_Version => 22, 108898184e3Ssthen); 109898184e3Ssthen 110898184e3Ssthen# 27..37 111898184e3Ssthenok($nonch->lt("", "\x00")); 112898184e3Ssthenok($nonch->lt("", "\x01")); 113898184e3Ssthenok($nonch->lt("", "\x{FFFE}")); 114898184e3Ssthenok($nonch->lt("", "\x{FFFF}")); 115898184e3Ssthenok($nonch->lt("", "\x{D800}")); 116898184e3Ssthenok($nonch->lt("", "\x{DFFF}")); 117898184e3Ssthenok($nonch->lt("", "\x{FDD0}")); 118898184e3Ssthenok($nonch->lt("", "\x{FDEF}")); 119898184e3Ssthenok($nonch->lt("", "\x02")); 120898184e3Ssthenok($nonch->lt("", "\x{10FFFF}")); 1216fb12b70Safresh1ok($nonch->lt("", "\x{110000}")); 122898184e3Ssthen 123898184e3Ssthen# 38..47 124898184e3Ssthenok($nonch->lt("\x00", "\x01")); 125898184e3Ssthenok($nonch->lt("\x01", "\x{FFFE}")); 126898184e3Ssthenok($nonch->lt("\x{FFFE}", "\x{FFFF}")); 127898184e3Ssthenok($nonch->lt("\x{FFFF}", "\x{D800}")); 128898184e3Ssthenok($nonch->lt("\x{D800}", "\x{DFFF}")); 129898184e3Ssthenok($nonch->lt("\x{DFFF}", "\x{FDD0}")); 130898184e3Ssthenok($nonch->lt("\x{FDD0}", "\x{FDEF}")); 131898184e3Ssthenok($nonch->lt("\x{FDEF}", "\x02")); 132898184e3Ssthenok($nonch->lt("\x02", "\x{10FFFF}")); 1336fb12b70Safresh1ok($nonch->lt("\x{10FFFF}", "\x{110000}")); 134898184e3Ssthen 135898184e3Ssthen# 48..51 136898184e3Ssthenok($nonch->lt("A", "A\x{FFFF}")); 137898184e3Ssthenok($nonch->lt("A\0", "A\x{FFFF}")); 138898184e3Ssthenok($nonch->lt("A", "A\0")); 139898184e3Ssthenok($nonch->lt("AA", "A\0")); 140898184e3Ssthen 141898184e3Ssthen################## 142b39c5158Smillert 143b39c5158Smillertmy $Collator = Unicode::Collate->new( 144b39c5158Smillert table => 'keys.txt', 145b39c5158Smillert level => 1, 146b39c5158Smillert normalization => undef, 147898184e3Ssthen UCA_Version => 8, 148b39c5158Smillert); 149b39c5158Smillert 150898184e3Ssthenmy @ret = ( 151898184e3Ssthen "Pe\x{300}\x{301}", 152898184e3Ssthen "Pe\x{300}\0\0\x{301}", 153898184e3Ssthen "Pe\x{DA00}\x{301}\x{DFFF}", 154898184e3Ssthen "Pe\x{FFFF}\x{301}", 155898184e3Ssthen "Pe\x{110000}\x{301}", 156898184e3Ssthen "Pe\x{300}\x{d801}\x{301}", 157898184e3Ssthen "Pe\x{300}\x{ffff}\x{301}", 158898184e3Ssthen "Pe\x{300}\x{110000}\x{301}", 159898184e3Ssthen "Pe\x{D9ab}\x{DFFF}", 160898184e3Ssthen "Pe\x{FFFF}", 161898184e3Ssthen "Pe\x{110000}", 162898184e3Ssthen "Pe\x{300}\x{D800}\x{DFFF}", 163898184e3Ssthen "Pe\x{300}\x{FFFF}", 164898184e3Ssthen "Pe\x{300}\x{110000}", 165898184e3Ssthen); 166b39c5158Smillert 167898184e3Ssthen# 52..65 168898184e3Ssthenfor my $ret (@ret) { 169898184e3Ssthen my $str = $ret."rl"; 170898184e3Ssthen my($match) = $Collator->match($str, "pe"); 171898184e3Ssthen ok($match eq $ret); 172898184e3Ssthen} 173b39c5158Smillert 1746fb12b70Safresh1################## 1756fb12b70Safresh1 1766fb12b70Safresh1my $out = Unicode::Collate->new( 1776fb12b70Safresh1 level => 1, 1786fb12b70Safresh1 table => undef, 1796fb12b70Safresh1 normalization => undef, 1806fb12b70Safresh1 overrideOut => sub { 0xFFFD }, 1816fb12b70Safresh1); 1826fb12b70Safresh1 183*eac174f2Safresh1my @Versions = ( 8, 9, 11, 14, 16, 18, 20, 22, 24, 26, 184*eac174f2Safresh1 28, 30, 32, 34, 36, 38, 40, 41, 43); 1856fb12b70Safresh1 1866fb12b70Safresh1for my $v (@Versions) { 1876fb12b70Safresh1 $out->change(UCA_Version => $v); 1886fb12b70Safresh1 ok($out->cmp('', "\x{10FFFF}") == ($v >= 22 ? -1 : 0)); 1896fb12b70Safresh1 ok($out->cmp('', "\x{110000}") == ($v >= 22 ? -1 : 0)); 1906fb12b70Safresh1 ok($out->cmp('ABC', "\x{110000}") == ($v >= 22 ? -1 : 1)); 1916fb12b70Safresh1 ok($out->cmp("\x{10FFFD}", "\x{110000}") == ($v >= 22 ? -1 : 1)); 1926fb12b70Safresh1 ok($out->cmp("\x{11FFFD}", "\x{110000}") == ($v >= 22 ? 0 : 0)); 1936fb12b70Safresh1} 1946fb12b70Safresh1 1956fb12b70Safresh1# x+66..x+77 1966fb12b70Safresh1ok($out->lt('ABC', "\x{123456}")); 1976fb12b70Safresh1ok($out->lt("\x{FFFD}", "\x{123456}")); 1986fb12b70Safresh1 1996fb12b70Safresh1$out->change(overrideOut => sub {()}); 2006fb12b70Safresh1 2016fb12b70Safresh1ok($out->eq('', "\x{123456}")); 2026fb12b70Safresh1ok($out->gt('ABC', "\x{123456}")); 2036fb12b70Safresh1ok($out->gt("\x{FFFD}", "\x{123456}")); 2046fb12b70Safresh1 2056fb12b70Safresh1$out->change(overrideOut => undef); 2066fb12b70Safresh1ok($out->lt('', "\x{123456}")); 2076fb12b70Safresh1ok($out->eq("\x{FFFD}", "\x{123456}")); 2086fb12b70Safresh1 2096fb12b70Safresh1$out->change(overrideOut => sub { 0xFFFD }); 2106fb12b70Safresh1 2116fb12b70Safresh1ok($out->lt('', "\x{123456}")); 2126fb12b70Safresh1ok($out->lt('ABC', "\x{123456}")); 2136fb12b70Safresh1ok($out->lt("\x{FFFD}", "\x{123456}")); 2146fb12b70Safresh1 2156fb12b70Safresh1$out->change(overrideOut => 0); 2166fb12b70Safresh1ok($out->lt('', "\x{123456}")); 2176fb12b70Safresh1ok($out->eq("\x{FFFD}", "\x{123456}")); 2186fb12b70Safresh1 2196fb12b70Safresh1$out->change(overrideOut => sub { undef }); 2206fb12b70Safresh1ok($out->lt('', "\x{123456}")); 2216fb12b70Safresh1ok($out->eq("\x{FFFD}", "\x{123456}")); 2226fb12b70Safresh1ok($out->eq("\x{FFFD}", "\x{21FFFFF}")); 2236fb12b70Safresh1ok($out->eq("\x{FFFD}", "\x{2200000}")); 2246fb12b70Safresh1 225