1898184e3Ssthen 2898184e3SsthenBEGIN { 3b8851fccSafresh1 unless (5.008 <= $]) { 4b8851fccSafresh1 print "1..0 # skipped: Perl 5.8.0 or later needed for this test\n"; 5898184e3Ssthen print $@; 6898184e3Ssthen exit; 7898184e3Ssthen } 8*eac174f2Safresh1 if ($ENV{PERL_CORE}) { 9*eac174f2Safresh1 chdir('t') if -d 't'; 10*eac174f2Safresh1 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); 11*eac174f2Safresh1 } 12898184e3Ssthen} 13898184e3Ssthen 14898184e3Ssthenuse strict; 15898184e3Ssthenuse warnings; 1691f110e0Safresh1BEGIN { $| = 1; print "1..90\n"; } 17898184e3Ssthenmy $count = 0; 18898184e3Ssthensub ok ($;$) { 19898184e3Ssthen my $p = my $r = shift; 20898184e3Ssthen if (@_) { 21898184e3Ssthen my $x = shift; 22898184e3Ssthen $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; 23898184e3Ssthen } 24898184e3Ssthen print $p ? "ok" : "not ok", ' ', ++$count, "\n"; 25898184e3Ssthen} 26898184e3Ssthen 27*eac174f2Safresh1use Unicode::Collate; 28*eac174f2Safresh1 29898184e3Ssthenok(1); 30898184e3Ssthen 31*eac174f2Safresh1sub _pack_U { Unicode::Collate::pack_U(@_) } 32*eac174f2Safresh1sub _unpack_U { Unicode::Collate::unpack_U(@_) } 33*eac174f2Safresh1 34898184e3Ssthen######################### 35898184e3Ssthen 36898184e3Ssthenno warnings 'utf8'; 37898184e3Ssthen 38898184e3Ssthen# Unicode 6.0 Sorting 39898184e3Ssthen# 40898184e3Ssthen# Special Database Values. The data files for CLDR provide 41898184e3Ssthen# special weights for two noncharacters: 42898184e3Ssthen# 43898184e3Ssthen# 1. A special noncharacter <HIGH> (U+FFFF) for specification of a range 44898184e3Ssthen# in a database, allowing "Sch" <= X <= "Sch<HIGH>" to pick all strings 45898184e3Ssthen# starting with "sch" plus those that sort equivalently. 46898184e3Ssthen# 2. A special noncharacter <LOW> (U+FFFE) for merged database fields, 47898184e3Ssthen# allowing "Disi\x{301}lva<LOW>John" to sort next to "Disilva<LOW>John". 48898184e3Ssthen 49898184e3Ssthenmy $entry = <<'ENTRIES'; 5091f110e0Safresh1FFFE ; [.0001.0020.0005.FFFE] # <noncharacter-FFFE> 51898184e3SsthenFFFF ; [.FFFE.0020.0005.FFFF] # <noncharacter-FFFF> 52898184e3SsthenENTRIES 53898184e3Ssthen 54898184e3Ssthenmy @disilva = ("di Silva", "diSilva", "di Si\x{301}lva", "diSi\x{301}lva"); 55898184e3Ssthenmy @dsf = map "$_\x{FFFE}Fred", @disilva; 56898184e3Ssthenmy @dsj = map "$_\x{FFFE}John", @disilva; 57898184e3Ssthenmy @dsJ = map "$_ John", @disilva; 58898184e3Ssthen 59898184e3Ssthenfor my $norm (undef, 'NFD') { 60898184e3Ssthen if (defined $norm) { 61898184e3Ssthen eval { require Unicode::Normalize }; 62898184e3Ssthen if ($@) { 6391f110e0Safresh1 ok(1) for 1..34; # silent skip 64898184e3Ssthen next; 65898184e3Ssthen } 66898184e3Ssthen } 67898184e3Ssthen 68898184e3Ssthen my $coll = Unicode::Collate->new( 69898184e3Ssthen table => 'keys.txt', 70898184e3Ssthen level => 1, 71898184e3Ssthen normalization => $norm, 72898184e3Ssthen UCA_Version => 22, 73898184e3Ssthen entry => $entry, 74898184e3Ssthen ); 75898184e3Ssthen 76898184e3Ssthen # 1..4 77898184e3Ssthen ok($coll->lt("\x{FFFD}", "\x{FFFF}")); 78898184e3Ssthen ok($coll->lt("\x{1FFFD}", "\x{1FFFF}")); 79898184e3Ssthen ok($coll->lt("\x{2FFFD}", "\x{2FFFF}")); 80898184e3Ssthen ok($coll->lt("\x{10FFFD}", "\x{10FFFF}")); 81898184e3Ssthen 82898184e3Ssthen # 5..14 83898184e3Ssthen ok($coll->lt("perl\x{FFFD}", "perl\x{FFFF}")); 84898184e3Ssthen ok($coll->lt("perl\x{1FFFD}", "perl\x{FFFF}")); 85898184e3Ssthen ok($coll->lt("perl\x{1FFFE}", "perl\x{FFFF}")); 86898184e3Ssthen ok($coll->lt("perl\x{1FFFF}", "perl\x{FFFF}")); 87898184e3Ssthen ok($coll->lt("perl\x{2FFFD}", "perl\x{FFFF}")); 88898184e3Ssthen ok($coll->lt("perl\x{2FFFE}", "perl\x{FFFF}")); 89898184e3Ssthen ok($coll->lt("perl\x{2FFFF}", "perl\x{FFFF}")); 90898184e3Ssthen ok($coll->lt("perl\x{10FFFD}", "perl\x{FFFF}")); 91898184e3Ssthen ok($coll->lt("perl\x{10FFFE}", "perl\x{FFFF}")); 92898184e3Ssthen ok($coll->lt("perl\x{10FFFF}", "perl\x{FFFF}")); 93898184e3Ssthen 94898184e3Ssthen # 15..16 95898184e3Ssthen ok($coll->gt("perl\x{FFFF}AB", "perl\x{FFFF}")); 96898184e3Ssthen ok($coll->lt("perl\x{FFFF}\x{10FFFF}", "perl\x{FFFF}\x{FFFF}")); 97898184e3Ssthen 98898184e3Ssthen $coll->change(level => 4); 99898184e3Ssthen 100898184e3Ssthen # 17..25 101898184e3Ssthen for my $i (0 .. $#disilva - 1) { 102898184e3Ssthen ok($coll->lt($dsf[$i], $dsf[$i+1])); 103898184e3Ssthen ok($coll->lt($dsj[$i], $dsj[$i+1])); 104898184e3Ssthen ok($coll->lt($dsJ[$i], $dsJ[$i+1])); 105898184e3Ssthen } 106898184e3Ssthen 107898184e3Ssthen # 26 108898184e3Ssthen ok($coll->lt($dsf[-1], $dsj[0])); 109898184e3Ssthen 11091f110e0Safresh1 $coll->change(level => 1); 11191f110e0Safresh1 11291f110e0Safresh1 # 27..34 113898184e3Ssthen for my $i (0 .. $#disilva) { 11491f110e0Safresh1 ok($coll->lt($dsf[$i], $dsJ[$i])); 115898184e3Ssthen ok($coll->lt($dsj[$i], $dsJ[$i])); 116898184e3Ssthen } 117898184e3Ssthen} 118898184e3Ssthen 11991f110e0Safresh1# 69 12091f110e0Safresh1 12191f110e0Safresh1{ 12291f110e0Safresh1 my $coll = Unicode::Collate->new( 12391f110e0Safresh1 table => 'keys.txt', 12491f110e0Safresh1 normalization => undef, 12591f110e0Safresh1 highestFFFF => 1, 12691f110e0Safresh1 minimalFFFE => 1, 12791f110e0Safresh1 ); 12891f110e0Safresh1 12991f110e0Safresh1 $coll->change(level => 1); 13091f110e0Safresh1 ok($coll->lt("perl\x{FFFD}", "perl\x{FFFF}")); 13191f110e0Safresh1 ok($coll->lt("perl\x{1FFFD}", "perl\x{FFFF}")); 13291f110e0Safresh1 ok($coll->lt("perl\x{1FFFE}", "perl\x{FFFF}")); 13391f110e0Safresh1 ok($coll->lt("perl\x{1FFFF}", "perl\x{FFFF}")); 13491f110e0Safresh1 ok($coll->lt("perl\x{2FFFD}", "perl\x{FFFF}")); 13591f110e0Safresh1 ok($coll->lt("perl\x{2FFFE}", "perl\x{FFFF}")); 13691f110e0Safresh1 ok($coll->lt("perl\x{2FFFF}", "perl\x{FFFF}")); 13791f110e0Safresh1 ok($coll->lt("perl\x{10FFFD}", "perl\x{FFFF}")); 13891f110e0Safresh1 ok($coll->lt("perl\x{10FFFE}", "perl\x{FFFF}")); 13991f110e0Safresh1 ok($coll->lt("perl\x{10FFFF}", "perl\x{FFFF}")); 14091f110e0Safresh1 14191f110e0Safresh1# 79 14291f110e0Safresh1 14391f110e0Safresh1 $coll->change(level => 3); 14491f110e0Safresh1 my @list = ( 14591f110e0Safresh1 "ab\x{FFFE}a", 14691f110e0Safresh1 "Ab\x{FFFE}a", 14791f110e0Safresh1 "ab\x{FFFE}c", 14891f110e0Safresh1 "Ab\x{FFFE}c", 14991f110e0Safresh1 "ab\x{FFFE}xyz", 15091f110e0Safresh1 "abc\x{FFFE}def", 15191f110e0Safresh1 "abc\x{FFFE}xYz", 15291f110e0Safresh1 "aBc\x{FFFE}xyz", 15391f110e0Safresh1 "abcX\x{FFFE}def", 15491f110e0Safresh1 "abcx\x{FFFE}xyz", 15591f110e0Safresh1 "b\x{FFFE}aaa", 15691f110e0Safresh1 "bbb\x{FFFE}a", 15791f110e0Safresh1 ); 15891f110e0Safresh1 my $p = shift @list; 15991f110e0Safresh1 for my $c (@list) { 16091f110e0Safresh1 ok($coll->lt($p, $c)); 16191f110e0Safresh1 $p = $c; 16291f110e0Safresh1 } 16391f110e0Safresh1} 16491f110e0Safresh1 16591f110e0Safresh1# 90 166