191f110e0Safresh1 291f110e0Safresh1BEGIN { 391f110e0Safresh1 if ($ENV{PERL_CORE}) { 491f110e0Safresh1 chdir('t') if -d 't'; 591f110e0Safresh1 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); 691f110e0Safresh1 } 791f110e0Safresh1} 891f110e0Safresh1 991f110e0Safresh1use strict; 1091f110e0Safresh1use warnings; 1191f110e0Safresh1BEGIN { $| = 1; print "1..32\n"; } 1291f110e0Safresh1my $count = 0; 1391f110e0Safresh1sub ok ($;$) { 1491f110e0Safresh1 my $p = my $r = shift; 1591f110e0Safresh1 if (@_) { 1691f110e0Safresh1 my $x = shift; 1791f110e0Safresh1 $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; 1891f110e0Safresh1 } 1991f110e0Safresh1 print $p ? "ok" : "not ok", ' ', ++$count, "\n"; 2091f110e0Safresh1} 2191f110e0Safresh1 2291f110e0Safresh1use Unicode::Collate; 2391f110e0Safresh1 2491f110e0Safresh1ok(1); 2591f110e0Safresh1 26*256a93a4Safresh1sub _pack_U { Unicode::Collate::pack_U(@_) } 27*256a93a4Safresh1sub _unpack_U { Unicode::Collate::unpack_U(@_) } 28*256a93a4Safresh1 2991f110e0Safresh1######################### 3091f110e0Safresh1 3191f110e0Safresh1{ 3291f110e0Safresh1 # Table is undefined, then no entry is defined. 3391f110e0Safresh1 my $undef_table = Unicode::Collate->new( 3491f110e0Safresh1 table => undef, 3591f110e0Safresh1 normalization => undef, 3691f110e0Safresh1 level => 1, 3791f110e0Safresh1 ); 3891f110e0Safresh1 3991f110e0Safresh1 # in the Unicode code point order 4091f110e0Safresh1 ok($undef_table->lt('', 'A')); 4191f110e0Safresh1 ok($undef_table->lt('ABC', 'B')); 4291f110e0Safresh1 4391f110e0Safresh1 # Hangul should be decomposed (even w/o Unicode::Normalize). 4491f110e0Safresh1 ok($undef_table->lt("Perl", "\x{AC00}")); 4591f110e0Safresh1 ok($undef_table->eq("\x{AC00}", "\x{1100}\x{1161}")); 4691f110e0Safresh1 ok($undef_table->eq("\x{AE00}", "\x{1100}\x{1173}\x{11AF}")); 4791f110e0Safresh1 ok($undef_table->lt("\x{AE00}", "\x{3042}")); 4891f110e0Safresh1 4991f110e0Safresh1 # U+AC00: Hangul GA 5091f110e0Safresh1 # U+AE00: Hangul GEUL 5191f110e0Safresh1 # U+3042: Hiragana A 5291f110e0Safresh1 5391f110e0Safresh1 # Weight for CJK Ideographs is defined, though. 5491f110e0Safresh1 ok($undef_table->lt("", "\x{4E00}")); 5591f110e0Safresh1 ok($undef_table->lt("\x{4E8C}","ABC")); 5691f110e0Safresh1 ok($undef_table->lt("\x{4E00}","\x{3042}")); 5791f110e0Safresh1 ok($undef_table->lt("\x{4E00}","\x{4E8C}")); 5891f110e0Safresh1 5991f110e0Safresh1# 11 6091f110e0Safresh1 6191f110e0Safresh1 # U+4E00: Ideograph "ONE" 6291f110e0Safresh1 # U+4E8C: Ideograph "TWO" 6391f110e0Safresh1 6491f110e0Safresh1 for my $v ('', 8, 9, 11, 14) { 6591f110e0Safresh1 $undef_table->change(UCA_Version => $v) if $v; 6691f110e0Safresh1 ok($undef_table->lt("\x{4E00}","\0")); 6791f110e0Safresh1 } 6891f110e0Safresh1} 6991f110e0Safresh1 7091f110e0Safresh1# 16 7191f110e0Safresh1 7291f110e0Safresh1{ 7391f110e0Safresh1 my $onlyABC = Unicode::Collate->new( 7491f110e0Safresh1 table => undef, 7591f110e0Safresh1 normalization => undef, 7691f110e0Safresh1 entry => << 'ENTRIES', 7791f110e0Safresh10061 ; [.0101.0020.0002.0061] # LATIN SMALL LETTER A 7891f110e0Safresh10041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A 7991f110e0Safresh10062 ; [.0102.0020.0002.0062] # LATIN SMALL LETTER B 8091f110e0Safresh10042 ; [.0102.0020.0008.0042] # LATIN CAPITAL LETTER B 8191f110e0Safresh10063 ; [.0103.0020.0002.0063] # LATIN SMALL LETTER C 8291f110e0Safresh10043 ; [.0103.0020.0008.0043] # LATIN CAPITAL LETTER C 8391f110e0Safresh1ENTRIES 8491f110e0Safresh1 ); 8591f110e0Safresh1 ok( 8691f110e0Safresh1 join(':', $onlyABC->sort( qw/ ABA BAC cc A Ab cAc aB / ) ), 8791f110e0Safresh1 join(':', qw/ A aB Ab ABA BAC cAc cc / ), 8891f110e0Safresh1 ); 8991f110e0Safresh1} 9091f110e0Safresh1 9191f110e0Safresh1# 17 9291f110e0Safresh1 9391f110e0Safresh1{ 9491f110e0Safresh1 my $few_entries = Unicode::Collate->new( 9591f110e0Safresh1 entry => <<'ENTRIES', 9691f110e0Safresh10050 ; [.0101.0020.0002.0050] # P 9791f110e0Safresh10045 ; [.0102.0020.0002.0045] # E 9891f110e0Safresh10052 ; [.0103.0020.0002.0052] # R 9991f110e0Safresh1004C ; [.0104.0020.0002.004C] # L 10091f110e0Safresh11100 ; [.0105.0020.0002.1100] # Hangul Jamo initial G 10191f110e0Safresh11175 ; [.0106.0020.0002.1175] # Hangul Jamo middle I 10291f110e0Safresh15B57 ; [.0107.0020.0002.5B57] # CJK Ideograph "Letter" 10391f110e0Safresh1ENTRIES 10491f110e0Safresh1 table => undef, 10591f110e0Safresh1 normalization => undef, 10691f110e0Safresh1 ); 10791f110e0Safresh1 # defined before undefined 10891f110e0Safresh1 my $sortABC = join '', 10991f110e0Safresh1 $few_entries->sort(split //, "ABCDEFGHIJKLMNOPQRSTUVWXYZ "); 11091f110e0Safresh1 11191f110e0Safresh1 ok($sortABC eq "PERL ABCDFGHIJKMNOQSTUVWXYZ"); 11291f110e0Safresh1 11391f110e0Safresh1 ok($few_entries->lt('E', 'D')); 11491f110e0Safresh1 ok($few_entries->lt("\x{5B57}", "\x{4E00}")); 11591f110e0Safresh1 ok($few_entries->lt("\x{AE30}", "\x{AC00}")); 11691f110e0Safresh1 11791f110e0Safresh1 # Hangul must be decomposed. 11891f110e0Safresh1 ok($few_entries->eq("\x{AC00}", "\x{1100}\x{1161}")); 11991f110e0Safresh1} 12091f110e0Safresh1 12191f110e0Safresh1# 22 12291f110e0Safresh1 12391f110e0Safresh1{ 12491f110e0Safresh1 my $highestNUL = Unicode::Collate->new( 12591f110e0Safresh1 table => undef, 12691f110e0Safresh1 normalization => undef, 12791f110e0Safresh1 level => 1, 12891f110e0Safresh1 entry => '0000 ; [.FFFE.0020.0005.0000]', 12991f110e0Safresh1 ); 13091f110e0Safresh1 13191f110e0Safresh1 for my $v ('', 8, 9, 11, 14) { 13291f110e0Safresh1 $highestNUL->change(UCA_Version => $v) if $v; 13391f110e0Safresh1 ok($highestNUL->lt("abc\x{4E00}", "abc\0")); 13491f110e0Safresh1 ok($highestNUL->lt("abc\x{E0000}","abc\0")); 13591f110e0Safresh1 } 13691f110e0Safresh1} 13791f110e0Safresh1 13891f110e0Safresh1# 32 139