# Exercises the overrideCJK and overrideHangul options of Unicode::Collate
# with no collation table loaded (table => undef), under several
# UCA_Version settings.

BEGIN {
    # Emit an empty plan and stop when this perl cannot turn a Unicode
    # code point into the matching character.
    if (pack('U', 0x41) ne "A") {
        print "1..0 # Unicode::Collate " .
              "cannot stringify a Unicode code point\n";
        exit 0;
    }

    # With PERL_CORE set, run from t/ (when it exists) and point @INC at
    # ../lib, or ::lib on MacOS.
    if ($ENV{PERL_CORE}) {
        -d 't' and chdir('t');
        if ($^O eq 'MacOS') {
            @INC = qw(::lib);
        }
        else {
            @INC = qw(../lib);
        }
    }
}

use Test;
BEGIN { plan tests => 76 };

use strict;
use warnings;
use Unicode::Collate;

# Issue one ok() per [method, left, right] triple, where method names the
# comparison ('lt', 'gt' or 'eq') to call on $collator.
sub run_checks {
    my ($collator, @triples) = @_;
    for my $triple (@triples) {
        my ($method, $left, $right) = @{$triple};
        ok($collator->$method($left, $right));
    }
    return;
}

# Test 1: execution got this far with Unicode::Collate loaded.
ok(1);

# Constructor options shared by every collator in this file.
my %no_table = (
    table         => undef,
    normalization => undef,
);

##### 2..6

my $plain_v8 = Unicode::Collate->new(
    %no_table,
    overrideCJK    => undef,
    overrideHangul => undef,
    UCA_Version    => 8,
);

# Everything is expected in Unicode code point order,
# and Hangul syllables are not decomposed.
run_checks($plain_v8,
    ['lt', "\x{3402}", "\x{4E00}"],
    ['lt', "\x{4DFF}", "\x{4E00}"],
    ['lt', "\x{4E00}", "\x{AC00}"],
    ['gt', "\x{AC00}", "\x{1100}\x{1161}"],
    ['gt', "\x{AC00}", "\x{ABFF}"],
);

##### 7..11

my $plain_v9 = Unicode::Collate->new(
    %no_table,
    overrideCJK    => undef,
    overrideHangul => undef,
    UCA_Version    => 9,
);

# Expected order: CJK Ideographs < CJK ext A/B < others.
# Hangul syllables are still not decomposed.
run_checks($plain_v9,
    ['lt', "\x{4E00}",  "\x{3402}"],
    ['lt', "\x{3402}",  "\x{20000}"],
    ['lt', "\x{20000}", "\x{AC00}"],
    ['gt', "\x{AC00}",  "\x{1100}\x{1161}"],
    ['gt', "\x{AC00}",  "\x{ABFF}"],            # U+ABFF: not assigned
);

##### 12..16

my $hangul_ignored = Unicode::Collate->new(
    %no_table,
    overrideHangul => sub { return },   # no collation elements at all
    entry          => <<'ENTRIES',
AE00 ; [.0100.0020.0002.AE00] # Hangul GEUL
ENTRIES
);

# Every Hangul syllable other than U+AE00 is ignored.
run_checks($hangul_ignored,
    ['eq', "\x{AC00}", ""],
    ['lt', "\x{AC00}", "\0"],
    ['lt', "\x{AC00}", "\x{AE00}"],
    ['lt', "\x{AC00}", "\x{1100}\x{1161}"],     # Jamo are not ignored.
    ['lt', "Pe\x{AE00}rl", "Perl"],             # 'r' is unassigned.
);

my $cjk_ignored = Unicode::Collate->new(
    %no_table,
    overrideCJK => sub { return },      # no collation elements at all
    entry       => <<'ENTRIES',
5B57 ; [.0107.0020.0002.5B57] # CJK Ideograph "Letter"
ENTRIES
);

# Every CJK Unified Ideograph other than U+5B57 is ignored.

##### 17..21
run_checks($cjk_ignored,
    ['eq', "\x{4E00}", ""],
    ['lt', "\x{4E00}", "\0"],
    ['eq', "Pe\x{4E00}rl", "Perl"],             # U+4E00 is a CJK.
    ['gt', "\x{4DFF}", "\x{4E00}"],             # U+4DFF is not CJK.
    ['lt', "Pe\x{5B57}rl", "Perl"],             # 'r' is unassigned.
);

##### 22..29
# No UCA_Version was given, so the collator's default applies here.
run_checks($cjk_ignored,
    ['eq', "\x{3400}",  ""],
    ['eq', "\x{4DB5}",  ""],
    ['eq', "\x{9FA5}",  ""],
    ['eq', "\x{9FA6}",  ""],                    # UI since Unicode 4.1.0
    ['eq', "\x{9FBB}",  ""],                    # UI since Unicode 4.1.0
    ['gt', "\x{9FBC}",  "Perl"],
    ['eq', "\x{20000}", ""],
    ['eq', "\x{2A6D6}", ""],
);

##### 30..37
$cjk_ignored->change(UCA_Version => 9);
run_checks($cjk_ignored,
    ['eq', "\x{3400}",  ""],
    ['eq', "\x{4DB5}",  ""],
    ['eq', "\x{9FA5}",  ""],
    ['gt', "\x{9FA6}",  "Perl"],
    ['gt', "\x{9FBB}",  "Perl"],
    ['gt', "\x{9FBC}",  "Perl"],
    ['eq', "\x{20000}", ""],
    ['eq', "\x{2A6D6}", ""],
);

##### 38..45
$cjk_ignored->change(UCA_Version => 8);
run_checks($cjk_ignored,
    ['eq', "\x{3400}",  ""],
    ['eq', "\x{4DB5}",  ""],
    ['eq', "\x{9FA5}",  ""],
    ['gt', "\x{9FA6}",  "Perl"],
    ['gt', "\x{9FBB}",  "Perl"],
    ['gt', "\x{9FBC}",  "Perl"],
    ['eq', "\x{20000}", ""],
    ['eq', "\x{2A6D6}", ""],
);

##### 46..53
$cjk_ignored->change(UCA_Version => 14);
run_checks($cjk_ignored,
    ['eq', "\x{3400}",  ""],
    ['eq', "\x{4DB5}",  ""],
    ['eq', "\x{9FA5}",  ""],
    ['eq', "\x{9FA6}",  ""],                    # UI since Unicode 4.1.0
    ['eq', "\x{9FBB}",  ""],                    # UI since Unicode 4.1.0
    ['gt', "\x{9FBC}",  "Perl"],
    ['eq', "\x{20000}", ""],
    ['eq', "\x{2A6D6}", ""],
);

##### 54..76
my $cjk_reversed = Unicode::Collate->new(
    %no_table,
    entry => <<'ENTRIES',
0061 ; [.0101.0020.0002.0061] # latin a
0041 ; [.0101.0020.0008.0041] # LATIN A
4E00 ; [.B1FC.0030.0004.4E00] # Ideograph; B1FC = FFFF - 4E03.
ENTRIES
    overrideCJK => sub {
        # Weight each ideograph by 0xFFFF minus its code point, which
        # reverses the code point order.
        my ($code_point) = @_;
        my $reversed = 0xFFFF - $code_point;
        return [$reversed, 0x20, 0x2, $reversed];
    },
);

run_checks($cjk_reversed,
    ['lt', "a", "A"],                           # diff. at level 3.
    ['lt',  "\x{4E03}",  "\x{4E00}"],           # diff. at level 2.
    ['lt', "A\x{4E03}", "A\x{4E00}"],
    ['lt', "A\x{4E03}", "a\x{4E00}"],
    ['lt', "a\x{4E03}", "A\x{4E00}"],
);

run_checks($cjk_reversed,
    ['gt', "a\x{3400}", "A\x{4DB5}"],
    ['gt', "a\x{4DB5}", "A\x{9FA5}"],
    ['gt', "a\x{9FA5}", "A\x{9FA6}"],
    ['gt', "a\x{9FA6}", "A\x{9FBB}"],
    ['lt', "a\x{9FBB}", "A\x{9FBC}"],
    ['lt', "a\x{9FBC}", "A\x{9FBF}"],
);

$cjk_reversed->change(UCA_Version => 9);

run_checks($cjk_reversed,
    ['gt', "a\x{3400}", "A\x{4DB5}"],
    ['gt', "a\x{4DB5}", "A\x{9FA5}"],
    ['lt', "a\x{9FA5}", "A\x{9FA6}"],
    ['lt', "a\x{9FA6}", "A\x{9FBB}"],
    ['lt', "a\x{9FBB}", "A\x{9FBC}"],
    ['lt', "a\x{9FBC}", "A\x{9FBF}"],
);

$cjk_reversed->change(UCA_Version => 14);

run_checks($cjk_reversed,
    ['gt', "a\x{3400}", "A\x{4DB5}"],
    ['gt', "a\x{4DB5}", "A\x{9FA5}"],
    ['gt', "a\x{9FA5}", "A\x{9FA6}"],
    ['gt', "a\x{9FA6}", "A\x{9FBB}"],
    ['lt', "a\x{9FBB}", "A\x{9FBC}"],
    ['lt', "a\x{9FBC}", "A\x{9FBF}"],
);
