# Exercises the "rewrite" option of Unicode::Collate->new, which hands every
# line of the collation table to a callback before the line is used.
# Output is plain TAP produced by the local ok() helper (17 tests).

BEGIN {
    # Emit a "skip everything" plan when pack('U') does not yield "A".
    if ("A" ne pack('U', 0x41)) {
        print "1..0 # Unicode::Collate " .
            "cannot stringify a Unicode code point\n";
        exit 0;
    }
    # With PERL_CORE set: run from t/ (if present) and replace @INC with the
    # relative lib directory (MacOS path syntax handled separately).
    if ($ENV{PERL_CORE}) {
        chdir('t') if -d 't';
        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use strict;
use warnings;
BEGIN { $| = 1; print "1..17\n"; }

# Number of tests reported so far; ok() increments it.
my $count = 0;

# ok(GOT)           - passes when GOT is true.
# ok(GOT, EXPECTED) - passes when both are undef, or when both are defined
#                     and string-equal.
# Prints "ok N" or "not ok N".  The prototype is kept on purpose: it puts
# each argument expression in scalar context.
sub ok ($;$) {
    my $got  = shift;
    my $pass = $got;
    if (@_) {
        my $want = shift;
        if (!defined $want) {
            $pass = !defined $got;
        }
        elsif (!defined $got) {
            $pass = 0;
        }
        else {
            $pass = $got eq $want;
        }
    }
    my $status = $pass ? "ok" : "not ok";
    print $status, ' ', ++$count, "\n";
}

use Unicode::Collate;

ok(1);

#########################

# Rewrite callback shared by the first two collators: wherever a table line
# contains "[.0000." followed by two dot-separated four-character fields,
# those two fields are overwritten with 0000 as well.
my $code = sub {
    my ($entry) = @_;
    $entry =~ s/\[\.0000\..{4}\..{4}\./[.0000.0000.0000./g;
    return $entry;
};

# Four assertions run against each collator built with $code:
# combining marks (U+0300, U+0301) must not affect equality with "camel",
# letter case must still matter, and gsubst must wrap exactly the
# occurrences that collate equal to "camel".
my $check_marks_ignored = sub {
    my ($collator) = @_;

    ok($collator->eq("camel", "came\x{300}l"));
    ok($collator->eq("camel", "ca\x{300}me\x{301}l"));
    ok($collator->lt("camel", "Camel"));

    my $text = "Camel donkey zebra came\x{301}l CAMEL horse cam\0e\0l.";
    $collator->gsubst($text, "camel", sub { "=$_[0]=" });
    ok($text, "Camel donkey zebra =came\x{301}l= CAMEL horse =cam\0e\0l=.");
};

#####

# Collator reading the table file 'keys.txt' through $code.
my $Collator = Unicode::Collate->new(
    table         => 'keys.txt',
    normalization => undef,
    rewrite       => $code,
);
$check_marks_ignored->($Collator);

# 5

# Same callback, but no explicit table is named in the constructor.
my $rewriteDUCET = Unicode::Collate->new(
    normalization => undef,
    rewrite       => $code,
);
$check_marks_ignored->($rewriteDUCET);

# 9

# Level-1 collator whose callback replaces every table line mentioning
# HIRAGANA with the empty string.
my $undef_hira = Unicode::Collate->new(
    table         => 'keys.txt',
    normalization => undef,
    level         => 1,
    rewrite       => sub {
        my ($entry) = @_;
        return $entry =~ /HIRAGANA/ ? '' : $entry;
    },
);

my $hiragana = "\x{3042}\x{3044}";
my $katakana = "\x{30A2}\x{30A4}";
my $cjkkanji = "\x{4E00}";

# Hiragana are left undefined by the rewrite above, so the checks below
# expect them to sort after CJK Unified Ideographs (and after katakana).
for my $pair (
    [ "abc",     "perl"    ],
    [ "",        "ABC"     ],
    [ $katakana, $hiragana ],
    [ $katakana, $cjkkanji ],
    [ $cjkkanji, $hiragana ],
) {
    my ($lesser, $greater) = @$pair;
    ok($undef_hira->lt($lesser, $greater));
}

# $Collator did not drop the HIRAGANA lines; once switched to level 1 the
# checks expect the two kana strings to compare equal and both to sort
# before the ideograph.
$Collator->change(level => 1);
ok($Collator->eq($katakana, $hiragana));
ok($Collator->lt($katakana, $cjkkanji));
ok($Collator->gt($cjkkanji, $hiragana));

# 17