# Skip the whole file (plan "1..0") when pack('U') does not yield the
# character "A" for code point 0x41.
BEGIN {
    if ("A" ne pack('U', 0x41)) {
        print "1..0 # Unicode::Collate cannot stringify a Unicode code point\n";
        exit 0;
    }
}

# When PERL_CORE is set: move into t/ if that directory exists, then
# replace @INC with the library directory relative to it.
BEGIN {
    if ($ENV{PERL_CORE}) {
        if (-d 't') {
            chdir('t');
        }
        @INC = ($^O eq 'MacOS') ? ('::lib') : ('../lib');
    }
}

use Test;
BEGIN { plan(tests => 160); }

use strict;
use warnings;
use Unicode::Collate;

# Test 1: we got this far, so the modules above loaded.
ok(1);

##### 2..6

# Collator shared by many of the tests in this file: weights come from
# the 'keys.txt' table and normalization is switched off.
my $Collator = Unicode::Collate->new(
    table         => 'keys.txt',
    normalization => undef,
);

ok(ref($Collator), "Unicode::Collate");

# Sorting a shuffled list of names must give the expected order.
{
    my @shuffled = qw/ lib strict Carp ExtUtils CGI Time warnings Math overload Pod CPAN /;
    my @expected = qw/ Carp CGI CPAN ExtUtils lib Math overload Pod strict Time warnings /;

    ok(join(':', $Collator->sort(@shuffled)), join(':', @expected));
}

# Comparisons involving the empty string.
ok($Collator->cmp("", ""), 0);
ok($Collator->eq("", ""));
ok($Collator->cmp("", "perl"), -1);

##### 7..17

# Thin wrappers around Unicode::Collate's code point <-> string helpers;
# all arguments are passed straight through.
sub _pack_U {
    return Unicode::Collate::pack_U(@_);
}

sub _unpack_U {
    return Unicode::Collate::unpack_U(@_);
}

# Strings built from single code points; they are reused by later tests.
my $A_acute = _pack_U(0xC1);
my $a_acute = _pack_U(0xE1);
my $acute   = _pack_U(0x0301);

ok($Collator->cmp("A" . $acute, $A_acute), 0); # @version 3.1.1 (prev: -1)
ok($Collator->cmp($a_acute, $A_acute), -1);
ok($Collator->eq("A\cA" . $acute, $A_acute)); # UCA v9. \cA is invariant.

# Tests 10..17 (remainder of the 7..17 group): the accent/case comparisons
# are repeated while the shared $Collator is switched between levels.
# The list returned by change() is kept in %old_level and handed back to
# change() further down to restore the earlier settings.
my %old_level = $Collator->change(level => 1);
ok($Collator->eq("A$acute", $A_acute));
ok($Collator->eq("A", $A_acute));

# change() is chained here, so the comparison runs on the collator at level 2.
ok($Collator->change(level => 2)->eq($a_acute, $A_acute));
ok($Collator->lt("A", $A_acute));

# Back to the saved settings for the rest of the group.
ok($Collator->change(%old_level)->lt("A", $A_acute));
ok($Collator->lt("A", $A_acute));
ok($Collator->lt("A", $a_acute));
ok($Collator->lt($a_acute, $A_acute));

##### 18..20

# These three tests only run when Unicode::Normalize can be loaded;
# otherwise three unconditional passes keep the test count in step.
eval { require Unicode::Normalize };
if (!$@) {
    # No 'normalization' option is passed for this collator (unlike most
    # others in this file), and extra Cyrillic entries are added.
    my $NFD = Unicode::Collate->new(
	table => 'keys.txt',
	level => 1,
	entry => <<'ENTRIES',
0430 ; [.0CB5.0020.0002.0430] # CYRILLIC SMALL LETTER A
0410 ; [.0CB5.0020.0008.0410] # CYRILLIC CAPITAL LETTER A
04D3 ; [.0CBD.0020.0002.04D3] # CYRILLIC SMALL LETTER A WITH DIAERESIS
0430 0308 ; [.0CBD.0020.0002.04D3] # CYRILLIC SMALL LETTER A WITH DIAERESIS
04D2 ; [.0CBD.0020.0008.04D2] # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
0410 0308 ; [.0CBD.0020.0008.04D2] # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
0430 3099 ; [.0CBE.0020.0002.04D3] # A WITH KATAKANA VOICED
0430 3099 0308 ; [.0CBF.0020.0002.04D3] # A WITH KATAKANA VOICED, DIAERESIS
ENTRIES
    );
    ok($NFD->eq("\x{4D3}\x{325}", "\x{430}\x{308}\x{325}"));
    ok($NFD->lt("\x{430}\x{308}A", "\x{430}\x{308}B"));
    ok($NFD->lt("\x{430}\x{3099}B", "\x{430}\x{308}\x{3099}A"));
}
else {
    ok(1);
    ok(1);
    ok(1);
}

##### 21..34

# Collator with two extra contraction entries ("ch" and "Ch"), limited to
# level 3, and with an ignoreName pattern for several scripts.
# Note that the first entry uses '%' and the second '#' before its remark.
my $trad = Unicode::Collate->new(
  table => 'keys.txt',
  normalization => undef,
  ignoreName => qr/HANGUL|HIRAGANA|KATAKANA|BOPOMOFO/,
  level => 3,
  entry => << 'ENTRIES',
 0063 0068 ; [.0A3F.0020.0002.0063] % "ch" in traditional Spanish
 0043 0068 ; [.0A3F.0020.0008.0043] # "Ch" in traditional Spanish
ENTRIES
);
# 0063 ; [.0A3D.0020.0002.0063] # LATIN SMALL LETTER C
# 0064 ; [.0A49.0020.0002.0064] # LATIN SMALL LETTER D
# Deutsch sz is included in 'keys.txt';

# With the contraction, "acha" sorts after "acka" ...
ok(
  join(':', $trad->sort( qw/ acha aca ada acia acka / ) ),
  join(':', qw/ aca acia acka acha ada / ),
);

# ... while the plain collator puts it right after "aca".
ok(
  join(':', $Collator->sort( qw/ acha aca ada acia acka / ) ),
  join(':', qw/ aca acha acia acka ada / ),
);

# Control characters placed inside "ch" do not change the result here.
ok($trad->eq("ocho", "oc\cAho")); # UCA v9
ok($trad->eq("ocho", "oc\0\cA\0\cBho")); # UCA v9
ok($trad->eq("-", ""));
ok($trad->gt("ocho", "oc-ho"));

# Same inputs after switching $trad to UCA_Version 8: the first two
# comparisons are now expected to be 'gt' instead of 'eq'.
$trad->change(UCA_Version => 8);

ok($trad->gt("ocho", "oc\cAho"));
ok($trad->gt("ocho", "oc\0\cA\0\cBho"));
ok($trad->eq("-", ""));
ok($trad->gt("ocho", "oc-ho"));

# Put $trad back on UCA_Version 9 for the remaining tests.
$trad->change(UCA_Version => 9);

# Two-character kana strings; reused by later groups.
my $hiragana = "\x{3042}\x{3044}";
my $katakana = "\x{30A2}\x{30A4}";

# HIRAGANA and KATAKANA are ignorable via ignoreName
ok($trad->eq($hiragana, ""));
ok($trad->eq("", $katakana));
ok($trad->eq($hiragana, $katakana));
ok($trad->eq($katakana, $hiragana));

##### 35..41

# From here on the shared $Collator stays at level 2 until the 48..56 group.
$Collator->change(level => 2);

# Reads the 'level' value straight out of the collator object.
ok($Collator->{level}, 2);

# At level 2, case and hiragana/katakana differences compare as equal.
ok( $Collator->cmp("ABC","abc"), 0);
ok( $Collator->eq("ABC","abc") );
ok( $Collator->le("ABC","abc") );
ok( $Collator->cmp($hiragana, $katakana), 0);
ok( $Collator->eq($hiragana, $katakana) );
ok( $Collator->ge($hiragana, $katakana) );

##### 42..47

# hangul
ok( $Collator->eq("a\x{AC00}b", "a\x{1100}\x{1161}b") );
ok( $Collator->eq("a\x{AE00}b", "a\x{1100}\x{1173}\x{11AF}b") );
ok( $Collator->gt("a\x{AE00}b", "a\x{1100}\x{1173}b\x{11AF}") );
ok( $Collator->lt("a\x{AC00}b", "a\x{AE00}b") );
ok( $Collator->gt("a\x{D7A3}b", "a\x{C544}b") );
ok( $Collator->lt("a\x{C544}b", "a\x{30A2}b") ); # hangul < kana (U+30A2 is the first character of $katakana)

##### 48..56

# Restore the settings saved in %old_level and additionally turn on
# katakana_before_hiragana; the first assertion checks the level is 4.
$Collator->change(%old_level, katakana_before_hiragana => 1);

ok($Collator->{level}, 4);

ok( $Collator->cmp("abc", "ABC"), -1);
ok( $Collator->ne("abc", "ABC") );
ok( $Collator->lt("abc", "ABC") );
ok( $Collator->le("abc", "ABC") );
ok( $Collator->cmp($hiragana, $katakana), 1);
ok( $Collator->ne($hiragana, $katakana) );
ok( $Collator->gt($hiragana, $katakana) );
ok( $Collator->ge($hiragana, $katakana) );

##### 57..62

# upper_before_lower is switched on as well (katakana_before_hiragana is
# still on), so lower case is now expected to compare greater.
$Collator->change(upper_before_lower => 1);

ok( $Collator->cmp("abc", "ABC"), 1);
ok( $Collator->ge("abc", "ABC"), 1);
ok( $Collator->gt("abc", "ABC"), 1);
ok( $Collator->cmp($hiragana, $katakana), 1);
ok( $Collator->ge($hiragana, $katakana), 1);
ok( $Collator->gt($hiragana, $katakana), 1);

##### 63..68

# Switch the two options off again one at a time and check each step.
$Collator->change(katakana_before_hiragana => 0);

ok( $Collator->cmp("abc", "ABC"), 1);
ok( $Collator->cmp($hiragana, $katakana), -1);

$Collator->change(upper_before_lower => 0);

ok( $Collator->cmp("abc", "ABC"), -1);
ok( $Collator->le("abc", "ABC") );
ok( $Collator->cmp($hiragana, $katakana), -1);
ok( $Collator->lt($hiragana, $katakana) );

##### 69..70

# Collator whose ignoreChar pattern matches exactly the characters
# a, A, e and E; the strings below differ only in those characters.
my $ignoreAE = Unicode::Collate->new(
  table => 'keys.txt',
  normalization => undef,
  ignoreChar => qr/^[aAeE]$/,
);

ok($ignoreAE->eq("element","lament"));
ok($ignoreAE->eq("Perl","ePrl"));

##### 71

# No table at all: only the six entries below (a/A, b/B, c/C) are defined.
my $onlyABC = Unicode::Collate->new(
    table => undef,
    normalization => undef,
    entry => << 'ENTRIES',
0061 ; [.0101.0020.0002.0061] # LATIN SMALL LETTER A
0041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A
0062 ; [.0102.0020.0002.0062] # LATIN SMALL LETTER B
0042 ; [.0102.0020.0008.0042] # LATIN CAPITAL LETTER B
0063 ; [.0103.0020.0002.0063] # LATIN SMALL LETTER C
0043 ; [.0103.0020.0008.0043] # LATIN CAPITAL LETTER C
ENTRIES
);

ok(
  join(':', $onlyABC->sort( qw/ ABA BAC cc A Ab cAc aB / ) ),
  join(':', qw/ A aB Ab ABA BAC cAc cc / ),
);

##### 72..75

# Same character class as $ignoreAE, but passed as undefChar.  Each pair
# is checked against the plain $Collator, which gives the opposite order.
my $undefAE = Unicode::Collate->new(
  table => 'keys.txt',
  normalization => undef,
  undefChar => qr/^[aAeE]$/,
);

ok($undefAE ->gt("edge","fog"));
ok($Collator->lt("edge","fog"));
ok($undefAE ->gt("lake","like"));
ok($Collator->lt("lake","like"));

##### 76..85

# Table is undefined, then no entry is defined.
my $undef_table = Unicode::Collate->new(
    table         => undef,
    normalization => undef,
    level         => 1,
);

# Each row is [ comparison method, left string, right string ] and must
# hold for $undef_table.  The rows run in the order of the numbered tests.
for my $case (
    # in the Unicode code point order
    [ 'lt', '',    'A' ],
    [ 'lt', 'ABC', 'B' ],

    # Hangul should be decomposed (even w/o Unicode::Normalize).
    #   U+AC00: Hangul GA
    #   U+AE00: Hangul GEUL
    #   U+3042: Hiragana A
    [ 'lt', "Perl",     "\x{AC00}" ],
    [ 'eq', "\x{AC00}", "\x{1100}\x{1161}" ],
    [ 'eq', "\x{AE00}", "\x{1100}\x{1173}\x{11AF}" ],
    [ 'lt', "\x{AE00}", "\x{3042}" ],

    # Weight for CJK Ideographs is defined, though.
    #   U+4E00: Ideograph "ONE"
    #   U+4E8C: Ideograph "TWO"
    [ 'lt', "",         "\x{4E00}" ],
    [ 'lt', "\x{4E8C}", "ABC" ],
    [ 'lt', "\x{4E00}", "\x{3042}" ],
    [ 'lt', "\x{4E00}", "\x{4E8C}" ],
) {
    my ($relation, @operands) = @{$case};
    ok($undef_table->$relation(@operands));
}


##### 86..90

# The only entries this collator knows about; there is no table behind it.
my $few_entry_lines = <<'FEW_ENTRIES';
0050 ; [.0101.0020.0002.0050] # P
0045 ; [.0102.0020.0002.0045] # E
0052 ; [.0103.0020.0002.0052] # R
004C ; [.0104.0020.0002.004C] # L
1100 ; [.0105.0020.0002.1100] # Hangul Jamo initial G
1175 ; [.0106.0020.0002.1175] # Hangul Jamo middle I
5B57 ; [.0107.0020.0002.5B57] # CJK Ideograph "Letter"
FEW_ENTRIES

my $few_entries = Unicode::Collate->new(
    entry         => $few_entry_lines,
    table         => undef,
    normalization => undef,
);

# defined before undefined
my @capitals_and_space = split(//, "ABCDEFGHIJKLMNOPQRSTUVWXYZ ");
my $sortABC = join('', $few_entries->sort(@capitals_and_space));

ok($sortABC eq "PERL ABCDFGHIJKMNOQSTUVWXYZ");

# In each pair the left string must sort before the right one.
for my $pair (
    [ 'E',        'D' ],
    [ "\x{5B57}", "\x{4E00}" ],
    [ "\x{AE30}", "\x{AC00}" ],
) {
    ok($few_entries->lt(@{$pair}));
}

# Hangul must be decomposed.

# Last test of the 86..90 group: under $few_entries a precomposed Hangul
# syllable must equal its two-jamo sequence.
ok($few_entries->eq("\x{AC00}", "\x{1100}\x{1161}"));

##### 91..95

# Options shared by the next two collators: no table, no normalization,
# and both override hooks explicitly undefined.  Only UCA_Version differs.
my @all_undef_options = (
    table          => undef,
    normalization  => undef,
    overrideCJK    => undef,
    overrideHangul => undef,
);

my $all_undef_8 = Unicode::Collate->new(@all_undef_options, UCA_Version => 8);

# All in the Unicode code point order.
# No hangul decomposition.
for my $case (
    [ 'lt', "\x{3402}", "\x{4E00}" ],
    [ 'lt', "\x{4DFF}", "\x{4E00}" ],
    [ 'lt', "\x{4E00}", "\x{AC00}" ],
    [ 'gt', "\x{AC00}", "\x{1100}\x{1161}" ],
    [ 'gt', "\x{AC00}", "\x{ABFF}" ],
) {
    my ($relation, @operands) = @{$case};
    ok($all_undef_8->$relation(@operands));
}

##### 96..100

my $all_undef_9 = Unicode::Collate->new(@all_undef_options, UCA_Version => 9);

# CJK Ideo. < CJK ext A/B < Others.
# No hangul decomposition.
for my $case (
    [ 'lt', "\x{4E00}",  "\x{3402}" ],
    [ 'lt', "\x{3402}",  "\x{20000}" ],
    [ 'lt', "\x{20000}", "\x{AC00}" ],
    [ 'gt', "\x{AC00}",  "\x{1100}\x{1161}" ],
    [ 'gt', "\x{AC00}",  "\x{ABFF}" ],    # U+ABFF: not assigned
) {
    my ($relation, @operands) = @{$case};
    ok($all_undef_9->$relation(@operands));
}

##### 101..105

# The overrideCJK hook returns an empty list; U+5B57 is the one
# ideograph that gets an explicit entry.
my $cjk_entry_line = <<'CJK_ENTRY';
5B57 ; [.0107.0020.0002.5B57] # CJK Ideograph "Letter"
CJK_ENTRY

my $ignoreCJK = Unicode::Collate->new(
    table         => undef,
    normalization => undef,
    overrideCJK   => sub { return () },
    entry         => $cjk_entry_line,
);

# All CJK Unified Ideographs except U+5B57 are ignored.
for my $case (
    [ 'eq', "\x{4E00}",      "" ],
    [ 'lt', "\x{4E00}",      "\0" ],
    [ 'eq', "Pe\x{4E00}rl",  "Perl" ],      # U+4E00 is a CJK.
    [ 'gt', "\x{4DFF}",      "\x{4E00}" ],  # U+4DFF is not CJK.
    [ 'lt', "Pe\x{5B57}rl",  "Perl" ],      # 'r' is unassigned.
) {
    my ($relation, @operands) = @{$case};
    ok($ignoreCJK->$relation(@operands));
}

##### 106..110

# The overrideHangul hook returns an empty list; U+AE00 is the one
# syllable that gets an explicit entry.
my $hangul_entry_line = <<'HANGUL_ENTRY';
AE00 ; [.0100.0020.0002.AE00] # Hangul GEUL
HANGUL_ENTRY

my $ignoreHangul = Unicode::Collate->new(
    table          => undef,
    normalization  => undef,
    overrideHangul => sub { return () },
    entry          => $hangul_entry_line,
);

# All Hangul Syllables except U+AE00 are ignored.
for my $case (
    [ 'eq', "\x{AC00}",     "" ],
    [ 'lt', "\x{AC00}",     "\0" ],
    [ 'lt', "\x{AC00}",     "\x{AE00}" ],
    [ 'lt', "\x{AC00}",     "\x{1100}\x{1161}" ],  # Jamo are not ignored.
    [ 'lt', "Pe\x{AE00}rl", "Perl" ],              # 'r' is unassigned.
) {
    my ($relation, @operands) = @{$case};
    ok($ignoreHangul->$relation(@operands));
}

##### 111..115

# Explicit entries for a, A and U+4E00; every other ideograph is weighted
# by the overrideCJK hook below.
my $over_cjk_lines = <<'OVER_CJK';
0061 ; [.0101.0020.0002.0061] # latin a
0041 ; [.0101.0020.0008.0041] # LATIN A
4E00 ; [.B1FC.0030.0004.4E00] # Ideograph; B1FC = FFFF - 4E03.
OVER_CJK

my $overCJK = Unicode::Collate->new(
    table         => undef,
    normalization => undef,
    entry         => $over_cjk_lines,
    overrideCJK   => sub {
        my ($code_point) = @_;
        my $reversed = 0xFFFF - $code_point;    # reversed
        return [ $reversed, 0x20, 0x2, $reversed ];
    },
);

# In each pair the left string must sort before the right one.
for my $pair (
    [ "a",         "A" ],           # diff. at level 3.
    [ "\x{4E03}",  "\x{4E00}" ],    # diff. at level 2.
    [ "A\x{4E03}", "A\x{4E00}" ],
    [ "A\x{4E03}", "a\x{4E00}" ],
    [ "a\x{4E03}", "A\x{4E00}" ],
) {
    ok($overCJK->lt(@{$pair}));
}

##### 116..120

# The preprocess hook removes "a ", "an " and "the " (any case, at a word
# boundary, with the whitespace that follows) before comparison.
my $dropArticles = Unicode::Collate->new(
    table         => "keys.txt",
    normalization => undef,
    preprocess    => sub {
        my ($text) = @_;
        $text =~ s/\b(?:an?|the)\s+//ig;
        return $text;
    },
);

# Rows are [ collator, method, left, right ]; the last two rows run the
# plain $Collator on inputs already used above, for contrast.
for my $case (
    [ $dropArticles, 'eq', "camel",   "a camel" ],
    [ $dropArticles, 'eq', "Perl",    "The Perl" ],
    [ $dropArticles, 'lt', "the pen", "a pencil" ],
    [ $Collator,     'lt', "Perl",    "The Perl" ],
    [ $Collator,     'gt', "the pen", "a pencil" ],
) {
    my ($collator, $relation, @operands) = @{$case};
    ok($collator->$relation(@operands));
}

##### 121..122

my $backLevel1 = Unicode::Collate->new(
    table         => undef,
    normalization => undef,
    backwards     => [ 1 ],
);

# all strings are reversed at level 1.

# Tests 121..122: with 'backwards' at level 1, "AB" is expected to compare
# greater than "BA" (and likewise for the two-kana strings).
ok($backLevel1->gt("AB", "BA"));
ok($backLevel1->gt("\x{3042}\x{3044}", "\x{3044}\x{3042}"));

##### 123..130

# 'backwards' given as a plain number (2) rather than an array reference,
# combined with an undefName pattern for several scripts.
my $backLevel2 = Unicode::Collate->new(
  table => "keys.txt",
  normalization => undef,
  undefName => qr/HANGUL|HIRAGANA|KATAKANA|BOPOMOFO/,
  backwards => 2,
);

# The same two pairs give opposite results under $backLevel2 and $Collator.
ok($backLevel2->gt("Ca\x{300}ca\x{302}", "ca\x{302}ca\x{300}"));
ok($backLevel2->gt("ca\x{300}ca\x{302}", "Ca\x{302}ca\x{300}"));
ok($Collator ->lt("Ca\x{300}ca\x{302}", "ca\x{302}ca\x{300}"));
ok($Collator ->lt("ca\x{300}ca\x{302}", "Ca\x{302}ca\x{300}"));

# HIRAGANA and KATAKANA are made undefined via undefName.
# So they are after CJK Unified Ideographs.

ok($backLevel2->lt("\x{4E00}", $hiragana));
ok($backLevel2->lt("\x{4E03}", $katakana));
ok($Collator ->gt("\x{4E00}", $hiragana));
ok($Collator ->gt("\x{4E03}", $katakana));

##### 131..142

# According to Conformance Test,
# a L3-ignorable is treated as a completely ignorable.

# Table-less collator limited to level 3.  The first four entries have
# zero weights at levels 1-3; the last two entries each consist of two
# collation elements.
my $L3ignorable = Unicode::Collate->new(
  alternate => 'Non-ignorable',
  level => 3,
  table => undef,
  normalization => undef,
  entry => <<'ENTRIES',
0000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429)
0001 ; [.0000.0000.0000.0000] # [0001] START OF HEADING (in 6429)
0591 ; [.0000.0000.0000.0591] # HEBREW ACCENT ETNAHTA
1D165 ; [.0000.0000.0000.1D165] # MUSICAL SYMBOL COMBINING STEM
0021 ; [*024B.0020.0002.0021] # EXCLAMATION MARK
09BE ; [.114E.0020.0002.09BE] # BENGALI VOWEL SIGN AA
09C7 ; [.1157.0020.0002.09C7] # BENGALI VOWEL SIGN E
09CB ; [.1159.0020.0002.09CB] # BENGALI VOWEL SIGN O
09C7 09BE ; [.1159.0020.0002.09CB] # BENGALI VOWEL SIGN O
1D1B9 ; [*098A.0020.0002.1D1B9] # MUSICAL SYMBOL SEMIBREVIS WHITE
1D1BA ; [*098B.0020.0002.1D1BA] # MUSICAL SYMBOL SEMIBREVIS BLACK
1D1BB ; [*098A.0020.0002.1D1B9][.0000.0000.0000.1D165] # M.S. MINIMA
1D1BC ; [*098B.0020.0002.1D1BA][.0000.0000.0000.1D165] # M.S. MINIMA BLACK
ENTRIES
);

ok($L3ignorable->lt("\cA", "!"));
ok($L3ignorable->lt("\x{591}", "!"));
ok($L3ignorable->eq("\cA", "\x{591}"));
# A zero-weight character placed between U+09C7 and U+09BE must not
# change the result of the comparison.
ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09C7}\cA\x{09BE}A"));
ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09C7}\x{0591}\x{09BE}A"));
ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09C7}\x{1D165}\x{09BE}A"));
ok($L3ignorable->eq("\x{09C7}\x{09BE}A", "\x{09CB}A"));
ok($L3ignorable->lt("\x{1D1BB}", "\x{1D1BC}"));
ok($L3ignorable->eq("\x{1D1BB}", "\x{1D1B9}"));
ok($L3ignorable->eq("\x{1D1BC}", "\x{1D1BA}"));
ok($L3ignorable->eq("\x{1D1BB}", "\x{1D1B9}\x{1D165}"));
ok($L3ignorable->eq("\x{1D1BC}", "\x{1D1BA}\x{1D165}"));

##### 143..149

# 'keys.txt' plus extra entries: BACKSPACE and ZERO WIDTH SPACE get
# explicit weights, and four contractions are added for o/O followed by
# U+0337 or by BACKSPACE + "/".  The last two lines of the heredoc start
# with '#'.
my $O_str = Unicode::Collate->new(
  table => "keys.txt",
  normalization => undef,
  entry => <<'ENTRIES',
0008 ; [*0008.0000.0000.0000] # BACKSPACE (need to be non-ignorable)
004F 0337 ; [.0B53.0020.0008.004F] # capital O WITH SHORT SOLIDUS OVERLAY
006F 0008 002F ; [.0B53.0020.0002.006F] # LATIN SMALL LETTER O WITH STROKE
004F 0008 002F ; [.0B53.0020.0008.004F] # LATIN CAPITAL LETTER O WITH STROKE
006F 0337 ; [.0B53.0020.0002.004F] # small O WITH SHORT SOLIDUS OVERLAY
200B ; [.2000.0000.0000.0000] # ZERO WIDTH SPACE (may be non-sense but ...)
#00F8 ; [.0B53.0020.0002.00F8] # LATIN SMALL LETTER O WITH STROKE
#00D8 ; [.0B53.0020.0008.00D8] # LATIN CAPITAL LETTER O WITH STROKE
ENTRIES
);

# Strings matching the contractions above, plus the single code points
# U+00F8 and U+00D8 they are compared with.
my $o_BS_slash = _pack_U(0x006F, 0x0008, 0x002F);
my $O_BS_slash = _pack_U(0x004F, 0x0008, 0x002F);
my $o_sol = _pack_U(0x006F, 0x0337);
my $O_sol = _pack_U(0x004F, 0x0337);
my $o_stroke = _pack_U(0x00F8);
my $O_stroke = _pack_U(0x00D8);

ok($O_str->eq($o_stroke, $o_BS_slash));
ok($O_str->eq($O_stroke, $O_BS_slash));

ok($O_str->eq($o_stroke, $o_sol));
ok($O_str->eq($O_stroke, $O_sol));

# U+200B compares equal to "\0" for the plain $Collator, but greater for
# $O_str, which has an explicit entry for it.
ok($Collator->eq("\x{200B}", "\0"));
ok($O_str ->gt("\x{200B}", "\0"));
ok($O_str ->gt("\x{200B}", "A"));

##### 150..160

# This group holds eleven ok() calls, so it ends at test 160, matching
# the planned total.
# The shared $Collator is switched to UCA_Version 8; the list returned by
# change() is kept in %origVer and handed back below.
my %origVer = $Collator->change(UCA_Version => 8);

$Collator->change(level => 3);

ok($Collator->gt("!\x{300}", ""));
ok($Collator->gt("!\x{300}", "!"));
ok($Collator->eq("!\x{300}", "\x{300}"));

$Collator->change(level => 2);

ok($Collator->eq("!\x{300}", "\x{300}"));

$Collator->change(level => 4);

ok($Collator->gt("!\x{300}", "!"));
ok($Collator->lt("!\x{300}", "\x{300}"));

# Hand the saved settings back and repeat the comparisons at levels 3 and 4.
$Collator->change(%origVer, level => 3);

ok($Collator->eq("!\x{300}", ""));
ok($Collator->eq("!\x{300}", "!"));
ok($Collator->lt("!\x{300}", "\x{300}"));

$Collator->change(level => 4);

ok($Collator->gt("!\x{300}", ""));
ok($Collator->eq("!\x{300}", "!"));

#####
