1 2BEGIN { 3 unless ('A' eq pack('U', 0x41)) { 4 print "1..0 # Unicode::Collate cannot pack a Unicode code point\n"; 5 exit 0; 6 } 7 unless (0x41 == unpack('U', 'A')) { 8 print "1..0 # Unicode::Collate cannot get a Unicode code point\n"; 9 exit 0; 10 } 11 if ($ENV{PERL_CORE}) { 12 chdir('t') if -d 't'; 13 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); 14 } 15} 16 17use strict; 18use warnings; 19BEGIN { $| = 1; print "1..91\n"; } 20my $count = 0; 21sub ok ($;$) { 22 my $p = my $r = shift; 23 if (@_) { 24 my $x = shift; 25 $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; 26 } 27 print $p ? "ok" : "not ok", ' ', ++$count, "\n"; 28} 29 30use Unicode::Collate; 31 32ok(1); 33 34our $IsEBCDIC = ord("A") != 0x41; 35 36my $Collator = Unicode::Collate->new( 37 table => 'keys.txt', 38 normalization => undef, 39); 40 41##### 1 42 43my %old_level = $Collator->change(level => 2); 44 45my $str; 46 47my $orig = "This is a Perl book."; 48my $sub = "PERL"; 49my $rep = "camel"; 50my $ret = "This is a camel book."; 51 52$str = $orig; 53if (my($pos,$len) = $Collator->index($str, $sub)) { 54 substr($str, $pos, $len, $rep); 55} 56 57ok($str, $ret); 58 59$Collator->change(%old_level); 60 61$str = $orig; 62if (my($pos,$len) = $Collator->index($str, $sub)) { 63 substr($str, $pos, $len, $rep); 64} 65 66ok($str, $orig); 67 68##### 3 69 70my $match; 71 72$Collator->change(level => 1); 73 74$str = "Pe\x{300}rl"; 75$sub = "pe"; 76$ret = "Pe\x{300}"; 77$match = undef; 78if (my($pos, $len) = $Collator->index($str, $sub)) { 79 $match = substr($str, $pos, $len); 80} 81ok($match, $ret); 82 83$str = "P\x{300}e\x{300}\x{301}\x{303}rl"; 84$sub = "pE"; 85$ret = "P\x{300}e\x{300}\x{301}\x{303}"; 86$match = undef; 87if (my($pos, $len) = $Collator->index($str, $sub)) { 88 $match = substr($str, $pos, $len); 89} 90ok($match, $ret); 91 92$Collator->change(level => 2); 93 94$str = "Pe\x{300}rl"; 95$sub = "pe"; 96$ret = undef; 97$match = undef; 98if (my($pos, $len) = $Collator->index($str, $sub)) { 99 $match = substr($str, $pos, $len); 100} 101ok($match, $ret); 102 103$str = "P\x{300}e\x{300}\x{301}\x{303}rl"; 104$sub = "pE"; 105$ret = undef; 106$match = undef; 107if (my($pos, $len) = $Collator->index($str, $sub)) { 108 $match = substr($str, $pos, $len); 109} 110ok($match, $ret); 111 112$str = "Pe\x{300}rl"; 113$sub = "pe\x{300}"; 114$ret = "Pe\x{300}"; 115$match = undef; 116if (my($pos, $len) = $Collator->index($str, $sub)) { 117 $match = substr($str, $pos, $len); 118} 119ok($match, $ret); 120 121$str = "P\x{300}e\x{300}\x{301}\x{303}rl"; 122$sub = "p\x{300}E\x{300}\x{301}\x{303}"; 123$ret = "P\x{300}e\x{300}\x{301}\x{303}"; 124$match = undef; 125if (my($pos, $len) = $Collator->index($str, $sub)) { 126 $match = substr($str, $pos, $len); 127} 128ok($match, $ret); 129 130##### 9 131 132$Collator->change(level => 1); 133 134$str = $IsEBCDIC 135 ? "Ich mu\x{0059} studieren Perl." 136 : "Ich mu\x{00DF} studieren Perl."; 137$sub = $IsEBCDIC 138 ? "m\x{00DC}ss" 139 : "m\x{00FC}ss"; 140$ret = $IsEBCDIC 141 ? "mu\x{0059}" 142 : "mu\x{00DF}"; 143$match = undef; 144if (my($pos, $len) = $Collator->index($str, $sub)) { 145 $match = substr($str, $pos, $len); 146} 147ok($match, $ret); 148 149$Collator->change(%old_level); 150 151$match = undef; 152if (my($pos, $len) = $Collator->index($str, $sub)) { 153 $match = substr($str, $pos, $len); 154} 155ok($match, undef); 156 157$match = undef; 158if (my($pos,$len) = $Collator->index("", "")) { 159 $match = substr("", $pos, $len); 160} 161ok($match, ""); 162 163$match = undef; 164if (my($pos,$len) = $Collator->index("", "abc")) { 165 $match = substr("", $pos, $len); 166} 167ok($match, undef); 168 169##### 13 170 171$Collator->change(level => 1); 172 173$str = "\0\cA\0\cAe\0\x{300}\cA\x{301}\cB\x{302}\0 \0\cA"; 174$sub = "e"; 175$ret = "e\0\x{300}\cA\x{301}\cB\x{302}\0"; 176$match = undef; 177if (my($pos, $len) = $Collator->index($str, $sub)) { 178 $match = substr($str, $pos, $len); 179} 180ok($match, $ret); 181 182$Collator->change(level => 1); 183 184$str = "\0\cA\0\cAe\0\cA\x{300}\0\cAe"; 185$sub = "e"; 186$ret = "e\0\cA\x{300}\0\cA"; 187$match = undef; 188if (my($pos, $len) = $Collator->index($str, $sub)) { 189 $match = substr($str, $pos, $len); 190} 191ok($match, $ret); 192 193 194$Collator->change(%old_level); 195 196$str = "e\x{300}"; 197$sub = "e"; 198$ret = undef; 199$match = undef; 200if (my($pos, $len) = $Collator->index($str, $sub)) { 201 $match = substr($str, $pos, $len); 202} 203ok($match, $ret); 204 205##### 16 206 207$Collator->change(level => 1); 208 209$str = "The Perl is a language, and the perl is an interpreter."; 210$sub = "PERL"; 211 212$match = undef; 213if (my($pos, $len) = $Collator->index($str, $sub, -40)) { 214 $match = substr($str, $pos, $len); 215} 216ok($match, "Perl"); 217 218$match = undef; 219if (my($pos, $len) = $Collator->index($str, $sub, 4)) { 220 $match = substr($str, $pos, $len); 221} 222ok($match, "Perl"); 223 224$match = undef; 225if (my($pos, $len) = $Collator->index($str, $sub, 5)) { 226 $match = substr($str, $pos, $len); 227} 228ok($match, "perl"); 229 230$match = undef; 231if (my($pos, $len) = $Collator->index($str, $sub, 32)) { 232 $match = substr($str, $pos, $len); 233} 234ok($match, "perl"); 235 236$match = undef; 237if (my($pos, $len) = $Collator->index($str, $sub, 33)) { 238 $match = substr($str, $pos, $len); 239} 240ok($match, undef); 241 242$match = undef; 243if (my($pos, $len) = $Collator->index($str, $sub, 100)) { 244 $match = substr($str, $pos, $len); 245} 246ok($match, undef); 247 248$Collator->change(%old_level); 249 250##### 22 251 252my @ret; 253 254$Collator->change(level => 1); 255 256$ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe"); 257ok($ret); 258ok($$ret eq "P\cBe\x{300}\cB"); 259 260@ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe"); 261ok($ret[0], "P\cBe\x{300}\cB"); 262 263$str = $IsEBCDIC ? "mu\x{0059}" : "mu\x{00DF}"; 264$sub = $IsEBCDIC ? "m\x{00DC}ss" : "m\x{00FC}ss"; 265 266($ret) = $Collator->match($str, $sub); 267ok($ret, $str); 268 269$str = $IsEBCDIC ? "mu\x{0059}" : "mu\x{00DF}"; 270$sub = $IsEBCDIC ? "m\x{00DC}s" : "m\x{00FC}s"; 271 272($ret) = $Collator->match($str, $sub); 273ok($ret, undef); 274 275$ret = join ':', $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe"); 276ok($ret eq "P\cBe\x{300}\cB:pe:PE"); 277 278$ret = $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe"); 279ok($ret == 3); 280 281$str = "ABCDEF"; 282$sub = "cde"; 283$ret = $Collator->match($str, $sub); 284$str = "01234567"; 285ok($ret && $$ret, "CDE"); 286 287$str = "ABCDEF"; 288$sub = "cde"; 289($ret) = $Collator->match($str, $sub); 290$str = "01234567"; 291ok($ret, "CDE"); 292 293 294$Collator->change(level => 3); 295 296$ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe"); 297ok($ret, undef); 298 299@ret = $Collator->match("P\cBe\x{300}\cBrl and PERL", "pe"); 300ok(@ret == 0); 301 302$ret = join ':', $Collator->gmatch("P\cBe\x{300}\cBrl and PERL", "pe"); 303ok($ret eq ""); 304 305$ret = $Collator->gmatch("P\cBe\x{300}\cBrl and PERL", "pe"); 306ok($ret == 0); 307 308$ret = join ':', $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe"); 309ok($ret eq "pe"); 310 311$ret = $Collator->gmatch("P\cBe\x{300}\cBrl, perl, and PERL", "pe"); 312ok($ret == 1); 313 314$str = $IsEBCDIC ? "mu\x{0059}" : "mu\x{00DF}"; 315$sub = $IsEBCDIC ? "m\x{00DC}ss" : "m\x{00FC}ss"; 316 317($ret) = $Collator->match($str, $sub); 318ok($ret, undef); 319 320$Collator->change(%old_level); 321 322##### 38 323 324$Collator->change(level => 1); 325 326sub strreverse { scalar reverse shift } 327 328$str = "P\cBe\x{300}\cBrl and PERL."; 329$ret = $Collator->subst($str, "perl", 'Camel'); 330ok($ret, 1); 331ok($str, "Camel and PERL."); 332 333$str = "P\cBe\x{300}\cBrl and PERL."; 334$ret = $Collator->subst($str, "perl", \&strreverse); 335ok($ret, 1); 336ok($str, "lr\cB\x{300}e\cBP and PERL."); 337 338$str = "P\cBe\x{300}\cBrl and PERL."; 339$ret = $Collator->gsubst($str, "perl", 'Camel'); 340ok($ret, 2); 341ok($str, "Camel and Camel."); 342 343$str = "P\cBe\x{300}\cBrl and PERL."; 344$ret = $Collator->gsubst($str, "perl", \&strreverse); 345ok($ret, 2); 346ok($str, "lr\cB\x{300}e\cBP and LREP."); 347 348$str = "Camel donkey zebra came\x{301}l CAMEL horse cAm\0E\0L..."; 349$Collator->gsubst($str, "camel", sub { "<b>$_[0]</b>" }); 350ok($str, "<b>Camel</b> donkey zebra <b>came\x{301}l</b> " 351 . "<b>CAMEL</b> horse <b>cAm\0E\0L</b>..."); 352 353##### 47 354 355# http://www.xray.mpe.mpg.de/mailing-lists/perl-unicode/2010-09/msg00014.html 356# when the substring includes an ignorable element like a space... 357 358$str = "Camel donkey zebra came\x{301}l CAMEL horse cAm\0E\0L..."; 359$Collator->gsubst($str, "camel horse", sub { "<b>$_[0]</b>" }); 360ok($str, "Camel donkey zebra came\x{301}l <b>CAMEL horse</b> cAm\0E\0L..."); 361 362$str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse..."; 363$Collator->gsubst($str, "camel horse", sub { "=$_[0]=" }); 364ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=..."); 365 366$str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse..."; 367$Collator->gsubst($str, "camel-horse", sub { "=$_[0]=" }); 368ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=..."); 369 370$str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse..."; 371$Collator->gsubst($str, "camelhorse", sub { "=$_[0]=" }); 372ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=..."); 373 374$str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse..."; 375$Collator->gsubst($str, " ca mel hor se ", sub { "=$_[0]=" }); 376ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=..."); 377 378$str = "Camel donkey zebra camex{301}l CAMEL horse cAmEL-horse..."; 379$Collator->gsubst($str, "ca\x{300}melho\x{302}rse", sub { "=$_[0]=" }); 380ok($str, "Camel donkey zebra camex{301}l =CAMEL horse= =cAmEL-horse=..."); 381 382##### 53 383 384$Collator->change(level => 3); 385 386$str = "P\cBe\x{300}\cBrl and PERL."; 387$ret = $Collator->subst($str, "perl", "Camel"); 388ok(! $ret); 389ok($str, "P\cBe\x{300}\cBrl and PERL."); 390 391$str = "P\cBe\x{300}\cBrl and PERL."; 392$ret = $Collator->subst($str, "perl", \&strreverse); 393ok(! $ret); 394ok($str, "P\cBe\x{300}\cBrl and PERL."); 395 396$str = "P\cBe\x{300}\cBrl and PERL."; 397$ret = $Collator->gsubst($str, "perl", "Camel"); 398ok($ret, 0); 399ok($str, "P\cBe\x{300}\cBrl and PERL."); 400 401$str = "P\cBe\x{300}\cBrl and PERL."; 402$ret = $Collator->gsubst($str, "perl", \&strreverse); 403ok($ret, 0); 404ok($str, "P\cBe\x{300}\cBrl and PERL."); 405 406$Collator->change(%old_level); 407 408##### 61 409 410$str = "Perl and Camel"; 411$ret = $Collator->gsubst($str, "\cA\cA\0", "AB"); 412ok($ret, 15); 413ok($str, "ABPABeABrABlAB ABaABnABdAB ABCABaABmABeABlAB"); 414 415$str = ''; 416$ret = $Collator->subst($str, "", "ABC"); 417ok($ret, 1); 418ok($str, "ABC"); 419 420$str = ''; 421$ret = $Collator->gsubst($str, "", "ABC"); 422ok($ret, 1); 423ok($str, "ABC"); 424 425$str = 'PPPPP'; 426$ret = $Collator->gsubst($str, 'PP', "ABC"); 427ok($ret, 2); 428ok($str, "ABCABCP"); 429 430##### 69 431 432# Shifted; ignorable after variable 433 434($ret) = $Collator->match("A?\x{300}!\x{301}\x{344}B\x{315}", "?!"); 435ok($ret, "?\x{300}!\x{301}\x{344}"); 436 437$Collator->change(alternate => 'Non-ignorable'); 438 439($ret) = $Collator->match("A?\x{300}!\x{301}B\x{315}", "?!"); 440ok($ret, undef); 441 442##### 71 443 444# Now preprocess is defined. 445 446$Collator->change(preprocess => sub {''}); 447 448eval { $Collator->index("", "") }; 449ok($@ && $@ =~ /Don't use Preprocess with index\(\)/); 450 451eval { $Collator->index("a", "a") }; 452ok($@ && $@ =~ /Don't use Preprocess with index\(\)/); 453 454eval { $Collator->match("", "") }; 455ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/); 456 457eval { $Collator->match("a", "a") }; 458ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/); 459 460$Collator->change(preprocess => sub { uc shift }); 461 462eval { $Collator->index("", "") }; 463ok($@ && $@ =~ /Don't use Preprocess with index\(\)/); 464 465eval { $Collator->index("a", "a") }; 466ok($@ && $@ =~ /Don't use Preprocess with index\(\)/); 467 468eval { $Collator->match("", "") }; 469ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/); 470 471eval { $Collator->match("a", "a") }; 472ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/); 473 474##### 79 475 476eval { require Unicode::Normalize }; 477my $has_norm = !$@; 478 479if ($has_norm) { 480 # Now preprocess and normalization are defined. 481 482 $Collator->change(normalization => 'NFD'); 483 484 eval { $Collator->index("", "") }; 485 ok($@ && $@ =~ /Don't use Preprocess with index\(\)/); 486 487 eval { $Collator->index("a", "a") }; 488 ok($@ && $@ =~ /Don't use Preprocess with index\(\)/); 489 490 eval { $Collator->match("", "") }; 491 ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/); 492 493 eval { $Collator->match("a", "a") }; 494 ok($@ && $@ =~ /Don't use Preprocess with.*match\(\)/); 495} else { 496 ok(1) for 1..4; 497} 498 499$Collator->change(preprocess => undef); 500 501if ($has_norm) { 502 # Now only normalization is defined. 503 504 eval { $Collator->index("", "") }; 505 ok($@ && $@ =~ /Don't use Normalization with index\(\)/); 506 507 eval { $Collator->index("a", "a") }; 508 ok($@ && $@ =~ /Don't use Normalization with index\(\)/); 509 510 eval { $Collator->match("", "") }; 511 ok($@ && $@ =~ /Don't use Normalization with.*match\(\)/); 512 513 eval { $Collator->match("a", "a") }; 514 ok($@ && $@ =~ /Don't use Normalization with.*match\(\)/); 515 516 $Collator->change(normalization => undef); 517} else { 518 ok(1) for 1..4; 519} 520 521##### 87 522 523# Now preprocess and normalization are undef. 524 525eval { $Collator->index("", "") }; 526ok(!$@); 527 528eval { $Collator->index("a", "a") }; 529ok(!$@); 530 531eval { $Collator->match("", "") }; 532ok(!$@); 533 534eval { $Collator->match("a", "a") }; 535ok(!$@); 536 537##### 91 538