# tr.t
#
# Tests for the transliteration operator (tr/// and its synonym y///).
# All assertions go through the helpers loaded from ./test.pl.

use utf8;

BEGIN {
    chdir 't' if -d 't';
    @INC = '../lib';
    require './test.pl';
}

plan tests => 134;

# True only when both probe characters sit at their EBCDIC code points.
# Logical && (not bitwise &) states the intent: both comparisons must hold.
my $Is_EBCDIC = (ord('i') == 0x89 && ord('J') == 0xd1);

$_ = "abcdefghijklmnopqrstuvwxyz";

tr/a-z/A-Z/;

is($_, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 'uc');

tr/A-Z/a-z/;

is($_, "abcdefghijklmnopqrstuvwxyz", 'lc');

tr/b-y/B-Y/;
is($_, "aBCDEFGHIJKLMNOPQRSTUVWXYz", 'partial uc');


# In EBCDIC 'I' is \xc9 and 'J' is \xd1, 'i' is \x89 and 'j' is \x91.
# Yes, discontinuities.  Regardless, the \xca in the below should stay
# untouched (and not become \x8a).
{
    no utf8;
    $_ = "I\xcaJ";

    tr/I-J/i-j/;

    is($_, "i\xcaj", 'EBCDIC discontinuity');
}


# The targets start out as numbers; after tr/// their values must reflect
# the transliterated strings (32 + 32 + 3.5 + 3.5).
($x = 12) =~ tr/1/3/;
(my $y = 12) =~ tr/1/3/;
($f = 1.5) =~ tr/1/3/;
(my $g = 1.5) =~ tr/1/3/;
is($x + $y + $f + $g, 71, 'tr cancels IOK and NOK');

# /r
$_ = 'adam';
is y/dam/ve/rd, 'eve', '/r';
is $_, 'adam', '/r leaves param alone';
$g = 'ruby';
is $g =~ y/bury/repl/r, 'perl', '/r with explicit param';
is $g, 'ruby', '/r leaves explicit param alone';
is "aaa" =~ y\a\b\r, 'bbb', '/r with constant param';
ok !eval '$_ !~ y///r', "!~ y///r is forbidden";
like $@, qr\^Using !~ with tr///r doesn't make sense\,
  "!~ y///r error message";
{
    my $w;
    my $wc;
    # Capture the last warning text and count how many warnings were raised.
    local $SIG{__WARN__} = sub { $w = shift; ++$wc };
    local $^W = 1;
    eval 'y///r; 1';
    like $w, qr '^Useless use of non-destructive transliteration \(tr///r\)',
      '/r warns in void context';
    is $wc, 1, '/r warns just once';
}

# perlbug [ID 20000511.005]
$_ = 'fred';
/([a-z]{2})/;
$1 =~ tr/A-Z//;
s/^(\s*)f/$1F/;
is($_, 'Fred', 'harmless if explicitly not updating');


# A variant of the above, added in 5.7.2
$_ = 'fred';
/([a-z]{2})/;
eval '$1 =~ tr/A-Z/A-Z/;';
s/^(\s*)f/$1F/;
is($_, 'Fred', 'harmless if implicitly not updating');
is($@, '', ' no error');


# check tr handles UTF8 correctly
($x = 256.65.258) =~ tr/a/b/;
is($x, 256.65.258, 'handles UTF8');
is(length $x, 3);

$x =~ tr/A/B/;
is(length $x, 3);
if (ord("\t") == 9) { # ASCII
    is($x, 256.66.258);
}
else {
    is($x, 256.65.258);
}

# EBCDIC variants of the above tests
($x = 256.193.258) =~ tr/a/b/;
is(length $x, 3);
is($x, 256.193.258);

$x =~ tr/A/B/;
is(length $x, 3);
if (ord("\t") == 9) { # ASCII
    is($x, 256.193.258);
}
else {
    is($x, 256.194.258);
}


{
    $x = 200.300.400;
    $x =~ tr/\x{12c}/\x{190}/;
    is($x, 200.400.400,
       'changing UTF8 chars in a UTF8 string, same length');
    is(length $x, 3);

    $x = 200.300.400;
    $x =~ tr/\x{12c}/\x{be8}/;
    is($x, 200.3048.400, ' more bytes');
    is(length $x, 3);

    $x = 100.125.60;
    $x =~ tr/\x{64}/\x{190}/;
    is($x, 400.125.60, 'Putting UT8 chars into a non-UTF8 string');
    is(length $x, 3);

    $x = 400.125.60;
    $x =~ tr/\x{190}/\x{64}/;
    is($x, 100.125.60, 'Removing UTF8 chars from UTF8 string');
    is(length $x, 3);

    $x = 400.125.60.400;
    $y = $x =~ tr/\x{190}/\x{190}/;
    is($y, 2, 'Counting UTF8 chars in UTF8 string');

    $x = 60.400.125.60.400;
    $y = $x =~ tr/\x{3c}/\x{3c}/;
    is($y, 2, ' non-UTF8 chars in UTF8 string');

    # Counting UTF8 chars in a non-UTF8 string
    $x = 200.125.60;
    $y = $x =~ tr/\x{190}/\x{190}/;
    is($y, 0, ' UTF8 chars in non-UTFs string');
}

$_ = "abcdefghijklmnopqrstuvwxyz";
eval 'tr/a-z-9/ /';
like($@, qr/^Ambiguous range in transliteration operator/, 'tr/a-z-9//');

# Make sure leading and trailing hyphens still work
$_ = "car-rot9";
tr/-a-m/./;
is($_, '..r.rot9', 'hyphens, leading');

$_ = "car-rot9";
tr/a-m-/./;
is($_, '..r.rot9', ' trailing');

$_ = "car-rot9";
tr/-a-m-/./;
is($_, '..r.rot9', ' both');

$_ = "abcdefghijklmnop";
tr/ae-hn/./;
is($_, '.bcd....ijklm.op');

$_ = "abcdefghijklmnop";
tr/a-cf-kn-p/./;
is($_, '...de......lm...');

$_ = "abcdefghijklmnop";
tr/a-ceg-ikm-o/./;
is($_, '...d.f...j.l...p');


# 20000705 MJD
eval "tr/m-d/ /";
like($@, qr/^Invalid range "m-d" in transliteration operator/,
     'reversed range check');

'abcdef' =~ /(bcd)/;
is(eval '$1 =~ tr/abcd//', 3, 'explicit read-only count');
is($@, '', ' no error');

'abcdef' =~ /(bcd)/;
is(eval '$1 =~ tr/abcd/abcd/', 3, 'implicit read-only count');
is($@, '', ' no error');

is(eval '"123" =~ tr/12//', 2, 'LHS of non-updating tr');

eval '"123" =~ tr/1/2/';
like($@, qr|^Can't modify constant item in transliteration \(tr///\)|,
     'LHS bad on updating tr');


# v300 (0x12c) is UTF-8-encoded as 196 172 (0xc4 0xac)
# v400 (0x190) is UTF-8-encoded as 198 144 (0xc6 0x90)

# Transliterate a byte to a byte, all four ways.

($a = v300.196.172.300.196.172) =~ tr/\xc4/\xc5/;
is($a, v300.197.172.300.197.172, 'byte2byte transliteration');

($a = v300.196.172.300.196.172) =~ tr/\xc4/\x{c5}/;
is($a, v300.197.172.300.197.172);

($a = v300.196.172.300.196.172) =~ tr/\x{c4}/\xc5/;
is($a, v300.197.172.300.197.172);

($a = v300.196.172.300.196.172) =~ tr/\x{c4}/\x{c5}/;
is($a, v300.197.172.300.197.172);


($a = v300.196.172.300.196.172) =~ tr/\xc4/\x{12d}/;
is($a, v300.301.172.300.301.172, 'byte2wide transliteration');

($a = v300.196.172.300.196.172) =~ tr/\x{12c}/\xc3/;
is($a, v195.196.172.195.196.172, ' wide2byte');

($a = v300.196.172.300.196.172) =~ tr/\x{12c}/\x{12d}/;
is($a, v301.196.172.301.196.172, ' wide2wide');


($a = v300.196.172.300.196.172) =~ tr/\xc4\x{12c}/\x{12d}\xc3/;
is($a, v195.301.172.195.301.172, 'byte2wide & wide2byte');


($a = v300.196.172.300.196.172.400.198.144) =~
    tr/\xac\xc4\x{12c}\x{190}/\xad\x{12d}\xc5\x{191}/;
is($a, v197.301.173.197.301.173.401.198.144, 'all together now!');


is((($a = v300.196.172.300.196.172) =~ tr/\xc4/\xc5/), 2,
   'transliterate and count');

is((($a = v300.196.172.300.196.172) =~ tr/\x{12c}/\x{12d}/), 2);


($a = v300.196.172.300.196.172) =~ tr/\xc4/\x{12d}/c;
is($a, v301.196.301.301.196.301, 'translit w/complement');

($a = v300.196.172.300.196.172) =~ tr/\x{12c}/\xc5/c;
is($a, v300.197.197.300.197.197);


($a = v300.196.172.300.196.172) =~ tr/\xc4//d;
is($a, v300.172.300.172, 'translit w/deletion');

($a = v300.196.172.300.196.172) =~ tr/\x{12c}//d;
is($a, v196.172.196.172);


($a = v196.196.172.300.300.196.172) =~ tr/\xc4/\xc5/s;
is($a, v197.172.300.300.197.172, 'translit w/squeeze');

($a = v196.172.300.300.196.172.172) =~ tr/\x{12c}/\x{12d}/s;
is($a, v196.172.301.196.172.172);


# Tricky cases (When Simon Cozens Attacks)
($a = v196.172.200) =~ tr/\x{12c}/a/;
is(sprintf("%vd", $a), '196.172.200');

($a = v196.172.200) =~ tr/\x{12c}/\x{12c}/;
is(sprintf("%vd", $a), '196.172.200');

($a = v196.172.200) =~ tr/\x{12c}//d;
is(sprintf("%vd", $a), '196.172.200');


# UTF8 range tests from Inaba Hiroto

# Not working in EBCDIC as of 12674.
# Wide-character ranges mapped onto byte ranges, and the reverse.
($a = v300.196.172.302.197.172) =~ tr/\x{12c}-\x{130}/\xc0-\xc4/;
is($a, v192.196.172.194.197.172, 'UTF range');

($a = v300.196.172.302.197.172) =~ tr/\xc4-\xc8/\x{12c}-\x{130}/;
is($a, v300.300.172.302.301.172);


# UTF8 range tests from Karsten Sperling (patch #9008 required)

($a = "\x{0100}") =~ tr/\x00-\x{100}/X/;
is($a, "X");

($a = "\x{0100}") =~ tr/\x{0000}-\x{00ff}/X/c;
is($a, "X");

($a = "\x{0100}") =~ tr/\x{0000}-\x{00ff}\x{0101}/X/c;
is($a, "X");

($a = v256) =~ tr/\x{0000}-\x{00ff}\x{0101}/X/c;
is($a, "X");


# UTF8 range tests from Inaba Hiroto

($a = "\x{200}") =~ tr/\x00-\x{100}/X/c;
is($a, "X");

($a = "\x{200}") =~ tr/\x00-\x{100}/X/cs;
is($a, "X");


# Tricky on EBCDIC: while [a-z] [A-Z] must not match the gap characters,
# (i-j, r-s, I-J, R-S), [\x89-\x91] [\xc9-\xd1] has to match them,
# from Karsten Sperling.

$c = ($a = "\x89\x8a\x8b\x8c\x8d\x8f\x90\x91") =~ tr/\x89-\x91/X/;
is($c, 8);
is($a, "XXXXXXXX");

$c = ($a = "\xc9\xca\xcb\xcc\xcd\xcf\xd0\xd1") =~ tr/\xc9-\xd1/X/;
is($c, 8);
is($a, "XXXXXXXX");

SKIP: {
    skip "not EBCDIC", 4 unless $Is_EBCDIC;

    $c = ($a = "\x89\x8a\x8b\x8c\x8d\x8f\x90\x91") =~ tr/i-j/X/;
    is($c, 2);
    is($a, "X\x8a\x8b\x8c\x8d\x8f\x90X");

    $c = ($a = "\xc9\xca\xcb\xcc\xcd\xcf\xd0\xd1") =~ tr/I-J/X/;
    is($c, 2);
    is($a, "X\xca\xcb\xcc\xcd\xcf\xd0X");
}

($a = "\x{100}") =~ tr/\x00-\xff/X/c;
is(ord($a), ord("X"));

($a = "\x{100}") =~ tr/\x00-\xff/X/cs;
is(ord($a), ord("X"));

($a = "\x{100}\x{100}") =~ tr/\x{101}-\x{200}//c;
is($a, "\x{100}\x{100}");

($a = "\x{100}\x{100}") =~ tr/\x{101}-\x{200}//cs;
is($a, "\x{100}");

$a = "\xfe\xff"; $a =~ tr/\xfe\xff/\x{1ff}\x{1fe}/;
is($a, "\x{1ff}\x{1fe}");


# From David Dyck
($a = "R0_001") =~ tr/R_//d;
is(hex($a), 1);

# From Inaba Hiroto
# y/// on the aliased element changes @a; on a copy ($_.'') it must not.
@a = (1,2); map { y/1/./ for $_ } @a;
is("@a", ". 2");

@a = (1,2); map { y/1/./ for $_.'' } @a;
is("@a", "1 2");


# Additional test for Inaba Hiroto patch (robin@kitsite.com)
($a = "\x{100}\x{102}\x{101}") =~ tr/\x00-\377/XYZ/c;
is($a, "XZY");


# Used to fail with "Modification of a read-only value attempted"
%a = (N=>1);
foreach (keys %a) {
    eval 'tr/N/n/';
    is($_, 'n', 'pp_trans needs to unshare shared hash keys');
    is($@, '', ' no error');
}


$x = eval '"1213" =~ tr/1/1/';
is($x, 2, 'implicit count on constant');
is($@, '', ' no error');


my @foo = ();
eval '$foo[-1] =~ tr/N/N/';
is( $@, '', 'implicit count outside array bounds, index negative' );
is( scalar @foo, 0, " doesn't extend the array");

eval '$foo[1] =~ tr/N/N/';
is( $@, '', 'implicit count outside array bounds, index positive' );
is( scalar @foo, 0, " doesn't extend the array");


my %foo = ();
eval '$foo{bar} =~ tr/N/N/';
is( $@, '', 'implicit count outside hash bounds' );
is( scalar keys %foo, 0, " doesn't extend the hash");

$x = \"foo";
is( $x =~ tr/A/A/, 2, 'non-modifying tr/// on a scalar ref' );
is( ref $x, 'SCALAR', " doesn't stringify its argument" );

# rt.perl.org 36622.  Perl didn't like a y/// at end of file.  No trailing
# newline allowed.
# The program text ends in y/// with no newline after it; the expected
# output is the empty string.
fresh_perl_is(q[$_ = "foo"; y/A-Z/a-z/], '');


{ # [perl #38293] chr(65535) should be allowed in regexes
    no warnings 'utf8'; # to allow non-characters

    $s = "\x{d800}\x{ffff}";
    $s =~ tr/\0/A/;
    is($s, "\x{d800}\x{ffff}", "do_trans_simple");

    $s = "\x{d800}\x{ffff}";
    $i = $s =~ tr/\0//;
    is($i, 0, "do_trans_count");

    $s = "\x{d800}\x{ffff}";
    $s =~ tr/\0/A/s;
    is($s, "\x{d800}\x{ffff}", "do_trans_complex, SQUASH");

    $s = "\x{d800}\x{ffff}";
    $s =~ tr/\0/A/c;
    is($s, "AA", "do_trans_complex, COMPLEMENT");

    $s = "A\x{ffff}B";
    $s =~ tr/\x{ffff}/\x{1ffff}/;
    is($s, "A\x{1ffff}B", "utf8, SEARCHLIST");

    $s = "\x{fffd}\x{fffe}\x{ffff}";
    $s =~ tr/\x{fffd}-\x{ffff}/ABC/;
    is($s, "ABC", "utf8, SEARCHLIST range");

    $s = "ABC";
    $s =~ tr/ABC/\x{ffff}/;
    is($s, "\x{ffff}"x3, "utf8, REPLACEMENTLIST");

    $s = "ABC";
    $s =~ tr/ABC/\x{fffd}-\x{ffff}/;
    is($s, "\x{fffd}\x{fffe}\x{ffff}", "utf8, REPLACEMENTLIST range");

    $s = "A\x{ffff}B\x{100}\0\x{fffe}\x{ffff}";
    $i = $s =~ tr/\x{ffff}//;
    is($i, 2, "utf8, count");

    $s = "A\x{ffff}\x{ffff}C";
    $s =~ tr/\x{ffff}/\x{100}/s;
    is($s, "A\x{100}C", "utf8, SQUASH");

    $s = "A\x{ffff}\x{ffff}\x{fffe}\x{fffe}\x{fffe}C";
    $s =~ tr/\x{fffe}\x{ffff}//s;
    is($s, "A\x{ffff}\x{fffe}C", "utf8, SQUASH");

    $s = "xAABBBy";
    $s =~ tr/AB/\x{ffff}/s;
    is($s, "x\x{ffff}y", "utf8, SQUASH");

    $s = "xAABBBy";
    $s =~ tr/AB/\x{fffe}\x{ffff}/s;
    is($s, "x\x{fffe}\x{ffff}y", "utf8, SQUASH");

    $s = "A\x{ffff}B\x{fffe}C";
    $s =~ tr/\x{fffe}\x{ffff}/x/c;
    is($s, "x\x{ffff}x\x{fffe}x", "utf8, COMPLEMENT");

    $s = "A\x{10000}B\x{2abcd}C";
    $s =~ tr/\0-\x{ffff}/x/c;
    is($s, "AxBxC", "utf8, COMPLEMENT range");

    $s = "A\x{fffe}B\x{ffff}C";
    $s =~ tr/\x{fffe}\x{ffff}/x/d;
    is($s, "AxBC", "utf8, DELETE");

} # non-characters end

{ # related to [perl #27940]
    my $c;

    ($c = "\x20\c@\x30\cA\x40\cZ\x50\c_\x60") =~ tr/\c@-\c_//d;
    is($c, "\x20\x30\x40\x50\x60", "tr/\\c\@-\\c_//d");

    ($c = "\x20\x00\x30\x01\x40\x1A\x50\x1F\x60") =~ tr/\x00-\x1f//d;
    is($c, "\x20\x30\x40\x50\x60", "tr/\\x00-\\x1f//d");
}

# $s is taken from a hash key; the checks below only run when XS::APItest
# can be loaded to inspect its copy-on-write status.
($s) = keys %{{pie => 3}};
SKIP: {
    if (!eval { require XS::APItest }) { skip "no XS::APItest", 2 }
    my $wasro = XS::APItest::SvIsCOW($s);
    ok $wasro, "have a COW";
    $s =~ tr/i//;
    ok( XS::APItest::SvIsCOW($s),
        "count-only tr doesn't deCOW COWs" );
}

# [ RT #61520 ]
#
# under threads, unicode tr within a cloned closure would SEGV or assert
# fail, since the pointer in the pad to the swash was getting zeroed out
# in the proto-CV

{
    my $x = "\x{142}";
    sub {
        $x =~ tr[\x{142}][\x{143}];
    }->();
    is($x,"\x{143}", "utf8 + closure");
}

# Freeing of trans ops prior to pmtrans() [perl #102858].
# The "~=" below is a deliberate syntax error; reaching "ok 1" is the test.
eval q{ $a ~= tr/a/b/; };
ok 1;
SKIP: {
    no warnings "deprecated";
    skip "no encoding", 1 unless eval { require encoding; 1 };
    eval q{ use encoding "utf8"; $a ~= tr/a/b/; };
    ok 1;
}

{ # [perl #113584]

    my $x = "Perlα";
    $x =~ tr/αα/βγ/;
    note $x;
    is($x, "Perlβ", "Only first of multiple transliterations is used");
}

# tr/a/b/ should fail even on zero-length read-only strings
use constant nullrocow => (keys%{{""=>undef}})[0];
for ("", nullrocow) {
    eval { $_ =~ y/a/b/ };
    like $@, qr/^Modification of a read-only value attempted at /,
        'tr/a/b/ fails on zero-length ro string';
}

1;