1#!./perl 2 3BEGIN { 4 chdir 't' if -d 't'; 5 @INC = '../lib'; 6 require './test.pl'; 7} 8 9plan tests => 251; 10 11$FS = ':'; 12 13$_ = 'a:b:c'; 14 15($a,$b,$c) = split($FS,$_); 16 17is(join(';',$a,$b,$c), 'a;b;c'); 18 19@ary = split(/:b:/); 20$cnt = split(/:b:/); 21is(join("$_",@ary), 'aa:b:cc'); 22is($cnt, scalar(@ary)); 23 24$_ = "abc\n"; 25my @xyz = (@ary = split(//)); 26$cnt = split(//); 27is(join(".",@ary), "a.b.c.\n"); 28is($cnt, scalar(@ary)); 29 30$_ = "a:b:c::::"; 31@ary = split(/:/); 32$cnt = split(/:/); 33is(join(".",@ary), "a.b.c"); 34is($cnt, scalar(@ary)); 35 36$_ = join(':',split(' '," a b\tc \t d ")); 37is($_, 'a:b:c:d'); 38@ary = split(' '," a b\tc \t d "); 39$cnt = split(' '," a b\tc \t d "); 40is($cnt, scalar(@ary)); 41 42$_ = join(':',split(/ */,"foo bar bie\tdoll")); 43is($_ , "f:o:o:b:a:r:b:i:e:\t:d:o:l:l"); 44@ary = split(/ */,"foo bar bie\tdoll"); 45$cnt = split(/ */,"foo bar bie\tdoll"); 46is($cnt, scalar(@ary)); 47 48$_ = join(':', 'foo', split(/ /,'a b c'), 'bar'); 49is($_, "foo:a:b::c:bar"); 50@ary = split(/ /,'a b c'); 51$cnt = split(/ /,'a b c'); 52is($cnt, scalar(@ary)); 53 54# Can we say how many fields to split to? 55$_ = join(':', split(' ','1 2 3 4 5 6', 3)); 56is($_, '1:2:3 4 5 6'); 57@ary = split(' ','1 2 3 4 5 6', 3); 58$cnt = split(' ','1 2 3 4 5 6', 3); 59is($cnt, scalar(@ary)); 60 61# Can we do it as a variable? 62$x = 4; 63$_ = join(':', split(' ','1 2 3 4 5 6', $x)); 64is($_, '1:2:3:4 5 6'); 65@ary = split(' ','1 2 3 4 5 6', $x); 66$cnt = split(' ','1 2 3 4 5 6', $x); 67is($cnt, scalar(@ary)); 68 69# Does the 999 suppress null field chopping? 70$_ = join(':', split(/:/,'1:2:3:4:5:6:::', 999)); 71is($_ , '1:2:3:4:5:6:::'); 72@ary = split(/:/,'1:2:3:4:5:6:::', 999); 73$cnt = split(/:/,'1:2:3:4:5:6:::', 999); 74is($cnt, scalar(@ary)); 75 76# Does assignment to a list imply split to one more field than that? 77$foo = runperl( switches => ['-Dt'], stderr => 1, prog => '($a,$b)=split;' ); 78ok($foo =~ /DEBUGGING/ || $foo =~ /const\n?\Q(IV(3))\E/); 79 80# Can we say how many fields to split to when assigning to a list? 81($a,$b) = split(' ','1 2 3 4 5 6', 2); 82$_ = join(':',$a,$b); 83is($_, '1:2 3 4 5 6'); 84 85# do subpatterns generate additional fields (without trailing nulls)? 86$_ = join '|', split(/,|(-)/, "1-10,20,,,"); 87is($_, "1|-|10||20"); 88@ary = split(/,|(-)/, "1-10,20,,,"); 89$cnt = split(/,|(-)/, "1-10,20,,,"); 90is($cnt, scalar(@ary)); 91 92# do subpatterns generate additional fields (with a limit)? 93$_ = join '|', split(/,|(-)/, "1-10,20,,,", 10); 94is($_, "1|-|10||20||||||"); 95@ary = split(/,|(-)/, "1-10,20,,,", 10); 96$cnt = split(/,|(-)/, "1-10,20,,,", 10); 97is($cnt, scalar(@ary)); 98 99# is the 'two undefs' bug fixed? 100(undef, $a, undef, $b) = qw(1 2 3 4); 101is("$a|$b", "2|4"); 102 103# .. even for locals? 104{ 105 local(undef, $a, undef, $b) = qw(1 2 3 4); 106 is("$a|$b", "2|4"); 107} 108 109# check splitting of null string 110$_ = join('|', split(/x/, '',-1), 'Z'); 111is($_, "Z"); 112@ary = split(/x/, '',-1); 113$cnt = split(/x/, '',-1); 114is($cnt, scalar(@ary)); 115 116$_ = join('|', split(/x/, '', 1), 'Z'); 117is($_, "Z"); 118@ary = split(/x/, '', 1); 119$cnt = split(/x/, '', 1); 120is($cnt, scalar(@ary)); 121 122$_ = join('|', split(/(p+)/,'',-1), 'Z'); 123is($_, "Z"); 124@ary = split(/(p+)/,'',-1); 125$cnt = split(/(p+)/,'',-1); 126is($cnt, scalar(@ary)); 127 128$_ = join('|', split(/.?/, '',-1), 'Z'); 129is($_, "Z"); 130@ary = split(/.?/, '',-1); 131$cnt = split(/.?/, '',-1); 132is($cnt, scalar(@ary)); 133 134 135# Are /^/m patterns scanned? 136$_ = join '|', split(/^a/m, "a b a\na d a", 20); 137is($_, "| b a\n| d a"); 138@ary = split(/^a/m, "a b a\na d a", 20); 139$cnt = split(/^a/m, "a b a\na d a", 20); 140is($cnt, scalar(@ary)); 141 142# Are /$/m patterns scanned? 143$_ = join '|', split(/a$/m, "a b a\na d a", 20); 144is($_, "a b |\na d |"); 145@ary = split(/a$/m, "a b a\na d a", 20); 146$cnt = split(/a$/m, "a b a\na d a", 20); 147is($cnt, scalar(@ary)); 148 149# Are /^/m patterns scanned? 150$_ = join '|', split(/^aa/m, "aa b aa\naa d aa", 20); 151is($_, "| b aa\n| d aa"); 152@ary = split(/^aa/m, "aa b aa\naa d aa", 20); 153$cnt = split(/^aa/m, "aa b aa\naa d aa", 20); 154is($cnt, scalar(@ary)); 155 156# Are /$/m patterns scanned? 157$_ = join '|', split(/aa$/m, "aa b aa\naa d aa", 20); 158is($_, "aa b |\naa d |"); 159@ary = split(/aa$/m, "aa b aa\naa d aa", 20); 160$cnt = split(/aa$/m, "aa b aa\naa d aa", 20); 161is($cnt, scalar(@ary)); 162 163# Greedyness: 164$_ = "a : b :c: d"; 165@ary = split(/\s*:\s*/); 166$cnt = split(/\s*:\s*/); 167is(($res = join(".",@ary)), "a.b.c.d", $res); 168is($cnt, scalar(@ary)); 169 170# use of match result as pattern (!) 171is('p:q:r:s', join ':', split('abc' =~ /b/, 'p1q1r1s')); 172@ary = split('abc' =~ /b/, 'p1q1r1s'); 173$cnt = split('abc' =~ /b/, 'p1q1r1s'); 174is($cnt, scalar(@ary)); 175 176# /^/ treated as /^/m 177$_ = join ':', split /^/, "ab\ncd\nef\n"; 178is($_, "ab\n:cd\n:ef\n"); 179 180# see if @a = @b = split(...) optimization works 181@list1 = @list2 = split ('p',"a p b c p"); 182ok(@list1 == @list2 && 183 "@list1" eq "@list2" && 184 @list1 == 2 && 185 "@list1" eq "a b c "); 186 187# zero-width assertion 188$_ = join ':', split /(?=\w)/, "rm b"; 189is($_, "r:m :b"); 190@ary = split /(?=\w)/, "rm b"; 191$cnt = split /(?=\w)/, "rm b"; 192is($cnt, scalar(@ary)); 193 194# unicode splittage 195 196@ary = map {ord} split //, v1.20.300.4000.50000.4000.300.20.1; 197$cnt = split //, v1.20.300.4000.50000.4000.300.20.1; 198is("@ary", "1 20 300 4000 50000 4000 300 20 1"); 199is($cnt, scalar(@ary)); 200 201@ary = split(/\x{FE}/, "\x{FF}\x{FE}\x{FD}"); # bug id 20010105.016 202$cnt = split(/\x{FE}/, "\x{FF}\x{FE}\x{FD}"); # bug id 20010105.016 203ok(@ary == 2 && 204 $ary[0] eq "\xFF" && $ary[1] eq "\xFD" && 205 $ary[0] eq "\x{FF}" && $ary[1] eq "\x{FD}"); 206is($cnt, scalar(@ary)); 207 208@ary = split(/(\x{FE}\xFE)/, "\xFF\x{FF}\xFE\x{FE}\xFD\x{FD}"); # variant of 31 209$cnt = split(/(\x{FE}\xFE)/, "\xFF\x{FF}\xFE\x{FE}\xFD\x{FD}"); # variant of 31 210ok(@ary == 3 && 211 $ary[0] eq "\xFF\xFF" && 212 $ary[0] eq "\x{FF}\xFF" && 213 $ary[0] eq "\x{FF}\x{FF}" && 214 $ary[1] eq "\xFE\xFE" && 215 $ary[1] eq "\x{FE}\xFE" && 216 $ary[1] eq "\x{FE}\x{FE}" && 217 $ary[2] eq "\xFD\xFD" && 218 $ary[2] eq "\x{FD}\xFD" && 219 $ary[2] eq "\x{FD}\x{FD}"); 220is($cnt, scalar(@ary)); 221 222{ 223 my @a = map ord, split(//, join("", map chr, (1234, 123, 2345))); 224 my $c = split(//, join("", map chr, (1234, 123, 2345))); 225 is("@a", "1234 123 2345"); 226 is($c, scalar(@a)); 227} 228 229{ 230 my $x = 'A'; 231 my @a = map ord, split(/$x/, join("", map chr, (1234, ord($x), 2345))); 232 my $c = split(/$x/, join("", map chr, (1234, ord($x), 2345))); 233 is("@a", "1234 2345"); 234 is($c, scalar(@a)); 235} 236 237{ 238 # bug id 20000427.003 239 240 use warnings; 241 use strict; 242 243 my $sushi = "\x{b36c}\x{5a8c}\x{ff5b}\x{5079}\x{505b}"; 244 245 my @charlist = split //, $sushi; 246 my $charnum = split //, $sushi; 247 is($charnum, scalar(@charlist)); 248 my $r = ''; 249 foreach my $ch (@charlist) { 250 $r = $r . " " . sprintf "U+%04X", ord($ch); 251 } 252 253 is($r, " U+B36C U+5A8C U+FF5B U+5079 U+505B"); 254} 255 256{ 257 my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20"; 258 259 SKIP: { 260 if (ord('A') == 193) { 261 skip("EBCDIC", 1); 262 } else { 263 # bug id 20000426.003 264 265 my ($a, $b, $c) = split(/\x40/, $s); 266 ok($a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a); 267 } 268 } 269 270 my ($a, $b) = split(/\x{100}/, $s); 271 ok($a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20"); 272 273 my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s); 274 ok($a eq "\x20\x40" && $b eq "\x40\x20"); 275 276 SKIP: { 277 if (ord('A') == 193) { 278 skip("EBCDIC", 1); 279 } else { 280 my ($a, $b) = split(/\x40\x{80}/, $s); 281 ok($a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20"); 282 } 283 } 284 285 my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s); 286 ok($a eq "\x20" && $b eq "\x{100}" && $c eq "\x20"); 287} 288 289{ 290 # 20001205.014 291 292 my $a = "ABC\x{263A}"; 293 294 my @b = split( //, $a ); 295 my $c = split( //, $a ); 296 is($c, scalar(@b)); 297 298 is(scalar @b, 4); 299 300 ok(length($b[3]) == 1 && $b[3] eq "\x{263A}"); 301 302 $a =~ s/^A/Z/; 303 ok(length($a) == 4 && $a eq "ZBC\x{263A}"); 304} 305 306{ 307 my @a = split(/\xFE/, "\xFF\xFE\xFD"); 308 my $b = split(/\xFE/, "\xFF\xFE\xFD"); 309 310 ok(@a == 2 && $a[0] eq "\xFF" && $a[1] eq "\xFD"); 311 is($b, scalar(@a)); 312} 313 314{ 315 # check that PMf_WHITE is cleared after \s+ is used 316 # reported in <20010627113312.RWGY6087.viemta06@localhost> 317 my $r; 318 foreach my $pat ( qr/\s+/, qr/ll/ ) { 319 $r = join ':' => split($pat, "hello cruel world"); 320 } 321 is($r, "he:o cruel world"); 322} 323 324 325{ 326 # split /(A)|B/, "1B2" should return (1, undef, 2) 327 my @x = split /(A)|B/, "1B2"; 328 my $y = split /(A)|B/, "1B2"; 329 is($y, scalar(@x)); 330 ok($x[0] eq '1' and (not defined $x[1]) and $x[2] eq '2'); 331} 332 333{ 334 # [perl #17064] 335 my $warn; 336 local $SIG{__WARN__} = sub { $warn = join '', @_; chomp $warn }; 337 my $char = "\x{10f1ff}"; 338 my @a = split /\r?\n/, "$char\n"; 339 my $b = split /\r?\n/, "$char\n"; 340 is($b, scalar(@a)); 341 ok(@a == 1 && $a[0] eq $char && !defined($warn)); 342} 343 344{ 345 # [perl #18195] 346 for my $u (0, 1) { 347 for my $a (0, 1) { 348 $_ = 'readin,database,readout'; 349 utf8::upgrade $_ if $u; 350 /(.+)/; 351 my @d = split /[,]/,$1; 352 my $e = split /[,]/,$1; 353 is($e, scalar(@d)); 354 is(join (':',@d), 'readin:database:readout', "[perl #18195]"); 355 } 356 } 357} 358 359{ 360 $p="a,b"; 361 utf8::upgrade $p; 362 eval { @a=split(/[, ]+/,$p) }; 363 eval { $b=split(/[, ]+/,$p) }; 364 is($b, scalar(@a)); 365 is ("$@-@a-", '-a b-', '#20912 - split() to array with /[]+/ and utf8'); 366} 367 368{ 369 is (\@a, \@{"a"}, '@a must be global for following test'); 370 $p=""; 371 $n = @a = split /,/,$p; 372 is ($n, 0, '#21765 - pmreplroot hack used to return undef for 0 iters'); 373} 374 375{ 376 # [perl #28938] 377 # assigning off the end of the array after a split could leave garbage 378 # in the inner elements 379 380 my $x; 381 @a = split /,/, ',,,,,'; 382 $a[3]=1; 383 $x = \$a[2]; 384 is (ref $x, 'SCALAR', '#28938 - garbage after extend'); 385} 386{ 387 # check the special casing of split /\s/ and unicode 388 use charnames qw(:full); 389 # below test data is extracted from 390 # PropList-5.0.0.txt 391 # Date: 2006-06-07, 23:22:52 GMT [MD] 392 # 393 # Unicode Character Database 394 # Copyright (c) 1991-2006 Unicode, Inc. 395 # For terms of use, see http://www.unicode.org/terms_of_use.html 396 # For documentation, see UCD.html 397 my @spaces=( 398 ord("\t"), # Cc <control-0009> 399 ord("\n"), # Cc <control-000A> 400 # not PerlSpace # Cc <control-000B> 401 ord("\f"), # Cc <control-000C> 402 ord("\r"), # Cc <control-000D> 403 ord(" "), # Zs SPACE 404 ord("\N{NEL}"), # Cc <control-0085> 405 ord("\N{NO-BREAK SPACE}"), 406 # Zs NO-BREAK SPACE 407 0x1680, # Zs OGHAM SPACE MARK 408 0x180E, # Zs MONGOLIAN VOWEL SEPARATOR 409 0x2000..0x200A, # Zs [11] EN QUAD..HAIR SPACE 410 0x2028, # Zl LINE SEPARATOR 411 0x2029, # Zp PARAGRAPH SEPARATOR 412 0x202F, # Zs NARROW NO-BREAK SPACE 413 0x205F, # Zs MEDIUM MATHEMATICAL SPACE 414 0x3000 # Zs IDEOGRAPHIC SPACE 415 ); 416 #diag "Have @{[0+@spaces]} to test\n"; 417 foreach my $cp (@spaces) { 418 my $msg = sprintf "Space: U+%04x", $cp; 419 my $space = chr($cp); 420 my $str="A:$space:B\x{FFFD}"; 421 chop $str; 422 423 my @res=split(/\s+/,$str); 424 my $cnt=split(/\s+/,$str); 425 ok(@res == 2 && join('-',@res) eq "A:-:B", "$msg - /\\s+/"); 426 is($cnt, scalar(@res), "$msg - /\\s+/ (count)"); 427 428 my $s2 = "$space$space:A:$space$space:B\x{FFFD}"; 429 chop $s2; 430 431 my @r2 = split(' ',$s2); 432 my $c2 = split(' ',$s2); 433 ok(@r2 == 2 && join('-', @r2) eq ":A:-:B", "$msg - ' '"); 434 is($c2, scalar(@r2), "$msg - ' ' (count)"); 435 436 my @r3 = split(/\s+/, $s2); 437 my $c3 = split(/\s+/, $s2); 438 ok(@r3 == 3 && join('-', @r3) eq "-:A:-:B", "$msg - /\\s+/ No.2"); 439 is($c3, scalar(@r3), "$msg - /\\s+/ No.2 (count)"); 440 } 441} 442 443{ 444 my $src = "ABC \0 FOO \0 XYZ"; 445 my @s = split(" \0 ", $src); 446 my @r = split(/ \0 /, $src); 447 my $cs = split(" \0 ", $src); 448 my $cr = split(/ \0 /, $src); 449 is(scalar(@s), 3); 450 is($cs, 3); 451 is($cr, 3); 452 is($s[0], "ABC"); 453 is($s[1], "FOO"); 454 is($s[2]," XYZ"); 455 is(join(':',@s), join(':',@r)); 456} 457 458{ 459 use constant BANG => {}; 460 () = split m/,/, "", BANG; 461 ok(1); 462} 463 464{ 465 # Bug #69875 466 # 'Hybrid' scalar-and-array context 467 scalar(our @PATH = split /::/, "Font::GlyphNames"); 468 # 'my' doesn't trigger the bug 469 is "@PATH", "Font GlyphNames", "hybrid scalar-and-array context"; 470} 471