1898184e3Ssthen#!./perl 2898184e3Ssthen 3898184e3SsthenBEGIN { 4b8851fccSafresh1 chdir 't' if -d 't'; 5898184e3Ssthen require './test.pl'; 69f11ffb7Safresh1} 79f11ffb7Safresh1 8898184e3Ssthenskip_all_if_miniperl("no dynamic loading on miniperl, no File::Spec (used by charnames)"); 9*eac174f2Safresh1plan(tests => 147); 10898184e3Ssthen 11898184e3Ssthen{ 12898184e3Ssthen # check the special casing of split /\s/ and unicode 13898184e3Ssthen use charnames qw(:full); 14898184e3Ssthen # below test data is extracted from 15898184e3Ssthen # PropList-5.0.0.txt 16898184e3Ssthen # Date: 2006-06-07, 23:22:52 GMT [MD] 17898184e3Ssthen # 18898184e3Ssthen # Unicode Character Database 19898184e3Ssthen # Copyright (c) 1991-2006 Unicode, Inc. 20898184e3Ssthen # For terms of use, see http://www.unicode.org/terms_of_use.html 21898184e3Ssthen # For documentation, see UCD.html 22898184e3Ssthen my @spaces=( 23898184e3Ssthen ord("\t"), # Cc <control-0009> 24898184e3Ssthen ord("\n"), # Cc <control-000A> 25898184e3Ssthen # not PerlSpace # Cc <control-000B> 26898184e3Ssthen ord("\f"), # Cc <control-000C> 27898184e3Ssthen ord("\r"), # Cc <control-000D> 28898184e3Ssthen ord(" "), # Zs SPACE 29898184e3Ssthen ord("\N{NEL}"), # Cc <control-0085> 30898184e3Ssthen ord("\N{NO-BREAK SPACE}"), 31898184e3Ssthen # Zs NO-BREAK SPACE 32898184e3Ssthen 0x1680, # Zs OGHAM SPACE MARK 33898184e3Ssthen 0x2000..0x200A, # Zs [11] EN QUAD..HAIR SPACE 34898184e3Ssthen 0x2028, # Zl LINE SEPARATOR 35898184e3Ssthen 0x2029, # Zp PARAGRAPH SEPARATOR 36898184e3Ssthen 0x202F, # Zs NARROW NO-BREAK SPACE 37898184e3Ssthen 0x205F, # Zs MEDIUM MATHEMATICAL SPACE 38898184e3Ssthen 0x3000 # Zs IDEOGRAPHIC SPACE 39898184e3Ssthen ); 40898184e3Ssthen #diag "Have @{[0+@spaces]} to test\n"; 41898184e3Ssthen foreach my $cp (@spaces) { 42898184e3Ssthen my $msg = sprintf "Space: U+%04x", $cp; 43898184e3Ssthen my $space = chr($cp); 44898184e3Ssthen my $str="A:$space:B\x{FFFD}"; 45898184e3Ssthen chop $str; 46898184e3Ssthen 47898184e3Ssthen my @res=split(/\s+/,$str); 48898184e3Ssthen my $cnt=split(/\s+/,$str); 49898184e3Ssthen ok(@res == 2 && join('-',@res) eq "A:-:B", "$msg - /\\s+/"); 50898184e3Ssthen is($cnt, scalar(@res), "$msg - /\\s+/ (count)"); 51898184e3Ssthen 52898184e3Ssthen my $s2 = "$space$space:A:$space$space:B\x{FFFD}"; 53898184e3Ssthen chop $s2; 54898184e3Ssthen 55898184e3Ssthen my @r2 = split(' ',$s2); 56898184e3Ssthen my $c2 = split(' ',$s2); 57898184e3Ssthen ok(@r2 == 2 && join('-', @r2) eq ":A:-:B", "$msg - ' '"); 58898184e3Ssthen is($c2, scalar(@r2), "$msg - ' ' (count)"); 59898184e3Ssthen 60898184e3Ssthen my @r3 = split(/\s+/, $s2); 61898184e3Ssthen my $c3 = split(/\s+/, $s2); 62898184e3Ssthen ok(@r3 == 3 && join('-', @r3) eq "-:A:-:B", "$msg - /\\s+/ No.2"); 63898184e3Ssthen is($c3, scalar(@r3), "$msg - /\\s+/ No.2 (count)"); 64898184e3Ssthen } 6591f110e0Safresh1 6691f110e0Safresh1 { # RT #114808 6791f110e0Safresh1 warning_is( 6891f110e0Safresh1 sub { 6991f110e0Safresh1 $p=chr(0x100); 7091f110e0Safresh1 for (".","ab\x{101}def") { 7191f110e0Safresh1 @q = split /$p/ 7291f110e0Safresh1 } 7391f110e0Safresh1 }, 7491f110e0Safresh1 undef, 7591f110e0Safresh1 'no warnings when part of split cant match non-utf8' 7691f110e0Safresh1 ); 7791f110e0Safresh1 } 7891f110e0Safresh1 79898184e3Ssthen} 80*eac174f2Safresh1 81*eac174f2Safresh1{ 82*eac174f2Safresh1 # Check empty pattern with specified field count on Unicode string 83*eac174f2Safresh1 my $string = "\x{100}\x{101}\x{102}"; 84*eac174f2Safresh1 $_ = join(':', split(//, $string, 2)); 85*eac174f2Safresh1 is($_, "\x{100}:\x{101}\x{102}", 86*eac174f2Safresh1 "Split into specified number of fields with empty pattern"); 87*eac174f2Safresh1 @ary = split(//, $string, 2); 88*eac174f2Safresh1 $cnt = split(//, $string, 2); 89*eac174f2Safresh1 is($cnt, scalar(@ary), "Check element count from previous test"); 90*eac174f2Safresh1} 91