1#!./perl 2 3BEGIN { 4 require './test.pl'; 5 skip_all_if_miniperl("no dynamic loading on miniperl, no File::Spec (used by charnames)"); 6 plan(tests => 151); 7} 8 9{ 10 # check the special casing of split /\s/ and unicode 11 use charnames qw(:full); 12 # below test data is extracted from 13 # PropList-5.0.0.txt 14 # Date: 2006-06-07, 23:22:52 GMT [MD] 15 # 16 # Unicode Character Database 17 # Copyright (c) 1991-2006 Unicode, Inc. 18 # For terms of use, see http://www.unicode.org/terms_of_use.html 19 # For documentation, see UCD.html 20 my @spaces=( 21 ord("\t"), # Cc <control-0009> 22 ord("\n"), # Cc <control-000A> 23 # not PerlSpace # Cc <control-000B> 24 ord("\f"), # Cc <control-000C> 25 ord("\r"), # Cc <control-000D> 26 ord(" "), # Zs SPACE 27 ord("\N{NEL}"), # Cc <control-0085> 28 ord("\N{NO-BREAK SPACE}"), 29 # Zs NO-BREAK SPACE 30 0x1680, # Zs OGHAM SPACE MARK 31 0x180E, # Zs MONGOLIAN VOWEL SEPARATOR 32 0x2000..0x200A, # Zs [11] EN QUAD..HAIR SPACE 33 0x2028, # Zl LINE SEPARATOR 34 0x2029, # Zp PARAGRAPH SEPARATOR 35 0x202F, # Zs NARROW NO-BREAK SPACE 36 0x205F, # Zs MEDIUM MATHEMATICAL SPACE 37 0x3000 # Zs IDEOGRAPHIC SPACE 38 ); 39 #diag "Have @{[0+@spaces]} to test\n"; 40 foreach my $cp (@spaces) { 41 my $msg = sprintf "Space: U+%04x", $cp; 42 my $space = chr($cp); 43 my $str="A:$space:B\x{FFFD}"; 44 chop $str; 45 46 my @res=split(/\s+/,$str); 47 my $cnt=split(/\s+/,$str); 48 ok(@res == 2 && join('-',@res) eq "A:-:B", "$msg - /\\s+/"); 49 is($cnt, scalar(@res), "$msg - /\\s+/ (count)"); 50 51 my $s2 = "$space$space:A:$space$space:B\x{FFFD}"; 52 chop $s2; 53 54 my @r2 = split(' ',$s2); 55 my $c2 = split(' ',$s2); 56 ok(@r2 == 2 && join('-', @r2) eq ":A:-:B", "$msg - ' '"); 57 is($c2, scalar(@r2), "$msg - ' ' (count)"); 58 59 my @r3 = split(/\s+/, $s2); 60 my $c3 = split(/\s+/, $s2); 61 ok(@r3 == 3 && join('-', @r3) eq "-:A:-:B", "$msg - /\\s+/ No.2"); 62 is($c3, scalar(@r3), "$msg - /\\s+/ No.2 (count)"); 63 } 64 65 { # RT #114808 66 warning_is( 67 sub { 68 $p=chr(0x100); 69 for (".","ab\x{101}def") { 70 @q = split /$p/ 71 } 72 }, 73 undef, 74 'no warnings when part of split cant match non-utf8' 75 ); 76 } 77 78} 79