xref: /onnv-gate/usr/src/cmd/perl/5.8.4/distrib/t/op/split.t (revision 0:68f95e015346)
1*0Sstevel@tonic-gate#!./perl
2*0Sstevel@tonic-gate
3*0Sstevel@tonic-gateBEGIN {
4*0Sstevel@tonic-gate    chdir 't' if -d 't';
5*0Sstevel@tonic-gate    @INC = '../lib';
6*0Sstevel@tonic-gate    require './test.pl';
7*0Sstevel@tonic-gate}
8*0Sstevel@tonic-gate
9*0Sstevel@tonic-gateplan tests => 55;
10*0Sstevel@tonic-gate
11*0Sstevel@tonic-gate$FS = ':';
12*0Sstevel@tonic-gate
13*0Sstevel@tonic-gate$_ = 'a:b:c';
14*0Sstevel@tonic-gate
15*0Sstevel@tonic-gate($a,$b,$c) = split($FS,$_);
16*0Sstevel@tonic-gate
17*0Sstevel@tonic-gateis(join(';',$a,$b,$c), 'a;b;c');
18*0Sstevel@tonic-gate
19*0Sstevel@tonic-gate@ary = split(/:b:/);
20*0Sstevel@tonic-gateis(join("$_",@ary), 'aa:b:cc');
21*0Sstevel@tonic-gate
22*0Sstevel@tonic-gate$_ = "abc\n";
23*0Sstevel@tonic-gatemy @xyz = (@ary = split(//));
24*0Sstevel@tonic-gateis(join(".",@ary), "a.b.c.\n");
25*0Sstevel@tonic-gate
26*0Sstevel@tonic-gate$_ = "a:b:c::::";
27*0Sstevel@tonic-gate@ary = split(/:/);
28*0Sstevel@tonic-gateis(join(".",@ary), "a.b.c");
29*0Sstevel@tonic-gate
30*0Sstevel@tonic-gate$_ = join(':',split(' ',"    a b\tc \t d "));
31*0Sstevel@tonic-gateis($_, 'a:b:c:d');
32*0Sstevel@tonic-gate
33*0Sstevel@tonic-gate$_ = join(':',split(/ */,"foo  bar bie\tdoll"));
34*0Sstevel@tonic-gateis($_ , "f:o:o:b:a:r:b:i:e:\t:d:o:l:l");
35*0Sstevel@tonic-gate
36*0Sstevel@tonic-gate$_ = join(':', 'foo', split(/ /,'a b  c'), 'bar');
37*0Sstevel@tonic-gateis($_, "foo:a:b::c:bar");
38*0Sstevel@tonic-gate
39*0Sstevel@tonic-gate# Can we say how many fields to split to?
40*0Sstevel@tonic-gate$_ = join(':', split(' ','1 2 3 4 5 6', 3));
41*0Sstevel@tonic-gateis($_, '1:2:3 4 5 6');
42*0Sstevel@tonic-gate
43*0Sstevel@tonic-gate# Can we do it as a variable?
44*0Sstevel@tonic-gate$x = 4;
45*0Sstevel@tonic-gate$_ = join(':', split(' ','1 2 3 4 5 6', $x));
46*0Sstevel@tonic-gateis($_, '1:2:3:4 5 6');
47*0Sstevel@tonic-gate
48*0Sstevel@tonic-gate# Does the 999 suppress null field chopping?
49*0Sstevel@tonic-gate$_ = join(':', split(/:/,'1:2:3:4:5:6:::', 999));
50*0Sstevel@tonic-gateis($_ , '1:2:3:4:5:6:::');
51*0Sstevel@tonic-gate
52*0Sstevel@tonic-gate# Does assignment to a list imply split to one more field than that?
53*0Sstevel@tonic-gateif ($^O eq 'MSWin32') { $foo = `.\\perl -Dt -e "(\$a,\$b) = split;" 2>&1` }
54*0Sstevel@tonic-gateelsif ($^O eq 'NetWare') { $foo = `perl -Dt -e "(\$a,\$b) = split;" 2>&1` }
55*0Sstevel@tonic-gateelsif ($^O eq 'VMS')  { $foo = `./perl "-Dt" -e "(\$a,\$b) = split;" 2>&1` }
56*0Sstevel@tonic-gateelsif ($^O eq 'MacOS'){ $foo = `$^X "-Dt" -e "(\$a,\$b) = split;"` }
57*0Sstevel@tonic-gateelse                  { $foo = `./perl -Dt -e '(\$a,\$b) = split;' 2>&1` }
58*0Sstevel@tonic-gateok($foo =~ /DEBUGGING/ || $foo =~ /\Qconst(IV(3))\E/);
59*0Sstevel@tonic-gate
60*0Sstevel@tonic-gate# Can we say how many fields to split to when assigning to a list?
61*0Sstevel@tonic-gate($a,$b) = split(' ','1 2 3 4 5 6', 2);
62*0Sstevel@tonic-gate$_ = join(':',$a,$b);
63*0Sstevel@tonic-gateis($_, '1:2 3 4 5 6');
64*0Sstevel@tonic-gate
65*0Sstevel@tonic-gate# do subpatterns generate additional fields (without trailing nulls)?
66*0Sstevel@tonic-gate$_ = join '|', split(/,|(-)/, "1-10,20,,,");
67*0Sstevel@tonic-gateis($_, "1|-|10||20");
68*0Sstevel@tonic-gate
69*0Sstevel@tonic-gate# do subpatterns generate additional fields (with a limit)?
70*0Sstevel@tonic-gate$_ = join '|', split(/,|(-)/, "1-10,20,,,", 10);
71*0Sstevel@tonic-gateis($_, "1|-|10||20||||||");
72*0Sstevel@tonic-gate
73*0Sstevel@tonic-gate# is the 'two undefs' bug fixed?
74*0Sstevel@tonic-gate(undef, $a, undef, $b) = qw(1 2 3 4);
75*0Sstevel@tonic-gateis("$a|$b", "2|4");
76*0Sstevel@tonic-gate
77*0Sstevel@tonic-gate# .. even for locals?
78*0Sstevel@tonic-gate{
79*0Sstevel@tonic-gate  local(undef, $a, undef, $b) = qw(1 2 3 4);
80*0Sstevel@tonic-gate  is("$a|$b", "2|4");
81*0Sstevel@tonic-gate}
82*0Sstevel@tonic-gate
83*0Sstevel@tonic-gate# check splitting of null string
84*0Sstevel@tonic-gate$_ = join('|', split(/x/,   '',-1), 'Z');
85*0Sstevel@tonic-gateis($_, "Z");
86*0Sstevel@tonic-gate
87*0Sstevel@tonic-gate$_ = join('|', split(/x/,   '', 1), 'Z');
88*0Sstevel@tonic-gateis($_, "Z");
89*0Sstevel@tonic-gate
90*0Sstevel@tonic-gate$_ = join('|', split(/(p+)/,'',-1), 'Z');
91*0Sstevel@tonic-gateis($_, "Z");
92*0Sstevel@tonic-gate
93*0Sstevel@tonic-gate$_ = join('|', split(/.?/,  '',-1), 'Z');
94*0Sstevel@tonic-gateis($_, "Z");
95*0Sstevel@tonic-gate
96*0Sstevel@tonic-gate
97*0Sstevel@tonic-gate# Are /^/m patterns scanned?
98*0Sstevel@tonic-gate$_ = join '|', split(/^a/m, "a b a\na d a", 20);
99*0Sstevel@tonic-gateis($_, "| b a\n| d a");
100*0Sstevel@tonic-gate
101*0Sstevel@tonic-gate# Are /$/m patterns scanned?
102*0Sstevel@tonic-gate$_ = join '|', split(/a$/m, "a b a\na d a", 20);
103*0Sstevel@tonic-gateis($_, "a b |\na d |");
104*0Sstevel@tonic-gate
105*0Sstevel@tonic-gate# Are /^/m patterns scanned?
106*0Sstevel@tonic-gate$_ = join '|', split(/^aa/m, "aa b aa\naa d aa", 20);
107*0Sstevel@tonic-gateis($_, "| b aa\n| d aa");
108*0Sstevel@tonic-gate
109*0Sstevel@tonic-gate# Are /$/m patterns scanned?
110*0Sstevel@tonic-gate$_ = join '|', split(/aa$/m, "aa b aa\naa d aa", 20);
111*0Sstevel@tonic-gateis($_, "aa b |\naa d |");
112*0Sstevel@tonic-gate
113*0Sstevel@tonic-gate# Greedyness:
114*0Sstevel@tonic-gate$_ = "a : b :c: d";
115*0Sstevel@tonic-gate@ary = split(/\s*:\s*/);
116*0Sstevel@tonic-gateis(($res = join(".",@ary)), "a.b.c.d", $res);
117*0Sstevel@tonic-gate
118*0Sstevel@tonic-gate# use of match result as pattern (!)
119*0Sstevel@tonic-gateis('p:q:r:s', join ':', split('abc' =~ /b/, 'p1q1r1s'));
120*0Sstevel@tonic-gate
121*0Sstevel@tonic-gate# /^/ treated as /^/m
122*0Sstevel@tonic-gate$_ = join ':', split /^/, "ab\ncd\nef\n";
123*0Sstevel@tonic-gateis($_, "ab\n:cd\n:ef\n");
124*0Sstevel@tonic-gate
125*0Sstevel@tonic-gate# see if @a = @b = split(...) optimization works
126*0Sstevel@tonic-gate@list1 = @list2 = split ('p',"a p b c p");
127*0Sstevel@tonic-gateok(@list1 == @list2 &&
128*0Sstevel@tonic-gate   "@list1" eq "@list2" &&
129*0Sstevel@tonic-gate   @list1 == 2 &&
130*0Sstevel@tonic-gate   "@list1" eq "a   b c ");
131*0Sstevel@tonic-gate
132*0Sstevel@tonic-gate# zero-width assertion
133*0Sstevel@tonic-gate$_ = join ':', split /(?=\w)/, "rm b";
134*0Sstevel@tonic-gateis($_, "r:m :b");
135*0Sstevel@tonic-gate
136*0Sstevel@tonic-gate# unicode splittage
137*0Sstevel@tonic-gate
138*0Sstevel@tonic-gate@ary = map {ord} split //, v1.20.300.4000.50000.4000.300.20.1;
139*0Sstevel@tonic-gateis("@ary", "1 20 300 4000 50000 4000 300 20 1");
140*0Sstevel@tonic-gate
141*0Sstevel@tonic-gate@ary = split(/\x{FE}/, "\x{FF}\x{FE}\x{FD}"); # bug id 20010105.016
142*0Sstevel@tonic-gateok(@ary == 2 &&
143*0Sstevel@tonic-gate   $ary[0] eq "\xFF"   && $ary[1] eq "\xFD" &&
144*0Sstevel@tonic-gate   $ary[0] eq "\x{FF}" && $ary[1] eq "\x{FD}");
145*0Sstevel@tonic-gate
146*0Sstevel@tonic-gate@ary = split(/(\x{FE}\xFE)/, "\xFF\x{FF}\xFE\x{FE}\xFD\x{FD}"); # variant of 31
147*0Sstevel@tonic-gateok(@ary == 3 &&
148*0Sstevel@tonic-gate   $ary[0] eq "\xFF\xFF"     &&
149*0Sstevel@tonic-gate   $ary[0] eq "\x{FF}\xFF"   &&
150*0Sstevel@tonic-gate   $ary[0] eq "\x{FF}\x{FF}" &&
151*0Sstevel@tonic-gate   $ary[1] eq "\xFE\xFE"     &&
152*0Sstevel@tonic-gate   $ary[1] eq "\x{FE}\xFE"   &&
153*0Sstevel@tonic-gate   $ary[1] eq "\x{FE}\x{FE}" &&
154*0Sstevel@tonic-gate   $ary[2] eq "\xFD\xFD"     &&
155*0Sstevel@tonic-gate   $ary[2] eq "\x{FD}\xFD"   &&
156*0Sstevel@tonic-gate   $ary[2] eq "\x{FD}\x{FD}");
157*0Sstevel@tonic-gate
158*0Sstevel@tonic-gate{
159*0Sstevel@tonic-gate    my @a = map ord, split(//, join("", map chr, (1234, 123, 2345)));
160*0Sstevel@tonic-gate    is("@a", "1234 123 2345");
161*0Sstevel@tonic-gate}
162*0Sstevel@tonic-gate
163*0Sstevel@tonic-gate{
164*0Sstevel@tonic-gate    my $x = 'A';
165*0Sstevel@tonic-gate    my @a = map ord, split(/$x/, join("", map chr, (1234, ord($x), 2345)));
166*0Sstevel@tonic-gate    is("@a", "1234 2345");
167*0Sstevel@tonic-gate}
168*0Sstevel@tonic-gate
169*0Sstevel@tonic-gate{
170*0Sstevel@tonic-gate    # bug id 20000427.003
171*0Sstevel@tonic-gate
172*0Sstevel@tonic-gate    use warnings;
173*0Sstevel@tonic-gate    use strict;
174*0Sstevel@tonic-gate
175*0Sstevel@tonic-gate    my $sushi = "\x{b36c}\x{5a8c}\x{ff5b}\x{5079}\x{505b}";
176*0Sstevel@tonic-gate
177*0Sstevel@tonic-gate    my @charlist = split //, $sushi;
178*0Sstevel@tonic-gate    my $r = '';
179*0Sstevel@tonic-gate    foreach my $ch (@charlist) {
180*0Sstevel@tonic-gate	$r = $r . " " . sprintf "U+%04X", ord($ch);
181*0Sstevel@tonic-gate    }
182*0Sstevel@tonic-gate
183*0Sstevel@tonic-gate    is($r, " U+B36C U+5A8C U+FF5B U+5079 U+505B");
184*0Sstevel@tonic-gate}
185*0Sstevel@tonic-gate
186*0Sstevel@tonic-gate{
187*0Sstevel@tonic-gate    my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20";
188*0Sstevel@tonic-gate
189*0Sstevel@tonic-gate  SKIP: {
190*0Sstevel@tonic-gate    if (ord('A') == 193) {
191*0Sstevel@tonic-gate	skip("EBCDIC", 1);
192*0Sstevel@tonic-gate    } else {
193*0Sstevel@tonic-gate	# bug id 20000426.003
194*0Sstevel@tonic-gate
195*0Sstevel@tonic-gate	my ($a, $b, $c) = split(/\x40/, $s);
196*0Sstevel@tonic-gate	ok($a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a);
197*0Sstevel@tonic-gate    }
198*0Sstevel@tonic-gate  }
199*0Sstevel@tonic-gate
200*0Sstevel@tonic-gate    my ($a, $b) = split(/\x{100}/, $s);
201*0Sstevel@tonic-gate    ok($a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20");
202*0Sstevel@tonic-gate
203*0Sstevel@tonic-gate    my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s);
204*0Sstevel@tonic-gate    ok($a eq "\x20\x40" && $b eq "\x40\x20");
205*0Sstevel@tonic-gate
206*0Sstevel@tonic-gate  SKIP: {
207*0Sstevel@tonic-gate    if (ord('A') == 193) {
208*0Sstevel@tonic-gate	skip("EBCDIC", 1);
209*0Sstevel@tonic-gate    }  else {
210*0Sstevel@tonic-gate	my ($a, $b) = split(/\x40\x{80}/, $s);
211*0Sstevel@tonic-gate	ok($a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20");
212*0Sstevel@tonic-gate    }
213*0Sstevel@tonic-gate  }
214*0Sstevel@tonic-gate
215*0Sstevel@tonic-gate    my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s);
216*0Sstevel@tonic-gate    ok($a eq "\x20" && $b eq "\x{100}" && $c eq "\x20");
217*0Sstevel@tonic-gate}
218*0Sstevel@tonic-gate
219*0Sstevel@tonic-gate{
220*0Sstevel@tonic-gate    # 20001205.014
221*0Sstevel@tonic-gate
222*0Sstevel@tonic-gate    my $a = "ABC\x{263A}";
223*0Sstevel@tonic-gate
224*0Sstevel@tonic-gate    my @b = split( //, $a );
225*0Sstevel@tonic-gate
226*0Sstevel@tonic-gate    is(scalar @b, 4);
227*0Sstevel@tonic-gate
228*0Sstevel@tonic-gate    ok(length($b[3]) == 1 && $b[3] eq "\x{263A}");
229*0Sstevel@tonic-gate
230*0Sstevel@tonic-gate    $a =~ s/^A/Z/;
231*0Sstevel@tonic-gate    ok(length($a) == 4 && $a eq "ZBC\x{263A}");
232*0Sstevel@tonic-gate}
233*0Sstevel@tonic-gate
234*0Sstevel@tonic-gate{
235*0Sstevel@tonic-gate    my @a = split(/\xFE/, "\xFF\xFE\xFD");
236*0Sstevel@tonic-gate
237*0Sstevel@tonic-gate    ok(@a == 2 && $a[0] eq "\xFF" && $a[1] eq "\xFD");
238*0Sstevel@tonic-gate}
239*0Sstevel@tonic-gate
240*0Sstevel@tonic-gate{
241*0Sstevel@tonic-gate    # check that PMf_WHITE is cleared after \s+ is used
242*0Sstevel@tonic-gate    # reported in <20010627113312.RWGY6087.viemta06@localhost>
243*0Sstevel@tonic-gate    my $r;
244*0Sstevel@tonic-gate    foreach my $pat ( qr/\s+/, qr/ll/ ) {
245*0Sstevel@tonic-gate	$r = join ':' => split($pat, "hello cruel world");
246*0Sstevel@tonic-gate    }
247*0Sstevel@tonic-gate    is($r, "he:o cruel world");
248*0Sstevel@tonic-gate}
249*0Sstevel@tonic-gate
250*0Sstevel@tonic-gate
251*0Sstevel@tonic-gate{
252*0Sstevel@tonic-gate    # split /(A)|B/, "1B2" should return (1, undef, 2)
253*0Sstevel@tonic-gate    my @x = split /(A)|B/, "1B2";
254*0Sstevel@tonic-gate    ok($x[0] eq '1' and (not defined $x[1]) and $x[2] eq '2');
255*0Sstevel@tonic-gate}
256*0Sstevel@tonic-gate
257*0Sstevel@tonic-gate{
258*0Sstevel@tonic-gate    # [perl #17064]
259*0Sstevel@tonic-gate    my $warn;
260*0Sstevel@tonic-gate    local $SIG{__WARN__} = sub { $warn = join '', @_; chomp $warn };
261*0Sstevel@tonic-gate    my $char = "\x{10f1ff}";
262*0Sstevel@tonic-gate    my @a = split /\r?\n/, "$char\n";
263*0Sstevel@tonic-gate    ok(@a == 1 && $a[0] eq $char && !defined($warn));
264*0Sstevel@tonic-gate}
265*0Sstevel@tonic-gate
266*0Sstevel@tonic-gate{
267*0Sstevel@tonic-gate    # [perl #18195]
268*0Sstevel@tonic-gate    for my $u (0, 1) {
269*0Sstevel@tonic-gate	for my $a (0, 1) {
270*0Sstevel@tonic-gate	    $_ = 'readin,database,readout';
271*0Sstevel@tonic-gate	    utf8::upgrade $_ if $u;
272*0Sstevel@tonic-gate	    /(.+)/;
273*0Sstevel@tonic-gate	    my @d = split /[,]/,$1;
274*0Sstevel@tonic-gate	    is(join (':',@d), 'readin:database:readout', "[perl #18195]");
275*0Sstevel@tonic-gate	}
276*0Sstevel@tonic-gate    }
277*0Sstevel@tonic-gate}
278*0Sstevel@tonic-gate
279*0Sstevel@tonic-gate{
280*0Sstevel@tonic-gate    $p="a,b";
281*0Sstevel@tonic-gate    utf8::upgrade $p;
282*0Sstevel@tonic-gate    eval { @a=split(/[, ]+/,$p) };
283*0Sstevel@tonic-gate    is ("$@-@a-", '-a b-', '#20912 - split() to array with /[]+/ and utf8');
284*0Sstevel@tonic-gate}
285*0Sstevel@tonic-gate
286*0Sstevel@tonic-gate{
287*0Sstevel@tonic-gate    is (\@a, \@{"a"}, '@a must be global for following test');
288*0Sstevel@tonic-gate    $p="";
289*0Sstevel@tonic-gate    $n = @a = split /,/,$p;
290*0Sstevel@tonic-gate    is ($n, 0, '#21765 - pmreplroot hack used to return undef for 0 iters');
291*0Sstevel@tonic-gate}
292*0Sstevel@tonic-gate
293*0Sstevel@tonic-gate{
294*0Sstevel@tonic-gate    # [perl #28938]
295*0Sstevel@tonic-gate    # assigning off the end of the array after a split could leave garbage
296*0Sstevel@tonic-gate    # in the inner elements
297*0Sstevel@tonic-gate
298*0Sstevel@tonic-gate    my $x;
299*0Sstevel@tonic-gate    @a = split /,/, ',,,,,';
300*0Sstevel@tonic-gate    $a[3]=1;
301*0Sstevel@tonic-gate    $x = \$a[2];
302*0Sstevel@tonic-gate    is (ref $x, 'SCALAR', '#28938 - garbage after extend');
303*0Sstevel@tonic-gate}
304*0Sstevel@tonic-gate
305