1*0Sstevel@tonic-gate#!./perl 2*0Sstevel@tonic-gate 3*0Sstevel@tonic-gateBEGIN { 4*0Sstevel@tonic-gate chdir 't' if -d 't'; 5*0Sstevel@tonic-gate @INC = '../lib'; 6*0Sstevel@tonic-gate unless (find PerlIO::Layer 'perlio') { 7*0Sstevel@tonic-gate print "1..0 # Skip: not perlio\n"; 8*0Sstevel@tonic-gate exit 0; 9*0Sstevel@tonic-gate } 10*0Sstevel@tonic-gate} 11*0Sstevel@tonic-gate 12*0Sstevel@tonic-gateno utf8; # needed for use utf8 not griping about the raw octets 13*0Sstevel@tonic-gate 14*0Sstevel@tonic-gaterequire "./test.pl"; 15*0Sstevel@tonic-gate 16*0Sstevel@tonic-gateplan(tests => 53); 17*0Sstevel@tonic-gate 18*0Sstevel@tonic-gate$| = 1; 19*0Sstevel@tonic-gate 20*0Sstevel@tonic-gateopen(F,"+>:utf8",'a'); 21*0Sstevel@tonic-gateprint F chr(0x100).'�'; 22*0Sstevel@tonic-gateok( tell(F) == 4, tell(F) ); 23*0Sstevel@tonic-gateprint F "\n"; 24*0Sstevel@tonic-gateok( tell(F) >= 5, tell(F) ); 25*0Sstevel@tonic-gateseek(F,0,0); 26*0Sstevel@tonic-gateok( getc(F) eq chr(0x100) ); 27*0Sstevel@tonic-gateok( getc(F) eq "�" ); 28*0Sstevel@tonic-gateok( getc(F) eq "\n" ); 29*0Sstevel@tonic-gateseek(F,0,0); 30*0Sstevel@tonic-gatebinmode(F,":bytes"); 31*0Sstevel@tonic-gatemy $chr = chr(0xc4); 32*0Sstevel@tonic-gateif (ord('A') == 193) { $chr = chr(0x8c); } # EBCDIC 33*0Sstevel@tonic-gateok( getc(F) eq $chr ); 34*0Sstevel@tonic-gate$chr = chr(0x80); 35*0Sstevel@tonic-gateif (ord('A') == 193) { $chr = chr(0x41); } # EBCDIC 36*0Sstevel@tonic-gateok( getc(F) eq $chr ); 37*0Sstevel@tonic-gate$chr = chr(0xc2); 38*0Sstevel@tonic-gateif (ord('A') == 193) { $chr = chr(0x80); } # EBCDIC 39*0Sstevel@tonic-gateok( getc(F) eq $chr ); 40*0Sstevel@tonic-gate$chr = chr(0xa3); 41*0Sstevel@tonic-gateif (ord('A') == 193) { $chr = chr(0x44); } # EBCDIC 42*0Sstevel@tonic-gateok( getc(F) eq $chr ); 43*0Sstevel@tonic-gateok( getc(F) eq "\n" ); 44*0Sstevel@tonic-gateseek(F,0,0); 45*0Sstevel@tonic-gatebinmode(F,":utf8"); 46*0Sstevel@tonic-gateok( scalar(<F>) eq "\x{100}�\n" ); 47*0Sstevel@tonic-gateseek(F,0,0); 48*0Sstevel@tonic-gate$buf = chr(0x200); 49*0Sstevel@tonic-gate$count = read(F,$buf,2,1); 50*0Sstevel@tonic-gateok( $count == 2 ); 51*0Sstevel@tonic-gateok( $buf eq "\x{200}\x{100}�" ); 52*0Sstevel@tonic-gateclose(F); 53*0Sstevel@tonic-gate 54*0Sstevel@tonic-gate{ 55*0Sstevel@tonic-gate $a = chr(300); # This *is* UTF-encoded 56*0Sstevel@tonic-gate $b = chr(130); # This is not. 57*0Sstevel@tonic-gate 58*0Sstevel@tonic-gate open F, ">:utf8", 'a' or die $!; 59*0Sstevel@tonic-gate print F $a,"\n"; 60*0Sstevel@tonic-gate close F; 61*0Sstevel@tonic-gate 62*0Sstevel@tonic-gate open F, "<:utf8", 'a' or die $!; 63*0Sstevel@tonic-gate $x = <F>; 64*0Sstevel@tonic-gate chomp($x); 65*0Sstevel@tonic-gate ok( $x eq chr(300) ); 66*0Sstevel@tonic-gate 67*0Sstevel@tonic-gate open F, "a" or die $!; # Not UTF 68*0Sstevel@tonic-gate binmode(F, ":bytes"); 69*0Sstevel@tonic-gate $x = <F>; 70*0Sstevel@tonic-gate chomp($x); 71*0Sstevel@tonic-gate $chr = chr(196).chr(172); 72*0Sstevel@tonic-gate if (ord('A') == 193) { $chr = chr(141).chr(83); } # EBCDIC 73*0Sstevel@tonic-gate ok( $x eq $chr ); 74*0Sstevel@tonic-gate close F; 75*0Sstevel@tonic-gate 76*0Sstevel@tonic-gate open F, ">:utf8", 'a' or die $!; 77*0Sstevel@tonic-gate binmode(F); # we write a "\n" and then tell() - avoid CRLF issues. 78*0Sstevel@tonic-gate binmode(F,":utf8"); # turn UTF-8-ness back on 79*0Sstevel@tonic-gate print F $a; 80*0Sstevel@tonic-gate my $y; 81*0Sstevel@tonic-gate { my $x = tell(F); 82*0Sstevel@tonic-gate { use bytes; $y = length($a);} 83*0Sstevel@tonic-gate ok( $x == $y ); 84*0Sstevel@tonic-gate } 85*0Sstevel@tonic-gate 86*0Sstevel@tonic-gate { # Check byte length of $b 87*0Sstevel@tonic-gate use bytes; my $y = length($b); 88*0Sstevel@tonic-gate ok( $y == 1 ); 89*0Sstevel@tonic-gate } 90*0Sstevel@tonic-gate 91*0Sstevel@tonic-gate print F $b,"\n"; # Don't upgrades $b 92*0Sstevel@tonic-gate 93*0Sstevel@tonic-gate { # Check byte length of $b 94*0Sstevel@tonic-gate use bytes; my $y = length($b); 95*0Sstevel@tonic-gate ok( $y == 1 ); 96*0Sstevel@tonic-gate } 97*0Sstevel@tonic-gate 98*0Sstevel@tonic-gate { 99*0Sstevel@tonic-gate my $x = tell(F); 100*0Sstevel@tonic-gate { use bytes; if (ord('A')==193){$y += 2;}else{$y += 3;}} # EBCDIC ASCII 101*0Sstevel@tonic-gate ok( $x == $y ); 102*0Sstevel@tonic-gate } 103*0Sstevel@tonic-gate 104*0Sstevel@tonic-gate close F; 105*0Sstevel@tonic-gate 106*0Sstevel@tonic-gate open F, "a" or die $!; # Not UTF 107*0Sstevel@tonic-gate binmode(F, ":bytes"); 108*0Sstevel@tonic-gate $x = <F>; 109*0Sstevel@tonic-gate chomp($x); 110*0Sstevel@tonic-gate $chr = v196.172.194.130; 111*0Sstevel@tonic-gate if (ord('A') == 193) { $chr = v141.83.130; } # EBCDIC 112*0Sstevel@tonic-gate ok( $x eq $chr, sprintf('(%vd)', $x) ); 113*0Sstevel@tonic-gate 114*0Sstevel@tonic-gate open F, "<:utf8", "a" or die $!; 115*0Sstevel@tonic-gate $x = <F>; 116*0Sstevel@tonic-gate chomp($x); 117*0Sstevel@tonic-gate close F; 118*0Sstevel@tonic-gate ok( $x eq chr(300).chr(130), sprintf('(%vd)', $x) ); 119*0Sstevel@tonic-gate 120*0Sstevel@tonic-gate open F, ">", "a" or die $!; 121*0Sstevel@tonic-gate if (${^OPEN} =~ /:utf8/) { 122*0Sstevel@tonic-gate binmode(F, ":bytes:"); 123*0Sstevel@tonic-gate } 124*0Sstevel@tonic-gate 125*0Sstevel@tonic-gate # Now let's make it suffer. 126*0Sstevel@tonic-gate my $w; 127*0Sstevel@tonic-gate { 128*0Sstevel@tonic-gate use warnings 'utf8'; 129*0Sstevel@tonic-gate local $SIG{__WARN__} = sub { $w = $_[0] }; 130*0Sstevel@tonic-gate print F $a; 131*0Sstevel@tonic-gate ok( !($@ || $w !~ /Wide character in print/i) ); 132*0Sstevel@tonic-gate } 133*0Sstevel@tonic-gate} 134*0Sstevel@tonic-gate 135*0Sstevel@tonic-gate# Hm. Time to get more evil. 136*0Sstevel@tonic-gateopen F, ">:utf8", "a" or die $!; 137*0Sstevel@tonic-gateprint F $a; 138*0Sstevel@tonic-gatebinmode(F, ":bytes"); 139*0Sstevel@tonic-gateprint F chr(130)."\n"; 140*0Sstevel@tonic-gateclose F; 141*0Sstevel@tonic-gate 142*0Sstevel@tonic-gateopen F, "<", "a" or die $!; 143*0Sstevel@tonic-gatebinmode(F, ":bytes"); 144*0Sstevel@tonic-gate$x = <F>; chomp $x; 145*0Sstevel@tonic-gate$chr = v196.172.130; 146*0Sstevel@tonic-gateif (ord('A') == 193) { $chr = v141.83.130; } # EBCDIC 147*0Sstevel@tonic-gateok( $x eq $chr ); 148*0Sstevel@tonic-gate 149*0Sstevel@tonic-gate# Right. 150*0Sstevel@tonic-gateopen F, ">:utf8", "a" or die $!; 151*0Sstevel@tonic-gateprint F $a; 152*0Sstevel@tonic-gateclose F; 153*0Sstevel@tonic-gateopen F, ">>", "a" or die $!; 154*0Sstevel@tonic-gateprint F chr(130)."\n"; 155*0Sstevel@tonic-gateclose F; 156*0Sstevel@tonic-gate 157*0Sstevel@tonic-gateopen F, "<", "a" or die $!; 158*0Sstevel@tonic-gate$x = <F>; chomp $x; 159*0Sstevel@tonic-gateok( $x eq $chr ); 160*0Sstevel@tonic-gate 161*0Sstevel@tonic-gate# Now we have a deformed file. 162*0Sstevel@tonic-gate 163*0Sstevel@tonic-gateSKIP: { 164*0Sstevel@tonic-gate if (ord('A') == 193) { 165*0Sstevel@tonic-gate skip( "EBCDIC doesn't complain" ); 166*0Sstevel@tonic-gate } else { 167*0Sstevel@tonic-gate open F, "<:utf8", "a" or die $!; 168*0Sstevel@tonic-gate $x = <F>; chomp $x; 169*0Sstevel@tonic-gate local $SIG{__WARN__} = sub { ok( 1 ) }; 170*0Sstevel@tonic-gate eval { sprintf "%vd\n", $x }; 171*0Sstevel@tonic-gate } 172*0Sstevel@tonic-gate} 173*0Sstevel@tonic-gate 174*0Sstevel@tonic-gateclose F; 175*0Sstevel@tonic-gateunlink('a'); 176*0Sstevel@tonic-gate 177*0Sstevel@tonic-gateopen F, ">:utf8", "a"; 178*0Sstevel@tonic-gate@a = map { chr(1 << ($_ << 2)) } 0..5; # 0x1, 0x10, .., 0x100000 179*0Sstevel@tonic-gateunshift @a, chr(0); # ... and a null byte in front just for fun 180*0Sstevel@tonic-gateprint F @a; 181*0Sstevel@tonic-gateclose F; 182*0Sstevel@tonic-gate 183*0Sstevel@tonic-gatemy $c; 184*0Sstevel@tonic-gate 185*0Sstevel@tonic-gate# read() should work on characters, not bytes 186*0Sstevel@tonic-gateopen F, "<:utf8", "a"; 187*0Sstevel@tonic-gate$a = 0; 188*0Sstevel@tonic-gatefor (@a) { 189*0Sstevel@tonic-gate unless (($c = read(F, $b, 1) == 1) && 190*0Sstevel@tonic-gate length($b) == 1 && 191*0Sstevel@tonic-gate ord($b) == ord($_) && 192*0Sstevel@tonic-gate tell(F) == ($a += bytes::length($b))) { 193*0Sstevel@tonic-gate print '# ord($_) == ', ord($_), "\n"; 194*0Sstevel@tonic-gate print '# ord($b) == ', ord($b), "\n"; 195*0Sstevel@tonic-gate print '# length($b) == ', length($b), "\n"; 196*0Sstevel@tonic-gate print '# bytes::length($b) == ', bytes::length($b), "\n"; 197*0Sstevel@tonic-gate print '# tell(F) == ', tell(F), "\n"; 198*0Sstevel@tonic-gate print '# $a == ', $a, "\n"; 199*0Sstevel@tonic-gate print '# $c == ', $c, "\n"; 200*0Sstevel@tonic-gate print "not "; 201*0Sstevel@tonic-gate last; 202*0Sstevel@tonic-gate } 203*0Sstevel@tonic-gate} 204*0Sstevel@tonic-gateclose F; 205*0Sstevel@tonic-gateok( 1 ); 206*0Sstevel@tonic-gate 207*0Sstevel@tonic-gate{ 208*0Sstevel@tonic-gate # Check that warnings are on on I/O, and that they can be muffled. 209*0Sstevel@tonic-gate 210*0Sstevel@tonic-gate local $SIG{__WARN__} = sub { $@ = shift }; 211*0Sstevel@tonic-gate 212*0Sstevel@tonic-gate undef $@; 213*0Sstevel@tonic-gate open F, ">a"; 214*0Sstevel@tonic-gate binmode(F, ":bytes"); 215*0Sstevel@tonic-gate print F chr(0x100); 216*0Sstevel@tonic-gate close(F); 217*0Sstevel@tonic-gate 218*0Sstevel@tonic-gate like( $@, 'Wide character in print' ); 219*0Sstevel@tonic-gate 220*0Sstevel@tonic-gate undef $@; 221*0Sstevel@tonic-gate open F, ">:utf8", "a"; 222*0Sstevel@tonic-gate print F chr(0x100); 223*0Sstevel@tonic-gate close(F); 224*0Sstevel@tonic-gate 225*0Sstevel@tonic-gate isnt( defined $@ ); 226*0Sstevel@tonic-gate 227*0Sstevel@tonic-gate undef $@; 228*0Sstevel@tonic-gate open F, ">a"; 229*0Sstevel@tonic-gate binmode(F, ":utf8"); 230*0Sstevel@tonic-gate print F chr(0x100); 231*0Sstevel@tonic-gate close(F); 232*0Sstevel@tonic-gate 233*0Sstevel@tonic-gate isnt( defined $@ ); 234*0Sstevel@tonic-gate 235*0Sstevel@tonic-gate no warnings 'utf8'; 236*0Sstevel@tonic-gate 237*0Sstevel@tonic-gate undef $@; 238*0Sstevel@tonic-gate open F, ">a"; 239*0Sstevel@tonic-gate print F chr(0x100); 240*0Sstevel@tonic-gate close(F); 241*0Sstevel@tonic-gate 242*0Sstevel@tonic-gate isnt( defined $@ ); 243*0Sstevel@tonic-gate 244*0Sstevel@tonic-gate use warnings 'utf8'; 245*0Sstevel@tonic-gate 246*0Sstevel@tonic-gate undef $@; 247*0Sstevel@tonic-gate open F, ">a"; 248*0Sstevel@tonic-gate binmode(F, ":bytes"); 249*0Sstevel@tonic-gate print F chr(0x100); 250*0Sstevel@tonic-gate close(F); 251*0Sstevel@tonic-gate 252*0Sstevel@tonic-gate like( $@, 'Wide character in print' ); 253*0Sstevel@tonic-gate} 254*0Sstevel@tonic-gate 255*0Sstevel@tonic-gate{ 256*0Sstevel@tonic-gate open F, ">:bytes","a"; print F "\xde"; close F; 257*0Sstevel@tonic-gate 258*0Sstevel@tonic-gate open F, "<:bytes", "a"; 259*0Sstevel@tonic-gate my $b = chr 0x100; 260*0Sstevel@tonic-gate $b .= <F>; 261*0Sstevel@tonic-gate ok( $b eq chr(0x100).chr(0xde), "21395 '.= <>' utf8 vs. bytes" ); 262*0Sstevel@tonic-gate close F; 263*0Sstevel@tonic-gate} 264*0Sstevel@tonic-gate 265*0Sstevel@tonic-gate{ 266*0Sstevel@tonic-gate open F, ">:utf8","a"; print F chr 0x100; close F; 267*0Sstevel@tonic-gate 268*0Sstevel@tonic-gate open F, "<:utf8", "a"; 269*0Sstevel@tonic-gate my $b = "\xde"; 270*0Sstevel@tonic-gate $b .= <F>; 271*0Sstevel@tonic-gate ok( $b eq chr(0xde).chr(0x100), "21395 '.= <>' bytes vs. utf8" ); 272*0Sstevel@tonic-gate close F; 273*0Sstevel@tonic-gate} 274*0Sstevel@tonic-gate 275*0Sstevel@tonic-gate{ 276*0Sstevel@tonic-gate my @a = ( [ 0x007F, "bytes" ], 277*0Sstevel@tonic-gate [ 0x0080, "bytes" ], 278*0Sstevel@tonic-gate [ 0x0080, "utf8" ], 279*0Sstevel@tonic-gate [ 0x0100, "utf8" ] ); 280*0Sstevel@tonic-gate my $t = 34; 281*0Sstevel@tonic-gate for my $u (@a) { 282*0Sstevel@tonic-gate for my $v (@a) { 283*0Sstevel@tonic-gate # print "# @$u - @$v\n"; 284*0Sstevel@tonic-gate open F, ">a"; 285*0Sstevel@tonic-gate binmode(F, ":" . $u->[1]); 286*0Sstevel@tonic-gate print F chr($u->[0]); 287*0Sstevel@tonic-gate close F; 288*0Sstevel@tonic-gate 289*0Sstevel@tonic-gate open F, "<a"; 290*0Sstevel@tonic-gate binmode(F, ":" . $u->[1]); 291*0Sstevel@tonic-gate 292*0Sstevel@tonic-gate my $s = chr($v->[0]); 293*0Sstevel@tonic-gate utf8::upgrade($s) if $v->[1] eq "utf8"; 294*0Sstevel@tonic-gate 295*0Sstevel@tonic-gate $s .= <F>; 296*0Sstevel@tonic-gate ok( $s eq chr($v->[0]) . chr($u->[0]), 'rcatline utf8' ); 297*0Sstevel@tonic-gate close F; 298*0Sstevel@tonic-gate $t++; 299*0Sstevel@tonic-gate } 300*0Sstevel@tonic-gate } 301*0Sstevel@tonic-gate # last test here 49 302*0Sstevel@tonic-gate} 303*0Sstevel@tonic-gate 304*0Sstevel@tonic-gate{ 305*0Sstevel@tonic-gate # [perl #23428] Somethings rotten in unicode semantics 306*0Sstevel@tonic-gate open F, ">a"; 307*0Sstevel@tonic-gate binmode F, ":utf8"; 308*0Sstevel@tonic-gate syswrite(F, $a = chr(0x100)); 309*0Sstevel@tonic-gate close F; 310*0Sstevel@tonic-gate is( ord($a), 0x100, '23428 syswrite should not downgrade scalar' ); 311*0Sstevel@tonic-gate like( $a, qr/^\w+/, '23428 syswrite should not downgrade scalar' ); 312*0Sstevel@tonic-gate} 313*0Sstevel@tonic-gate 314*0Sstevel@tonic-gate# sysread() and syswrite() tested in lib/open.t since Fcntl is used 315*0Sstevel@tonic-gate 316*0Sstevel@tonic-gate{ 317*0Sstevel@tonic-gate # <FH> on a :utf8 stream should complain immediately with -w 318*0Sstevel@tonic-gate # if it finds bad UTF-8 (:encoding(utf8) works this way) 319*0Sstevel@tonic-gate use warnings 'utf8'; 320*0Sstevel@tonic-gate undef $@; 321*0Sstevel@tonic-gate local $SIG{__WARN__} = sub { $@ = shift }; 322*0Sstevel@tonic-gate open F, ">a"; 323*0Sstevel@tonic-gate binmode F; 324*0Sstevel@tonic-gate my ($chrE4, $chrF6) = (chr(0xE4), chr(0xF6)); 325*0Sstevel@tonic-gate if (ord('A') == 193) # EBCDIC 326*0Sstevel@tonic-gate { ($chrE4, $chrF6) = (chr(0x43), chr(0xEC)); } 327*0Sstevel@tonic-gate print F "foo", $chrE4, "\n"; 328*0Sstevel@tonic-gate print F "foo", $chrF6, "\n"; 329*0Sstevel@tonic-gate close F; 330*0Sstevel@tonic-gate open F, "<:utf8", "a"; 331*0Sstevel@tonic-gate undef $@; 332*0Sstevel@tonic-gate my $line = <F>; 333*0Sstevel@tonic-gate my ($chrE4, $chrF6) = ("E4", "F6"); 334*0Sstevel@tonic-gate if (ord('A') == 193) { ($chrE4, $chrF6) = ("43", "EC"); } # EBCDIC 335*0Sstevel@tonic-gate like( $@, qr/utf8 "\\x$chrE4" does not map to Unicode .+ <F> line 1/, 336*0Sstevel@tonic-gate "<:utf8 readline must warn about bad utf8"); 337*0Sstevel@tonic-gate undef $@; 338*0Sstevel@tonic-gate $line .= <F>; 339*0Sstevel@tonic-gate like( $@, qr/utf8 "\\x$chrF6" does not map to Unicode .+ <F> line 2/, 340*0Sstevel@tonic-gate "<:utf8 rcatline must warn about bad utf8"); 341*0Sstevel@tonic-gate close F; 342*0Sstevel@tonic-gate} 343*0Sstevel@tonic-gate 344*0Sstevel@tonic-gateEND { 345*0Sstevel@tonic-gate 1 while unlink "a"; 346*0Sstevel@tonic-gate 1 while unlink "b"; 347*0Sstevel@tonic-gate} 348