1*0Sstevel@tonic-gateBEGIN { 2*0Sstevel@tonic-gate if ($ENV{'PERL_CORE'}){ 3*0Sstevel@tonic-gate chdir 't'; 4*0Sstevel@tonic-gate unshift @INC, '../lib'; 5*0Sstevel@tonic-gate } 6*0Sstevel@tonic-gate require Config; import Config; 7*0Sstevel@tonic-gate if ($Config{'extensions'} !~ /\bEncode\b/) { 8*0Sstevel@tonic-gate print "1..0 # Skip: Encode was not built\n"; 9*0Sstevel@tonic-gate exit 0; 10*0Sstevel@tonic-gate } 11*0Sstevel@tonic-gate if (ord("A") == 193) { 12*0Sstevel@tonic-gate print "1..0 # Skip: EBCDIC\n"; 13*0Sstevel@tonic-gate exit 0; 14*0Sstevel@tonic-gate } 15*0Sstevel@tonic-gate $| = 1; 16*0Sstevel@tonic-gate} 17*0Sstevel@tonic-gate 18*0Sstevel@tonic-gateuse strict; 19*0Sstevel@tonic-gateuse File::Basename; 20*0Sstevel@tonic-gateuse File::Spec; 21*0Sstevel@tonic-gateuse Encode qw(decode encode find_encoding _utf8_off); 22*0Sstevel@tonic-gate 23*0Sstevel@tonic-gate#use Test::More qw(no_plan); 24*0Sstevel@tonic-gateuse Test::More tests => 29; 25*0Sstevel@tonic-gateuse_ok("Encode::Guess"); 26*0Sstevel@tonic-gate{ 27*0Sstevel@tonic-gate no warnings; 28*0Sstevel@tonic-gate $Encode::Guess::DEBUG = shift || 0; 29*0Sstevel@tonic-gate} 30*0Sstevel@tonic-gate 31*0Sstevel@tonic-gatemy $ascii = join('' => map {chr($_)}(0x21..0x7e)); 32*0Sstevel@tonic-gatemy $latin1 = join('' => map {chr($_)}(0xa1..0xfe)); 33*0Sstevel@tonic-gatemy $utf8on = join('' => map {chr($_)}(0x3000..0x30fe)); 34*0Sstevel@tonic-gatemy $utf8off = $utf8on; _utf8_off($utf8off); 35*0Sstevel@tonic-gatemy $utf16 = encode('UTF-16', $utf8on); 36*0Sstevel@tonic-gatemy $utf32 = encode('UTF-32', $utf8on); 37*0Sstevel@tonic-gate 38*0Sstevel@tonic-gateis(guess_encoding($ascii)->name, 'ascii', 'ascii'); 39*0Sstevel@tonic-gatelike(guess_encoding($latin1), qr/No appropriate encoding/io, 'no ascii'); 40*0Sstevel@tonic-gateis(guess_encoding($latin1, 'latin1')->name, 'iso-8859-1', 'iso-8859-1'); 41*0Sstevel@tonic-gateis(guess_encoding($utf8on)->name, 'utf8', 'utf8 w/ flag'); 42*0Sstevel@tonic-gateis(guess_encoding($utf8off)->name, 'utf8', 'utf8 w/o flag'); 43*0Sstevel@tonic-gateis(guess_encoding($utf16)->name, 'UTF-16', 'UTF-16'); 44*0Sstevel@tonic-gateis(guess_encoding($utf32)->name, 'UTF-32', 'UTF-32'); 45*0Sstevel@tonic-gate 46*0Sstevel@tonic-gatemy $jisx0201 = File::Spec->catfile(dirname(__FILE__), 'jisx0201.utf'); 47*0Sstevel@tonic-gatemy $jisx0208 = File::Spec->catfile(dirname(__FILE__), 'jisx0208.utf'); 48*0Sstevel@tonic-gatemy $jisx0212 = File::Spec->catfile(dirname(__FILE__), 'jisx0212.utf'); 49*0Sstevel@tonic-gate 50*0Sstevel@tonic-gateopen my $fh, $jisx0208 or die "$jisx0208: $!"; 51*0Sstevel@tonic-gatebinmode($fh); 52*0Sstevel@tonic-gate$utf8off = join('' => <$fh>); 53*0Sstevel@tonic-gateclose $fh; 54*0Sstevel@tonic-gate$utf8on = decode('utf8', $utf8off); 55*0Sstevel@tonic-gate 56*0Sstevel@tonic-gatemy @jp = qw(7bit-jis shiftjis euc-jp); 57*0Sstevel@tonic-gate 58*0Sstevel@tonic-gateEncode::Guess->set_suspects(@jp); 59*0Sstevel@tonic-gate 60*0Sstevel@tonic-gatefor my $jp (@jp){ 61*0Sstevel@tonic-gate my $test = encode($jp, $utf8on); 62*0Sstevel@tonic-gate is(guess_encoding($test)->name, $jp, "JP:$jp"); 63*0Sstevel@tonic-gate} 64*0Sstevel@tonic-gate 65*0Sstevel@tonic-gateis (decode('Guess', encode('euc-jp', $utf8on)), $utf8on, "decode('Guess')"); 66*0Sstevel@tonic-gateeval{ encode('Guess', $utf8on) }; 67*0Sstevel@tonic-gatelike($@, qr/not defined/io, "no encode()"); 68*0Sstevel@tonic-gate 69*0Sstevel@tonic-gatemy %CJKT = 70*0Sstevel@tonic-gate ( 71*0Sstevel@tonic-gate 'euc-cn' => File::Spec->catfile(dirname(__FILE__), 'gb2312.utf'), 72*0Sstevel@tonic-gate 'euc-jp' => File::Spec->catfile(dirname(__FILE__), 'jisx0208.utf'), 73*0Sstevel@tonic-gate 'euc-kr' => File::Spec->catfile(dirname(__FILE__), 'ksc5601.utf'), 74*0Sstevel@tonic-gate 'big5-eten' => File::Spec->catfile(dirname(__FILE__), 'big5-eten.utf'), 75*0Sstevel@tonic-gate); 76*0Sstevel@tonic-gate 77*0Sstevel@tonic-gateEncode::Guess->set_suspects(keys %CJKT); 78*0Sstevel@tonic-gate 79*0Sstevel@tonic-gatefor my $name (keys %CJKT){ 80*0Sstevel@tonic-gate open my $fh, $CJKT{$name} or die "$CJKT{$name}: $!"; 81*0Sstevel@tonic-gate binmode($fh); 82*0Sstevel@tonic-gate $utf8off = join('' => <$fh>); 83*0Sstevel@tonic-gate close $fh; 84*0Sstevel@tonic-gate 85*0Sstevel@tonic-gate my $test = encode($name, decode('utf8', $utf8off)); 86*0Sstevel@tonic-gate is(guess_encoding($test)->name, $name, "CJKT:$name"); 87*0Sstevel@tonic-gate} 88*0Sstevel@tonic-gate 89*0Sstevel@tonic-gatemy $ambiguous = "\x{5c0f}\x{98fc}\x{5f3e}"; 90*0Sstevel@tonic-gatemy $english = "The quick brown fox jumps over the black lazy dog."; 91*0Sstevel@tonic-gatefor my $utf (qw/UTF-16 UTF-32/){ 92*0Sstevel@tonic-gate for my $bl (qw/BE LE/){ 93*0Sstevel@tonic-gate my $test = encode("$utf$bl" => $english); 94*0Sstevel@tonic-gate is(guess_encoding($test)->name, "$utf$bl", "$utf$bl"); 95*0Sstevel@tonic-gate } 96*0Sstevel@tonic-gate} 97*0Sstevel@tonic-gatefor my $bl (qw/BE LE/){ 98*0Sstevel@tonic-gate my $test = encode("UTF-16$bl" => $ambiguous); 99*0Sstevel@tonic-gate my $result = guess_encoding($test); 100*0Sstevel@tonic-gate ok(! ref($result), "UTF-16$bl:$result"); 101*0Sstevel@tonic-gate} 102*0Sstevel@tonic-gate 103*0Sstevel@tonic-gate 104*0Sstevel@tonic-gate 105*0Sstevel@tonic-gateEncode::Guess->set_suspects(); 106*0Sstevel@tonic-gatefor my $jp (@jp){ 107*0Sstevel@tonic-gate # intentionally set $1 a priori -- see Changes 108*0Sstevel@tonic-gate my $test = "English"; 109*0Sstevel@tonic-gate '$1' =~ m/^(.*)/o; 110*0Sstevel@tonic-gate is(guess_encoding($test, ($jp))->name, 'ascii', 111*0Sstevel@tonic-gate "ascii vs $jp (\$1 messed)"); 112*0Sstevel@tonic-gate $test = encode($jp, $test . "\n\x{65e5}\x{672c}\x{8a9e}"); 113*0Sstevel@tonic-gate is(guess_encoding($test, ($jp))->name, 114*0Sstevel@tonic-gate $jp, "$jp vs ascii (\$1 messed)"); 115*0Sstevel@tonic-gate} 116*0Sstevel@tonic-gate 117*0Sstevel@tonic-gate__END__; 118