1BEGIN { 2 if ($ENV{'PERL_CORE'}){ 3 chdir 't'; 4 unshift @INC, '../lib'; 5 } 6 require Config; import Config; 7 if ($Config{'extensions'} !~ /\bEncode\b/) { 8 print "1..0 # Skip: Encode was not built\n"; 9 exit 0; 10 } 11 if (ord("A") == 193) { 12 print "1..0 # Skip: EBCDIC\n"; 13 exit 0; 14 } 15 $| = 1; 16} 17 18use strict; 19use File::Basename; 20use File::Spec; 21use Encode qw(decode encode find_encoding _utf8_off); 22 23#use Test::More qw(no_plan); 24use Test::More tests => 29; 25use_ok("Encode::Guess"); 26{ 27 no warnings; 28 $Encode::Guess::DEBUG = shift || 0; 29} 30 31my $ascii = join('' => map {chr($_)}(0x21..0x7e)); 32my $latin1 = join('' => map {chr($_)}(0xa1..0xfe)); 33my $utf8on = join('' => map {chr($_)}(0x3000..0x30fe)); 34my $utf8off = $utf8on; _utf8_off($utf8off); 35my $utf16 = encode('UTF-16', $utf8on); 36my $utf32 = encode('UTF-32', $utf8on); 37 38is(guess_encoding($ascii)->name, 'ascii', 'ascii'); 39like(guess_encoding($latin1), qr/No appropriate encoding/io, 'no ascii'); 40is(guess_encoding($latin1, 'latin1')->name, 'iso-8859-1', 'iso-8859-1'); 41is(guess_encoding($utf8on)->name, 'utf8', 'utf8 w/ flag'); 42is(guess_encoding($utf8off)->name, 'utf8', 'utf8 w/o flag'); 43is(guess_encoding($utf16)->name, 'UTF-16', 'UTF-16'); 44is(guess_encoding($utf32)->name, 'UTF-32', 'UTF-32'); 45 46my $jisx0201 = File::Spec->catfile(dirname(__FILE__), 'jisx0201.utf'); 47my $jisx0208 = File::Spec->catfile(dirname(__FILE__), 'jisx0208.utf'); 48my $jisx0212 = File::Spec->catfile(dirname(__FILE__), 'jisx0212.utf'); 49 50open my $fh, $jisx0208 or die "$jisx0208: $!"; 51binmode($fh); 52$utf8off = join('' => <$fh>); 53close $fh; 54$utf8on = decode('utf8', $utf8off); 55 56my @jp = qw(7bit-jis shiftjis euc-jp); 57 58Encode::Guess->set_suspects(@jp); 59 60for my $jp (@jp){ 61 my $test = encode($jp, $utf8on); 62 is(guess_encoding($test)->name, $jp, "JP:$jp"); 63} 64 65is (decode('Guess', encode('euc-jp', $utf8on)), $utf8on, "decode('Guess')"); 66eval{ encode('Guess', $utf8on) }; 67like($@, qr/not defined/io, "no encode()"); 68 69my %CJKT = 70 ( 71 'euc-cn' => File::Spec->catfile(dirname(__FILE__), 'gb2312.utf'), 72 'euc-jp' => File::Spec->catfile(dirname(__FILE__), 'jisx0208.utf'), 73 'euc-kr' => File::Spec->catfile(dirname(__FILE__), 'ksc5601.utf'), 74 'big5-eten' => File::Spec->catfile(dirname(__FILE__), 'big5-eten.utf'), 75); 76 77Encode::Guess->set_suspects(keys %CJKT); 78 79for my $name (keys %CJKT){ 80 open my $fh, $CJKT{$name} or die "$CJKT{$name}: $!"; 81 binmode($fh); 82 $utf8off = join('' => <$fh>); 83 close $fh; 84 85 my $test = encode($name, decode('utf8', $utf8off)); 86 is(guess_encoding($test)->name, $name, "CJKT:$name"); 87} 88 89my $ambiguous = "\x{5c0f}\x{98fc}\x{5f3e}"; 90my $english = "The quick brown fox jumps over the black lazy dog."; 91for my $utf (qw/UTF-16 UTF-32/){ 92 for my $bl (qw/BE LE/){ 93 my $test = encode("$utf$bl" => $english); 94 is(guess_encoding($test)->name, "$utf$bl", "$utf$bl"); 95 } 96} 97for my $bl (qw/BE LE/){ 98 my $test = encode("UTF-16$bl" => $ambiguous); 99 my $result = guess_encoding($test); 100 ok(! ref($result), "UTF-16$bl:$result"); 101} 102 103 104 105Encode::Guess->set_suspects(); 106for my $jp (@jp){ 107 # intentionally set $1 a priori -- see Changes 108 my $test = "English"; 109 '$1' =~ m/^(.*)/o; 110 is(guess_encoding($test, ($jp))->name, 'ascii', 111 "ascii vs $jp (\$1 messed)"); 112 $test = encode($jp, $test . "\n\x{65e5}\x{672c}\x{8a9e}"); 113 is(guess_encoding($test, ($jp))->name, 114 $jp, "$jp vs ascii (\$1 messed)"); 115} 116 117__END__; 118