xref: /openbsd-src/gnu/usr.bin/perl/cpan/Encode/t/guess.t (revision 4c1e55dc91edd6e69ccc60ce855900fbc12cf34f)
1BEGIN {
2    require Config; import Config;
3    if ($Config{'extensions'} !~ /\bEncode\b/) {
4      print "1..0 # Skip: Encode was not built\n";
5      exit 0;
6    }
7    if (ord("A") == 193) {
8    print "1..0 # Skip: EBCDIC\n";
9    exit 0;
10    }
11    $| = 1;
12}
13
14use strict;
15use File::Basename;
16use File::Spec;
17use Encode qw(decode encode find_encoding _utf8_off);
18
19#use Test::More qw(no_plan);
20use Test::More tests => 30;
21use_ok("Encode::Guess");
22{
23    no warnings;
24    $Encode::Guess::DEBUG = shift || 0;
25}
26
27my $ascii  = join('' => map {chr($_)}(0x21..0x7e));
28my $latin1 = join('' => map {chr($_)}(0xa1..0xfe));
29my $utf8on  = join('' => map {chr($_)}(0x3000..0x30fe));
30my $utf8off = $utf8on; _utf8_off($utf8off);
31my $utf16 = encode('UTF-16', $utf8on);
32my $utf32 = encode('UTF-32', $utf8on);
33
34like(guess_encoding(''), qr/empty string/io, 'empty string');
35is(guess_encoding($ascii)->name, 'ascii', 'ascii');
36like(guess_encoding($latin1), qr/No appropriate encoding/io, 'no ascii');
37is(guess_encoding($latin1, 'latin1')->name, 'iso-8859-1', 'iso-8859-1');
38is(guess_encoding($utf8on)->name, 'utf8', 'utf8 w/ flag');
39is(guess_encoding($utf8off)->name, 'utf8', 'utf8 w/o flag');
40is(guess_encoding($utf16)->name, 'UTF-16', 'UTF-16');
41is(guess_encoding($utf32)->name, 'UTF-32', 'UTF-32');
42
43my $jisx0201 = File::Spec->catfile(dirname(__FILE__), 'jisx0201.utf');
44my $jisx0208 = File::Spec->catfile(dirname(__FILE__), 'jisx0208.utf');
45my $jisx0212 = File::Spec->catfile(dirname(__FILE__), 'jisx0212.utf');
46
47open my $fh, $jisx0208 or die "$jisx0208: $!";
48binmode($fh);
49$utf8off = join('' => <$fh>);
50close $fh;
51$utf8on = decode('utf8', $utf8off);
52
53my @jp = qw(7bit-jis shiftjis euc-jp);
54
55Encode::Guess->set_suspects(@jp);
56
57for my $jp (@jp){
58    my $test = encode($jp, $utf8on);
59    is(guess_encoding($test)->name, $jp, "JP:$jp");
60}
61
62is (decode('Guess', encode('euc-jp', $utf8on)), $utf8on, "decode('Guess')");
63eval{ encode('Guess', $utf8on) };
64like($@, qr/not defined/io, "no encode()");
65
66my %CJKT =
67    (
68     'euc-cn'    => File::Spec->catfile(dirname(__FILE__), 'gb2312.utf'),
69     'euc-jp'    => File::Spec->catfile(dirname(__FILE__), 'jisx0208.utf'),
70     'euc-kr'    => File::Spec->catfile(dirname(__FILE__), 'ksc5601.utf'),
71     'big5-eten' => File::Spec->catfile(dirname(__FILE__), 'big5-eten.utf'),
72);
73
74Encode::Guess->set_suspects(keys %CJKT);
75
76for my $name (keys %CJKT){
77    open my $fh, $CJKT{$name} or die "$CJKT{$name}: $!";
78    binmode($fh);
79    $utf8off = join('' => <$fh>);
80    close $fh;
81
82    my $test = encode($name, decode('utf8', $utf8off));
83    is(guess_encoding($test)->name, $name, "CJKT:$name");
84}
85
86my $ambiguous =  "\x{5c0f}\x{98fc}\x{5f3e}";
87my $english   = "The quick brown fox jumps over the black lazy dog.";
88for my $utf (qw/UTF-16 UTF-32/){
89    for my $bl (qw/BE LE/){
90    my $test = encode("$utf$bl" => $english);
91    is(guess_encoding($test)->name, "$utf$bl", "$utf$bl");
92    }
93}
94for my $bl (qw/BE LE/){
95    my $test = encode("UTF-16$bl" => $ambiguous);
96    my $result = guess_encoding($test);
97    ok(! ref($result), "UTF-16$bl:$result");
98}
99
100
101
102Encode::Guess->set_suspects();
103for my $jp (@jp){
104    # intentionally set $1 a priori -- see Changes
105    my $test = "English";
106    '$1' =~ m/^(.*)/o;
107    is(guess_encoding($test, ($jp))->name, 'ascii',
108       "ascii vs $jp (\$1 messed)");
109    $test = encode($jp, $test . "\n\x{65e5}\x{672c}\x{8a9e}");
110    is(guess_encoding($test, ($jp))->name,
111       $jp, "$jp vs ascii (\$1 messed)");
112}
113
114__END__;
115