xref: /onnv-gate/usr/src/cmd/perl/5.8.4/distrib/ext/Encode/t/guess.t (revision 0:68f95e015346)
1*0Sstevel@tonic-gateBEGIN {
2*0Sstevel@tonic-gate    if ($ENV{'PERL_CORE'}){
3*0Sstevel@tonic-gate        chdir 't';
4*0Sstevel@tonic-gate        unshift @INC, '../lib';
5*0Sstevel@tonic-gate    }
6*0Sstevel@tonic-gate    require Config; import Config;
7*0Sstevel@tonic-gate    if ($Config{'extensions'} !~ /\bEncode\b/) {
8*0Sstevel@tonic-gate      print "1..0 # Skip: Encode was not built\n";
9*0Sstevel@tonic-gate      exit 0;
10*0Sstevel@tonic-gate    }
11*0Sstevel@tonic-gate    if (ord("A") == 193) {
12*0Sstevel@tonic-gate	print "1..0 # Skip: EBCDIC\n";
13*0Sstevel@tonic-gate	exit 0;
14*0Sstevel@tonic-gate    }
15*0Sstevel@tonic-gate    $| = 1;
16*0Sstevel@tonic-gate}
17*0Sstevel@tonic-gate
18*0Sstevel@tonic-gateuse strict;
19*0Sstevel@tonic-gateuse File::Basename;
20*0Sstevel@tonic-gateuse File::Spec;
21*0Sstevel@tonic-gateuse Encode qw(decode encode find_encoding _utf8_off);
22*0Sstevel@tonic-gate
23*0Sstevel@tonic-gate#use Test::More qw(no_plan);
24*0Sstevel@tonic-gateuse Test::More tests => 29;
25*0Sstevel@tonic-gateuse_ok("Encode::Guess");
26*0Sstevel@tonic-gate{
27*0Sstevel@tonic-gate    no warnings;
28*0Sstevel@tonic-gate    $Encode::Guess::DEBUG = shift || 0;
29*0Sstevel@tonic-gate}
30*0Sstevel@tonic-gate
31*0Sstevel@tonic-gatemy $ascii  = join('' => map {chr($_)}(0x21..0x7e));
32*0Sstevel@tonic-gatemy $latin1 = join('' => map {chr($_)}(0xa1..0xfe));
33*0Sstevel@tonic-gatemy $utf8on  = join('' => map {chr($_)}(0x3000..0x30fe));
34*0Sstevel@tonic-gatemy $utf8off = $utf8on; _utf8_off($utf8off);
35*0Sstevel@tonic-gatemy $utf16 = encode('UTF-16', $utf8on);
36*0Sstevel@tonic-gatemy $utf32 = encode('UTF-32', $utf8on);
37*0Sstevel@tonic-gate
38*0Sstevel@tonic-gateis(guess_encoding($ascii)->name, 'ascii', 'ascii');
39*0Sstevel@tonic-gatelike(guess_encoding($latin1), qr/No appropriate encoding/io, 'no ascii');
40*0Sstevel@tonic-gateis(guess_encoding($latin1, 'latin1')->name, 'iso-8859-1', 'iso-8859-1');
41*0Sstevel@tonic-gateis(guess_encoding($utf8on)->name, 'utf8', 'utf8 w/ flag');
42*0Sstevel@tonic-gateis(guess_encoding($utf8off)->name, 'utf8', 'utf8 w/o flag');
43*0Sstevel@tonic-gateis(guess_encoding($utf16)->name, 'UTF-16', 'UTF-16');
44*0Sstevel@tonic-gateis(guess_encoding($utf32)->name, 'UTF-32', 'UTF-32');
45*0Sstevel@tonic-gate
46*0Sstevel@tonic-gatemy $jisx0201 = File::Spec->catfile(dirname(__FILE__), 'jisx0201.utf');
47*0Sstevel@tonic-gatemy $jisx0208 = File::Spec->catfile(dirname(__FILE__), 'jisx0208.utf');
48*0Sstevel@tonic-gatemy $jisx0212 = File::Spec->catfile(dirname(__FILE__), 'jisx0212.utf');
49*0Sstevel@tonic-gate
50*0Sstevel@tonic-gateopen my $fh, $jisx0208 or die "$jisx0208: $!";
51*0Sstevel@tonic-gatebinmode($fh);
52*0Sstevel@tonic-gate$utf8off = join('' => <$fh>);
53*0Sstevel@tonic-gateclose $fh;
54*0Sstevel@tonic-gate$utf8on = decode('utf8', $utf8off);
55*0Sstevel@tonic-gate
56*0Sstevel@tonic-gatemy @jp = qw(7bit-jis shiftjis euc-jp);
57*0Sstevel@tonic-gate
58*0Sstevel@tonic-gateEncode::Guess->set_suspects(@jp);
59*0Sstevel@tonic-gate
60*0Sstevel@tonic-gatefor my $jp (@jp){
61*0Sstevel@tonic-gate    my $test = encode($jp, $utf8on);
62*0Sstevel@tonic-gate    is(guess_encoding($test)->name, $jp, "JP:$jp");
63*0Sstevel@tonic-gate}
64*0Sstevel@tonic-gate
65*0Sstevel@tonic-gateis (decode('Guess', encode('euc-jp', $utf8on)), $utf8on, "decode('Guess')");
66*0Sstevel@tonic-gateeval{ encode('Guess', $utf8on) };
67*0Sstevel@tonic-gatelike($@, qr/not defined/io, "no encode()");
68*0Sstevel@tonic-gate
69*0Sstevel@tonic-gatemy %CJKT =
70*0Sstevel@tonic-gate    (
71*0Sstevel@tonic-gate     'euc-cn'    => File::Spec->catfile(dirname(__FILE__), 'gb2312.utf'),
72*0Sstevel@tonic-gate     'euc-jp'    => File::Spec->catfile(dirname(__FILE__), 'jisx0208.utf'),
73*0Sstevel@tonic-gate     'euc-kr'    => File::Spec->catfile(dirname(__FILE__), 'ksc5601.utf'),
74*0Sstevel@tonic-gate     'big5-eten' => File::Spec->catfile(dirname(__FILE__), 'big5-eten.utf'),
75*0Sstevel@tonic-gate);
76*0Sstevel@tonic-gate
77*0Sstevel@tonic-gateEncode::Guess->set_suspects(keys %CJKT);
78*0Sstevel@tonic-gate
79*0Sstevel@tonic-gatefor my $name (keys %CJKT){
80*0Sstevel@tonic-gate    open my $fh, $CJKT{$name} or die "$CJKT{$name}: $!";
81*0Sstevel@tonic-gate    binmode($fh);
82*0Sstevel@tonic-gate    $utf8off = join('' => <$fh>);
83*0Sstevel@tonic-gate    close $fh;
84*0Sstevel@tonic-gate
85*0Sstevel@tonic-gate    my $test = encode($name, decode('utf8', $utf8off));
86*0Sstevel@tonic-gate    is(guess_encoding($test)->name, $name, "CJKT:$name");
87*0Sstevel@tonic-gate}
88*0Sstevel@tonic-gate
89*0Sstevel@tonic-gatemy $ambiguous =  "\x{5c0f}\x{98fc}\x{5f3e}";
90*0Sstevel@tonic-gatemy $english   = "The quick brown fox jumps over the black lazy dog.";
91*0Sstevel@tonic-gatefor my $utf (qw/UTF-16 UTF-32/){
92*0Sstevel@tonic-gate    for my $bl (qw/BE LE/){
93*0Sstevel@tonic-gate	my $test = encode("$utf$bl" => $english);
94*0Sstevel@tonic-gate	is(guess_encoding($test)->name, "$utf$bl", "$utf$bl");
95*0Sstevel@tonic-gate    }
96*0Sstevel@tonic-gate}
97*0Sstevel@tonic-gatefor my $bl (qw/BE LE/){
98*0Sstevel@tonic-gate    my $test = encode("UTF-16$bl" => $ambiguous);
99*0Sstevel@tonic-gate    my $result = guess_encoding($test);
100*0Sstevel@tonic-gate    ok(! ref($result), "UTF-16$bl:$result");
101*0Sstevel@tonic-gate}
102*0Sstevel@tonic-gate
103*0Sstevel@tonic-gate
104*0Sstevel@tonic-gate
105*0Sstevel@tonic-gateEncode::Guess->set_suspects();
106*0Sstevel@tonic-gatefor my $jp (@jp){
107*0Sstevel@tonic-gate    # intentionally set $1 a priori -- see Changes
108*0Sstevel@tonic-gate    my $test = "English";
109*0Sstevel@tonic-gate    '$1' =~ m/^(.*)/o;
110*0Sstevel@tonic-gate    is(guess_encoding($test, ($jp))->name, 'ascii',
111*0Sstevel@tonic-gate       "ascii vs $jp (\$1 messed)");
112*0Sstevel@tonic-gate    $test = encode($jp, $test . "\n\x{65e5}\x{672c}\x{8a9e}");
113*0Sstevel@tonic-gate    is(guess_encoding($test, ($jp))->name,
114*0Sstevel@tonic-gate       $jp, "$jp vs ascii (\$1 messed)");
115*0Sstevel@tonic-gate}
116*0Sstevel@tonic-gate
117*0Sstevel@tonic-gate__END__;
118