1b39c5158SmillertBEGIN { 2b39c5158Smillert if ($ENV{'PERL_CORE'}){ 3b39c5158Smillert chdir 't'; 4b39c5158Smillert unshift @INC, '../lib'; 5b39c5158Smillert } 6b39c5158Smillert if (ord("A") == 193) { 7b39c5158Smillert print "1..0 # Skip: EBCDIC\n"; 8b39c5158Smillert exit 0; 9b39c5158Smillert } 10*5486feefSafresh1 require Config; Config->import(); 11b39c5158Smillert if ($Config{'extensions'} !~ /\bEncode\b/) { 12b39c5158Smillert print "1..0 # Skip: Encode was not built\n"; 13b39c5158Smillert exit 0; 14b39c5158Smillert } 15b39c5158Smillert} 16b39c5158Smillertuse strict; 175759b3d2Safresh1use Test::More; 18b39c5158Smillertuse Encode qw(from_to encode decode 19b39c5158Smillert encode_utf8 decode_utf8 20b39c5158Smillert find_encoding is_utf8); 21b39c5158Smillertuse charnames qw(greek); 22b39c5158Smillertmy @encodings = grep(/iso-?8859/,Encode::encodings()); 23b39c5158Smillertmy $n = 2; 24b39c5158Smillertmy @character_set = ('0'..'9', 'A'..'Z', 'a'..'z'); 25b39c5158Smillertmy @source = qw(ascii iso8859-1 cp1250); 26b39c5158Smillertmy @destiny = qw(cp1047 cp37 posix-bc); 27b39c5158Smillertmy @ebcdic_sets = qw(cp1047 cp37 posix-bc); 28256a93a4Safresh1plan tests => 38+$n*@encodings + 2*@source*@destiny*@character_set + 2*@ebcdic_sets*256 + 6 + 3*8; 295759b3d2Safresh1 30b39c5158Smillertmy $str = join('',map(chr($_),0x20..0x7E)); 31b39c5158Smillertmy $cpy = $str; 32b39c5158Smillert 33b39c5158Smillertmy $sym = Encode->getEncoding('symbol'); 34b39c5158Smillertmy $uni = $sym->decode(encode(ascii => 'a')); 355759b3d2Safresh1is "\N{alpha}",substr($uni,0,1),"alpha does not map to symbol 'a'"; 36b39c5158Smillert$str = $sym->encode("\N{Beta}"); 375759b3d2Safresh1is "B",decode(ascii => substr($str,0,1)),"Symbol 'B' does not map to Beta"; 38b39c5158Smillert 39b39c5158Smillertforeach my $enc (qw(symbol dingbats ascii),@encodings) 40b39c5158Smillert { 41b39c5158Smillert my $tab = Encode->getEncoding($enc); 425759b3d2Safresh1 is 1,defined($tab),"Could not load $enc"; 43b39c5158Smillert $str = join('',map(chr($_),0x20..0x7E)); 44b39c5158Smillert $uni = $tab->decode($str); 45b39c5158Smillert $cpy = $tab->encode($uni); 465759b3d2Safresh1 is $cpy,$str,"$enc mangled translating to Unicode and back"; 47b39c5158Smillert } 48b39c5158Smillert 49b39c5158Smillert# On ASCII based machines see if we can map several codepoints from 50b39c5158Smillert# three distinct ASCII sets to three distinct EBCDIC coded character sets. 51b39c5158Smillert# On EBCDIC machines see if we can map from three EBCDIC sets to three 52b39c5158Smillert# distinct ASCII sets. 53b39c5158Smillert 54b39c5158Smillertmy @expectation = (240..249, 193..201,209..217,226..233, 129..137,145..153,162..169); 55b39c5158Smillertif (ord('A') != 65) { 56b39c5158Smillert my @temp = @destiny; 57b39c5158Smillert @destiny = @source; 58b39c5158Smillert @source = @temp; 59b39c5158Smillert undef(@temp); 60b39c5158Smillert @expectation = (48..57, 65..90, 97..122); 61b39c5158Smillert} 62b39c5158Smillert 63b39c5158Smillertforeach my $to (@destiny) 64b39c5158Smillert { 65b39c5158Smillert foreach my $from (@source) 66b39c5158Smillert { 67b39c5158Smillert my @expected = @expectation; 68b39c5158Smillert foreach my $chr (@character_set) 69b39c5158Smillert { 70b39c5158Smillert my $native_chr = $chr; 71b39c5158Smillert my $cpy = $chr; 72b39c5158Smillert my $rc = from_to($cpy,$from,$to); 735759b3d2Safresh1 is 1,$rc,"Could not translate from $from to $to"; 745759b3d2Safresh1 is ord($cpy),shift(@expected),"mangled translating $native_chr from $from to $to"; 75b39c5158Smillert } 76b39c5158Smillert } 77b39c5158Smillert } 78b39c5158Smillert 79b39c5158Smillert# On either ASCII or EBCDIC machines ensure we can take the full one 80b39c5158Smillert# byte repetoire to EBCDIC sets and back. 81b39c5158Smillert 82b39c5158Smillertmy $enc_as = 'iso8859-1'; 83b39c5158Smillertforeach my $enc_eb (@ebcdic_sets) 84b39c5158Smillert { 85b39c5158Smillert foreach my $ord (0..255) 86b39c5158Smillert { 87b39c5158Smillert $str = chr($ord); 88b39c5158Smillert my $rc = from_to($str,$enc_as,$enc_eb); 89b39c5158Smillert $rc += from_to($str,$enc_eb,$enc_as); 905759b3d2Safresh1 is $rc,2,"return code for $ord $enc_eb -> $enc_as -> $enc_eb was not obtained"; 915759b3d2Safresh1 is $ord,ord($str),"$enc_as mangled translating $ord to $enc_eb and back"; 92b39c5158Smillert } 93b39c5158Smillert } 94b39c5158Smillert 95b39c5158Smillertmy $mime = find_encoding('iso-8859-2'); 965759b3d2Safresh1is defined($mime),1,"Cannot find MIME-ish'iso-8859-2'"; 97b39c5158Smillertmy $x11 = find_encoding('iso8859-2'); 985759b3d2Safresh1is defined($x11),1,"Cannot find X11-ish 'iso8859-2'"; 995759b3d2Safresh1is $mime,$x11,"iso8598-2 and iso-8859-2 not same"; 100b39c5158Smillertmy $spc = find_encoding('iso 8859-2'); 1015759b3d2Safresh1is defined($spc),1,"Cannot find 'iso 8859-2'"; 1025759b3d2Safresh1is $spc,$mime,"iso 8859-2 and iso-8859-2 not same"; 103b39c5158Smillert 104b39c5158Smillertfor my $i (256,128,129,256) 105b39c5158Smillert { 106b39c5158Smillert my $c = chr($i); 107b39c5158Smillert my $s = "$c\n".sprintf("%02X",$i); 1085759b3d2Safresh1 is utf8::valid($s),1,"concat of $i botched"; 109b39c5158Smillert utf8::upgrade($s); 1105759b3d2Safresh1 is utf8::valid($s),1,"concat of $i botched"; 111b39c5158Smillert } 112b39c5158Smillert 113b39c5158Smillert# Spot check a few points in/out of utf8 114b39c5158Smillertfor my $i (ord('A'),128,256,0x20AC) 115b39c5158Smillert { 116b39c5158Smillert my $c = chr($i); 117b39c5158Smillert my $o = encode_utf8($c); 1185759b3d2Safresh1 is decode_utf8($o),$c,"decode_utf8 not inverse of encode_utf8 for $i"; 1195759b3d2Safresh1 is encode('utf8',$c),$o,"utf8 encode by name broken for $i"; 1205759b3d2Safresh1 is decode('utf8',$o),$c,"utf8 decode by name broken for $i"; 121b39c5158Smillert } 122b39c5158Smillert 123b39c5158Smillert 124b39c5158Smillert# is_utf8 125b39c5158Smillert 126b39c5158Smillertok( is_utf8("\x{100}")); 127b39c5158Smillertok(! is_utf8("a")); 128b39c5158Smillertok(! is_utf8("")); 129b39c5158Smillert"\x{100}" =~ /(.)/; 130b39c5158Smillertok( is_utf8($1)); # ID 20011127.151 131b39c5158Smillert$a = $1; 132b39c5158Smillertok( is_utf8($a)); 133b39c5158Smillert$a = "\x{100}"; 134b39c5158Smillertchop $a; 135b39c5158Smillertok( is_utf8($a)); # weird but true: an empty UTF-8 string 136b39c5158Smillert 137b39c5158Smillert# non-string arguments 138b39c5158Smillertpackage Encode::Dummy; 139b39c5158Smillertuse overload q("") => sub { $_[0]->[0] }; 140b39c5158Smillertsub new { my $class = shift; bless [ @_ ] => $class } 141b39c5158Smillertpackage main; 142b39c5158Smillertok(decode(latin1 => Encode::Dummy->new("foobar")), "foobar"); 143b39c5158Smillertok(encode(utf8 => Encode::Dummy->new("foobar")), "foobar"); 14491f110e0Safresh1 1456fb12b70Safresh1# RT#91569 1466fb12b70Safresh1# decode_utf8 with non-string arguments 1476fb12b70Safresh1ok(decode_utf8(*1), "*main::1"); 1486fb12b70Safresh1 14991f110e0Safresh1# hash keys 1505759b3d2Safresh1foreach my $name ("UTF-16LE", "UTF-8", "Latin1") { 1515759b3d2Safresh1 my $key = (keys %{{ "whatever\x{CA}" => '' }})[0]; 15291f110e0Safresh1 my $kopy = $key; 1535759b3d2Safresh1 encode($name, $kopy, Encode::FB_CROAK); 1545759b3d2Safresh1 is $key, "whatever\x{CA}", "encode $name with shared hash key scalars"; 1555759b3d2Safresh1 undef $key; 1565759b3d2Safresh1 $key = (keys %{{ "whatever\x{CA}" => '' }})[0]; 1575759b3d2Safresh1 $kopy = $key; 1585759b3d2Safresh1 encode($name, $kopy, Encode::FB_CROAK | Encode::LEAVE_SRC); 1595759b3d2Safresh1 is $key, "whatever\x{CA}", "encode $name with LEAVE_SRC and shared hash key scalars"; 16091f110e0Safresh1 undef $key; 16191f110e0Safresh1 $key = (keys %{{ "whatever" => '' }})[0]; 16291f110e0Safresh1 $kopy = $key; 1635759b3d2Safresh1 decode($name, $kopy, Encode::FB_CROAK); 1645759b3d2Safresh1 is $key, "whatever", "decode $name with shared hash key scalars"; 1655759b3d2Safresh1 undef $key; 1665759b3d2Safresh1 $key = (keys %{{ "whatever" => '' }})[0]; 1675759b3d2Safresh1 $kopy = $key; 1685759b3d2Safresh1 decode($name, $kopy, Encode::FB_CROAK | Encode::LEAVE_SRC); 1695759b3d2Safresh1 is $key, "whatever", "decode $name with LEAVE_SRC and shared hash key scalars"; 1705759b3d2Safresh1 1715759b3d2Safresh1 my $enc = find_encoding($name); 1725759b3d2Safresh1 undef $key; 1735759b3d2Safresh1 $key = (keys %{{ "whatever\x{CA}" => '' }})[0]; 1745759b3d2Safresh1 $kopy = $key; 1755759b3d2Safresh1 $enc->encode($kopy, Encode::FB_CROAK); 1765759b3d2Safresh1 is $key, "whatever\x{CA}", "encode obj $name with shared hash key scalars"; 1775759b3d2Safresh1 undef $key; 1785759b3d2Safresh1 $key = (keys %{{ "whatever\x{CA}" => '' }})[0]; 1795759b3d2Safresh1 $kopy = $key; 1805759b3d2Safresh1 $enc->encode($kopy, Encode::FB_CROAK | Encode::LEAVE_SRC); 1815759b3d2Safresh1 is $key, "whatever\x{CA}", "encode obj $name with LEAVE_SRC and shared hash key scalars"; 1825759b3d2Safresh1 undef $key; 1835759b3d2Safresh1 $key = (keys %{{ "whatever" => '' }})[0]; 1845759b3d2Safresh1 $kopy = $key; 1855759b3d2Safresh1 $enc->decode($kopy, Encode::FB_CROAK); 1865759b3d2Safresh1 is $key, "whatever", "decode obj $name with shared hash key scalars"; 1875759b3d2Safresh1 undef $key; 1885759b3d2Safresh1 $key = (keys %{{ "whatever" => '' }})[0]; 1895759b3d2Safresh1 $kopy = $key; 1905759b3d2Safresh1 $enc->decode($kopy, Encode::FB_CROAK | Encode::LEAVE_SRC); 1915759b3d2Safresh1 is $key, "whatever", "decode obj $name with LEAVE_SRC and shared hash key scalars"; 1925759b3d2Safresh1} 1935759b3d2Safresh1 1945759b3d2Safresh1my $latin1 = find_encoding('latin1'); 1955759b3d2Safresh1my $orig = "\316"; 1965759b3d2Safresh1$orig =~ /(.)/; 1975759b3d2Safresh1is $latin1->encode($1), $orig, '[cpan #115168] passing magic regex globals to encode'; 1985759b3d2Safresh1SKIP: { 1995759b3d2Safresh1 skip "Perl Version ($]) is older than v5.16", 1 if $] < 5.016; 2005759b3d2Safresh1 *a = $orig; 2015759b3d2Safresh1 is $latin1->encode(*a), '*main::'.$orig, '[cpan #115168] passing typeglobs to encode'; 2025759b3d2Safresh1} 203