#!./perl
# $Id: piconv,v 1.27 2003/06/18 09:29:02 dankogai Exp $
#
# piconv -- iconv(1)-like character encoding converter built on Encode.
#
# Flow: parse options, handle the "informational" modes (--help, --list,
# --resolve) which exit immediately, work out the source and destination
# encodings, then convert either the -s string or each input file/STDIN
# using the selected conversion scheme.
#
use 5.8.0;
use strict;
use warnings;
use Encode ;
use Encode::Alias;

# Conversion schemes accepted by -S/--scheme.
my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio);

use File::Basename;
my $name = basename($0);

use Getopt::Long qw(:config no_ignore_case);

my %Opt;

# GetOptions stores each value under the FIRST name of its spec
# (e.g. 'scheme|S=s' => $Opt{scheme}); it reports bad options itself.
GetOptions(\%Opt,
           'from|f=s',
           'to|t=s',
           'list|l',
           'string|s=s',
           'check|C=i',
           'c',
           'perlqq|p',
           'debug|D',
           'scheme|S=s',
           'resolve|r=s',
           'help',
          )
    or help("invalid command line options");

$Opt{help} and help();
$Opt{list} and list_encodings();
defined $Opt{resolve} and resolve_encoding($Opt{resolve});

# At least one of -f / -t is required; the missing one comes from the locale.
$Opt{from} || $Opt{to} || help();

# POSIX precedence: LC_ALL overrides LC_CTYPE, which overrides LANG.
my $locale = $ENV{LC_ALL} || $ENV{LC_CTYPE} || $ENV{LANG};
my $from = $Opt{from} || $locale or help("from_encoding unspecified");
my $to   = $Opt{to}   || $locale or help("to_encoding unspecified");

# Validate both names once, so every later step can rely on them and an
# unknown encoding produces one clear message instead of a late crash.
my $from_enc = Encode::find_encoding($from)
    or die "$name: $from is not known to Encode\n";
my $to_enc = Encode::find_encoding($to)
    or die "$name: $to is not known to Encode\n";

# -p wins over -C N, which wins over -c; 0 means "no checking".
my $check = $Opt{perlqq} ? Encode::FB_PERLQQ()
                         : ($Opt{check} || $Opt{c} || 0);

# -s: convert the given string instead of any input stream.
# "defined" (not truth) so that strings such as "0" are converted too.
if (defined $Opt{string}) {
    my $string = $Opt{string};
    defined Encode::from_to($string, $from, $to, $check)
        or die "$name: cannot convert string from $from to $to\n";
    print $string;
    exit 0;
}

# Unknown scheme names fall back to the default rather than aborting.
my $scheme = 'from_to';
if (defined $Opt{scheme}) {
    if (exists $Scheme{$Opt{scheme}}) {
        $scheme = $Opt{scheme};
    }
    else {
        warn "$name: unknown scheme '$Opt{scheme}', "
           . "falling back to 'from_to'\n";
    }
}

if ($Opt{debug}){
    my $cfrom = $from_enc->name;
    my $cto   = $to_enc->name;
    print <<"EOT";
Scheme: $scheme
From:   $from => $cfrom
To:     $to => $cto
EOT
}

# The perlio scheme encodes on output through a layer on STDOUT.
if ($scheme eq 'perlio') {
    binmode(STDOUT, ":encoding($to)")
        or die "$name: cannot set output encoding $to: $!\n";
}

# Inputs are opened explicitly (instead of via <>) so that the perlio
# scheme can put its decoding layer on the handle actually being read.
my $status = 0;
for my $file (@ARGV ? @ARGV : ('-')) {
    my $in = open_input($file);
    if (!$in) {
        $status = 1;
        next;
    }
    convert_handle($in, $scheme, $from, $to, $check);
    close $in unless $file eq '-';
}
exit $status;

# open_input($file)
# Returns a read handle for $file; '-' means STDIN.  On failure a warning
# is printed and an empty list / undef is returned.
sub open_input {
    my $file = shift;
    return \*STDIN if $file eq '-';
    open my $fh, '<', $file or do {
        warn "$name: cannot open $file: $!\n";
        return;
    };
    return $fh;
}

# convert_handle($in, $scheme, $from, $to, $check)
# Copies $in to STDOUT line by line, converting from $from to $to with the
# given scheme.  Dies on an unknown scheme or if the perlio layer cannot be
# applied to $in.
sub convert_handle {
    my ($in, $scheme, $from, $to, $check) = @_;

    # default
    if ($scheme eq 'from_to') {
        while (my $line = <$in>) {
            Encode::from_to($line, $from, $to, $check);
            print $line;
        }
    }
    # step-by-step
    elsif ($scheme eq 'decode_encode') {
        while (my $line = <$in>) {
            my $decoded = decode($from, $line, $check);
            my $encoded = encode($to, $decoded);
            print $encoded;
        }
    }
    # NI-S favorite
    elsif ($scheme eq 'perlio') {
        binmode($in, ":encoding($from)")
            or die "$name: cannot set input encoding $from: $!\n";
        while (my $line = <$in>) {
            print $line;
        }
    }
    else { # won't reach
        die "$name: unknown scheme: $scheme";
    }
    return;
}

# list_encodings()
# Prints every encoding name known to Encode, one per line, and exits 0.
sub list_encodings{
    print join("\n", Encode->encodings(":all")), "\n";
    exit 0;
}

# resolve_encoding($alias)
# Prints the canonical Encode name for $alias and exits 0, or warns and
# exits 1 when the alias is unknown.
sub resolve_encoding {
    if (my $alias = Encode::resolve_alias($_[0])) {
        print $alias, "\n";
        exit 0;
    } else {
        warn "$name: $_[0] is not known to Encode\n";
        exit 1;
    }
}

# help([$message])
# Prints the optional error message and the usage text to STDERR, then
# exits: status 1 when a message was given (an error), 0 otherwise.
sub help{
    my $message = shift;
    $message and print STDERR "$name error: $message\n";
    print STDERR <<"EOT";
$name [-f from_encoding] [-t to_encoding] [-s string] [files...]
$name -l
$name -r encoding_alias
  -l,--list
     lists all available encodings
  -r,--resolve encoding_alias
     resolve encoding to its (Encode) canonical name
  -f,--from from_encoding
     when omitted, the current locale will be used
  -t,--to to_encoding
     when omitted, the current locale will be used
  -s,--string string
     "string" will be the input instead of STDIN or files
The following are mainly of interest to Encode hackers:
  -D,--debug          show debug information
  -C N | -c | -p      check the validity of the input
  -S,--scheme scheme  use the scheme for conversion
EOT
    exit($message ? 1 : 0);
}

__END__

=head1 NAME

piconv -- iconv(1), reinvented in perl

=head1 SYNOPSIS

  piconv [-f from_encoding] [-t to_encoding] [-s string] [files...]
  piconv -l
  piconv [-C N|-c|-p]
  piconv -S scheme ...
  piconv -r encoding
  piconv -D ...
  piconv -h

=head1 DESCRIPTION

B<piconv> is a Perl version of B<iconv>, a character encoding converter
widely available for various Unixen today.  This script was primarily
a technology demonstrator for Perl 5.8.0, but you can use piconv in the
place of iconv for virtually any case.

piconv converts the character encoding of either STDIN or files
specified in the argument and prints out to STDOUT.

Here is the list of options.  Each option can be in short format (-f)
or long (--from).

=over 4

=item -f,--from from_encoding

Specifies the encoding you are converting from.  Unlike B<iconv>,
this option can be omitted.  In such cases, the current locale is used.

=item -t,--to to_encoding

Specifies the encoding you are converting to.  Unlike B<iconv>,
this option can be omitted.  In such cases, the current locale is used.

At least one of -f and -t must be given; when both are omitted,
B<piconv> prints its usage and exits.

=item -s,--string I<string>

uses I<string> instead of file for the source of text.

=item -l,--list

Lists all available encodings, one per line, in case-insensitive
order.  Note that only the canonical names are listed; many aliases
exist.  For example, the names are case-insensitive, and many standard
and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850"
instead of "cp850", or "winlatin1" for "cp1252".  See L<Encode::Supported>
for a full discussion.

=item -r,--resolve encoding_alias

Resolves I<encoding_alias> to its (Encode) canonical name.

=item -C,--check I<N>

Check the validity of the stream if I<N> = 1.  When I<N> = -1, something
interesting happens when it encounters an invalid character.

=item -c

Same as C<-C 1>.

=item -p,--perlqq

Uses C<Encode::FB_PERLQQ> as the check value.  See L<Encode> for how
that value treats characters that cannot be converted.

=item -h,--help

Show usage.

=item -D,--debug

Invokes debugging mode.  Primarily for Encode hackers.

=item -S,--scheme scheme

Selects which scheme is to be used for conversion.  An unknown scheme
name produces a warning and falls back to the default.  Available schemes
are as follows:

=over 4

=item from_to

Uses Encode::from_to for conversion.  This is the default.

=item decode_encode

Input strings are decode()d then encode()d.  A straight two-step
implementation.

=item perlio

The new perlIO layer is used.  NI-S' favorite.

=back

Like the I<-D> option, this is also for Encode hackers.

=back

=head1 SEE ALSO

L<iconv(1)>,
L<locale(3)>,
L<Encode>,
L<Encode::Supported>,
L<Encode::Alias>,
L<PerlIO>

=cut