#!./perl
# $Id: piconv,v 1.27 2003/06/18 09:29:02 dankogai Exp $
#
# piconv -- an iconv(1)-like character encoding converter built on Encode.
#
# Reads the files named on the command line (or STDIN), converts each line
# from the source encoding to the target encoding and prints the result to
# STDOUT.  -l lists the known encodings, -r resolves an alias, and -s
# converts a literal string instead of reading input.
#
use 5.8.0;
use strict;
use warnings;
use Encode ;
use Encode::Alias;

# Conversion schemes that may be selected with -S/--scheme.
my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio);

use File::Basename;
my $name = basename($0);

use Getopt::Long qw(:config no_ignore_case);

my %Opt;

# Getopt::Long reports the offending option itself; we only add the usage.
help()
    unless
      GetOptions(\%Opt,
                 'from|f=s',
                 'to|t=s',
                 'list|l',
                 'string|s=s',
                 'check|C=i',
                 'c',
                 'perlqq|p',
                 'debug|D',
                 'scheme|S=s',
                 'resolve|r=s',
                 'help|h',
                 );

$Opt{help} and help();
$Opt{list} and list_encodings();
my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG};
defined $Opt{resolve} and resolve_encoding($Opt{resolve});
$Opt{from} || $Opt{to} || help();
my $from = $Opt{from} || $locale or help("from_encoding unspecified");
my $to   = $Opt{to}   || $locale or help("to_encoding unspecified");

# -s: convert the literal string and stop.  Test definedness rather than
# truth so that a string such as "0" is converted instead of silently
# falling through to reading STDIN.
if (defined $Opt{string}){
    Encode::from_to($Opt{string}, $from, $to);
    print $Opt{string};
    exit;
}

# Getopt::Long stores the option under its primary name, 'scheme'
# (lower case); unknown scheme names fall back to the default.
my $scheme = 'from_to';
if (defined $Opt{scheme}){
    if (exists $Scheme{$Opt{scheme}}){
        $scheme = $Opt{scheme};
    }else{
        warn "$name: unknown scheme '$Opt{scheme}', falling back to 'from_to'\n";
    }
}

# -c is shorthand for a true check value; -p selects the \x{HHHH} fallback.
$Opt{check} ||= $Opt{c};
$Opt{check} = Encode::FB_PERLQQ() if $Opt{perlqq};

if ($Opt{debug}){
    # Fail with a readable message instead of calling ->name on undef.
    my $efrom = Encode->getEncoding($from)
        or die "$name: unknown encoding '$from'\n";
    my $eto   = Encode->getEncoding($to)
        or die "$name: unknown encoding '$to'\n";
    my $cfrom = $efrom->name;
    my $cto   = $eto->name;
    print <<"EOT";
Scheme: $scheme
From:   $from => $cfrom
To:     $to => $cto
EOT
}

# default
if ($scheme eq 'from_to'){
    while(<>){
        Encode::from_to($_, $from, $to, $Opt{check}); print;
    };
# step-by-step
}elsif ($scheme eq 'decode_encode'){
    while(<>){
        my $decoded = decode($from, $_, $Opt{check});
        my $encoded = encode($to, $decoded);
        print $encoded;
    };
# NI-S favorite
}elsif ($scheme eq 'perlio'){
    binmode(STDOUT, ":encoding($to)");
    # <> reads through the ARGV handle, so a layer pushed onto STDIN never
    # reaches files named on the command line.  Open every input explicitly
    # with the decoding layer instead; '-' (or no arguments) means STDIN.
    my $stdin_layered = 0;
    for my $input (@ARGV ? @ARGV : '-'){
        my $in;
        if ($input eq '-'){
            $in = \*STDIN;
            # Push the layer only once even if '-' is given repeatedly.
            binmode($in, ":encoding($from)") unless $stdin_layered++;
        }elsif (!open($in, "<:encoding($from)", $input)){
            # Like <>, complain about an unreadable file and carry on.
            warn "$name: can't open $input: $!\n";
            next;
        }
        print while <$in>;
        close($in) unless $input eq '-';
    }
} else { # won't reach
    die "$name: unknown scheme: $scheme";
}

# Print every encoding name Encode knows about, one per line, and exit 0.
sub list_encodings{
    print join("\n", Encode->encodings(":all")), "\n";
    exit 0;
}

# Print the canonical Encode name for the alias given as the first argument
# and exit 0; if Encode cannot resolve it, warn and exit 1.
sub resolve_encoding {
    if (my $alias = Encode::resolve_alias($_[0])) {
        print $alias, "\n";
        exit 0;
    } else {
        warn "$name: $_[0] is not known to Encode\n";
        exit 1;
    }
}

# Print the usage summary to STDERR and exit.  An optional first argument
# is an error message shown before the usage; when one is given the exit
# status is 1 so that callers can detect the failure, otherwise it is 0.
sub help{
    my $message = shift;
    $message and print STDERR "$name error: $message\n";
    print STDERR <<"EOT";
$name [-f from_encoding] [-t to_encoding] [-s string] [files...]
$name -l
$name -r encoding_alias
  -l,--list
     lists all available encodings
  -r,--resolve encoding_alias
     resolve encoding to its (Encode) canonical name
  -f,--from from_encoding
     when omitted, the current locale will be used
  -t,--to to_encoding
     when omitted, the current locale will be used
  -s,--string string
     "string" will be the input instead of STDIN or files
The following are mainly of interest to Encode hackers:
  -D,--debug          show debug information
  -C N | -c | -p      check the validity of the input
  -S,--scheme scheme  use the scheme for conversion
EOT
    exit($message ? 1 : 0);
}

__END__

=head1 NAME

piconv -- iconv(1), reinvented in perl

=head1 SYNOPSIS

  piconv [-f from_encoding] [-t to_encoding] [-s string] [files...]
  piconv -l
  piconv [-C N|-c|-p]
  piconv -S scheme ...
  piconv -r encoding
  piconv -D ...
  piconv -h

=head1 DESCRIPTION

B<piconv> is a Perl version of B<iconv>, a character encoding converter
widely available for various Unixen today.  This script was primarily
a technology demonstrator for Perl 5.8.0, but you can use piconv in the
place of iconv for virtually any case.

piconv converts the character encoding of either STDIN or files
specified in the argument and prints out to STDOUT.

Here is the list of options.  Each option can be in short format (-f)
or long (--from).

=over 4

=item -f,--from from_encoding

Specifies the encoding you are converting from.  Unlike B<iconv>,
this option can be omitted.  In such cases, the current locale is used.

=item -t,--to to_encoding

Specifies the encoding you are converting to.  Unlike B<iconv>,
this option can be omitted.  In such cases, the current locale is used.

Therefore, when both -f and -t are omitted, B<piconv> just acts
like B<cat>.

=item -s,--string I<string>

Uses I<string> instead of a file as the source of text.

=item -l,--list

Lists all available encodings, one per line, in case-insensitive
order.
Note that only the canonical names are listed; many aliases
exist.  For example, the names are case-insensitive, and many standard
and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850"
instead of "cp850", or "winlatin1" for "cp1252".  See L<Encode::Supported>
for a full discussion.

=item -C,--check I<N>

Check the validity of the stream if I<N> = 1.  When I<N> = -1, something
interesting happens when it encounters an invalid character.

=item -c

Same as C<-C 1>.

=item -p,--perlqq

Same as C<-C -1>.

=item -h,--help

Show usage.

=item -D,--debug

Invokes debugging mode.  Primarily for Encode hackers.

=item -S,--scheme scheme

Selects which scheme is to be used for conversion.  Available schemes
are as follows:

=over 4

=item from_to

Uses Encode::from_to for conversion.  This is the default.

=item decode_encode

Input strings are decode()d then encode()d.  A straight two-step
implementation.

=item perlio

The new perlIO layer is used.  NI-S' favorite.

=back

Like the I<-D> option, this is also for Encode hackers.

=back

=head1 SEE ALSO

L<iconv(1)>
L<locale(3)>
L<Encode>
L<Encode::Supported>
L<Encode::Alias>
L<PerlIO>

=cut