xref: /onnv-gate/usr/src/cmd/perl/5.8.4/distrib/ext/Encode/bin/piconv (revision 0:68f95e015346)
1*0Sstevel@tonic-gate#!./perl
2*0Sstevel@tonic-gate# $Id: piconv,v 1.27 2003/06/18 09:29:02 dankogai Exp $
3*0Sstevel@tonic-gate#
4*0Sstevel@tonic-gateuse 5.8.0;
5*0Sstevel@tonic-gateuse strict;
6*0Sstevel@tonic-gateuse Encode ;
7*0Sstevel@tonic-gateuse Encode::Alias;
# Conversion schemes that -S/--scheme may select; the keys are the scheme
# names and every value is simply true.
my %Scheme = (
    from_to       => 1,
    decode_encode => 1,
    perlio        => 1,
);

use File::Basename;
# Name this program was invoked as; used in diagnostics and the usage text.
my $name = basename($0);
12*0Sstevel@tonic-gate
13*0Sstevel@tonic-gateuse Getopt::Long qw(:config no_ignore_case);
14*0Sstevel@tonic-gate
# Command-line options as parsed by Getopt::Long.  Each value is stored under
# the option's primary (first) name, e.g. -S ends up in $Opt{scheme}.
my %Opt;

# Print the usage text and quit when the command line cannot be parsed.
help()
    unless GetOptions(
        \%Opt,
        'from|f=s',
        'to|t=s',
        'list|l',
        'string|s=s',
        'check|C=i',
        'c',
        'perlqq|p',
        'debug|D',
        'scheme|S=s',
        'resolve|r=s',
        'help|h',       # -h is documented; do not rely on auto-abbreviation
    );

# Modes that print something and exit without converting anything.
$Opt{help} and help();
$Opt{list} and list_encodings();

# Fallback encoding name, taken verbatim from the environment.
# NOTE(review): POSIX gives LC_ALL precedence over LC_CTYPE, and the raw
# locale string is handed to Encode as an encoding name -- confirm that both
# are intended.
my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG};
defined $Opt{resolve} and resolve_encoding($Opt{resolve});

# At least one of -f / -t is required; the missing side falls back to $locale.
$Opt{from} || $Opt{to} || help();
my $from = $Opt{from} || $locale or help("from_encoding unspecified");
my $to   = $Opt{to}   || $locale or help("to_encoding unspecified");

# -s: convert the given string and stop.  Test definedness rather than truth
# so that -s 0, or a conversion that yields empty output, does not fall
# through to reading STDIN/files.
if (defined $Opt{string}) {
    Encode::from_to($Opt{string}, $from, $to);
    print $Opt{string};
    exit;
}

# Conversion scheme; anything not listed in %Scheme falls back to the default.
my $scheme = 'from_to';
if (defined $Opt{scheme}) {
    if (exists $Scheme{ $Opt{scheme} }) {
        $scheme = $Opt{scheme};
    }
    else {
        warn "$name: unknown scheme '$Opt{scheme}', falling back to 'from_to'\n";
    }
}

# -c is shorthand for a true check value; -p overrides both -C and -c.
$Opt{check} ||= $Opt{c};
$Opt{perlqq} and $Opt{check} = Encode::FB_PERLQQ;
44*0Sstevel@tonic-gate
# -D: report the chosen scheme and the canonical Encode names of both
# encodings before converting.
if ($Opt{debug}) {
    # Look the encodings up explicitly so that an unknown name produces a
    # readable diagnostic instead of "Can't call method "name" on an
    # undefined value".
    my $from_obj = Encode->getEncoding($from)
        or die "$name: unknown encoding: $from\n";
    my $to_obj = Encode->getEncoding($to)
        or die "$name: unknown encoding: $to\n";
    my $cfrom = $from_obj->name;
    my $cto   = $to_obj->name;
    print <<"EOT";
Scheme: $scheme
From:   $from => $cfrom
To:     $to => $cto
EOT
}
54*0Sstevel@tonic-gate
# Convert the input (files named on the command line, else STDIN) line by
# line with the selected scheme and write the result to STDOUT.
if ($scheme eq 'from_to') {
    # default: convert each line's octets in place
    while (<>) {
        Encode::from_to($_, $from, $to, $Opt{check});
        print;
    }
}
elsif ($scheme eq 'decode_encode') {
    # step-by-step: decode to Perl characters, then encode again
    while (<>) {
        my $decoded = decode($from, $_, $Opt{check});
        my $encoded = encode($to, $decoded);
        print $encoded;
    }
}
elsif ($scheme eq 'perlio') {
    # NI-S favorite: let PerlIO encoding layers do the work.
    binmode(STDIN,  ":encoding($from)");
    binmode(STDOUT, ":encoding($to)");

    # A layer set on STDIN does not apply to files read through <>, so each
    # named file is opened with the decoding layer explicitly.  "-" (or no
    # arguments at all) still means STDIN.
    my @inputs = @ARGV ? @ARGV : ('-');
    for my $file (@inputs) {
        if ($file eq '-') {
            print while <STDIN>;
            next;
        }
        my $in;
        unless (open($in, "<:encoding($from)", $file)) {
            warn "$name: can't open $file: $!\n";
            next;
        }
        print while <$in>;
        close $in;
    }
}
else {    # won't reach
    die "$name: unknown scheme: $scheme";
}
75*0Sstevel@tonic-gate
# Print the name of every encoding Encode reports for ":all", one per line,
# then exit successfully.  Never returns.
sub list_encodings {
    my @names   = Encode->encodings(":all");
    my $listing = join("\n", @names) . "\n";
    print $listing;
    exit 0;
}
80*0Sstevel@tonic-gate
# Resolve the encoding name or alias in $_[0] through Encode::resolve_alias.
# Prints the resolved name and exits 0 when the lookup yields a true value;
# otherwise warns on STDERR and exits 1.  Never returns.
sub resolve_encoding {
    my $canonical = Encode::resolve_alias($_[0]);

    unless ($canonical) {
        warn "$name: $_[0] is not known to Encode\n";
        exit 1;
    }

    print $canonical, "\n";
    exit 0;
}
90*0Sstevel@tonic-gate
# Print the usage text on STDERR and exit.  Never returns.
#
#   $message  optional error text.  When given, it is printed first as
#             "<program> error: <message>" and the exit status is 1, so
#             callers of the script can detect the failure.  Without a
#             message the exit status is 0, as before.
sub help {
    my $message = shift;
    my $failed  = defined $message && length $message;

    print STDERR "$name error: $message\n" if $failed;
    print STDERR <<"EOT";
$name [-f from_encoding] [-t to_encoding] [-s string] [files...]
$name -l
$name -r encoding_alias
  -l,--list
     lists all available encodings
  -r,--resolve encoding_alias
    resolve encoding to its (Encode) canonical name
  -f,--from from_encoding
     when omitted, the current locale will be used
  -t,--to to_encoding
     when omitted, the current locale will be used
  -s,--string string
     "string" will be the input instead of STDIN or files
The following are mainly of interest to Encode hackers:
  -D,--debug          show debug information
  -C N | -c | -p      check the validity of the input
  -S,--scheme scheme  use the scheme for conversion
EOT
    exit($failed ? 1 : 0);
}
115*0Sstevel@tonic-gate
116*0Sstevel@tonic-gate__END__
117*0Sstevel@tonic-gate
118*0Sstevel@tonic-gate=head1 NAME
119*0Sstevel@tonic-gate
120*0Sstevel@tonic-gatepiconv -- iconv(1), reinvented in perl
121*0Sstevel@tonic-gate
122*0Sstevel@tonic-gate=head1 SYNOPSIS
123*0Sstevel@tonic-gate
124*0Sstevel@tonic-gate  piconv [-f from_encoding] [-t to_encoding] [-s string] [files...]
125*0Sstevel@tonic-gate  piconv -l
126*0Sstevel@tonic-gate  piconv [-C N|-c|-p]
127*0Sstevel@tonic-gate  piconv -S scheme ...
128*0Sstevel@tonic-gate  piconv -r encoding
129*0Sstevel@tonic-gate  piconv -D ...
130*0Sstevel@tonic-gate  piconv -h
131*0Sstevel@tonic-gate
132*0Sstevel@tonic-gate=head1 DESCRIPTION
133*0Sstevel@tonic-gate
B<piconv> is a Perl version of B<iconv>, a character encoding converter
widely available for various Unixen today.  This script was primarily
a technology demonstrator for Perl 5.8.0, but you can use piconv in
place of iconv in virtually any case.
138*0Sstevel@tonic-gate
139*0Sstevel@tonic-gatepiconv converts the character encoding of either STDIN or files
140*0Sstevel@tonic-gatespecified in the argument and prints out to STDOUT.
141*0Sstevel@tonic-gate
142*0Sstevel@tonic-gateHere is the list of options.  Each option can be in short format (-f)
143*0Sstevel@tonic-gateor long (--from).
144*0Sstevel@tonic-gate
145*0Sstevel@tonic-gate=over 4
146*0Sstevel@tonic-gate
147*0Sstevel@tonic-gate=item -f,--from from_encoding
148*0Sstevel@tonic-gate
149*0Sstevel@tonic-gateSpecifies the encoding you are converting from.  Unlike B<iconv>,
150*0Sstevel@tonic-gatethis option can be omitted.  In such cases, the current locale is used.
151*0Sstevel@tonic-gate
152*0Sstevel@tonic-gate=item -t,--to to_encoding
153*0Sstevel@tonic-gate
154*0Sstevel@tonic-gateSpecifies the encoding you are converting to.  Unlike B<iconv>,
155*0Sstevel@tonic-gatethis option can be omitted.  In such cases, the current locale is used.
156*0Sstevel@tonic-gate
At least one of -f and -t must be given, however: when both are omitted,
B<piconv> prints its usage message and exits.
159*0Sstevel@tonic-gate
160*0Sstevel@tonic-gate=item -s,--string I<string>
161*0Sstevel@tonic-gate
Uses I<string> instead of a file as the source of text.
163*0Sstevel@tonic-gate
164*0Sstevel@tonic-gate=item -l,--list
165*0Sstevel@tonic-gate
166*0Sstevel@tonic-gateLists all available encodings, one per line, in case-insensitive
167*0Sstevel@tonic-gateorder.  Note that only the canonical names are listed; many aliases
168*0Sstevel@tonic-gateexist.  For example, the names are case-insensitive, and many standard
169*0Sstevel@tonic-gateand common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850"
170*0Sstevel@tonic-gateinstead of "cp850", or "winlatin1" for "cp1252".  See L<Encode::Supported>
171*0Sstevel@tonic-gatefor a full discussion.
172*0Sstevel@tonic-gate
173*0Sstevel@tonic-gate=item -C,--check I<N>
174*0Sstevel@tonic-gate
175*0Sstevel@tonic-gateCheck the validity of the stream if I<N> = 1.  When I<N> = -1, something
176*0Sstevel@tonic-gateinteresting happens when it encounters an invalid character.
177*0Sstevel@tonic-gate
178*0Sstevel@tonic-gate=item -c
179*0Sstevel@tonic-gate
180*0Sstevel@tonic-gateSame as C<-C 1>.
181*0Sstevel@tonic-gate
182*0Sstevel@tonic-gate=item -p,--perlqq
183*0Sstevel@tonic-gate
Uses C<Encode::FB_PERLQQ> as the check value, overriding C<-C> and C<-c>.
185*0Sstevel@tonic-gate
186*0Sstevel@tonic-gate=item -h,--help
187*0Sstevel@tonic-gate
188*0Sstevel@tonic-gateShow usage.
189*0Sstevel@tonic-gate
190*0Sstevel@tonic-gate=item -D,--debug
191*0Sstevel@tonic-gate
192*0Sstevel@tonic-gateInvokes debugging mode.  Primarily for Encode hackers.
193*0Sstevel@tonic-gate
194*0Sstevel@tonic-gate=item -S,--scheme scheme
195*0Sstevel@tonic-gate
196*0Sstevel@tonic-gateSelects which scheme is to be used for conversion.  Available schemes
197*0Sstevel@tonic-gateare as follows:
198*0Sstevel@tonic-gate
199*0Sstevel@tonic-gate=over 4
200*0Sstevel@tonic-gate
201*0Sstevel@tonic-gate=item from_to
202*0Sstevel@tonic-gate
203*0Sstevel@tonic-gateUses Encode::from_to for conversion.  This is the default.
204*0Sstevel@tonic-gate
205*0Sstevel@tonic-gate=item decode_encode
206*0Sstevel@tonic-gate
207*0Sstevel@tonic-gateInput strings are decode()d then encode()d.  A straight two-step
208*0Sstevel@tonic-gateimplementation.
209*0Sstevel@tonic-gate
210*0Sstevel@tonic-gate=item perlio
211*0Sstevel@tonic-gate
212*0Sstevel@tonic-gateThe new perlIO layer is used.  NI-S' favorite.
213*0Sstevel@tonic-gate
214*0Sstevel@tonic-gate=back
215*0Sstevel@tonic-gate
216*0Sstevel@tonic-gateLike the I<-D> option, this is also for Encode hackers.
217*0Sstevel@tonic-gate
218*0Sstevel@tonic-gate=back
219*0Sstevel@tonic-gate
220*0Sstevel@tonic-gate=head1 SEE ALSO
221*0Sstevel@tonic-gate
L<iconv(1)>
L<locale(3)>
224*0Sstevel@tonic-gateL<Encode>
225*0Sstevel@tonic-gateL<Encode::Supported>
226*0Sstevel@tonic-gateL<Encode::Alias>
227*0Sstevel@tonic-gateL<PerlIO>
228*0Sstevel@tonic-gate
229*0Sstevel@tonic-gate=cut
230