xref: /onnv-gate/usr/src/cmd/perl/5.8.4/distrib/ext/Encode/bin/piconv (revision 0:68f95e015346)
1#!./perl
2# $Id: piconv,v 1.27 2003/06/18 09:29:02 dankogai Exp $
3#
4use 5.8.0;
5use strict;
6use Encode ;
7use Encode::Alias;
8my %Scheme =  map {$_ => 1} qw(from_to decode_encode perlio);
9
10use File::Basename;
11my $name = basename($0);
12
13use Getopt::Long qw(:config no_ignore_case);
14
# Parsed command-line options.  Getopt::Long stores each value under the
# first name of its specification (so -S / --scheme lands in $Opt{scheme}).
my %Opt;

# Option specifications: "=s" takes a string argument, "=i" an integer
# argument, and a bare name is a boolean flag.
my @option_specs = (
    'from|f=s',       # encoding to convert from
    'to|t=s',         # encoding to convert to
    'list|l',         # list the available encodings and exit
    'string|s=s',     # convert this string instead of reading input
    'check|C=i',      # check value handed to the conversion routines
    'c',              # used as the check value when -C is not given
    'perlqq|p',       # force the check value to Encode::FB_PERLQQ
    'debug|D',        # print the resolved scheme and encodings first
    'scheme|S=s',     # conversion scheme (see %Scheme)
    'resolve|r=s',    # resolve an encoding alias and exit
    'help',           # show the usage text and exit
);

# Show the usage text (help() exits) when the command line cannot be parsed.
GetOptions(\%Opt, @option_specs) or help();
32
# Options that short-circuit the conversion; each of these subs exits.
$Opt{help} and help();
$Opt{list} and list_encodings();

# Locale name used as the encoding when -f or -t is omitted.  LC_ALL is
# consulted first because it overrides LC_CTYPE, which in turn overrides
# LANG (POSIX locale environment variable precedence).
my $locale = $ENV{LC_ALL} || $ENV{LC_CTYPE} || $ENV{LANG};
defined $Opt{resolve} and resolve_encoding($Opt{resolve});

# At least one of -f / -t is required; the missing one defaults to the locale.
$Opt{from} || $Opt{to} || help();
my $from = $Opt{from} || $locale or help("from_encoding unspecified");
my $to   = $Opt{to}   || $locale or help("to_encoding unspecified");

# -s: convert the given string and stop.  Test with defined() so that an
# empty string or "0" is still treated as the input, and never fall through
# to reading STDIN/files once -s has been given.
if (defined $Opt{string}) {
    defined Encode::from_to($Opt{string}, $from, $to)
        or die "$name: cannot convert string from $from to $to\n";
    print $Opt{string};
    exit;
}

# -S / --scheme is stored under the key 'scheme' (the first name in its
# option specification); anything not listed in %Scheme falls back to the
# default scheme.
my $scheme = 'from_to';
if (defined $Opt{scheme}) {
    if (exists $Scheme{$Opt{scheme}}) {
        $scheme = $Opt{scheme};
    }
    else {
        warn "$name: unknown scheme '$Opt{scheme}', falling back to 'from_to'\n";
    }
}

# -c supplies the check value when -C is absent (or 0); -p overrides both.
$Opt{check} ||= $Opt{c};
$Opt{perlqq} and $Opt{check} = Encode::FB_PERLQQ;
44
# -D / --debug: report the chosen scheme and the canonical names that the
# requested encodings resolve to before converting anything.
if ($Opt{debug}) {
    # find_encoding() returns undef for a name Encode does not know; report
    # that clearly instead of dying on a method call on undef.
    my $from_encoding = Encode::find_encoding($from)
        or die "$name: unknown encoding '$from'\n";
    my $to_encoding = Encode::find_encoding($to)
        or die "$name: unknown encoding '$to'\n";
    my $cfrom = $from_encoding->name;
    my $cto   = $to_encoding->name;
    print <<"EOT";
Scheme: $scheme
From:   $from => $cfrom
To:     $to => $cto
EOT
}
54
# Convert the input line by line from $from to $to with the selected scheme
# and write the result to STDOUT.  Input is the files named in @ARGV, or
# STDIN when no files are given.

# default: convert each line in place with Encode::from_to
if ($scheme eq 'from_to') {
    while (<>) {
        Encode::from_to($_, $from, $to, $Opt{check});
        print;
    }
}
# step-by-step: decode to Perl's internal form, then encode to the target
elsif ($scheme eq 'decode_encode') {
    while (<>) {
        my $decoded = decode($from, $_, $Opt{check});
        my $encoded = encode($to, $decoded);
        print $encoded;
    }
}
# NI-S favorite: let PerlIO :encoding layers do the conversion
elsif ($scheme eq 'perlio') {
    binmode(STDOUT, ":encoding($to)");

    # The decoding layer has to sit on the handle that is actually read.
    # <> reads the files named in @ARGV through the ARGV handle, which a
    # layer pushed on STDIN never touches, so each input is opened
    # explicitly here.  "-" (or an empty @ARGV) means STDIN, as with <>.
    my $stdin_layered = 0;
    for my $input (@ARGV ? @ARGV : '-') {
        my $in;
        if ($input eq '-') {
            $in = \*STDIN;
            # Push the layer only once even if "-" is named repeatedly.
            binmode($in, ":encoding($from)") unless $stdin_layered++;
        }
        elsif (!open($in, "<:encoding($from)", $input)) {
            # Like <>, complain about an unreadable file and carry on.
            warn "$name: can't open $input: $!\n";
            next;
        }
        while (my $line = <$in>) {
            print $line;
        }
        close($in) unless $input eq '-';
    }
}
else {    # won't reach
    die "$name: unknown scheme: $scheme";
}
75
# Print the name of every encoding reported by Encode->encodings(":all"),
# one per line, then exit with status 0.  Never returns.
sub list_encodings {
    my $listing = join "\n", Encode->encodings(":all");
    print "$listing\n";
    exit 0;
}
80
# Resolve the encoding name or alias given as the first argument.
# Prints the name returned by Encode::resolve_alias and exits with status 0;
# if that lookup yields a false value, warns on STDERR and exits with
# status 1.  Never returns.
sub resolve_encoding {
    my $canonical = Encode::resolve_alias($_[0]);

    unless ($canonical) {
        warn "$name: $_[0] is not known to Encode\n";
        exit 1;
    }

    print $canonical, "\n";
    exit 0;
}
90
# Print the usage summary to STDERR and exit.  Never returns.
#
# An optional first argument is an error message; when it is true it is
# printed (prefixed with the program name) ahead of the usage text.
sub help {
    my ($message) = @_;

    print STDERR "$name error: $message\n" if $message;

    my $usage = <<"EOT";
$name [-f from_encoding] [-t to_encoding] [-s string] [files...]
$name -l
$name -r encoding_alias
  -l,--list
     lists all available encodings
  -r,--resolve encoding_alias
    resolve encoding to its (Encode) canonical name
  -f,--from from_encoding
     when omitted, the current locale will be used
  -t,--to to_encoding
     when omitted, the current locale will be used
  -s,--string string
     "string" will be the input instead of STDIN or files
The following are mainly of interest to Encode hackers:
  -D,--debug          show debug information
  -C N | -c | -p      check the validity of the input
  -S,--scheme scheme  use the scheme for conversion
EOT
    print STDERR $usage;

    exit;
}
115
116__END__
117
118=head1 NAME
119
120piconv -- iconv(1), reinvented in perl
121
122=head1 SYNOPSIS
123
124  piconv [-f from_encoding] [-t to_encoding] [-s string] [files...]
125  piconv -l
126  piconv [-C N|-c|-p]
127  piconv -S scheme ...
128  piconv -r encoding
129  piconv -D ...
130  piconv -h
131
132=head1 DESCRIPTION
133
B<piconv> is a Perl version of B<iconv>, a character encoding converter
135widely available for various Unixen today.  This script was primarily
136a technology demonstrator for Perl 5.8.0, but you can use piconv in the
137place of iconv for virtually any case.
138
139piconv converts the character encoding of either STDIN or files
140specified in the argument and prints out to STDOUT.
141
142Here is the list of options.  Each option can be in short format (-f)
143or long (--from).
144
145=over 4
146
147=item -f,--from from_encoding
148
149Specifies the encoding you are converting from.  Unlike B<iconv>,
150this option can be omitted.  In such cases, the current locale is used.
151
152=item -t,--to to_encoding
153
154Specifies the encoding you are converting to.  Unlike B<iconv>,
155this option can be omitted.  In such cases, the current locale is used.
156
Note that at least one of -f and -t must be given: when both are
omitted, B<piconv> prints its usage message and exits.
159
160=item -s,--string I<string>
161
162uses I<string> instead of file for the source of text.
163
164=item -l,--list
165
166Lists all available encodings, one per line, in case-insensitive
167order.  Note that only the canonical names are listed; many aliases
168exist.  For example, the names are case-insensitive, and many standard
169and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850"
170instead of "cp850", or "winlatin1" for "cp1252".  See L<Encode::Supported>
171for a full discussion.
172
173=item -C,--check I<N>
174
175Check the validity of the stream if I<N> = 1.  When I<N> = -1, something
176interesting happens when it encounters an invalid character.
177
178=item -c
179
180Same as C<-C 1>.
181
182=item -p,--perlqq
183
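Sets the check value to C<Encode::FB_PERLQQ>, overriding any value given
with C<-C> or C<-c>.  See L<Encode> for what this fallback mode does.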
185
186=item -h,--help
187
188Show usage.
189
190=item -D,--debug
191
192Invokes debugging mode.  Primarily for Encode hackers.
193
194=item -S,--scheme scheme
195
196Selects which scheme is to be used for conversion.  Available schemes
197are as follows:
198
199=over 4
200
201=item from_to
202
203Uses Encode::from_to for conversion.  This is the default.
204
205=item decode_encode
206
207Input strings are decode()d then encode()d.  A straight two-step
208implementation.
209
210=item perlio
211
212The new perlIO layer is used.  NI-S' favorite.
213
214=back
215
216Like the I<-D> option, this is also for Encode hackers.
217
218=back
219
220=head1 SEE ALSO
221
L<iconv(1)>,
L<locale(3)>,
L<Encode>,
L<Encode::Supported>,
L<Encode::Alias>,
L<PerlIO>
228
229=cut
230