xref: /onnv-gate/usr/src/cmd/perl/5.8.4/distrib/ext/Encode/JP/JP.pm (revision 0:68f95e015346)
package Encode::JP;

# Loader for the Japanese encodings of the Encode distribution.
# Loading this file pulls in Encode, the XS part of this package (via
# XSLoader), and Encode::JP::JIS7.

BEGIN {
    # ord("A") is 193 only on EBCDIC platforms; refuse to load there.
    # This runs at compile time, before any of the "use" lines below.
    if (ord("A") == 193) {
        die "Encode::JP not supported on EBCDIC\n";
    }
}
use strict;
use warnings;
use Encode;

# Build the version string from the RCS "Revision" keyword: the first
# number is printed as-is, each following number as two digits
# (e.g. "1.25").
our $VERSION = do { my @r = (q$Revision: 1.25 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };

use XSLoader;
# Load the compiled (XS) half of this package; $VERSION is passed so
# XSLoader can check it against the version of the compiled object.
XSLoader::load(__PACKAGE__,$VERSION);

# NOTE(review): presumably provides the 7bit-jis / iso-2022-jp family
# listed in the POD below -- confirm against Encode::JP::JIS7.
use Encode::JP::JIS7;

1;
__END__
17*0Sstevel@tonic-gate
18*0Sstevel@tonic-gate=head1 NAME
19*0Sstevel@tonic-gate
20*0Sstevel@tonic-gateEncode::JP - Japanese Encodings
21*0Sstevel@tonic-gate
22*0Sstevel@tonic-gate=head1 SYNOPSIS
23*0Sstevel@tonic-gate
24*0Sstevel@tonic-gate    use Encode qw/encode decode/;
25*0Sstevel@tonic-gate    $euc_jp = encode("euc-jp", $utf8);   # loads Encode::JP implicitly
26*0Sstevel@tonic-gate    $utf8   = decode("euc-jp", $euc_jp); # ditto
27*0Sstevel@tonic-gate
28*0Sstevel@tonic-gate=head1 ABSTRACT
29*0Sstevel@tonic-gate
30*0Sstevel@tonic-gateThis module implements Japanese charset encodings.  Encodings
31*0Sstevel@tonic-gatesupported are as follows.
32*0Sstevel@tonic-gate
33*0Sstevel@tonic-gate  Canonical   Alias		Description
34*0Sstevel@tonic-gate  --------------------------------------------------------------------
35*0Sstevel@tonic-gate  euc-jp      /\beuc.*jp$/i	EUC (Extended Unix Code)
36*0Sstevel@tonic-gate              /\bjp.*euc/i
37*0Sstevel@tonic-gate	      /\bujis$/i
38*0Sstevel@tonic-gate  shiftjis    /\bshift.*jis$/i	Shift JIS (aka MS Kanji)
39*0Sstevel@tonic-gate	      /\bsjis$/i
40*0Sstevel@tonic-gate  7bit-jis    /\bjis$/i		7bit JIS
41*0Sstevel@tonic-gate  iso-2022-jp			ISO-2022-JP                  [RFC1468]
42*0Sstevel@tonic-gate				= 7bit JIS with all Halfwidth Kana
43*0Sstevel@tonic-gate				  converted to Fullwidth
44*0Sstevel@tonic-gate  iso-2022-jp-1			ISO-2022-JP-1                [RFC2237]
45*0Sstevel@tonic-gate                                = ISO-2022-JP with JIS X 0212-1990
46*0Sstevel@tonic-gate				  support.  See below
47*0Sstevel@tonic-gate  MacJapanese	                Shift JIS + Apple vendor mappings
48*0Sstevel@tonic-gate  cp932                         Code Page 932
49*0Sstevel@tonic-gate                                = Shift JIS + MS/IBM vendor mappings
50*0Sstevel@tonic-gate  jis0201-raw                   JIS0201, raw format
51*0Sstevel@tonic-gate  jis0208-raw                   JIS0208, raw format
52*0Sstevel@tonic-gate  jis0212-raw                   JIS0212, raw format
53*0Sstevel@tonic-gate  --------------------------------------------------------------------
54*0Sstevel@tonic-gate
55*0Sstevel@tonic-gate=head1 DESCRIPTION
56*0Sstevel@tonic-gate
57*0Sstevel@tonic-gateTo find out how to use this module in detail, see L<Encode>.
58*0Sstevel@tonic-gate
59*0Sstevel@tonic-gate=head1 Note on ISO-2022-JP(-1)?
60*0Sstevel@tonic-gate
61*0Sstevel@tonic-gateISO-2022-JP-1 (RFC2237) is a superset of ISO-2022-JP (RFC1468) which
62*0Sstevel@tonic-gateadds support for JIS X 0212-1990.  That means you can use the same
63*0Sstevel@tonic-gatecode to decode to utf8 but not vice versa.
64*0Sstevel@tonic-gate
65*0Sstevel@tonic-gate  $utf8 = decode('iso-2022-jp-1', $stream);
66*0Sstevel@tonic-gate
67*0Sstevel@tonic-gateand
68*0Sstevel@tonic-gate
69*0Sstevel@tonic-gate  $utf8 = decode('iso-2022-jp',   $stream);
70*0Sstevel@tonic-gate
71*0Sstevel@tonic-gateyield the same result but
72*0Sstevel@tonic-gate
73*0Sstevel@tonic-gate  $with_0212 = encode('iso-2022-jp-1', $utf8);
74*0Sstevel@tonic-gate
75*0Sstevel@tonic-gateis now different from
76*0Sstevel@tonic-gate
77*0Sstevel@tonic-gate  $without_0212 = encode('iso-2022-jp', $utf8 );
78*0Sstevel@tonic-gate
79*0Sstevel@tonic-gateIn the latter case, characters that map to 0212 are first converted
80*0Sstevel@tonic-gateto U+3013 (0xA2AE in EUC-JP; a white square also known as 'Tofu' or
81*0Sstevel@tonic-gate'geta mark') then fed to the encoding engine.  U+FFFD is not used,
82*0Sstevel@tonic-gatein order to preserve text layout as much as possible.
83*0Sstevel@tonic-gate
84*0Sstevel@tonic-gate=head1 BUGS
85*0Sstevel@tonic-gate
86*0Sstevel@tonic-gateThe ASCII region (0x00-0x7f) is preserved for all encodings, even
87*0Sstevel@tonic-gatethough this conflicts with mappings by the Unicode Consortium.  See
88*0Sstevel@tonic-gate
89*0Sstevel@tonic-gateL<http://www.debian.or.jp/~kubota/unicode-symbols.html.en>
90*0Sstevel@tonic-gate
91*0Sstevel@tonic-gateto find out why it is implemented that way.
92*0Sstevel@tonic-gate
93*0Sstevel@tonic-gate=head1 SEE ALSO
94*0Sstevel@tonic-gate
95*0Sstevel@tonic-gateL<Encode>
96*0Sstevel@tonic-gate
97*0Sstevel@tonic-gate=cut
98