xref: /openbsd-src/gnu/usr.bin/perl/cpan/Encode/encoding.pm (revision ae3cb403620ab940fbaabb3055fac045a63d56b7)
1# $Id: encoding.pm,v 2.17 2015/09/15 13:53:27 dankogai Exp dankogai $
2package encoding;
3our $VERSION = sprintf "%d.%02d", q$Revision: 2.17 $ =~ /(\d+)/g;
4
5use Encode;
6use strict;
7use warnings;
8
# Compile-time switches used throughout this module:
#   DEBUG       - true when the PERL_ENCODE_DEBUG environment variable is set.
#   HAS_PERLIO  - true when PerlIO::encoding 0.02 or later can be loaded.
#   PERL_5_21_7 - true when running on perl v5.21.7 or later.
use constant {
    DEBUG => !!$ENV{PERL_ENCODE_DEBUG},

    # A failing eval {} returns an EMPTY LIST in list context, which would
    # shift every following key/value pair of this hash (HAS_PERLIO would
    # become the string 'PERL_5_21_7' and PERL_5_21_7 would never be
    # defined).  Forcing scalar context makes a failed load yield undef.
    HAS_PERLIO => scalar eval {
        require PerlIO::encoding;
        PerlIO::encoding->VERSION(0.02);
    },
    PERL_5_21_7 => $^V && $^V ge v5.21.7,
};
14
# _exception($name)
#
# Decide whether the given encoding name must be treated as an exception,
# i.e. whether import() should refrain from installing it as the script
# encoding.  Returns 1 only when all of the following hold: the perl is not
# newer than 5.008, $name is one of the UTF names listed below, and
# $Config{perl_patchlevel} is false.  Returns 0 in every other case.
sub _exception {
    my ($encoding_name) = @_;

    # 5.8.1 or higher: never an exception.
    return 0 if $] > 5.008;

    my @utf_names = qw(utf8 UCS-2BE UCS-2LE UTF-16 UTF-16BE UTF-16LE
      UTF-32 UTF-32BE UTF-32LE);
    my %is_utf;
    @is_utf{@utf_names} = (1) x @utf_names;

    # Only the UTF family can be an exception.
    return 0 unless $is_utf{$encoding_name};

    require Config;
    Config->import();
    our %Config;

    # A maintenance perl (perl_patchlevel set) is not an exception either.
    if ( $Config{perl_patchlevel} ) {
        return 0;
    }
    return 1;
}
27
# in_locale()
#
# Returns $^H masked with $locale::hint_bits.  When $locale::hint_bits is
# unset or false the mask is 0, so the result is 0.
sub in_locale {
    my $locale_bits = $locale::hint_bits || 0;
    return $^H & $locale_bits;
}
29
# _get_locale_encoding()
#
# Work out an encoding name from the environment the program runs in.
# Three sources are consulted, in this order:
#
#   1. On MSWin32 only: the console output code page, then the ANSI code
#      page.  Returns 'UTF-8' for code page 65001, otherwise "cp<number>".
#   2. I18N::Langinfo's CODESET, canonized through find_encoding().
#   3. The part after the "." in the current LC_CTYPE locale name as
#      reported by POSIX::setlocale(), with a bare "euc" refined by the
#      language/country part.
#
# Returns the encoding name, or undef when none of the sources yields one.
sub _get_locale_encoding {

    if ( $^O eq 'MSWin32' ) {
        my @probes = (

            # First try to get the OutputCP.  This will work only if we
            # are attached to a console.
            [ 'Win32.pm'         => 'Win32::GetConsoleOutputCP' ],
            [ 'Win32/Console.pm' => 'Win32::Console::OutputCP' ],

            # If the above failed, we are a GUI app; assume that the ANSI
            # code page is what matters.
            [ 'Win32.pm' => 'Win32::GetACP' ],
        );
        for my $probe (@probes) {
            my ( $module_file, $function ) = @{$probe};
            my $code_page = eval {
                require $module_file;
                no strict 'refs';
                &{$function}();
            };
            next unless $code_page;

            # 65001 is the code page for UTF-8.
            return $code_page == 65001 ? 'UTF-8' : 'cp' . $code_page;
        }
    }

    # I18N::Langinfo isn't available everywhere, hence the eval.
    my $from_langinfo = eval {
        require I18N::Langinfo;
        my $codeset = I18N::Langinfo::langinfo( I18N::Langinfo::CODESET() );
        find_encoding($codeset)->name;
    };
    return $from_langinfo if defined $from_langinfo;

    my $locale_encoding;
    eval {
        require POSIX;

        # Get the current locale.
        # Remember that the MSVCRT implementation is quite different from
        # the Unix ones.
        my $locale = POSIX::setlocale( POSIX::LC_CTYPE() );
        if ( $locale =~ /^([^.]+)\.([^.@]+)(?:@.*)?$/ ) {
            my $country_language = $1;
            $locale_encoding = $2;

            # Could do more heuristics based on the country and language
            # since we have Locale::Country and Locale::Language available.
            # TODO: get a database of Language -> Encoding mappings
            # (the Estonian database at http://www.eki.ee/letter/
            # would be excellent!) --jhi
            if ( lc($locale_encoding) eq 'euc' ) {

                # Checked in order; the first matching pattern wins.
                my @euc_flavours = (
                    [ qr/^ja_JP|japan(?:ese)?$/i  => 'euc-jp' ],
                    [ qr/^ko_KR|korean?$/i        => 'euc-kr' ],
                    [ qr/^zh_CN|chin(?:a|ese)$/i  => 'euc-cn' ],
                    [ qr/^zh_TW|taiwan(?:ese)?$/i => 'euc-tw' ],
                );
                my $resolved;
                for my $flavour (@euc_flavours) {
                    my ( $pattern, $encoding ) = @{$flavour};
                    next unless $country_language =~ $pattern;
                    $resolved = $encoding;
                    last;
                }

                # This croak is raised inside the enclosing eval, so it
                # does not propagate to the caller; the unrefined name is
                # what ends up being returned.
                unless ( defined $resolved ) {
                    require Carp;
                    Carp::croak(
                        "encoding: Locale encoding '$locale_encoding' too ambiguous"
                    );
                }
                $locale_encoding = $resolved;
            }
        }
    };

    return $locale_encoding;
}
109
# import($class, $name, %arg)
#
# Implements "use encoding NAME, ...".
#
#   $name  Encoding name.  Two special values are recognised:
#            ':_get_locale_encoding'  export _get_locale_encoding() into the
#                                     caller's package and do nothing else;
#            ':locale'                use the result of _get_locale_encoding().
#   %arg   Optional settings:
#            Filter => true    decode the whole source through a source
#                              filter instead of setting the script encoding;
#            STDIN  => ENC     encoding for STDIN  (defaults to $name);
#            STDOUT => ENC     encoding for STDOUT (defaults to $name).
#          A key that exists with a false value leaves that handle untouched.
#
# Croaks on EBCDIC platforms, when no name is given, when an encoding is
# unknown, and when changing the layers of STDIN/STDOUT dies.
# Returns 1, or an empty return for ':_get_locale_encoding'.
sub import {

    if ( ord("A") == 193 ) {
        require Carp;
        Carp::croak("encoding: pragma does not support EBCDIC platforms");
    }

    if ( $] >= 5.017 ) {
        warnings::warnif( "deprecated",
            "Use of the encoding pragma is deprecated" );
    }
    my $class = shift;
    my $name  = shift;
    if ( !$name ) {
        require Carp;
        Carp::croak("encoding: no encoding specified.");
    }
    if ( $name eq ':_get_locale_encoding' ) {    # used by lib/open.pm
        my $caller = caller();
        {
            no strict 'refs';
            *{"${caller}::_get_locale_encoding"} = \&_get_locale_encoding;
        }
        return;
    }
    $name = _get_locale_encoding() if $name eq ':locale';
    my %arg = @_;

    # A false $name has already croaked above, so $name can only be
    # undefined here when the ':locale' lookup found nothing.
    $name = $ENV{PERL_ENCODING} unless defined $name;
    my $enc = find_encoding($name);
    unless ( defined $enc ) {
        require Carp;
        Carp::croak("encoding: Unknown encoding '$name'");
    }
    $name = $enc->name;    # canonize
    unless ( $arg{Filter} ) {
        DEBUG and warn "_exception($name) = ", _exception($name);
        if ( !_exception($name) ) {
            if ( !PERL_5_21_7 ) {
                ${^ENCODING} = $enc;
            }
            else {
                # Starting with 5.21.7, this pragma uses a shadow variable
                # designed explicitly for it, ${^E_NCODING}, to enforce
                # lexical scope; instead of ${^ENCODING}.
                $^H{'encoding'} = 1;
                ${^E_NCODING} = $enc;
            }
        }
        HAS_PERLIO or return 1;
    }
    else {
        defined( ${^ENCODING} ) and undef ${^ENCODING};
        undef ${^E_NCODING} if PERL_5_21_7;

        # implicitly 'use utf8'
        require utf8;    # to fetch $utf8::hint_bits;
        $^H |= $utf8::hint_bits;

        # Best effort: a failure to load or install the filter is ignored.
        eval {
            require Filter::Util::Call;
            Filter::Util::Call->import;
            filter_add(
                sub {
                    my $status = filter_read();
                    if ( $status > 0 ) {
                        $_ = $enc->decode( $_, 1 );
                        DEBUG and warn $_;
                    }
                    $status;
                }
            );
        };
        $@ eq '' and DEBUG and warn "Filter installed";
    }

    # When ${^UNICODE} is set and non-zero the handles are left alone.
    defined ${^UNICODE} and ${^UNICODE} != 0 and return 1;
    for my $h (qw(STDIN STDOUT)) {

        # Track the error of THIS iteration only.  Testing the global $@
        # after the branches would also pick up a stale error left behind
        # by an earlier, unrelated eval whenever no eval runs here (for
        # example with "STDIN => undef").
        my $error = '';
        if ( $arg{$h} ) {
            unless ( defined find_encoding( $arg{$h} ) ) {
                require Carp;
                Carp::croak(
                    "encoding: Unknown encoding for $h, '$arg{$h}'");
            }
            eval { binmode( $h, ":raw :encoding($arg{$h})" ) };
            $error = $@;
        }
        elsif ( !exists $arg{$h} ) {
            eval {
                no warnings 'uninitialized';
                binmode( $h, ":raw :encoding($name)" );
            };
            $error = $@;
        }
        if ($error) {
            require Carp;
            Carp::croak($error);
        }
    }
    return 1;    # I doubt if we need it, though
}
208
# unimport()
#
# Implements "no encoding".  Clears the script encoding variables, resets
# STDIN and STDOUT (to ":raw" when HAS_PERLIO is true, otherwise with a plain
# binmode), and removes the source filter if Filter::Util::Call has been
# loaded.  All warnings are silenced for the duration of the call.
sub unimport {
    no warnings;

    undef ${^ENCODING};
    PERL_5_21_7 and undef ${^E_NCODING};

    for my $handle ( \*STDIN, \*STDOUT ) {
        if (HAS_PERLIO) {
            binmode( $handle, ":raw" );
        }
        else {
            binmode($handle);
        }
    }

    # filter_del() is only attempted once the filter module is loaded;
    # any failure is deliberately ignored.
    if ( $INC{"Filter/Util/Call.pm"} ) {
        eval { filter_del() };
    }
}
225
2261;
227__END__
228
229=pod
230
231=head1 NAME
232
233encoding - allows you to write your script in non-ASCII and non-UTF-8
234
235=head1 WARNING
236
237This module has been deprecated since perl v5.18.  See L</DESCRIPTION> and
238L</BUGS>.
239
240=head1 SYNOPSIS
241
242  use encoding "greek";  # Perl like Greek to you?
243  use encoding "euc-jp"; # Jperl!
244
245  # or you can even do this if your shell supports your native encoding
246
247  perl -Mencoding=latin2 -e'...' # Feeling centrally European?
248  perl -Mencoding=euc-kr -e'...' # Or Korean?
249
250  # more control
251
252  # A simple euc-cn => utf-8 converter
253  use encoding "euc-cn", STDOUT => "utf8";  while(<>){print};
254
255  # "no encoding;" supported
256  no encoding;
257
258  # an alternate way, Filter
259  use encoding "euc-jp", Filter=>1;
260  # now you can use kanji identifiers -- in euc-jp!
261
262  # encode based on the current locale - specialized purposes only;
263  # fraught with danger!!
264  use encoding ':locale';
265
266=head1 DESCRIPTION
267
268This pragma is used to enable a Perl script to be written in encodings that
269aren't strictly ASCII nor UTF-8.  It translates all or portions of the Perl
270program script from a given encoding into UTF-8, and changes the PerlIO layers
271of C<STDIN> and C<STDOUT> to the encoding specified.
272
273This pragma dates from the days when UTF-8-enabled editors were uncommon.  But
274that was long ago, and the need for it is greatly diminished.  That, coupled
275with the fact that it doesn't work with threads, along with other problems,
276(see L</BUGS>) have led to its being deprecated.  It is planned to remove this
277pragma in a future Perl version.  New code should be written in UTF-8, and the
278C<use utf8> pragma used instead (see L<perluniintro> and L<utf8> for details).
279Old code should be converted to UTF-8, via something like the recipe in the
280L</SYNOPSIS> (though this simple approach may require manual adjustments
281afterwards).
282
283The only legitimate use of this pragma is almost certainly just one per file,
284near the top, with file scope, as the file is likely going to only be written
285in one encoding.  Further restrictions apply in Perls before v5.22 (see
286L</Prior to Perl v5.22>).
287
There are two basic modes of operation (plus turning it off):
289
290=over 4
291
292=item C<use encoding ['I<ENCNAME>'] ;>
293
294This is the normal operation.  It translates various literals encountered in
295the Perl source file from the encoding I<ENCNAME> into UTF-8, and similarly
296converts character code points.  This is used when the script is a combination
297of ASCII (for the variable names and punctuation, I<etc>), but the literal
298data is in the specified encoding.
299
300I<ENCNAME> is optional.  If omitted, the encoding specified in the environment
301variable L<C<PERL_ENCODING>|perlrun/PERL_ENCODING> is used.  If this isn't
302set, or the resolved-to encoding is not known to C<L<Encode>>, the error
303C<Unknown encoding 'I<ENCNAME>'> will be thrown.
304
305Starting in Perl v5.8.6 (C<Encode> version 2.0.1), I<ENCNAME> may be the
306name C<:locale>.  This is for very specialized applications, and is documented
307in L</The C<:locale> sub-pragma> below.
308
The literals that are converted are C<q//, qq//, qr//, qw//, qx//>, and
310starting in v5.8.1, C<tr///>.  Operations that do conversions include C<chr>,
311C<ord>, C<utf8::upgrade> (but not C<utf8::downgrade>), and C<chomp>.
312
313Also starting in v5.8.1, the C<DATA> pseudo-filehandle is translated from the
314encoding into UTF-8.
315
316For example, you can write code in EUC-JP as follows:
317
318  my $Rakuda = "\xF1\xD1\xF1\xCC"; # Camel in Kanji
319               #<-char-><-char->   # 4 octets
320  s/\bCamel\b/$Rakuda/;
321
322And with C<use encoding "euc-jp"> in effect, it is the same thing as
323that code in UTF-8:
324
325  my $Rakuda = "\x{99F1}\x{99DD}"; # two Unicode Characters
326  s/\bCamel\b/$Rakuda/;
327
328See L</EXAMPLE> below for a more complete example.
329
330Unless C<${^UNICODE}> (available starting in v5.8.2) exists and is non-zero, the
331PerlIO layers of C<STDIN> and C<STDOUT> are set to "C<:encoding(I<ENCNAME>)>".
332Therefore,
333
334  use encoding "euc-jp";
335  my $message = "Camel is the symbol of perl.\n";
336  my $Rakuda = "\xF1\xD1\xF1\xCC"; # Camel in Kanji
337  $message =~ s/\bCamel\b/$Rakuda/;
338  print $message;
339
340will print
341
342 "\xF1\xD1\xF1\xCC is the symbol of perl.\n"
343
344not
345
346 "\x{99F1}\x{99DD} is the symbol of perl.\n"
347
348You can override this by giving extra arguments; see below.
349
350Note that C<STDERR> WILL NOT be changed, regardless.
351
352Also note that non-STD file handles remain unaffected.  Use C<use
353open> or C<binmode> to change the layers of those.
354
355=item C<use encoding I<ENCNAME> Filter=E<gt>1;>
356
357This operates as above, but the C<Filter> argument with a non-zero
358value causes the entire script, and not just literals, to be translated from
359the encoding into UTF-8.  This allows identifiers in the source to be in that
360encoding as well.  (Problems may occur if the encoding is not a superset of
361ASCII; imagine all your semi-colons being translated into something
362different.)  One can use this form to make
363
364 ${"\x{4eba}"}++
365
366work.  (This is equivalent to C<$I<human>++>, where I<human> is a single Han
367ideograph).
368
369This effectively means that your source code behaves as if it were written in
UTF-8 with C<use utf8> in effect.  So even if your editor only supports
371Shift_JIS, for example, you can still try examples in Chapter 15 of
372C<Programming Perl, 3rd Ed.>.
373
374This option is significantly slower than the other one.
375
376=item C<no encoding;>
377
378Unsets the script encoding. The layers of C<STDIN>, C<STDOUT> are
379reset to "C<:raw>" (the default unprocessed raw stream of bytes).
380
381=back
382
383=head1 OPTIONS
384
385=head2 Setting C<STDIN> and/or C<STDOUT> individually
386
387The encodings of C<STDIN> and C<STDOUT> are individually settable by parameters to
388the pragma:
389
390 use encoding 'euc-tw', STDIN => 'greek'  ...;
391
392In this case, you cannot omit the first I<ENCNAME>.  C<< STDIN => undef >>
393turns the I/O transcoding completely off for that filehandle.
394
395When C<${^UNICODE}> (available starting in v5.8.2) exists and is non-zero,
396these options will be completely ignored.  See L<perlvar/C<${^UNICODE}>> and
397L<"C<-C>" in perlrun|perlrun/-C [numberE<sol>list]> for details.
398
399=head2 The C<:locale> sub-pragma
400
401Starting in v5.8.6, the encoding name may be C<:locale>.  This means that the
402encoding is taken from the current locale, and not hard-coded by the pragma.
403Since a script really can only be encoded in exactly one encoding, this option
404is dangerous.  It makes sense only if the script itself is written in ASCII,
405and all the possible locales that will be in use when the script is executed
406are supersets of ASCII.  That means that the script itself doesn't get
407changed, but the I/O handles have the specified encoding added, and the
408operations like C<chr> and C<ord> use that encoding.
409
410The logic of finding which locale C<:locale> uses is as follows:
411
412=over 4
413
414=item 1.
415
416If the platform supports the C<langinfo(CODESET)> interface, the codeset
417returned is used as the default encoding for the open pragma.
418
419=item 2.
420
421If 1. didn't work but we are under the locale pragma, the environment
422variables C<LC_ALL> and C<LANG> (in that order) are matched for encodings
423(the part after "C<.>", if any), and if any found, that is used
424as the default encoding for the open pragma.
425
426=item 3.
427
428If 1. and 2. didn't work, the environment variables C<LC_ALL> and C<LANG>
429(in that order) are matched for anything looking like UTF-8, and if
430any found, C<:utf8> is used as the default encoding for the open
431pragma.
432
433=back
434
435If your locale environment variables (C<LC_ALL>, C<LC_CTYPE>, C<LANG>)
436contain the strings 'UTF-8' or 'UTF8' (case-insensitive matching),
437the default encoding of your C<STDIN>, C<STDOUT>, and C<STDERR>, and of
438B<any subsequent file open>, is UTF-8.
439
440=head1 CAVEATS
441
442=head2 SIDE EFFECTS
443
444=over
445
446=item *
447
If the C<encoding> pragma is in scope then the lengths returned by C<chomp>
are calculated from the length of C<$/> in Unicode characters, which is not
always the same as the length of C<$/> in the native encoding.
451
452=item *
453
454Without this pragma, if strings operating under byte semantics and strings
455with Unicode character data are concatenated, the new string will
456be created by decoding the byte strings as I<ISO 8859-1 (Latin-1)>.
457
458The B<encoding> pragma changes this to use the specified encoding
459instead.  For example:
460
461    use encoding 'utf8';
462    my $string = chr(20000); # a Unicode string
463    utf8::encode($string);   # now it's a UTF-8 encoded byte string
464    # concatenate with another Unicode string
465    print length($string . chr(20000));
466
467Will print C<2>, because C<$string> is upgraded as UTF-8.  Without
468C<use encoding 'utf8';>, it will print C<4> instead, since C<$string>
469is three octets when interpreted as Latin-1.
470
471=back
472
473=head2 DO NOT MIX MULTIPLE ENCODINGS
474
475Notice that only literals (string or regular expression) having only
476legacy code points are affected: if you mix data like this
477
478    \x{100}\xDF
479    \xDF\x{100}
480
481the data is assumed to be in (Latin 1 and) Unicode, not in your native
482encoding.  In other words, this will match in "greek":
483
484    "\xDF" =~ /\x{3af}/
485
486but this will not
487
488    "\xDF\x{100}" =~ /\x{3af}\x{100}/
489
490since the C<\xDF> (ISO 8859-7 GREEK SMALL LETTER IOTA WITH TONOS) on
491the left will B<not> be upgraded to C<\x{3af}> (Unicode GREEK SMALL
492LETTER IOTA WITH TONOS) because of the C<\x{100}> on the left.  You
493should not be mixing your legacy data and Unicode in the same string.
494
495This pragma also affects encoding of the 0x80..0xFF code point range:
496normally characters in that range are left as eight-bit bytes (unless
497they are combined with characters with code points 0x100 or larger,
498in which case all characters need to become UTF-8 encoded), but if
499the C<encoding> pragma is present, even the 0x80..0xFF range always
500gets UTF-8 encoded.
501
502After all, the best thing about this pragma is that you don't have to
503resort to \x{....} just to spell your name in a native encoding.
504So feel free to put your strings in your encoding in quotes and
505regexes.
506
507=head2 Prior to Perl v5.22
508
509The pragma was a per script, not a per block lexical.  Only the last
510C<use encoding> or C<no encoding> mattered, and it affected
511B<the whole script>.  However, the C<no encoding> pragma was supported and
512C<use encoding> could appear as many times as you want in a given script
513(though only the last was effective).
514
515Since the scope wasn't lexical, other modules' use of C<chr>, C<ord>, I<etc.>
516were affected.  This leads to spooky, incorrect action at a distance that is
517hard to debug.
518
519This means you would have to be very careful of the load order:
520
521  # called module
522  package Module_IN_BAR;
523  use encoding "bar";
524  # stuff in "bar" encoding here
525  1;
526
527  # caller script
  use encoding "foo";
529  use Module_IN_BAR;
530  # surprise! use encoding "bar" is in effect.
531
532The best way to avoid this oddity is to use this pragma RIGHT AFTER
533other modules are loaded.  i.e.
534
535  use Module_IN_BAR;
536  use encoding "foo";
537
538=head2 Prior to Encode version 1.87
539
540=over
541
542=item *
543
544C<STDIN> and C<STDOUT> were not set under the filter option.
545And C<< STDIN=>I<ENCODING> >> and C<< STDOUT=>I<ENCODING> >> didn't work like
546non-filter version.
547
548=item *
549
550C<use utf8> wasn't implicitly declared so you have to C<use utf8> to do
551
552 ${"\x{4eba}"}++
553
554=back
555
556=head2 Prior to Perl v5.8.1
557
558=over
559
560=item "NON-EUC" doublebyte encodings
561
562Because perl needs to parse the script before applying this pragma, such
563encodings as Shift_JIS and Big-5 that may contain C<'\'> (BACKSLASH;
564C<\x5c>) in the second byte fail because the second byte may
565accidentally escape the quoting character that follows.
566
567=item C<tr///>
568
569The B<encoding> pragma works by decoding string literals in
C<q//, qq//, qr//, qw//, qx//> and so forth.  In perl v5.8.0, this
571does not apply to C<tr///>.  Therefore,
572
573  use encoding 'euc-jp';
574  #....
575  $kana =~ tr/\xA4\xA1-\xA4\xF3/\xA5\xA1-\xA5\xF3/;
576  #           -------- -------- -------- --------
577
578Does not work as
579
580  $kana =~ tr/\x{3041}-\x{3093}/\x{30a1}-\x{30f3}/;
581
582=over
583
584=item Legend of characters above
585
586  utf8     euc-jp   charnames::viacode()
587  -----------------------------------------
588  \x{3041} \xA4\xA1 HIRAGANA LETTER SMALL A
589  \x{3093} \xA4\xF3 HIRAGANA LETTER N
590  \x{30a1} \xA5\xA1 KATAKANA LETTER SMALL A
591  \x{30f3} \xA5\xF3 KATAKANA LETTER N
592
593=back
594
595This counterintuitive behavior has been fixed in perl v5.8.1.
596
597In perl v5.8.0, you can work around this as follows;
598
599  use encoding 'euc-jp';
600  #  ....
601  eval qq{ \$kana =~ tr/\xA4\xA1-\xA4\xF3/\xA5\xA1-\xA5\xF3/ };
602
603Note the C<tr//> expression is surrounded by C<qq{}>.  The idea behind
604this is the same as the classic idiom that makes C<tr///> 'interpolate':
605
606   tr/$from/$to/;            # wrong!
607   eval qq{ tr/$from/$to/ }; # workaround.
608
609=back
610
611=head1 EXAMPLE - Greekperl
612
613    use encoding "iso 8859-7";
614
615    # \xDF in ISO 8859-7 (Greek) is \x{3af} in Unicode.
616
617    $a = "\xDF";
618    $b = "\x{100}";
619
620    printf "%#x\n", ord($a); # will print 0x3af, not 0xdf
621
622    $c = $a . $b;
623
624    # $c will be "\x{3af}\x{100}", not "\x{df}\x{100}".
625
626    # chr() is affected, and ...
627
628    print "mega\n"  if ord(chr(0xdf)) == 0x3af;
629
630    # ... ord() is affected by the encoding pragma ...
631
632    print "tera\n" if ord(pack("C", 0xdf)) == 0x3af;
633
634    # ... as are eq and cmp ...
635
636    print "peta\n" if "\x{3af}" eq  pack("C", 0xdf);
637    print "exa\n"  if "\x{3af}" cmp pack("C", 0xdf) == 0;
638
639    # ... but pack/unpack C are not affected, in case you still
640    # want to go back to your native encoding
641
642    print "zetta\n" if unpack("C", (pack("C", 0xdf))) == 0xdf;
643
644=head1 BUGS
645
646=over
647
648=item Thread safety
649
650C<use encoding ...> is not thread-safe (i.e., do not use in threaded
651applications).
652
653=item Can't be used by more than one module in a single program.
654
655Only one encoding is allowed.  If you combine modules in a program that have
656different encodings, only one will be actually used.
657
658=item Other modules using C<STDIN> and C<STDOUT> get the encoded stream
659
660They may be expecting something completely different.
661
662=item literals in regex that are longer than 127 bytes
663
664For native multibyte encodings (either fixed or variable length),
665the current implementation of the regular expressions may introduce
666recoding errors for regular expression literals longer than 127 bytes.
667
668=item EBCDIC
669
670The encoding pragma is not supported on EBCDIC platforms.
671
672=item C<format>
673
674This pragma doesn't work well with C<format> because PerlIO does not
675get along very well with it.  When C<format> contains non-ASCII
676characters it prints funny or gets "wide character warnings".
677To understand it, try the code below.
678
679  # Save this one in utf8
680  # replace *non-ascii* with a non-ascii string
681  my $camel;
682  format STDOUT =
683  *non-ascii*@>>>>>>>
684  $camel
685  .
686  $camel = "*non-ascii*";
687  binmode(STDOUT=>':encoding(utf8)'); # bang!
688  write;              # funny
689  print $camel, "\n"; # fine
690
Without the C<binmode> call, C<write> happens to work, but then C<print>
fails instead.
693
694At any rate, the very use of C<format> is questionable when it comes to
695unicode characters since you have to consider such things as character
696width (i.e. double-width for ideographs) and directions (i.e. BIDI for
697Arabic and Hebrew).
698
699=item See also L</CAVEATS>
700
701=back
702
703=head1 HISTORY
704
705This pragma first appeared in Perl v5.8.0.  It has been enhanced in later
706releases as specified above.
707
708=head1 SEE ALSO
709
710L<perlunicode>, L<Encode>, L<open>, L<Filter::Util::Call>,
711
712Ch. 15 of C<Programming Perl (3rd Edition)>
713by Larry Wall, Tom Christiansen, Jon Orwant;
714O'Reilly & Associates; ISBN 0-596-00027-8
715
716=cut
717