xref: /onnv-gate/usr/src/cmd/perl/5.8.4/distrib/lib/open.pm (revision 0:68f95e015346)
1*0Sstevel@tonic-gatepackage open;
2*0Sstevel@tonic-gateuse warnings;
3*0Sstevel@tonic-gateuse Carp;
4*0Sstevel@tonic-gate$open::hint_bits = 0x20000; # HINT_LOCALIZE_HH
5*0Sstevel@tonic-gate
6*0Sstevel@tonic-gateour $VERSION = '1.03';
7*0Sstevel@tonic-gate
8*0Sstevel@tonic-gatemy $locale_encoding;
9*0Sstevel@tonic-gate
10*0Sstevel@tonic-gatesub in_locale { $^H & ($locale::hint_bits || 0)}
11*0Sstevel@tonic-gate
12*0Sstevel@tonic-gatesub _get_locale_encoding {
13*0Sstevel@tonic-gate    unless (defined $locale_encoding) {
14*0Sstevel@tonic-gate	# I18N::Langinfo isn't available everywhere
15*0Sstevel@tonic-gate	eval {
16*0Sstevel@tonic-gate	    require I18N::Langinfo;
17*0Sstevel@tonic-gate	    I18N::Langinfo->import(qw(langinfo CODESET));
18*0Sstevel@tonic-gate	    $locale_encoding = langinfo(CODESET());
19*0Sstevel@tonic-gate	};
20*0Sstevel@tonic-gate	my $country_language;
21*0Sstevel@tonic-gate
22*0Sstevel@tonic-gate	no warnings 'uninitialized';
23*0Sstevel@tonic-gate
24*0Sstevel@tonic-gate        if (not $locale_encoding && in_locale()) {
25*0Sstevel@tonic-gate	    if ($ENV{LC_ALL} =~ /^([^.]+)\.([^.]+)$/) {
26*0Sstevel@tonic-gate		($country_language, $locale_encoding) = ($1, $2);
27*0Sstevel@tonic-gate	    } elsif ($ENV{LANG} =~ /^([^.]+)\.([^.]+)$/) {
28*0Sstevel@tonic-gate		($country_language, $locale_encoding) = ($1, $2);
29*0Sstevel@tonic-gate	    }
30*0Sstevel@tonic-gate	    # LANGUAGE affects only LC_MESSAGES only on glibc
31*0Sstevel@tonic-gate	} elsif (not $locale_encoding) {
32*0Sstevel@tonic-gate	    if ($ENV{LC_ALL} =~ /\butf-?8\b/i ||
33*0Sstevel@tonic-gate		$ENV{LANG}   =~ /\butf-?8\b/i) {
34*0Sstevel@tonic-gate		$locale_encoding = 'utf8';
35*0Sstevel@tonic-gate	    }
36*0Sstevel@tonic-gate	    # Could do more heuristics based on the country and language
37*0Sstevel@tonic-gate	    # parts of LC_ALL and LANG (the parts before the dot (if any)),
38*0Sstevel@tonic-gate	    # since we have Locale::Country and Locale::Language available.
39*0Sstevel@tonic-gate	    # TODO: get a database of Language -> Encoding mappings
40*0Sstevel@tonic-gate	    # (the Estonian database at http://www.eki.ee/letter/
41*0Sstevel@tonic-gate	    # would be excellent!) --jhi
42*0Sstevel@tonic-gate	}
43*0Sstevel@tonic-gate	if (defined $locale_encoding &&
44*0Sstevel@tonic-gate	    lc($locale_encoding) eq 'euc' &&
45*0Sstevel@tonic-gate	    defined $country_language) {
46*0Sstevel@tonic-gate	    if ($country_language =~ /^ja_JP|japan(?:ese)?$/i) {
47*0Sstevel@tonic-gate		$locale_encoding = 'euc-jp';
48*0Sstevel@tonic-gate	    } elsif ($country_language =~ /^ko_KR|korean?$/i) {
49*0Sstevel@tonic-gate		$locale_encoding = 'euc-kr';
50*0Sstevel@tonic-gate	    } elsif ($country_language =~ /^zh_CN|chin(?:a|ese)?$/i) {
51*0Sstevel@tonic-gate		$locale_encoding = 'euc-cn';
52*0Sstevel@tonic-gate	    } elsif ($country_language =~ /^zh_TW|taiwan(?:ese)?$/i) {
53*0Sstevel@tonic-gate		$locale_encoding = 'euc-tw';
54*0Sstevel@tonic-gate	    } else {
55*0Sstevel@tonic-gate		croak "Locale encoding 'euc' too ambiguous";
56*0Sstevel@tonic-gate	    }
57*0Sstevel@tonic-gate	}
58*0Sstevel@tonic-gate    }
59*0Sstevel@tonic-gate}
60*0Sstevel@tonic-gate
61*0Sstevel@tonic-gatesub import {
62*0Sstevel@tonic-gate    my ($class,@args) = @_;
63*0Sstevel@tonic-gate    croak("`use open' needs explicit list of PerlIO layers") unless @args;
64*0Sstevel@tonic-gate    my $std;
65*0Sstevel@tonic-gate    $^H |= $open::hint_bits;
66*0Sstevel@tonic-gate    my ($in,$out) = split(/\0/,(${^OPEN} || "\0"), -1);
67*0Sstevel@tonic-gate    while (@args) {
68*0Sstevel@tonic-gate	my $type = shift(@args);
69*0Sstevel@tonic-gate	my $dscp;
70*0Sstevel@tonic-gate	if ($type =~ /^:?(utf8|locale|encoding\(.+\))$/) {
71*0Sstevel@tonic-gate	    $type = 'IO';
72*0Sstevel@tonic-gate	    $dscp = ":$1";
73*0Sstevel@tonic-gate	} elsif ($type eq ':std') {
74*0Sstevel@tonic-gate	    $std = 1;
75*0Sstevel@tonic-gate	    next;
76*0Sstevel@tonic-gate	} else {
77*0Sstevel@tonic-gate	    $dscp = shift(@args) || '';
78*0Sstevel@tonic-gate	}
79*0Sstevel@tonic-gate	my @val;
80*0Sstevel@tonic-gate	foreach my $layer (split(/\s+/,$dscp)) {
81*0Sstevel@tonic-gate            $layer =~ s/^://;
82*0Sstevel@tonic-gate	    if ($layer eq 'locale') {
83*0Sstevel@tonic-gate		require Encode;
84*0Sstevel@tonic-gate		_get_locale_encoding()
85*0Sstevel@tonic-gate		    unless defined $locale_encoding;
86*0Sstevel@tonic-gate		(warnings::warnif("layer", "Cannot figure out an encoding to use"), last)
87*0Sstevel@tonic-gate		    unless defined $locale_encoding;
88*0Sstevel@tonic-gate		if ($locale_encoding =~ /^utf-?8$/i) {
89*0Sstevel@tonic-gate		    $layer = "utf8";
90*0Sstevel@tonic-gate		} else {
91*0Sstevel@tonic-gate		    $layer = "encoding($locale_encoding)";
92*0Sstevel@tonic-gate		}
93*0Sstevel@tonic-gate		$std = 1;
94*0Sstevel@tonic-gate	    } else {
95*0Sstevel@tonic-gate		my $target = $layer;		# the layer name itself
96*0Sstevel@tonic-gate		$target =~ s/^(\w+)\(.+\)$/$1/;	# strip parameters
97*0Sstevel@tonic-gate
98*0Sstevel@tonic-gate		unless(PerlIO::Layer::->find($target,1)) {
99*0Sstevel@tonic-gate		    warnings::warnif("layer", "Unknown PerlIO layer '$target'");
100*0Sstevel@tonic-gate		}
101*0Sstevel@tonic-gate	    }
102*0Sstevel@tonic-gate	    push(@val,":$layer");
103*0Sstevel@tonic-gate	    if ($layer =~ /^(crlf|raw)$/) {
104*0Sstevel@tonic-gate		$^H{"open_$type"} = $layer;
105*0Sstevel@tonic-gate	    }
106*0Sstevel@tonic-gate	}
107*0Sstevel@tonic-gate	if ($type eq 'IN') {
108*0Sstevel@tonic-gate	    $in  = join(' ',@val);
109*0Sstevel@tonic-gate	}
110*0Sstevel@tonic-gate	elsif ($type eq 'OUT') {
111*0Sstevel@tonic-gate	    $out = join(' ',@val);
112*0Sstevel@tonic-gate	}
113*0Sstevel@tonic-gate	elsif ($type eq 'IO') {
114*0Sstevel@tonic-gate	    $in = $out = join(' ',@val);
115*0Sstevel@tonic-gate	}
116*0Sstevel@tonic-gate	else {
117*0Sstevel@tonic-gate	    croak "Unknown PerlIO layer class '$type'";
118*0Sstevel@tonic-gate	}
119*0Sstevel@tonic-gate    }
120*0Sstevel@tonic-gate    ${^OPEN} = join("\0",$in,$out) if $in or $out;
121*0Sstevel@tonic-gate    if ($std) {
122*0Sstevel@tonic-gate	if ($in) {
123*0Sstevel@tonic-gate	    if ($in =~ /:utf8\b/) {
124*0Sstevel@tonic-gate		    binmode(STDIN,  ":utf8");
125*0Sstevel@tonic-gate		} elsif ($in =~ /(\w+\(.+\))/) {
126*0Sstevel@tonic-gate		    binmode(STDIN,  ":$1");
127*0Sstevel@tonic-gate		}
128*0Sstevel@tonic-gate	}
129*0Sstevel@tonic-gate	if ($out) {
130*0Sstevel@tonic-gate	    if ($out =~ /:utf8\b/) {
131*0Sstevel@tonic-gate		binmode(STDOUT,  ":utf8");
132*0Sstevel@tonic-gate		binmode(STDERR,  ":utf8");
133*0Sstevel@tonic-gate	    } elsif ($out =~ /(\w+\(.+\))/) {
134*0Sstevel@tonic-gate		binmode(STDOUT,  ":$1");
135*0Sstevel@tonic-gate		binmode(STDERR,  ":$1");
136*0Sstevel@tonic-gate	    }
137*0Sstevel@tonic-gate	}
138*0Sstevel@tonic-gate    }
139*0Sstevel@tonic-gate}
140*0Sstevel@tonic-gate
141*0Sstevel@tonic-gate1;
142*0Sstevel@tonic-gate__END__
143*0Sstevel@tonic-gate
144*0Sstevel@tonic-gate=head1 NAME
145*0Sstevel@tonic-gate
146*0Sstevel@tonic-gateopen - perl pragma to set default PerlIO layers for input and output
147*0Sstevel@tonic-gate
148*0Sstevel@tonic-gate=head1 SYNOPSIS
149*0Sstevel@tonic-gate
150*0Sstevel@tonic-gate    use open IN  => ":crlf", OUT => ":bytes";
151*0Sstevel@tonic-gate    use open OUT => ':utf8';
152*0Sstevel@tonic-gate    use open IO  => ":encoding(iso-8859-7)";
153*0Sstevel@tonic-gate
154*0Sstevel@tonic-gate    use open IO  => ':locale';
155*0Sstevel@tonic-gate
156*0Sstevel@tonic-gate    use open ':utf8';
157*0Sstevel@tonic-gate    use open ':locale';
158*0Sstevel@tonic-gate    use open ':encoding(iso-8859-7)';
159*0Sstevel@tonic-gate
160*0Sstevel@tonic-gate    use open ':std';
161*0Sstevel@tonic-gate
162*0Sstevel@tonic-gate=head1 DESCRIPTION
163*0Sstevel@tonic-gate
164*0Sstevel@tonic-gateFull-fledged support for I/O layers is now implemented provided
165*0Sstevel@tonic-gatePerl is configured to use PerlIO as its IO system (which is now the
166*0Sstevel@tonic-gatedefault).
167*0Sstevel@tonic-gate
168*0Sstevel@tonic-gateThe C<open> pragma serves as one of the interfaces to declare default
169*0Sstevel@tonic-gate"layers" (also known as "disciplines") for all I/O. Any two-argument
170*0Sstevel@tonic-gateopen(), readpipe() (aka qx//) and similar operators found within the
171*0Sstevel@tonic-gatelexical scope of this pragma will use the declared defaults.
172*0Sstevel@tonic-gateThree-argument opens are not affected by this pragma since there you
173*0Sstevel@tonic-gate(can) explicitly specify the layers and are supposed to know what you
174*0Sstevel@tonic-gateare doing.
175*0Sstevel@tonic-gate
176*0Sstevel@tonic-gateWith the C<IN> subpragma you can declare the default layers
177*0Sstevel@tonic-gateof input streams, and with the C<OUT> subpragma you can declare
178*0Sstevel@tonic-gatethe default layers of output streams.  With the C<IO>  subpragma
179*0Sstevel@tonic-gateyou can control both input and output streams simultaneously.
180*0Sstevel@tonic-gate
181*0Sstevel@tonic-gateIf you have a legacy encoding, you can use the C<:encoding(...)> tag.
182*0Sstevel@tonic-gate
183*0Sstevel@tonic-gateif you want to set your encoding layers based on your
184*0Sstevel@tonic-gatelocale environment variables, you can use the C<:locale> tag.
185*0Sstevel@tonic-gateFor example:
186*0Sstevel@tonic-gate
187*0Sstevel@tonic-gate    $ENV{LANG} = 'ru_RU.KOI8-R';
188*0Sstevel@tonic-gate    # the :locale will probe the locale environment variables like LANG
189*0Sstevel@tonic-gate    use open OUT => ':locale';
190*0Sstevel@tonic-gate    open(O, ">koi8");
191*0Sstevel@tonic-gate    print O chr(0x430); # Unicode CYRILLIC SMALL LETTER A = KOI8-R 0xc1
192*0Sstevel@tonic-gate    close O;
193*0Sstevel@tonic-gate    open(I, "<koi8");
194*0Sstevel@tonic-gate    printf "%#x\n", ord(<I>), "\n"; # this should print 0xc1
195*0Sstevel@tonic-gate    close I;
196*0Sstevel@tonic-gate
197*0Sstevel@tonic-gateThese are equivalent
198*0Sstevel@tonic-gate
199*0Sstevel@tonic-gate    use open ':utf8';
200*0Sstevel@tonic-gate    use open IO => ':utf8';
201*0Sstevel@tonic-gate
202*0Sstevel@tonic-gateas are these
203*0Sstevel@tonic-gate
204*0Sstevel@tonic-gate    use open ':locale';
205*0Sstevel@tonic-gate    use open IO => ':locale';
206*0Sstevel@tonic-gate
207*0Sstevel@tonic-gateand these
208*0Sstevel@tonic-gate
209*0Sstevel@tonic-gate    use open ':encoding(iso-8859-7)';
210*0Sstevel@tonic-gate    use open IO => ':encoding(iso-8859-7)';
211*0Sstevel@tonic-gate
212*0Sstevel@tonic-gateThe matching of encoding names is loose: case does not matter, and
213*0Sstevel@tonic-gatemany encodings have several aliases.  See L<Encode::Supported> for
214*0Sstevel@tonic-gatedetails and the list of supported locales.
215*0Sstevel@tonic-gate
216*0Sstevel@tonic-gateNote that C<:utf8> PerlIO layer must always be specified exactly like
217*0Sstevel@tonic-gatethat, it is not subject to the loose matching of encoding names.
218*0Sstevel@tonic-gate
219*0Sstevel@tonic-gateWhen open() is given an explicit list of layers they are appended to
220*0Sstevel@tonic-gatethe list declared using this pragma.
221*0Sstevel@tonic-gate
222*0Sstevel@tonic-gateThe C<:std> subpragma on its own has no effect, but if combined with
223*0Sstevel@tonic-gatethe C<:utf8> or C<:encoding> subpragmas, it converts the standard
224*0Sstevel@tonic-gatefilehandles (STDIN, STDOUT, STDERR) to comply with encoding selected
225*0Sstevel@tonic-gatefor input/output handles.  For example, if both input and out are
226*0Sstevel@tonic-gatechosen to be C<:utf8>, a C<:std> will mean that STDIN, STDOUT, and
227*0Sstevel@tonic-gateSTDERR are also in C<:utf8>.  On the other hand, if only output is
228*0Sstevel@tonic-gatechosen to be in C<< :encoding(koi8r) >>, a C<:std> will cause only the
229*0Sstevel@tonic-gateSTDOUT and STDERR to be in C<koi8r>.  The C<:locale> subpragma
230*0Sstevel@tonic-gateimplicitly turns on C<:std>.
231*0Sstevel@tonic-gate
232*0Sstevel@tonic-gateThe logic of C<:locale> is as follows:
233*0Sstevel@tonic-gate
234*0Sstevel@tonic-gate=over 4
235*0Sstevel@tonic-gate
236*0Sstevel@tonic-gate=item 1.
237*0Sstevel@tonic-gate
238*0Sstevel@tonic-gateIf the platform supports the langinfo(CODESET) interface, the codeset
239*0Sstevel@tonic-gatereturned is used as the default encoding for the open pragma.
240*0Sstevel@tonic-gate
241*0Sstevel@tonic-gate=item 2.
242*0Sstevel@tonic-gate
243*0Sstevel@tonic-gateIf 1. didn't work but we are under the locale pragma, the environment
244*0Sstevel@tonic-gatevariables LC_ALL and LANG (in that order) are matched for encodings
245*0Sstevel@tonic-gate(the part after C<.>, if any), and if any found, that is used
246*0Sstevel@tonic-gateas the default encoding for the open pragma.
247*0Sstevel@tonic-gate
248*0Sstevel@tonic-gate=item 3.
249*0Sstevel@tonic-gate
250*0Sstevel@tonic-gateIf 1. and 2. didn't work, the environment variables LC_ALL and LANG
251*0Sstevel@tonic-gate(in that order) are matched for anything looking like UTF-8, and if
252*0Sstevel@tonic-gateany found, C<:utf8> is used as the default encoding for the open
253*0Sstevel@tonic-gatepragma.
254*0Sstevel@tonic-gate
255*0Sstevel@tonic-gate=back
256*0Sstevel@tonic-gate
257*0Sstevel@tonic-gateIf your locale environment variables (LC_ALL, LC_CTYPE, LANG)
258*0Sstevel@tonic-gatecontain the strings 'UTF-8' or 'UTF8' (case-insensitive matching),
259*0Sstevel@tonic-gatethe default encoding of your STDIN, STDOUT, and STDERR, and of
260*0Sstevel@tonic-gateB<any subsequent file open>, is UTF-8.
261*0Sstevel@tonic-gate
262*0Sstevel@tonic-gateDirectory handles may also support PerlIO layers in the future.
263*0Sstevel@tonic-gate
264*0Sstevel@tonic-gate=head1 NONPERLIO FUNCTIONALITY
265*0Sstevel@tonic-gate
266*0Sstevel@tonic-gateIf Perl is not built to use PerlIO as its IO system then only the two
267*0Sstevel@tonic-gatepseudo-layers C<:bytes> and C<:crlf> are available.
268*0Sstevel@tonic-gate
269*0Sstevel@tonic-gateThe C<:bytes> layer corresponds to "binary mode" and the C<:crlf>
270*0Sstevel@tonic-gatelayer corresponds to "text mode" on platforms that distinguish
271*0Sstevel@tonic-gatebetween the two modes when opening files (which is many DOS-like
272*0Sstevel@tonic-gateplatforms, including Windows).  These two layers are no-ops on
273*0Sstevel@tonic-gateplatforms where binmode() is a no-op, but perform their functions
274*0Sstevel@tonic-gateeverywhere if PerlIO is enabled.
275*0Sstevel@tonic-gate
276*0Sstevel@tonic-gate=head1 IMPLEMENTATION DETAILS
277*0Sstevel@tonic-gate
278*0Sstevel@tonic-gateThere is a class method in C<PerlIO::Layer> C<find> which is
279*0Sstevel@tonic-gateimplemented as XS code.  It is called by C<import> to validate the
280*0Sstevel@tonic-gatelayers:
281*0Sstevel@tonic-gate
282*0Sstevel@tonic-gate   PerlIO::Layer::->find("perlio")
283*0Sstevel@tonic-gate
284*0Sstevel@tonic-gateThe return value (if defined) is a Perl object, of class
285*0Sstevel@tonic-gateC<PerlIO::Layer> which is created by the C code in F<perlio.c>.  As
286*0Sstevel@tonic-gateyet there is nothing useful you can do with the object at the perl
287*0Sstevel@tonic-gatelevel.
288*0Sstevel@tonic-gate
289*0Sstevel@tonic-gate=head1 SEE ALSO
290*0Sstevel@tonic-gate
291*0Sstevel@tonic-gateL<perlfunc/"binmode">, L<perlfunc/"open">, L<perlunicode>, L<PerlIO>,
292*0Sstevel@tonic-gateL<encoding>
293*0Sstevel@tonic-gate
294*0Sstevel@tonic-gate=cut
295