1*0Sstevel@tonic-gatepackage open; 2*0Sstevel@tonic-gateuse warnings; 3*0Sstevel@tonic-gateuse Carp; 4*0Sstevel@tonic-gate$open::hint_bits = 0x20000; # HINT_LOCALIZE_HH 5*0Sstevel@tonic-gate 6*0Sstevel@tonic-gateour $VERSION = '1.03'; 7*0Sstevel@tonic-gate 8*0Sstevel@tonic-gatemy $locale_encoding; 9*0Sstevel@tonic-gate 10*0Sstevel@tonic-gatesub in_locale { $^H & ($locale::hint_bits || 0)} 11*0Sstevel@tonic-gate 12*0Sstevel@tonic-gatesub _get_locale_encoding { 13*0Sstevel@tonic-gate unless (defined $locale_encoding) { 14*0Sstevel@tonic-gate # I18N::Langinfo isn't available everywhere 15*0Sstevel@tonic-gate eval { 16*0Sstevel@tonic-gate require I18N::Langinfo; 17*0Sstevel@tonic-gate I18N::Langinfo->import(qw(langinfo CODESET)); 18*0Sstevel@tonic-gate $locale_encoding = langinfo(CODESET()); 19*0Sstevel@tonic-gate }; 20*0Sstevel@tonic-gate my $country_language; 21*0Sstevel@tonic-gate 22*0Sstevel@tonic-gate no warnings 'uninitialized'; 23*0Sstevel@tonic-gate 24*0Sstevel@tonic-gate if (not $locale_encoding && in_locale()) { 25*0Sstevel@tonic-gate if ($ENV{LC_ALL} =~ /^([^.]+)\.([^.]+)$/) { 26*0Sstevel@tonic-gate ($country_language, $locale_encoding) = ($1, $2); 27*0Sstevel@tonic-gate } elsif ($ENV{LANG} =~ /^([^.]+)\.([^.]+)$/) { 28*0Sstevel@tonic-gate ($country_language, $locale_encoding) = ($1, $2); 29*0Sstevel@tonic-gate } 30*0Sstevel@tonic-gate # LANGUAGE affects only LC_MESSAGES only on glibc 31*0Sstevel@tonic-gate } elsif (not $locale_encoding) { 32*0Sstevel@tonic-gate if ($ENV{LC_ALL} =~ /\butf-?8\b/i || 33*0Sstevel@tonic-gate $ENV{LANG} =~ /\butf-?8\b/i) { 34*0Sstevel@tonic-gate $locale_encoding = 'utf8'; 35*0Sstevel@tonic-gate } 36*0Sstevel@tonic-gate # Could do more heuristics based on the country and language 37*0Sstevel@tonic-gate # parts of LC_ALL and LANG (the parts before the dot (if any)), 38*0Sstevel@tonic-gate # since we have Locale::Country and Locale::Language available. 39*0Sstevel@tonic-gate # TODO: get a database of Language -> Encoding mappings 40*0Sstevel@tonic-gate # (the Estonian database at http://www.eki.ee/letter/ 41*0Sstevel@tonic-gate # would be excellent!) --jhi 42*0Sstevel@tonic-gate } 43*0Sstevel@tonic-gate if (defined $locale_encoding && 44*0Sstevel@tonic-gate lc($locale_encoding) eq 'euc' && 45*0Sstevel@tonic-gate defined $country_language) { 46*0Sstevel@tonic-gate if ($country_language =~ /^ja_JP|japan(?:ese)?$/i) { 47*0Sstevel@tonic-gate $locale_encoding = 'euc-jp'; 48*0Sstevel@tonic-gate } elsif ($country_language =~ /^ko_KR|korean?$/i) { 49*0Sstevel@tonic-gate $locale_encoding = 'euc-kr'; 50*0Sstevel@tonic-gate } elsif ($country_language =~ /^zh_CN|chin(?:a|ese)?$/i) { 51*0Sstevel@tonic-gate $locale_encoding = 'euc-cn'; 52*0Sstevel@tonic-gate } elsif ($country_language =~ /^zh_TW|taiwan(?:ese)?$/i) { 53*0Sstevel@tonic-gate $locale_encoding = 'euc-tw'; 54*0Sstevel@tonic-gate } else { 55*0Sstevel@tonic-gate croak "Locale encoding 'euc' too ambiguous"; 56*0Sstevel@tonic-gate } 57*0Sstevel@tonic-gate } 58*0Sstevel@tonic-gate } 59*0Sstevel@tonic-gate} 60*0Sstevel@tonic-gate 61*0Sstevel@tonic-gatesub import { 62*0Sstevel@tonic-gate my ($class,@args) = @_; 63*0Sstevel@tonic-gate croak("`use open' needs explicit list of PerlIO layers") unless @args; 64*0Sstevel@tonic-gate my $std; 65*0Sstevel@tonic-gate $^H |= $open::hint_bits; 66*0Sstevel@tonic-gate my ($in,$out) = split(/\0/,(${^OPEN} || "\0"), -1); 67*0Sstevel@tonic-gate while (@args) { 68*0Sstevel@tonic-gate my $type = shift(@args); 69*0Sstevel@tonic-gate my $dscp; 70*0Sstevel@tonic-gate if ($type =~ /^:?(utf8|locale|encoding\(.+\))$/) { 71*0Sstevel@tonic-gate $type = 'IO'; 72*0Sstevel@tonic-gate $dscp = ":$1"; 73*0Sstevel@tonic-gate } elsif ($type eq ':std') { 74*0Sstevel@tonic-gate $std = 1; 75*0Sstevel@tonic-gate next; 76*0Sstevel@tonic-gate } else { 77*0Sstevel@tonic-gate $dscp = shift(@args) || ''; 78*0Sstevel@tonic-gate } 79*0Sstevel@tonic-gate my @val; 80*0Sstevel@tonic-gate foreach my $layer (split(/\s+/,$dscp)) { 81*0Sstevel@tonic-gate $layer =~ s/^://; 82*0Sstevel@tonic-gate if ($layer eq 'locale') { 83*0Sstevel@tonic-gate require Encode; 84*0Sstevel@tonic-gate _get_locale_encoding() 85*0Sstevel@tonic-gate unless defined $locale_encoding; 86*0Sstevel@tonic-gate (warnings::warnif("layer", "Cannot figure out an encoding to use"), last) 87*0Sstevel@tonic-gate unless defined $locale_encoding; 88*0Sstevel@tonic-gate if ($locale_encoding =~ /^utf-?8$/i) { 89*0Sstevel@tonic-gate $layer = "utf8"; 90*0Sstevel@tonic-gate } else { 91*0Sstevel@tonic-gate $layer = "encoding($locale_encoding)"; 92*0Sstevel@tonic-gate } 93*0Sstevel@tonic-gate $std = 1; 94*0Sstevel@tonic-gate } else { 95*0Sstevel@tonic-gate my $target = $layer; # the layer name itself 96*0Sstevel@tonic-gate $target =~ s/^(\w+)\(.+\)$/$1/; # strip parameters 97*0Sstevel@tonic-gate 98*0Sstevel@tonic-gate unless(PerlIO::Layer::->find($target,1)) { 99*0Sstevel@tonic-gate warnings::warnif("layer", "Unknown PerlIO layer '$target'"); 100*0Sstevel@tonic-gate } 101*0Sstevel@tonic-gate } 102*0Sstevel@tonic-gate push(@val,":$layer"); 103*0Sstevel@tonic-gate if ($layer =~ /^(crlf|raw)$/) { 104*0Sstevel@tonic-gate $^H{"open_$type"} = $layer; 105*0Sstevel@tonic-gate } 106*0Sstevel@tonic-gate } 107*0Sstevel@tonic-gate if ($type eq 'IN') { 108*0Sstevel@tonic-gate $in = join(' ',@val); 109*0Sstevel@tonic-gate } 110*0Sstevel@tonic-gate elsif ($type eq 'OUT') { 111*0Sstevel@tonic-gate $out = join(' ',@val); 112*0Sstevel@tonic-gate } 113*0Sstevel@tonic-gate elsif ($type eq 'IO') { 114*0Sstevel@tonic-gate $in = $out = join(' ',@val); 115*0Sstevel@tonic-gate } 116*0Sstevel@tonic-gate else { 117*0Sstevel@tonic-gate croak "Unknown PerlIO layer class '$type'"; 118*0Sstevel@tonic-gate } 119*0Sstevel@tonic-gate } 120*0Sstevel@tonic-gate ${^OPEN} = join("\0",$in,$out) if $in or $out; 121*0Sstevel@tonic-gate if ($std) { 122*0Sstevel@tonic-gate if ($in) { 123*0Sstevel@tonic-gate if ($in =~ /:utf8\b/) { 124*0Sstevel@tonic-gate binmode(STDIN, ":utf8"); 125*0Sstevel@tonic-gate } elsif ($in =~ /(\w+\(.+\))/) { 126*0Sstevel@tonic-gate binmode(STDIN, ":$1"); 127*0Sstevel@tonic-gate } 128*0Sstevel@tonic-gate } 129*0Sstevel@tonic-gate if ($out) { 130*0Sstevel@tonic-gate if ($out =~ /:utf8\b/) { 131*0Sstevel@tonic-gate binmode(STDOUT, ":utf8"); 132*0Sstevel@tonic-gate binmode(STDERR, ":utf8"); 133*0Sstevel@tonic-gate } elsif ($out =~ /(\w+\(.+\))/) { 134*0Sstevel@tonic-gate binmode(STDOUT, ":$1"); 135*0Sstevel@tonic-gate binmode(STDERR, ":$1"); 136*0Sstevel@tonic-gate } 137*0Sstevel@tonic-gate } 138*0Sstevel@tonic-gate } 139*0Sstevel@tonic-gate} 140*0Sstevel@tonic-gate 141*0Sstevel@tonic-gate1; 142*0Sstevel@tonic-gate__END__ 143*0Sstevel@tonic-gate 144*0Sstevel@tonic-gate=head1 NAME 145*0Sstevel@tonic-gate 146*0Sstevel@tonic-gateopen - perl pragma to set default PerlIO layers for input and output 147*0Sstevel@tonic-gate 148*0Sstevel@tonic-gate=head1 SYNOPSIS 149*0Sstevel@tonic-gate 150*0Sstevel@tonic-gate use open IN => ":crlf", OUT => ":bytes"; 151*0Sstevel@tonic-gate use open OUT => ':utf8'; 152*0Sstevel@tonic-gate use open IO => ":encoding(iso-8859-7)"; 153*0Sstevel@tonic-gate 154*0Sstevel@tonic-gate use open IO => ':locale'; 155*0Sstevel@tonic-gate 156*0Sstevel@tonic-gate use open ':utf8'; 157*0Sstevel@tonic-gate use open ':locale'; 158*0Sstevel@tonic-gate use open ':encoding(iso-8859-7)'; 159*0Sstevel@tonic-gate 160*0Sstevel@tonic-gate use open ':std'; 161*0Sstevel@tonic-gate 162*0Sstevel@tonic-gate=head1 DESCRIPTION 163*0Sstevel@tonic-gate 164*0Sstevel@tonic-gateFull-fledged support for I/O layers is now implemented provided 165*0Sstevel@tonic-gatePerl is configured to use PerlIO as its IO system (which is now the 166*0Sstevel@tonic-gatedefault). 167*0Sstevel@tonic-gate 168*0Sstevel@tonic-gateThe C<open> pragma serves as one of the interfaces to declare default 169*0Sstevel@tonic-gate"layers" (also known as "disciplines") for all I/O. Any two-argument 170*0Sstevel@tonic-gateopen(), readpipe() (aka qx//) and similar operators found within the 171*0Sstevel@tonic-gatelexical scope of this pragma will use the declared defaults. 172*0Sstevel@tonic-gateThree-argument opens are not affected by this pragma since there you 173*0Sstevel@tonic-gate(can) explicitly specify the layers and are supposed to know what you 174*0Sstevel@tonic-gateare doing. 175*0Sstevel@tonic-gate 176*0Sstevel@tonic-gateWith the C<IN> subpragma you can declare the default layers 177*0Sstevel@tonic-gateof input streams, and with the C<OUT> subpragma you can declare 178*0Sstevel@tonic-gatethe default layers of output streams. With the C<IO> subpragma 179*0Sstevel@tonic-gateyou can control both input and output streams simultaneously. 180*0Sstevel@tonic-gate 181*0Sstevel@tonic-gateIf you have a legacy encoding, you can use the C<:encoding(...)> tag. 182*0Sstevel@tonic-gate 183*0Sstevel@tonic-gateif you want to set your encoding layers based on your 184*0Sstevel@tonic-gatelocale environment variables, you can use the C<:locale> tag. 185*0Sstevel@tonic-gateFor example: 186*0Sstevel@tonic-gate 187*0Sstevel@tonic-gate $ENV{LANG} = 'ru_RU.KOI8-R'; 188*0Sstevel@tonic-gate # the :locale will probe the locale environment variables like LANG 189*0Sstevel@tonic-gate use open OUT => ':locale'; 190*0Sstevel@tonic-gate open(O, ">koi8"); 191*0Sstevel@tonic-gate print O chr(0x430); # Unicode CYRILLIC SMALL LETTER A = KOI8-R 0xc1 192*0Sstevel@tonic-gate close O; 193*0Sstevel@tonic-gate open(I, "<koi8"); 194*0Sstevel@tonic-gate printf "%#x\n", ord(<I>), "\n"; # this should print 0xc1 195*0Sstevel@tonic-gate close I; 196*0Sstevel@tonic-gate 197*0Sstevel@tonic-gateThese are equivalent 198*0Sstevel@tonic-gate 199*0Sstevel@tonic-gate use open ':utf8'; 200*0Sstevel@tonic-gate use open IO => ':utf8'; 201*0Sstevel@tonic-gate 202*0Sstevel@tonic-gateas are these 203*0Sstevel@tonic-gate 204*0Sstevel@tonic-gate use open ':locale'; 205*0Sstevel@tonic-gate use open IO => ':locale'; 206*0Sstevel@tonic-gate 207*0Sstevel@tonic-gateand these 208*0Sstevel@tonic-gate 209*0Sstevel@tonic-gate use open ':encoding(iso-8859-7)'; 210*0Sstevel@tonic-gate use open IO => ':encoding(iso-8859-7)'; 211*0Sstevel@tonic-gate 212*0Sstevel@tonic-gateThe matching of encoding names is loose: case does not matter, and 213*0Sstevel@tonic-gatemany encodings have several aliases. See L<Encode::Supported> for 214*0Sstevel@tonic-gatedetails and the list of supported locales. 215*0Sstevel@tonic-gate 216*0Sstevel@tonic-gateNote that C<:utf8> PerlIO layer must always be specified exactly like 217*0Sstevel@tonic-gatethat, it is not subject to the loose matching of encoding names. 218*0Sstevel@tonic-gate 219*0Sstevel@tonic-gateWhen open() is given an explicit list of layers they are appended to 220*0Sstevel@tonic-gatethe list declared using this pragma. 221*0Sstevel@tonic-gate 222*0Sstevel@tonic-gateThe C<:std> subpragma on its own has no effect, but if combined with 223*0Sstevel@tonic-gatethe C<:utf8> or C<:encoding> subpragmas, it converts the standard 224*0Sstevel@tonic-gatefilehandles (STDIN, STDOUT, STDERR) to comply with encoding selected 225*0Sstevel@tonic-gatefor input/output handles. For example, if both input and out are 226*0Sstevel@tonic-gatechosen to be C<:utf8>, a C<:std> will mean that STDIN, STDOUT, and 227*0Sstevel@tonic-gateSTDERR are also in C<:utf8>. On the other hand, if only output is 228*0Sstevel@tonic-gatechosen to be in C<< :encoding(koi8r) >>, a C<:std> will cause only the 229*0Sstevel@tonic-gateSTDOUT and STDERR to be in C<koi8r>. The C<:locale> subpragma 230*0Sstevel@tonic-gateimplicitly turns on C<:std>. 231*0Sstevel@tonic-gate 232*0Sstevel@tonic-gateThe logic of C<:locale> is as follows: 233*0Sstevel@tonic-gate 234*0Sstevel@tonic-gate=over 4 235*0Sstevel@tonic-gate 236*0Sstevel@tonic-gate=item 1. 237*0Sstevel@tonic-gate 238*0Sstevel@tonic-gateIf the platform supports the langinfo(CODESET) interface, the codeset 239*0Sstevel@tonic-gatereturned is used as the default encoding for the open pragma. 240*0Sstevel@tonic-gate 241*0Sstevel@tonic-gate=item 2. 242*0Sstevel@tonic-gate 243*0Sstevel@tonic-gateIf 1. didn't work but we are under the locale pragma, the environment 244*0Sstevel@tonic-gatevariables LC_ALL and LANG (in that order) are matched for encodings 245*0Sstevel@tonic-gate(the part after C<.>, if any), and if any found, that is used 246*0Sstevel@tonic-gateas the default encoding for the open pragma. 247*0Sstevel@tonic-gate 248*0Sstevel@tonic-gate=item 3. 249*0Sstevel@tonic-gate 250*0Sstevel@tonic-gateIf 1. and 2. didn't work, the environment variables LC_ALL and LANG 251*0Sstevel@tonic-gate(in that order) are matched for anything looking like UTF-8, and if 252*0Sstevel@tonic-gateany found, C<:utf8> is used as the default encoding for the open 253*0Sstevel@tonic-gatepragma. 254*0Sstevel@tonic-gate 255*0Sstevel@tonic-gate=back 256*0Sstevel@tonic-gate 257*0Sstevel@tonic-gateIf your locale environment variables (LC_ALL, LC_CTYPE, LANG) 258*0Sstevel@tonic-gatecontain the strings 'UTF-8' or 'UTF8' (case-insensitive matching), 259*0Sstevel@tonic-gatethe default encoding of your STDIN, STDOUT, and STDERR, and of 260*0Sstevel@tonic-gateB<any subsequent file open>, is UTF-8. 261*0Sstevel@tonic-gate 262*0Sstevel@tonic-gateDirectory handles may also support PerlIO layers in the future. 263*0Sstevel@tonic-gate 264*0Sstevel@tonic-gate=head1 NONPERLIO FUNCTIONALITY 265*0Sstevel@tonic-gate 266*0Sstevel@tonic-gateIf Perl is not built to use PerlIO as its IO system then only the two 267*0Sstevel@tonic-gatepseudo-layers C<:bytes> and C<:crlf> are available. 268*0Sstevel@tonic-gate 269*0Sstevel@tonic-gateThe C<:bytes> layer corresponds to "binary mode" and the C<:crlf> 270*0Sstevel@tonic-gatelayer corresponds to "text mode" on platforms that distinguish 271*0Sstevel@tonic-gatebetween the two modes when opening files (which is many DOS-like 272*0Sstevel@tonic-gateplatforms, including Windows). These two layers are no-ops on 273*0Sstevel@tonic-gateplatforms where binmode() is a no-op, but perform their functions 274*0Sstevel@tonic-gateeverywhere if PerlIO is enabled. 275*0Sstevel@tonic-gate 276*0Sstevel@tonic-gate=head1 IMPLEMENTATION DETAILS 277*0Sstevel@tonic-gate 278*0Sstevel@tonic-gateThere is a class method in C<PerlIO::Layer> C<find> which is 279*0Sstevel@tonic-gateimplemented as XS code. It is called by C<import> to validate the 280*0Sstevel@tonic-gatelayers: 281*0Sstevel@tonic-gate 282*0Sstevel@tonic-gate PerlIO::Layer::->find("perlio") 283*0Sstevel@tonic-gate 284*0Sstevel@tonic-gateThe return value (if defined) is a Perl object, of class 285*0Sstevel@tonic-gateC<PerlIO::Layer> which is created by the C code in F<perlio.c>. As 286*0Sstevel@tonic-gateyet there is nothing useful you can do with the object at the perl 287*0Sstevel@tonic-gatelevel. 288*0Sstevel@tonic-gate 289*0Sstevel@tonic-gate=head1 SEE ALSO 290*0Sstevel@tonic-gate 291*0Sstevel@tonic-gateL<perlfunc/"binmode">, L<perlfunc/"open">, L<perlunicode>, L<PerlIO>, 292*0Sstevel@tonic-gateL<encoding> 293*0Sstevel@tonic-gate 294*0Sstevel@tonic-gate=cut 295