1package open; 2use warnings; 3use Carp; 4$open::hint_bits = 0x20000; 5 6our $VERSION = '1.01'; 7 8my $locale_encoding; 9 10sub in_locale { $^H & ($locale::hint_bits || 0)} 11 12sub _get_locale_encoding { 13 unless (defined $locale_encoding) { 14 # I18N::Langinfo isn't available everywhere 15 eval { 16 require I18N::Langinfo; 17 I18N::Langinfo->import(qw(langinfo CODESET)); 18 $locale_encoding = langinfo(CODESET()); 19 }; 20 my $country_language; 21 22 no warnings 'uninitialized'; 23 24 if (not $locale_encoding && in_locale()) { 25 if ($ENV{LC_ALL} =~ /^([^.]+)\.([^.]+)$/) { 26 ($country_language, $locale_encoding) = ($1, $2); 27 } elsif ($ENV{LANG} =~ /^([^.]+)\.([^.]+)$/) { 28 ($country_language, $locale_encoding) = ($1, $2); 29 } 30 } elsif (not $locale_encoding) { 31 if ($ENV{LC_ALL} =~ /\butf-?8\b/i || 32 $ENV{LANG} =~ /\butf-?8\b/i) { 33 $locale_encoding = 'utf8'; 34 } 35 # Could do more heuristics based on the country and language 36 # parts of LC_ALL and LANG (the parts before the dot (if any)), 37 # since we have Locale::Country and Locale::Language available. 38 # TODO: get a database of Language -> Encoding mappings 39 # (the Estonian database at http://www.eki.ee/letter/ 40 # would be excellent!) --jhi 41 } 42 if (defined $locale_encoding && 43 $locale_encoding eq 'euc' && 44 defined $country_language) { 45 if ($country_language =~ /^ja_JP|japan(?:ese)?$/i) { 46 $locale_encoding = 'euc-jp'; 47 } elsif ($country_language =~ /^ko_KR|korean?$/i) { 48 $locale_encoding = 'euc-kr'; 49 } elsif ($country_language =~ /^zh_CN|chin(?:a|ese)?$/i) { 50 $locale_encoding = 'euc-cn'; 51 } elsif ($country_language =~ /^zh_TW|taiwan(?:ese)?$/i) { 52 $locale_encoding = 'euc-tw'; 53 } 54 croak "Locale encoding 'euc' too ambiguous" 55 if $locale_encoding eq 'euc'; 56 } 57 } 58} 59 60sub import { 61 my ($class,@args) = @_; 62 croak("`use open' needs explicit list of PerlIO layers") unless @args; 63 my $std; 64 $^H |= $open::hint_bits; 65 my ($in,$out) = split(/\0/,(${^OPEN} || "\0"), -1); 66 while (@args) { 67 my $type = shift(@args); 68 my $dscp; 69 if ($type =~ /^:?(utf8|locale|encoding\(.+\))$/) { 70 $type = 'IO'; 71 $dscp = ":$1"; 72 } elsif ($type eq ':std') { 73 $std = 1; 74 next; 75 } else { 76 $dscp = shift(@args) || ''; 77 } 78 my @val; 79 foreach my $layer (split(/\s+/,$dscp)) { 80 $layer =~ s/^://; 81 if ($layer eq 'locale') { 82 use Encode; 83 _get_locale_encoding() 84 unless defined $locale_encoding; 85 (warnings::warnif("layer", "Cannot figure out an encoding to use"), last) 86 unless defined $locale_encoding; 87 if ($locale_encoding =~ /^utf-?8$/i) { 88 $layer = "utf8"; 89 } else { 90 $layer = "encoding($locale_encoding)"; 91 } 92 $std = 1; 93 } else { 94 my $target = $layer; # the layer name itself 95 $target =~ s/^(\w+)\(.+\)$/$1/; # strip parameters 96 97 unless(PerlIO::Layer::->find($target)) { 98 warnings::warnif("layer", "Unknown PerlIO layer '$layer'"); 99 } 100 } 101 push(@val,":$layer"); 102 if ($layer =~ /^(crlf|raw)$/) { 103 $^H{"open_$type"} = $layer; 104 } 105 } 106 if ($type eq 'IN') { 107 $in = join(' ',@val); 108 } 109 elsif ($type eq 'OUT') { 110 $out = join(' ',@val); 111 } 112 elsif ($type eq 'IO') { 113 $in = $out = join(' ',@val); 114 } 115 else { 116 croak "Unknown PerlIO layer class '$type'"; 117 } 118 } 119 ${^OPEN} = join("\0",$in,$out) if $in or $out; 120 if ($std) { 121 if ($in) { 122 if ($in =~ /:utf8\b/) { 123 binmode(STDIN, ":utf8"); 124 } elsif ($in =~ /(\w+\(.+\))/) { 125 binmode(STDIN, ":$1"); 126 } 127 } 128 if ($out) { 129 if ($out =~ /:utf8\b/) { 130 binmode(STDOUT, ":utf8"); 131 binmode(STDERR, ":utf8"); 132 } elsif ($out =~ /(\w+\(.+\))/) { 133 binmode(STDOUT, ":$1"); 134 binmode(STDERR, ":$1"); 135 } 136 } 137 } 138} 139 1401; 141__END__ 142 143=head1 NAME 144 145open - perl pragma to set default PerlIO layers for input and output 146 147=head1 SYNOPSIS 148 149 use open IN => ":crlf", OUT => ":bytes"; 150 use open OUT => ':utf8'; 151 use open IO => ":encoding(iso-8859-7)"; 152 153 use open IO => ':locale'; 154 155 use open ':utf8'; 156 use open ':locale'; 157 use open ':encoding(iso-8859-7)'; 158 159 use open ':std'; 160 161=head1 DESCRIPTION 162 163Full-fledged support for I/O layers is now implemented provided 164Perl is configured to use PerlIO as its IO system (which is now the 165default). 166 167The C<open> pragma serves as one of the interfaces to declare default 168"layers" (also known as "disciplines") for all I/O. Any open(), 169readpipe() (aka qx//) and similar operators found within the lexical 170scope of this pragma will use the declared defaults. 171 172With the C<IN> subpragma you can declare the default layers 173of input streams, and with the C<OUT> subpragma you can declare 174the default layers of output streams. With the C<IO> subpragma 175you can control both input and output streams simultaneously. 176 177If you have a legacy encoding, you can use the C<:encoding(...)> tag. 178 179if you want to set your encoding layers based on your 180locale environment variables, you can use the C<:locale> tag. 181For example: 182 183 $ENV{LANG} = 'ru_RU.KOI8-R'; 184 # the :locale will probe the locale environment variables like LANG 185 use open OUT => ':locale'; 186 open(O, ">koi8"); 187 print O chr(0x430); # Unicode CYRILLIC SMALL LETTER A = KOI8-R 0xc1 188 close O; 189 open(I, "<koi8"); 190 printf "%#x\n", ord(<I>), "\n"; # this should print 0xc1 191 close I; 192 193These are equivalent 194 195 use open ':utf8'; 196 use open IO => ':utf8'; 197 198as are these 199 200 use open ':locale'; 201 use open IO => ':locale'; 202 203and these 204 205 use open ':encoding(iso-8859-7)'; 206 use open IO => ':encoding(iso-8859-7)'; 207 208The matching of encoding names is loose: case does not matter, and 209many encodings have several aliases. See L<Encode::Supported> for 210details and the list of supported locales. 211 212Note that C<:utf8> PerlIO layer must always be specified exactly like 213that, it is not subject to the loose matching of encoding names. 214 215When open() is given an explicit list of layers they are appended to 216the list declared using this pragma. 217 218The C<:std> subpragma on its own has no effect, but if combined with 219the C<:utf8> or C<:encoding> subpragmas, it converts the standard 220filehandles (STDIN, STDOUT, STDERR) to comply with encoding selected 221for input/output handles. For example, if both input and out are 222chosen to be C<:utf8>, a C<:std> will mean that STDIN, STDOUT, and 223STDERR are also in C<:utf8>. On the other hand, if only output is 224chosen to be in C<< :encoding(koi8r) >>, a C<:std> will cause only the 225STDOUT and STDERR to be in C<koi8r>. The C<:locale> subpragma 226implicitly turns on C<:std>. 227 228The logic of C<:locale> is as follows: 229 230=over 4 231 232=item 1. 233 234If the platform supports the langinfo(CODESET) interface, the codeset 235returned is used as the default encoding for the open pragma. 236 237=item 2. 238 239If 1. didn't work but we are under the locale pragma, the environment 240variables LC_ALL and LANG (in that order) are matched for encodings 241(the part after C<.>, if any), and if any found, that is used 242as the default encoding for the open pragma. 243 244=item 3. 245 246If 1. and 2. didn't work, the environment variables LC_ALL and LANG 247(in that order) are matched for anything looking like UTF-8, and if 248any found, C<:utf8> is used as the default encoding for the open 249pragma. 250 251=back 252 253If your locale environment variables (LANGUAGE, LC_ALL, LC_CTYPE, LANG) 254contain the strings 'UTF-8' or 'UTF8' (case-insensitive matching), 255the default encoding of your STDIN, STDOUT, and STDERR, and of 256B<any subsequent file open>, is UTF-8. 257 258Directory handles may also support PerlIO layers in the future. 259 260=head1 NONPERLIO FUNCTIONALITY 261 262If Perl is not built to use PerlIO as its IO system then only the two 263pseudo-layers C<:bytes> and C<:crlf> are available. 264 265The C<:bytes> layer corresponds to "binary mode" and the C<:crlf> 266layer corresponds to "text mode" on platforms that distinguish 267between the two modes when opening files (which is many DOS-like 268platforms, including Windows). These two layers are no-ops on 269platforms where binmode() is a no-op, but perform their functions 270everywhere if PerlIO is enabled. 271 272=head1 IMPLEMENTATION DETAILS 273 274There is a class method in C<PerlIO::Layer> C<find> which is 275implemented as XS code. It is called by C<import> to validate the 276layers: 277 278 PerlIO::Layer::->find("perlio") 279 280The return value (if defined) is a Perl object, of class 281C<PerlIO::Layer> which is created by the C code in F<perlio.c>. As 282yet there is nothing useful you can do with the object at the perl 283level. 284 285=head1 SEE ALSO 286 287L<perlfunc/"binmode">, L<perlfunc/"open">, L<perlunicode>, L<PerlIO>, 288L<encoding> 289 290=cut 291