1package Locale::Maketext::Guts; 2 3$VERSION = '1.13'; 4 5BEGIN { 6 # Just so we're nice and define SOMETHING in "our" package. 7 *zorp = sub { return scalar @_ } unless defined &zorp; 8} 9 10package Locale::Maketext; 11use strict; 12use vars qw($USE_LITERALS $GUTSPATH); 13 14BEGIN { 15 $GUTSPATH = __FILE__; 16 *DEBUG = sub () {0} unless defined &DEBUG; 17} 18 19use utf8; 20 21sub _compile { 22 # This big scary routine compiles an entry. 23 # It returns either a coderef if there's brackety bits in this, or 24 # otherwise a ref to a scalar. 25 26 my $target = ref($_[0]) || $_[0]; 27 28 my(@code); 29 my(@c) = (''); # "chunks" -- scratch. 30 my $call_count = 0; 31 my $big_pile = ''; 32 { 33 my $in_group = 0; # start out outside a group 34 my($m, @params); # scratch 35 36 while($_[1] =~ # Iterate over chunks. 37 m/\G( 38 [^\~\[\]]+ # non-~[] stuff 39 | 40 ~. # ~[, ~], ~~, ~other 41 | 42 \[ # [ presumably opening a group 43 | 44 \] # ] presumably closing a group 45 | 46 ~ # terminal ~ ? 47 | 48 $ 49 )/xgs 50 ) { 51 DEBUG>2 and print qq{ "$1"\n}; 52 53 if($1 eq '[' or $1 eq '') { # "[" or end 54 # Whether this is "[" or end, force processing of any 55 # preceding literal. 56 if($in_group) { 57 if($1 eq '') { 58 $target->_die_pointing($_[1], 'Unterminated bracket group'); 59 } 60 else { 61 $target->_die_pointing($_[1], 'You can\'t nest bracket groups'); 62 } 63 } 64 else { 65 if ($1 eq '') { 66 DEBUG>2 and print " [end-string]\n"; 67 } 68 else { 69 $in_group = 1; 70 } 71 die "How come \@c is empty?? in <$_[1]>" unless @c; # sanity 72 if(length $c[-1]) { 73 # Now actually processing the preceding literal 74 $big_pile .= $c[-1]; 75 if($USE_LITERALS and ( 76 (ord('A') == 65) 77 ? $c[-1] !~ m/[^\x20-\x7E]/s 78 # ASCII very safe chars 79 : $c[-1] !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s 80 # EBCDIC very safe chars 81 )) { 82 # normal case -- all very safe chars 83 $c[-1] =~ s/'/\\'/g; 84 push @code, q{ '} . $c[-1] . "',\n"; 85 $c[-1] = ''; # reuse this slot 86 } 87 else { 88 push @code, ' $c[' . $#c . "],\n"; 89 push @c, ''; # new chunk 90 } 91 } 92 # else just ignore the empty string. 93 } 94 95 } 96 elsif($1 eq ']') { # "]" 97 # close group -- go back in-band 98 if($in_group) { 99 $in_group = 0; 100 101 DEBUG>2 and print " --Closing group [$c[-1]]\n"; 102 103 # And now process the group... 104 105 if(!length($c[-1]) or $c[-1] =~ m/^\s+$/s) { 106 DEBUG > 2 and print " -- (Ignoring)\n"; 107 $c[-1] = ''; # reset out chink 108 next; 109 } 110 111 #$c[-1] =~ s/^\s+//s; 112 #$c[-1] =~ s/\s+$//s; 113 ($m,@params) = split(/,/, $c[-1], -1); # was /\s*,\s*/ 114 115 # A bit of a hack -- we've turned "~,"'s into DELs, so turn 116 # 'em into real commas here. 117 if (ord('A') == 65) { # ASCII, etc 118 foreach($m, @params) { tr/\x7F/,/ } 119 } 120 else { # EBCDIC (1047, 0037, POSIX-BC) 121 # Thanks to Peter Prymmer for the EBCDIC handling 122 foreach($m, @params) { tr/\x07/,/ } 123 } 124 125 # Special-case handling of some method names: 126 if($m eq '_*' or $m =~ m/^_(-?\d+)$/s) { 127 # Treat [_1,...] as [,_1,...], etc. 128 unshift @params, $m; 129 $m = ''; 130 } 131 elsif($m eq '*') { 132 $m = 'quant'; # "*" for "times": "4 cars" is 4 times "cars" 133 } 134 elsif($m eq '#') { 135 $m = 'numf'; # "#" for "number": [#,_1] for "the number _1" 136 } 137 138 # Most common case: a simple, legal-looking method name 139 if($m eq '') { 140 # 0-length method name means to just interpolate: 141 push @code, ' ('; 142 } 143 elsif($m =~ /^\w+(?:\:\:\w+)*$/s 144 and $m !~ m/(?:^|\:)\d/s 145 # exclude starting a (sub)package or symbol with a digit 146 ) { 147 # Yes, it even supports the demented (and undocumented?) 148 # $obj->Foo::bar(...) syntax. 149 $target->_die_pointing( 150 $_[1], q{Can't use "SUPER::" in a bracket-group method}, 151 2 + length($c[-1]) 152 ) 153 if $m =~ m/^SUPER::/s; 154 # Because for SUPER:: to work, we'd have to compile this into 155 # the right package, and that seems just not worth the bother, 156 # unless someone convinces me otherwise. 157 158 push @code, ' $_[0]->' . $m . '('; 159 } 160 else { 161 # TODO: implement something? or just too icky to consider? 162 $target->_die_pointing( 163 $_[1], 164 "Can't use \"$m\" as a method name in bracket group", 165 2 + length($c[-1]) 166 ); 167 } 168 169 pop @c; # we don't need that chunk anymore 170 ++$call_count; 171 172 foreach my $p (@params) { 173 if($p eq '_*') { 174 # Meaning: all parameters except $_[0] 175 $code[-1] .= ' @_[1 .. $#_], '; 176 # and yes, that does the right thing for all @_ < 3 177 } 178 elsif($p =~ m/^_(-?\d+)$/s) { 179 # _3 meaning $_[3] 180 $code[-1] .= '$_[' . (0 + $1) . '], '; 181 } 182 elsif($USE_LITERALS and ( 183 (ord('A') == 65) 184 ? $p !~ m/[^\x20-\x7E]/s 185 # ASCII very safe chars 186 : $p !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s 187 # EBCDIC very safe chars 188 )) { 189 # Normal case: a literal containing only safe characters 190 $p =~ s/'/\\'/g; 191 $code[-1] .= q{'} . $p . q{', }; 192 } 193 else { 194 # Stow it on the chunk-stack, and just refer to that. 195 push @c, $p; 196 push @code, ' $c[' . $#c . '], '; 197 } 198 } 199 $code[-1] .= "),\n"; 200 201 push @c, ''; 202 } 203 else { 204 $target->_die_pointing($_[1], q{Unbalanced ']'}); 205 } 206 207 } 208 elsif(substr($1,0,1) ne '~') { 209 # it's stuff not containing "~" or "[" or "]" 210 # i.e., a literal blob 211 $c[-1] .= $1; 212 213 } 214 elsif($1 eq '~~') { # "~~" 215 $c[-1] .= '~'; 216 217 } 218 elsif($1 eq '~[') { # "~[" 219 $c[-1] .= '['; 220 221 } 222 elsif($1 eq '~]') { # "~]" 223 $c[-1] .= ']'; 224 225 } 226 elsif($1 eq '~,') { # "~," 227 if($in_group) { 228 # This is a hack, based on the assumption that no-one will actually 229 # want a DEL inside a bracket group. Let's hope that's it's true. 230 if (ord('A') == 65) { # ASCII etc 231 $c[-1] .= "\x7F"; 232 } 233 else { # EBCDIC (cp 1047, 0037, POSIX-BC) 234 $c[-1] .= "\x07"; 235 } 236 } 237 else { 238 $c[-1] .= '~,'; 239 } 240 241 } 242 elsif($1 eq '~') { # possible only at string-end, it seems. 243 $c[-1] .= '~'; 244 245 } 246 else { 247 # It's a "~X" where X is not a special character. 248 # Consider it a literal ~ and X. 249 $c[-1] .= $1; 250 } 251 } 252 } 253 254 if($call_count) { 255 undef $big_pile; # Well, nevermind that. 256 } 257 else { 258 # It's all literals! Ahwell, that can happen. 259 # So don't bother with the eval. Return a SCALAR reference. 260 return \$big_pile; 261 } 262 263 die q{Last chunk isn't null??} if @c and length $c[-1]; # sanity 264 DEBUG and warn scalar(@c), " chunks under closure\n"; 265 if(@code == 0) { # not possible? 266 DEBUG and warn "Empty code\n"; 267 return \''; 268 } 269 elsif(@code > 1) { # most cases, presumably! 270 unshift @code, "join '',\n"; 271 } 272 unshift @code, "use strict; sub {\n"; 273 push @code, "}\n"; 274 275 DEBUG and warn @code; 276 my $sub = eval(join '', @code); 277 die "$@ while evalling" . join('', @code) if $@; # Should be impossible. 278 return $sub; 279} 280 281# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 282 283sub _die_pointing { 284 # This is used by _compile to throw a fatal error 285 my $target = shift; # class name 286 # ...leaving $_[0] the error-causing text, and $_[1] the error message 287 288 my $i = index($_[0], "\n"); 289 290 my $pointy; 291 my $pos = pos($_[0]) - (defined($_[2]) ? $_[2] : 0) - 1; 292 if($pos < 1) { 293 $pointy = "^=== near there\n"; 294 } 295 else { # we need to space over 296 my $first_tab = index($_[0], "\t"); 297 if($pos > 2 and ( -1 == $first_tab or $first_tab > pos($_[0]))) { 298 # No tabs, or the first tab is harmlessly after where we will point to, 299 # AND we're far enough from the margin that we can draw a proper arrow. 300 $pointy = ('=' x $pos) . "^ near there\n"; 301 } 302 else { 303 # tabs screw everything up! 304 $pointy = substr($_[0],0,$pos); 305 $pointy =~ tr/\t //cd; 306 # make everything into whitespace, but preseving tabs 307 $pointy .= "^=== near there\n"; 308 } 309 } 310 311 my $errmsg = "$_[1], in\:\n$_[0]"; 312 313 if($i == -1) { 314 # No newline. 315 $errmsg .= "\n" . $pointy; 316 } 317 elsif($i == (length($_[0]) - 1) ) { 318 # Already has a newline at end. 319 $errmsg .= $pointy; 320 } 321 else { 322 # don't bother with the pointy bit, I guess. 323 } 324 Carp::croak( "$errmsg via $target, as used" ); 325} 326 3271; 328 329