1*0Sstevel@tonic-gate 2*0Sstevel@tonic-gatepackage Locale::Maketext::Guts; 3*0Sstevel@tonic-gateBEGIN { *zorp = sub { return scalar @_ } unless defined &zorp; } 4*0Sstevel@tonic-gate # Just so we're nice and define SOMETHING in "our" package. 5*0Sstevel@tonic-gate 6*0Sstevel@tonic-gatepackage Locale::Maketext; 7*0Sstevel@tonic-gateuse strict; 8*0Sstevel@tonic-gateuse vars qw($USE_LITERALS $GUTSPATH); 9*0Sstevel@tonic-gate 10*0Sstevel@tonic-gateBEGIN { 11*0Sstevel@tonic-gate $GUTSPATH = __FILE__; 12*0Sstevel@tonic-gate *DEBUG = sub () {0} unless defined &DEBUG; 13*0Sstevel@tonic-gate} 14*0Sstevel@tonic-gate 15*0Sstevel@tonic-gateuse utf8; 16*0Sstevel@tonic-gate 17*0Sstevel@tonic-gatesub _compile { 18*0Sstevel@tonic-gate # This big scary routine compiles an entry. 19*0Sstevel@tonic-gate # It returns either a coderef if there's brackety bits in this, or 20*0Sstevel@tonic-gate # otherwise a ref to a scalar. 21*0Sstevel@tonic-gate 22*0Sstevel@tonic-gate my $target = ref($_[0]) || $_[0]; 23*0Sstevel@tonic-gate 24*0Sstevel@tonic-gate my(@code); 25*0Sstevel@tonic-gate my(@c) = (''); # "chunks" -- scratch. 26*0Sstevel@tonic-gate my $call_count = 0; 27*0Sstevel@tonic-gate my $big_pile = ''; 28*0Sstevel@tonic-gate { 29*0Sstevel@tonic-gate my $in_group = 0; # start out outside a group 30*0Sstevel@tonic-gate my($m, @params); # scratch 31*0Sstevel@tonic-gate 32*0Sstevel@tonic-gate while($_[1] =~ # Iterate over chunks. 33*0Sstevel@tonic-gate m<\G( 34*0Sstevel@tonic-gate [^\~\[\]]+ # non-~[] stuff 35*0Sstevel@tonic-gate | 36*0Sstevel@tonic-gate ~. # ~[, ~], ~~, ~other 37*0Sstevel@tonic-gate | 38*0Sstevel@tonic-gate \[ # [ presumably opening a group 39*0Sstevel@tonic-gate | 40*0Sstevel@tonic-gate \] # ] presumably closing a group 41*0Sstevel@tonic-gate | 42*0Sstevel@tonic-gate ~ # terminal ~ ? 43*0Sstevel@tonic-gate | 44*0Sstevel@tonic-gate $ 45*0Sstevel@tonic-gate )>xgs 46*0Sstevel@tonic-gate ) { 47*0Sstevel@tonic-gate print " \"$1\"\n" if DEBUG > 2; 48*0Sstevel@tonic-gate 49*0Sstevel@tonic-gate if($1 eq '[' or $1 eq '') { # "[" or end 50*0Sstevel@tonic-gate # Whether this is "[" or end, force processing of any 51*0Sstevel@tonic-gate # preceding literal. 52*0Sstevel@tonic-gate if($in_group) { 53*0Sstevel@tonic-gate if($1 eq '') { 54*0Sstevel@tonic-gate $target->_die_pointing($_[1], "Unterminated bracket group"); 55*0Sstevel@tonic-gate } else { 56*0Sstevel@tonic-gate $target->_die_pointing($_[1], "You can't nest bracket groups"); 57*0Sstevel@tonic-gate } 58*0Sstevel@tonic-gate } else { 59*0Sstevel@tonic-gate if($1 eq '') { 60*0Sstevel@tonic-gate print " [end-string]\n" if DEBUG > 2; 61*0Sstevel@tonic-gate } else { 62*0Sstevel@tonic-gate $in_group = 1; 63*0Sstevel@tonic-gate } 64*0Sstevel@tonic-gate die "How come \@c is empty?? in <$_[1]>" unless @c; # sanity 65*0Sstevel@tonic-gate if(length $c[-1]) { 66*0Sstevel@tonic-gate # Now actually processing the preceding literal 67*0Sstevel@tonic-gate $big_pile .= $c[-1]; 68*0Sstevel@tonic-gate if($USE_LITERALS and ( 69*0Sstevel@tonic-gate (ord('A') == 65) 70*0Sstevel@tonic-gate ? $c[-1] !~ m<[^\x20-\x7E]>s 71*0Sstevel@tonic-gate # ASCII very safe chars 72*0Sstevel@tonic-gate : $c[-1] !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s 73*0Sstevel@tonic-gate # EBCDIC very safe chars 74*0Sstevel@tonic-gate )) { 75*0Sstevel@tonic-gate # normal case -- all very safe chars 76*0Sstevel@tonic-gate $c[-1] =~ s/'/\\'/g; 77*0Sstevel@tonic-gate push @code, q{ '} . $c[-1] . "',\n"; 78*0Sstevel@tonic-gate $c[-1] = ''; # reuse this slot 79*0Sstevel@tonic-gate } else { 80*0Sstevel@tonic-gate push @code, ' $c[' . $#c . "],\n"; 81*0Sstevel@tonic-gate push @c, ''; # new chunk 82*0Sstevel@tonic-gate } 83*0Sstevel@tonic-gate } 84*0Sstevel@tonic-gate # else just ignore the empty string. 85*0Sstevel@tonic-gate } 86*0Sstevel@tonic-gate 87*0Sstevel@tonic-gate } elsif($1 eq ']') { # "]" 88*0Sstevel@tonic-gate # close group -- go back in-band 89*0Sstevel@tonic-gate if($in_group) { 90*0Sstevel@tonic-gate $in_group = 0; 91*0Sstevel@tonic-gate 92*0Sstevel@tonic-gate print " --Closing group [$c[-1]]\n" if DEBUG > 2; 93*0Sstevel@tonic-gate 94*0Sstevel@tonic-gate # And now process the group... 95*0Sstevel@tonic-gate 96*0Sstevel@tonic-gate if(!length($c[-1]) or $c[-1] =~ m/^\s+$/s) { 97*0Sstevel@tonic-gate DEBUG > 2 and print " -- (Ignoring)\n"; 98*0Sstevel@tonic-gate $c[-1] = ''; # reset out chink 99*0Sstevel@tonic-gate next; 100*0Sstevel@tonic-gate } 101*0Sstevel@tonic-gate 102*0Sstevel@tonic-gate #$c[-1] =~ s/^\s+//s; 103*0Sstevel@tonic-gate #$c[-1] =~ s/\s+$//s; 104*0Sstevel@tonic-gate ($m,@params) = split(",", $c[-1], -1); # was /\s*,\s*/ 105*0Sstevel@tonic-gate 106*0Sstevel@tonic-gate # A bit of a hack -- we've turned "~,"'s into DELs, so turn 107*0Sstevel@tonic-gate # 'em into real commas here. 108*0Sstevel@tonic-gate if (ord('A') == 65) { # ASCII, etc 109*0Sstevel@tonic-gate foreach($m, @params) { tr/\x7F/,/ } 110*0Sstevel@tonic-gate } else { # EBCDIC (1047, 0037, POSIX-BC) 111*0Sstevel@tonic-gate # Thanks to Peter Prymmer for the EBCDIC handling 112*0Sstevel@tonic-gate foreach($m, @params) { tr/\x07/,/ } 113*0Sstevel@tonic-gate } 114*0Sstevel@tonic-gate 115*0Sstevel@tonic-gate # Special-case handling of some method names: 116*0Sstevel@tonic-gate if($m eq '_*' or $m =~ m<^_(-?\d+)$>s) { 117*0Sstevel@tonic-gate # Treat [_1,...] as [,_1,...], etc. 118*0Sstevel@tonic-gate unshift @params, $m; 119*0Sstevel@tonic-gate $m = ''; 120*0Sstevel@tonic-gate } elsif($m eq '*') { 121*0Sstevel@tonic-gate $m = 'quant'; # "*" for "times": "4 cars" is 4 times "cars" 122*0Sstevel@tonic-gate } elsif($m eq '#') { 123*0Sstevel@tonic-gate $m = 'numf'; # "#" for "number": [#,_1] for "the number _1" 124*0Sstevel@tonic-gate } 125*0Sstevel@tonic-gate 126*0Sstevel@tonic-gate # Most common case: a simple, legal-looking method name 127*0Sstevel@tonic-gate if($m eq '') { 128*0Sstevel@tonic-gate # 0-length method name means to just interpolate: 129*0Sstevel@tonic-gate push @code, ' ('; 130*0Sstevel@tonic-gate } elsif($m =~ m<^\w+(?:\:\:\w+)*$>s 131*0Sstevel@tonic-gate and $m !~ m<(?:^|\:)\d>s 132*0Sstevel@tonic-gate # exclude starting a (sub)package or symbol with a digit 133*0Sstevel@tonic-gate ) { 134*0Sstevel@tonic-gate # Yes, it even supports the demented (and undocumented?) 135*0Sstevel@tonic-gate # $obj->Foo::bar(...) syntax. 136*0Sstevel@tonic-gate $target->_die_pointing( 137*0Sstevel@tonic-gate $_[1], "Can't (yet?) use \"SUPER::\" in a bracket-group method", 138*0Sstevel@tonic-gate 2 + length($c[-1]) 139*0Sstevel@tonic-gate ) 140*0Sstevel@tonic-gate if $m =~ m/^SUPER::/s; 141*0Sstevel@tonic-gate # Because for SUPER:: to work, we'd have to compile this into 142*0Sstevel@tonic-gate # the right package, and that seems just not worth the bother, 143*0Sstevel@tonic-gate # unless someone convinces me otherwise. 144*0Sstevel@tonic-gate 145*0Sstevel@tonic-gate push @code, ' $_[0]->' . $m . '('; 146*0Sstevel@tonic-gate } else { 147*0Sstevel@tonic-gate # TODO: implement something? or just too icky to consider? 148*0Sstevel@tonic-gate $target->_die_pointing( 149*0Sstevel@tonic-gate $_[1], 150*0Sstevel@tonic-gate "Can't use \"$m\" as a method name in bracket group", 151*0Sstevel@tonic-gate 2 + length($c[-1]) 152*0Sstevel@tonic-gate ); 153*0Sstevel@tonic-gate } 154*0Sstevel@tonic-gate 155*0Sstevel@tonic-gate pop @c; # we don't need that chunk anymore 156*0Sstevel@tonic-gate ++$call_count; 157*0Sstevel@tonic-gate 158*0Sstevel@tonic-gate foreach my $p (@params) { 159*0Sstevel@tonic-gate if($p eq '_*') { 160*0Sstevel@tonic-gate # Meaning: all parameters except $_[0] 161*0Sstevel@tonic-gate $code[-1] .= ' @_[1 .. $#_], '; 162*0Sstevel@tonic-gate # and yes, that does the right thing for all @_ < 3 163*0Sstevel@tonic-gate } elsif($p =~ m<^_(-?\d+)$>s) { 164*0Sstevel@tonic-gate # _3 meaning $_[3] 165*0Sstevel@tonic-gate $code[-1] .= '$_[' . (0 + $1) . '], '; 166*0Sstevel@tonic-gate } elsif($USE_LITERALS and ( 167*0Sstevel@tonic-gate (ord('A') == 65) 168*0Sstevel@tonic-gate ? $p !~ m<[^\x20-\x7E]>s 169*0Sstevel@tonic-gate # ASCII very safe chars 170*0Sstevel@tonic-gate : $p !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s 171*0Sstevel@tonic-gate # EBCDIC very safe chars 172*0Sstevel@tonic-gate )) { 173*0Sstevel@tonic-gate # Normal case: a literal containing only safe characters 174*0Sstevel@tonic-gate $p =~ s/'/\\'/g; 175*0Sstevel@tonic-gate $code[-1] .= q{'} . $p . q{', }; 176*0Sstevel@tonic-gate } else { 177*0Sstevel@tonic-gate # Stow it on the chunk-stack, and just refer to that. 178*0Sstevel@tonic-gate push @c, $p; 179*0Sstevel@tonic-gate push @code, ' $c[' . $#c . "], "; 180*0Sstevel@tonic-gate } 181*0Sstevel@tonic-gate } 182*0Sstevel@tonic-gate $code[-1] .= "),\n"; 183*0Sstevel@tonic-gate 184*0Sstevel@tonic-gate push @c, ''; 185*0Sstevel@tonic-gate } else { 186*0Sstevel@tonic-gate $target->_die_pointing($_[1], "Unbalanced ']'"); 187*0Sstevel@tonic-gate } 188*0Sstevel@tonic-gate 189*0Sstevel@tonic-gate } elsif(substr($1,0,1) ne '~') { 190*0Sstevel@tonic-gate # it's stuff not containing "~" or "[" or "]" 191*0Sstevel@tonic-gate # i.e., a literal blob 192*0Sstevel@tonic-gate $c[-1] .= $1; 193*0Sstevel@tonic-gate 194*0Sstevel@tonic-gate } elsif($1 eq '~~') { # "~~" 195*0Sstevel@tonic-gate $c[-1] .= '~'; 196*0Sstevel@tonic-gate 197*0Sstevel@tonic-gate } elsif($1 eq '~[') { # "~[" 198*0Sstevel@tonic-gate $c[-1] .= '['; 199*0Sstevel@tonic-gate 200*0Sstevel@tonic-gate } elsif($1 eq '~]') { # "~]" 201*0Sstevel@tonic-gate $c[-1] .= ']'; 202*0Sstevel@tonic-gate 203*0Sstevel@tonic-gate } elsif($1 eq '~,') { # "~," 204*0Sstevel@tonic-gate if($in_group) { 205*0Sstevel@tonic-gate # This is a hack, based on the assumption that no-one will actually 206*0Sstevel@tonic-gate # want a DEL inside a bracket group. Let's hope that's it's true. 207*0Sstevel@tonic-gate if (ord('A') == 65) { # ASCII etc 208*0Sstevel@tonic-gate $c[-1] .= "\x7F"; 209*0Sstevel@tonic-gate } else { # EBCDIC (cp 1047, 0037, POSIX-BC) 210*0Sstevel@tonic-gate $c[-1] .= "\x07"; 211*0Sstevel@tonic-gate } 212*0Sstevel@tonic-gate } else { 213*0Sstevel@tonic-gate $c[-1] .= '~,'; 214*0Sstevel@tonic-gate } 215*0Sstevel@tonic-gate 216*0Sstevel@tonic-gate } elsif($1 eq '~') { # possible only at string-end, it seems. 217*0Sstevel@tonic-gate $c[-1] .= '~'; 218*0Sstevel@tonic-gate 219*0Sstevel@tonic-gate } else { 220*0Sstevel@tonic-gate # It's a "~X" where X is not a special character. 221*0Sstevel@tonic-gate # Consider it a literal ~ and X. 222*0Sstevel@tonic-gate $c[-1] .= $1; 223*0Sstevel@tonic-gate } 224*0Sstevel@tonic-gate } 225*0Sstevel@tonic-gate } 226*0Sstevel@tonic-gate 227*0Sstevel@tonic-gate if($call_count) { 228*0Sstevel@tonic-gate undef $big_pile; # Well, nevermind that. 229*0Sstevel@tonic-gate } else { 230*0Sstevel@tonic-gate # It's all literals! Ahwell, that can happen. 231*0Sstevel@tonic-gate # So don't bother with the eval. Return a SCALAR reference. 232*0Sstevel@tonic-gate return \$big_pile; 233*0Sstevel@tonic-gate } 234*0Sstevel@tonic-gate 235*0Sstevel@tonic-gate die "Last chunk isn't null??" if @c and length $c[-1]; # sanity 236*0Sstevel@tonic-gate print scalar(@c), " chunks under closure\n" if DEBUG; 237*0Sstevel@tonic-gate if(@code == 0) { # not possible? 238*0Sstevel@tonic-gate print "Empty code\n" if DEBUG; 239*0Sstevel@tonic-gate return \''; 240*0Sstevel@tonic-gate } elsif(@code > 1) { # most cases, presumably! 241*0Sstevel@tonic-gate unshift @code, "join '',\n"; 242*0Sstevel@tonic-gate } 243*0Sstevel@tonic-gate unshift @code, "use strict; sub {\n"; 244*0Sstevel@tonic-gate push @code, "}\n"; 245*0Sstevel@tonic-gate 246*0Sstevel@tonic-gate print @code if DEBUG; 247*0Sstevel@tonic-gate my $sub = eval(join '', @code); 248*0Sstevel@tonic-gate die "$@ while evalling" . join('', @code) if $@; # Should be impossible. 249*0Sstevel@tonic-gate return $sub; 250*0Sstevel@tonic-gate} 251*0Sstevel@tonic-gate 252*0Sstevel@tonic-gate# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 253*0Sstevel@tonic-gate 254*0Sstevel@tonic-gatesub _die_pointing { 255*0Sstevel@tonic-gate # This is used by _compile to throw a fatal error 256*0Sstevel@tonic-gate my $target = shift; # class name 257*0Sstevel@tonic-gate # ...leaving $_[0] the error-causing text, and $_[1] the error message 258*0Sstevel@tonic-gate 259*0Sstevel@tonic-gate my $i = index($_[0], "\n"); 260*0Sstevel@tonic-gate 261*0Sstevel@tonic-gate my $pointy; 262*0Sstevel@tonic-gate my $pos = pos($_[0]) - (defined($_[2]) ? $_[2] : 0) - 1; 263*0Sstevel@tonic-gate if($pos < 1) { 264*0Sstevel@tonic-gate $pointy = "^=== near there\n"; 265*0Sstevel@tonic-gate } else { # we need to space over 266*0Sstevel@tonic-gate my $first_tab = index($_[0], "\t"); 267*0Sstevel@tonic-gate if($pos > 2 and ( -1 == $first_tab or $first_tab > pos($_[0]))) { 268*0Sstevel@tonic-gate # No tabs, or the first tab is harmlessly after where we will point to, 269*0Sstevel@tonic-gate # AND we're far enough from the margin that we can draw a proper arrow. 270*0Sstevel@tonic-gate $pointy = ('=' x $pos) . "^ near there\n"; 271*0Sstevel@tonic-gate } else { 272*0Sstevel@tonic-gate # tabs screw everything up! 273*0Sstevel@tonic-gate $pointy = substr($_[0],0,$pos); 274*0Sstevel@tonic-gate $pointy =~ tr/\t //cd; 275*0Sstevel@tonic-gate # make everything into whitespace, but preseving tabs 276*0Sstevel@tonic-gate $pointy .= "^=== near there\n"; 277*0Sstevel@tonic-gate } 278*0Sstevel@tonic-gate } 279*0Sstevel@tonic-gate 280*0Sstevel@tonic-gate my $errmsg = "$_[1], in\:\n$_[0]"; 281*0Sstevel@tonic-gate 282*0Sstevel@tonic-gate if($i == -1) { 283*0Sstevel@tonic-gate # No newline. 284*0Sstevel@tonic-gate $errmsg .= "\n" . $pointy; 285*0Sstevel@tonic-gate } elsif($i == (length($_[0]) - 1) ) { 286*0Sstevel@tonic-gate # Already has a newline at end. 287*0Sstevel@tonic-gate $errmsg .= $pointy; 288*0Sstevel@tonic-gate } else { 289*0Sstevel@tonic-gate # don't bother with the pointy bit, I guess. 290*0Sstevel@tonic-gate } 291*0Sstevel@tonic-gate Carp::croak( "$errmsg via $target, as used" ); 292*0Sstevel@tonic-gate} 293*0Sstevel@tonic-gate 294*0Sstevel@tonic-gate1; 295*0Sstevel@tonic-gate 296