1*0Sstevel@tonic-gate
2*0Sstevel@tonic-gatepackage Locale::Maketext::Guts;
3*0Sstevel@tonic-gateBEGIN { *zorp = sub { return scalar @_ } unless defined &zorp; }
4*0Sstevel@tonic-gate # Just so we're nice and define SOMETHING in "our" package.
5*0Sstevel@tonic-gate
6*0Sstevel@tonic-gatepackage Locale::Maketext;
7*0Sstevel@tonic-gateuse strict;
8*0Sstevel@tonic-gateuse vars qw($USE_LITERALS $GUTSPATH);
9*0Sstevel@tonic-gate
10*0Sstevel@tonic-gateBEGIN {
11*0Sstevel@tonic-gate  $GUTSPATH = __FILE__;
12*0Sstevel@tonic-gate  *DEBUG = sub () {0} unless defined &DEBUG;
13*0Sstevel@tonic-gate}
14*0Sstevel@tonic-gate
15*0Sstevel@tonic-gateuse utf8;
16*0Sstevel@tonic-gate
17*0Sstevel@tonic-gatesub _compile {
18*0Sstevel@tonic-gate  # This big scary routine compiles an entry.
19*0Sstevel@tonic-gate  # It returns either a coderef if there's brackety bits in this, or
20*0Sstevel@tonic-gate  #  otherwise a ref to a scalar.
21*0Sstevel@tonic-gate
22*0Sstevel@tonic-gate  my $target = ref($_[0]) || $_[0];
23*0Sstevel@tonic-gate
24*0Sstevel@tonic-gate  my(@code);
25*0Sstevel@tonic-gate  my(@c) = (''); # "chunks" -- scratch.
26*0Sstevel@tonic-gate  my $call_count = 0;
27*0Sstevel@tonic-gate  my $big_pile = '';
28*0Sstevel@tonic-gate  {
29*0Sstevel@tonic-gate    my $in_group = 0; # start out outside a group
30*0Sstevel@tonic-gate    my($m, @params); # scratch
31*0Sstevel@tonic-gate
32*0Sstevel@tonic-gate    while($_[1] =~  # Iterate over chunks.
33*0Sstevel@tonic-gate     m<\G(
34*0Sstevel@tonic-gate       [^\~\[\]]+  # non-~[] stuff
35*0Sstevel@tonic-gate       |
36*0Sstevel@tonic-gate       ~.       # ~[, ~], ~~, ~other
37*0Sstevel@tonic-gate       |
38*0Sstevel@tonic-gate       \[          # [ presumably opening a group
39*0Sstevel@tonic-gate       |
40*0Sstevel@tonic-gate       \]          # ] presumably closing a group
41*0Sstevel@tonic-gate       |
42*0Sstevel@tonic-gate       ~           # terminal ~ ?
43*0Sstevel@tonic-gate       |
44*0Sstevel@tonic-gate       $
45*0Sstevel@tonic-gate     )>xgs
46*0Sstevel@tonic-gate    ) {
47*0Sstevel@tonic-gate      print "  \"$1\"\n" if DEBUG > 2;
48*0Sstevel@tonic-gate
49*0Sstevel@tonic-gate      if($1 eq '[' or $1 eq '') {       # "[" or end
50*0Sstevel@tonic-gate        # Whether this is "[" or end, force processing of any
51*0Sstevel@tonic-gate        #  preceding literal.
52*0Sstevel@tonic-gate        if($in_group) {
53*0Sstevel@tonic-gate          if($1 eq '') {
54*0Sstevel@tonic-gate            $target->_die_pointing($_[1], "Unterminated bracket group");
55*0Sstevel@tonic-gate          } else {
56*0Sstevel@tonic-gate            $target->_die_pointing($_[1], "You can't nest bracket groups");
57*0Sstevel@tonic-gate          }
58*0Sstevel@tonic-gate        } else {
59*0Sstevel@tonic-gate          if($1 eq '') {
60*0Sstevel@tonic-gate            print "   [end-string]\n" if DEBUG > 2;
61*0Sstevel@tonic-gate          } else {
62*0Sstevel@tonic-gate            $in_group = 1;
63*0Sstevel@tonic-gate          }
64*0Sstevel@tonic-gate          die "How come \@c is empty?? in <$_[1]>" unless @c; # sanity
65*0Sstevel@tonic-gate          if(length $c[-1]) {
66*0Sstevel@tonic-gate            # Now actually processing the preceding literal
67*0Sstevel@tonic-gate            $big_pile .= $c[-1];
68*0Sstevel@tonic-gate            if($USE_LITERALS and (
69*0Sstevel@tonic-gate              (ord('A') == 65)
70*0Sstevel@tonic-gate               ? $c[-1] !~ m<[^\x20-\x7E]>s
71*0Sstevel@tonic-gate                  # ASCII very safe chars
72*0Sstevel@tonic-gate               : $c[-1] !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s
73*0Sstevel@tonic-gate                  # EBCDIC very safe chars
74*0Sstevel@tonic-gate            )) {
75*0Sstevel@tonic-gate              # normal case -- all very safe chars
76*0Sstevel@tonic-gate              $c[-1] =~ s/'/\\'/g;
77*0Sstevel@tonic-gate              push @code, q{ '} . $c[-1] . "',\n";
78*0Sstevel@tonic-gate              $c[-1] = ''; # reuse this slot
79*0Sstevel@tonic-gate            } else {
80*0Sstevel@tonic-gate              push @code, ' $c[' . $#c . "],\n";
81*0Sstevel@tonic-gate              push @c, ''; # new chunk
82*0Sstevel@tonic-gate            }
83*0Sstevel@tonic-gate          }
84*0Sstevel@tonic-gate           # else just ignore the empty string.
85*0Sstevel@tonic-gate        }
86*0Sstevel@tonic-gate
87*0Sstevel@tonic-gate      } elsif($1 eq ']') {  # "]"
88*0Sstevel@tonic-gate        # close group -- go back in-band
89*0Sstevel@tonic-gate        if($in_group) {
90*0Sstevel@tonic-gate          $in_group = 0;
91*0Sstevel@tonic-gate
92*0Sstevel@tonic-gate          print "   --Closing group [$c[-1]]\n" if DEBUG > 2;
93*0Sstevel@tonic-gate
94*0Sstevel@tonic-gate          # And now process the group...
95*0Sstevel@tonic-gate
96*0Sstevel@tonic-gate          if(!length($c[-1]) or $c[-1] =~ m/^\s+$/s) {
97*0Sstevel@tonic-gate            DEBUG > 2 and print "   -- (Ignoring)\n";
98*0Sstevel@tonic-gate            $c[-1] = ''; # reset out chink
99*0Sstevel@tonic-gate            next;
100*0Sstevel@tonic-gate          }
101*0Sstevel@tonic-gate
102*0Sstevel@tonic-gate           #$c[-1] =~ s/^\s+//s;
103*0Sstevel@tonic-gate           #$c[-1] =~ s/\s+$//s;
104*0Sstevel@tonic-gate          ($m,@params) = split(",", $c[-1], -1);  # was /\s*,\s*/
105*0Sstevel@tonic-gate
106*0Sstevel@tonic-gate          # A bit of a hack -- we've turned "~,"'s into DELs, so turn
107*0Sstevel@tonic-gate          #  'em into real commas here.
108*0Sstevel@tonic-gate          if (ord('A') == 65) { # ASCII, etc
109*0Sstevel@tonic-gate            foreach($m, @params) { tr/\x7F/,/ }
110*0Sstevel@tonic-gate          } else {              # EBCDIC (1047, 0037, POSIX-BC)
111*0Sstevel@tonic-gate            # Thanks to Peter Prymmer for the EBCDIC handling
112*0Sstevel@tonic-gate            foreach($m, @params) { tr/\x07/,/ }
113*0Sstevel@tonic-gate          }
114*0Sstevel@tonic-gate
115*0Sstevel@tonic-gate          # Special-case handling of some method names:
116*0Sstevel@tonic-gate          if($m eq '_*' or $m =~ m<^_(-?\d+)$>s) {
117*0Sstevel@tonic-gate            # Treat [_1,...] as [,_1,...], etc.
118*0Sstevel@tonic-gate            unshift @params, $m;
119*0Sstevel@tonic-gate            $m = '';
120*0Sstevel@tonic-gate          } elsif($m eq '*') {
121*0Sstevel@tonic-gate            $m = 'quant'; # "*" for "times": "4 cars" is 4 times "cars"
122*0Sstevel@tonic-gate          } elsif($m eq '#') {
123*0Sstevel@tonic-gate            $m = 'numf';  # "#" for "number": [#,_1] for "the number _1"
124*0Sstevel@tonic-gate          }
125*0Sstevel@tonic-gate
126*0Sstevel@tonic-gate          # Most common case: a simple, legal-looking method name
127*0Sstevel@tonic-gate          if($m eq '') {
128*0Sstevel@tonic-gate            # 0-length method name means to just interpolate:
129*0Sstevel@tonic-gate            push @code, ' (';
130*0Sstevel@tonic-gate          } elsif($m =~ m<^\w+(?:\:\:\w+)*$>s
131*0Sstevel@tonic-gate            and $m !~ m<(?:^|\:)\d>s
132*0Sstevel@tonic-gate             # exclude starting a (sub)package or symbol with a digit
133*0Sstevel@tonic-gate          ) {
134*0Sstevel@tonic-gate            # Yes, it even supports the demented (and undocumented?)
135*0Sstevel@tonic-gate            #  $obj->Foo::bar(...) syntax.
136*0Sstevel@tonic-gate            $target->_die_pointing(
137*0Sstevel@tonic-gate              $_[1], "Can't (yet?) use \"SUPER::\" in a bracket-group method",
138*0Sstevel@tonic-gate              2 + length($c[-1])
139*0Sstevel@tonic-gate            )
140*0Sstevel@tonic-gate             if $m =~ m/^SUPER::/s;
141*0Sstevel@tonic-gate              # Because for SUPER:: to work, we'd have to compile this into
142*0Sstevel@tonic-gate              #  the right package, and that seems just not worth the bother,
143*0Sstevel@tonic-gate              #  unless someone convinces me otherwise.
144*0Sstevel@tonic-gate
145*0Sstevel@tonic-gate            push @code, ' $_[0]->' . $m . '(';
146*0Sstevel@tonic-gate          } else {
147*0Sstevel@tonic-gate            # TODO: implement something?  or just too icky to consider?
148*0Sstevel@tonic-gate            $target->_die_pointing(
149*0Sstevel@tonic-gate             $_[1],
150*0Sstevel@tonic-gate             "Can't use \"$m\" as a method name in bracket group",
151*0Sstevel@tonic-gate             2 + length($c[-1])
152*0Sstevel@tonic-gate            );
153*0Sstevel@tonic-gate          }
154*0Sstevel@tonic-gate
155*0Sstevel@tonic-gate          pop @c; # we don't need that chunk anymore
156*0Sstevel@tonic-gate          ++$call_count;
157*0Sstevel@tonic-gate
158*0Sstevel@tonic-gate          foreach my $p (@params) {
159*0Sstevel@tonic-gate            if($p eq '_*') {
160*0Sstevel@tonic-gate              # Meaning: all parameters except $_[0]
161*0Sstevel@tonic-gate              $code[-1] .= ' @_[1 .. $#_], ';
162*0Sstevel@tonic-gate               # and yes, that does the right thing for all @_ < 3
163*0Sstevel@tonic-gate            } elsif($p =~ m<^_(-?\d+)$>s) {
164*0Sstevel@tonic-gate              # _3 meaning $_[3]
165*0Sstevel@tonic-gate              $code[-1] .= '$_[' . (0 + $1) . '], ';
166*0Sstevel@tonic-gate            } elsif($USE_LITERALS and (
167*0Sstevel@tonic-gate              (ord('A') == 65)
168*0Sstevel@tonic-gate               ? $p !~ m<[^\x20-\x7E]>s
169*0Sstevel@tonic-gate                  # ASCII very safe chars
170*0Sstevel@tonic-gate               : $p !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s
171*0Sstevel@tonic-gate                  # EBCDIC very safe chars
172*0Sstevel@tonic-gate            )) {
173*0Sstevel@tonic-gate              # Normal case: a literal containing only safe characters
174*0Sstevel@tonic-gate              $p =~ s/'/\\'/g;
175*0Sstevel@tonic-gate              $code[-1] .= q{'} . $p . q{', };
176*0Sstevel@tonic-gate            } else {
177*0Sstevel@tonic-gate              # Stow it on the chunk-stack, and just refer to that.
178*0Sstevel@tonic-gate              push @c, $p;
179*0Sstevel@tonic-gate              push @code, ' $c[' . $#c . "], ";
180*0Sstevel@tonic-gate            }
181*0Sstevel@tonic-gate          }
182*0Sstevel@tonic-gate          $code[-1] .= "),\n";
183*0Sstevel@tonic-gate
184*0Sstevel@tonic-gate          push @c, '';
185*0Sstevel@tonic-gate        } else {
186*0Sstevel@tonic-gate          $target->_die_pointing($_[1], "Unbalanced ']'");
187*0Sstevel@tonic-gate        }
188*0Sstevel@tonic-gate
189*0Sstevel@tonic-gate      } elsif(substr($1,0,1) ne '~') {
190*0Sstevel@tonic-gate        # it's stuff not containing "~" or "[" or "]"
191*0Sstevel@tonic-gate        # i.e., a literal blob
192*0Sstevel@tonic-gate        $c[-1] .= $1;
193*0Sstevel@tonic-gate
194*0Sstevel@tonic-gate      } elsif($1 eq '~~') { # "~~"
195*0Sstevel@tonic-gate        $c[-1] .= '~';
196*0Sstevel@tonic-gate
197*0Sstevel@tonic-gate      } elsif($1 eq '~[') { # "~["
198*0Sstevel@tonic-gate        $c[-1] .= '[';
199*0Sstevel@tonic-gate
200*0Sstevel@tonic-gate      } elsif($1 eq '~]') { # "~]"
201*0Sstevel@tonic-gate        $c[-1] .= ']';
202*0Sstevel@tonic-gate
203*0Sstevel@tonic-gate      } elsif($1 eq '~,') { # "~,"
204*0Sstevel@tonic-gate        if($in_group) {
205*0Sstevel@tonic-gate          # This is a hack, based on the assumption that no-one will actually
206*0Sstevel@tonic-gate          # want a DEL inside a bracket group.  Let's hope that's it's true.
207*0Sstevel@tonic-gate          if (ord('A') == 65) { # ASCII etc
208*0Sstevel@tonic-gate            $c[-1] .= "\x7F";
209*0Sstevel@tonic-gate          } else {              # EBCDIC (cp 1047, 0037, POSIX-BC)
210*0Sstevel@tonic-gate            $c[-1] .= "\x07";
211*0Sstevel@tonic-gate          }
212*0Sstevel@tonic-gate        } else {
213*0Sstevel@tonic-gate          $c[-1] .= '~,';
214*0Sstevel@tonic-gate        }
215*0Sstevel@tonic-gate
216*0Sstevel@tonic-gate      } elsif($1 eq '~') { # possible only at string-end, it seems.
217*0Sstevel@tonic-gate        $c[-1] .= '~';
218*0Sstevel@tonic-gate
219*0Sstevel@tonic-gate      } else {
220*0Sstevel@tonic-gate        # It's a "~X" where X is not a special character.
221*0Sstevel@tonic-gate        # Consider it a literal ~ and X.
222*0Sstevel@tonic-gate        $c[-1] .= $1;
223*0Sstevel@tonic-gate      }
224*0Sstevel@tonic-gate    }
225*0Sstevel@tonic-gate  }
226*0Sstevel@tonic-gate
227*0Sstevel@tonic-gate  if($call_count) {
228*0Sstevel@tonic-gate    undef $big_pile; # Well, nevermind that.
229*0Sstevel@tonic-gate  } else {
230*0Sstevel@tonic-gate    # It's all literals!  Ahwell, that can happen.
231*0Sstevel@tonic-gate    # So don't bother with the eval.  Return a SCALAR reference.
232*0Sstevel@tonic-gate    return \$big_pile;
233*0Sstevel@tonic-gate  }
234*0Sstevel@tonic-gate
235*0Sstevel@tonic-gate  die "Last chunk isn't null??" if @c and length $c[-1]; # sanity
236*0Sstevel@tonic-gate  print scalar(@c), " chunks under closure\n" if DEBUG;
237*0Sstevel@tonic-gate  if(@code == 0) { # not possible?
238*0Sstevel@tonic-gate    print "Empty code\n" if DEBUG;
239*0Sstevel@tonic-gate    return \'';
240*0Sstevel@tonic-gate  } elsif(@code > 1) { # most cases, presumably!
241*0Sstevel@tonic-gate    unshift @code, "join '',\n";
242*0Sstevel@tonic-gate  }
243*0Sstevel@tonic-gate  unshift @code, "use strict; sub {\n";
244*0Sstevel@tonic-gate  push @code, "}\n";
245*0Sstevel@tonic-gate
246*0Sstevel@tonic-gate  print @code if DEBUG;
247*0Sstevel@tonic-gate  my $sub = eval(join '', @code);
248*0Sstevel@tonic-gate  die "$@ while evalling" . join('', @code) if $@; # Should be impossible.
249*0Sstevel@tonic-gate  return $sub;
250*0Sstevel@tonic-gate}
251*0Sstevel@tonic-gate
252*0Sstevel@tonic-gate# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
253*0Sstevel@tonic-gate
254*0Sstevel@tonic-gatesub _die_pointing {
255*0Sstevel@tonic-gate  # This is used by _compile to throw a fatal error
256*0Sstevel@tonic-gate  my $target = shift; # class name
257*0Sstevel@tonic-gate  # ...leaving $_[0] the error-causing text, and $_[1] the error message
258*0Sstevel@tonic-gate
259*0Sstevel@tonic-gate  my $i = index($_[0], "\n");
260*0Sstevel@tonic-gate
261*0Sstevel@tonic-gate  my $pointy;
262*0Sstevel@tonic-gate  my $pos = pos($_[0]) - (defined($_[2]) ? $_[2] : 0) - 1;
263*0Sstevel@tonic-gate  if($pos < 1) {
264*0Sstevel@tonic-gate    $pointy = "^=== near there\n";
265*0Sstevel@tonic-gate  } else { # we need to space over
266*0Sstevel@tonic-gate    my $first_tab = index($_[0], "\t");
267*0Sstevel@tonic-gate    if($pos > 2 and ( -1 == $first_tab  or  $first_tab > pos($_[0]))) {
268*0Sstevel@tonic-gate      # No tabs, or the first tab is harmlessly after where we will point to,
269*0Sstevel@tonic-gate      # AND we're far enough from the margin that we can draw a proper arrow.
270*0Sstevel@tonic-gate      $pointy = ('=' x $pos) . "^ near there\n";
271*0Sstevel@tonic-gate    } else {
272*0Sstevel@tonic-gate      # tabs screw everything up!
273*0Sstevel@tonic-gate      $pointy = substr($_[0],0,$pos);
274*0Sstevel@tonic-gate      $pointy =~ tr/\t //cd;
275*0Sstevel@tonic-gate       # make everything into whitespace, but preseving tabs
276*0Sstevel@tonic-gate      $pointy .= "^=== near there\n";
277*0Sstevel@tonic-gate    }
278*0Sstevel@tonic-gate  }
279*0Sstevel@tonic-gate
280*0Sstevel@tonic-gate  my $errmsg = "$_[1], in\:\n$_[0]";
281*0Sstevel@tonic-gate
282*0Sstevel@tonic-gate  if($i == -1) {
283*0Sstevel@tonic-gate    # No newline.
284*0Sstevel@tonic-gate    $errmsg .= "\n" . $pointy;
285*0Sstevel@tonic-gate  } elsif($i == (length($_[0]) - 1)  ) {
286*0Sstevel@tonic-gate    # Already has a newline at end.
287*0Sstevel@tonic-gate    $errmsg .= $pointy;
288*0Sstevel@tonic-gate  } else {
289*0Sstevel@tonic-gate    # don't bother with the pointy bit, I guess.
290*0Sstevel@tonic-gate  }
291*0Sstevel@tonic-gate  Carp::croak( "$errmsg via $target, as used" );
292*0Sstevel@tonic-gate}
293*0Sstevel@tonic-gate
294*0Sstevel@tonic-gate1;
295*0Sstevel@tonic-gate
296