xref: /openbsd-src/gnu/usr.bin/perl/dist/Locale-Maketext/lib/Locale/Maketext/Guts.pm (revision d13be5d47e4149db2549a9828e244d59dbc43f15)
1package Locale::Maketext::Guts;
2
3$VERSION = '1.13';
4
5BEGIN {
6    # Just so we're nice and define SOMETHING in "our" package.
7    *zorp = sub { return scalar @_ } unless defined &zorp;
8}
9
10package Locale::Maketext;
11use strict;
12use vars qw($USE_LITERALS $GUTSPATH);
13
14BEGIN {
15    $GUTSPATH = __FILE__;
16    *DEBUG = sub () {0} unless defined &DEBUG;
17}
18
19use utf8;
20
21sub _compile {
22    # This big scary routine compiles an entry.
23    # It returns either a coderef if there's brackety bits in this, or
24    #  otherwise a ref to a scalar.
25
26    my $target = ref($_[0]) || $_[0];
27
28    my(@code);
29    my(@c) = (''); # "chunks" -- scratch.
30    my $call_count = 0;
31    my $big_pile = '';
32    {
33        my $in_group = 0; # start out outside a group
34        my($m, @params); # scratch
35
36        while($_[1] =~  # Iterate over chunks.
37            m/\G(
38                [^\~\[\]]+  # non-~[] stuff
39                |
40                ~.       # ~[, ~], ~~, ~other
41                |
42                \[          # [ presumably opening a group
43                |
44                \]          # ] presumably closing a group
45                |
46                ~           # terminal ~ ?
47                |
48                $
49            )/xgs
50        ) {
51            DEBUG>2 and print qq{  "$1"\n};
52
53            if($1 eq '[' or $1 eq '') {       # "[" or end
54                # Whether this is "[" or end, force processing of any
55                #  preceding literal.
56                if($in_group) {
57                    if($1 eq '') {
58                        $target->_die_pointing($_[1], 'Unterminated bracket group');
59                    }
60                    else {
61                        $target->_die_pointing($_[1], 'You can\'t nest bracket groups');
62                    }
63                }
64                else {
65                    if ($1 eq '') {
66                        DEBUG>2 and print "   [end-string]\n";
67                    }
68                    else {
69                        $in_group = 1;
70                    }
71                    die "How come \@c is empty?? in <$_[1]>" unless @c; # sanity
72                    if(length $c[-1]) {
73                        # Now actually processing the preceding literal
74                        $big_pile .= $c[-1];
75                        if($USE_LITERALS and (
76                                (ord('A') == 65)
77                                ? $c[-1] !~ m/[^\x20-\x7E]/s
78                                # ASCII very safe chars
79                                : $c[-1] !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s
80                                # EBCDIC very safe chars
81                            )) {
82                            # normal case -- all very safe chars
83                            $c[-1] =~ s/'/\\'/g;
84                            push @code, q{ '} . $c[-1] . "',\n";
85                            $c[-1] = ''; # reuse this slot
86                        }
87                        else {
88                            push @code, ' $c[' . $#c . "],\n";
89                            push @c, ''; # new chunk
90                        }
91                    }
92                    # else just ignore the empty string.
93                }
94
95            }
96            elsif($1 eq ']') {  # "]"
97                # close group -- go back in-band
98                if($in_group) {
99                    $in_group = 0;
100
101                    DEBUG>2 and print "   --Closing group [$c[-1]]\n";
102
103                    # And now process the group...
104
105                    if(!length($c[-1]) or $c[-1] =~ m/^\s+$/s) {
106                        DEBUG > 2 and print "   -- (Ignoring)\n";
107                        $c[-1] = ''; # reset out chink
108                        next;
109                    }
110
111                    #$c[-1] =~ s/^\s+//s;
112                    #$c[-1] =~ s/\s+$//s;
113                    ($m,@params) = split(/,/, $c[-1], -1);  # was /\s*,\s*/
114
115                    # A bit of a hack -- we've turned "~,"'s into DELs, so turn
116                    #  'em into real commas here.
117                    if (ord('A') == 65) { # ASCII, etc
118                        foreach($m, @params) { tr/\x7F/,/ }
119                    }
120                    else {              # EBCDIC (1047, 0037, POSIX-BC)
121                        # Thanks to Peter Prymmer for the EBCDIC handling
122                        foreach($m, @params) { tr/\x07/,/ }
123                    }
124
125                    # Special-case handling of some method names:
126                    if($m eq '_*' or $m =~ m/^_(-?\d+)$/s) {
127                        # Treat [_1,...] as [,_1,...], etc.
128                        unshift @params, $m;
129                        $m = '';
130                    }
131                    elsif($m eq '*') {
132                        $m = 'quant'; # "*" for "times": "4 cars" is 4 times "cars"
133                    }
134                    elsif($m eq '#') {
135                        $m = 'numf';  # "#" for "number": [#,_1] for "the number _1"
136                    }
137
138                    # Most common case: a simple, legal-looking method name
139                    if($m eq '') {
140                        # 0-length method name means to just interpolate:
141                        push @code, ' (';
142                    }
143                    elsif($m =~ /^\w+(?:\:\:\w+)*$/s
144                            and $m !~ m/(?:^|\:)\d/s
145                        # exclude starting a (sub)package or symbol with a digit
146                    ) {
147                        # Yes, it even supports the demented (and undocumented?)
148                        #  $obj->Foo::bar(...) syntax.
149                        $target->_die_pointing(
150                            $_[1], q{Can't use "SUPER::" in a bracket-group method},
151                            2 + length($c[-1])
152                        )
153                        if $m =~ m/^SUPER::/s;
154                        # Because for SUPER:: to work, we'd have to compile this into
155                        #  the right package, and that seems just not worth the bother,
156                        #  unless someone convinces me otherwise.
157
158                        push @code, ' $_[0]->' . $m . '(';
159                    }
160                    else {
161                        # TODO: implement something?  or just too icky to consider?
162                        $target->_die_pointing(
163                            $_[1],
164                            "Can't use \"$m\" as a method name in bracket group",
165                            2 + length($c[-1])
166                        );
167                    }
168
169                    pop @c; # we don't need that chunk anymore
170                    ++$call_count;
171
172                    foreach my $p (@params) {
173                        if($p eq '_*') {
174                            # Meaning: all parameters except $_[0]
175                            $code[-1] .= ' @_[1 .. $#_], ';
176                            # and yes, that does the right thing for all @_ < 3
177                        }
178                        elsif($p =~ m/^_(-?\d+)$/s) {
179                            # _3 meaning $_[3]
180                            $code[-1] .= '$_[' . (0 + $1) . '], ';
181                        }
182                        elsif($USE_LITERALS and (
183                                (ord('A') == 65)
184                                ? $p !~ m/[^\x20-\x7E]/s
185                                # ASCII very safe chars
186                                : $p !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s
187                                # EBCDIC very safe chars
188                            )) {
189                            # Normal case: a literal containing only safe characters
190                            $p =~ s/'/\\'/g;
191                            $code[-1] .= q{'} . $p . q{', };
192                        }
193                        else {
194                            # Stow it on the chunk-stack, and just refer to that.
195                            push @c, $p;
196                            push @code, ' $c[' . $#c . '], ';
197                        }
198                    }
199                    $code[-1] .= "),\n";
200
201                    push @c, '';
202                }
203                else {
204                    $target->_die_pointing($_[1], q{Unbalanced ']'});
205                }
206
207            }
208            elsif(substr($1,0,1) ne '~') {
209                # it's stuff not containing "~" or "[" or "]"
210                # i.e., a literal blob
211                $c[-1] .= $1;
212
213            }
214            elsif($1 eq '~~') { # "~~"
215                $c[-1] .= '~';
216
217            }
218            elsif($1 eq '~[') { # "~["
219                $c[-1] .= '[';
220
221            }
222            elsif($1 eq '~]') { # "~]"
223                $c[-1] .= ']';
224
225            }
226            elsif($1 eq '~,') { # "~,"
227                if($in_group) {
228                    # This is a hack, based on the assumption that no-one will actually
229                    # want a DEL inside a bracket group.  Let's hope that's it's true.
230                    if (ord('A') == 65) { # ASCII etc
231                        $c[-1] .= "\x7F";
232                    }
233                    else {              # EBCDIC (cp 1047, 0037, POSIX-BC)
234                        $c[-1] .= "\x07";
235                    }
236                }
237                else {
238                    $c[-1] .= '~,';
239                }
240
241            }
242            elsif($1 eq '~') { # possible only at string-end, it seems.
243                $c[-1] .= '~';
244
245            }
246            else {
247                # It's a "~X" where X is not a special character.
248                # Consider it a literal ~ and X.
249                $c[-1] .= $1;
250            }
251        }
252    }
253
254    if($call_count) {
255        undef $big_pile; # Well, nevermind that.
256    }
257    else {
258        # It's all literals!  Ahwell, that can happen.
259        # So don't bother with the eval.  Return a SCALAR reference.
260        return \$big_pile;
261    }
262
263    die q{Last chunk isn't null??} if @c and length $c[-1]; # sanity
264    DEBUG and warn scalar(@c), " chunks under closure\n";
265    if(@code == 0) { # not possible?
266        DEBUG and warn "Empty code\n";
267        return \'';
268    }
269    elsif(@code > 1) { # most cases, presumably!
270        unshift @code, "join '',\n";
271    }
272    unshift @code, "use strict; sub {\n";
273    push @code, "}\n";
274
275    DEBUG and warn @code;
276    my $sub = eval(join '', @code);
277    die "$@ while evalling" . join('', @code) if $@; # Should be impossible.
278    return $sub;
279}
280
281# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
282
283sub _die_pointing {
284    # This is used by _compile to throw a fatal error
285    my $target = shift; # class name
286    # ...leaving $_[0] the error-causing text, and $_[1] the error message
287
288    my $i = index($_[0], "\n");
289
290    my $pointy;
291    my $pos = pos($_[0]) - (defined($_[2]) ? $_[2] : 0) - 1;
292    if($pos < 1) {
293        $pointy = "^=== near there\n";
294    }
295    else { # we need to space over
296        my $first_tab = index($_[0], "\t");
297        if($pos > 2 and ( -1 == $first_tab  or  $first_tab > pos($_[0]))) {
298            # No tabs, or the first tab is harmlessly after where we will point to,
299            # AND we're far enough from the margin that we can draw a proper arrow.
300            $pointy = ('=' x $pos) . "^ near there\n";
301        }
302        else {
303            # tabs screw everything up!
304            $pointy = substr($_[0],0,$pos);
305            $pointy =~ tr/\t //cd;
306            # make everything into whitespace, but preseving tabs
307            $pointy .= "^=== near there\n";
308        }
309    }
310
311    my $errmsg = "$_[1], in\:\n$_[0]";
312
313    if($i == -1) {
314        # No newline.
315        $errmsg .= "\n" . $pointy;
316    }
317    elsif($i == (length($_[0]) - 1)  ) {
318        # Already has a newline at end.
319        $errmsg .= $pointy;
320    }
321    else {
322        # don't bother with the pointy bit, I guess.
323    }
324    Carp::croak( "$errmsg via $target, as used" );
325}
326
3271;
328
329