xref: /openbsd-src/gnu/usr.bin/perl/regen/embed.pl (revision 3d61058aa5c692477b6d18acfbbdb653a9930ff9)
1#!/usr/bin/perl -w
2#
3# Regenerate (overwriting only if changed):
4#
5#    embed.h
6#    embedvar.h
7#    proto.h
8#
9# from information stored in
10#
11#    embed.fnc
12#    intrpvar.h
13#    perlvars.h
14#    regen/opcodes
15#
16# Accepts the standard regen_lib -q and -v args.
17#
18# This script is normally invoked from regen.pl.
19
20require 5.004;  # keep this compatible, an old perl is all we may have before
21                # we build the new one
22
23use strict;
24
25BEGIN {
26    # Get function prototypes
27    require './regen/regen_lib.pl';
28    require './regen/embed_lib.pl';
29}
30
31my $unflagged_pointers;
32my @az = ('a'..'z');
33
34#
35# See database of global and static function prototypes in embed.fnc
36# This is used to generate prototype headers under various configurations,
37# export symbols lists for different platforms, and macros to provide an
38# implicit interpreter context argument.
39#
40
41my $error_count = 0;
42sub die_at_end ($) { # Keeps going for now, but makes sure the regen doesn't
43                     # succeed.
44    warn shift;
45    $error_count++;
46}
47
48sub full_name ($$) { # Returns the function name with potentially the
49                     # prefixes 'S_' or 'Perl_'
50    my ($func, $flags) = @_;
51
52    return "Perl_$func" if $flags =~ /[ps]/;
53    return "S_$func" if $flags =~ /[SIi]/;
54    return $func;
55}
56
57sub open_print_header {
58    my ($file, $quote) = @_;
59
60    return open_new($file, '>',
61                    { file => $file, style => '*', by => 'regen/embed.pl',
62                      from => [
63                               'embed.fnc',
64                               'intrpvar.h',
65                               'perlvars.h',
66                               'regen/opcodes',
67                               'regen/embed.pl',
68                               'regen/embed_lib.pl',
69                               'regen/HeaderParser.pm',
70                           ],
71                      final => "\nEdit those files and run 'make regen_headers' to effect changes.\n",
72                      copyright => [1993 .. 2022],
73                      quote => $quote });
74}
75
76
77sub open_buf_out {
78    $_[0] //= "";
79    open my $fh,">", \$_[0]
80        or die "Failed to open buffer: $!";
81    return $fh;
82}
83
84# generate proto.h
85sub generate_proto_h {
86    my ($all)= @_;
87    my $pr = open_buf_out(my $proto_buffer);
88    my $ret;
89
90    foreach (@$all) {
91        if ($_->{type} ne "content") {
92            print $pr "$_->{line}";
93            next;
94        }
95        my $embed= $_->{embed}
96            or next;
97
98        my $level= $_->{level};
99        my $ind= $level ? " " : "";
100        $ind .= "  " x ($level-1) if $level>1;
101        my $inner_ind= $ind ? "  " : " ";
102
103        my ($flags,$retval,$plain_func,$args) = @{$embed}{qw(flags return_type name args)};
104        if ($flags =~ / ( [^AabCDdEefFGhIiMmNnOoPpRrSsTUuvWXx;] ) /x) {
105            die_at_end "flag $1 is not legal (for function $plain_func)";
106        }
107        my @nonnull;
108        my $args_assert_line = ( $flags !~ /[Gm]/ );
109        my $has_depth = ( $flags =~ /W/ );
110        my $has_context = ( $flags !~ /T/ );
111        my $never_returns = ( $flags =~ /r/ );
112        my $binarycompat = ( $flags =~ /b/ );
113        my $commented_out = ( $flags =~ /m/ );
114        my $is_malloc = ( $flags =~ /a/ );
115        my $can_ignore = ( $flags !~ /R/ ) && ( $flags !~ /P/ ) && !$is_malloc;
116        my @names_of_nn;
117        my $func;
118
119        if (! $can_ignore && $retval eq 'void') {
120            warn "It is nonsensical to require the return value of a void function ($plain_func) to be checked";
121        }
122
123        die_at_end "$plain_func: S and p flags are mutually exclusive"
124                                            if $flags =~ /S/ && $flags =~ /p/;
125        die_at_end "$plain_func: m and $1 flags are mutually exclusive"
126                                        if $flags =~ /m/ && $flags =~ /([pS])/;
127
128        die_at_end "$plain_func: u flag only usable with m" if $flags =~ /u/
129                                                            && $flags !~ /m/;
130
131        my ($static_flag, @extra_static_flags)= $flags =~/([SsIi])/g;
132
133        if (@extra_static_flags) {
134            my $flags_str = join ", ", $static_flag, @extra_static_flags;
135            $flags_str =~ s/, (\w)\z/ and $1/;
136            die_at_end "$plain_func: flags $flags_str are mutually exclusive\n";
137        }
138
139        my $static_inline = 0;
140        if ($static_flag) {
141            my $type;
142            if ($never_returns) {
143                $type = {
144                    'S' => 'PERL_STATIC_NO_RET',
145                    's' => 'PERL_STATIC_NO_RET',
146                    'i' => 'PERL_STATIC_INLINE_NO_RET',
147                    'I' => 'PERL_STATIC_FORCE_INLINE_NO_RET'
148                }->{$static_flag};
149            }
150            else {
151                $type = {
152                    'S' => 'STATIC',
153                    's' => 'STATIC',
154                    'i' => 'PERL_STATIC_INLINE',
155                    'I' => 'PERL_STATIC_FORCE_INLINE'
156                }->{$static_flag};
157            }
158            $retval = "$type $retval";
159            die_at_end "Don't declare static function '$plain_func' pure" if $flags =~ /P/;
160            $static_inline = $type =~ /^PERL_STATIC(?:_FORCE)?_INLINE/;
161        }
162        else {
163            if ($never_returns) {
164                $retval = "PERL_CALLCONV_NO_RET $retval";
165            }
166            else {
167                $retval = "PERL_CALLCONV $retval";
168            }
169        }
170
171        $func = full_name($plain_func, $flags);
172
173        die_at_end "For '$plain_func', M flag requires p flag"
174                                            if $flags =~ /M/ && $flags !~ /p/;
175        my $C_required_flags = '[pIimbs]';
176        die_at_end
177            "For '$plain_func', C flag requires one of $C_required_flags] flags"
178                                                if $flags =~ /C/
179                                                && ($flags !~ /$C_required_flags/
180
181                                                   # Notwithstanding the
182                                                   # above, if the name won't
183                                                   # clash with a user name,
184                                                   # it's ok.
185                                                && $plain_func !~ /^[Pp]erl/);
186
187        die_at_end "For '$plain_func', X flag requires one of [Iip] flags"
188                                            if $flags =~ /X/ && $flags !~ /[Iip]/;
189        die_at_end "For '$plain_func', X and m flags are mutually exclusive"
190                                            if $flags =~ /X/ && $flags =~ /m/;
191        die_at_end "For '$plain_func', [Ii] with [ACX] requires p flag"
192                        if $flags =~ /[Ii]/ && $flags =~ /[ACX]/ && $flags !~ /p/;
193        die_at_end "For '$plain_func', b and m flags are mutually exclusive"
194                 . " (try M flag)" if $flags =~ /b/ && $flags =~ /m/;
195        die_at_end "For '$plain_func', b flag without M flag requires D flag"
196                            if $flags =~ /b/ && $flags !~ /M/ && $flags !~ /D/;
197        die_at_end "For '$plain_func', I and i flags are mutually exclusive"
198                                            if $flags =~ /I/ && $flags =~ /i/;
199
200        $ret = "";
201        $ret .= "$retval\n";
202        $ret .= "$func(";
203        if ( $has_context ) {
204            $ret .= @$args ? "pTHX_ " : "pTHX";
205        }
206        if (@$args) {
207            die_at_end "n flag is contradicted by having arguments"
208                                                                if $flags =~ /n/;
209            my $n;
210            for my $arg ( @$args ) {
211                ++$n;
212                if ($arg =~ / ^ " (.+) " $ /x) {    # Handle literal string
213                    my $name = $1;
214
215                    # Make the string a legal C identifier; 'p' is arbitrary,
216                    # and is because C reserves leading underscores
217                    $name =~ s/^\W/p/a;
218                    $name =~ s/\W/_/ag;
219
220                    $arg = "const char * const $name";
221                    die_at_end 'm flag required for "literal" argument'
222                                                            unless $flags =~ /m/;
223                }
224                elsif (   $args_assert_line
225                       && $arg =~ /\*/
226                       && $arg !~ /\b(NN|NULLOK)\b/ )
227                {
228                    warn "$func: $arg needs NN or NULLOK\n";
229                    ++$unflagged_pointers;
230                }
231                my $nn = ( $arg =~ s/\s*\bNN\b\s+// );
232                push( @nonnull, $n ) if $nn;
233                my $nz = ( $arg =~ s/\s*\bNZ\b\s+// );
234
235                my $nullok = ( $arg =~ s/\s*\bNULLOK\b\s+// ); # strip NULLOK with no effect
236
237                # Make sure each arg has at least a type and a var name.
238                # An arg of "int" is valid C, but want it to be "int foo".
239                my $temp_arg = $arg;
240                $temp_arg =~ s/\*//g;
241                $temp_arg =~ s/\s*\bstruct\b\s*/ /g;
242                if ( ($temp_arg ne "...")
243                     && ($temp_arg !~ /\w+\s+(\w+)(?:\[\d+\])?\s*$/) ) {
244                    die_at_end "$func: $arg ($n) doesn't have a name\n";
245                }
246                if (defined $1 && ($nn||$nz) && !($commented_out && !$binarycompat)) {
247                    push @names_of_nn, $1;
248                }
249            }
250            $ret .= join ", ", @$args;
251        }
252        else {
253            $ret .= "void" if !$has_context;
254        }
255        $ret .= " comma_pDEPTH" if $has_depth;
256        $ret .= ")";
257        my @attrs;
258        if ( $flags =~ /r/ ) {
259            push @attrs, "__attribute__noreturn__";
260        }
261        if ( $flags =~ /D/ ) {
262            push @attrs, "__attribute__deprecated__";
263        }
264        if ( $is_malloc ) {
265            push @attrs, "__attribute__malloc__";
266        }
267        if ( !$can_ignore ) {
268            push @attrs, "__attribute__warn_unused_result__";
269        }
270        if ( $flags =~ /P/ ) {
271            push @attrs, "__attribute__pure__";
272        }
273        if ( $flags =~ /I/ ) {
274            push @attrs, "__attribute__always_inline__";
275        }
276        # roughly the inverse of the rules used in makedef.pl
277        if ( $flags !~ /[AbCeIimSX]/ ) {
278            push @attrs, '__attribute__visibility__("hidden")'
279        }
280        if( $flags =~ /f/ ) {
281            my $prefix  = $has_context ? 'pTHX_' : '';
282            my ($argc, $pat);
283            if (!defined $args->[1]) {
284                use Data::Dumper;
285                die Dumper($_);
286            }
287            if ($args->[-1] eq '...') {
288                $argc   = scalar @$args;
289                $pat    = $argc - 1;
290                $argc   = $prefix . $argc;
291            }
292            else {
293                # don't check args, and guess which arg is the pattern
294                # (one of 'fmt', 'pat', 'f'),
295                $argc = 0;
296                my @fmts = grep $args->[$_] =~ /\b(f|pat|fmt)$/, 0..$#$args;
297                if (@fmts != 1) {
298                    die "embed.pl: '$plain_func': can't determine pattern arg\n";
299                }
300                $pat = $fmts[0] + 1;
301            }
302            my $macro   = grep($_ == $pat, @nonnull)
303                                ? '__attribute__format__'
304                                : '__attribute__format__null_ok__';
305            if ($plain_func =~ /strftime/) {
306                push @attrs, sprintf "%s(__strftime__,%s1,0)", $macro, $prefix;
307            }
308            else {
309                push @attrs, sprintf "%s(__printf__,%s%d,%s)", $macro,
310                                    $prefix, $pat, $argc;
311            }
312        }
313        elsif ((grep { $_ eq '...' } @$args) && $flags !~ /F/) {
314            die_at_end "$plain_func: Function with '...' arguments must have"
315                     . " f or F flag";
316        }
317        if ( @attrs ) {
318            $ret .= "\n";
319            $ret .= join( "\n", map { (" " x 8) . $_ } @attrs );
320        }
321        $ret .= ";";
322        $ret = "/* $ret */" if $commented_out;
323
324        if ($args_assert_line || @names_of_nn) {
325            $ret .= "\n#${ind}define PERL_ARGS_ASSERT_\U$plain_func\E";
326            if (@names_of_nn) {
327                $ret .= " \\\n";
328                my $def = " " x 8;
329                foreach my $ix (0..$#names_of_nn) {
330                    $def .= "assert($names_of_nn[$ix])";
331                    if ($ix == $#names_of_nn) {
332                        $def .= "\n";
333                    } elsif (length $def > 70) {
334                        $ret .= $def . "; \\\n";
335                        $def = " " x 8;
336                    } else {
337                        $def .= "; ";
338                    }
339                }
340                $ret .= $def;
341            }
342        }
343        $ret .= "\n";
344
345        $ret = "#${ind}ifndef PERL_NO_INLINE_FUNCTIONS\n$ret\n#${ind}endif"
346            if $static_inline;
347        $ret = "#${ind}ifndef NO_MATHOMS\n$ret\n#${ind}endif"
348            if $binarycompat;
349
350        $ret .= @attrs ? "\n\n" : "\n";
351
352        print $pr $ret;
353    }
354
355
356    close $pr;
357
358    my $clean= normalize_group_content($proto_buffer);
359
360    my $fh = open_print_header("proto.h");
361    print $fh <<~"EOF";
362    START_EXTERN_C
363    $clean
364    #ifdef PERL_CORE
365    #  include "pp_proto.h"
366    #endif
367    END_EXTERN_C
368    EOF
369
370    read_only_bottom_close_and_rename($fh) if ! $error_count;
371}
372
373{
374    my $hp= HeaderParser->new();
375    sub normalize_group_content {
376        open my $in, "<", \$_[0]
377            or die "Failed to open buffer: $!";
378        $hp->parse_fh($in);
379        my $ppc= sub {
380            my ($self, $line_data)= @_;
381            # re-align defines so that the definitions line up at the 48th col
382            # as much as possible.
383            if ($line_data->{sub_type} eq "#define") {
384                $line_data->{line}=~s/^(\s*#\s*define\s+\S+?(?:\([^()]*\))?\s)(\s*)(\S+)/
385                    sprintf "%-48s%s", $1, $3/e;
386            }
387        };
388        my $clean= $hp->lines_as_str($hp->group_content(),$ppc);
389        return $clean;
390    }
391}
392
393sub normalize_and_print {
394    my ($file, $buffer)= @_;
395    my $fh = open_print_header($file);
396    print $fh normalize_group_content($buffer);
397    read_only_bottom_close_and_rename($fh);
398}
399
400
401sub readvars {
402    my ($file, $pre) = @_;
403    my $hp= HeaderParser->new()->read_file($file);
404    my %seen;
405    foreach my $line_data (@{$hp->lines}) {
406        #next unless $line_data->is_content;
407        my $line= $line_data->line;
408        if ($line=~m/^\s*PERLVARA?I?C?\(\s*$pre\s*,\s*(\w+)/){
409            $seen{$1}++
410                and
411                die_at_end "duplicate symbol $1 while processing $file line "
412                       . ($line_data->start_line_num) . "\n"
413        }
414    }
415    my @keys= sort { lc($a) cmp lc($b) ||
416                        $a  cmp    $b }
417              keys %seen;
418    return @keys;
419}
420
421sub add_indent {
422    #my ($ret, $add, $width)= @_;
423    my $width= $_[2] || 48;
424    $_[0] .= " " x ($width-length($_[0])) if length($_[0])<$width;
425    $_[0] .= " " unless $_[0]=~/\s\z/;
426    if (defined $_[1]) {
427        $_[0] .= $_[1];
428    }
429    return $_[0];
430}
431
432sub indent_define {
433    my ($from, $to, $indent, $width) = @_;
434    $indent = '' unless defined $indent;
435    my $ret= "#${indent}define $from";
436    add_indent($ret,"$to\n",$width);
437}
438
439sub multon {
440    my ($sym,$pre,$ptr,$ind) = @_;
441    $ind//="";
442    indent_define("PL_$sym", "($ptr$pre$sym)", $ind);
443}
444
445sub embed_h {
446    my ($em, $guard, $funcs) = @_;
447
448    my $lines;
449    foreach (@$funcs) {
450        if ($_->{type} ne "content") {
451            $lines .= $_->{line};
452            next;
453        }
454        my $level= $_->{level};
455        my $embed= $_->{embed} or next;
456        my ($flags,$retval,$func,$args) = @{$embed}{qw(flags return_type name args)};
457        my $ret = "";
458        my $ind= $level ? " " : "";
459        $ind .= "  " x ($level-1) if $level>1;
460        my $inner_ind= $ind ? "  " : " ";
461        unless ($flags =~ /[omM]/) {
462            my $argc = scalar @$args;
463            if ($flags =~ /T/) {
464                my $full_name = full_name($func, $flags);
465                next if $full_name eq $func;    # Don't output a no-op.
466                $ret = indent_define($func, $full_name, $ind);
467            }
468            else {
469                my $use_va_list = $argc && $args->[-1] =~ /\.\.\./;
470
471                if($use_va_list) {
472                    # CPP has trouble with empty __VA_ARGS__ and comma joining,
473                    # so we'll have to eat an extra params here.
474                    if($argc < 2) {
475                        die "Cannot use ... as the only parameter to a macro ($func)\n";
476                    }
477                    $argc -= 2;
478                }
479
480                my $paramlist   = join(",", @az[0..$argc-1],
481                    $use_va_list ? ("...") : ());
482                my $replacelist = join(",", @az[0..$argc-1],
483                    $use_va_list ? ("__VA_ARGS__") : ());
484                $ret = "#${ind}define $func($paramlist) ";
485                add_indent($ret,full_name($func, $flags) . "(aTHX");
486                $ret .= "_ " if $replacelist;
487                $ret .= $replacelist;
488                if ($flags =~ /W/) {
489                    if ($replacelist) {
490                        $ret .= " comma_aDEPTH";
491                    } else {
492                        die "Can't use W without other args (currently)";
493                    }
494                }
495                $ret .= ")\n";
496                if($use_va_list and $flags =~ /v/) {
497                    # Make older ones available only when !MULTIPLICITY or PERL_CORE or PERL_WANT_VARARGS
498                    # These should not be done uncondtionally because existing
499                    # code might call e.g. warn() without aTHX in scope.
500                    $ret = "#${ind}if !defined(MULTIPLICITY) || defined(PERL_CORE) || defined(PERL_WANT_VARARGS)\n" .
501                           $ret .
502                           "#${ind}endif\n";
503                }
504            }
505            $ret = "#${ind}ifndef NO_MATHOMS\n$ret#${ind}endif\n" if $flags =~ /b/;
506        }
507        $lines .= $ret;
508    }
509    # remove empty blocks
510    1 while $lines =~ s/^#\s*if.*\n#\s*endif.*\n//mg
511         or $lines =~ s/^(#\s*if)\s+(.*)\n#else.*\n/$1 !($2)\n/mg;
512    if ($guard) {
513        print $em "$guard /* guard */\n";
514        $lines=~s/^#(\s*)/"#".(length($1)?"  ":" ").$1/mge;
515    }
516    print $em $lines;
517    print $em "#endif\n" if $guard;
518}
519
520sub generate_embed_h {
521    my ($all, $api, $ext, $core)= @_;
522
523    my $em= open_buf_out(my $embed_buffer);
524
525    print $em <<~'END';
526    /* (Doing namespace management portably in C is really gross.) */
527
528    /* By defining PERL_NO_SHORT_NAMES (not done by default) the short forms
529     * (like warn instead of Perl_warn) for the API are not defined.
530     * Not defining the short forms is a good thing for cleaner embedding.
531     * BEWARE that a bunch of macros don't have long names, so either must be
532     * added or don't use them if you define this symbol */
533
534    #ifndef PERL_NO_SHORT_NAMES
535
536    /* Hide global symbols */
537
538    END
539
540    embed_h($em, '', $api);
541    embed_h($em, '#if defined(PERL_CORE) || defined(PERL_EXT)', $ext);
542    embed_h($em, '#if defined(PERL_CORE)', $core);
543
544    print $em <<~'END';
545
546    #endif      /* #ifndef PERL_NO_SHORT_NAMES */
547
548    #if !defined(PERL_CORE)
549    /* Compatibility stubs.  Compile extensions with -DPERL_NOCOMPAT to
550     * disable them.
551     */
552    #  define sv_setptrobj(rv,ptr,name) sv_setref_iv(rv,name,PTR2IV(ptr))
553    #  define sv_setptrref(rv,ptr)              sv_setref_iv(rv,NULL,PTR2IV(ptr))
554    #endif
555
556    #if !defined(PERL_CORE) && !defined(PERL_NOCOMPAT)
557
558    /* Compatibility for various misnamed functions.  All functions
559       in the API that begin with "perl_" (not "Perl_") take an explicit
560       interpreter context pointer.
561       The following are not like that, but since they had a "perl_"
562       prefix in previous versions, we provide compatibility macros.
563     */
564    #  define perl_atexit(a,b)          call_atexit(a,b)
565    END
566
567    foreach (@$all) {
568        my $embed= $_->{embed} or next;
569        my ($flags, $retval, $func, $args) = @{$embed}{qw(flags return_type name args)};
570        next unless $flags =~ /O/;
571
572        my $alist = join ",", @az[0..$#$args];
573        my $ret = "#  define perl_$func($alist) ";
574        print $em add_indent($ret,"$func($alist)\n");
575    }
576
577    my @nocontext;
578    {
579        my (%has_va, %has_nocontext);
580        foreach (@$all) {
581            my $embed= $_->{embed}
582                or next;
583            ++$has_va{$embed->{name}} if @{$embed->{args}} and $embed->{args}[-1] =~ /\.\.\./;
584            ++$has_nocontext{$1} if $embed->{name} =~ /(.*)_nocontext/;
585        }
586
587        @nocontext = sort grep {
588            $has_nocontext{$_}
589                && !/printf/ # Not clear to me why these are skipped but they are.
590        } keys %has_va;
591    }
592
593    print $em <<~'END';
594
595    /* Before C99, macros could not wrap varargs functions. This
596       provides a set of compatibility functions that don't take an
597       extra argument but grab the context pointer using the macro dTHX.
598     */
599    #if defined(MULTIPLICITY) && !defined(PERL_NO_SHORT_NAMES)
600    END
601
602    foreach (@nocontext) {
603        print $em indent_define($_, "Perl_${_}_nocontext", "  ");
604    }
605
606    print $em <<~'END';
607    #endif
608
609    #endif /* !defined(PERL_CORE) && !defined(PERL_NOCOMPAT) */
610
611    #if !defined(MULTIPLICITY)
612    /* undefined symbols, point them back at the usual ones */
613    END
614
615    foreach (@nocontext) {
616        print $em indent_define("Perl_${_}_nocontext", "Perl_$_", "  ");
617    }
618
619    print $em "#endif\n";
620    close $em;
621
622    normalize_and_print('embed.h',$embed_buffer)
623        unless $error_count;
624}
625
626sub generate_embedvar_h {
627    my $em = open_buf_out(my $embedvar_buffer);
628
629    print $em "#if defined(MULTIPLICITY)\n",
630              indent_define("vTHX","aTHX"," ");
631
632
633    my @intrp = readvars 'intrpvar.h','I';
634    #my @globvar = readvars 'perlvars.h','G';
635
636
637    for my $sym (@intrp) {
638        my $ind = " ";
639        if ($sym eq 'sawampersand') {
640            print $em "# if !defined(PL_sawampersand)\n";
641            $ind = "   ";
642        }
643        my $line = multon($sym, 'I', 'vTHX->', $ind);
644        print $em $line;
645        if ($sym eq 'sawampersand') {
646            print $em "# endif /* !defined(PL_sawampersand) */\n";
647        }
648    }
649
650    print $em "#endif       /* MULTIPLICITY */\n";
651    close $em;
652
653    normalize_and_print('embedvar.h',$embedvar_buffer)
654        unless $error_count;
655}
656
657sub update_headers {
658    my ($all, $api, $ext, $core) = setup_embed(); # see regen/embed_lib.pl
659    generate_proto_h($all);
660    die_at_end "$unflagged_pointers pointer arguments to clean up\n" if $unflagged_pointers;
661    generate_embed_h($all, $api, $ext, $core);
662    generate_embedvar_h();
663    die "$error_count errors found" if $error_count;
664}
665
666update_headers() unless caller;
667
668# ex: set ts=8 sts=4 sw=4 noet:
669