1package ExtUtils::Constant::Base; 2 3use strict; 4use vars qw($VERSION); 5use Carp; 6use Text::Wrap; 7use ExtUtils::Constant::Utils qw(C_stringify perl_stringify); 8$VERSION = '0.04'; 9 10use constant is_perl56 => ($] < 5.007 && $] > 5.005_50); 11 12 13=head1 NAME 14 15ExtUtils::Constant::Base - base class for ExtUtils::Constant objects 16 17=head1 SYNOPSIS 18 19 require ExtUtils::Constant::Base; 20 @ISA = 'ExtUtils::Constant::Base'; 21 22=head1 DESCRIPTION 23 24ExtUtils::Constant::Base provides a base implementation of methods to 25generate C code to give fast constant value lookup by named string. Currently 26it's mostly used ExtUtils::Constant::XS, which generates the lookup code 27for the constant() subroutine found in many XS modules. 28 29=head1 USAGE 30 31ExtUtils::Constant::Base exports no subroutines. The following methods are 32available 33 34=over 4 35 36=cut 37 38sub valid_type { 39 # Default to assuming that you don't need different types of return data. 40 1; 41} 42sub default_type { 43 ''; 44} 45 46=item header 47 48A method returning a scalar containing definitions needed, typically for a 49C header file. 50 51=cut 52 53sub header { 54 '' 55} 56 57# This might actually be a return statement. Note that you are responsible 58# for any space you might need before your value, as it lets to perform 59# "tricks" such as "return KEY_" and have strings appended. 60sub assignment_clause_for_type; 61# In which case this might be an empty string 62sub return_statement_for_type {undef}; 63sub return_statement_for_notdef; 64sub return_statement_for_notfound; 65 66# "#if 1" is true to a C pre-processor 67sub macro_from_name { 68 1; 69} 70 71sub macro_from_item { 72 1; 73} 74 75sub macro_to_ifdef { 76 my ($self, $macro) = @_; 77 if (ref $macro) { 78 return $macro->[0]; 79 } 80 if (defined $macro && $macro ne "" && $macro ne "1") { 81 return $macro ? "#ifdef $macro\n" : "#if 0\n"; 82 } 83 return ""; 84} 85 86sub macro_to_endif { 87 my ($self, $macro) = @_; 88 89 if (ref $macro) { 90 return $macro->[1]; 91 } 92 if (defined $macro && $macro ne "" && $macro ne "1") { 93 return "#endif\n"; 94 } 95 return ""; 96} 97 98sub name_param { 99 'name'; 100} 101 102# This is possibly buggy, in that it's not mandatory (below, in the main 103# C_constant parameters, but is expected to exist here, if it's needed) 104# Buggy because if you're definitely pure 8 bit only, and will never be 105# presented with your constants in utf8, the default form of C_constant can't 106# be told not to do the utf8 version. 107 108sub is_utf8_param { 109 'utf8'; 110} 111 112sub memEQ { 113 "!memcmp"; 114} 115 116=item memEQ_clause args_hashref 117 118A method to return a suitable C C<if> statement to check whether I<name> 119is equal to the C variable C<name>. If I<checked_at> is defined, then it 120is used to avoid C<memEQ> for short names, or to generate a comment to 121highlight the position of the character in the C<switch> statement. 122 123If i<checked_at> is a reference to a scalar, then instead it gives 124the characters pre-checked at the beginning, (and the number of chars by 125which the C variable name has been advanced. These need to be chopped from 126the front of I<name>). 127 128=cut 129 130sub memEQ_clause { 131# if (memEQ(name, "thingy", 6)) { 132 # Which could actually be a character comparison or even "" 133 my ($self, $args) = @_; 134 my ($name, $checked_at, $indent) = @{$args}{qw(name checked_at indent)}; 135 $indent = ' ' x ($indent || 4); 136 my $front_chop; 137 if (ref $checked_at) { 138 # regexp won't work on 5.6.1 without use utf8; in turn that won't work 139 # on 5.005_03. 140 substr ($name, 0, length $$checked_at,) = ''; 141 $front_chop = C_stringify ($$checked_at); 142 undef $checked_at; 143 } 144 my $len = length $name; 145 146 if ($len < 2) { 147 return $indent . "{\n" 148 if (defined $checked_at and $checked_at == 0) or $len == 0; 149 # We didn't switch, drop through to the code for the 2 character string 150 $checked_at = 1; 151 } 152 153 my $name_param = $self->name_param; 154 155 if ($len < 3 and defined $checked_at) { 156 my $check; 157 if ($checked_at == 1) { 158 $check = 0; 159 } elsif ($checked_at == 0) { 160 $check = 1; 161 } 162 if (defined $check) { 163 my $char = C_stringify (substr $name, $check, 1); 164 # Placate 5.005 with a break in the string. I can't see a good way of 165 # getting it to not take [ as introducing an array lookup, even with 166 # ${name_param}[$check] 167 return $indent . "if ($name_param" . "[$check] == '$char') {\n"; 168 } 169 } 170 if (($len == 2 and !defined $checked_at) 171 or ($len == 3 and defined ($checked_at) and $checked_at == 2)) { 172 my $char1 = C_stringify (substr $name, 0, 1); 173 my $char2 = C_stringify (substr $name, 1, 1); 174 return $indent . 175 "if ($name_param" . "[0] == '$char1' && $name_param" . "[1] == '$char2') {\n"; 176 } 177 if (($len == 3 and defined ($checked_at) and $checked_at == 1)) { 178 my $char1 = C_stringify (substr $name, 0, 1); 179 my $char2 = C_stringify (substr $name, 2, 1); 180 return $indent . 181 "if ($name_param" . "[0] == '$char1' && $name_param" . "[2] == '$char2') {\n"; 182 } 183 184 my $pointer = '^'; 185 my $have_checked_last = defined ($checked_at) && $len == $checked_at + 1; 186 if ($have_checked_last) { 187 # Checked at the last character, so no need to memEQ it. 188 $pointer = C_stringify (chop $name); 189 $len--; 190 } 191 192 $name = C_stringify ($name); 193 my $memEQ = $self->memEQ(); 194 my $body = $indent . "if ($memEQ($name_param, \"$name\", $len)) {\n"; 195 # Put a little ^ under the letter we checked at 196 # Screws up for non printable and non-7 bit stuff, but that's too hard to 197 # get right. 198 if (defined $checked_at) { 199 $body .= $indent . "/* " . (' ' x length $memEQ) 200 . (' ' x length $name_param) 201 . (' ' x $checked_at) . $pointer 202 . (' ' x ($len - $checked_at + length $len)) . " */\n"; 203 } elsif (defined $front_chop) { 204 $body .= $indent . "/* $front_chop" 205 . (' ' x ($len + 1 + length $len)) . " */\n"; 206 } 207 return $body; 208} 209 210=item dump_names arg_hashref, ITEM... 211 212An internal function to generate the embedded perl code that will regenerate 213the constant subroutines. I<default_type>, I<types> and I<ITEM>s are the 214same as for C_constant. I<indent> is treated as number of spaces to indent 215by. If C<declare_types> is true a C<$types> is always declared in the perl 216code generated, if defined and false never declared, and if undefined C<$types> 217is only declared if the values in I<types> as passed in cannot be inferred from 218I<default_types> and the I<ITEM>s. 219 220=cut 221 222sub dump_names { 223 my ($self, $args, @items) = @_; 224 my ($default_type, $what, $indent, $declare_types) 225 = @{$args}{qw(default_type what indent declare_types)}; 226 $indent = ' ' x ($indent || 0); 227 228 my $result; 229 my (@simple, @complex, %used_types); 230 foreach (@items) { 231 my $type; 232 if (ref $_) { 233 $type = $_->{type} || $default_type; 234 if ($_->{utf8}) { 235 # For simplicity always skip the bytes case, and reconstitute this entry 236 # from its utf8 twin. 237 next if $_->{utf8} eq 'no'; 238 # Copy the hashref, as we don't want to mess with the caller's hashref. 239 $_ = {%$_}; 240 unless (is_perl56) { 241 utf8::decode ($_->{name}); 242 } else { 243 $_->{name} = pack 'U*', unpack 'U0U*', $_->{name}; 244 } 245 delete $_->{utf8}; 246 } 247 } else { 248 $_ = {name=>$_}; 249 $type = $default_type; 250 } 251 $used_types{$type}++; 252 if ($type eq $default_type 253 # grr 5.6.1 254 and length $_->{name} 255 and length $_->{name} == ($_->{name} =~ tr/A-Za-z0-9_//) 256 and !defined ($_->{macro}) and !defined ($_->{value}) 257 and !defined ($_->{default}) and !defined ($_->{pre}) 258 and !defined ($_->{post}) and !defined ($_->{def_pre}) 259 and !defined ($_->{def_post}) and !defined ($_->{weight})) { 260 # It's the default type, and the name consists only of A-Za-z0-9_ 261 push @simple, $_->{name}; 262 } else { 263 push @complex, $_; 264 } 265 } 266 267 if (!defined $declare_types) { 268 # Do they pass in any types we weren't already using? 269 foreach (keys %$what) { 270 next if $used_types{$_}; 271 $declare_types++; # Found one in $what that wasn't used. 272 last; # And one is enough to terminate this loop 273 } 274 } 275 if ($declare_types) { 276 $result = $indent . 'my $types = {map {($_, 1)} qw(' 277 . join (" ", sort keys %$what) . ")};\n"; 278 } 279 local $Text::Wrap::huge = 'overflow'; 280 local $Text::Wrap::columns = 80; 281 $result .= wrap ($indent . "my \@names = (qw(", 282 $indent . " ", join (" ", sort @simple) . ")"); 283 if (@complex) { 284 foreach my $item (sort {$a->{name} cmp $b->{name}} @complex) { 285 my $name = perl_stringify $item->{name}; 286 my $line = ",\n$indent {name=>\"$name\""; 287 $line .= ", type=>\"$item->{type}\"" if defined $item->{type}; 288 foreach my $thing (qw (macro value default pre post def_pre def_post)) { 289 my $value = $item->{$thing}; 290 if (defined $value) { 291 if (ref $value) { 292 $line .= ", $thing=>[\"" 293 . join ('", "', map {perl_stringify $_} @$value) . '"]'; 294 } else { 295 $line .= ", $thing=>\"" . perl_stringify($value) . "\""; 296 } 297 } 298 } 299 $line .= "}"; 300 # Ensure that the enclosing C comment doesn't end 301 # by turning */ into *" . "/ 302 $line =~ s!\*\/!\*" . "/!gs; 303 # gcc -Wall doesn't like finding /* inside a comment 304 $line =~ s!\/\*!/" . "\*!gs; 305 $result .= $line; 306 } 307 } 308 $result .= ");\n"; 309 310 $result; 311} 312 313=item assign arg_hashref, VALUE... 314 315A method to return a suitable assignment clause. If I<type> is aggregate 316(eg I<PVN> expects both pointer and length) then there should be multiple 317I<VALUE>s for the components. I<pre> and I<post> if defined give snippets 318of C code to proceed and follow the assignment. I<pre> will be at the start 319of a block, so variables may be defined in it. 320 321=cut 322# Hmm. value undef to to NOTDEF? value () to do NOTFOUND? 323 324sub assign { 325 my $self = shift; 326 my $args = shift; 327 my ($indent, $type, $pre, $post, $item) 328 = @{$args}{qw(indent type pre post item)}; 329 $post ||= ''; 330 my $clause; 331 my $close; 332 if ($pre) { 333 chomp $pre; 334 $close = "$indent}\n"; 335 $clause = $indent . "{\n"; 336 $indent .= " "; 337 $clause .= "$indent$pre"; 338 $clause .= ";" unless $pre =~ /;$/; 339 $clause .= "\n"; 340 } 341 confess "undef \$type" unless defined $type; 342 confess "Can't generate code for type $type" 343 unless $self->valid_type($type); 344 345 $clause .= join '', map {"$indent$_\n"} 346 $self->assignment_clause_for_type({type=>$type,item=>$item}, @_); 347 chomp $post; 348 if (length $post) { 349 $clause .= "$post"; 350 $clause .= ";" unless $post =~ /;$/; 351 $clause .= "\n"; 352 } 353 my $return = $self->return_statement_for_type($type); 354 $clause .= "$indent$return\n" if defined $return; 355 $clause .= $close if $close; 356 return $clause; 357} 358 359=item return_clause arg_hashref, ITEM 360 361A method to return a suitable C<#ifdef> clause. I<ITEM> is a hashref 362(as passed to C<C_constant> and C<match_clause>. I<indent> is the number 363of spaces to indent, defaulting to 6. 364 365=cut 366 367sub return_clause { 368 369##ifdef thingy 370# *iv_return = thingy; 371# return PERL_constant_ISIV; 372##else 373# return PERL_constant_NOTDEF; 374##endif 375 my ($self, $args, $item) = @_; 376 my $indent = $args->{indent}; 377 378 my ($name, $value, $default, $pre, $post, $def_pre, $def_post, $type) 379 = @$item{qw (name value default pre post def_pre def_post type)}; 380 $value = $name unless defined $value; 381 my $macro = $self->macro_from_item($item); 382 $indent = ' ' x ($indent || 6); 383 unless (defined $type) { 384 # use Data::Dumper; print STDERR Dumper ($item); 385 confess "undef \$type"; 386 } 387 388 ##ifdef thingy 389 my $clause = $self->macro_to_ifdef($macro); 390 391 # *iv_return = thingy; 392 # return PERL_constant_ISIV; 393 $clause 394 .= $self->assign ({indent=>$indent, type=>$type, pre=>$pre, post=>$post, 395 item=>$item}, ref $value ? @$value : $value); 396 397 if (defined $macro && $macro ne "" && $macro ne "1") { 398 ##else 399 $clause .= "#else\n"; 400 401 # return PERL_constant_NOTDEF; 402 if (!defined $default) { 403 my $notdef = $self->return_statement_for_notdef(); 404 $clause .= "$indent$notdef\n" if defined $notdef; 405 } else { 406 my @default = ref $default ? @$default : $default; 407 $type = shift @default; 408 $clause .= $self->assign ({indent=>$indent, type=>$type, pre=>$pre, 409 post=>$post, item=>$item}, @default); 410 } 411 } 412 ##endif 413 $clause .= $self->macro_to_endif($macro); 414 415 return $clause; 416} 417 418sub match_clause { 419 # $offset defined if we have checked an offset. 420 my ($self, $args, $item) = @_; 421 my ($offset, $indent) = @{$args}{qw(checked_at indent)}; 422 $indent = ' ' x ($indent || 4); 423 my $body = ''; 424 my ($no, $yes, $either, $name, $inner_indent); 425 if (ref $item eq 'ARRAY') { 426 ($yes, $no) = @$item; 427 $either = $yes || $no; 428 confess "$item is $either expecting hashref in [0] || [1]" 429 unless ref $either eq 'HASH'; 430 $name = $either->{name}; 431 } else { 432 confess "$item->{name} has utf8 flag '$item->{utf8}', should be false" 433 if $item->{utf8}; 434 $name = $item->{name}; 435 $inner_indent = $indent; 436 } 437 438 $body .= $self->memEQ_clause ({name => $name, checked_at => $offset, 439 indent => length $indent}); 440 # If we've been presented with an arrayref for $item, then the user string 441 # contains in the range 128-255, and we need to check whether it was utf8 442 # (or not). 443 # In the worst case we have two named constants, where one's name happens 444 # encoded in UTF8 happens to be the same byte sequence as the second's 445 # encoded in (say) ISO-8859-1. 446 # In this case, $yes and $no both have item hashrefs. 447 if ($yes) { 448 $body .= $indent . " if (" . $self->is_utf8_param . ") {\n"; 449 } elsif ($no) { 450 $body .= $indent . " if (!" . $self->is_utf8_param . ") {\n"; 451 } 452 if ($either) { 453 $body .= $self->return_clause ({indent=>4 + length $indent}, $either); 454 if ($yes and $no) { 455 $body .= $indent . " } else {\n"; 456 $body .= $self->return_clause ({indent=>4 + length $indent}, $no); 457 } 458 $body .= $indent . " }\n"; 459 } else { 460 $body .= $self->return_clause ({indent=>2 + length $indent}, $item); 461 } 462 $body .= $indent . "}\n"; 463} 464 465 466=item switch_clause arg_hashref, NAMELEN, ITEMHASH, ITEM... 467 468An internal method to generate a suitable C<switch> clause, called by 469C<C_constant> I<ITEM>s are in the hash ref format as given in the description 470of C<C_constant>, and must all have the names of the same length, given by 471I<NAMELEN>. I<ITEMHASH> is a reference to a hash, keyed by name, values being 472the hashrefs in the I<ITEM> list. (No parameters are modified, and there can 473be keys in the I<ITEMHASH> that are not in the list of I<ITEM>s without 474causing problems - the hash is passed in to save generating it afresh for 475each call). 476 477=cut 478 479sub switch_clause { 480 my ($self, $args, $namelen, $items, @items) = @_; 481 my ($indent, $comment) = @{$args}{qw(indent comment)}; 482 $indent = ' ' x ($indent || 2); 483 484 local $Text::Wrap::huge = 'overflow'; 485 local $Text::Wrap::columns = 80; 486 487 my @names = sort map {$_->{name}} @items; 488 my $leader = $indent . '/* '; 489 my $follower = ' ' x length $leader; 490 my $body = $indent . "/* Names all of length $namelen. */\n"; 491 if (defined $comment) { 492 $body = wrap ($leader, $follower, $comment) . "\n"; 493 $leader = $follower; 494 } 495 my @safe_names = @names; 496 foreach (@safe_names) { 497 confess sprintf "Name '$_' is length %d, not $namelen", length 498 unless length == $namelen; 499 # Argh. 5.6.1 500 # next unless tr/A-Za-z0-9_//c; 501 next if tr/A-Za-z0-9_// == length; 502 $_ = '"' . perl_stringify ($_) . '"'; 503 # Ensure that the enclosing C comment doesn't end 504 # by turning */ into *" . "/ 505 s!\*\/!\*"."/!gs; 506 # gcc -Wall doesn't like finding /* inside a comment 507 s!\/\*!/"."\*!gs; 508 } 509 $body .= wrap ($leader, $follower, join (" ", @safe_names) . " */") . "\n"; 510 # Figure out what to switch on. 511 # (RMS, Spread of jump table, Position, Hashref) 512 my @best = (1e38, ~0); 513 # Prefer the last character over the others. (As it lets us shorten the 514 # memEQ clause at no cost). 515 foreach my $i ($namelen - 1, 0 .. ($namelen - 2)) { 516 my ($min, $max) = (~0, 0); 517 my %spread; 518 if (is_perl56) { 519 # Need proper Unicode preserving hash keys for bytes in range 128-255 520 # here too, for some reason. grr 5.6.1 yet again. 521 tie %spread, 'ExtUtils::Constant::Aaargh56Hash'; 522 } 523 foreach (@names) { 524 my $char = substr $_, $i, 1; 525 my $ord = ord $char; 526 confess "char $ord is out of range" if $ord > 255; 527 $max = $ord if $ord > $max; 528 $min = $ord if $ord < $min; 529 push @{$spread{$char}}, $_; 530 # warn "$_ $char"; 531 } 532 # I'm going to pick the character to split on that minimises the root 533 # mean square of the number of names in each case. Normally this should 534 # be the one with the most keys, but it may pick a 7 where the 8 has 535 # one long linear search. I'm not sure if RMS or just sum of squares is 536 # actually better. 537 # $max and $min are for the tie-breaker if the root mean squares match. 538 # Assuming that the compiler may be building a jump table for the 539 # switch() then try to minimise the size of that jump table. 540 # Finally use < not <= so that if it still ties the earliest part of 541 # the string wins. Because if that passes but the memEQ fails, it may 542 # only need the start of the string to bin the choice. 543 # I think. But I'm micro-optimising. :-) 544 # OK. Trump that. Now favour the last character of the string, before the 545 # rest. 546 my $ss; 547 $ss += @$_ * @$_ foreach values %spread; 548 my $rms = sqrt ($ss / keys %spread); 549 if ($rms < $best[0] || ($rms == $best[0] && ($max - $min) < $best[1])) { 550 @best = ($rms, $max - $min, $i, \%spread); 551 } 552 } 553 confess "Internal error. Failed to pick a switch point for @names" 554 unless defined $best[2]; 555 # use Data::Dumper; print Dumper (@best); 556 my ($offset, $best) = @best[2,3]; 557 $body .= $indent . "/* Offset $offset gives the best switch position. */\n"; 558 559 my $do_front_chop = $offset == 0 && $namelen > 2; 560 if ($do_front_chop) { 561 $body .= $indent . "switch (*" . $self->name_param() . "++) {\n"; 562 } else { 563 $body .= $indent . "switch (" . $self->name_param() . "[$offset]) {\n"; 564 } 565 foreach my $char (sort keys %$best) { 566 confess sprintf "'$char' is %d bytes long, not 1", length $char 567 if length ($char) != 1; 568 confess sprintf "char %#X is out of range", ord $char if ord ($char) > 255; 569 $body .= $indent . "case '" . C_stringify ($char) . "':\n"; 570 foreach my $thisone (sort { 571 # Deal with the case of an item actually being an array ref to 1 or 2 572 # hashrefs. Don't assign to $a or $b, as they're aliases to the orignal 573 my $l = ref $a eq 'ARRAY' ? ($a->[0] || $->[1]) : $a; 574 my $r = ref $b eq 'ARRAY' ? ($b->[0] || $->[1]) : $b; 575 # Sort by weight first 576 ($r->{weight} || 0) <=> ($l->{weight} || 0) 577 # Sort equal weights by name 578 or $l->{name} cmp $r->{name}} 579 # If this looks evil, maybe it is. $items is a 580 # hashref, and we're doing a hash slice on it 581 @{$items}{@{$best->{$char}}}) { 582 # warn "You are here"; 583 if ($do_front_chop) { 584 $body .= $self->match_clause ({indent => 2 + length $indent, 585 checked_at => \$char}, $thisone); 586 } else { 587 $body .= $self->match_clause ({indent => 2 + length $indent, 588 checked_at => $offset}, $thisone); 589 } 590 } 591 $body .= $indent . " break;\n"; 592 } 593 $body .= $indent . "}\n"; 594 return $body; 595} 596 597sub C_constant_return_type { 598 "static int"; 599} 600 601sub C_constant_prefix_param { 602 ''; 603} 604 605sub C_constant_prefix_param_defintion { 606 ''; 607} 608 609sub name_param_definition { 610 "const char *" . $_[0]->name_param; 611} 612 613sub namelen_param { 614 'len'; 615} 616 617sub namelen_param_definition { 618 'size_t ' . $_[0]->namelen_param; 619} 620 621sub C_constant_other_params { 622 ''; 623} 624 625sub C_constant_other_params_defintion { 626 ''; 627} 628 629=item params WHAT 630 631An "internal" method, subject to change, currently called to allow an 632overriding class to cache information that will then be passed into all 633the C<*param*> calls. (Yes, having to read the source to make sense of this is 634considered a known bug). I<WHAT> is be a hashref of types the constant 635function will return. In ExtUtils::Constant::XS this method is used to 636returns a hashref keyed IV NV PV SV to show which combination of pointers will 637be needed in the C argument list generated by 638C_constant_other_params_definition and C_constant_other_params 639 640=cut 641 642sub params { 643 ''; 644} 645 646 647=item dogfood arg_hashref, ITEM... 648 649An internal function to generate the embedded perl code that will regenerate 650the constant subroutines. Parameters are the same as for C_constant. 651 652Currently the base class does nothing and returns an empty string. 653 654=cut 655 656sub dogfood { 657 '' 658} 659 660=item normalise_items args, default_type, seen_types, seen_items, ITEM... 661 662Convert the items to a normalised form. For 8 bit and Unicode values converts 663the item to an array of 1 or 2 items, both 8 bit and UTF-8 encoded. 664 665=cut 666 667sub normalise_items 668{ 669 my $self = shift; 670 my $args = shift; 671 my $default_type = shift; 672 my $what = shift; 673 my $items = shift; 674 my @new_items; 675 foreach my $orig (@_) { 676 my ($name, $item); 677 if (ref $orig) { 678 # Make a copy which is a normalised version of the ref passed in. 679 $name = $orig->{name}; 680 my ($type, $macro, $value) = @$orig{qw (type macro value)}; 681 $type ||= $default_type; 682 $what->{$type} = 1; 683 $item = {name=>$name, type=>$type}; 684 685 undef $macro if defined $macro and $macro eq $name; 686 $item->{macro} = $macro if defined $macro; 687 undef $value if defined $value and $value eq $name; 688 $item->{value} = $value if defined $value; 689 foreach my $key (qw(default pre post def_pre def_post weight 690 not_constant)) { 691 my $value = $orig->{$key}; 692 $item->{$key} = $value if defined $value; 693 # warn "$key $value"; 694 } 695 } else { 696 $name = $orig; 697 $item = {name=>$name, type=>$default_type}; 698 $what->{$default_type} = 1; 699 } 700 warn +(ref ($self) || $self) 701 . "doesn't know how to handle values of type $_ used in macro $name" 702 unless $self->valid_type ($item->{type}); 703 # tr///c is broken on 5.6.1 for utf8, so my original tr/\0-\177//c 704 # doesn't work. Upgrade to 5.8 705 # if ($name !~ tr/\0-\177//c || $] < 5.005_50) { 706 if ($name =~ tr/\0-\177// == length $name || $] < 5.005_50 707 || $args->{disable_utf8_duplication}) { 708 # No characters outside 7 bit ASCII. 709 if (exists $items->{$name}) { 710 die "Multiple definitions for macro $name"; 711 } 712 $items->{$name} = $item; 713 } else { 714 # No characters outside 8 bit. This is hardest. 715 if (exists $items->{$name} and ref $items->{$name} ne 'ARRAY') { 716 confess "Unexpected ASCII definition for macro $name"; 717 } 718 # Again, 5.6.1 tr broken, so s/5\.6.*/5\.8\.0/; 719 # if ($name !~ tr/\0-\377//c) { 720 if ($name =~ tr/\0-\377// == length $name) { 721# if ($] < 5.007) { 722# $name = pack "C*", unpack "U*", $name; 723# } 724 $item->{utf8} = 'no'; 725 $items->{$name}[1] = $item; 726 push @new_items, $item; 727 # Copy item, to create the utf8 variant. 728 $item = {%$item}; 729 } 730 # Encode the name as utf8 bytes. 731 unless (is_perl56) { 732 utf8::encode($name); 733 } else { 734# warn "Was >$name< " . length ${name}; 735 $name = pack 'C*', unpack 'C*', $name . pack 'U*'; 736# warn "Now '${name}' " . length ${name}; 737 } 738 if ($items->{$name}[0]) { 739 die "Multiple definitions for macro $name"; 740 } 741 $item->{utf8} = 'yes'; 742 $item->{name} = $name; 743 $items->{$name}[0] = $item; 744 # We have need for the utf8 flag. 745 $what->{''} = 1; 746 } 747 push @new_items, $item; 748 } 749 @new_items; 750} 751 752=item C_constant arg_hashref, ITEM... 753 754A function that returns a B<list> of C subroutine definitions that return 755the value and type of constants when passed the name by the XS wrapper. 756I<ITEM...> gives a list of constant names. Each can either be a string, 757which is taken as a C macro name, or a reference to a hash with the following 758keys 759 760=over 8 761 762=item name 763 764The name of the constant, as seen by the perl code. 765 766=item type 767 768The type of the constant (I<IV>, I<NV> etc) 769 770=item value 771 772A C expression for the value of the constant, or a list of C expressions if 773the type is aggregate. This defaults to the I<name> if not given. 774 775=item macro 776 777The C pre-processor macro to use in the C<#ifdef>. This defaults to the 778I<name>, and is mainly used if I<value> is an C<enum>. If a reference an 779array is passed then the first element is used in place of the C<#ifdef> 780line, and the second element in place of the C<#endif>. This allows 781pre-processor constructions such as 782 783 #if defined (foo) 784 #if !defined (bar) 785 ... 786 #endif 787 #endif 788 789to be used to determine if a constant is to be defined. 790 791A "macro" 1 signals that the constant is always defined, so the C<#if>/C<#endif> 792test is omitted. 793 794=item default 795 796Default value to use (instead of C<croak>ing with "your vendor has not 797defined...") to return if the macro isn't defined. Specify a reference to 798an array with type followed by value(s). 799 800=item pre 801 802C code to use before the assignment of the value of the constant. This allows 803you to use temporary variables to extract a value from part of a C<struct> 804and return this as I<value>. This C code is places at the start of a block, 805so you can declare variables in it. 806 807=item post 808 809C code to place between the assignment of value (to a temporary) and the 810return from the function. This allows you to clear up anything in I<pre>. 811Rarely needed. 812 813=item def_pre 814 815=item def_post 816 817Equivalents of I<pre> and I<post> for the default value. 818 819=item utf8 820 821Generated internally. Is zero or undefined if name is 7 bit ASCII, 822"no" if the name is 8 bit (and so should only match if SvUTF8() is false), 823"yes" if the name is utf8 encoded. 824 825The internals automatically clone any name with characters 128-255 but none 826256+ (ie one that could be either in bytes or utf8) into a second entry 827which is utf8 encoded. 828 829=item weight 830 831Optional sorting weight for names, to determine the order of 832linear testing when multiple names fall in the same case of a switch clause. 833Higher comes earlier, undefined defaults to zero. 834 835=back 836 837In the argument hashref, I<package> is the name of the package, and is only 838used in comments inside the generated C code. I<subname> defaults to 839C<constant> if undefined. 840 841I<default_type> is the type returned by C<ITEM>s that don't specify their 842type. It defaults to the value of C<default_type()>. I<types> should be given 843either as a comma separated list of types that the C subroutine I<subname> 844will generate or as a reference to a hash. I<default_type> will be added to 845the list if not present, as will any types given in the list of I<ITEM>s. The 846resultant list should be the same list of types that C<XS_constant> is 847given. [Otherwise C<XS_constant> and C<C_constant> may differ in the number of 848parameters to the constant function. I<indent> is currently unused and 849ignored. In future it may be used to pass in information used to change the C 850indentation style used.] The best way to maintain consistency is to pass in a 851hash reference and let this function update it. 852 853I<breakout> governs when child functions of I<subname> are generated. If there 854are I<breakout> or more I<ITEM>s with the same length of name, then the code 855to switch between them is placed into a function named I<subname>_I<len>, for 856example C<constant_5> for names 5 characters long. The default I<breakout> is 8573. A single C<ITEM> is always inlined. 858 859=cut 860 861# The parameter now BREAKOUT was previously documented as: 862# 863# I<NAMELEN> if defined signals that all the I<name>s of the I<ITEM>s are of 864# this length, and that the constant name passed in by perl is checked and 865# also of this length. It is used during recursion, and should be C<undef> 866# unless the caller has checked all the lengths during code generation, and 867# the generated subroutine is only to be called with a name of this length. 868# 869# As you can see it now performs this function during recursion by being a 870# scalar reference. 871 872sub C_constant { 873 my ($self, $args, @items) = @_; 874 my ($package, $subname, $default_type, $what, $indent, $breakout) = 875 @{$args}{qw(package subname default_type types indent breakout)}; 876 $package ||= 'Foo'; 877 $subname ||= 'constant'; 878 # I'm not using this. But a hashref could be used for full formatting without 879 # breaking this API 880 # $indent ||= 0; 881 882 my ($namelen, $items); 883 if (ref $breakout) { 884 # We are called recursively. We trust @items to be normalised, $what to 885 # be a hashref, and pinch %$items from our parent to save recalculation. 886 ($namelen, $items) = @$breakout; 887 } else { 888 $items = {}; 889 if (is_perl56) { 890 # Need proper Unicode preserving hash keys. 891 require ExtUtils::Constant::Aaargh56Hash; 892 tie %$items, 'ExtUtils::Constant::Aaargh56Hash'; 893 } 894 $breakout ||= 3; 895 $default_type ||= $self->default_type(); 896 if (!ref $what) { 897 # Convert line of the form IV,UV,NV to hash 898 $what = {map {$_ => 1} split /,\s*/, ($what || '')}; 899 # Figure out what types we're dealing with, and assign all unknowns to the 900 # default type 901 } 902 @items = $self->normalise_items ({}, $default_type, $what, $items, @items); 903 # use Data::Dumper; print Dumper @items; 904 } 905 my $params = $self->params ($what); 906 907 # Probably "static int" 908 my ($body, @subs); 909 $body = $self->C_constant_return_type($params) . "\n$subname (" 910 # Eg "pTHX_ " 911 . $self->C_constant_prefix_param_defintion($params) 912 # Probably "const char *name" 913 . $self->name_param_definition($params); 914 # Something like ", STRLEN len" 915 $body .= ", " . $self->namelen_param_definition($params) 916 unless defined $namelen; 917 $body .= $self->C_constant_other_params_defintion($params); 918 $body .= ") {\n"; 919 920 if (defined $namelen) { 921 # We are a child subroutine. Print the simple description 922 my $comment = 'When generated this function returned values for the list' 923 . ' of names given here. However, subsequent manual editing may have' 924 . ' added or removed some.'; 925 $body .= $self->switch_clause ({indent=>2, comment=>$comment}, 926 $namelen, $items, @items); 927 } else { 928 # We are the top level. 929 $body .= " /* Initially switch on the length of the name. */\n"; 930 $body .= $self->dogfood ({package => $package, subname => $subname, 931 default_type => $default_type, what => $what, 932 indent => $indent, breakout => $breakout}, 933 @items); 934 $body .= ' switch ('.$self->namelen_param().") {\n"; 935 # Need to group names of the same length 936 my @by_length; 937 foreach (@items) { 938 push @{$by_length[length $_->{name}]}, $_; 939 } 940 foreach my $i (0 .. $#by_length) { 941 next unless $by_length[$i]; # None of this length 942 $body .= " case $i:\n"; 943 if (@{$by_length[$i]} == 1) { 944 my $only_thing = $by_length[$i]->[0]; 945 if ($only_thing->{utf8}) { 946 if ($only_thing->{utf8} eq 'yes') { 947 # With utf8 on flag item is passed in element 0 948 $body .= $self->match_clause (undef, [$only_thing]); 949 } else { 950 # With utf8 off flag item is passed in element 1 951 $body .= $self->match_clause (undef, [undef, $only_thing]); 952 } 953 } else { 954 $body .= $self->match_clause (undef, $only_thing); 955 } 956 } elsif (@{$by_length[$i]} < $breakout) { 957 $body .= $self->switch_clause ({indent=>4}, 958 $i, $items, @{$by_length[$i]}); 959 } else { 960 # Only use the minimal set of parameters actually needed by the types 961 # of the names of this length. 962 my $what = {}; 963 foreach (@{$by_length[$i]}) { 964 $what->{$_->{type}} = 1; 965 $what->{''} = 1 if $_->{utf8}; 966 } 967 $params = $self->params ($what); 968 push @subs, $self->C_constant ({package=>$package, 969 subname=>"${subname}_$i", 970 default_type => $default_type, 971 types => $what, indent => $indent, 972 breakout => [$i, $items]}, 973 @{$by_length[$i]}); 974 $body .= " return ${subname}_$i (" 975 # Eg "aTHX_ " 976 . $self->C_constant_prefix_param($params) 977 # Probably "name" 978 . $self->name_param($params); 979 $body .= $self->C_constant_other_params($params); 980 $body .= ");\n"; 981 } 982 $body .= " break;\n"; 983 } 984 $body .= " }\n"; 985 } 986 my $notfound = $self->return_statement_for_notfound(); 987 $body .= " $notfound\n" if $notfound; 988 $body .= "}\n"; 989 return (@subs, $body); 990} 991 9921; 993__END__ 994 995=back 996 997=head1 BUGS 998 999Not everything is documented yet. 1000 1001Probably others. 1002 1003=head1 AUTHOR 1004 1005Nicholas Clark <nick@ccl4.org> based on the code in C<h2xs> by Larry Wall and 1006others 1007