xref: /openbsd-src/gnu/usr.bin/perl/lib/ExtUtils/XSSymSet.pm (revision 5759b3d249badf144a6240f7eec4dcf9df003e6b)
143003dfeSmillertpackage ExtUtils::XSSymSet;
243003dfeSmillert
343003dfeSmillertuse strict;
4898184e3Ssthenuse Config;
5*5759b3d2Safresh1our $VERSION = '1.4';
643003dfeSmillert
743003dfeSmillert
843003dfeSmillertsub new {
943003dfeSmillert  my($pkg,$maxlen,$silent) = @_;
1043003dfeSmillert  $maxlen ||= 31;
11898184e3Ssthen  # Allow absurdly long symbols here if we've told the compiler to
12898184e3Ssthen  # do the shortening for us.
13898184e3Ssthen  $maxlen = 2048 if $Config{'useshortenedsymbols'};
1443003dfeSmillert  $silent ||= 0;
1543003dfeSmillert  my($obj) = { '__M@xLen' => $maxlen, '__S!lent' => $silent };
1643003dfeSmillert  bless $obj, $pkg;
1743003dfeSmillert}
1843003dfeSmillert
1943003dfeSmillert
2043003dfeSmillertsub trimsym {
2143003dfeSmillert  my($self,$name,$maxlen,$silent) = @_;
2243003dfeSmillert
2343003dfeSmillert  unless (defined $maxlen) {
2443003dfeSmillert    if (ref $self) { $maxlen ||= $self->{'__M@xLen'}; }
2543003dfeSmillert    $maxlen ||= 31;
2643003dfeSmillert  }
27898184e3Ssthen  $maxlen = 2048 if $Config{'useshortenedsymbols'};
28898184e3Ssthen
2943003dfeSmillert  unless (defined $silent) {
3043003dfeSmillert    if (ref $self) { $silent ||= $self->{'__S!lent'}; }
3143003dfeSmillert    $silent ||= 0;
3243003dfeSmillert  }
3343003dfeSmillert  return $name if (length $name <= $maxlen);
3443003dfeSmillert
3543003dfeSmillert  my $trimmed = $name;
3643003dfeSmillert  # First, just try to remove duplicated delimiters
3743003dfeSmillert  $trimmed =~ s/__/_/g;
3843003dfeSmillert  if (length $trimmed > $maxlen) {
3943003dfeSmillert    # Next, all duplicated chars
4043003dfeSmillert    $trimmed =~ s/(.)\1+/$1/g;
4143003dfeSmillert    if (length $trimmed > $maxlen) {
4243003dfeSmillert      my $squeezed = $trimmed;
4343003dfeSmillert      my($xs,$prefix,$func) = $trimmed =~ /^(XS_)?(.*)_([^_]*)$/;
4443003dfeSmillert      $xs ||= '';
4543003dfeSmillert      my $frac = 3; # replaces broken length-based calculations but w/same result
4643003dfeSmillert      my $pat = '([^_])';
4743003dfeSmillert      if (length $func <= 12) {  # Try to preserve short function names
4843003dfeSmillert        if ($frac > 1) { $pat .= '[^A-Z_]{' . ($frac - 1) . '}'; }
4943003dfeSmillert        $prefix =~ s/$pat/$1/g;
5043003dfeSmillert        $squeezed = "$xs$prefix" . "_$func";
5143003dfeSmillert        if (length $squeezed > $maxlen) {
5243003dfeSmillert          $pat =~ s/A-Z//;
5343003dfeSmillert          $prefix =~ s/$pat/$1/g;
5443003dfeSmillert          $squeezed = "$xs$prefix" . "_$func";
5543003dfeSmillert        }
5643003dfeSmillert      }
5743003dfeSmillert      else {
5843003dfeSmillert        if ($frac > 1) { $pat .= '[^A-Z_]{' . ($frac - 1) . '}'; }
5943003dfeSmillert        $squeezed = "$prefix$func";
6043003dfeSmillert        $squeezed =~ s/$pat/$1/g;
6143003dfeSmillert        if (length "$xs$squeezed" > $maxlen) {
6243003dfeSmillert          $pat =~ s/A-Z//;
6343003dfeSmillert          $squeezed =~ s/$pat/$1/g;
6443003dfeSmillert        }
6543003dfeSmillert        $squeezed = "$xs$squeezed";
6643003dfeSmillert      }
6743003dfeSmillert      if (length $squeezed <= $maxlen) { $trimmed = $squeezed; }
6843003dfeSmillert      else {
6943003dfeSmillert        my $frac = int((length $trimmed - $maxlen) / length $trimmed + 0.5);
7043003dfeSmillert        my $pat = '(.).{$frac}';
7143003dfeSmillert        $trimmed =~ s/$pat/$1/g;
7243003dfeSmillert      }
7343003dfeSmillert    }
7443003dfeSmillert  }
7543003dfeSmillert  warn "Warning: long symbol $name\n\ttrimmed to $trimmed\n\t" unless $silent;
7643003dfeSmillert  return $trimmed;
7743003dfeSmillert}
7843003dfeSmillert
7943003dfeSmillert
8043003dfeSmillertsub addsym {
8143003dfeSmillert  my($self,$sym,$maxlen,$silent) = @_;
8243003dfeSmillert  my $trimmed = $self->get_trimmed($sym);
8343003dfeSmillert
8443003dfeSmillert  return $trimmed if defined $trimmed;
8543003dfeSmillert
8643003dfeSmillert  $maxlen ||= $self->{'__M@xLen'} || 31;
8743003dfeSmillert  $silent ||= $self->{'__S!lent'} || 0;
8843003dfeSmillert  $trimmed = $self->trimsym($sym,$maxlen,1);
8943003dfeSmillert  if (exists $self->{$trimmed}) {
9043003dfeSmillert    my($i) = "00";
9143003dfeSmillert    $trimmed = $self->trimsym($sym,$maxlen-3,$silent);
9243003dfeSmillert    while (exists $self->{"${trimmed}_$i"}) { $i++; }
9343003dfeSmillert    warn "Warning: duplicate symbol $trimmed\n\tchanged to ${trimmed}_$i\n\t(original was $sym)\n\t"
9443003dfeSmillert      unless $silent;
9543003dfeSmillert    $trimmed .= "_$i";
9643003dfeSmillert  }
9743003dfeSmillert  elsif (not $silent and $trimmed ne $sym) {
9843003dfeSmillert    warn "Warning: long symbol $sym\n\ttrimmed to $trimmed\n\t";
9943003dfeSmillert  }
10043003dfeSmillert  $self->{$trimmed} = $sym;
10143003dfeSmillert  $self->{'__N+Map'}->{$sym} = $trimmed;
10243003dfeSmillert  $trimmed;
10343003dfeSmillert}
10443003dfeSmillert
10543003dfeSmillert
10643003dfeSmillertsub delsym {
10743003dfeSmillert  my($self,$sym) = @_;
10843003dfeSmillert  my $trimmed = $self->{'__N+Map'}->{$sym};
10943003dfeSmillert  if (defined $trimmed) {
11043003dfeSmillert    delete $self->{'__N+Map'}->{$sym};
11143003dfeSmillert    delete $self->{$trimmed};
11243003dfeSmillert  }
11343003dfeSmillert  $trimmed;
11443003dfeSmillert}
11543003dfeSmillert
11643003dfeSmillert
11743003dfeSmillertsub get_trimmed {
11843003dfeSmillert  my($self,$sym) = @_;
11943003dfeSmillert  $self->{'__N+Map'}->{$sym};
12043003dfeSmillert}
12143003dfeSmillert
12243003dfeSmillert
12343003dfeSmillertsub get_orig {
12443003dfeSmillert  my($self,$trimmed) = @_;
12543003dfeSmillert  $self->{$trimmed};
12643003dfeSmillert}
12743003dfeSmillert
12843003dfeSmillert
12943003dfeSmillertsub all_orig { (keys %{$_[0]->{'__N+Map'}}); }
13043003dfeSmillertsub all_trimmed { (grep { /^\w+$/ } keys %{$_[0]}); }
13143003dfeSmillert
13243003dfeSmillert__END__
13343003dfeSmillert
13443003dfeSmillert=head1 NAME
13543003dfeSmillert
13643003dfeSmillertExtUtils::XSSymSet - keep sets of symbol names palatable to the VMS linker
13743003dfeSmillert
13843003dfeSmillert=head1 SYNOPSIS
13943003dfeSmillert
14043003dfeSmillert  use ExtUtils::XSSymSet;
14143003dfeSmillert
14243003dfeSmillert  $set = new ExtUtils::XSSymSet;
14343003dfeSmillert  while ($sym = make_symbol()) { $set->addsym($sym); }
14443003dfeSmillert  foreach $safesym ($set->all_trimmed) {
1456fb12b70Safresh1    print "Processing $safesym (derived from ",
1466fb12b70Safresh1        $self->get_orig($safesym), ")\n";
14743003dfeSmillert    do_stuff($safesym);
14843003dfeSmillert  }
14943003dfeSmillert
15043003dfeSmillert  $safesym = ExtUtils::XSSymSet->trimsym($onesym);
15143003dfeSmillert
15243003dfeSmillert=head1 DESCRIPTION
15343003dfeSmillert
15443003dfeSmillertSince the VMS linker distinguishes symbols based only on the first 31
15543003dfeSmillertcharacters of their names, it is occasionally necessary to shorten
15643003dfeSmillertsymbol names in order to avoid collisions.  (This is especially true of
15743003dfeSmillertnames generated by xsubpp, since prefixes generated by nested package
15843003dfeSmillertnames can become quite long.)  C<ExtUtils::XSSymSet> provides functions to
15943003dfeSmillertshorten names in a consistent fashion, and to track a set of names to
16043003dfeSmillertinsure that each is unique.  While designed with F<xsubpp> in mind, it
16143003dfeSmillertmay be used with any set of strings.
16243003dfeSmillert
16343003dfeSmillertThis package supplies the following functions, all of which should be
16443003dfeSmillertcalled as methods.
16543003dfeSmillert
16643003dfeSmillert=over 4
16743003dfeSmillert
16843003dfeSmillert=item new([$maxlen[,$silent]])
16943003dfeSmillert
17043003dfeSmillertCreates an empty C<ExtUtils::XSSymset> set of symbols.  This function may be
17143003dfeSmillertcalled as a static method or via an existing object.  If C<$maxlen> or
17243003dfeSmillertC<$silent> are specified, they are used as the defaults for maximum
17343003dfeSmillertname length and warning behavior in future calls to addsym() or
174898184e3Ssthentrimsym() via this object.  If the compiler has been instructed to do its
175898184e3Ssthenown symbol shortening via C<$Config{'useshortenedsymbols'}>, a value of
176898184e3Ssthen2048 is assumed for C<$maxlen> as a way of bypassing the shortening done by
177898184e3Ssthenthis module.
17843003dfeSmillert
17943003dfeSmillert=item addsym($name[,$maxlen[,$silent]])
18043003dfeSmillert
18143003dfeSmillertCreates a symbol name from C<$name>, using the methods described
18243003dfeSmillertunder trimsym(), which is unique in this set of symbols, and returns
18343003dfeSmillertthe new name.  C<$name> and its resultant are added to the set, and
18443003dfeSmillertany future calls to addsym() specifying the same C<$name> will return
18543003dfeSmillertthe same result, regardless of the value of C<$maxlen> specified.
18643003dfeSmillertUnless C<$silent> is true, warnings are output if C<$name> had to be
18743003dfeSmillerttrimmed or changed in order to avoid collision with an existing symbol
18843003dfeSmillertname.  C<$maxlen> and C<$silent> default to the values specified when
18943003dfeSmillertthis set of symbols was created.  This method must be called via an
19043003dfeSmillertexisting object.
19143003dfeSmillert
19243003dfeSmillert=item trimsym($name[,$maxlen[,$silent]])
19343003dfeSmillert
19443003dfeSmillertCreates a symbol name C<$maxlen> or fewer characters long from
19543003dfeSmillertC<$name> and returns it. If C<$name> is too long, it first tries to
19643003dfeSmillertshorten it by removing duplicate characters, then by periodically
19743003dfeSmillertremoving non-underscore characters, and finally, if necessary, by
19843003dfeSmillertperiodically removing characters of any type.  C<$maxlen> defaults
19943003dfeSmillertto 31.  Unless C<$silent> is true, a warning is output if C<$name>
20043003dfeSmillertis altered in any way.  This function may be called either as a
20143003dfeSmillertstatic method or via an existing object, but in the latter case no
20243003dfeSmillertcheck is made to insure that the resulting name is unique in the
203898184e3Ssthenset of symbols.    If the compiler has been instructed to do its
204898184e3Ssthenown symbol shortening via C<$Config{'useshortenedsymbols'}>, a value
205898184e3Ssthenof 2048 is assumed for C<$maxlen> as a way of bypassing the shortening
206898184e3Ssthendone by this module.
20743003dfeSmillert
20843003dfeSmillert=item delsym($name)
20943003dfeSmillert
21043003dfeSmillertRemoves C<$name> from the set of symbols, where C<$name> is the
21143003dfeSmillertoriginal symbol name passed previously to addsym().  If C<$name>
21243003dfeSmillertexisted in the set of symbols, returns its "trimmed" equivalent,
21343003dfeSmillertotherwise returns C<undef>.  This method must be called via an
21443003dfeSmillertexisting object.
21543003dfeSmillert
21643003dfeSmillert=item get_orig($trimmed)
21743003dfeSmillert
21843003dfeSmillertReturns the original name which was trimmed to C<$trimmed> by a
21943003dfeSmillertprevious call to addsym(), or C<undef> if C<$trimmed> does not
22043003dfeSmillertcorrespond to a member of this set of symbols.  This method must be
22143003dfeSmillertcalled via an existing object.
22243003dfeSmillert
22343003dfeSmillert=item get_trimmed($name)
22443003dfeSmillert
22543003dfeSmillertReturns the trimmed name which was generated from C<$name> by a
22643003dfeSmillertprevious call to addsym(), or C<undef> if C<$name> is not a member
22743003dfeSmillertof this set of symbols.  This method must be called via an
22843003dfeSmillertexisting object.
22943003dfeSmillert
23043003dfeSmillert=item all_orig()
23143003dfeSmillert
23243003dfeSmillertReturns a list containing all of the original symbol names
23343003dfeSmillertfrom this set.
23443003dfeSmillert
23543003dfeSmillert=item all_trimmed()
23643003dfeSmillert
23743003dfeSmillertReturns a list containing all of the trimmed symbol names
23843003dfeSmillertfrom this set.
23943003dfeSmillert
24043003dfeSmillert=back
24143003dfeSmillert
24243003dfeSmillert=head1 AUTHOR
24343003dfeSmillert
24443003dfeSmillertCharles Bailey  E<lt>I<bailey@newman.upenn.edu>E<gt>
24543003dfeSmillert
24643003dfeSmillert=head1 REVISION
24743003dfeSmillert
248898184e3SsthenLast revised 8-Oct-2010, for Perl 5.13.6.
24943003dfeSmillert
250