xref: /onnv-gate/usr/src/cmd/perl/5.8.4/distrib/lib/Search/Dict.pm (revision 0:68f95e015346)
1*0Sstevel@tonic-gatepackage Search::Dict;
2*0Sstevel@tonic-gaterequire 5.000;
3*0Sstevel@tonic-gaterequire Exporter;
4*0Sstevel@tonic-gate
5*0Sstevel@tonic-gateuse strict;
6*0Sstevel@tonic-gate
7*0Sstevel@tonic-gateour $VERSION = '1.02';
8*0Sstevel@tonic-gateour @ISA = qw(Exporter);
9*0Sstevel@tonic-gateour @EXPORT = qw(look);
10*0Sstevel@tonic-gate
11*0Sstevel@tonic-gate=head1 NAME
12*0Sstevel@tonic-gate
13*0Sstevel@tonic-gateSearch::Dict, look - search for key in dictionary file
14*0Sstevel@tonic-gate
15*0Sstevel@tonic-gate=head1 SYNOPSIS
16*0Sstevel@tonic-gate
17*0Sstevel@tonic-gate    use Search::Dict;
18*0Sstevel@tonic-gate    look *FILEHANDLE, $key, $dict, $fold;
19*0Sstevel@tonic-gate
20*0Sstevel@tonic-gate    use Search::Dict;
21*0Sstevel@tonic-gate    look *FILEHANDLE, $params;
22*0Sstevel@tonic-gate
23*0Sstevel@tonic-gate=head1 DESCRIPTION
24*0Sstevel@tonic-gate
25*0Sstevel@tonic-gateSets file position in FILEHANDLE to be first line greater than or equal
26*0Sstevel@tonic-gate(stringwise) to I<$key>.  Returns the new file position, or -1 if an error
27*0Sstevel@tonic-gateoccurs.
28*0Sstevel@tonic-gate
29*0Sstevel@tonic-gateThe flags specify dictionary order and case folding:
30*0Sstevel@tonic-gate
31*0Sstevel@tonic-gateIf I<$dict> is true, search by dictionary order (ignore anything but word
32*0Sstevel@tonic-gatecharacters and whitespace).  The default is honour all characters.
33*0Sstevel@tonic-gate
34*0Sstevel@tonic-gateIf I<$fold> is true, ignore case.  The default is to honour case.
35*0Sstevel@tonic-gate
36*0Sstevel@tonic-gateIf there are only three arguments and the third argument is a hash
37*0Sstevel@tonic-gatereference, the keys of that hash can have values C<dict>, C<fold>, and
38*0Sstevel@tonic-gateC<comp> or C<xfrm> (see below), and their correponding values will be
39*0Sstevel@tonic-gateused as the parameters.
40*0Sstevel@tonic-gate
41*0Sstevel@tonic-gateIf a comparison subroutine (comp) is defined, it must return less than zero,
42*0Sstevel@tonic-gatezero, or greater than zero, if the first comparand is less than,
43*0Sstevel@tonic-gateequal, or greater than the second comparand.
44*0Sstevel@tonic-gate
45*0Sstevel@tonic-gateIf a transformation subroutine (xfrm) is defined, its value is used to
46*0Sstevel@tonic-gatetransform the lines read from the filehandle before their comparison.
47*0Sstevel@tonic-gate
48*0Sstevel@tonic-gate=cut
49*0Sstevel@tonic-gate
50*0Sstevel@tonic-gatesub look {
51*0Sstevel@tonic-gate    my($fh,$key,$dict,$fold) = @_;
52*0Sstevel@tonic-gate    my ($comp, $xfrm);
53*0Sstevel@tonic-gate    if (@_ == 3 && ref $dict eq 'HASH') {
54*0Sstevel@tonic-gate	my $params = $dict;
55*0Sstevel@tonic-gate	$dict = 0;
56*0Sstevel@tonic-gate	$dict = $params->{dict} if exists $params->{dict};
57*0Sstevel@tonic-gate	$fold = $params->{fold} if exists $params->{fold};
58*0Sstevel@tonic-gate	$comp = $params->{comp} if exists $params->{comp};
59*0Sstevel@tonic-gate	$xfrm = $params->{xfrm} if exists $params->{xfrm};
60*0Sstevel@tonic-gate    }
61*0Sstevel@tonic-gate    $comp = sub { $_[0] cmp $_[1] } unless defined $comp;
62*0Sstevel@tonic-gate    local($_);
63*0Sstevel@tonic-gate    my(@stat) = stat($fh)
64*0Sstevel@tonic-gate	or return -1;
65*0Sstevel@tonic-gate    my($size, $blksize) = @stat[7,11];
66*0Sstevel@tonic-gate    $blksize ||= 8192;
67*0Sstevel@tonic-gate    $key =~ s/[^\w\s]//g if $dict;
68*0Sstevel@tonic-gate    $key = lc $key       if $fold;
69*0Sstevel@tonic-gate    # find the right block
70*0Sstevel@tonic-gate    my($min, $max) = (0, int($size / $blksize));
71*0Sstevel@tonic-gate    my $mid;
72*0Sstevel@tonic-gate    while ($max - $min > 1) {
73*0Sstevel@tonic-gate	$mid = int(($max + $min) / 2);
74*0Sstevel@tonic-gate	seek($fh, $mid * $blksize, 0)
75*0Sstevel@tonic-gate	    or return -1;
76*0Sstevel@tonic-gate	<$fh> if $mid;			# probably a partial line
77*0Sstevel@tonic-gate	$_ = <$fh>;
78*0Sstevel@tonic-gate	$_ = $xfrm->($_) if defined $xfrm;
79*0Sstevel@tonic-gate	chomp;
80*0Sstevel@tonic-gate	s/[^\w\s]//g if $dict;
81*0Sstevel@tonic-gate	$_ = lc $_   if $fold;
82*0Sstevel@tonic-gate	if (defined($_) && $comp->($_, $key) < 0) {
83*0Sstevel@tonic-gate	    $min = $mid;
84*0Sstevel@tonic-gate	}
85*0Sstevel@tonic-gate	else {
86*0Sstevel@tonic-gate	    $max = $mid;
87*0Sstevel@tonic-gate	}
88*0Sstevel@tonic-gate    }
89*0Sstevel@tonic-gate    # find the right line
90*0Sstevel@tonic-gate    $min *= $blksize;
91*0Sstevel@tonic-gate    seek($fh,$min,0)
92*0Sstevel@tonic-gate	or return -1;
93*0Sstevel@tonic-gate    <$fh> if $min;
94*0Sstevel@tonic-gate    for (;;) {
95*0Sstevel@tonic-gate	$min = tell($fh);
96*0Sstevel@tonic-gate	defined($_ = <$fh>)
97*0Sstevel@tonic-gate	    or last;
98*0Sstevel@tonic-gate	$_ = $xfrm->($_) if defined $xfrm;
99*0Sstevel@tonic-gate	chomp;
100*0Sstevel@tonic-gate	s/[^\w\s]//g if $dict;
101*0Sstevel@tonic-gate	$_ = lc $_   if $fold;
102*0Sstevel@tonic-gate	last if $comp->($_, $key) >= 0;
103*0Sstevel@tonic-gate    }
104*0Sstevel@tonic-gate    seek($fh,$min,0);
105*0Sstevel@tonic-gate    $min;
106*0Sstevel@tonic-gate}
107*0Sstevel@tonic-gate
108*0Sstevel@tonic-gate1;
109