xref: /plan9/sys/src/ape/cmd/diff/io.c (revision 0b459c2cb92b7c9d88818e9a2f72e678e5bc4553)
1 /* File I/O for GNU DIFF.
2    Copyright (C) 1988, 1989, 1992, 1993, 1994 Free Software Foundation, Inc.
3 
4 This file is part of GNU DIFF.
5 
6 GNU DIFF is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10 
11 GNU DIFF is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GNU DIFF; see the file COPYING.  If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
19 
20 #include "diff.h"
21 
22 /* Rotate a value n bits to the left. */
23 #define UINT_BIT (sizeof (unsigned) * CHAR_BIT)
24 #define ROL(v, n) ((v) << (n) | (v) >> (UINT_BIT - (n)))
25 
26 /* Given a hash value and a new character, return a new hash value. */
27 #define HASH(h, c) ((c) + ROL (h, 7))
28 
29 /* Guess remaining number of lines from number N of lines so far,
30    size S so far, and total size T.  */
31 #define GUESS_LINES(n,s,t) (((t) - (s)) / ((n) < 10 ? 32 : (s) / ((n)-1)) + 5)
32 
33 /* Type used for fast prefix comparison in find_identical_ends.  */
34 #ifndef word
35 #define word int
36 #endif
37 
38 /* Lines are put into equivalence classes (of lines that match in line_cmp).
39    Each equivalence class is represented by one of these structures,
40    but only while the classes are being computed.
41    Afterward, each class is represented by a number.  */
42 struct equivclass
43 {
44   int next;	/* Next item in this bucket. */
45   unsigned hash;	/* Hash of lines in this class.  */
46   char const *line;	/* A line that fits this class. */
47   size_t length;	/* That line's length, not counting its newline.  */
48 };
49 
50 /* Hash-table: array of buckets, each being a chain of equivalence classes.
51    buckets[-1] is reserved for incomplete lines.  */
52 static int *buckets;
53 
54 /* Number of buckets in the hash table array, not counting buckets[-1]. */
55 static int nbuckets;
56 
57 /* Array in which the equivalence classes are allocated.
58    The bucket-chains go through the elements in this array.
59    The number of an equivalence class is its index in this array.  */
60 static struct equivclass *equivs;
61 
62 /* Index of first free element in the array `equivs'.  */
63 static int equivs_index;
64 
65 /* Number of elements allocated in the array `equivs'.  */
66 static int equivs_alloc;
67 
68 static void find_and_hash_each_line PARAMS((struct file_data *));
69 static void find_identical_ends PARAMS((struct file_data[]));
70 static void prepare_text_end PARAMS((struct file_data *));
71 
72 /* Check for binary files and compare them for exact identity.  */
73 
74 /* Return 1 if BUF contains a non text character.
75    SIZE is the number of characters in BUF.  */
76 
77 #define binary_file_p(buf, size) (memchr (buf, '\0', size) != 0)
78 
79 /* Get ready to read the current file.
80    Return nonzero if SKIP_TEST is zero,
81    and if it appears to be a binary file.  */
82 
83 int
sip(current,skip_test)84 sip (current, skip_test)
85      struct file_data *current;
86      int skip_test;
87 {
88   /* If we have a nonexistent file at this stage, treat it as empty.  */
89   if (current->desc < 0)
90     {
91       /* Leave room for a sentinel.  */
92       current->bufsize = sizeof (word);
93       current->buffer = xmalloc (current->bufsize);
94     }
95   else
96     {
97       current->bufsize = STAT_BLOCKSIZE (current->stat);
98       current->buffer = xmalloc (current->bufsize);
99 
100       if (! skip_test)
101 	{
102 	  /* Check first part of file to see if it's a binary file.  */
103 #if HAVE_SETMODE
104 	  int oldmode = setmode (current->desc, O_BINARY);
105 #endif
106 	  size_t n = read (current->desc, current->buffer, current->bufsize);
107 	  if (n == -1)
108 	    pfatal_with_name (current->name);
109 	  current->buffered_chars = n;
110 #if HAVE_SETMODE
111 	  if (oldmode != O_BINARY)
112 	    {
113 	      if (lseek (current->desc, - (off_t) n, SEEK_CUR) == -1)
114 		pfatal_with_name (current->name);
115 	      setmode (current->desc, oldmode);
116 	      current->buffered_chars = 0;
117 	    }
118 #endif
119 	  return binary_file_p (current->buffer, n);
120 	}
121     }
122 
123   current->buffered_chars = 0;
124   return 0;
125 }
126 
127 /* Slurp the rest of the current file completely into memory.  */
128 
129 void
slurp(current)130 slurp (current)
131      struct file_data *current;
132 {
133   size_t cc;
134 
135   if (current->desc < 0)
136     /* The file is nonexistent.  */
137     ;
138   else if (S_ISREG (current->stat.st_mode))
139     {
140       /* It's a regular file; slurp in the rest all at once.  */
141 
142       /* Get the size out of the stat block.
143 	 Allocate enough room for appended newline and sentinel.  */
144       cc = current->stat.st_size + 1 + sizeof (word);
145       if (current->bufsize < cc)
146 	{
147 	  current->bufsize = cc;
148 	  current->buffer = xrealloc (current->buffer, cc);
149 	}
150 
151       if (current->buffered_chars < current->stat.st_size)
152 	{
153 	  cc = read (current->desc,
154 		     current->buffer + current->buffered_chars,
155 		     current->stat.st_size - current->buffered_chars);
156 	  if (cc == -1)
157 	    pfatal_with_name (current->name);
158 	  current->buffered_chars += cc;
159 	}
160     }
161   /* It's not a regular file; read it, growing the buffer as needed.  */
162   else if (always_text_flag || current->buffered_chars != 0)
163     {
164       for (;;)
165 	{
166 	  if (current->buffered_chars == current->bufsize)
167 	    {
168 	      current->bufsize = current->bufsize * 2;
169 	      current->buffer = xrealloc (current->buffer, current->bufsize);
170 	    }
171 	  cc = read (current->desc,
172 		     current->buffer + current->buffered_chars,
173 		     current->bufsize - current->buffered_chars);
174 	  if (cc == 0)
175 	    break;
176 	  if (cc == -1)
177 	    pfatal_with_name (current->name);
178 	  current->buffered_chars += cc;
179 	}
180       /* Allocate just enough room for appended newline and sentinel.  */
181       current->bufsize = current->buffered_chars + 1 + sizeof (word);
182       current->buffer = xrealloc (current->buffer, current->bufsize);
183     }
184 }
185 
186 /* Split the file into lines, simultaneously computing the equivalence class for
187    each line. */
188 
189 static void
find_and_hash_each_line(current)190 find_and_hash_each_line (current)
191      struct file_data *current;
192 {
193   unsigned h;
194   unsigned char const *p = (unsigned char const *) current->prefix_end;
195   unsigned char c;
196   int i, *bucket;
197   size_t length;
198 
199   /* Cache often-used quantities in local variables to help the compiler.  */
200   char const **linbuf = current->linbuf;
201   int alloc_lines = current->alloc_lines;
202   int line = 0;
203   int linbuf_base = current->linbuf_base;
204   int *cureqs = (int *) xmalloc (alloc_lines * sizeof (int));
205   struct equivclass *eqs = equivs;
206   int eqs_index = equivs_index;
207   int eqs_alloc = equivs_alloc;
208   char const *suffix_begin = current->suffix_begin;
209   char const *bufend = current->buffer + current->buffered_chars;
210   int use_line_cmp = ignore_some_line_changes;
211 
212   while ((char const *) p < suffix_begin)
213     {
214       char const *ip = (char const *) p;
215 
216       /* Compute the equivalence class for this line.  */
217 
218       h = 0;
219 
220       /* Hash this line until we find a newline. */
221       if (ignore_case_flag)
222 	{
223 	  if (ignore_all_space_flag)
224 	    while ((c = *p++) != '\n')
225 	      {
226 		if (! ISSPACE (c))
227 		  h = HASH (h, ISUPPER (c) ? tolower (c) : c);
228 	      }
229 	  else if (ignore_space_change_flag)
230 	    while ((c = *p++) != '\n')
231 	      {
232 		if (ISSPACE (c))
233 		  {
234 		    for (;;)
235 		      {
236 			c = *p++;
237 			if (!ISSPACE (c))
238 			  break;
239 			if (c == '\n')
240 			  goto hashing_done;
241 		      }
242 		    h = HASH (h, ' ');
243 		  }
244 		/* C is now the first non-space.  */
245 		h = HASH (h, ISUPPER (c) ? tolower (c) : c);
246 	      }
247 	  else
248 	    while ((c = *p++) != '\n')
249 	      h = HASH (h, ISUPPER (c) ? tolower (c) : c);
250 	}
251       else
252 	{
253 	  if (ignore_all_space_flag)
254 	    while ((c = *p++) != '\n')
255 	      {
256 		if (! ISSPACE (c))
257 		  h = HASH (h, c);
258 	      }
259 	  else if (ignore_space_change_flag)
260 	    while ((c = *p++) != '\n')
261 	      {
262 		if (ISSPACE (c))
263 		  {
264 		    for (;;)
265 		      {
266 			c = *p++;
267 			if (!ISSPACE (c))
268 			  break;
269 			if (c == '\n')
270 			  goto hashing_done;
271 		      }
272 		    h = HASH (h, ' ');
273 		  }
274 		/* C is now the first non-space.  */
275 		h = HASH (h, c);
276 	      }
277 	  else
278 	    while ((c = *p++) != '\n')
279 	      h = HASH (h, c);
280 	}
281    hashing_done:;
282 
283       bucket = &buckets[h % nbuckets];
284       length = (char const *) p - ip - 1;
285 
286       if ((char const *) p == bufend
287 	  && current->missing_newline
288 	  && ROBUST_OUTPUT_STYLE (output_style))
289 	{
290 	  /* This line is incomplete.  If this is significant,
291 	     put the line into bucket[-1].  */
292 	  if (! (ignore_space_change_flag | ignore_all_space_flag))
293 	    bucket = &buckets[-1];
294 
295 	  /* Omit the inserted newline when computing linbuf later.  */
296 	  p--;
297 	  bufend = suffix_begin = (char const *) p;
298 	}
299 
300       for (i = *bucket;  ;  i = eqs[i].next)
301 	if (!i)
302 	  {
303 	    /* Create a new equivalence class in this bucket. */
304 	    i = eqs_index++;
305 	    if (i == eqs_alloc)
306 	      eqs = (struct equivclass *)
307 		      xrealloc (eqs, (eqs_alloc*=2) * sizeof(*eqs));
308 	    eqs[i].next = *bucket;
309 	    eqs[i].hash = h;
310 	    eqs[i].line = ip;
311 	    eqs[i].length = length;
312 	    *bucket = i;
313 	    break;
314 	  }
315 	else if (eqs[i].hash == h)
316 	  {
317 	    char const *eqline = eqs[i].line;
318 
319 	    /* Reuse existing equivalence class if the lines are identical.
320 	       This detects the common case of exact identity
321 	       faster than complete comparison would.  */
322 	    if (eqs[i].length == length && memcmp (eqline, ip, length) == 0)
323 	      break;
324 
325 	    /* Reuse existing class if line_cmp reports the lines equal.  */
326 	    if (use_line_cmp && line_cmp (eqline, ip) == 0)
327 	      break;
328 	  }
329 
330       /* Maybe increase the size of the line table. */
331       if (line == alloc_lines)
332 	{
333 	  /* Double (alloc_lines - linbuf_base) by adding to alloc_lines.  */
334 	  alloc_lines = 2 * alloc_lines - linbuf_base;
335 	  cureqs = (int *) xrealloc (cureqs, alloc_lines * sizeof (*cureqs));
336 	  linbuf = (char const **) xrealloc (linbuf + linbuf_base,
337 					     (alloc_lines - linbuf_base)
338 					     * sizeof (*linbuf))
339 		   - linbuf_base;
340 	}
341       linbuf[line] = ip;
342       cureqs[line] = i;
343       ++line;
344     }
345 
346   current->buffered_lines = line;
347 
348   for (i = 0;  ;  i++)
349     {
350       /* Record the line start for lines in the suffix that we care about.
351 	 Record one more line start than lines,
352 	 so that we can compute the length of any buffered line.  */
353       if (line == alloc_lines)
354 	{
355 	  /* Double (alloc_lines - linbuf_base) by adding to alloc_lines.  */
356 	  alloc_lines = 2 * alloc_lines - linbuf_base;
357 	  linbuf = (char const **) xrealloc (linbuf + linbuf_base,
358 					     (alloc_lines - linbuf_base)
359 					     * sizeof (*linbuf))
360 		   - linbuf_base;
361 	}
362       linbuf[line] = (char const *) p;
363 
364       if ((char const *) p == bufend)
365 	break;
366 
367       if (context <= i && no_diff_means_no_output)
368 	break;
369 
370       line++;
371 
372       while (*p++ != '\n')
373 	;
374     }
375 
376   /* Done with cache in local variables.  */
377   current->linbuf = linbuf;
378   current->valid_lines = line;
379   current->alloc_lines = alloc_lines;
380   current->equivs = cureqs;
381   equivs = eqs;
382   equivs_alloc = eqs_alloc;
383   equivs_index = eqs_index;
384 }
385 
386 /* Prepare the end of the text.  Make sure it's initialized.
387    Make sure text ends in a newline,
388    but remember that we had to add one.  */
389 
390 static void
prepare_text_end(current)391 prepare_text_end (current)
392      struct file_data *current;
393 {
394   size_t buffered_chars = current->buffered_chars;
395   char *p = current->buffer;
396 
397   if (buffered_chars == 0 || p[buffered_chars - 1] == '\n')
398     current->missing_newline = 0;
399   else
400     {
401       p[buffered_chars++] = '\n';
402       current->buffered_chars = buffered_chars;
403       current->missing_newline = 1;
404     }
405 
406   /* Don't use uninitialized storage when planting or using sentinels.  */
407   if (p)
408     bzero (p + buffered_chars, sizeof (word));
409 }
410 
411 /* Given a vector of two file_data objects, find the identical
412    prefixes and suffixes of each object. */
413 
414 static void
find_identical_ends(filevec)415 find_identical_ends (filevec)
416      struct file_data filevec[];
417 {
418   word *w0, *w1;
419   char *p0, *p1, *buffer0, *buffer1;
420   char const *end0, *beg0;
421   char const **linbuf0, **linbuf1;
422   int i, lines;
423   size_t n0, n1, tem;
424   int alloc_lines0, alloc_lines1;
425   int buffered_prefix, prefix_count, prefix_mask;
426 
427   slurp (&filevec[0]);
428   if (filevec[0].desc != filevec[1].desc)
429     slurp (&filevec[1]);
430   else
431     {
432       filevec[1].buffer = filevec[0].buffer;
433       filevec[1].bufsize = filevec[0].bufsize;
434       filevec[1].buffered_chars = filevec[0].buffered_chars;
435     }
436   for (i = 0; i < 2; i++)
437     prepare_text_end (&filevec[i]);
438 
439   /* Find identical prefix.  */
440 
441   p0 = buffer0 = filevec[0].buffer;
442   p1 = buffer1 = filevec[1].buffer;
443 
444   n0 = filevec[0].buffered_chars;
445   n1 = filevec[1].buffered_chars;
446 
447   if (p0 == p1)
448     /* The buffers are the same; sentinels won't work.  */
449     p0 = p1 += n1;
450   else
451     {
452       /* Insert end sentinels, in this case characters that are guaranteed
453 	 to make the equality test false, and thus terminate the loop.  */
454 
455       if (n0 < n1)
456 	p0[n0] = ~p1[n0];
457       else
458 	p1[n1] = ~p0[n1];
459 
460       /* Loop until first mismatch, or to the sentinel characters.  */
461 
462       /* Compare a word at a time for speed.  */
463       w0 = (word *) p0;
464       w1 = (word *) p1;
465       while (*w0++ == *w1++)
466 	;
467       --w0, --w1;
468 
469       /* Do the last few bytes of comparison a byte at a time.  */
470       p0 = (char *) w0;
471       p1 = (char *) w1;
472       while (*p0++ == *p1++)
473 	;
474       --p0, --p1;
475 
476       /* Don't mistakenly count missing newline as part of prefix. */
477       if (ROBUST_OUTPUT_STYLE (output_style)
478 	  && (buffer0 + n0 - filevec[0].missing_newline < p0)
479 	     !=
480 	     (buffer1 + n1 - filevec[1].missing_newline < p1))
481 	--p0, --p1;
482     }
483 
484   /* Now P0 and P1 point at the first nonmatching characters.  */
485 
486   /* Skip back to last line-beginning in the prefix,
487      and then discard up to HORIZON_LINES lines from the prefix.  */
488   i = horizon_lines;
489   while (p0 != buffer0 && (p0[-1] != '\n' || i--))
490     --p0, --p1;
491 
492   /* Record the prefix.  */
493   filevec[0].prefix_end = p0;
494   filevec[1].prefix_end = p1;
495 
496   /* Find identical suffix.  */
497 
498   /* P0 and P1 point beyond the last chars not yet compared.  */
499   p0 = buffer0 + n0;
500   p1 = buffer1 + n1;
501 
502   if (! ROBUST_OUTPUT_STYLE (output_style)
503       || filevec[0].missing_newline == filevec[1].missing_newline)
504     {
505       end0 = p0;	/* Addr of last char in file 0.  */
506 
507       /* Get value of P0 at which we should stop scanning backward:
508 	 this is when either P0 or P1 points just past the last char
509 	 of the identical prefix.  */
510       beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1);
511 
512       /* Scan back until chars don't match or we reach that point.  */
513       while (p0 != beg0)
514 	if (*--p0 != *--p1)
515 	  {
516 	    /* Point at the first char of the matching suffix.  */
517 	    ++p0, ++p1;
518 	    beg0 = p0;
519 	    break;
520 	  }
521 
522       /* Are we at a line-beginning in both files?  If not, add the rest of
523 	 this line to the main body.  Discard up to HORIZON_LINES lines from
524 	 the identical suffix.  Also, discard one extra line,
525 	 because shift_boundaries may need it.  */
526       i = horizon_lines + !((buffer0 == p0 || p0[-1] == '\n')
527 			    &&
528 			    (buffer1 == p1 || p1[-1] == '\n'));
529       while (i-- && p0 != end0)
530 	while (*p0++ != '\n')
531 	  ;
532 
533       p1 += p0 - beg0;
534     }
535 
536   /* Record the suffix.  */
537   filevec[0].suffix_begin = p0;
538   filevec[1].suffix_begin = p1;
539 
540   /* Calculate number of lines of prefix to save.
541 
542      prefix_count == 0 means save the whole prefix;
543      we need this with for options like -D that output the whole file.
544      We also need it for options like -F that output some preceding line;
545      at least we will need to find the last few lines,
546      but since we don't know how many, it's easiest to find them all.
547 
548      Otherwise, prefix_count != 0.  Save just prefix_count lines at start
549      of the line buffer; they'll be moved to the proper location later.
550      Handle 1 more line than the context says (because we count 1 too many),
551      rounded up to the next power of 2 to speed index computation.  */
552 
553   if (no_diff_means_no_output && ! function_regexp_list)
554     {
555       for (prefix_count = 1;  prefix_count < context + 1;  prefix_count *= 2)
556 	;
557       prefix_mask = prefix_count - 1;
558       alloc_lines0
559 	= prefix_count
560 	  + GUESS_LINES (0, 0, p0 - filevec[0].prefix_end)
561 	  + context;
562     }
563   else
564     {
565       prefix_count = 0;
566       prefix_mask = ~0;
567       alloc_lines0 = GUESS_LINES (0, 0, n0);
568     }
569 
570   lines = 0;
571   linbuf0 = (char const **) xmalloc (alloc_lines0 * sizeof (*linbuf0));
572 
573   /* If the prefix is needed, find the prefix lines.  */
574   if (! (no_diff_means_no_output
575 	 && filevec[0].prefix_end == p0
576 	 && filevec[1].prefix_end == p1))
577     {
578       p0 = buffer0;
579       end0 = filevec[0].prefix_end;
580       while (p0 != end0)
581 	{
582 	  int l = lines++ & prefix_mask;
583 	  if (l == alloc_lines0)
584 	    linbuf0 = (char const **) xrealloc (linbuf0, (alloc_lines0 *= 2)
585 							 * sizeof(*linbuf0));
586 	  linbuf0[l] = p0;
587 	  while (*p0++ != '\n')
588 	    ;
589 	}
590     }
591   buffered_prefix = prefix_count && context < lines ? context : lines;
592 
593   /* Allocate line buffer 1.  */
594   tem = prefix_count ? filevec[1].suffix_begin - buffer1 : n1;
595 
596   alloc_lines1
597     = (buffered_prefix
598        + GUESS_LINES (lines, filevec[1].prefix_end - buffer1, tem)
599        + context);
600   linbuf1 = (char const **) xmalloc (alloc_lines1 * sizeof (*linbuf1));
601 
602   if (buffered_prefix != lines)
603     {
604       /* Rotate prefix lines to proper location.  */
605       for (i = 0;  i < buffered_prefix;  i++)
606 	linbuf1[i] = linbuf0[(lines - context + i) & prefix_mask];
607       for (i = 0;  i < buffered_prefix;  i++)
608 	linbuf0[i] = linbuf1[i];
609     }
610 
611   /* Initialize line buffer 1 from line buffer 0.  */
612   for (i = 0; i < buffered_prefix; i++)
613     linbuf1[i] = linbuf0[i] - buffer0 + buffer1;
614 
615   /* Record the line buffer, adjusted so that
616      linbuf*[0] points at the first differing line.  */
617   filevec[0].linbuf = linbuf0 + buffered_prefix;
618   filevec[1].linbuf = linbuf1 + buffered_prefix;
619   filevec[0].linbuf_base = filevec[1].linbuf_base = - buffered_prefix;
620   filevec[0].alloc_lines = alloc_lines0 - buffered_prefix;
621   filevec[1].alloc_lines = alloc_lines1 - buffered_prefix;
622   filevec[0].prefix_lines = filevec[1].prefix_lines = lines;
623 }
624 
625 /* Largest primes less than some power of two, for nbuckets.  Values range
626    from useful to preposterous.  If one of these numbers isn't prime
627    after all, don't blame it on me, blame it on primes (6) . . . */
628 static int const primes[] =
629 {
630   509,
631   1021,
632   2039,
633   4093,
634   8191,
635   16381,
636   32749,
637 #if 32767 < INT_MAX
638   65521,
639   131071,
640   262139,
641   524287,
642   1048573,
643   2097143,
644   4194301,
645   8388593,
646   16777213,
647   33554393,
648   67108859,			/* Preposterously large . . . */
649   134217689,
650   268435399,
651   536870909,
652   1073741789,
653   2147483647,
654 #endif
655   0
656 };
657 
658 /* Given a vector of two file_data objects, read the file associated
659    with each one, and build the table of equivalence classes.
660    Return 1 if either file appears to be a binary file.
661    If PRETEND_BINARY is nonzero, pretend they are binary regardless.  */
662 
663 int
read_files(filevec,pretend_binary)664 read_files (filevec, pretend_binary)
665      struct file_data filevec[];
666      int pretend_binary;
667 {
668   int i;
669   int skip_test = always_text_flag | pretend_binary;
670   int appears_binary = pretend_binary | sip (&filevec[0], skip_test);
671 
672   if (filevec[0].desc != filevec[1].desc)
673     appears_binary |= sip (&filevec[1], skip_test | appears_binary);
674   else
675     {
676       filevec[1].buffer = filevec[0].buffer;
677       filevec[1].bufsize = filevec[0].bufsize;
678       filevec[1].buffered_chars = filevec[0].buffered_chars;
679     }
680   if (appears_binary)
681     {
682 #if HAVE_SETMODE
683       setmode (filevec[0].desc, O_BINARY);
684       setmode (filevec[1].desc, O_BINARY);
685 #endif
686       return 1;
687     }
688 
689   find_identical_ends (filevec);
690 
691   equivs_alloc = filevec[0].alloc_lines + filevec[1].alloc_lines + 1;
692   equivs = (struct equivclass *) xmalloc (equivs_alloc * sizeof (struct equivclass));
693   /* Equivalence class 0 is permanently safe for lines that were not
694      hashed.  Real equivalence classes start at 1. */
695   equivs_index = 1;
696 
697   for (i = 0;  primes[i] < equivs_alloc / 3;  i++)
698     if (! primes[i])
699       abort ();
700   nbuckets = primes[i];
701 
702   buckets = (int *) xmalloc ((nbuckets + 1) * sizeof (*buckets));
703   bzero (buckets++, (nbuckets + 1) * sizeof (*buckets));
704 
705   for (i = 0; i < 2; i++)
706     find_and_hash_each_line (&filevec[i]);
707 
708   filevec[0].equiv_max = filevec[1].equiv_max = equivs_index;
709 
710   free (equivs);
711   free (buckets - 1);
712 
713   return 0;
714 }
715