xref: /openbsd-src/gnu/usr.bin/cvs/diff/io.c (revision c71bc7e269286e43816004eb0fcd7a55f036cd69)
12286d8edStholo /* File I/O for GNU DIFF.
22286d8edStholo    Copyright (C) 1988, 1989, 1992, 1993, 1994 Free Software Foundation, Inc.
32286d8edStholo 
42286d8edStholo This file is part of GNU DIFF.
52286d8edStholo 
62286d8edStholo GNU DIFF is free software; you can redistribute it and/or modify
72286d8edStholo it under the terms of the GNU General Public License as published by
82286d8edStholo the Free Software Foundation; either version 2, or (at your option)
92286d8edStholo any later version.
102286d8edStholo 
112286d8edStholo GNU DIFF is distributed in the hope that it will be useful,
122286d8edStholo but WITHOUT ANY WARRANTY; without even the implied warranty of
132286d8edStholo MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
142286d8edStholo GNU General Public License for more details.
152286d8edStholo 
16*c71bc7e2Stholo */
172286d8edStholo 
182286d8edStholo #include "diff.h"
192286d8edStholo 
202286d8edStholo /* Rotate a value n bits to the left. */
212286d8edStholo #define UINT_BIT (sizeof (unsigned) * CHAR_BIT)
222286d8edStholo #define ROL(v, n) ((v) << (n) | (v) >> (UINT_BIT - (n)))
232286d8edStholo 
242286d8edStholo /* Given a hash value and a new character, return a new hash value. */
252286d8edStholo #define HASH(h, c) ((c) + ROL (h, 7))
262286d8edStholo 
272286d8edStholo /* Guess remaining number of lines from number N of lines so far,
282286d8edStholo    size S so far, and total size T.  */
292286d8edStholo #define GUESS_LINES(n,s,t) (((t) - (s)) / ((n) < 10 ? 32 : (s) / ((n)-1)) + 5)
302286d8edStholo 
312286d8edStholo /* Type used for fast prefix comparison in find_identical_ends.  */
322286d8edStholo #ifndef word
332286d8edStholo #define word int
342286d8edStholo #endif
352286d8edStholo 
362286d8edStholo /* Lines are put into equivalence classes (of lines that match in line_cmp).
372286d8edStholo    Each equivalence class is represented by one of these structures,
382286d8edStholo    but only while the classes are being computed.
392286d8edStholo    Afterward, each class is represented by a number.  */
402286d8edStholo struct equivclass
412286d8edStholo {
422286d8edStholo   int next;	/* Next item in this bucket. */
432286d8edStholo   unsigned hash;	/* Hash of lines in this class.  */
442286d8edStholo   char const *line;	/* A line that fits this class. */
452286d8edStholo   size_t length;	/* That line's length, not counting its newline.  */
462286d8edStholo };
472286d8edStholo 
482286d8edStholo /* Hash-table: array of buckets, each being a chain of equivalence classes.
492286d8edStholo    buckets[-1] is reserved for incomplete lines.  */
502286d8edStholo static int *buckets;
512286d8edStholo 
522286d8edStholo /* Number of buckets in the hash table array, not counting buckets[-1]. */
532286d8edStholo static int nbuckets;
542286d8edStholo 
552286d8edStholo /* Array in which the equivalence classes are allocated.
562286d8edStholo    The bucket-chains go through the elements in this array.
572286d8edStholo    The number of an equivalence class is its index in this array.  */
582286d8edStholo static struct equivclass *equivs;
592286d8edStholo 
602286d8edStholo /* Index of first free element in the array `equivs'.  */
612286d8edStholo static int equivs_index;
622286d8edStholo 
632286d8edStholo /* Number of elements allocated in the array `equivs'.  */
642286d8edStholo static int equivs_alloc;
652286d8edStholo 
662286d8edStholo static void find_and_hash_each_line PARAMS((struct file_data *));
672286d8edStholo static void find_identical_ends PARAMS((struct file_data[]));
682286d8edStholo static void prepare_text_end PARAMS((struct file_data *));
692286d8edStholo 
702286d8edStholo /* Check for binary files and compare them for exact identity.  */
712286d8edStholo 
722286d8edStholo /* Return 1 if BUF contains a non text character.
732286d8edStholo    SIZE is the number of characters in BUF.  */
742286d8edStholo 
752286d8edStholo #define binary_file_p(buf, size) (memchr (buf, '\0', size) != 0)
762286d8edStholo 
772286d8edStholo /* Get ready to read the current file.
782286d8edStholo    Return nonzero if SKIP_TEST is zero,
792286d8edStholo    and if it appears to be a binary file.  */
802286d8edStholo 
812286d8edStholo int
sip(current,skip_test)822286d8edStholo sip (current, skip_test)
832286d8edStholo      struct file_data *current;
842286d8edStholo      int skip_test;
852286d8edStholo {
862286d8edStholo   /* If we have a nonexistent file at this stage, treat it as empty.  */
872286d8edStholo   if (current->desc < 0)
882286d8edStholo     {
892286d8edStholo       /* Leave room for a sentinel.  */
902286d8edStholo       current->bufsize = sizeof (word);
912286d8edStholo       current->buffer = xmalloc (current->bufsize);
922286d8edStholo     }
932286d8edStholo   else
942286d8edStholo     {
952286d8edStholo       current->bufsize = STAT_BLOCKSIZE (current->stat);
962286d8edStholo       current->buffer = xmalloc (current->bufsize);
972286d8edStholo 
982286d8edStholo       if (! skip_test)
992286d8edStholo 	{
1002286d8edStholo 	  /* Check first part of file to see if it's a binary file.  */
1012286d8edStholo #if HAVE_SETMODE
1022286d8edStholo 	  int oldmode = setmode (current->desc, O_BINARY);
1032286d8edStholo #endif
1042286d8edStholo 	  size_t n = read (current->desc, current->buffer, current->bufsize);
1052286d8edStholo 	  if (n == -1)
1062286d8edStholo 	    pfatal_with_name (current->name);
1072286d8edStholo 	  current->buffered_chars = n;
1082286d8edStholo #if HAVE_SETMODE
1092286d8edStholo 	  if (oldmode != O_BINARY)
1102286d8edStholo 	    {
1112286d8edStholo 	      if (lseek (current->desc, - (off_t) n, SEEK_CUR) == -1)
1122286d8edStholo 		pfatal_with_name (current->name);
1132286d8edStholo 	      setmode (current->desc, oldmode);
1142286d8edStholo 	      current->buffered_chars = 0;
1152286d8edStholo 	    }
1162286d8edStholo #endif
1172286d8edStholo 	  return binary_file_p (current->buffer, n);
1182286d8edStholo 	}
1192286d8edStholo     }
1202286d8edStholo 
1212286d8edStholo   current->buffered_chars = 0;
1222286d8edStholo   return 0;
1232286d8edStholo }
1242286d8edStholo 
1252286d8edStholo /* Slurp the rest of the current file completely into memory.  */
1262286d8edStholo 
1272286d8edStholo void
slurp(current)1282286d8edStholo slurp (current)
1292286d8edStholo      struct file_data *current;
1302286d8edStholo {
1312286d8edStholo   size_t cc;
1322286d8edStholo 
1332286d8edStholo   if (current->desc < 0)
1342286d8edStholo     /* The file is nonexistent.  */
1352286d8edStholo     ;
1362286d8edStholo   else if (S_ISREG (current->stat.st_mode))
1372286d8edStholo     {
1382286d8edStholo       /* It's a regular file; slurp in the rest all at once.  */
1392286d8edStholo 
1402286d8edStholo       /* Get the size out of the stat block.
1412286d8edStholo 	 Allocate enough room for appended newline and sentinel.  */
1422286d8edStholo       cc = current->stat.st_size + 1 + sizeof (word);
1432286d8edStholo       if (current->bufsize < cc)
1442286d8edStholo 	{
1452286d8edStholo 	  current->bufsize = cc;
1462286d8edStholo 	  current->buffer = xrealloc (current->buffer, cc);
1472286d8edStholo 	}
1482286d8edStholo 
1492286d8edStholo       if (current->buffered_chars < current->stat.st_size)
1502286d8edStholo 	{
1512286d8edStholo 	  cc = read (current->desc,
1522286d8edStholo 		     current->buffer + current->buffered_chars,
1532286d8edStholo 		     current->stat.st_size - current->buffered_chars);
1542286d8edStholo 	  if (cc == -1)
1552286d8edStholo 	    pfatal_with_name (current->name);
1562286d8edStholo 	  current->buffered_chars += cc;
1572286d8edStholo 	}
1582286d8edStholo     }
1592286d8edStholo   /* It's not a regular file; read it, growing the buffer as needed.  */
1602286d8edStholo   else if (always_text_flag || current->buffered_chars != 0)
1612286d8edStholo     {
1622286d8edStholo       for (;;)
1632286d8edStholo 	{
1642286d8edStholo 	  if (current->buffered_chars == current->bufsize)
1652286d8edStholo 	    {
1662286d8edStholo 	      current->bufsize = current->bufsize * 2;
1672286d8edStholo 	      current->buffer = xrealloc (current->buffer, current->bufsize);
1682286d8edStholo 	    }
1692286d8edStholo 	  cc = read (current->desc,
1702286d8edStholo 		     current->buffer + current->buffered_chars,
1712286d8edStholo 		     current->bufsize - current->buffered_chars);
1722286d8edStholo 	  if (cc == 0)
1732286d8edStholo 	    break;
1742286d8edStholo 	  if (cc == -1)
1752286d8edStholo 	    pfatal_with_name (current->name);
1762286d8edStholo 	  current->buffered_chars += cc;
1772286d8edStholo 	}
1782286d8edStholo       /* Allocate just enough room for appended newline and sentinel.  */
1792286d8edStholo       current->bufsize = current->buffered_chars + 1 + sizeof (word);
1802286d8edStholo       current->buffer = xrealloc (current->buffer, current->bufsize);
1812286d8edStholo     }
1822286d8edStholo }
1832286d8edStholo 
1842286d8edStholo /* Split the file into lines, simultaneously computing the equivalence class for
1852286d8edStholo    each line. */
1862286d8edStholo 
1872286d8edStholo static void
find_and_hash_each_line(current)1882286d8edStholo find_and_hash_each_line (current)
1892286d8edStholo      struct file_data *current;
1902286d8edStholo {
1912286d8edStholo   unsigned h;
1922286d8edStholo   unsigned char const *p = (unsigned char const *) current->prefix_end;
1932286d8edStholo   unsigned char c;
1942286d8edStholo   int i, *bucket;
1952286d8edStholo   size_t length;
1962286d8edStholo 
1972286d8edStholo   /* Cache often-used quantities in local variables to help the compiler.  */
1982286d8edStholo   char const **linbuf = current->linbuf;
1992286d8edStholo   int alloc_lines = current->alloc_lines;
2002286d8edStholo   int line = 0;
2012286d8edStholo   int linbuf_base = current->linbuf_base;
2022286d8edStholo   int *cureqs = (int *) xmalloc (alloc_lines * sizeof (int));
2032286d8edStholo   struct equivclass *eqs = equivs;
2042286d8edStholo   int eqs_index = equivs_index;
2052286d8edStholo   int eqs_alloc = equivs_alloc;
2062286d8edStholo   char const *suffix_begin = current->suffix_begin;
2072286d8edStholo   char const *bufend = current->buffer + current->buffered_chars;
2082286d8edStholo   int use_line_cmp = ignore_some_line_changes;
2092286d8edStholo 
2102286d8edStholo   while ((char const *) p < suffix_begin)
2112286d8edStholo     {
2122286d8edStholo       char const *ip = (char const *) p;
2132286d8edStholo 
2142286d8edStholo       /* Compute the equivalence class for this line.  */
2152286d8edStholo 
2162286d8edStholo       h = 0;
2172286d8edStholo 
2182286d8edStholo       /* Hash this line until we find a newline. */
2192286d8edStholo       if (ignore_case_flag)
2202286d8edStholo 	{
2212286d8edStholo 	  if (ignore_all_space_flag)
2222286d8edStholo 	    while ((c = *p++) != '\n')
2232286d8edStholo 	      {
2242286d8edStholo 		if (! ISSPACE (c))
2252286d8edStholo 		  h = HASH (h, ISUPPER (c) ? tolower (c) : c);
2262286d8edStholo 	      }
2272286d8edStholo 	  else if (ignore_space_change_flag)
2282286d8edStholo 	    while ((c = *p++) != '\n')
2292286d8edStholo 	      {
2302286d8edStholo 		if (ISSPACE (c))
2312286d8edStholo 		  {
2322286d8edStholo 		    for (;;)
2332286d8edStholo 		      {
2342286d8edStholo 			c = *p++;
2352286d8edStholo 			if (!ISSPACE (c))
2362286d8edStholo 			  break;
2372286d8edStholo 			if (c == '\n')
2382286d8edStholo 			  goto hashing_done;
2392286d8edStholo 		      }
2402286d8edStholo 		    h = HASH (h, ' ');
2412286d8edStholo 		  }
2422286d8edStholo 		/* C is now the first non-space.  */
2432286d8edStholo 		h = HASH (h, ISUPPER (c) ? tolower (c) : c);
2442286d8edStholo 	      }
2452286d8edStholo 	  else
2462286d8edStholo 	    while ((c = *p++) != '\n')
2472286d8edStholo 	      h = HASH (h, ISUPPER (c) ? tolower (c) : c);
2482286d8edStholo 	}
2492286d8edStholo       else
2502286d8edStholo 	{
2512286d8edStholo 	  if (ignore_all_space_flag)
2522286d8edStholo 	    while ((c = *p++) != '\n')
2532286d8edStholo 	      {
2542286d8edStholo 		if (! ISSPACE (c))
2552286d8edStholo 		  h = HASH (h, c);
2562286d8edStholo 	      }
2572286d8edStholo 	  else if (ignore_space_change_flag)
2582286d8edStholo 	    while ((c = *p++) != '\n')
2592286d8edStholo 	      {
2602286d8edStholo 		if (ISSPACE (c))
2612286d8edStholo 		  {
2622286d8edStholo 		    for (;;)
2632286d8edStholo 		      {
2642286d8edStholo 			c = *p++;
2652286d8edStholo 			if (!ISSPACE (c))
2662286d8edStholo 			  break;
2672286d8edStholo 			if (c == '\n')
2682286d8edStholo 			  goto hashing_done;
2692286d8edStholo 		      }
2702286d8edStholo 		    h = HASH (h, ' ');
2712286d8edStholo 		  }
2722286d8edStholo 		/* C is now the first non-space.  */
2732286d8edStholo 		h = HASH (h, c);
2742286d8edStholo 	      }
2752286d8edStholo 	  else
2762286d8edStholo 	    while ((c = *p++) != '\n')
2772286d8edStholo 	      h = HASH (h, c);
2782286d8edStholo 	}
2792286d8edStholo    hashing_done:;
2802286d8edStholo 
2812286d8edStholo       bucket = &buckets[h % nbuckets];
2822286d8edStholo       length = (char const *) p - ip - 1;
2832286d8edStholo 
2842286d8edStholo       if ((char const *) p == bufend
2852286d8edStholo 	  && current->missing_newline
2862286d8edStholo 	  && ROBUST_OUTPUT_STYLE (output_style))
2872286d8edStholo 	{
2882286d8edStholo 	  /* This line is incomplete.  If this is significant,
2892286d8edStholo 	     put the line into bucket[-1].  */
2902286d8edStholo 	  if (! (ignore_space_change_flag | ignore_all_space_flag))
2912286d8edStholo 	    bucket = &buckets[-1];
2922286d8edStholo 
2932286d8edStholo 	  /* Omit the inserted newline when computing linbuf later.  */
2942286d8edStholo 	  p--;
2952286d8edStholo 	  bufend = suffix_begin = (char const *) p;
2962286d8edStholo 	}
2972286d8edStholo 
2982286d8edStholo       for (i = *bucket;  ;  i = eqs[i].next)
2992286d8edStholo 	if (!i)
3002286d8edStholo 	  {
3012286d8edStholo 	    /* Create a new equivalence class in this bucket. */
3022286d8edStholo 	    i = eqs_index++;
3032286d8edStholo 	    if (i == eqs_alloc)
3042286d8edStholo 	      eqs = (struct equivclass *)
3052286d8edStholo 		      xrealloc (eqs, (eqs_alloc*=2) * sizeof(*eqs));
3062286d8edStholo 	    eqs[i].next = *bucket;
3072286d8edStholo 	    eqs[i].hash = h;
3082286d8edStholo 	    eqs[i].line = ip;
3092286d8edStholo 	    eqs[i].length = length;
3102286d8edStholo 	    *bucket = i;
3112286d8edStholo 	    break;
3122286d8edStholo 	  }
3132286d8edStholo 	else if (eqs[i].hash == h)
3142286d8edStholo 	  {
3152286d8edStholo 	    char const *eqline = eqs[i].line;
3162286d8edStholo 
3172286d8edStholo 	    /* Reuse existing equivalence class if the lines are identical.
3182286d8edStholo 	       This detects the common case of exact identity
3192286d8edStholo 	       faster than complete comparison would.  */
3202286d8edStholo 	    if (eqs[i].length == length && memcmp (eqline, ip, length) == 0)
3212286d8edStholo 	      break;
3222286d8edStholo 
3232286d8edStholo 	    /* Reuse existing class if line_cmp reports the lines equal.  */
3242286d8edStholo 	    if (use_line_cmp && line_cmp (eqline, ip) == 0)
3252286d8edStholo 	      break;
3262286d8edStholo 	  }
3272286d8edStholo 
3282286d8edStholo       /* Maybe increase the size of the line table. */
3292286d8edStholo       if (line == alloc_lines)
3302286d8edStholo 	{
3312286d8edStholo 	  /* Double (alloc_lines - linbuf_base) by adding to alloc_lines.  */
3322286d8edStholo 	  alloc_lines = 2 * alloc_lines - linbuf_base;
3332286d8edStholo 	  cureqs = (int *) xrealloc (cureqs, alloc_lines * sizeof (*cureqs));
3342286d8edStholo 	  linbuf = (char const **) xrealloc (linbuf + linbuf_base,
3352286d8edStholo 					     (alloc_lines - linbuf_base)
3362286d8edStholo 					     * sizeof (*linbuf))
3372286d8edStholo 		   - linbuf_base;
3382286d8edStholo 	}
3392286d8edStholo       linbuf[line] = ip;
3402286d8edStholo       cureqs[line] = i;
3412286d8edStholo       ++line;
3422286d8edStholo     }
3432286d8edStholo 
3442286d8edStholo   current->buffered_lines = line;
3452286d8edStholo 
3462286d8edStholo   for (i = 0;  ;  i++)
3472286d8edStholo     {
3482286d8edStholo       /* Record the line start for lines in the suffix that we care about.
3492286d8edStholo 	 Record one more line start than lines,
3502286d8edStholo 	 so that we can compute the length of any buffered line.  */
3512286d8edStholo       if (line == alloc_lines)
3522286d8edStholo 	{
3532286d8edStholo 	  /* Double (alloc_lines - linbuf_base) by adding to alloc_lines.  */
3542286d8edStholo 	  alloc_lines = 2 * alloc_lines - linbuf_base;
3552286d8edStholo 	  linbuf = (char const **) xrealloc (linbuf + linbuf_base,
3562286d8edStholo 					     (alloc_lines - linbuf_base)
3572286d8edStholo 					     * sizeof (*linbuf))
3582286d8edStholo 		   - linbuf_base;
3592286d8edStholo 	}
3602286d8edStholo       linbuf[line] = (char const *) p;
3612286d8edStholo 
3622286d8edStholo       if ((char const *) p == bufend)
3632286d8edStholo 	break;
3642286d8edStholo 
3652286d8edStholo       if (context <= i && no_diff_means_no_output)
3662286d8edStholo 	break;
3672286d8edStholo 
3682286d8edStholo       line++;
3692286d8edStholo 
3702286d8edStholo       while (*p++ != '\n')
3712286d8edStholo 	;
3722286d8edStholo     }
3732286d8edStholo 
3742286d8edStholo   /* Done with cache in local variables.  */
3752286d8edStholo   current->linbuf = linbuf;
3762286d8edStholo   current->valid_lines = line;
3772286d8edStholo   current->alloc_lines = alloc_lines;
3782286d8edStholo   current->equivs = cureqs;
3792286d8edStholo   equivs = eqs;
3802286d8edStholo   equivs_alloc = eqs_alloc;
3812286d8edStholo   equivs_index = eqs_index;
3822286d8edStholo }
3832286d8edStholo 
3842286d8edStholo /* Prepare the end of the text.  Make sure it's initialized.
3852286d8edStholo    Make sure text ends in a newline,
3862286d8edStholo    but remember that we had to add one.  */
3872286d8edStholo 
3882286d8edStholo static void
prepare_text_end(current)3892286d8edStholo prepare_text_end (current)
3902286d8edStholo      struct file_data *current;
3912286d8edStholo {
3922286d8edStholo   size_t buffered_chars = current->buffered_chars;
3932286d8edStholo   char *p = current->buffer;
3942286d8edStholo 
3952286d8edStholo   if (buffered_chars == 0 || p[buffered_chars - 1] == '\n')
3962286d8edStholo     current->missing_newline = 0;
3972286d8edStholo   else
3982286d8edStholo     {
3992286d8edStholo       p[buffered_chars++] = '\n';
4002286d8edStholo       current->buffered_chars = buffered_chars;
4012286d8edStholo       current->missing_newline = 1;
4022286d8edStholo     }
4032286d8edStholo 
4042286d8edStholo   /* Don't use uninitialized storage when planting or using sentinels.  */
4052286d8edStholo   if (p)
4062286d8edStholo     bzero (p + buffered_chars, sizeof (word));
4072286d8edStholo }
4082286d8edStholo 
4092286d8edStholo /* Given a vector of two file_data objects, find the identical
4102286d8edStholo    prefixes and suffixes of each object. */
4112286d8edStholo 
4122286d8edStholo static void
find_identical_ends(filevec)4132286d8edStholo find_identical_ends (filevec)
4142286d8edStholo      struct file_data filevec[];
4152286d8edStholo {
4162286d8edStholo   word *w0, *w1;
4172286d8edStholo   char *p0, *p1, *buffer0, *buffer1;
4182286d8edStholo   char const *end0, *beg0;
4192286d8edStholo   char const **linbuf0, **linbuf1;
4202286d8edStholo   int i, lines;
4212286d8edStholo   size_t n0, n1, tem;
4222286d8edStholo   int alloc_lines0, alloc_lines1;
4232286d8edStholo   int buffered_prefix, prefix_count, prefix_mask;
4242286d8edStholo 
4252286d8edStholo   slurp (&filevec[0]);
4262286d8edStholo   if (filevec[0].desc != filevec[1].desc)
4272286d8edStholo     slurp (&filevec[1]);
4282286d8edStholo   else
4292286d8edStholo     {
4302286d8edStholo       filevec[1].buffer = filevec[0].buffer;
4312286d8edStholo       filevec[1].bufsize = filevec[0].bufsize;
4322286d8edStholo       filevec[1].buffered_chars = filevec[0].buffered_chars;
4332286d8edStholo     }
4342286d8edStholo   for (i = 0; i < 2; i++)
4352286d8edStholo     prepare_text_end (&filevec[i]);
4362286d8edStholo 
4372286d8edStholo   /* Find identical prefix.  */
4382286d8edStholo 
4392286d8edStholo   p0 = buffer0 = filevec[0].buffer;
4402286d8edStholo   p1 = buffer1 = filevec[1].buffer;
4412286d8edStholo 
4422286d8edStholo   n0 = filevec[0].buffered_chars;
4432286d8edStholo   n1 = filevec[1].buffered_chars;
4442286d8edStholo 
4452286d8edStholo   if (p0 == p1)
4462286d8edStholo     /* The buffers are the same; sentinels won't work.  */
4472286d8edStholo     p0 = p1 += n1;
4482286d8edStholo   else
4492286d8edStholo     {
4502286d8edStholo       /* Insert end sentinels, in this case characters that are guaranteed
4512286d8edStholo 	 to make the equality test false, and thus terminate the loop.  */
4522286d8edStholo 
4532286d8edStholo       if (n0 < n1)
4542286d8edStholo 	p0[n0] = ~p1[n0];
4552286d8edStholo       else
4562286d8edStholo 	p1[n1] = ~p0[n1];
4572286d8edStholo 
4582286d8edStholo       /* Loop until first mismatch, or to the sentinel characters.  */
4592286d8edStholo 
4602286d8edStholo       /* Compare a word at a time for speed.  */
4612286d8edStholo       w0 = (word *) p0;
4622286d8edStholo       w1 = (word *) p1;
4632286d8edStholo       while (*w0++ == *w1++)
4642286d8edStholo 	;
4652286d8edStholo       --w0, --w1;
4662286d8edStholo 
4672286d8edStholo       /* Do the last few bytes of comparison a byte at a time.  */
4682286d8edStholo       p0 = (char *) w0;
4692286d8edStholo       p1 = (char *) w1;
4702286d8edStholo       while (*p0++ == *p1++)
4712286d8edStholo 	;
4722286d8edStholo       --p0, --p1;
4732286d8edStholo 
4742286d8edStholo       /* Don't mistakenly count missing newline as part of prefix. */
4752286d8edStholo       if (ROBUST_OUTPUT_STYLE (output_style)
4762286d8edStholo 	  && (buffer0 + n0 - filevec[0].missing_newline < p0)
4772286d8edStholo 	     !=
4782286d8edStholo 	     (buffer1 + n1 - filevec[1].missing_newline < p1))
4792286d8edStholo 	--p0, --p1;
4802286d8edStholo     }
4812286d8edStholo 
4822286d8edStholo   /* Now P0 and P1 point at the first nonmatching characters.  */
4832286d8edStholo 
4842286d8edStholo   /* Skip back to last line-beginning in the prefix,
4852286d8edStholo      and then discard up to HORIZON_LINES lines from the prefix.  */
4862286d8edStholo   i = horizon_lines;
4872286d8edStholo   while (p0 != buffer0 && (p0[-1] != '\n' || i--))
4882286d8edStholo     --p0, --p1;
4892286d8edStholo 
4902286d8edStholo   /* Record the prefix.  */
4912286d8edStholo   filevec[0].prefix_end = p0;
4922286d8edStholo   filevec[1].prefix_end = p1;
4932286d8edStholo 
4942286d8edStholo   /* Find identical suffix.  */
4952286d8edStholo 
4962286d8edStholo   /* P0 and P1 point beyond the last chars not yet compared.  */
4972286d8edStholo   p0 = buffer0 + n0;
4982286d8edStholo   p1 = buffer1 + n1;
4992286d8edStholo 
5002286d8edStholo   if (! ROBUST_OUTPUT_STYLE (output_style)
5012286d8edStholo       || filevec[0].missing_newline == filevec[1].missing_newline)
5022286d8edStholo     {
5032286d8edStholo       end0 = p0;	/* Addr of last char in file 0.  */
5042286d8edStholo 
5052286d8edStholo       /* Get value of P0 at which we should stop scanning backward:
5062286d8edStholo 	 this is when either P0 or P1 points just past the last char
5072286d8edStholo 	 of the identical prefix.  */
5082286d8edStholo       beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1);
5092286d8edStholo 
5102286d8edStholo       /* Scan back until chars don't match or we reach that point.  */
5112286d8edStholo       while (p0 != beg0)
5122286d8edStholo 	if (*--p0 != *--p1)
5132286d8edStholo 	  {
5142286d8edStholo 	    /* Point at the first char of the matching suffix.  */
5152286d8edStholo 	    ++p0, ++p1;
5162286d8edStholo 	    beg0 = p0;
5172286d8edStholo 	    break;
5182286d8edStholo 	  }
5192286d8edStholo 
5202286d8edStholo       /* Are we at a line-beginning in both files?  If not, add the rest of
5212286d8edStholo 	 this line to the main body.  Discard up to HORIZON_LINES lines from
5222286d8edStholo 	 the identical suffix.  Also, discard one extra line,
5232286d8edStholo 	 because shift_boundaries may need it.  */
5242286d8edStholo       i = horizon_lines + !((buffer0 == p0 || p0[-1] == '\n')
5252286d8edStholo 			    &&
5262286d8edStholo 			    (buffer1 == p1 || p1[-1] == '\n'));
5272286d8edStholo       while (i-- && p0 != end0)
5282286d8edStholo 	while (*p0++ != '\n')
5292286d8edStholo 	  ;
5302286d8edStholo 
5312286d8edStholo       p1 += p0 - beg0;
5322286d8edStholo     }
5332286d8edStholo 
5342286d8edStholo   /* Record the suffix.  */
5352286d8edStholo   filevec[0].suffix_begin = p0;
5362286d8edStholo   filevec[1].suffix_begin = p1;
5372286d8edStholo 
5382286d8edStholo   /* Calculate number of lines of prefix to save.
5392286d8edStholo 
5402286d8edStholo      prefix_count == 0 means save the whole prefix;
5412286d8edStholo      we need this with for options like -D that output the whole file.
5422286d8edStholo      We also need it for options like -F that output some preceding line;
5432286d8edStholo      at least we will need to find the last few lines,
5442286d8edStholo      but since we don't know how many, it's easiest to find them all.
5452286d8edStholo 
5462286d8edStholo      Otherwise, prefix_count != 0.  Save just prefix_count lines at start
5472286d8edStholo      of the line buffer; they'll be moved to the proper location later.
5482286d8edStholo      Handle 1 more line than the context says (because we count 1 too many),
5492286d8edStholo      rounded up to the next power of 2 to speed index computation.  */
5502286d8edStholo 
5512286d8edStholo   if (no_diff_means_no_output && ! function_regexp_list)
5522286d8edStholo     {
5532286d8edStholo       for (prefix_count = 1;  prefix_count < context + 1;  prefix_count *= 2)
5542286d8edStholo 	;
5552286d8edStholo       prefix_mask = prefix_count - 1;
5562286d8edStholo       alloc_lines0
5572286d8edStholo 	= prefix_count
5582286d8edStholo 	  + GUESS_LINES (0, 0, p0 - filevec[0].prefix_end)
5592286d8edStholo 	  + context;
5602286d8edStholo     }
5612286d8edStholo   else
5622286d8edStholo     {
5632286d8edStholo       prefix_count = 0;
5642286d8edStholo       prefix_mask = ~0;
5652286d8edStholo       alloc_lines0 = GUESS_LINES (0, 0, n0);
5662286d8edStholo     }
5672286d8edStholo 
5682286d8edStholo   lines = 0;
5692286d8edStholo   linbuf0 = (char const **) xmalloc (alloc_lines0 * sizeof (*linbuf0));
5702286d8edStholo 
5712286d8edStholo   /* If the prefix is needed, find the prefix lines.  */
5722286d8edStholo   if (! (no_diff_means_no_output
5732286d8edStholo 	 && filevec[0].prefix_end == p0
5742286d8edStholo 	 && filevec[1].prefix_end == p1))
5752286d8edStholo     {
5762286d8edStholo       p0 = buffer0;
5772286d8edStholo       end0 = filevec[0].prefix_end;
5782286d8edStholo       while (p0 != end0)
5792286d8edStholo 	{
5802286d8edStholo 	  int l = lines++ & prefix_mask;
5812286d8edStholo 	  if (l == alloc_lines0)
5822286d8edStholo 	    linbuf0 = (char const **) xrealloc (linbuf0, (alloc_lines0 *= 2)
5832286d8edStholo 							 * sizeof(*linbuf0));
5842286d8edStholo 	  linbuf0[l] = p0;
5852286d8edStholo 	  while (*p0++ != '\n')
5862286d8edStholo 	    ;
5872286d8edStholo 	}
5882286d8edStholo     }
5892286d8edStholo   buffered_prefix = prefix_count && context < lines ? context : lines;
5902286d8edStholo 
5912286d8edStholo   /* Allocate line buffer 1.  */
5922286d8edStholo   tem = prefix_count ? filevec[1].suffix_begin - buffer1 : n1;
5932286d8edStholo 
5942286d8edStholo   alloc_lines1
5952286d8edStholo     = (buffered_prefix
5962286d8edStholo        + GUESS_LINES (lines, filevec[1].prefix_end - buffer1, tem)
5972286d8edStholo        + context);
5982286d8edStholo   linbuf1 = (char const **) xmalloc (alloc_lines1 * sizeof (*linbuf1));
5992286d8edStholo 
6002286d8edStholo   if (buffered_prefix != lines)
6012286d8edStholo     {
6022286d8edStholo       /* Rotate prefix lines to proper location.  */
6032286d8edStholo       for (i = 0;  i < buffered_prefix;  i++)
6042286d8edStholo 	linbuf1[i] = linbuf0[(lines - context + i) & prefix_mask];
6052286d8edStholo       for (i = 0;  i < buffered_prefix;  i++)
6062286d8edStholo 	linbuf0[i] = linbuf1[i];
6072286d8edStholo     }
6082286d8edStholo 
6092286d8edStholo   /* Initialize line buffer 1 from line buffer 0.  */
6102286d8edStholo   for (i = 0; i < buffered_prefix; i++)
6112286d8edStholo     linbuf1[i] = linbuf0[i] - buffer0 + buffer1;
6122286d8edStholo 
6132286d8edStholo   /* Record the line buffer, adjusted so that
6142286d8edStholo      linbuf*[0] points at the first differing line.  */
6152286d8edStholo   filevec[0].linbuf = linbuf0 + buffered_prefix;
6162286d8edStholo   filevec[1].linbuf = linbuf1 + buffered_prefix;
6172286d8edStholo   filevec[0].linbuf_base = filevec[1].linbuf_base = - buffered_prefix;
6182286d8edStholo   filevec[0].alloc_lines = alloc_lines0 - buffered_prefix;
6192286d8edStholo   filevec[1].alloc_lines = alloc_lines1 - buffered_prefix;
6202286d8edStholo   filevec[0].prefix_lines = filevec[1].prefix_lines = lines;
6212286d8edStholo }
6222286d8edStholo 
6232286d8edStholo /* Largest primes less than some power of two, for nbuckets.  Values range
6242286d8edStholo    from useful to preposterous.  If one of these numbers isn't prime
6252286d8edStholo    after all, don't blame it on me, blame it on primes (6) . . . */
6262286d8edStholo static int const primes[] =
6272286d8edStholo {
6282286d8edStholo   509,
6292286d8edStholo   1021,
6302286d8edStholo   2039,
6312286d8edStholo   4093,
6322286d8edStholo   8191,
6332286d8edStholo   16381,
6342286d8edStholo   32749,
6352286d8edStholo #if 32767 < INT_MAX
6362286d8edStholo   65521,
6372286d8edStholo   131071,
6382286d8edStholo   262139,
6392286d8edStholo   524287,
6402286d8edStholo   1048573,
6412286d8edStholo   2097143,
6422286d8edStholo   4194301,
6432286d8edStholo   8388593,
6442286d8edStholo   16777213,
6452286d8edStholo   33554393,
6462286d8edStholo   67108859,			/* Preposterously large . . . */
6472286d8edStholo   134217689,
6482286d8edStholo   268435399,
6492286d8edStholo   536870909,
6502286d8edStholo   1073741789,
6512286d8edStholo   2147483647,
6522286d8edStholo #endif
6532286d8edStholo   0
6542286d8edStholo };
6552286d8edStholo 
6562286d8edStholo /* Given a vector of two file_data objects, read the file associated
6572286d8edStholo    with each one, and build the table of equivalence classes.
6582286d8edStholo    Return 1 if either file appears to be a binary file.
6592286d8edStholo    If PRETEND_BINARY is nonzero, pretend they are binary regardless.  */
6602286d8edStholo 
6612286d8edStholo int
read_files(filevec,pretend_binary)6622286d8edStholo read_files (filevec, pretend_binary)
6632286d8edStholo      struct file_data filevec[];
6642286d8edStholo      int pretend_binary;
6652286d8edStholo {
6662286d8edStholo   int i;
6672286d8edStholo   int skip_test = always_text_flag | pretend_binary;
6682286d8edStholo   int appears_binary = pretend_binary | sip (&filevec[0], skip_test);
6692286d8edStholo 
6702286d8edStholo   if (filevec[0].desc != filevec[1].desc)
6712286d8edStholo     appears_binary |= sip (&filevec[1], skip_test | appears_binary);
6722286d8edStholo   else
6732286d8edStholo     {
6742286d8edStholo       filevec[1].buffer = filevec[0].buffer;
6752286d8edStholo       filevec[1].bufsize = filevec[0].bufsize;
6762286d8edStholo       filevec[1].buffered_chars = filevec[0].buffered_chars;
6772286d8edStholo     }
6782286d8edStholo   if (appears_binary)
6792286d8edStholo     {
6802286d8edStholo #if HAVE_SETMODE
6812286d8edStholo       setmode (filevec[0].desc, O_BINARY);
6822286d8edStholo       setmode (filevec[1].desc, O_BINARY);
6832286d8edStholo #endif
6842286d8edStholo       return 1;
6852286d8edStholo     }
6862286d8edStholo 
6872286d8edStholo   find_identical_ends (filevec);
6882286d8edStholo 
6892286d8edStholo   equivs_alloc = filevec[0].alloc_lines + filevec[1].alloc_lines + 1;
6902286d8edStholo   equivs = (struct equivclass *) xmalloc (equivs_alloc * sizeof (struct equivclass));
6912286d8edStholo   /* Equivalence class 0 is permanently safe for lines that were not
6922286d8edStholo      hashed.  Real equivalence classes start at 1. */
6932286d8edStholo   equivs_index = 1;
6942286d8edStholo 
6952286d8edStholo   for (i = 0;  primes[i] < equivs_alloc / 3;  i++)
6962286d8edStholo     if (! primes[i])
6972286d8edStholo       abort ();
6982286d8edStholo   nbuckets = primes[i];
6992286d8edStholo 
7002286d8edStholo   buckets = (int *) xmalloc ((nbuckets + 1) * sizeof (*buckets));
7012286d8edStholo   bzero (buckets++, (nbuckets + 1) * sizeof (*buckets));
7022286d8edStholo 
7032286d8edStholo   for (i = 0; i < 2; i++)
7042286d8edStholo     find_and_hash_each_line (&filevec[i]);
7052286d8edStholo 
7062286d8edStholo   filevec[0].equiv_max = filevec[1].equiv_max = equivs_index;
7072286d8edStholo 
7082286d8edStholo   free (equivs);
7092286d8edStholo   free (buckets - 1);
7102286d8edStholo 
7112286d8edStholo   return 0;
7122286d8edStholo }
713