xref: /netbsd-src/external/gpl2/diffutils/dist/src/cmp.c (revision 75f6d617e282811cb173c2ccfbf5df0dd71f7045)
1 /*	$NetBSD: cmp.c,v 1.1.1.1 2016/01/13 03:15:30 christos Exp $	*/
2 
3 /* cmp - compare two files byte by byte
4 
5    Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1998, 2001,
6    2002 Free Software Foundation, Inc.
7 
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 2, or (at your option)
11    any later version.
12 
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16    See the GNU General Public License for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with this program; see the file COPYING.
20    If not, write to the Free Software Foundation,
21    59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
22 
23 #include "system.h"
24 
25 #include <stdio.h>
26 #include <cmpbuf.h>
27 #include <c-stack.h>
28 #include <error.h>
29 #include <exitfail.h>
30 #include <freesoft.h>
31 #include <getopt.h>
32 #include <hard-locale.h>
33 #include <inttostr.h>
34 #include <setmode.h>
35 #include <xalloc.h>
36 #include <xstrtol.h>
37 
38 #if defined LC_MESSAGES && ENABLE_NLS
39 # define hard_locale_LC_MESSAGES hard_locale (LC_MESSAGES)
40 #else
41 # define hard_locale_LC_MESSAGES 0
42 #endif
43 
44 static char const authorship_msgid[] =
45   N_("Written by Torbjorn Granlund and David MacKenzie.");
46 
47 static char const copyright_string[] =
48   "Copyright (C) 2002 Free Software Foundation, Inc.";
49 
50 extern char const version_string[];
51 
52 static int cmp (void);
53 static off_t file_position (int);
54 static size_t block_compare (word const *, word const *);
55 static size_t block_compare_and_count (word const *, word const *, off_t *);
56 static void sprintc (char *, unsigned char);
57 
58 /* Name under which this program was invoked.  */
59 char *program_name;
60 
61 /* Filenames of the compared files.  */
62 static char const *file[2];
63 
64 /* File descriptors of the files.  */
65 static int file_desc[2];
66 
67 /* Status of the files.  */
68 static struct stat stat_buf[2];
69 
70 /* Read buffers for the files.  */
71 static word *buffer[2];
72 
73 /* Optimal block size for the files.  */
74 static size_t buf_size;
75 
76 /* Initial prefix to ignore for each file.  */
77 static off_t ignore_initial[2];
78 
79 /* Number of bytes to compare.  */
80 static uintmax_t bytes = UINTMAX_MAX;
81 
82 /* Output format.  */
83 static enum comparison_type
84   {
85     type_first_diff,	/* Print the first difference.  */
86     type_all_diffs,	/* Print all differences.  */
87     type_status		/* Exit status only.  */
88   } comparison_type;
89 
90 /* If nonzero, print values of bytes quoted like cat -t does. */
91 static bool opt_print_bytes;
92 
93 /* Values for long options that do not have single-letter equivalents.  */
94 enum
95 {
96   HELP_OPTION = CHAR_MAX + 1
97 };
98 
99 static struct option const long_options[] =
100 {
101   {"print-bytes", 0, 0, 'b'},
102   {"print-chars", 0, 0, 'c'}, /* obsolescent as of diffutils 2.7.3 */
103   {"ignore-initial", 1, 0, 'i'},
104   {"verbose", 0, 0, 'l'},
105   {"bytes", 1, 0, 'n'},
106   {"silent", 0, 0, 's'},
107   {"quiet", 0, 0, 's'},
108   {"version", 0, 0, 'v'},
109   {"help", 0, 0, HELP_OPTION},
110   {0, 0, 0, 0}
111 };
112 
113 static void try_help (char const *, char const *) __attribute__((noreturn));
114 static void
try_help(char const * reason_msgid,char const * operand)115 try_help (char const *reason_msgid, char const *operand)
116 {
117   if (reason_msgid)
118     error (0, 0, _(reason_msgid), operand);
119   error (EXIT_TROUBLE, 0,
120 	 _("Try `%s --help' for more information."), program_name);
121   abort ();
122 }
123 
124 static char const valid_suffixes[] = "kKMGTPEZY0";
125 
126 /* Parse an operand *ARGPTR of --ignore-initial, updating *ARGPTR to
127    point after the operand.  If DELIMITER is nonzero, the operand may
128    be followed by DELIMITER; otherwise it must be null-terminated.  */
129 static off_t
parse_ignore_initial(char ** argptr,char delimiter)130 parse_ignore_initial (char **argptr, char delimiter)
131 {
132   uintmax_t val;
133   off_t o;
134   char const *arg = *argptr;
135   strtol_error e = xstrtoumax (arg, argptr, 0, &val, valid_suffixes);
136   if (! (e == LONGINT_OK
137 	 || (e == LONGINT_INVALID_SUFFIX_CHAR && **argptr == delimiter))
138       || (o = val) < 0 || o != val || val == UINTMAX_MAX)
139     try_help ("invalid --ignore-initial value `%s'", arg);
140   return o;
141 }
142 
143 /* Specify the output format.  */
144 static void
specify_comparison_type(enum comparison_type t)145 specify_comparison_type (enum comparison_type t)
146 {
147   if (comparison_type)
148     try_help ("options -l and -s are incompatible", 0);
149   comparison_type = t;
150 }
151 
152 static void
check_stdout(void)153 check_stdout (void)
154 {
155   if (ferror (stdout))
156     error (EXIT_TROUBLE, 0, "%s", _("write failed"));
157   else if (fclose (stdout) != 0)
158     error (EXIT_TROUBLE, errno, "%s", _("standard output"));
159 }
160 
161 static char const * const option_help_msgid[] = {
162   N_("-b  --print-bytes  Print differing bytes."),
163   N_("-i SKIP  --ignore-initial=SKIP  Skip the first SKIP bytes of input."),
164   N_("-i SKIP1:SKIP2  --ignore-initial=SKIP1:SKIP2"),
165   N_("  Skip the first SKIP1 bytes of FILE1 and the first SKIP2 bytes of FILE2."),
166   N_("-l  --verbose  Output byte numbers and values of all differing bytes."),
167   N_("-n LIMIT  --bytes=LIMIT  Compare at most LIMIT bytes."),
168   N_("-s  --quiet  --silent  Output nothing; yield exit status only."),
169   N_("-v  --version  Output version info."),
170   N_("--help  Output this help."),
171   0
172 };
173 
174 static void
usage(void)175 usage (void)
176 {
177   char const * const *p;
178 
179   printf (_("Usage: %s [OPTION]... FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"),
180 	  program_name);
181   printf ("%s\n\n", _("Compare two files byte by byte."));
182   for (p = option_help_msgid;  *p;  p++)
183     printf ("  %s\n", _(*p));
184   printf ("\n%s\n%s\n\n%s\n\n%s\n",
185 	  _("SKIP1 and SKIP2 are the number of bytes to skip in each file."),
186 	  _("SKIP values may be followed by the following multiplicative suffixes:\n\
187 kB 1000, K 1024, MB 1,000,000, M 1,048,576,\n\
188 GB 1,000,000,000, G 1,073,741,824, and so on for T, P, E, Z, Y."),
189 	  _("If a FILE is `-' or missing, read standard input."),
190 	  _("Report bugs to <bug-gnu-utils@gnu.org>."));
191 }
192 
193 int
main(int argc,char ** argv)194 main (int argc, char **argv)
195 {
196   int c, f, exit_status;
197   size_t words_per_buffer;
198 
199   exit_failure = EXIT_TROUBLE;
200   initialize_main (&argc, &argv);
201   program_name = argv[0];
202   setlocale (LC_ALL, "");
203   bindtextdomain (PACKAGE, LOCALEDIR);
204   textdomain (PACKAGE);
205   c_stack_action (c_stack_die);
206 
207   /* Parse command line options.  */
208 
209   while ((c = getopt_long (argc, argv, "bci:ln:sv", long_options, 0))
210 	 != -1)
211     switch (c)
212       {
213       case 'b':
214       case 'c': /* 'c' is obsolescent as of diffutils 2.7.3 */
215 	opt_print_bytes = 1;
216 	break;
217 
218       case 'i':
219 	ignore_initial[0] = parse_ignore_initial (&optarg, ':');
220 	ignore_initial[1] = (*optarg++ == ':'
221 			     ? parse_ignore_initial (&optarg, 0)
222 			     : ignore_initial[0]);
223 	break;
224 
225       case 'l':
226 	specify_comparison_type (type_all_diffs);
227 	break;
228 
229       case 'n':
230 	{
231 	  uintmax_t n;
232 	  if (xstrtoumax (optarg, 0, 0, &n, valid_suffixes) != LONGINT_OK)
233 	    try_help ("invalid --bytes value `%s'", optarg);
234 	  if (n < bytes)
235 	    bytes = n;
236 	}
237 	break;
238 
239       case 's':
240 	specify_comparison_type (type_status);
241 	break;
242 
243       case 'v':
244 	printf ("cmp %s\n%s\n\n%s\n\n%s\n",
245 		version_string, copyright_string,
246 		_(free_software_msgid), _(authorship_msgid));
247 	check_stdout ();
248 	return EXIT_SUCCESS;
249 
250       case HELP_OPTION:
251 	usage ();
252 	check_stdout ();
253 	return EXIT_SUCCESS;
254 
255       default:
256 	try_help (0, 0);
257       }
258 
259   if (optind == argc)
260     try_help ("missing operand after `%s'", argv[argc - 1]);
261 
262   file[0] = argv[optind++];
263   file[1] = optind < argc ? argv[optind++] : "-";
264 
265   for (f = 0; f < 2 && optind < argc; f++)
266     {
267       char *arg = argv[optind++];
268       ignore_initial[f] = parse_ignore_initial (&arg, 0);
269     }
270 
271   if (optind < argc)
272     try_help ("extra operand `%s'", argv[optind]);
273 
274   for (f = 0; f < 2; f++)
275     {
276       /* If file[1] is "-", treat it first; this avoids a misdiagnostic if
277 	 stdin is closed and opening file[0] yields file descriptor 0.  */
278       int f1 = f ^ (strcmp (file[1], "-") == 0);
279 
280       /* Two files with the same name are identical.
281 	 But wait until we open the file once, for proper diagnostics.  */
282       if (f && file_name_cmp (file[0], file[1]) == 0)
283 	return EXIT_SUCCESS;
284 
285       file_desc[f1] = (strcmp (file[f1], "-") == 0
286 		       ? STDIN_FILENO
287 		       : open (file[f1], O_RDONLY, 0));
288       if (file_desc[f1] < 0 || fstat (file_desc[f1], stat_buf + f1) != 0)
289 	{
290 	  if (file_desc[f1] < 0 && comparison_type == type_status)
291 	    exit (EXIT_TROUBLE);
292 	  else
293 	    error (EXIT_TROUBLE, errno, "%s", file[f1]);
294 	}
295 
296       set_binary_mode (file_desc[f1], 1);
297     }
298 
299   /* If the files are links to the same inode and have the same file position,
300      they are identical.  */
301 
302   if (0 < same_file (&stat_buf[0], &stat_buf[1])
303       && same_file_attributes (&stat_buf[0], &stat_buf[1])
304       && file_position (0) == file_position (1))
305     return EXIT_SUCCESS;
306 
307   /* If output is redirected to the null device, we may assume `-s'.  */
308 
309   if (comparison_type != type_status)
310     {
311       struct stat outstat, nullstat;
312 
313       if (fstat (STDOUT_FILENO, &outstat) == 0
314 	  && stat (NULL_DEVICE, &nullstat) == 0
315 	  && 0 < same_file (&outstat, &nullstat))
316 	comparison_type = type_status;
317     }
318 
319   /* If only a return code is needed,
320      and if both input descriptors are associated with plain files,
321      conclude that the files differ if they have different sizes
322      and if more bytes will be compared than are in the smaller file.  */
323 
324   if (comparison_type == type_status
325       && S_ISREG (stat_buf[0].st_mode)
326       && S_ISREG (stat_buf[1].st_mode))
327     {
328       off_t s0 = stat_buf[0].st_size - file_position (0);
329       off_t s1 = stat_buf[1].st_size - file_position (1);
330       if (s0 < 0)
331 	s0 = 0;
332       if (s1 < 0)
333 	s1 = 0;
334       if (s0 != s1 && MIN (s0, s1) < bytes)
335 	exit (EXIT_FAILURE);
336     }
337 
338   /* Get the optimal block size of the files.  */
339 
340   buf_size = buffer_lcm (STAT_BLOCKSIZE (stat_buf[0]),
341 			 STAT_BLOCKSIZE (stat_buf[1]),
342 			 PTRDIFF_MAX - sizeof (word));
343 
344   /* Allocate word-aligned buffers, with space for sentinels at the end.  */
345 
346   words_per_buffer = (buf_size + 2 * sizeof (word) - 1) / sizeof (word);
347   buffer[0] = xmalloc (2 * sizeof (word) * words_per_buffer);
348   buffer[1] = buffer[0] + words_per_buffer;
349 
350   exit_status = cmp ();
351 
352   for (f = 0; f < 2; f++)
353     if (close (file_desc[f]) != 0)
354       error (EXIT_TROUBLE, errno, "%s", file[f]);
355   if (exit_status != 0  &&  comparison_type != type_status)
356     check_stdout ();
357   exit (exit_status);
358   return exit_status;
359 }
360 
361 /* Compare the two files already open on `file_desc[0]' and `file_desc[1]',
362    using `buffer[0]' and `buffer[1]'.
363    Return EXIT_SUCCESS if identical, EXIT_FAILURE if different,
364    >1 if error.  */
365 
366 static int
cmp(void)367 cmp (void)
368 {
369   off_t line_number = 1;	/* Line number (1...) of difference. */
370   off_t byte_number = 1;	/* Byte number (1...) of difference. */
371   uintmax_t remaining = bytes;	/* Remaining number of bytes to compare.  */
372   size_t read0, read1;		/* Number of bytes read from each file. */
373   size_t first_diff;		/* Offset (0...) in buffers of 1st diff. */
374   size_t smaller;		/* The lesser of `read0' and `read1'. */
375   word *buffer0 = buffer[0];
376   word *buffer1 = buffer[1];
377   char *buf0 = (char *) buffer0;
378   char *buf1 = (char *) buffer1;
379   int ret = EXIT_SUCCESS;
380   int f;
381   int offset_width;
382 
383   if (comparison_type == type_all_diffs)
384     {
385       off_t byte_number_max = MIN (bytes, TYPE_MAXIMUM (off_t));
386 
387       for (f = 0; f < 2; f++)
388 	if (S_ISREG (stat_buf[f].st_mode))
389 	  {
390 	    off_t file_bytes = stat_buf[f].st_size - file_position (f);
391 	    if (file_bytes < byte_number_max)
392 	      byte_number_max = file_bytes;
393 	  }
394 
395       for (offset_width = 1; (byte_number_max /= 10) != 0; offset_width++)
396 	continue;
397     }
398 
399   for (f = 0; f < 2; f++)
400     {
401       off_t ig = ignore_initial[f];
402       if (ig && file_position (f) == -1)
403 	{
404 	  /* lseek failed; read and discard the ignored initial prefix.  */
405 	  do
406 	    {
407 	      size_t bytes_to_read = MIN (ig, buf_size);
408 	      size_t r = block_read (file_desc[f], buf0, bytes_to_read);
409 	      if (r != bytes_to_read)
410 		{
411 		  if (r == SIZE_MAX)
412 		    error (EXIT_TROUBLE, errno, "%s", file[f]);
413 		  break;
414 		}
415 	      ig -= r;
416 	    }
417 	  while (ig);
418 	}
419     }
420 
421   do
422     {
423       size_t bytes_to_read = buf_size;
424 
425       if (remaining != UINTMAX_MAX)
426 	{
427 	  if (remaining < bytes_to_read)
428 	    bytes_to_read = remaining;
429 	  remaining -= bytes_to_read;
430 	}
431 
432       read0 = block_read (file_desc[0], buf0, bytes_to_read);
433       if (read0 == SIZE_MAX)
434 	error (EXIT_TROUBLE, errno, "%s", file[0]);
435       read1 = block_read (file_desc[1], buf1, bytes_to_read);
436       if (read1 == SIZE_MAX)
437 	error (EXIT_TROUBLE, errno, "%s", file[1]);
438 
439       /* Insert sentinels for the block compare.  */
440 
441       buf0[read0] = ~buf1[read0];
442       buf1[read1] = ~buf0[read1];
443 
444       /* If the line number should be written for differing files,
445 	 compare the blocks and count the number of newlines
446 	 simultaneously.  */
447       first_diff = (comparison_type == type_first_diff
448 		    ? block_compare_and_count (buffer0, buffer1, &line_number)
449 		    : block_compare (buffer0, buffer1));
450 
451       byte_number += first_diff;
452       smaller = MIN (read0, read1);
453 
454       if (first_diff < smaller)
455 	{
456 	  switch (comparison_type)
457 	    {
458 	    case type_first_diff:
459 	      {
460 		char byte_buf[INT_BUFSIZE_BOUND (off_t)];
461 		char line_buf[INT_BUFSIZE_BOUND (off_t)];
462 		char const *byte_num = offtostr (byte_number, byte_buf);
463 		char const *line_num = offtostr (line_number, line_buf);
464 		if (!opt_print_bytes)
465 		  {
466 		    /* See POSIX 1003.1-2001 for this format.  This
467 		       message is used only in the POSIX locale, so it
468 		       need not be translated.  */
469 		    static char const char_message[] =
470 		      "%s %s differ: char %s, line %s\n";
471 
472 		    /* The POSIX rationale recommends using the word
473 		       "byte" outside the POSIX locale.  Some gettext
474 		       implementations translate even in the POSIX
475 		       locale if certain other environment variables
476 		       are set, so use "byte" if a translation is
477 		       available, or if outside the POSIX locale.  */
478 		    static char const byte_msgid[] =
479 		      N_("%s %s differ: byte %s, line %s\n");
480 		    char const *byte_message = _(byte_msgid);
481 		    bool use_byte_message = (byte_message != byte_msgid
482 					     || hard_locale_LC_MESSAGES);
483 
484 		    printf ((use_byte_message
485 			     ? byte_message
486 			     : "%s %s differ: char %s, line %s\n"),
487 			    file[0], file[1], byte_num, line_num);
488 		  }
489 		else
490 		  {
491 		    unsigned char c0 = buf0[first_diff];
492 		    unsigned char c1 = buf1[first_diff];
493 		    char s0[5];
494 		    char s1[5];
495 		    sprintc (s0, c0);
496 		    sprintc (s1, c1);
497 		    printf (_("%s %s differ: byte %s, line %s is %3o %s %3o %s\n"),
498 			    file[0], file[1], byte_num, line_num,
499 			    c0, s0, c1, s1);
500 		}
501 	      }
502 	      /* Fall through.  */
503 	    case type_status:
504 	      return EXIT_FAILURE;
505 
506 	    case type_all_diffs:
507 	      do
508 		{
509 		  unsigned char c0 = buf0[first_diff];
510 		  unsigned char c1 = buf1[first_diff];
511 		  if (c0 != c1)
512 		    {
513 		      char byte_buf[INT_BUFSIZE_BOUND (off_t)];
514 		      char const *byte_num = offtostr (byte_number, byte_buf);
515 		      if (!opt_print_bytes)
516 			{
517 			  /* See POSIX 1003.1-2001 for this format.  */
518 			  printf ("%*s %3o %3o\n",
519 				  offset_width, byte_num, c0, c1);
520 			}
521 		      else
522 			{
523 			  char s0[5];
524 			  char s1[5];
525 			  sprintc (s0, c0);
526 			  sprintc (s1, c1);
527 			  printf ("%*s %3o %-4s %3o %s\n",
528 				  offset_width, byte_num, c0, s0, c1, s1);
529 			}
530 		    }
531 		  byte_number++;
532 		  first_diff++;
533 		}
534 	      while (first_diff < smaller);
535 	      ret = EXIT_FAILURE;
536 	      break;
537 	    }
538 	}
539 
540       if (read0 != read1)
541 	{
542 	  if (comparison_type != type_status)
543 	    {
544 	      /* See POSIX 1003.1-2001 for this format.  */
545 	      fprintf (stderr, _("cmp: EOF on %s\n"), file[read1 < read0]);
546 	    }
547 
548 	  return EXIT_FAILURE;
549 	}
550     }
551   while (read0 == buf_size);
552 
553   return ret;
554 }
555 
556 /* Compare two blocks of memory P0 and P1 until they differ,
557    and count the number of '\n' occurrences in the common
558    part of P0 and P1.
559    If the blocks are not guaranteed to be different, put sentinels at the ends
560    of the blocks before calling this function.
561 
562    Return the offset of the first byte that differs.
563    Increment *COUNT by the count of '\n' occurrences.  */
564 
565 static size_t
block_compare_and_count(word const * p0,word const * p1,off_t * count)566 block_compare_and_count (word const *p0, word const *p1, off_t *count)
567 {
568   word l;		/* One word from first buffer. */
569   word const *l0, *l1;	/* Pointers into each buffer. */
570   char const *c0, *c1;	/* Pointers for finding exact address. */
571   size_t cnt = 0;	/* Number of '\n' occurrences. */
572   word nnnn;		/* Newline, sizeof (word) times.  */
573   int i;
574 
575   nnnn = 0;
576   for (i = 0; i < sizeof nnnn; i++)
577     nnnn = (nnnn << CHAR_BIT) | '\n';
578 
579   /* Find the rough position of the first difference by reading words,
580      not bytes.  */
581 
582   for (l0 = p0, l1 = p1;  (l = *l0) == *l1;  l0++, l1++)
583     {
584       l ^= nnnn;
585       for (i = 0; i < sizeof l; i++)
586 	{
587 	  cnt += ! (unsigned char) l;
588 	  l >>= CHAR_BIT;
589 	}
590     }
591 
592   /* Find the exact differing position (endianness independent).  */
593 
594   for (c0 = (char const *) l0, c1 = (char const *) l1;
595        *c0 == *c1;
596        c0++, c1++)
597     cnt += *c0 == '\n';
598 
599   *count += cnt;
600   return c0 - (char const *) p0;
601 }
602 
603 /* Compare two blocks of memory P0 and P1 until they differ.
604    If the blocks are not guaranteed to be different, put sentinels at the ends
605    of the blocks before calling this function.
606 
607    Return the offset of the first byte that differs.  */
608 
609 static size_t
block_compare(word const * p0,word const * p1)610 block_compare (word const *p0, word const *p1)
611 {
612   word const *l0, *l1;
613   char const *c0, *c1;
614 
615   /* Find the rough position of the first difference by reading words,
616      not bytes.  */
617 
618   for (l0 = p0, l1 = p1;  *l0 == *l1;  l0++, l1++)
619     continue;
620 
621   /* Find the exact differing position (endianness independent).  */
622 
623   for (c0 = (char const *) l0, c1 = (char const *) l1;
624        *c0 == *c1;
625        c0++, c1++)
626     continue;
627 
628   return c0 - (char const *) p0;
629 }
630 
/* Store in BUF a null-terminated, visible rendering of the byte C,
   quoting unprintable bytes the way cat -t does: bytes of 128 and above
   get an "M-" prefix and are reduced by 128, control characters become
   '^' followed by the character 64 higher, and 127 becomes "^?".
   The longest result, "M-^?", needs five bytes including the
   terminator.  */

static void
sprintc (char *buf, unsigned char c)
{
  char *out = buf;

  if (! ISPRINT (c))
    {
      if (c >= 128)
        {
          out[0] = 'M';
          out[1] = '-';
          out += 2;
          c -= 128;
        }

      if (c == 127)
        {
          *out++ = '^';
          c = '?';
        }
      else if (c < 32)
        {
          *out++ = '^';
          c += 64;
        }
    }

  out[0] = c;
  out[1] = 0;
}
660 
661 /* Position file F to ignore_initial[F] bytes from its initial position,
662    and yield its new position.  Don't try more than once.  */
663 
664 static off_t
file_position(int f)665 file_position (int f)
666 {
667   static bool positioned[2];
668   static off_t position[2];
669 
670   if (! positioned[f])
671     {
672       positioned[f] = 1;
673       position[f] = lseek (file_desc[f], ignore_initial[f], SEEK_CUR);
674     }
675   return position[f];
676 }
677