1 /* $NetBSD: cmp.c,v 1.1.1.1 2016/01/13 03:15:30 christos Exp $ */
2
3 /* cmp - compare two files byte by byte
4
5 Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1998, 2001,
6 2002 Free Software Foundation, Inc.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16 See the GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; see the file COPYING.
20 If not, write to the Free Software Foundation,
21 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
23 #include "system.h"
24
25 #include <stdio.h>
26 #include <cmpbuf.h>
27 #include <c-stack.h>
28 #include <error.h>
29 #include <exitfail.h>
30 #include <freesoft.h>
31 #include <getopt.h>
32 #include <hard-locale.h>
33 #include <inttostr.h>
34 #include <setmode.h>
35 #include <xalloc.h>
36 #include <xstrtol.h>
37
38 #if defined LC_MESSAGES && ENABLE_NLS
39 # define hard_locale_LC_MESSAGES hard_locale (LC_MESSAGES)
40 #else
41 # define hard_locale_LC_MESSAGES 0
42 #endif
43
44 static char const authorship_msgid[] =
45 N_("Written by Torbjorn Granlund and David MacKenzie.");
46
47 static char const copyright_string[] =
48 "Copyright (C) 2002 Free Software Foundation, Inc.";
49
50 extern char const version_string[];
51
52 static int cmp (void);
53 static off_t file_position (int);
54 static size_t block_compare (word const *, word const *);
55 static size_t block_compare_and_count (word const *, word const *, off_t *);
56 static void sprintc (char *, unsigned char);
57
58 /* Name under which this program was invoked. */
59 char *program_name;
60
61 /* Filenames of the compared files. */
62 static char const *file[2];
63
64 /* File descriptors of the files. */
65 static int file_desc[2];
66
67 /* Status of the files. */
68 static struct stat stat_buf[2];
69
70 /* Read buffers for the files. */
71 static word *buffer[2];
72
73 /* Optimal block size for the files. */
74 static size_t buf_size;
75
76 /* Initial prefix to ignore for each file. */
77 static off_t ignore_initial[2];
78
79 /* Number of bytes to compare. */
80 static uintmax_t bytes = UINTMAX_MAX;
81
82 /* Output format. */
83 static enum comparison_type
84 {
85 type_first_diff, /* Print the first difference. */
86 type_all_diffs, /* Print all differences. */
87 type_status /* Exit status only. */
88 } comparison_type;
89
90 /* If nonzero, print values of bytes quoted like cat -t does. */
91 static bool opt_print_bytes;
92
/* Codes returned by getopt_long for long options that have no
   single-letter equivalent.  They start above CHAR_MAX so they cannot
   collide with any option character.  */
enum
{
  HELP_OPTION = CHAR_MAX + 1
};

/* Long option table: each entry maps a long name onto the code that
   the option switch in main dispatches on.  */
static struct option const long_options[] =
{
  {"print-bytes",    no_argument,       NULL, 'b'},
  {"print-chars",    no_argument,       NULL, 'c'}, /* obsolescent as of diffutils 2.7.3 */
  {"ignore-initial", required_argument, NULL, 'i'},
  {"verbose",        no_argument,       NULL, 'l'},
  {"bytes",          required_argument, NULL, 'n'},
  {"silent",         no_argument,       NULL, 's'},
  {"quiet",          no_argument,       NULL, 's'},
  {"version",        no_argument,       NULL, 'v'},
  {"help",           no_argument,       NULL, HELP_OPTION},
  {NULL, 0, NULL, 0}
};
112
113 static void try_help (char const *, char const *) __attribute__((noreturn));
114 static void
try_help(char const * reason_msgid,char const * operand)115 try_help (char const *reason_msgid, char const *operand)
116 {
117 if (reason_msgid)
118 error (0, 0, _(reason_msgid), operand);
119 error (EXIT_TROUBLE, 0,
120 _("Try `%s --help' for more information."), program_name);
121 abort ();
122 }
123
124 static char const valid_suffixes[] = "kKMGTPEZY0";
125
126 /* Parse an operand *ARGPTR of --ignore-initial, updating *ARGPTR to
127 point after the operand. If DELIMITER is nonzero, the operand may
128 be followed by DELIMITER; otherwise it must be null-terminated. */
129 static off_t
parse_ignore_initial(char ** argptr,char delimiter)130 parse_ignore_initial (char **argptr, char delimiter)
131 {
132 uintmax_t val;
133 off_t o;
134 char const *arg = *argptr;
135 strtol_error e = xstrtoumax (arg, argptr, 0, &val, valid_suffixes);
136 if (! (e == LONGINT_OK
137 || (e == LONGINT_INVALID_SUFFIX_CHAR && **argptr == delimiter))
138 || (o = val) < 0 || o != val || val == UINTMAX_MAX)
139 try_help ("invalid --ignore-initial value `%s'", arg);
140 return o;
141 }
142
143 /* Specify the output format. */
144 static void
specify_comparison_type(enum comparison_type t)145 specify_comparison_type (enum comparison_type t)
146 {
147 if (comparison_type)
148 try_help ("options -l and -s are incompatible", 0);
149 comparison_type = t;
150 }
151
152 static void
check_stdout(void)153 check_stdout (void)
154 {
155 if (ferror (stdout))
156 error (EXIT_TROUBLE, 0, "%s", _("write failed"));
157 else if (fclose (stdout) != 0)
158 error (EXIT_TROUBLE, errno, "%s", _("standard output"));
159 }
160
161 static char const * const option_help_msgid[] = {
162 N_("-b --print-bytes Print differing bytes."),
163 N_("-i SKIP --ignore-initial=SKIP Skip the first SKIP bytes of input."),
164 N_("-i SKIP1:SKIP2 --ignore-initial=SKIP1:SKIP2"),
165 N_(" Skip the first SKIP1 bytes of FILE1 and the first SKIP2 bytes of FILE2."),
166 N_("-l --verbose Output byte numbers and values of all differing bytes."),
167 N_("-n LIMIT --bytes=LIMIT Compare at most LIMIT bytes."),
168 N_("-s --quiet --silent Output nothing; yield exit status only."),
169 N_("-v --version Output version info."),
170 N_("--help Output this help."),
171 0
172 };
173
174 static void
usage(void)175 usage (void)
176 {
177 char const * const *p;
178
179 printf (_("Usage: %s [OPTION]... FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"),
180 program_name);
181 printf ("%s\n\n", _("Compare two files byte by byte."));
182 for (p = option_help_msgid; *p; p++)
183 printf (" %s\n", _(*p));
184 printf ("\n%s\n%s\n\n%s\n\n%s\n",
185 _("SKIP1 and SKIP2 are the number of bytes to skip in each file."),
186 _("SKIP values may be followed by the following multiplicative suffixes:\n\
187 kB 1000, K 1024, MB 1,000,000, M 1,048,576,\n\
188 GB 1,000,000,000, G 1,073,741,824, and so on for T, P, E, Z, Y."),
189 _("If a FILE is `-' or missing, read standard input."),
190 _("Report bugs to <bug-gnu-utils@gnu.org>."));
191 }
192
193 int
main(int argc,char ** argv)194 main (int argc, char **argv)
195 {
196 int c, f, exit_status;
197 size_t words_per_buffer;
198
199 exit_failure = EXIT_TROUBLE;
200 initialize_main (&argc, &argv);
201 program_name = argv[0];
202 setlocale (LC_ALL, "");
203 bindtextdomain (PACKAGE, LOCALEDIR);
204 textdomain (PACKAGE);
205 c_stack_action (c_stack_die);
206
207 /* Parse command line options. */
208
209 while ((c = getopt_long (argc, argv, "bci:ln:sv", long_options, 0))
210 != -1)
211 switch (c)
212 {
213 case 'b':
214 case 'c': /* 'c' is obsolescent as of diffutils 2.7.3 */
215 opt_print_bytes = 1;
216 break;
217
218 case 'i':
219 ignore_initial[0] = parse_ignore_initial (&optarg, ':');
220 ignore_initial[1] = (*optarg++ == ':'
221 ? parse_ignore_initial (&optarg, 0)
222 : ignore_initial[0]);
223 break;
224
225 case 'l':
226 specify_comparison_type (type_all_diffs);
227 break;
228
229 case 'n':
230 {
231 uintmax_t n;
232 if (xstrtoumax (optarg, 0, 0, &n, valid_suffixes) != LONGINT_OK)
233 try_help ("invalid --bytes value `%s'", optarg);
234 if (n < bytes)
235 bytes = n;
236 }
237 break;
238
239 case 's':
240 specify_comparison_type (type_status);
241 break;
242
243 case 'v':
244 printf ("cmp %s\n%s\n\n%s\n\n%s\n",
245 version_string, copyright_string,
246 _(free_software_msgid), _(authorship_msgid));
247 check_stdout ();
248 return EXIT_SUCCESS;
249
250 case HELP_OPTION:
251 usage ();
252 check_stdout ();
253 return EXIT_SUCCESS;
254
255 default:
256 try_help (0, 0);
257 }
258
259 if (optind == argc)
260 try_help ("missing operand after `%s'", argv[argc - 1]);
261
262 file[0] = argv[optind++];
263 file[1] = optind < argc ? argv[optind++] : "-";
264
265 for (f = 0; f < 2 && optind < argc; f++)
266 {
267 char *arg = argv[optind++];
268 ignore_initial[f] = parse_ignore_initial (&arg, 0);
269 }
270
271 if (optind < argc)
272 try_help ("extra operand `%s'", argv[optind]);
273
274 for (f = 0; f < 2; f++)
275 {
276 /* If file[1] is "-", treat it first; this avoids a misdiagnostic if
277 stdin is closed and opening file[0] yields file descriptor 0. */
278 int f1 = f ^ (strcmp (file[1], "-") == 0);
279
280 /* Two files with the same name are identical.
281 But wait until we open the file once, for proper diagnostics. */
282 if (f && file_name_cmp (file[0], file[1]) == 0)
283 return EXIT_SUCCESS;
284
285 file_desc[f1] = (strcmp (file[f1], "-") == 0
286 ? STDIN_FILENO
287 : open (file[f1], O_RDONLY, 0));
288 if (file_desc[f1] < 0 || fstat (file_desc[f1], stat_buf + f1) != 0)
289 {
290 if (file_desc[f1] < 0 && comparison_type == type_status)
291 exit (EXIT_TROUBLE);
292 else
293 error (EXIT_TROUBLE, errno, "%s", file[f1]);
294 }
295
296 set_binary_mode (file_desc[f1], 1);
297 }
298
299 /* If the files are links to the same inode and have the same file position,
300 they are identical. */
301
302 if (0 < same_file (&stat_buf[0], &stat_buf[1])
303 && same_file_attributes (&stat_buf[0], &stat_buf[1])
304 && file_position (0) == file_position (1))
305 return EXIT_SUCCESS;
306
307 /* If output is redirected to the null device, we may assume `-s'. */
308
309 if (comparison_type != type_status)
310 {
311 struct stat outstat, nullstat;
312
313 if (fstat (STDOUT_FILENO, &outstat) == 0
314 && stat (NULL_DEVICE, &nullstat) == 0
315 && 0 < same_file (&outstat, &nullstat))
316 comparison_type = type_status;
317 }
318
319 /* If only a return code is needed,
320 and if both input descriptors are associated with plain files,
321 conclude that the files differ if they have different sizes
322 and if more bytes will be compared than are in the smaller file. */
323
324 if (comparison_type == type_status
325 && S_ISREG (stat_buf[0].st_mode)
326 && S_ISREG (stat_buf[1].st_mode))
327 {
328 off_t s0 = stat_buf[0].st_size - file_position (0);
329 off_t s1 = stat_buf[1].st_size - file_position (1);
330 if (s0 < 0)
331 s0 = 0;
332 if (s1 < 0)
333 s1 = 0;
334 if (s0 != s1 && MIN (s0, s1) < bytes)
335 exit (EXIT_FAILURE);
336 }
337
338 /* Get the optimal block size of the files. */
339
340 buf_size = buffer_lcm (STAT_BLOCKSIZE (stat_buf[0]),
341 STAT_BLOCKSIZE (stat_buf[1]),
342 PTRDIFF_MAX - sizeof (word));
343
344 /* Allocate word-aligned buffers, with space for sentinels at the end. */
345
346 words_per_buffer = (buf_size + 2 * sizeof (word) - 1) / sizeof (word);
347 buffer[0] = xmalloc (2 * sizeof (word) * words_per_buffer);
348 buffer[1] = buffer[0] + words_per_buffer;
349
350 exit_status = cmp ();
351
352 for (f = 0; f < 2; f++)
353 if (close (file_desc[f]) != 0)
354 error (EXIT_TROUBLE, errno, "%s", file[f]);
355 if (exit_status != 0 && comparison_type != type_status)
356 check_stdout ();
357 exit (exit_status);
358 return exit_status;
359 }
360
361 /* Compare the two files already open on `file_desc[0]' and `file_desc[1]',
362 using `buffer[0]' and `buffer[1]'.
363 Return EXIT_SUCCESS if identical, EXIT_FAILURE if different,
364 >1 if error. */
365
366 static int
cmp(void)367 cmp (void)
368 {
369 off_t line_number = 1; /* Line number (1...) of difference. */
370 off_t byte_number = 1; /* Byte number (1...) of difference. */
371 uintmax_t remaining = bytes; /* Remaining number of bytes to compare. */
372 size_t read0, read1; /* Number of bytes read from each file. */
373 size_t first_diff; /* Offset (0...) in buffers of 1st diff. */
374 size_t smaller; /* The lesser of `read0' and `read1'. */
375 word *buffer0 = buffer[0];
376 word *buffer1 = buffer[1];
377 char *buf0 = (char *) buffer0;
378 char *buf1 = (char *) buffer1;
379 int ret = EXIT_SUCCESS;
380 int f;
381 int offset_width;
382
383 if (comparison_type == type_all_diffs)
384 {
385 off_t byte_number_max = MIN (bytes, TYPE_MAXIMUM (off_t));
386
387 for (f = 0; f < 2; f++)
388 if (S_ISREG (stat_buf[f].st_mode))
389 {
390 off_t file_bytes = stat_buf[f].st_size - file_position (f);
391 if (file_bytes < byte_number_max)
392 byte_number_max = file_bytes;
393 }
394
395 for (offset_width = 1; (byte_number_max /= 10) != 0; offset_width++)
396 continue;
397 }
398
399 for (f = 0; f < 2; f++)
400 {
401 off_t ig = ignore_initial[f];
402 if (ig && file_position (f) == -1)
403 {
404 /* lseek failed; read and discard the ignored initial prefix. */
405 do
406 {
407 size_t bytes_to_read = MIN (ig, buf_size);
408 size_t r = block_read (file_desc[f], buf0, bytes_to_read);
409 if (r != bytes_to_read)
410 {
411 if (r == SIZE_MAX)
412 error (EXIT_TROUBLE, errno, "%s", file[f]);
413 break;
414 }
415 ig -= r;
416 }
417 while (ig);
418 }
419 }
420
421 do
422 {
423 size_t bytes_to_read = buf_size;
424
425 if (remaining != UINTMAX_MAX)
426 {
427 if (remaining < bytes_to_read)
428 bytes_to_read = remaining;
429 remaining -= bytes_to_read;
430 }
431
432 read0 = block_read (file_desc[0], buf0, bytes_to_read);
433 if (read0 == SIZE_MAX)
434 error (EXIT_TROUBLE, errno, "%s", file[0]);
435 read1 = block_read (file_desc[1], buf1, bytes_to_read);
436 if (read1 == SIZE_MAX)
437 error (EXIT_TROUBLE, errno, "%s", file[1]);
438
439 /* Insert sentinels for the block compare. */
440
441 buf0[read0] = ~buf1[read0];
442 buf1[read1] = ~buf0[read1];
443
444 /* If the line number should be written for differing files,
445 compare the blocks and count the number of newlines
446 simultaneously. */
447 first_diff = (comparison_type == type_first_diff
448 ? block_compare_and_count (buffer0, buffer1, &line_number)
449 : block_compare (buffer0, buffer1));
450
451 byte_number += first_diff;
452 smaller = MIN (read0, read1);
453
454 if (first_diff < smaller)
455 {
456 switch (comparison_type)
457 {
458 case type_first_diff:
459 {
460 char byte_buf[INT_BUFSIZE_BOUND (off_t)];
461 char line_buf[INT_BUFSIZE_BOUND (off_t)];
462 char const *byte_num = offtostr (byte_number, byte_buf);
463 char const *line_num = offtostr (line_number, line_buf);
464 if (!opt_print_bytes)
465 {
466 /* See POSIX 1003.1-2001 for this format. This
467 message is used only in the POSIX locale, so it
468 need not be translated. */
469 static char const char_message[] =
470 "%s %s differ: char %s, line %s\n";
471
472 /* The POSIX rationale recommends using the word
473 "byte" outside the POSIX locale. Some gettext
474 implementations translate even in the POSIX
475 locale if certain other environment variables
476 are set, so use "byte" if a translation is
477 available, or if outside the POSIX locale. */
478 static char const byte_msgid[] =
479 N_("%s %s differ: byte %s, line %s\n");
480 char const *byte_message = _(byte_msgid);
481 bool use_byte_message = (byte_message != byte_msgid
482 || hard_locale_LC_MESSAGES);
483
484 printf ((use_byte_message
485 ? byte_message
486 : "%s %s differ: char %s, line %s\n"),
487 file[0], file[1], byte_num, line_num);
488 }
489 else
490 {
491 unsigned char c0 = buf0[first_diff];
492 unsigned char c1 = buf1[first_diff];
493 char s0[5];
494 char s1[5];
495 sprintc (s0, c0);
496 sprintc (s1, c1);
497 printf (_("%s %s differ: byte %s, line %s is %3o %s %3o %s\n"),
498 file[0], file[1], byte_num, line_num,
499 c0, s0, c1, s1);
500 }
501 }
502 /* Fall through. */
503 case type_status:
504 return EXIT_FAILURE;
505
506 case type_all_diffs:
507 do
508 {
509 unsigned char c0 = buf0[first_diff];
510 unsigned char c1 = buf1[first_diff];
511 if (c0 != c1)
512 {
513 char byte_buf[INT_BUFSIZE_BOUND (off_t)];
514 char const *byte_num = offtostr (byte_number, byte_buf);
515 if (!opt_print_bytes)
516 {
517 /* See POSIX 1003.1-2001 for this format. */
518 printf ("%*s %3o %3o\n",
519 offset_width, byte_num, c0, c1);
520 }
521 else
522 {
523 char s0[5];
524 char s1[5];
525 sprintc (s0, c0);
526 sprintc (s1, c1);
527 printf ("%*s %3o %-4s %3o %s\n",
528 offset_width, byte_num, c0, s0, c1, s1);
529 }
530 }
531 byte_number++;
532 first_diff++;
533 }
534 while (first_diff < smaller);
535 ret = EXIT_FAILURE;
536 break;
537 }
538 }
539
540 if (read0 != read1)
541 {
542 if (comparison_type != type_status)
543 {
544 /* See POSIX 1003.1-2001 for this format. */
545 fprintf (stderr, _("cmp: EOF on %s\n"), file[read1 < read0]);
546 }
547
548 return EXIT_FAILURE;
549 }
550 }
551 while (read0 == buf_size);
552
553 return ret;
554 }
555
556 /* Compare two blocks of memory P0 and P1 until they differ,
557 and count the number of '\n' occurrences in the common
558 part of P0 and P1.
559 If the blocks are not guaranteed to be different, put sentinels at the ends
560 of the blocks before calling this function.
561
562 Return the offset of the first byte that differs.
563 Increment *COUNT by the count of '\n' occurrences. */
564
565 static size_t
block_compare_and_count(word const * p0,word const * p1,off_t * count)566 block_compare_and_count (word const *p0, word const *p1, off_t *count)
567 {
568 word l; /* One word from first buffer. */
569 word const *l0, *l1; /* Pointers into each buffer. */
570 char const *c0, *c1; /* Pointers for finding exact address. */
571 size_t cnt = 0; /* Number of '\n' occurrences. */
572 word nnnn; /* Newline, sizeof (word) times. */
573 int i;
574
575 nnnn = 0;
576 for (i = 0; i < sizeof nnnn; i++)
577 nnnn = (nnnn << CHAR_BIT) | '\n';
578
579 /* Find the rough position of the first difference by reading words,
580 not bytes. */
581
582 for (l0 = p0, l1 = p1; (l = *l0) == *l1; l0++, l1++)
583 {
584 l ^= nnnn;
585 for (i = 0; i < sizeof l; i++)
586 {
587 cnt += ! (unsigned char) l;
588 l >>= CHAR_BIT;
589 }
590 }
591
592 /* Find the exact differing position (endianness independent). */
593
594 for (c0 = (char const *) l0, c1 = (char const *) l1;
595 *c0 == *c1;
596 c0++, c1++)
597 cnt += *c0 == '\n';
598
599 *count += cnt;
600 return c0 - (char const *) p0;
601 }
602
603 /* Compare two blocks of memory P0 and P1 until they differ.
604 If the blocks are not guaranteed to be different, put sentinels at the ends
605 of the blocks before calling this function.
606
607 Return the offset of the first byte that differs. */
608
609 static size_t
block_compare(word const * p0,word const * p1)610 block_compare (word const *p0, word const *p1)
611 {
612 word const *l0, *l1;
613 char const *c0, *c1;
614
615 /* Find the rough position of the first difference by reading words,
616 not bytes. */
617
618 for (l0 = p0, l1 = p1; *l0 == *l1; l0++, l1++)
619 continue;
620
621 /* Find the exact differing position (endianness independent). */
622
623 for (c0 = (char const *) l0, c1 = (char const *) l1;
624 *c0 == *c1;
625 c0++, c1++)
626 continue;
627
628 return c0 - (char const *) p0;
629 }
630
/* Put into BUF the unsigned char C, making unprintable bytes
   visible by quoting like cat -t does: bytes of 128 and above get an
   "M-" prefix, and control characters are written as "^" followed by
   a printable character.  BUF must have room for the longest result,
   four characters plus the terminating null.  */

static void
sprintc (char *buf, unsigned char c)
{
  char *out = buf;

  if (! ISPRINT (c))
    {
      if (c >= 128)
        {
          out[0] = 'M';
          out[1] = '-';
          out += 2;
          c -= 128;
        }

      if (c == 127)
        {
          /* DEL is shown as "^?".  */
          *out++ = '^';
          c = '?';
        }
      else if (c < 32)
        {
          /* Control characters map to '@', 'A', ... after the caret.  */
          *out++ = '^';
          c += 64;
        }
    }

  *out++ = c;
  *out = 0;
}
660
661 /* Position file F to ignore_initial[F] bytes from its initial position,
662 and yield its new position. Don't try more than once. */
663
664 static off_t
file_position(int f)665 file_position (int f)
666 {
667 static bool positioned[2];
668 static off_t position[2];
669
670 if (! positioned[f])
671 {
672 positioned[f] = 1;
673 position[f] = lseek (file_desc[f], ignore_initial[f], SEEK_CUR);
674 }
675 return position[f];
676 }
677