/* cmp - compare two files byte by byte

   Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1998, 2001,
   2002, 2004 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
   See the GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.
   If not, write to the Free Software Foundation,
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */

21*18fd37a7SXin LI #include "system.h"
22*18fd37a7SXin LI #include "paths.h"
23*18fd37a7SXin LI
24*18fd37a7SXin LI #include <stdio.h>
25*18fd37a7SXin LI
26*18fd37a7SXin LI #include <c-stack.h>
27*18fd37a7SXin LI #include <cmpbuf.h>
28*18fd37a7SXin LI #include <error.h>
29*18fd37a7SXin LI #include <exit.h>
30*18fd37a7SXin LI #include <exitfail.h>
31*18fd37a7SXin LI #include <file-type.h>
32*18fd37a7SXin LI #include <getopt.h>
33*18fd37a7SXin LI #include <hard-locale.h>
34*18fd37a7SXin LI #include <inttostr.h>
35*18fd37a7SXin LI #include <setmode.h>
36*18fd37a7SXin LI #include <unlocked-io.h>
37*18fd37a7SXin LI #include <version-etc.h>
38*18fd37a7SXin LI #include <xalloc.h>
39*18fd37a7SXin LI #include <xstrtol.h>
40*18fd37a7SXin LI
41*18fd37a7SXin LI #if defined LC_MESSAGES && ENABLE_NLS
42*18fd37a7SXin LI # define hard_locale_LC_MESSAGES hard_locale (LC_MESSAGES)
43*18fd37a7SXin LI #else
44*18fd37a7SXin LI # define hard_locale_LC_MESSAGES 0
45*18fd37a7SXin LI #endif
46*18fd37a7SXin LI
47*18fd37a7SXin LI static int cmp (void);
48*18fd37a7SXin LI static off_t file_position (int);
49*18fd37a7SXin LI static size_t block_compare (word const *, word const *);
50*18fd37a7SXin LI static size_t block_compare_and_count (word const *, word const *, off_t *);
51*18fd37a7SXin LI static void sprintc (char *, unsigned char);
52*18fd37a7SXin LI
53*18fd37a7SXin LI /* Name under which this program was invoked. */
54*18fd37a7SXin LI char *program_name;
55*18fd37a7SXin LI
56*18fd37a7SXin LI /* Filenames of the compared files. */
57*18fd37a7SXin LI static char const *file[2];
58*18fd37a7SXin LI
59*18fd37a7SXin LI /* File descriptors of the files. */
60*18fd37a7SXin LI static int file_desc[2];
61*18fd37a7SXin LI
62*18fd37a7SXin LI /* Status of the files. */
63*18fd37a7SXin LI static struct stat stat_buf[2];
64*18fd37a7SXin LI
65*18fd37a7SXin LI /* Read buffers for the files. */
66*18fd37a7SXin LI static word *buffer[2];
67*18fd37a7SXin LI
68*18fd37a7SXin LI /* Optimal block size for the files. */
69*18fd37a7SXin LI static size_t buf_size;
70*18fd37a7SXin LI
71*18fd37a7SXin LI /* Initial prefix to ignore for each file. */
72*18fd37a7SXin LI static off_t ignore_initial[2];
73*18fd37a7SXin LI
74*18fd37a7SXin LI /* Number of bytes to compare. */
75*18fd37a7SXin LI static uintmax_t bytes = UINTMAX_MAX;
76*18fd37a7SXin LI
77*18fd37a7SXin LI /* Output format. */
78*18fd37a7SXin LI static enum comparison_type
79*18fd37a7SXin LI {
80*18fd37a7SXin LI type_first_diff, /* Print the first difference. */
81*18fd37a7SXin LI type_all_diffs, /* Print all differences. */
82*18fd37a7SXin LI type_status /* Exit status only. */
83*18fd37a7SXin LI } comparison_type;
84*18fd37a7SXin LI
85*18fd37a7SXin LI /* If nonzero, print values of bytes quoted like cat -t does. */
86*18fd37a7SXin LI static bool opt_print_bytes;
87*18fd37a7SXin LI
/* Values for long options that do not have single-letter equivalents.
   They start above CHAR_MAX so that they cannot collide with any
   option character returned by getopt_long.  */
enum
{
  HELP_OPTION = CHAR_MAX + 1
};

/* Long option table handed to getopt_long in main ().  Every entry has a
   null flag pointer, so getopt_long returns the value in the last
   column; the second column is 1 for options that take an argument.  */
static struct option const long_options[] =
{
  {"print-bytes", 0, 0, 'b'},
  {"print-chars", 0, 0, 'c'}, /* obsolescent as of diffutils 2.7.3 */
  {"ignore-initial", 1, 0, 'i'},
  {"verbose", 0, 0, 'l'},
  {"bytes", 1, 0, 'n'},
  {"silent", 0, 0, 's'},
  {"quiet", 0, 0, 's'},
  {"version", 0, 0, 'v'},
  {"help", 0, 0, HELP_OPTION},
  {0, 0, 0, 0}
};

108*18fd37a7SXin LI static void try_help (char const *, char const *) __attribute__((noreturn));
109*18fd37a7SXin LI static void
try_help(char const * reason_msgid,char const * operand)110*18fd37a7SXin LI try_help (char const *reason_msgid, char const *operand)
111*18fd37a7SXin LI {
112*18fd37a7SXin LI if (reason_msgid)
113*18fd37a7SXin LI error (0, 0, _(reason_msgid), operand);
114*18fd37a7SXin LI error (EXIT_TROUBLE, 0,
115*18fd37a7SXin LI _("Try `%s --help' for more information."), program_name);
116*18fd37a7SXin LI abort ();
117*18fd37a7SXin LI }
118*18fd37a7SXin LI
119*18fd37a7SXin LI static char const valid_suffixes[] = "kKMGTPEZY0";
120*18fd37a7SXin LI
121*18fd37a7SXin LI /* Update ignore_initial[F] according to the result of parsing an
122*18fd37a7SXin LI *operand ARGPTR of --ignore-initial, updating *ARGPTR to point
123*18fd37a7SXin LI *after the operand. If DELIMITER is nonzero, the operand may be
124*18fd37a7SXin LI *followed by DELIMITER; otherwise it must be null-terminated. */
125*18fd37a7SXin LI static void
specify_ignore_initial(int f,char ** argptr,char delimiter)126*18fd37a7SXin LI specify_ignore_initial (int f, char **argptr, char delimiter)
127*18fd37a7SXin LI {
128*18fd37a7SXin LI uintmax_t val;
129*18fd37a7SXin LI off_t o;
130*18fd37a7SXin LI char const *arg = *argptr;
131*18fd37a7SXin LI strtol_error e = xstrtoumax (arg, argptr, 0, &val, valid_suffixes);
132*18fd37a7SXin LI if (! (e == LONGINT_OK
133*18fd37a7SXin LI || (e == LONGINT_INVALID_SUFFIX_CHAR && **argptr == delimiter))
134*18fd37a7SXin LI || (o = val) < 0 || o != val || val == UINTMAX_MAX)
135*18fd37a7SXin LI try_help ("invalid --ignore-initial value `%s'", arg);
136*18fd37a7SXin LI if (ignore_initial[f] < o)
137*18fd37a7SXin LI ignore_initial[f] = o;
138*18fd37a7SXin LI }
139*18fd37a7SXin LI
140*18fd37a7SXin LI /* Specify the output format. */
141*18fd37a7SXin LI static void
specify_comparison_type(enum comparison_type t)142*18fd37a7SXin LI specify_comparison_type (enum comparison_type t)
143*18fd37a7SXin LI {
144*18fd37a7SXin LI if (comparison_type && comparison_type != t)
145*18fd37a7SXin LI try_help ("options -l and -s are incompatible", 0);
146*18fd37a7SXin LI comparison_type = t;
147*18fd37a7SXin LI }
148*18fd37a7SXin LI
149*18fd37a7SXin LI static void
check_stdout(void)150*18fd37a7SXin LI check_stdout (void)
151*18fd37a7SXin LI {
152*18fd37a7SXin LI if (ferror (stdout))
153*18fd37a7SXin LI error (EXIT_TROUBLE, 0, "%s", _("write failed"));
154*18fd37a7SXin LI else if (fclose (stdout) != 0)
155*18fd37a7SXin LI error (EXIT_TROUBLE, errno, "%s", _("standard output"));
156*18fd37a7SXin LI }
157*18fd37a7SXin LI
158*18fd37a7SXin LI static char const * const option_help_msgid[] = {
159*18fd37a7SXin LI N_("-b --print-bytes Print differing bytes."),
160*18fd37a7SXin LI N_("-i SKIP --ignore-initial=SKIP Skip the first SKIP bytes of input."),
161*18fd37a7SXin LI N_("-i SKIP1:SKIP2 --ignore-initial=SKIP1:SKIP2"),
162*18fd37a7SXin LI N_(" Skip the first SKIP1 bytes of FILE1 and the first SKIP2 bytes of FILE2."),
163*18fd37a7SXin LI N_("-l --verbose Output byte numbers and values of all differing bytes."),
164*18fd37a7SXin LI N_("-n LIMIT --bytes=LIMIT Compare at most LIMIT bytes."),
165*18fd37a7SXin LI N_("-s --quiet --silent Output nothing; yield exit status only."),
166*18fd37a7SXin LI N_("-v --version Output version info."),
167*18fd37a7SXin LI N_("--help Output this help."),
168*18fd37a7SXin LI 0
169*18fd37a7SXin LI };
170*18fd37a7SXin LI
171*18fd37a7SXin LI static void
usage(void)172*18fd37a7SXin LI usage (void)
173*18fd37a7SXin LI {
174*18fd37a7SXin LI char const * const *p;
175*18fd37a7SXin LI
176*18fd37a7SXin LI printf (_("Usage: %s [OPTION]... FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"),
177*18fd37a7SXin LI program_name);
178*18fd37a7SXin LI printf ("%s\n\n", _("Compare two files byte by byte."));
179*18fd37a7SXin LI for (p = option_help_msgid; *p; p++)
180*18fd37a7SXin LI printf (" %s\n", _(*p));
181*18fd37a7SXin LI printf ("\n%s\n%s\n\n%s\n%s\n\n%s\n",
182*18fd37a7SXin LI _("SKIP1 and SKIP2 are the number of bytes to skip in each file."),
183*18fd37a7SXin LI _("SKIP values may be followed by the following multiplicative suffixes:\n\
184*18fd37a7SXin LI kB 1000, K 1024, MB 1,000,000, M 1,048,576,\n\
185*18fd37a7SXin LI GB 1,000,000,000, G 1,073,741,824, and so on for T, P, E, Z, Y."),
186*18fd37a7SXin LI _("If a FILE is `-' or missing, read standard input."),
187*18fd37a7SXin LI _("Exit status is 0 if inputs are the same, 1 if different, 2 if trouble."),
188*18fd37a7SXin LI _("Report bugs to <bug-gnu-utils@gnu.org>."));
189*18fd37a7SXin LI }
190*18fd37a7SXin LI
191*18fd37a7SXin LI int
main(int argc,char ** argv)192*18fd37a7SXin LI main (int argc, char **argv)
193*18fd37a7SXin LI {
194*18fd37a7SXin LI int c, f, exit_status;
195*18fd37a7SXin LI size_t words_per_buffer;
196*18fd37a7SXin LI
197*18fd37a7SXin LI exit_failure = EXIT_TROUBLE;
198*18fd37a7SXin LI initialize_main (&argc, &argv);
199*18fd37a7SXin LI program_name = argv[0];
200*18fd37a7SXin LI setlocale (LC_ALL, "");
201*18fd37a7SXin LI bindtextdomain (PACKAGE, LOCALEDIR);
202*18fd37a7SXin LI textdomain (PACKAGE);
203*18fd37a7SXin LI c_stack_action (0);
204*18fd37a7SXin LI
205*18fd37a7SXin LI /* Parse command line options. */
206*18fd37a7SXin LI
207*18fd37a7SXin LI while ((c = getopt_long (argc, argv, "bci:ln:sv", long_options, 0))
208*18fd37a7SXin LI != -1)
209*18fd37a7SXin LI switch (c)
210*18fd37a7SXin LI {
211*18fd37a7SXin LI case 'b':
212*18fd37a7SXin LI case 'c': /* 'c' is obsolescent as of diffutils 2.7.3 */
213*18fd37a7SXin LI opt_print_bytes = true;
214*18fd37a7SXin LI break;
215*18fd37a7SXin LI
216*18fd37a7SXin LI case 'i':
217*18fd37a7SXin LI specify_ignore_initial (0, &optarg, ':');
218*18fd37a7SXin LI if (*optarg++ == ':')
219*18fd37a7SXin LI specify_ignore_initial (1, &optarg, 0);
220*18fd37a7SXin LI else if (ignore_initial[1] < ignore_initial[0])
221*18fd37a7SXin LI ignore_initial[1] = ignore_initial[0];
222*18fd37a7SXin LI break;
223*18fd37a7SXin LI
224*18fd37a7SXin LI case 'l':
225*18fd37a7SXin LI specify_comparison_type (type_all_diffs);
226*18fd37a7SXin LI break;
227*18fd37a7SXin LI
228*18fd37a7SXin LI case 'n':
229*18fd37a7SXin LI {
230*18fd37a7SXin LI uintmax_t n;
231*18fd37a7SXin LI if (xstrtoumax (optarg, 0, 0, &n, valid_suffixes) != LONGINT_OK)
232*18fd37a7SXin LI try_help ("invalid --bytes value `%s'", optarg);
233*18fd37a7SXin LI if (n < bytes)
234*18fd37a7SXin LI bytes = n;
235*18fd37a7SXin LI }
236*18fd37a7SXin LI break;
237*18fd37a7SXin LI
238*18fd37a7SXin LI case 's':
239*18fd37a7SXin LI specify_comparison_type (type_status);
240*18fd37a7SXin LI break;
241*18fd37a7SXin LI
242*18fd37a7SXin LI case 'v':
243*18fd37a7SXin LI /* TRANSLATORS: Please translate the second "o" in "Torbjorn
244*18fd37a7SXin LI Granlund" to an o-with-umlaut (U+00F6, LATIN SMALL LETTER O
245*18fd37a7SXin LI WITH DIAERESIS) if possible. */
246*18fd37a7SXin LI version_etc (stdout, "cmp", PACKAGE_NAME, PACKAGE_VERSION,
247*18fd37a7SXin LI _("Torbjorn Granlund"), "David MacKenzie", (char *) 0);
248*18fd37a7SXin LI check_stdout ();
249*18fd37a7SXin LI return EXIT_SUCCESS;
250*18fd37a7SXin LI
251*18fd37a7SXin LI case HELP_OPTION:
252*18fd37a7SXin LI usage ();
253*18fd37a7SXin LI check_stdout ();
254*18fd37a7SXin LI return EXIT_SUCCESS;
255*18fd37a7SXin LI
256*18fd37a7SXin LI default:
257*18fd37a7SXin LI try_help (0, 0);
258*18fd37a7SXin LI }
259*18fd37a7SXin LI
260*18fd37a7SXin LI if (optind == argc)
261*18fd37a7SXin LI try_help ("missing operand after `%s'", argv[argc - 1]);
262*18fd37a7SXin LI
263*18fd37a7SXin LI file[0] = argv[optind++];
264*18fd37a7SXin LI file[1] = optind < argc ? argv[optind++] : "-";
265*18fd37a7SXin LI
266*18fd37a7SXin LI for (f = 0; f < 2 && optind < argc; f++)
267*18fd37a7SXin LI {
268*18fd37a7SXin LI char *arg = argv[optind++];
269*18fd37a7SXin LI specify_ignore_initial (f, &arg, 0);
270*18fd37a7SXin LI }
271*18fd37a7SXin LI
272*18fd37a7SXin LI if (optind < argc)
273*18fd37a7SXin LI try_help ("extra operand `%s'", argv[optind]);
274*18fd37a7SXin LI
275*18fd37a7SXin LI for (f = 0; f < 2; f++)
276*18fd37a7SXin LI {
277*18fd37a7SXin LI /* If file[1] is "-", treat it first; this avoids a misdiagnostic if
278*18fd37a7SXin LI stdin is closed and opening file[0] yields file descriptor 0. */
279*18fd37a7SXin LI int f1 = f ^ (strcmp (file[1], "-") == 0);
280*18fd37a7SXin LI
281*18fd37a7SXin LI /* Two files with the same name and offset are identical.
282*18fd37a7SXin LI But wait until we open the file once, for proper diagnostics. */
283*18fd37a7SXin LI if (f && ignore_initial[0] == ignore_initial[1]
284*18fd37a7SXin LI && file_name_cmp (file[0], file[1]) == 0)
285*18fd37a7SXin LI return EXIT_SUCCESS;
286*18fd37a7SXin LI
287*18fd37a7SXin LI file_desc[f1] = (strcmp (file[f1], "-") == 0
288*18fd37a7SXin LI ? STDIN_FILENO
289*18fd37a7SXin LI : open (file[f1], O_RDONLY, 0));
290*18fd37a7SXin LI if (file_desc[f1] < 0 || fstat (file_desc[f1], stat_buf + f1) != 0)
291*18fd37a7SXin LI {
292*18fd37a7SXin LI if (file_desc[f1] < 0 && comparison_type == type_status)
293*18fd37a7SXin LI exit (EXIT_TROUBLE);
294*18fd37a7SXin LI else
295*18fd37a7SXin LI error (EXIT_TROUBLE, errno, "%s", file[f1]);
296*18fd37a7SXin LI }
297*18fd37a7SXin LI
298*18fd37a7SXin LI set_binary_mode (file_desc[f1], true);
299*18fd37a7SXin LI }
300*18fd37a7SXin LI
301*18fd37a7SXin LI /* If the files are links to the same inode and have the same file position,
302*18fd37a7SXin LI they are identical. */
303*18fd37a7SXin LI
304*18fd37a7SXin LI if (0 < same_file (&stat_buf[0], &stat_buf[1])
305*18fd37a7SXin LI && same_file_attributes (&stat_buf[0], &stat_buf[1])
306*18fd37a7SXin LI && file_position (0) == file_position (1))
307*18fd37a7SXin LI return EXIT_SUCCESS;
308*18fd37a7SXin LI
309*18fd37a7SXin LI /* If output is redirected to the null device, we may assume `-s'. */
310*18fd37a7SXin LI
311*18fd37a7SXin LI if (comparison_type != type_status)
312*18fd37a7SXin LI {
313*18fd37a7SXin LI struct stat outstat, nullstat;
314*18fd37a7SXin LI
315*18fd37a7SXin LI if (fstat (STDOUT_FILENO, &outstat) == 0
316*18fd37a7SXin LI && stat (NULL_DEVICE, &nullstat) == 0
317*18fd37a7SXin LI && 0 < same_file (&outstat, &nullstat))
318*18fd37a7SXin LI comparison_type = type_status;
319*18fd37a7SXin LI }
320*18fd37a7SXin LI
321*18fd37a7SXin LI /* If only a return code is needed,
322*18fd37a7SXin LI and if both input descriptors are associated with plain files,
323*18fd37a7SXin LI conclude that the files differ if they have different sizes
324*18fd37a7SXin LI and if more bytes will be compared than are in the smaller file. */
325*18fd37a7SXin LI
326*18fd37a7SXin LI if (comparison_type == type_status
327*18fd37a7SXin LI && S_ISREG (stat_buf[0].st_mode)
328*18fd37a7SXin LI && S_ISREG (stat_buf[1].st_mode))
329*18fd37a7SXin LI {
330*18fd37a7SXin LI off_t s0 = stat_buf[0].st_size - file_position (0);
331*18fd37a7SXin LI off_t s1 = stat_buf[1].st_size - file_position (1);
332*18fd37a7SXin LI if (s0 < 0)
333*18fd37a7SXin LI s0 = 0;
334*18fd37a7SXin LI if (s1 < 0)
335*18fd37a7SXin LI s1 = 0;
336*18fd37a7SXin LI if (s0 != s1 && MIN (s0, s1) < bytes)
337*18fd37a7SXin LI exit (EXIT_FAILURE);
338*18fd37a7SXin LI }
339*18fd37a7SXin LI
340*18fd37a7SXin LI /* Get the optimal block size of the files. */
341*18fd37a7SXin LI
342*18fd37a7SXin LI buf_size = buffer_lcm (STAT_BLOCKSIZE (stat_buf[0]),
343*18fd37a7SXin LI STAT_BLOCKSIZE (stat_buf[1]),
344*18fd37a7SXin LI PTRDIFF_MAX - sizeof (word));
345*18fd37a7SXin LI
346*18fd37a7SXin LI /* Allocate word-aligned buffers, with space for sentinels at the end. */
347*18fd37a7SXin LI
348*18fd37a7SXin LI words_per_buffer = (buf_size + 2 * sizeof (word) - 1) / sizeof (word);
349*18fd37a7SXin LI buffer[0] = xmalloc (2 * sizeof (word) * words_per_buffer);
350*18fd37a7SXin LI buffer[1] = buffer[0] + words_per_buffer;
351*18fd37a7SXin LI
352*18fd37a7SXin LI exit_status = cmp ();
353*18fd37a7SXin LI
354*18fd37a7SXin LI for (f = 0; f < 2; f++)
355*18fd37a7SXin LI if (close (file_desc[f]) != 0)
356*18fd37a7SXin LI error (EXIT_TROUBLE, errno, "%s", file[f]);
357*18fd37a7SXin LI if (exit_status != 0 && comparison_type != type_status)
358*18fd37a7SXin LI check_stdout ();
359*18fd37a7SXin LI exit (exit_status);
360*18fd37a7SXin LI return exit_status;
361*18fd37a7SXin LI }
362*18fd37a7SXin LI
363*18fd37a7SXin LI /* Compare the two files already open on `file_desc[0]' and `file_desc[1]',
364*18fd37a7SXin LI using `buffer[0]' and `buffer[1]'.
365*18fd37a7SXin LI Return EXIT_SUCCESS if identical, EXIT_FAILURE if different,
366*18fd37a7SXin LI >1 if error. */
367*18fd37a7SXin LI
368*18fd37a7SXin LI static int
cmp(void)369*18fd37a7SXin LI cmp (void)
370*18fd37a7SXin LI {
371*18fd37a7SXin LI off_t line_number = 1; /* Line number (1...) of difference. */
372*18fd37a7SXin LI off_t byte_number = 1; /* Byte number (1...) of difference. */
373*18fd37a7SXin LI uintmax_t remaining = bytes; /* Remaining number of bytes to compare. */
374*18fd37a7SXin LI size_t read0, read1; /* Number of bytes read from each file. */
375*18fd37a7SXin LI size_t first_diff; /* Offset (0...) in buffers of 1st diff. */
376*18fd37a7SXin LI size_t smaller; /* The lesser of `read0' and `read1'. */
377*18fd37a7SXin LI word *buffer0 = buffer[0];
378*18fd37a7SXin LI word *buffer1 = buffer[1];
379*18fd37a7SXin LI char *buf0 = (char *) buffer0;
380*18fd37a7SXin LI char *buf1 = (char *) buffer1;
381*18fd37a7SXin LI int ret = EXIT_SUCCESS;
382*18fd37a7SXin LI int f;
383*18fd37a7SXin LI int offset_width;
384*18fd37a7SXin LI
385*18fd37a7SXin LI if (comparison_type == type_all_diffs)
386*18fd37a7SXin LI {
387*18fd37a7SXin LI off_t byte_number_max = MIN (bytes, TYPE_MAXIMUM (off_t));
388*18fd37a7SXin LI
389*18fd37a7SXin LI for (f = 0; f < 2; f++)
390*18fd37a7SXin LI if (S_ISREG (stat_buf[f].st_mode))
391*18fd37a7SXin LI {
392*18fd37a7SXin LI off_t file_bytes = stat_buf[f].st_size - file_position (f);
393*18fd37a7SXin LI if (file_bytes < byte_number_max)
394*18fd37a7SXin LI byte_number_max = file_bytes;
395*18fd37a7SXin LI }
396*18fd37a7SXin LI
397*18fd37a7SXin LI for (offset_width = 1; (byte_number_max /= 10) != 0; offset_width++)
398*18fd37a7SXin LI continue;
399*18fd37a7SXin LI }
400*18fd37a7SXin LI
401*18fd37a7SXin LI for (f = 0; f < 2; f++)
402*18fd37a7SXin LI {
403*18fd37a7SXin LI off_t ig = ignore_initial[f];
404*18fd37a7SXin LI if (ig && file_position (f) == -1)
405*18fd37a7SXin LI {
406*18fd37a7SXin LI /* lseek failed; read and discard the ignored initial prefix. */
407*18fd37a7SXin LI do
408*18fd37a7SXin LI {
409*18fd37a7SXin LI size_t bytes_to_read = MIN (ig, buf_size);
410*18fd37a7SXin LI size_t r = block_read (file_desc[f], buf0, bytes_to_read);
411*18fd37a7SXin LI if (r != bytes_to_read)
412*18fd37a7SXin LI {
413*18fd37a7SXin LI if (r == SIZE_MAX)
414*18fd37a7SXin LI error (EXIT_TROUBLE, errno, "%s", file[f]);
415*18fd37a7SXin LI break;
416*18fd37a7SXin LI }
417*18fd37a7SXin LI ig -= r;
418*18fd37a7SXin LI }
419*18fd37a7SXin LI while (ig);
420*18fd37a7SXin LI }
421*18fd37a7SXin LI }
422*18fd37a7SXin LI
423*18fd37a7SXin LI do
424*18fd37a7SXin LI {
425*18fd37a7SXin LI size_t bytes_to_read = buf_size;
426*18fd37a7SXin LI
427*18fd37a7SXin LI if (remaining != UINTMAX_MAX)
428*18fd37a7SXin LI {
429*18fd37a7SXin LI if (remaining < bytes_to_read)
430*18fd37a7SXin LI bytes_to_read = remaining;
431*18fd37a7SXin LI remaining -= bytes_to_read;
432*18fd37a7SXin LI }
433*18fd37a7SXin LI
434*18fd37a7SXin LI read0 = block_read (file_desc[0], buf0, bytes_to_read);
435*18fd37a7SXin LI if (read0 == SIZE_MAX)
436*18fd37a7SXin LI error (EXIT_TROUBLE, errno, "%s", file[0]);
437*18fd37a7SXin LI read1 = block_read (file_desc[1], buf1, bytes_to_read);
438*18fd37a7SXin LI if (read1 == SIZE_MAX)
439*18fd37a7SXin LI error (EXIT_TROUBLE, errno, "%s", file[1]);
440*18fd37a7SXin LI
441*18fd37a7SXin LI /* Insert sentinels for the block compare. */
442*18fd37a7SXin LI
443*18fd37a7SXin LI buf0[read0] = ~buf1[read0];
444*18fd37a7SXin LI buf1[read1] = ~buf0[read1];
445*18fd37a7SXin LI
446*18fd37a7SXin LI /* If the line number should be written for differing files,
447*18fd37a7SXin LI compare the blocks and count the number of newlines
448*18fd37a7SXin LI simultaneously. */
449*18fd37a7SXin LI first_diff = (comparison_type == type_first_diff
450*18fd37a7SXin LI ? block_compare_and_count (buffer0, buffer1, &line_number)
451*18fd37a7SXin LI : block_compare (buffer0, buffer1));
452*18fd37a7SXin LI
453*18fd37a7SXin LI byte_number += first_diff;
454*18fd37a7SXin LI smaller = MIN (read0, read1);
455*18fd37a7SXin LI
456*18fd37a7SXin LI if (first_diff < smaller)
457*18fd37a7SXin LI {
458*18fd37a7SXin LI switch (comparison_type)
459*18fd37a7SXin LI {
460*18fd37a7SXin LI case type_first_diff:
461*18fd37a7SXin LI {
462*18fd37a7SXin LI char byte_buf[INT_BUFSIZE_BOUND (off_t)];
463*18fd37a7SXin LI char line_buf[INT_BUFSIZE_BOUND (off_t)];
464*18fd37a7SXin LI char const *byte_num = offtostr (byte_number, byte_buf);
465*18fd37a7SXin LI char const *line_num = offtostr (line_number, line_buf);
466*18fd37a7SXin LI if (!opt_print_bytes)
467*18fd37a7SXin LI {
468*18fd37a7SXin LI /* See POSIX 1003.1-2001 for this format. This
469*18fd37a7SXin LI message is used only in the POSIX locale, so it
470*18fd37a7SXin LI need not be translated. */
471*18fd37a7SXin LI static char const char_message[] =
472*18fd37a7SXin LI "%s %s differ: char %s, line %s\n";
473*18fd37a7SXin LI
474*18fd37a7SXin LI /* The POSIX rationale recommends using the word
475*18fd37a7SXin LI "byte" outside the POSIX locale. Some gettext
476*18fd37a7SXin LI implementations translate even in the POSIX
477*18fd37a7SXin LI locale if certain other environment variables
478*18fd37a7SXin LI are set, so use "byte" if a translation is
479*18fd37a7SXin LI available, or if outside the POSIX locale. */
480*18fd37a7SXin LI static char const byte_msgid[] =
481*18fd37a7SXin LI N_("%s %s differ: byte %s, line %s\n");
482*18fd37a7SXin LI char const *byte_message = _(byte_msgid);
483*18fd37a7SXin LI bool use_byte_message = (byte_message != byte_msgid
484*18fd37a7SXin LI || hard_locale_LC_MESSAGES);
485*18fd37a7SXin LI
486*18fd37a7SXin LI printf (use_byte_message ? byte_message : char_message,
487*18fd37a7SXin LI file[0], file[1], byte_num, line_num);
488*18fd37a7SXin LI }
489*18fd37a7SXin LI else
490*18fd37a7SXin LI {
491*18fd37a7SXin LI unsigned char c0 = buf0[first_diff];
492*18fd37a7SXin LI unsigned char c1 = buf1[first_diff];
493*18fd37a7SXin LI char s0[5];
494*18fd37a7SXin LI char s1[5];
495*18fd37a7SXin LI sprintc (s0, c0);
496*18fd37a7SXin LI sprintc (s1, c1);
497*18fd37a7SXin LI printf (_("%s %s differ: byte %s, line %s is %3o %s %3o %s\n"),
498*18fd37a7SXin LI file[0], file[1], byte_num, line_num,
499*18fd37a7SXin LI c0, s0, c1, s1);
500*18fd37a7SXin LI }
501*18fd37a7SXin LI }
502*18fd37a7SXin LI /* Fall through. */
503*18fd37a7SXin LI case type_status:
504*18fd37a7SXin LI return EXIT_FAILURE;
505*18fd37a7SXin LI
506*18fd37a7SXin LI case type_all_diffs:
507*18fd37a7SXin LI do
508*18fd37a7SXin LI {
509*18fd37a7SXin LI unsigned char c0 = buf0[first_diff];
510*18fd37a7SXin LI unsigned char c1 = buf1[first_diff];
511*18fd37a7SXin LI if (c0 != c1)
512*18fd37a7SXin LI {
513*18fd37a7SXin LI char byte_buf[INT_BUFSIZE_BOUND (off_t)];
514*18fd37a7SXin LI char const *byte_num = offtostr (byte_number, byte_buf);
515*18fd37a7SXin LI if (!opt_print_bytes)
516*18fd37a7SXin LI {
517*18fd37a7SXin LI /* See POSIX 1003.1-2001 for this format. */
518*18fd37a7SXin LI printf ("%*s %3o %3o\n",
519*18fd37a7SXin LI offset_width, byte_num, c0, c1);
520*18fd37a7SXin LI }
521*18fd37a7SXin LI else
522*18fd37a7SXin LI {
523*18fd37a7SXin LI char s0[5];
524*18fd37a7SXin LI char s1[5];
525*18fd37a7SXin LI sprintc (s0, c0);
526*18fd37a7SXin LI sprintc (s1, c1);
527*18fd37a7SXin LI printf ("%*s %3o %-4s %3o %s\n",
528*18fd37a7SXin LI offset_width, byte_num, c0, s0, c1, s1);
529*18fd37a7SXin LI }
530*18fd37a7SXin LI }
531*18fd37a7SXin LI byte_number++;
532*18fd37a7SXin LI first_diff++;
533*18fd37a7SXin LI }
534*18fd37a7SXin LI while (first_diff < smaller);
535*18fd37a7SXin LI ret = EXIT_FAILURE;
536*18fd37a7SXin LI break;
537*18fd37a7SXin LI }
538*18fd37a7SXin LI }
539*18fd37a7SXin LI
540*18fd37a7SXin LI if (read0 != read1)
541*18fd37a7SXin LI {
542*18fd37a7SXin LI if (comparison_type != type_status)
543*18fd37a7SXin LI {
544*18fd37a7SXin LI /* See POSIX 1003.1-2001 for this format. */
545*18fd37a7SXin LI fprintf (stderr, _("cmp: EOF on %s\n"), file[read1 < read0]);
546*18fd37a7SXin LI }
547*18fd37a7SXin LI
548*18fd37a7SXin LI return EXIT_FAILURE;
549*18fd37a7SXin LI }
550*18fd37a7SXin LI }
551*18fd37a7SXin LI while (read0 == buf_size);
552*18fd37a7SXin LI
553*18fd37a7SXin LI return ret;
554*18fd37a7SXin LI }
555*18fd37a7SXin LI
556*18fd37a7SXin LI /* Compare two blocks of memory P0 and P1 until they differ,
557*18fd37a7SXin LI and count the number of '\n' occurrences in the common
558*18fd37a7SXin LI part of P0 and P1.
559*18fd37a7SXin LI If the blocks are not guaranteed to be different, put sentinels at the ends
560*18fd37a7SXin LI of the blocks before calling this function.
561*18fd37a7SXin LI
562*18fd37a7SXin LI Return the offset of the first byte that differs.
563*18fd37a7SXin LI Increment *COUNT by the count of '\n' occurrences. */
564*18fd37a7SXin LI
565*18fd37a7SXin LI static size_t
block_compare_and_count(word const * p0,word const * p1,off_t * count)566*18fd37a7SXin LI block_compare_and_count (word const *p0, word const *p1, off_t *count)
567*18fd37a7SXin LI {
568*18fd37a7SXin LI word l; /* One word from first buffer. */
569*18fd37a7SXin LI word const *l0, *l1; /* Pointers into each buffer. */
570*18fd37a7SXin LI char const *c0, *c1; /* Pointers for finding exact address. */
571*18fd37a7SXin LI size_t cnt = 0; /* Number of '\n' occurrences. */
572*18fd37a7SXin LI word nnnn; /* Newline, sizeof (word) times. */
573*18fd37a7SXin LI int i;
574*18fd37a7SXin LI
575*18fd37a7SXin LI nnnn = 0;
576*18fd37a7SXin LI for (i = 0; i < sizeof nnnn; i++)
577*18fd37a7SXin LI nnnn = (nnnn << CHAR_BIT) | '\n';
578*18fd37a7SXin LI
579*18fd37a7SXin LI /* Find the rough position of the first difference by reading words,
580*18fd37a7SXin LI not bytes. */
581*18fd37a7SXin LI
582*18fd37a7SXin LI for (l0 = p0, l1 = p1; (l = *l0) == *l1; l0++, l1++)
583*18fd37a7SXin LI {
584*18fd37a7SXin LI l ^= nnnn;
585*18fd37a7SXin LI for (i = 0; i < sizeof l; i++)
586*18fd37a7SXin LI {
587*18fd37a7SXin LI unsigned char uc = l;
588*18fd37a7SXin LI cnt += ! uc;
589*18fd37a7SXin LI l >>= CHAR_BIT;
590*18fd37a7SXin LI }
591*18fd37a7SXin LI }
592*18fd37a7SXin LI
593*18fd37a7SXin LI /* Find the exact differing position (endianness independent). */
594*18fd37a7SXin LI
595*18fd37a7SXin LI for (c0 = (char const *) l0, c1 = (char const *) l1;
596*18fd37a7SXin LI *c0 == *c1;
597*18fd37a7SXin LI c0++, c1++)
598*18fd37a7SXin LI cnt += *c0 == '\n';
599*18fd37a7SXin LI
600*18fd37a7SXin LI *count += cnt;
601*18fd37a7SXin LI return c0 - (char const *) p0;
602*18fd37a7SXin LI }
603*18fd37a7SXin LI
604*18fd37a7SXin LI /* Compare two blocks of memory P0 and P1 until they differ.
605*18fd37a7SXin LI If the blocks are not guaranteed to be different, put sentinels at the ends
606*18fd37a7SXin LI of the blocks before calling this function.
607*18fd37a7SXin LI
608*18fd37a7SXin LI Return the offset of the first byte that differs. */
609*18fd37a7SXin LI
610*18fd37a7SXin LI static size_t
block_compare(word const * p0,word const * p1)611*18fd37a7SXin LI block_compare (word const *p0, word const *p1)
612*18fd37a7SXin LI {
613*18fd37a7SXin LI word const *l0, *l1;
614*18fd37a7SXin LI char const *c0, *c1;
615*18fd37a7SXin LI
616*18fd37a7SXin LI /* Find the rough position of the first difference by reading words,
617*18fd37a7SXin LI not bytes. */
618*18fd37a7SXin LI
619*18fd37a7SXin LI for (l0 = p0, l1 = p1; *l0 == *l1; l0++, l1++)
620*18fd37a7SXin LI continue;
621*18fd37a7SXin LI
622*18fd37a7SXin LI /* Find the exact differing position (endianness independent). */
623*18fd37a7SXin LI
624*18fd37a7SXin LI for (c0 = (char const *) l0, c1 = (char const *) l1;
625*18fd37a7SXin LI *c0 == *c1;
626*18fd37a7SXin LI c0++, c1++)
627*18fd37a7SXin LI continue;
628*18fd37a7SXin LI
629*18fd37a7SXin LI return c0 - (char const *) p0;
630*18fd37a7SXin LI }
631*18fd37a7SXin LI
/* Put into BUF the unsigned char C, making unprintable bytes
   visible by quoting like cat -t does.

   A byte that isprint () accepts is stored as is.  Otherwise a byte of
   128 or more gets an "M-" prefix and is reduced by 128, and what is
   left is written as "^" followed by the byte plus 64 when it is below
   32, or as "^?" when it is 127.  BUF must have room for 5 bytes: the
   longest result is "M-^?" plus the terminating null.  */

static void
sprintc (char *buf, unsigned char c)
{
  if (! isprint (c))
    {
      if (c >= 128)
        {
          *buf++ = 'M';
          *buf++ = '-';
          c -= 128;
        }
      if (c < 32)
        {
          *buf++ = '^';
          c += 64;
        }
      else if (c == 127)
        {
          *buf++ = '^';
          c = '?';
        }
    }

  *buf++ = c;
  *buf = 0;
}

662*18fd37a7SXin LI /* Position file F to ignore_initial[F] bytes from its initial position,
663*18fd37a7SXin LI and yield its new position. Don't try more than once. */
664*18fd37a7SXin LI
665*18fd37a7SXin LI static off_t
file_position(int f)666*18fd37a7SXin LI file_position (int f)
667*18fd37a7SXin LI {
668*18fd37a7SXin LI static bool positioned[2];
669*18fd37a7SXin LI static off_t position[2];
670*18fd37a7SXin LI
671*18fd37a7SXin LI if (! positioned[f])
672*18fd37a7SXin LI {
673*18fd37a7SXin LI positioned[f] = true;
674*18fd37a7SXin LI position[f] = lseek (file_desc[f], ignore_initial[f], SEEK_CUR);
675*18fd37a7SXin LI }
676*18fd37a7SXin LI return position[f];
677*18fd37a7SXin LI }
678