xref: /freebsd-src/contrib/diff/src/cmp.c (revision 18fd37a72c3a7549d2d4f6c6ea00bdcd2bdaca01)
1*18fd37a7SXin LI /* cmp - compare two files byte by byte
2*18fd37a7SXin LI 
3*18fd37a7SXin LI    Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1998, 2001,
4*18fd37a7SXin LI    2002, 2004 Free Software Foundation, Inc.
5*18fd37a7SXin LI 
6*18fd37a7SXin LI    This program is free software; you can redistribute it and/or modify
7*18fd37a7SXin LI    it under the terms of the GNU General Public License as published by
8*18fd37a7SXin LI    the Free Software Foundation; either version 2, or (at your option)
9*18fd37a7SXin LI    any later version.
10*18fd37a7SXin LI 
11*18fd37a7SXin LI    This program is distributed in the hope that it will be useful,
12*18fd37a7SXin LI    but WITHOUT ANY WARRANTY; without even the implied warranty of
13*18fd37a7SXin LI    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14*18fd37a7SXin LI    See the GNU General Public License for more details.
15*18fd37a7SXin LI 
16*18fd37a7SXin LI    You should have received a copy of the GNU General Public License
17*18fd37a7SXin LI    along with this program; see the file COPYING.
18*18fd37a7SXin LI    If not, write to the Free Software Foundation,
19*18fd37a7SXin LI    59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
20*18fd37a7SXin LI 
21*18fd37a7SXin LI #include "system.h"
22*18fd37a7SXin LI #include "paths.h"
23*18fd37a7SXin LI 
24*18fd37a7SXin LI #include <stdio.h>
25*18fd37a7SXin LI 
26*18fd37a7SXin LI #include <c-stack.h>
27*18fd37a7SXin LI #include <cmpbuf.h>
28*18fd37a7SXin LI #include <error.h>
29*18fd37a7SXin LI #include <exit.h>
30*18fd37a7SXin LI #include <exitfail.h>
31*18fd37a7SXin LI #include <file-type.h>
32*18fd37a7SXin LI #include <getopt.h>
33*18fd37a7SXin LI #include <hard-locale.h>
34*18fd37a7SXin LI #include <inttostr.h>
35*18fd37a7SXin LI #include <setmode.h>
36*18fd37a7SXin LI #include <unlocked-io.h>
37*18fd37a7SXin LI #include <version-etc.h>
38*18fd37a7SXin LI #include <xalloc.h>
39*18fd37a7SXin LI #include <xstrtol.h>
40*18fd37a7SXin LI 
41*18fd37a7SXin LI #if defined LC_MESSAGES && ENABLE_NLS
42*18fd37a7SXin LI # define hard_locale_LC_MESSAGES hard_locale (LC_MESSAGES)
43*18fd37a7SXin LI #else
44*18fd37a7SXin LI # define hard_locale_LC_MESSAGES 0
45*18fd37a7SXin LI #endif
46*18fd37a7SXin LI 
47*18fd37a7SXin LI static int cmp (void);
48*18fd37a7SXin LI static off_t file_position (int);
49*18fd37a7SXin LI static size_t block_compare (word const *, word const *);
50*18fd37a7SXin LI static size_t block_compare_and_count (word const *, word const *, off_t *);
51*18fd37a7SXin LI static void sprintc (char *, unsigned char);
52*18fd37a7SXin LI 
53*18fd37a7SXin LI /* Name under which this program was invoked.  */
54*18fd37a7SXin LI char *program_name;
55*18fd37a7SXin LI 
56*18fd37a7SXin LI /* Filenames of the compared files.  */
57*18fd37a7SXin LI static char const *file[2];
58*18fd37a7SXin LI 
59*18fd37a7SXin LI /* File descriptors of the files.  */
60*18fd37a7SXin LI static int file_desc[2];
61*18fd37a7SXin LI 
62*18fd37a7SXin LI /* Status of the files.  */
63*18fd37a7SXin LI static struct stat stat_buf[2];
64*18fd37a7SXin LI 
65*18fd37a7SXin LI /* Read buffers for the files.  */
66*18fd37a7SXin LI static word *buffer[2];
67*18fd37a7SXin LI 
68*18fd37a7SXin LI /* Optimal block size for the files.  */
69*18fd37a7SXin LI static size_t buf_size;
70*18fd37a7SXin LI 
71*18fd37a7SXin LI /* Initial prefix to ignore for each file.  */
72*18fd37a7SXin LI static off_t ignore_initial[2];
73*18fd37a7SXin LI 
74*18fd37a7SXin LI /* Number of bytes to compare.  */
75*18fd37a7SXin LI static uintmax_t bytes = UINTMAX_MAX;
76*18fd37a7SXin LI 
77*18fd37a7SXin LI /* Output format.  */
78*18fd37a7SXin LI static enum comparison_type
79*18fd37a7SXin LI   {
80*18fd37a7SXin LI     type_first_diff,	/* Print the first difference.  */
81*18fd37a7SXin LI     type_all_diffs,	/* Print all differences.  */
82*18fd37a7SXin LI     type_status		/* Exit status only.  */
83*18fd37a7SXin LI   } comparison_type;
84*18fd37a7SXin LI 
85*18fd37a7SXin LI /* If nonzero, print values of bytes quoted like cat -t does. */
86*18fd37a7SXin LI static bool opt_print_bytes;
87*18fd37a7SXin LI 
/* Codes returned by getopt_long for long options that have no
   one-letter form.  They start just above CHAR_MAX so that they
   cannot coincide with any option character.  */
enum
{
  HELP_OPTION = CHAR_MAX + 1
};

/* Long options accepted on the command line.  Each entry maps a long
   name to the code that main's option switch dispatches on.  */
static struct option const long_options[] =
{
  {"print-bytes", no_argument, NULL, 'b'},
  {"print-chars", no_argument, NULL, 'c'}, /* obsolescent as of diffutils 2.7.3 */
  {"ignore-initial", required_argument, NULL, 'i'},
  {"verbose", no_argument, NULL, 'l'},
  {"bytes", required_argument, NULL, 'n'},
  {"silent", no_argument, NULL, 's'},
  {"quiet", no_argument, NULL, 's'},
  {"version", no_argument, NULL, 'v'},
  {"help", no_argument, NULL, HELP_OPTION},
  {NULL, 0, NULL, 0}
};
107*18fd37a7SXin LI 
108*18fd37a7SXin LI static void try_help (char const *, char const *) __attribute__((noreturn));
109*18fd37a7SXin LI static void
try_help(char const * reason_msgid,char const * operand)110*18fd37a7SXin LI try_help (char const *reason_msgid, char const *operand)
111*18fd37a7SXin LI {
112*18fd37a7SXin LI   if (reason_msgid)
113*18fd37a7SXin LI     error (0, 0, _(reason_msgid), operand);
114*18fd37a7SXin LI   error (EXIT_TROUBLE, 0,
115*18fd37a7SXin LI 	 _("Try `%s --help' for more information."), program_name);
116*18fd37a7SXin LI   abort ();
117*18fd37a7SXin LI }
118*18fd37a7SXin LI 
119*18fd37a7SXin LI static char const valid_suffixes[] = "kKMGTPEZY0";
120*18fd37a7SXin LI 
121*18fd37a7SXin LI /* Update ignore_initial[F] according to the result of parsing an
122*18fd37a7SXin LI    *operand ARGPTR of --ignore-initial, updating *ARGPTR to point
123*18fd37a7SXin LI    *after the operand.  If DELIMITER is nonzero, the operand may be
124*18fd37a7SXin LI    *followed by DELIMITER; otherwise it must be null-terminated.  */
125*18fd37a7SXin LI static void
specify_ignore_initial(int f,char ** argptr,char delimiter)126*18fd37a7SXin LI specify_ignore_initial (int f, char **argptr, char delimiter)
127*18fd37a7SXin LI {
128*18fd37a7SXin LI   uintmax_t val;
129*18fd37a7SXin LI   off_t o;
130*18fd37a7SXin LI   char const *arg = *argptr;
131*18fd37a7SXin LI   strtol_error e = xstrtoumax (arg, argptr, 0, &val, valid_suffixes);
132*18fd37a7SXin LI   if (! (e == LONGINT_OK
133*18fd37a7SXin LI 	 || (e == LONGINT_INVALID_SUFFIX_CHAR && **argptr == delimiter))
134*18fd37a7SXin LI       || (o = val) < 0 || o != val || val == UINTMAX_MAX)
135*18fd37a7SXin LI     try_help ("invalid --ignore-initial value `%s'", arg);
136*18fd37a7SXin LI   if (ignore_initial[f] < o)
137*18fd37a7SXin LI     ignore_initial[f] = o;
138*18fd37a7SXin LI }
139*18fd37a7SXin LI 
140*18fd37a7SXin LI /* Specify the output format.  */
141*18fd37a7SXin LI static void
specify_comparison_type(enum comparison_type t)142*18fd37a7SXin LI specify_comparison_type (enum comparison_type t)
143*18fd37a7SXin LI {
144*18fd37a7SXin LI   if (comparison_type && comparison_type != t)
145*18fd37a7SXin LI     try_help ("options -l and -s are incompatible", 0);
146*18fd37a7SXin LI   comparison_type = t;
147*18fd37a7SXin LI }
148*18fd37a7SXin LI 
149*18fd37a7SXin LI static void
check_stdout(void)150*18fd37a7SXin LI check_stdout (void)
151*18fd37a7SXin LI {
152*18fd37a7SXin LI   if (ferror (stdout))
153*18fd37a7SXin LI     error (EXIT_TROUBLE, 0, "%s", _("write failed"));
154*18fd37a7SXin LI   else if (fclose (stdout) != 0)
155*18fd37a7SXin LI     error (EXIT_TROUBLE, errno, "%s", _("standard output"));
156*18fd37a7SXin LI }
157*18fd37a7SXin LI 
158*18fd37a7SXin LI static char const * const option_help_msgid[] = {
159*18fd37a7SXin LI   N_("-b  --print-bytes  Print differing bytes."),
160*18fd37a7SXin LI   N_("-i SKIP  --ignore-initial=SKIP  Skip the first SKIP bytes of input."),
161*18fd37a7SXin LI   N_("-i SKIP1:SKIP2  --ignore-initial=SKIP1:SKIP2"),
162*18fd37a7SXin LI   N_("  Skip the first SKIP1 bytes of FILE1 and the first SKIP2 bytes of FILE2."),
163*18fd37a7SXin LI   N_("-l  --verbose  Output byte numbers and values of all differing bytes."),
164*18fd37a7SXin LI   N_("-n LIMIT  --bytes=LIMIT  Compare at most LIMIT bytes."),
165*18fd37a7SXin LI   N_("-s  --quiet  --silent  Output nothing; yield exit status only."),
166*18fd37a7SXin LI   N_("-v  --version  Output version info."),
167*18fd37a7SXin LI   N_("--help  Output this help."),
168*18fd37a7SXin LI   0
169*18fd37a7SXin LI };
170*18fd37a7SXin LI 
171*18fd37a7SXin LI static void
usage(void)172*18fd37a7SXin LI usage (void)
173*18fd37a7SXin LI {
174*18fd37a7SXin LI   char const * const *p;
175*18fd37a7SXin LI 
176*18fd37a7SXin LI   printf (_("Usage: %s [OPTION]... FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"),
177*18fd37a7SXin LI 	  program_name);
178*18fd37a7SXin LI   printf ("%s\n\n", _("Compare two files byte by byte."));
179*18fd37a7SXin LI   for (p = option_help_msgid;  *p;  p++)
180*18fd37a7SXin LI     printf ("  %s\n", _(*p));
181*18fd37a7SXin LI   printf ("\n%s\n%s\n\n%s\n%s\n\n%s\n",
182*18fd37a7SXin LI 	  _("SKIP1 and SKIP2 are the number of bytes to skip in each file."),
183*18fd37a7SXin LI 	  _("SKIP values may be followed by the following multiplicative suffixes:\n\
184*18fd37a7SXin LI kB 1000, K 1024, MB 1,000,000, M 1,048,576,\n\
185*18fd37a7SXin LI GB 1,000,000,000, G 1,073,741,824, and so on for T, P, E, Z, Y."),
186*18fd37a7SXin LI 	  _("If a FILE is `-' or missing, read standard input."),
187*18fd37a7SXin LI 	  _("Exit status is 0 if inputs are the same, 1 if different, 2 if trouble."),
188*18fd37a7SXin LI 	  _("Report bugs to <bug-gnu-utils@gnu.org>."));
189*18fd37a7SXin LI }
190*18fd37a7SXin LI 
191*18fd37a7SXin LI int
main(int argc,char ** argv)192*18fd37a7SXin LI main (int argc, char **argv)
193*18fd37a7SXin LI {
194*18fd37a7SXin LI   int c, f, exit_status;
195*18fd37a7SXin LI   size_t words_per_buffer;
196*18fd37a7SXin LI 
197*18fd37a7SXin LI   exit_failure = EXIT_TROUBLE;
198*18fd37a7SXin LI   initialize_main (&argc, &argv);
199*18fd37a7SXin LI   program_name = argv[0];
200*18fd37a7SXin LI   setlocale (LC_ALL, "");
201*18fd37a7SXin LI   bindtextdomain (PACKAGE, LOCALEDIR);
202*18fd37a7SXin LI   textdomain (PACKAGE);
203*18fd37a7SXin LI   c_stack_action (0);
204*18fd37a7SXin LI 
205*18fd37a7SXin LI   /* Parse command line options.  */
206*18fd37a7SXin LI 
207*18fd37a7SXin LI   while ((c = getopt_long (argc, argv, "bci:ln:sv", long_options, 0))
208*18fd37a7SXin LI 	 != -1)
209*18fd37a7SXin LI     switch (c)
210*18fd37a7SXin LI       {
211*18fd37a7SXin LI       case 'b':
212*18fd37a7SXin LI       case 'c': /* 'c' is obsolescent as of diffutils 2.7.3 */
213*18fd37a7SXin LI 	opt_print_bytes = true;
214*18fd37a7SXin LI 	break;
215*18fd37a7SXin LI 
216*18fd37a7SXin LI       case 'i':
217*18fd37a7SXin LI 	specify_ignore_initial (0, &optarg, ':');
218*18fd37a7SXin LI 	if (*optarg++ == ':')
219*18fd37a7SXin LI 	  specify_ignore_initial (1, &optarg, 0);
220*18fd37a7SXin LI 	else if (ignore_initial[1] < ignore_initial[0])
221*18fd37a7SXin LI 	  ignore_initial[1] = ignore_initial[0];
222*18fd37a7SXin LI 	break;
223*18fd37a7SXin LI 
224*18fd37a7SXin LI       case 'l':
225*18fd37a7SXin LI 	specify_comparison_type (type_all_diffs);
226*18fd37a7SXin LI 	break;
227*18fd37a7SXin LI 
228*18fd37a7SXin LI       case 'n':
229*18fd37a7SXin LI 	{
230*18fd37a7SXin LI 	  uintmax_t n;
231*18fd37a7SXin LI 	  if (xstrtoumax (optarg, 0, 0, &n, valid_suffixes) != LONGINT_OK)
232*18fd37a7SXin LI 	    try_help ("invalid --bytes value `%s'", optarg);
233*18fd37a7SXin LI 	  if (n < bytes)
234*18fd37a7SXin LI 	    bytes = n;
235*18fd37a7SXin LI 	}
236*18fd37a7SXin LI 	break;
237*18fd37a7SXin LI 
238*18fd37a7SXin LI       case 's':
239*18fd37a7SXin LI 	specify_comparison_type (type_status);
240*18fd37a7SXin LI 	break;
241*18fd37a7SXin LI 
242*18fd37a7SXin LI       case 'v':
243*18fd37a7SXin LI 	/* TRANSLATORS: Please translate the second "o" in "Torbjorn
244*18fd37a7SXin LI 	   Granlund" to an o-with-umlaut (U+00F6, LATIN SMALL LETTER O
245*18fd37a7SXin LI 	   WITH DIAERESIS) if possible.  */
246*18fd37a7SXin LI 	version_etc (stdout, "cmp", PACKAGE_NAME, PACKAGE_VERSION,
247*18fd37a7SXin LI 		     _("Torbjorn Granlund"), "David MacKenzie", (char *) 0);
248*18fd37a7SXin LI 	check_stdout ();
249*18fd37a7SXin LI 	return EXIT_SUCCESS;
250*18fd37a7SXin LI 
251*18fd37a7SXin LI       case HELP_OPTION:
252*18fd37a7SXin LI 	usage ();
253*18fd37a7SXin LI 	check_stdout ();
254*18fd37a7SXin LI 	return EXIT_SUCCESS;
255*18fd37a7SXin LI 
256*18fd37a7SXin LI       default:
257*18fd37a7SXin LI 	try_help (0, 0);
258*18fd37a7SXin LI       }
259*18fd37a7SXin LI 
260*18fd37a7SXin LI   if (optind == argc)
261*18fd37a7SXin LI     try_help ("missing operand after `%s'", argv[argc - 1]);
262*18fd37a7SXin LI 
263*18fd37a7SXin LI   file[0] = argv[optind++];
264*18fd37a7SXin LI   file[1] = optind < argc ? argv[optind++] : "-";
265*18fd37a7SXin LI 
266*18fd37a7SXin LI   for (f = 0; f < 2 && optind < argc; f++)
267*18fd37a7SXin LI     {
268*18fd37a7SXin LI       char *arg = argv[optind++];
269*18fd37a7SXin LI       specify_ignore_initial (f, &arg, 0);
270*18fd37a7SXin LI     }
271*18fd37a7SXin LI 
272*18fd37a7SXin LI   if (optind < argc)
273*18fd37a7SXin LI     try_help ("extra operand `%s'", argv[optind]);
274*18fd37a7SXin LI 
275*18fd37a7SXin LI   for (f = 0; f < 2; f++)
276*18fd37a7SXin LI     {
277*18fd37a7SXin LI       /* If file[1] is "-", treat it first; this avoids a misdiagnostic if
278*18fd37a7SXin LI 	 stdin is closed and opening file[0] yields file descriptor 0.  */
279*18fd37a7SXin LI       int f1 = f ^ (strcmp (file[1], "-") == 0);
280*18fd37a7SXin LI 
281*18fd37a7SXin LI       /* Two files with the same name and offset are identical.
282*18fd37a7SXin LI 	 But wait until we open the file once, for proper diagnostics.  */
283*18fd37a7SXin LI       if (f && ignore_initial[0] == ignore_initial[1]
284*18fd37a7SXin LI 	  && file_name_cmp (file[0], file[1]) == 0)
285*18fd37a7SXin LI 	return EXIT_SUCCESS;
286*18fd37a7SXin LI 
287*18fd37a7SXin LI       file_desc[f1] = (strcmp (file[f1], "-") == 0
288*18fd37a7SXin LI 		       ? STDIN_FILENO
289*18fd37a7SXin LI 		       : open (file[f1], O_RDONLY, 0));
290*18fd37a7SXin LI       if (file_desc[f1] < 0 || fstat (file_desc[f1], stat_buf + f1) != 0)
291*18fd37a7SXin LI 	{
292*18fd37a7SXin LI 	  if (file_desc[f1] < 0 && comparison_type == type_status)
293*18fd37a7SXin LI 	    exit (EXIT_TROUBLE);
294*18fd37a7SXin LI 	  else
295*18fd37a7SXin LI 	    error (EXIT_TROUBLE, errno, "%s", file[f1]);
296*18fd37a7SXin LI 	}
297*18fd37a7SXin LI 
298*18fd37a7SXin LI       set_binary_mode (file_desc[f1], true);
299*18fd37a7SXin LI     }
300*18fd37a7SXin LI 
301*18fd37a7SXin LI   /* If the files are links to the same inode and have the same file position,
302*18fd37a7SXin LI      they are identical.  */
303*18fd37a7SXin LI 
304*18fd37a7SXin LI   if (0 < same_file (&stat_buf[0], &stat_buf[1])
305*18fd37a7SXin LI       && same_file_attributes (&stat_buf[0], &stat_buf[1])
306*18fd37a7SXin LI       && file_position (0) == file_position (1))
307*18fd37a7SXin LI     return EXIT_SUCCESS;
308*18fd37a7SXin LI 
309*18fd37a7SXin LI   /* If output is redirected to the null device, we may assume `-s'.  */
310*18fd37a7SXin LI 
311*18fd37a7SXin LI   if (comparison_type != type_status)
312*18fd37a7SXin LI     {
313*18fd37a7SXin LI       struct stat outstat, nullstat;
314*18fd37a7SXin LI 
315*18fd37a7SXin LI       if (fstat (STDOUT_FILENO, &outstat) == 0
316*18fd37a7SXin LI 	  && stat (NULL_DEVICE, &nullstat) == 0
317*18fd37a7SXin LI 	  && 0 < same_file (&outstat, &nullstat))
318*18fd37a7SXin LI 	comparison_type = type_status;
319*18fd37a7SXin LI     }
320*18fd37a7SXin LI 
321*18fd37a7SXin LI   /* If only a return code is needed,
322*18fd37a7SXin LI      and if both input descriptors are associated with plain files,
323*18fd37a7SXin LI      conclude that the files differ if they have different sizes
324*18fd37a7SXin LI      and if more bytes will be compared than are in the smaller file.  */
325*18fd37a7SXin LI 
326*18fd37a7SXin LI   if (comparison_type == type_status
327*18fd37a7SXin LI       && S_ISREG (stat_buf[0].st_mode)
328*18fd37a7SXin LI       && S_ISREG (stat_buf[1].st_mode))
329*18fd37a7SXin LI     {
330*18fd37a7SXin LI       off_t s0 = stat_buf[0].st_size - file_position (0);
331*18fd37a7SXin LI       off_t s1 = stat_buf[1].st_size - file_position (1);
332*18fd37a7SXin LI       if (s0 < 0)
333*18fd37a7SXin LI 	s0 = 0;
334*18fd37a7SXin LI       if (s1 < 0)
335*18fd37a7SXin LI 	s1 = 0;
336*18fd37a7SXin LI       if (s0 != s1 && MIN (s0, s1) < bytes)
337*18fd37a7SXin LI 	exit (EXIT_FAILURE);
338*18fd37a7SXin LI     }
339*18fd37a7SXin LI 
340*18fd37a7SXin LI   /* Get the optimal block size of the files.  */
341*18fd37a7SXin LI 
342*18fd37a7SXin LI   buf_size = buffer_lcm (STAT_BLOCKSIZE (stat_buf[0]),
343*18fd37a7SXin LI 			 STAT_BLOCKSIZE (stat_buf[1]),
344*18fd37a7SXin LI 			 PTRDIFF_MAX - sizeof (word));
345*18fd37a7SXin LI 
346*18fd37a7SXin LI   /* Allocate word-aligned buffers, with space for sentinels at the end.  */
347*18fd37a7SXin LI 
348*18fd37a7SXin LI   words_per_buffer = (buf_size + 2 * sizeof (word) - 1) / sizeof (word);
349*18fd37a7SXin LI   buffer[0] = xmalloc (2 * sizeof (word) * words_per_buffer);
350*18fd37a7SXin LI   buffer[1] = buffer[0] + words_per_buffer;
351*18fd37a7SXin LI 
352*18fd37a7SXin LI   exit_status = cmp ();
353*18fd37a7SXin LI 
354*18fd37a7SXin LI   for (f = 0; f < 2; f++)
355*18fd37a7SXin LI     if (close (file_desc[f]) != 0)
356*18fd37a7SXin LI       error (EXIT_TROUBLE, errno, "%s", file[f]);
357*18fd37a7SXin LI   if (exit_status != 0  &&  comparison_type != type_status)
358*18fd37a7SXin LI     check_stdout ();
359*18fd37a7SXin LI   exit (exit_status);
360*18fd37a7SXin LI   return exit_status;
361*18fd37a7SXin LI }
362*18fd37a7SXin LI 
363*18fd37a7SXin LI /* Compare the two files already open on `file_desc[0]' and `file_desc[1]',
364*18fd37a7SXin LI    using `buffer[0]' and `buffer[1]'.
365*18fd37a7SXin LI    Return EXIT_SUCCESS if identical, EXIT_FAILURE if different,
366*18fd37a7SXin LI    >1 if error.  */
367*18fd37a7SXin LI 
368*18fd37a7SXin LI static int
cmp(void)369*18fd37a7SXin LI cmp (void)
370*18fd37a7SXin LI {
371*18fd37a7SXin LI   off_t line_number = 1;	/* Line number (1...) of difference. */
372*18fd37a7SXin LI   off_t byte_number = 1;	/* Byte number (1...) of difference. */
373*18fd37a7SXin LI   uintmax_t remaining = bytes;	/* Remaining number of bytes to compare.  */
374*18fd37a7SXin LI   size_t read0, read1;		/* Number of bytes read from each file. */
375*18fd37a7SXin LI   size_t first_diff;		/* Offset (0...) in buffers of 1st diff. */
376*18fd37a7SXin LI   size_t smaller;		/* The lesser of `read0' and `read1'. */
377*18fd37a7SXin LI   word *buffer0 = buffer[0];
378*18fd37a7SXin LI   word *buffer1 = buffer[1];
379*18fd37a7SXin LI   char *buf0 = (char *) buffer0;
380*18fd37a7SXin LI   char *buf1 = (char *) buffer1;
381*18fd37a7SXin LI   int ret = EXIT_SUCCESS;
382*18fd37a7SXin LI   int f;
383*18fd37a7SXin LI   int offset_width;
384*18fd37a7SXin LI 
385*18fd37a7SXin LI   if (comparison_type == type_all_diffs)
386*18fd37a7SXin LI     {
387*18fd37a7SXin LI       off_t byte_number_max = MIN (bytes, TYPE_MAXIMUM (off_t));
388*18fd37a7SXin LI 
389*18fd37a7SXin LI       for (f = 0; f < 2; f++)
390*18fd37a7SXin LI 	if (S_ISREG (stat_buf[f].st_mode))
391*18fd37a7SXin LI 	  {
392*18fd37a7SXin LI 	    off_t file_bytes = stat_buf[f].st_size - file_position (f);
393*18fd37a7SXin LI 	    if (file_bytes < byte_number_max)
394*18fd37a7SXin LI 	      byte_number_max = file_bytes;
395*18fd37a7SXin LI 	  }
396*18fd37a7SXin LI 
397*18fd37a7SXin LI       for (offset_width = 1; (byte_number_max /= 10) != 0; offset_width++)
398*18fd37a7SXin LI 	continue;
399*18fd37a7SXin LI     }
400*18fd37a7SXin LI 
401*18fd37a7SXin LI   for (f = 0; f < 2; f++)
402*18fd37a7SXin LI     {
403*18fd37a7SXin LI       off_t ig = ignore_initial[f];
404*18fd37a7SXin LI       if (ig && file_position (f) == -1)
405*18fd37a7SXin LI 	{
406*18fd37a7SXin LI 	  /* lseek failed; read and discard the ignored initial prefix.  */
407*18fd37a7SXin LI 	  do
408*18fd37a7SXin LI 	    {
409*18fd37a7SXin LI 	      size_t bytes_to_read = MIN (ig, buf_size);
410*18fd37a7SXin LI 	      size_t r = block_read (file_desc[f], buf0, bytes_to_read);
411*18fd37a7SXin LI 	      if (r != bytes_to_read)
412*18fd37a7SXin LI 		{
413*18fd37a7SXin LI 		  if (r == SIZE_MAX)
414*18fd37a7SXin LI 		    error (EXIT_TROUBLE, errno, "%s", file[f]);
415*18fd37a7SXin LI 		  break;
416*18fd37a7SXin LI 		}
417*18fd37a7SXin LI 	      ig -= r;
418*18fd37a7SXin LI 	    }
419*18fd37a7SXin LI 	  while (ig);
420*18fd37a7SXin LI 	}
421*18fd37a7SXin LI     }
422*18fd37a7SXin LI 
423*18fd37a7SXin LI   do
424*18fd37a7SXin LI     {
425*18fd37a7SXin LI       size_t bytes_to_read = buf_size;
426*18fd37a7SXin LI 
427*18fd37a7SXin LI       if (remaining != UINTMAX_MAX)
428*18fd37a7SXin LI 	{
429*18fd37a7SXin LI 	  if (remaining < bytes_to_read)
430*18fd37a7SXin LI 	    bytes_to_read = remaining;
431*18fd37a7SXin LI 	  remaining -= bytes_to_read;
432*18fd37a7SXin LI 	}
433*18fd37a7SXin LI 
434*18fd37a7SXin LI       read0 = block_read (file_desc[0], buf0, bytes_to_read);
435*18fd37a7SXin LI       if (read0 == SIZE_MAX)
436*18fd37a7SXin LI 	error (EXIT_TROUBLE, errno, "%s", file[0]);
437*18fd37a7SXin LI       read1 = block_read (file_desc[1], buf1, bytes_to_read);
438*18fd37a7SXin LI       if (read1 == SIZE_MAX)
439*18fd37a7SXin LI 	error (EXIT_TROUBLE, errno, "%s", file[1]);
440*18fd37a7SXin LI 
441*18fd37a7SXin LI       /* Insert sentinels for the block compare.  */
442*18fd37a7SXin LI 
443*18fd37a7SXin LI       buf0[read0] = ~buf1[read0];
444*18fd37a7SXin LI       buf1[read1] = ~buf0[read1];
445*18fd37a7SXin LI 
446*18fd37a7SXin LI       /* If the line number should be written for differing files,
447*18fd37a7SXin LI 	 compare the blocks and count the number of newlines
448*18fd37a7SXin LI 	 simultaneously.  */
449*18fd37a7SXin LI       first_diff = (comparison_type == type_first_diff
450*18fd37a7SXin LI 		    ? block_compare_and_count (buffer0, buffer1, &line_number)
451*18fd37a7SXin LI 		    : block_compare (buffer0, buffer1));
452*18fd37a7SXin LI 
453*18fd37a7SXin LI       byte_number += first_diff;
454*18fd37a7SXin LI       smaller = MIN (read0, read1);
455*18fd37a7SXin LI 
456*18fd37a7SXin LI       if (first_diff < smaller)
457*18fd37a7SXin LI 	{
458*18fd37a7SXin LI 	  switch (comparison_type)
459*18fd37a7SXin LI 	    {
460*18fd37a7SXin LI 	    case type_first_diff:
461*18fd37a7SXin LI 	      {
462*18fd37a7SXin LI 		char byte_buf[INT_BUFSIZE_BOUND (off_t)];
463*18fd37a7SXin LI 		char line_buf[INT_BUFSIZE_BOUND (off_t)];
464*18fd37a7SXin LI 		char const *byte_num = offtostr (byte_number, byte_buf);
465*18fd37a7SXin LI 		char const *line_num = offtostr (line_number, line_buf);
466*18fd37a7SXin LI 		if (!opt_print_bytes)
467*18fd37a7SXin LI 		  {
468*18fd37a7SXin LI 		    /* See POSIX 1003.1-2001 for this format.  This
469*18fd37a7SXin LI 		       message is used only in the POSIX locale, so it
470*18fd37a7SXin LI 		       need not be translated.  */
471*18fd37a7SXin LI 		    static char const char_message[] =
472*18fd37a7SXin LI 		      "%s %s differ: char %s, line %s\n";
473*18fd37a7SXin LI 
474*18fd37a7SXin LI 		    /* The POSIX rationale recommends using the word
475*18fd37a7SXin LI 		       "byte" outside the POSIX locale.  Some gettext
476*18fd37a7SXin LI 		       implementations translate even in the POSIX
477*18fd37a7SXin LI 		       locale if certain other environment variables
478*18fd37a7SXin LI 		       are set, so use "byte" if a translation is
479*18fd37a7SXin LI 		       available, or if outside the POSIX locale.  */
480*18fd37a7SXin LI 		    static char const byte_msgid[] =
481*18fd37a7SXin LI 		      N_("%s %s differ: byte %s, line %s\n");
482*18fd37a7SXin LI 		    char const *byte_message = _(byte_msgid);
483*18fd37a7SXin LI 		    bool use_byte_message = (byte_message != byte_msgid
484*18fd37a7SXin LI 					     || hard_locale_LC_MESSAGES);
485*18fd37a7SXin LI 
486*18fd37a7SXin LI 		    printf (use_byte_message ? byte_message : char_message,
487*18fd37a7SXin LI 			    file[0], file[1], byte_num, line_num);
488*18fd37a7SXin LI 		  }
489*18fd37a7SXin LI 		else
490*18fd37a7SXin LI 		  {
491*18fd37a7SXin LI 		    unsigned char c0 = buf0[first_diff];
492*18fd37a7SXin LI 		    unsigned char c1 = buf1[first_diff];
493*18fd37a7SXin LI 		    char s0[5];
494*18fd37a7SXin LI 		    char s1[5];
495*18fd37a7SXin LI 		    sprintc (s0, c0);
496*18fd37a7SXin LI 		    sprintc (s1, c1);
497*18fd37a7SXin LI 		    printf (_("%s %s differ: byte %s, line %s is %3o %s %3o %s\n"),
498*18fd37a7SXin LI 			    file[0], file[1], byte_num, line_num,
499*18fd37a7SXin LI 			    c0, s0, c1, s1);
500*18fd37a7SXin LI 		}
501*18fd37a7SXin LI 	      }
502*18fd37a7SXin LI 	      /* Fall through.  */
503*18fd37a7SXin LI 	    case type_status:
504*18fd37a7SXin LI 	      return EXIT_FAILURE;
505*18fd37a7SXin LI 
506*18fd37a7SXin LI 	    case type_all_diffs:
507*18fd37a7SXin LI 	      do
508*18fd37a7SXin LI 		{
509*18fd37a7SXin LI 		  unsigned char c0 = buf0[first_diff];
510*18fd37a7SXin LI 		  unsigned char c1 = buf1[first_diff];
511*18fd37a7SXin LI 		  if (c0 != c1)
512*18fd37a7SXin LI 		    {
513*18fd37a7SXin LI 		      char byte_buf[INT_BUFSIZE_BOUND (off_t)];
514*18fd37a7SXin LI 		      char const *byte_num = offtostr (byte_number, byte_buf);
515*18fd37a7SXin LI 		      if (!opt_print_bytes)
516*18fd37a7SXin LI 			{
517*18fd37a7SXin LI 			  /* See POSIX 1003.1-2001 for this format.  */
518*18fd37a7SXin LI 			  printf ("%*s %3o %3o\n",
519*18fd37a7SXin LI 				  offset_width, byte_num, c0, c1);
520*18fd37a7SXin LI 			}
521*18fd37a7SXin LI 		      else
522*18fd37a7SXin LI 			{
523*18fd37a7SXin LI 			  char s0[5];
524*18fd37a7SXin LI 			  char s1[5];
525*18fd37a7SXin LI 			  sprintc (s0, c0);
526*18fd37a7SXin LI 			  sprintc (s1, c1);
527*18fd37a7SXin LI 			  printf ("%*s %3o %-4s %3o %s\n",
528*18fd37a7SXin LI 				  offset_width, byte_num, c0, s0, c1, s1);
529*18fd37a7SXin LI 			}
530*18fd37a7SXin LI 		    }
531*18fd37a7SXin LI 		  byte_number++;
532*18fd37a7SXin LI 		  first_diff++;
533*18fd37a7SXin LI 		}
534*18fd37a7SXin LI 	      while (first_diff < smaller);
535*18fd37a7SXin LI 	      ret = EXIT_FAILURE;
536*18fd37a7SXin LI 	      break;
537*18fd37a7SXin LI 	    }
538*18fd37a7SXin LI 	}
539*18fd37a7SXin LI 
540*18fd37a7SXin LI       if (read0 != read1)
541*18fd37a7SXin LI 	{
542*18fd37a7SXin LI 	  if (comparison_type != type_status)
543*18fd37a7SXin LI 	    {
544*18fd37a7SXin LI 	      /* See POSIX 1003.1-2001 for this format.  */
545*18fd37a7SXin LI 	      fprintf (stderr, _("cmp: EOF on %s\n"), file[read1 < read0]);
546*18fd37a7SXin LI 	    }
547*18fd37a7SXin LI 
548*18fd37a7SXin LI 	  return EXIT_FAILURE;
549*18fd37a7SXin LI 	}
550*18fd37a7SXin LI     }
551*18fd37a7SXin LI   while (read0 == buf_size);
552*18fd37a7SXin LI 
553*18fd37a7SXin LI   return ret;
554*18fd37a7SXin LI }
555*18fd37a7SXin LI 
556*18fd37a7SXin LI /* Compare two blocks of memory P0 and P1 until they differ,
557*18fd37a7SXin LI    and count the number of '\n' occurrences in the common
558*18fd37a7SXin LI    part of P0 and P1.
559*18fd37a7SXin LI    If the blocks are not guaranteed to be different, put sentinels at the ends
560*18fd37a7SXin LI    of the blocks before calling this function.
561*18fd37a7SXin LI 
562*18fd37a7SXin LI    Return the offset of the first byte that differs.
563*18fd37a7SXin LI    Increment *COUNT by the count of '\n' occurrences.  */
564*18fd37a7SXin LI 
565*18fd37a7SXin LI static size_t
block_compare_and_count(word const * p0,word const * p1,off_t * count)566*18fd37a7SXin LI block_compare_and_count (word const *p0, word const *p1, off_t *count)
567*18fd37a7SXin LI {
568*18fd37a7SXin LI   word l;		/* One word from first buffer. */
569*18fd37a7SXin LI   word const *l0, *l1;	/* Pointers into each buffer. */
570*18fd37a7SXin LI   char const *c0, *c1;	/* Pointers for finding exact address. */
571*18fd37a7SXin LI   size_t cnt = 0;	/* Number of '\n' occurrences. */
572*18fd37a7SXin LI   word nnnn;		/* Newline, sizeof (word) times.  */
573*18fd37a7SXin LI   int i;
574*18fd37a7SXin LI 
575*18fd37a7SXin LI   nnnn = 0;
576*18fd37a7SXin LI   for (i = 0; i < sizeof nnnn; i++)
577*18fd37a7SXin LI     nnnn = (nnnn << CHAR_BIT) | '\n';
578*18fd37a7SXin LI 
579*18fd37a7SXin LI   /* Find the rough position of the first difference by reading words,
580*18fd37a7SXin LI      not bytes.  */
581*18fd37a7SXin LI 
582*18fd37a7SXin LI   for (l0 = p0, l1 = p1;  (l = *l0) == *l1;  l0++, l1++)
583*18fd37a7SXin LI     {
584*18fd37a7SXin LI       l ^= nnnn;
585*18fd37a7SXin LI       for (i = 0; i < sizeof l; i++)
586*18fd37a7SXin LI 	{
587*18fd37a7SXin LI 	  unsigned char uc = l;
588*18fd37a7SXin LI 	  cnt += ! uc;
589*18fd37a7SXin LI 	  l >>= CHAR_BIT;
590*18fd37a7SXin LI 	}
591*18fd37a7SXin LI     }
592*18fd37a7SXin LI 
593*18fd37a7SXin LI   /* Find the exact differing position (endianness independent).  */
594*18fd37a7SXin LI 
595*18fd37a7SXin LI   for (c0 = (char const *) l0, c1 = (char const *) l1;
596*18fd37a7SXin LI        *c0 == *c1;
597*18fd37a7SXin LI        c0++, c1++)
598*18fd37a7SXin LI     cnt += *c0 == '\n';
599*18fd37a7SXin LI 
600*18fd37a7SXin LI   *count += cnt;
601*18fd37a7SXin LI   return c0 - (char const *) p0;
602*18fd37a7SXin LI }
603*18fd37a7SXin LI 
604*18fd37a7SXin LI /* Compare two blocks of memory P0 and P1 until they differ.
605*18fd37a7SXin LI    If the blocks are not guaranteed to be different, put sentinels at the ends
606*18fd37a7SXin LI    of the blocks before calling this function.
607*18fd37a7SXin LI 
608*18fd37a7SXin LI    Return the offset of the first byte that differs.  */
609*18fd37a7SXin LI 
610*18fd37a7SXin LI static size_t
block_compare(word const * p0,word const * p1)611*18fd37a7SXin LI block_compare (word const *p0, word const *p1)
612*18fd37a7SXin LI {
613*18fd37a7SXin LI   word const *l0, *l1;
614*18fd37a7SXin LI   char const *c0, *c1;
615*18fd37a7SXin LI 
616*18fd37a7SXin LI   /* Find the rough position of the first difference by reading words,
617*18fd37a7SXin LI      not bytes.  */
618*18fd37a7SXin LI 
619*18fd37a7SXin LI   for (l0 = p0, l1 = p1;  *l0 == *l1;  l0++, l1++)
620*18fd37a7SXin LI     continue;
621*18fd37a7SXin LI 
622*18fd37a7SXin LI   /* Find the exact differing position (endianness independent).  */
623*18fd37a7SXin LI 
624*18fd37a7SXin LI   for (c0 = (char const *) l0, c1 = (char const *) l1;
625*18fd37a7SXin LI        *c0 == *c1;
626*18fd37a7SXin LI        c0++, c1++)
627*18fd37a7SXin LI     continue;
628*18fd37a7SXin LI 
629*18fd37a7SXin LI   return c0 - (char const *) p0;
630*18fd37a7SXin LI }
631*18fd37a7SXin LI 
/* Store in BUF a null-terminated, visible rendering of the byte C,
   quoting unprintable bytes the way cat -t does.

   A byte that isprint accepts is stored unchanged.  Otherwise a byte
   of 128 or more gets the prefix "M-" and is reduced by 128; after
   that, a value below 32 is written as '^' followed by the value plus
   64, and the value 127 as "^?".  The longest result is four
   characters, so BUF must have room for five bytes.  */

static void
sprintc (char *buf, unsigned char c)
{
  size_t n = 0;

  if (isprint (c))
    {
      buf[0] = c;
      buf[1] = 0;
      return;
    }

  if (c >= 128)
    {
      buf[n++] = 'M';
      buf[n++] = '-';
      c -= 128;
    }

  if (c < 32)
    {
      buf[n++] = '^';
      c += 64;
    }
  else if (c == 127)
    {
      buf[n++] = '^';
      c = '?';
    }

  buf[n++] = c;
  buf[n] = 0;
}
661*18fd37a7SXin LI 
662*18fd37a7SXin LI /* Position file F to ignore_initial[F] bytes from its initial position,
663*18fd37a7SXin LI    and yield its new position.  Don't try more than once.  */
664*18fd37a7SXin LI 
665*18fd37a7SXin LI static off_t
file_position(int f)666*18fd37a7SXin LI file_position (int f)
667*18fd37a7SXin LI {
668*18fd37a7SXin LI   static bool positioned[2];
669*18fd37a7SXin LI   static off_t position[2];
670*18fd37a7SXin LI 
671*18fd37a7SXin LI   if (! positioned[f])
672*18fd37a7SXin LI     {
673*18fd37a7SXin LI       positioned[f] = true;
674*18fd37a7SXin LI       position[f] = lseek (file_desc[f], ignore_initial[f], SEEK_CUR);
675*18fd37a7SXin LI     }
676*18fd37a7SXin LI   return position[f];
677*18fd37a7SXin LI }
678