xref: /netbsd-src/usr.bin/cmp/cmp.c (revision 38023541164cff097d5fadec63134189b1453b8c)
1 /*
2  * Copyright (c) 1987 Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
#ifndef lint
/*
 * Identification strings embedded in the object file; both are compiled
 * out when "lint" is defined.
 */
char copyright[] =
    "@(#) Copyright (c) 1987, 1990 Regents of the University of California.\n"
    " All rights reserved.\n";

/*static char sccsid[] = "from: @(#)cmp.c	5.3 (Berkeley) 6/1/90";*/
static char rcsid[] = "$Id: cmp.c,v 1.4 1993/11/23 00:50:46 jtc Exp $";
#endif /* not lint */
44 
45 #include <sys/param.h>
46 #include <sys/file.h>
47 #include <sys/stat.h>
48 #include <stdio.h>
49 #include <stdlib.h>
50 #include <string.h>
51 #include <ctype.h>
52 #include <errno.h>
53 #include <locale.h>
54 #include <unistd.h>
55 
56 #define	EXITNODIFF	0
57 #define	EXITDIFF	1
58 #define	EXITERR		2
59 
60 void skip		__P(());
61 __dead void cmp		__P(());
62 __dead void error	__P(());
63 __dead void endoffile	__P(());
64 __dead void usage	__P(());
65 
66 int	all, fd1, fd2, silent;
67 u_char	buf1[MAXBSIZE], buf2[MAXBSIZE];
68 char	*file1, *file2;
69 
70 int
71 main(argc, argv)
72 	int argc;
73 	char **argv;
74 {
75 	int ch;
76 
77 	setlocale(LC_ALL, "");
78 
79 	while ((ch = getopt(argc, argv, "ls")) != -1)
80 		switch (ch) {
81 		case 'l':		/* print all differences */
82 			all = 1;
83 			break;
84 		case 's':		/* silent run */
85 			silent = 1;
86 			break;
87 		case '?':
88 		default:
89 			usage();
90 		}
91 	argv += optind;
92 	argc -= optind;
93 
94 	if (argc < 2 || argc > 4)
95 		usage();
96 
97 	if (all && silent)
98 		usage ();
99 
100 	if (strcmp(file1 = argv[0], "-") == 0)
101 		fd1 = 0;
102 	else if ((fd1 = open(file1, O_RDONLY, 0)) < 0)
103 		error(file1);
104 	if (strcmp(file2 = argv[1], "-") == 0)
105 		fd2 = 0;
106 	else if ((fd2 = open(file2, O_RDONLY, 0)) < 0)
107 		error(file2);
108 	if (fd1 == fd2) {
109 		fprintf(stderr,
110 		    "cmp: standard input may only be specified once.\n");
111 		exit(EXITERR);
112 	}
113 
114 	/* handle skip arguments */
115 	if (argc > 2) {
116 		skip(strtoul(argv[2], NULL, 0), fd1, file1);
117 		if (argc == 4)
118 			skip(strtoul(argv[3], NULL, 0), fd2, file2);
119 	}
120 	cmp();
121 	/*NOTREACHED*/
122 }
123 
124 /*
125  * skip --
126  *	skip first part of file
127  */
128 void
129 skip(dist, fd, fname)
130 	register u_long dist;
131 	register int fd;
132 	char *fname;
133 {
134 	register int rlen, nread;
135 
136 	for (; dist; dist -= rlen) {
137 		rlen = MIN(dist, sizeof(buf1));
138 		if ((nread = read(fd, buf1, rlen)) != rlen) {
139 			if (nread < 0)
140 				error(fname);
141 			else
142 				endoffile(fname);
143 		}
144 	}
145 }
146 
147 void
148 cmp()
149 {
150 	register u_char	*p1, *p2;
151 	register int cnt, len1, len2;
152 	register long byte, line;
153 	int dfound = 0;
154 
155 	for (byte = 0, line = 1; ; ) {
156 		switch (len1 = read(fd1, buf1, MAXBSIZE)) {
157 		case -1:
158 			error(file1);
159 		case 0:
160 			/*
161 			 * read of file 1 just failed, find out
162 			 * if there's anything left in file 2
163 			 */
164 			switch (read(fd2, buf2, 1)) {
165 				case -1:
166 					error(file2);
167 					/* NOTREACHED */
168 				case 0:
169 					exit(dfound ? EXITDIFF : EXITNODIFF);
170 					/* NOTREACHED */
171 				default:
172 					endoffile(file1);
173 					break;
174 			}
175 		}
176 		/*
177 		 * file1 might be stdio, which means that a read of less than
178 		 * MAXBSIZE might not mean an EOF.  So, read whatever we read
179 		 * from file1 from file2.
180 		 */
181 		if ((len2 = read(fd2, buf2, len1)) == -1)
182 			error(file2);
183 		if (memcmp(buf1, buf2, len2)) {
184 			if (silent)
185 				exit(EXITDIFF);
186 			if (all) {
187 				dfound = 1;
188 				for (p1 = buf1, p2 = buf2, cnt = len2; cnt--;
189 				    ++p1, ++p2) {
190 					++byte;
191 					if (*p1 != *p2)
192 						printf("%6ld %3o %3o\n",
193 						    byte, *p1, *p2);
194 				}
195 			} else for (p1 = buf1, p2 = buf2; ; ++p1, ++p2) {
196 				++byte;
197 				if (*p1 != *p2) {
198 					printf("%s %s differ: char %ld, line %ld\n", file1, file2, byte, line);
199 					exit(EXITDIFF);
200 				}
201 				if (*p1 == '\n')
202 					++line;
203 			}
204 		} else {
205 			byte += len2;
206 			/*
207 			 * here's the real performance problem, we've got to
208 			 * count the stupid lines, which means that -l is a
209 			 * *much* faster version, i.e., unless you really
210 			 * *want* to know the line number, run -s or -l.
211 			 */
212 			if (!silent && !all)
213 				for (p1 = buf1, cnt = len2; cnt--; )
214 					if (*p1++ == '\n')
215 						++line;
216 		}
217 		/*
218 		 * couldn't read as much from file2 as from file1; checked
219 		 * here because there might be a difference before we got
220 		 * to this point, which would have precedence.
221 		 */
222 		if (len2 < len1)
223 			endoffile(file2);
224 	}
225 }
226 
227 /*
228  * error --
229  *	print I/O error message and die
230  */
231 void
232 error(filename)
233 	char *filename;
234 {
235 	extern int errno;
236 	char *strerror();
237 
238 	if (!silent)
239 		(void) fprintf(stderr, "cmp: %s: %s\n",
240 		    filename, strerror(errno));
241 	exit(EXITERR);
242 }
243 
244 /*
245  * endoffile --
246  *	print end-of-file message and exit indicating the files were different
247  */
248 void
249 endoffile(filename)
250 	char *filename;
251 {
252 	if (!silent)
253 		(void) fprintf(stderr, "cmp: EOF on %s\n", filename);
254 	exit(EXITDIFF);
255 }
256 
257 /*
258  * usage --
259  *	print usage and die
260  */
261 void
262 usage()
263 {
264 	fputs("usage: cmp [-l | -s] file1 file2 [skip1] [skip2]\n", stderr);
265 	exit(EXITERR);
266 }
267