1*59c8e88eSDag-Erling Smørgrav /* Commandline diff utility to test diff implementations. */
2*59c8e88eSDag-Erling Smørgrav /*
3*59c8e88eSDag-Erling Smørgrav * Copyright (c) 2018 Martin Pieuchot
4*59c8e88eSDag-Erling Smørgrav * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
5*59c8e88eSDag-Erling Smørgrav *
6*59c8e88eSDag-Erling Smørgrav * Permission to use, copy, modify, and distribute this software for any
7*59c8e88eSDag-Erling Smørgrav * purpose with or without fee is hereby granted, provided that the above
8*59c8e88eSDag-Erling Smørgrav * copyright notice and this permission notice appear in all copies.
9*59c8e88eSDag-Erling Smørgrav *
10*59c8e88eSDag-Erling Smørgrav * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11*59c8e88eSDag-Erling Smørgrav * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12*59c8e88eSDag-Erling Smørgrav * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13*59c8e88eSDag-Erling Smørgrav * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14*59c8e88eSDag-Erling Smørgrav * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15*59c8e88eSDag-Erling Smørgrav * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16*59c8e88eSDag-Erling Smørgrav * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17*59c8e88eSDag-Erling Smørgrav */
18*59c8e88eSDag-Erling Smørgrav
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>

#include <arraylist.h>
#include <diff_main.h>
#include <diff_output.h>
35*59c8e88eSDag-Erling Smørgrav
/* Algorithm selection passed from the command line to diffreg(). */
enum diffreg_algo {
	/* Default when no algorithm option is given. */
	DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE = 0,
	/* Selected by -Q. */
	DIFFREG_ALGO_MYERS_THEN_PATIENCE,
	/* Selected by -P. */
	DIFFREG_ALGO_PATIENCE,
	/* Selected by -T. */
	DIFFREG_ALGO_NONE,
};
42*59c8e88eSDag-Erling Smørgrav
43*59c8e88eSDag-Erling Smørgrav __dead void usage(void);
44*59c8e88eSDag-Erling Smørgrav int diffreg(char *, char *, enum diffreg_algo, bool, bool, bool,
45*59c8e88eSDag-Erling Smørgrav int, bool);
46*59c8e88eSDag-Erling Smørgrav FILE * openfile(const char *, char **, struct stat *);
47*59c8e88eSDag-Erling Smørgrav
48*59c8e88eSDag-Erling Smørgrav __dead void
usage(void)49*59c8e88eSDag-Erling Smørgrav usage(void)
50*59c8e88eSDag-Erling Smørgrav {
51*59c8e88eSDag-Erling Smørgrav fprintf(stderr,
52*59c8e88eSDag-Erling Smørgrav "usage: %s [-apPQTwe] [-U n] file1 file2\n"
53*59c8e88eSDag-Erling Smørgrav "\n"
54*59c8e88eSDag-Erling Smørgrav " -a Treat input as ASCII even if binary data is detected\n"
55*59c8e88eSDag-Erling Smørgrav " -p Show function prototypes in hunk headers\n"
56*59c8e88eSDag-Erling Smørgrav " -P Use Patience Diff (slower but often nicer)\n"
57*59c8e88eSDag-Erling Smørgrav " -Q Use forward-Myers for small files, otherwise Patience\n"
58*59c8e88eSDag-Erling Smørgrav " -T Trivial algo: detect similar start and end only\n"
59*59c8e88eSDag-Erling Smørgrav " -w Ignore Whitespace\n"
60*59c8e88eSDag-Erling Smørgrav " -U n Number of Context Lines\n"
61*59c8e88eSDag-Erling Smørgrav " -e Produce ed script output\n"
62*59c8e88eSDag-Erling Smørgrav , getprogname());
63*59c8e88eSDag-Erling Smørgrav exit(1);
64*59c8e88eSDag-Erling Smørgrav }
65*59c8e88eSDag-Erling Smørgrav
66*59c8e88eSDag-Erling Smørgrav int
main(int argc,char * argv[])67*59c8e88eSDag-Erling Smørgrav main(int argc, char *argv[])
68*59c8e88eSDag-Erling Smørgrav {
69*59c8e88eSDag-Erling Smørgrav int ch, rc;
70*59c8e88eSDag-Erling Smørgrav bool force_text = false;
71*59c8e88eSDag-Erling Smørgrav bool ignore_whitespace = false;
72*59c8e88eSDag-Erling Smørgrav bool show_function_prototypes = false;
73*59c8e88eSDag-Erling Smørgrav bool edscript = false;
74*59c8e88eSDag-Erling Smørgrav int context_lines = 3;
75*59c8e88eSDag-Erling Smørgrav enum diffreg_algo algo = DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE;
76*59c8e88eSDag-Erling Smørgrav
77*59c8e88eSDag-Erling Smørgrav while ((ch = getopt(argc, argv, "apPQTwU:e")) != -1) {
78*59c8e88eSDag-Erling Smørgrav switch (ch) {
79*59c8e88eSDag-Erling Smørgrav case 'a':
80*59c8e88eSDag-Erling Smørgrav force_text = true;
81*59c8e88eSDag-Erling Smørgrav break;
82*59c8e88eSDag-Erling Smørgrav case 'p':
83*59c8e88eSDag-Erling Smørgrav show_function_prototypes = true;
84*59c8e88eSDag-Erling Smørgrav break;
85*59c8e88eSDag-Erling Smørgrav case 'P':
86*59c8e88eSDag-Erling Smørgrav algo = DIFFREG_ALGO_PATIENCE;
87*59c8e88eSDag-Erling Smørgrav break;
88*59c8e88eSDag-Erling Smørgrav case 'Q':
89*59c8e88eSDag-Erling Smørgrav algo = DIFFREG_ALGO_MYERS_THEN_PATIENCE;
90*59c8e88eSDag-Erling Smørgrav break;
91*59c8e88eSDag-Erling Smørgrav case 'T':
92*59c8e88eSDag-Erling Smørgrav algo = DIFFREG_ALGO_NONE;
93*59c8e88eSDag-Erling Smørgrav break;
94*59c8e88eSDag-Erling Smørgrav case 'w':
95*59c8e88eSDag-Erling Smørgrav ignore_whitespace = true;
96*59c8e88eSDag-Erling Smørgrav break;
97*59c8e88eSDag-Erling Smørgrav case 'U':
98*59c8e88eSDag-Erling Smørgrav context_lines = atoi(optarg);
99*59c8e88eSDag-Erling Smørgrav break;
100*59c8e88eSDag-Erling Smørgrav case 'e':
101*59c8e88eSDag-Erling Smørgrav edscript = true;
102*59c8e88eSDag-Erling Smørgrav break;
103*59c8e88eSDag-Erling Smørgrav default:
104*59c8e88eSDag-Erling Smørgrav usage();
105*59c8e88eSDag-Erling Smørgrav }
106*59c8e88eSDag-Erling Smørgrav }
107*59c8e88eSDag-Erling Smørgrav
108*59c8e88eSDag-Erling Smørgrav argc -= optind;
109*59c8e88eSDag-Erling Smørgrav argv += optind;
110*59c8e88eSDag-Erling Smørgrav
111*59c8e88eSDag-Erling Smørgrav if (argc != 2)
112*59c8e88eSDag-Erling Smørgrav usage();
113*59c8e88eSDag-Erling Smørgrav
114*59c8e88eSDag-Erling Smørgrav rc = diffreg(argv[0], argv[1], algo, force_text, ignore_whitespace,
115*59c8e88eSDag-Erling Smørgrav show_function_prototypes, context_lines, edscript);
116*59c8e88eSDag-Erling Smørgrav if (rc != DIFF_RC_OK) {
117*59c8e88eSDag-Erling Smørgrav fprintf(stderr, "diff: %s\n", strerror(rc));
118*59c8e88eSDag-Erling Smørgrav return 1;
119*59c8e88eSDag-Erling Smørgrav }
120*59c8e88eSDag-Erling Smørgrav return 0;
121*59c8e88eSDag-Erling Smørgrav }
122*59c8e88eSDag-Erling Smørgrav
123*59c8e88eSDag-Erling Smørgrav const struct diff_algo_config myers_then_patience;
124*59c8e88eSDag-Erling Smørgrav const struct diff_algo_config myers_then_myers_divide;
125*59c8e88eSDag-Erling Smørgrav const struct diff_algo_config patience;
126*59c8e88eSDag-Erling Smørgrav const struct diff_algo_config myers_divide;
127*59c8e88eSDag-Erling Smørgrav
128*59c8e88eSDag-Erling Smørgrav const struct diff_algo_config myers_then_patience = (struct diff_algo_config){
129*59c8e88eSDag-Erling Smørgrav .impl = diff_algo_myers,
130*59c8e88eSDag-Erling Smørgrav .permitted_state_size = 1024 * 1024 * sizeof(int),
131*59c8e88eSDag-Erling Smørgrav .fallback_algo = &patience,
132*59c8e88eSDag-Erling Smørgrav };
133*59c8e88eSDag-Erling Smørgrav
134*59c8e88eSDag-Erling Smørgrav const struct diff_algo_config myers_then_myers_divide =
135*59c8e88eSDag-Erling Smørgrav (struct diff_algo_config){
136*59c8e88eSDag-Erling Smørgrav .impl = diff_algo_myers,
137*59c8e88eSDag-Erling Smørgrav .permitted_state_size = 1024 * 1024 * sizeof(int),
138*59c8e88eSDag-Erling Smørgrav .fallback_algo = &myers_divide,
139*59c8e88eSDag-Erling Smørgrav };
140*59c8e88eSDag-Erling Smørgrav
141*59c8e88eSDag-Erling Smørgrav const struct diff_algo_config patience = (struct diff_algo_config){
142*59c8e88eSDag-Erling Smørgrav .impl = diff_algo_patience,
143*59c8e88eSDag-Erling Smørgrav /* After subdivision, do Patience again: */
144*59c8e88eSDag-Erling Smørgrav .inner_algo = &patience,
145*59c8e88eSDag-Erling Smørgrav /* If subdivision failed, do Myers Divide et Impera: */
146*59c8e88eSDag-Erling Smørgrav .fallback_algo = &myers_then_myers_divide,
147*59c8e88eSDag-Erling Smørgrav };
148*59c8e88eSDag-Erling Smørgrav
149*59c8e88eSDag-Erling Smørgrav const struct diff_algo_config myers_divide = (struct diff_algo_config){
150*59c8e88eSDag-Erling Smørgrav .impl = diff_algo_myers_divide,
151*59c8e88eSDag-Erling Smørgrav /* When division succeeded, start from the top: */
152*59c8e88eSDag-Erling Smørgrav .inner_algo = &myers_then_myers_divide,
153*59c8e88eSDag-Erling Smørgrav /* (fallback_algo = NULL implies diff_algo_none). */
154*59c8e88eSDag-Erling Smørgrav };
155*59c8e88eSDag-Erling Smørgrav
156*59c8e88eSDag-Erling Smørgrav const struct diff_algo_config no_algo = (struct diff_algo_config){
157*59c8e88eSDag-Erling Smørgrav .impl = diff_algo_none,
158*59c8e88eSDag-Erling Smørgrav };
159*59c8e88eSDag-Erling Smørgrav
160*59c8e88eSDag-Erling Smørgrav /* If the state for a forward-Myers is small enough, use Myers, otherwise first
161*59c8e88eSDag-Erling Smørgrav * do a Myers-divide. */
162*59c8e88eSDag-Erling Smørgrav const struct diff_config diff_config_myers_then_myers_divide = {
163*59c8e88eSDag-Erling Smørgrav .atomize_func = diff_atomize_text_by_line,
164*59c8e88eSDag-Erling Smørgrav .algo = &myers_then_myers_divide,
165*59c8e88eSDag-Erling Smørgrav };
166*59c8e88eSDag-Erling Smørgrav
167*59c8e88eSDag-Erling Smørgrav /* If the state for a forward-Myers is small enough, use Myers, otherwise first
168*59c8e88eSDag-Erling Smørgrav * do a Patience. */
169*59c8e88eSDag-Erling Smørgrav const struct diff_config diff_config_myers_then_patience = {
170*59c8e88eSDag-Erling Smørgrav .atomize_func = diff_atomize_text_by_line,
171*59c8e88eSDag-Erling Smørgrav .algo = &myers_then_patience,
172*59c8e88eSDag-Erling Smørgrav };
173*59c8e88eSDag-Erling Smørgrav
174*59c8e88eSDag-Erling Smørgrav /* Directly force Patience as a first divider of the source file. */
175*59c8e88eSDag-Erling Smørgrav const struct diff_config diff_config_patience = {
176*59c8e88eSDag-Erling Smørgrav .atomize_func = diff_atomize_text_by_line,
177*59c8e88eSDag-Erling Smørgrav .algo = &patience,
178*59c8e88eSDag-Erling Smørgrav };
179*59c8e88eSDag-Erling Smørgrav
180*59c8e88eSDag-Erling Smørgrav /* Directly force Patience as a first divider of the source file. */
181*59c8e88eSDag-Erling Smørgrav const struct diff_config diff_config_no_algo = {
182*59c8e88eSDag-Erling Smørgrav .atomize_func = diff_atomize_text_by_line,
183*59c8e88eSDag-Erling Smørgrav };
184*59c8e88eSDag-Erling Smørgrav
185*59c8e88eSDag-Erling Smørgrav int
diffreg(char * file1,char * file2,enum diffreg_algo algo,bool force_text,bool ignore_whitespace,bool show_function_prototypes,int context_lines,bool edscript)186*59c8e88eSDag-Erling Smørgrav diffreg(char *file1, char *file2, enum diffreg_algo algo, bool force_text,
187*59c8e88eSDag-Erling Smørgrav bool ignore_whitespace, bool show_function_prototypes, int context_lines,
188*59c8e88eSDag-Erling Smørgrav bool edscript)
189*59c8e88eSDag-Erling Smørgrav {
190*59c8e88eSDag-Erling Smørgrav char *str1, *str2;
191*59c8e88eSDag-Erling Smørgrav FILE *f1, *f2;
192*59c8e88eSDag-Erling Smørgrav struct stat st1, st2;
193*59c8e88eSDag-Erling Smørgrav struct diff_input_info info = {
194*59c8e88eSDag-Erling Smørgrav .left_path = file1,
195*59c8e88eSDag-Erling Smørgrav .right_path = file2,
196*59c8e88eSDag-Erling Smørgrav };
197*59c8e88eSDag-Erling Smørgrav struct diff_data left = {}, right = {};
198*59c8e88eSDag-Erling Smørgrav struct diff_result *result = NULL;
199*59c8e88eSDag-Erling Smørgrav int rc;
200*59c8e88eSDag-Erling Smørgrav const struct diff_config *cfg;
201*59c8e88eSDag-Erling Smørgrav int diff_flags = 0;
202*59c8e88eSDag-Erling Smørgrav
203*59c8e88eSDag-Erling Smørgrav switch (algo) {
204*59c8e88eSDag-Erling Smørgrav default:
205*59c8e88eSDag-Erling Smørgrav case DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE:
206*59c8e88eSDag-Erling Smørgrav cfg = &diff_config_myers_then_myers_divide;
207*59c8e88eSDag-Erling Smørgrav break;
208*59c8e88eSDag-Erling Smørgrav case DIFFREG_ALGO_MYERS_THEN_PATIENCE:
209*59c8e88eSDag-Erling Smørgrav cfg = &diff_config_myers_then_patience;
210*59c8e88eSDag-Erling Smørgrav break;
211*59c8e88eSDag-Erling Smørgrav case DIFFREG_ALGO_PATIENCE:
212*59c8e88eSDag-Erling Smørgrav cfg = &diff_config_patience;
213*59c8e88eSDag-Erling Smørgrav break;
214*59c8e88eSDag-Erling Smørgrav case DIFFREG_ALGO_NONE:
215*59c8e88eSDag-Erling Smørgrav cfg = &diff_config_no_algo;
216*59c8e88eSDag-Erling Smørgrav break;
217*59c8e88eSDag-Erling Smørgrav }
218*59c8e88eSDag-Erling Smørgrav
219*59c8e88eSDag-Erling Smørgrav f1 = openfile(file1, &str1, &st1);
220*59c8e88eSDag-Erling Smørgrav f2 = openfile(file2, &str2, &st2);
221*59c8e88eSDag-Erling Smørgrav
222*59c8e88eSDag-Erling Smørgrav if (force_text)
223*59c8e88eSDag-Erling Smørgrav diff_flags |= DIFF_FLAG_FORCE_TEXT_DATA;
224*59c8e88eSDag-Erling Smørgrav if (ignore_whitespace)
225*59c8e88eSDag-Erling Smørgrav diff_flags |= DIFF_FLAG_IGNORE_WHITESPACE;
226*59c8e88eSDag-Erling Smørgrav if (show_function_prototypes)
227*59c8e88eSDag-Erling Smørgrav diff_flags |= DIFF_FLAG_SHOW_PROTOTYPES;
228*59c8e88eSDag-Erling Smørgrav
229*59c8e88eSDag-Erling Smørgrav rc = diff_atomize_file(&left, cfg, f1, str1, st1.st_size, diff_flags);
230*59c8e88eSDag-Erling Smørgrav if (rc)
231*59c8e88eSDag-Erling Smørgrav goto done;
232*59c8e88eSDag-Erling Smørgrav rc = diff_atomize_file(&right, cfg, f2, str2, st2.st_size, diff_flags);
233*59c8e88eSDag-Erling Smørgrav if (rc)
234*59c8e88eSDag-Erling Smørgrav goto done;
235*59c8e88eSDag-Erling Smørgrav
236*59c8e88eSDag-Erling Smørgrav result = diff_main(cfg, &left, &right);
237*59c8e88eSDag-Erling Smørgrav #if 0
238*59c8e88eSDag-Erling Smørgrav rc = diff_output_plain(stdout, &info, result);
239*59c8e88eSDag-Erling Smørgrav #else
240*59c8e88eSDag-Erling Smørgrav if (edscript)
241*59c8e88eSDag-Erling Smørgrav rc = diff_output_edscript(NULL, stdout, &info, result);
242*59c8e88eSDag-Erling Smørgrav else {
243*59c8e88eSDag-Erling Smørgrav rc = diff_output_unidiff(NULL, stdout, &info, result,
244*59c8e88eSDag-Erling Smørgrav context_lines);
245*59c8e88eSDag-Erling Smørgrav }
246*59c8e88eSDag-Erling Smørgrav #endif
247*59c8e88eSDag-Erling Smørgrav done:
248*59c8e88eSDag-Erling Smørgrav diff_result_free(result);
249*59c8e88eSDag-Erling Smørgrav diff_data_free(&left);
250*59c8e88eSDag-Erling Smørgrav diff_data_free(&right);
251*59c8e88eSDag-Erling Smørgrav if (str1)
252*59c8e88eSDag-Erling Smørgrav munmap(str1, st1.st_size);
253*59c8e88eSDag-Erling Smørgrav if (str2)
254*59c8e88eSDag-Erling Smørgrav munmap(str2, st2.st_size);
255*59c8e88eSDag-Erling Smørgrav fclose(f1);
256*59c8e88eSDag-Erling Smørgrav fclose(f2);
257*59c8e88eSDag-Erling Smørgrav
258*59c8e88eSDag-Erling Smørgrav return rc;
259*59c8e88eSDag-Erling Smørgrav }
260*59c8e88eSDag-Erling Smørgrav
/*
 * Open path for reading and fill *st with its fstat() data.
 *
 * Unless DIFF_NO_MMAP is defined, the file is also mapped read-only and
 * privately over st->st_size bytes and the mapping is stored in *p.  *p is
 * set to NULL when mapping fails or is compiled out, in which case the
 * caller has to read through the returned stream instead.
 *
 * Returns the open stream.  A failing fopen() or fstat() terminates the
 * process through err() with exit status 2.
 */
FILE *
openfile(const char *path, char **p, struct stat *st)
{
	FILE *stream;
	char *mapping = NULL;
	int fd;

	stream = fopen(path, "r");
	if (stream == NULL)
		err(2, "%s", path);

	fd = fileno(stream);
	if (fstat(fd, st) == -1)
		err(2, "%s", path);

#ifndef DIFF_NO_MMAP
	mapping = mmap(NULL, st->st_size, PROT_READ, MAP_PRIVATE, fd, 0);
	if (mapping == MAP_FAILED)
		mapping = NULL;	/* fall back on file I/O */
#endif
	*p = mapping;

	return stream;
}
281