xref: /freebsd-src/contrib/diff/src/diff.h (revision 18fd37a72c3a7549d2d4f6c6ea00bdcd2bdaca01)
/* Shared definitions for GNU DIFF

   Copyright (C) 1988, 1989, 1991, 1992, 1993, 1994, 1995, 1998, 2001,
   2002, 2004 Free Software Foundation, Inc.

   This file is part of GNU DIFF.

   GNU DIFF is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GNU DIFF is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.
   If not, write to the Free Software Foundation,
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
22*18fd37a7SXin LI 
23*18fd37a7SXin LI #include "system.h"
24*18fd37a7SXin LI #include <regex.h>
25*18fd37a7SXin LI #include <stdio.h>
26*18fd37a7SXin LI #include <unlocked-io.h>
27*18fd37a7SXin LI 
/* What kind of changes a hunk contains.
   The values are written out because the enumerators also size arrays
   in this header (group_format[CHANGED + 1], line_format[NEW + 1]),
   so their order must not change.  */
enum changes
{
  /* No changes: lines common to both files.  */
  UNCHANGED = 0,

  /* Deletes only: lines taken from just the first file.  */
  OLD = 1,

  /* Inserts only: lines taken from just the second file.  */
  NEW = 2,

  /* Both deletes and inserts: a hunk containing both old and new lines.  */
  CHANGED = 3
};
43*18fd37a7SXin LI 
/* Variables for command line options */

/* XTERN expands to `extern' unless GDIFF_MAIN is defined before this
   header is included; with GDIFF_MAIN defined it expands to nothing,
   so the XTERN declarations below lose their `extern' storage class.  */
#ifdef GDIFF_MAIN
# define XTERN
#else
# define XTERN extern
#endif
51*18fd37a7SXin LI 
/* The formats in which differences can be written.  */
enum output_style
{
  /* No output style specified.  */
  OUTPUT_UNSPECIFIED,

  /* Default output style.  */
  OUTPUT_NORMAL,

  /* Output the differences with lines of context before and after (-c).  */
  OUTPUT_CONTEXT,

  /* Output the differences in a unified context diff format (-u).  */
  OUTPUT_UNIFIED,

  /* Output the differences as commands suitable for `ed' (-e).  */
  OUTPUT_ED,

  /* Output the diff as a forward ed script (-f).  */
  OUTPUT_FORWARD_ED,

  /* Like -f, but output a count of changed lines in each "command" (-n).  */
  OUTPUT_RCS,

  /* Output merged #ifdef'd file (-D).  */
  OUTPUT_IFDEF,

  /* Output sdiff style (-y).  */
  OUTPUT_SDIFF
};
81*18fd37a7SXin LI 
/* True for output styles that are robust,
   i.e. can handle a file that ends in a non-newline.
   Every style except OUTPUT_ED and OUTPUT_FORWARD_ED qualifies.
   S may be evaluated twice, so it should have no side effects.  */
#define ROBUST_OUTPUT_STYLE(S) ((S) != OUTPUT_ED && (S) != OUTPUT_FORWARD_ED)
85*18fd37a7SXin LI 
86*18fd37a7SXin LI XTERN enum output_style output_style;
87*18fd37a7SXin LI 
88*18fd37a7SXin LI /* Nonzero if output cannot be generated for identical files.  */
89*18fd37a7SXin LI XTERN bool no_diff_means_no_output;
90*18fd37a7SXin LI 
91*18fd37a7SXin LI /* Number of lines of context to show in each set of diffs.
92*18fd37a7SXin LI    This is zero when context is not to be shown.  */
93*18fd37a7SXin LI XTERN lin context;
94*18fd37a7SXin LI 
95*18fd37a7SXin LI /* Consider all files as text files (-a).
96*18fd37a7SXin LI    Don't interpret codes over 0177 as implying a "binary file".  */
97*18fd37a7SXin LI XTERN bool text;
98*18fd37a7SXin LI 
99*18fd37a7SXin LI /* Number of lines to keep in identical prefix and suffix.  */
100*18fd37a7SXin LI XTERN lin horizon_lines;
101*18fd37a7SXin LI 
102*18fd37a7SXin LI /* The significance of white space during comparisons.  */
103*18fd37a7SXin LI XTERN enum
104*18fd37a7SXin LI {
105*18fd37a7SXin LI   /* All white space is significant (the default).  */
106*18fd37a7SXin LI   IGNORE_NO_WHITE_SPACE,
107*18fd37a7SXin LI 
108*18fd37a7SXin LI   /* Ignore changes due to tab expansion (-E).  */
109*18fd37a7SXin LI   IGNORE_TAB_EXPANSION,
110*18fd37a7SXin LI 
111*18fd37a7SXin LI   /* Ignore changes in horizontal white space (-b).  */
112*18fd37a7SXin LI   IGNORE_SPACE_CHANGE,
113*18fd37a7SXin LI 
114*18fd37a7SXin LI   /* Ignore all horizontal white space (-w).  */
115*18fd37a7SXin LI   IGNORE_ALL_SPACE
116*18fd37a7SXin LI } ignore_white_space;
117*18fd37a7SXin LI 
118*18fd37a7SXin LI /* Ignore changes that affect only blank lines (-B).  */
119*18fd37a7SXin LI XTERN bool ignore_blank_lines;
120*18fd37a7SXin LI 
121*18fd37a7SXin LI /* Files can be compared byte-by-byte, as if they were binary.
122*18fd37a7SXin LI    This depends on various options.  */
123*18fd37a7SXin LI XTERN bool files_can_be_treated_as_binary;
124*18fd37a7SXin LI 
125*18fd37a7SXin LI /* Ignore differences in case of letters (-i).  */
126*18fd37a7SXin LI XTERN bool ignore_case;
127*18fd37a7SXin LI 
128*18fd37a7SXin LI /* Ignore differences in case of letters in file names.  */
129*18fd37a7SXin LI XTERN bool ignore_file_name_case;
130*18fd37a7SXin LI 
131*18fd37a7SXin LI /* File labels for `-c' output headers (--label).  */
132*18fd37a7SXin LI XTERN char *file_label[2];
133*18fd37a7SXin LI 
134*18fd37a7SXin LI /* Regexp to identify function-header lines (-F).  */
135*18fd37a7SXin LI XTERN struct re_pattern_buffer function_regexp;
136*18fd37a7SXin LI 
137*18fd37a7SXin LI /* Ignore changes that affect only lines matching this regexp (-I).  */
138*18fd37a7SXin LI XTERN struct re_pattern_buffer ignore_regexp;
139*18fd37a7SXin LI 
140*18fd37a7SXin LI /* Say only whether files differ, not how (-q).  */
141*18fd37a7SXin LI XTERN bool brief;
142*18fd37a7SXin LI 
143*18fd37a7SXin LI /* Expand tabs in the output so the text lines up properly
144*18fd37a7SXin LI    despite the characters added to the front of each line (-t).  */
145*18fd37a7SXin LI XTERN bool expand_tabs;
146*18fd37a7SXin LI 
147*18fd37a7SXin LI /* Number of columns between tab stops.  */
148*18fd37a7SXin LI XTERN size_t tabsize;
149*18fd37a7SXin LI 
150*18fd37a7SXin LI /* Use a tab in the output, rather than a space, before the text of an
151*18fd37a7SXin LI    input line, so as to keep the proper alignment in the input line
152*18fd37a7SXin LI    without changing the characters in it (-T).  */
153*18fd37a7SXin LI XTERN bool initial_tab;
154*18fd37a7SXin LI 
155*18fd37a7SXin LI /* Remove trailing carriage returns from input.  */
156*18fd37a7SXin LI XTERN bool strip_trailing_cr;
157*18fd37a7SXin LI 
158*18fd37a7SXin LI /* In directory comparison, specify file to start with (-S).
159*18fd37a7SXin LI    This is used for resuming an aborted comparison.
160*18fd37a7SXin LI    All file names less than this name are ignored.  */
161*18fd37a7SXin LI XTERN char const *starting_file;
162*18fd37a7SXin LI 
163*18fd37a7SXin LI /* Pipe each file's output through pr (-l).  */
164*18fd37a7SXin LI XTERN bool paginate;
165*18fd37a7SXin LI 
166*18fd37a7SXin LI /* Line group formats for unchanged, old, new, and changed groups.  */
167*18fd37a7SXin LI XTERN char const *group_format[CHANGED + 1];
168*18fd37a7SXin LI 
169*18fd37a7SXin LI /* Line formats for unchanged, old, and new lines.  */
170*18fd37a7SXin LI XTERN char const *line_format[NEW + 1];
171*18fd37a7SXin LI 
172*18fd37a7SXin LI /* If using OUTPUT_SDIFF print extra information to help the sdiff filter.  */
173*18fd37a7SXin LI XTERN bool sdiff_merge_assist;
174*18fd37a7SXin LI 
175*18fd37a7SXin LI /* Tell OUTPUT_SDIFF to show only the left version of common lines.  */
176*18fd37a7SXin LI XTERN bool left_column;
177*18fd37a7SXin LI 
178*18fd37a7SXin LI /* Tell OUTPUT_SDIFF to not show common lines.  */
179*18fd37a7SXin LI XTERN bool suppress_common_lines;
180*18fd37a7SXin LI 
181*18fd37a7SXin LI /* The half line width and column 2 offset for OUTPUT_SDIFF.  */
182*18fd37a7SXin LI XTERN size_t sdiff_half_width;
183*18fd37a7SXin LI XTERN size_t sdiff_column2_offset;
184*18fd37a7SXin LI 
185*18fd37a7SXin LI /* String containing all the command options diff received,
186*18fd37a7SXin LI    with spaces between and at the beginning but none at the end.
187*18fd37a7SXin LI    If there were no options given, this string is empty.  */
188*18fd37a7SXin LI XTERN char *switch_string;
189*18fd37a7SXin LI 
190*18fd37a7SXin LI /* Use heuristics for better speed with large files with a small
191*18fd37a7SXin LI    density of changes.  */
192*18fd37a7SXin LI XTERN bool speed_large_files;
193*18fd37a7SXin LI 
194*18fd37a7SXin LI /* Patterns that match file names to be excluded.  */
195*18fd37a7SXin LI XTERN struct exclude *excluded;
196*18fd37a7SXin LI 
197*18fd37a7SXin LI /* Don't discard lines.  This makes things slower (sometimes much
198*18fd37a7SXin LI    slower) but will find a guaranteed minimal set of changes.  */
199*18fd37a7SXin LI XTERN bool minimal;
200*18fd37a7SXin LI 
201*18fd37a7SXin LI /* Name of program the user invoked (for error messages).  */
202*18fd37a7SXin LI XTERN char *program_name;
203*18fd37a7SXin LI 
204*18fd37a7SXin LI /* The strftime format to use for time strings.  */
205*18fd37a7SXin LI XTERN char const *time_format;
206*18fd37a7SXin LI 
207*18fd37a7SXin LI /* The result of comparison is an "edit script": a chain of `struct change'.
208*18fd37a7SXin LI    Each `struct change' represents one place where some lines are deleted
209*18fd37a7SXin LI    and some are inserted.
210*18fd37a7SXin LI 
211*18fd37a7SXin LI    LINE0 and LINE1 are the first affected lines in the two files (origin 0).
212*18fd37a7SXin LI    DELETED is the number of lines deleted here from file 0.
213*18fd37a7SXin LI    INSERTED is the number of lines inserted here in file 1.
214*18fd37a7SXin LI 
215*18fd37a7SXin LI    If DELETED is 0 then LINE0 is the number of the line before
216*18fd37a7SXin LI    which the insertion was done; vice versa for INSERTED and LINE1.  */
217*18fd37a7SXin LI 
218*18fd37a7SXin LI struct change
219*18fd37a7SXin LI {
220*18fd37a7SXin LI   struct change *link;		/* Previous or next edit command  */
221*18fd37a7SXin LI   lin inserted;			/* # lines of file 1 changed here.  */
222*18fd37a7SXin LI   lin deleted;			/* # lines of file 0 changed here.  */
223*18fd37a7SXin LI   lin line0;			/* Line number of 1st deleted line.  */
224*18fd37a7SXin LI   lin line1;			/* Line number of 1st inserted line.  */
225*18fd37a7SXin LI   bool ignore;			/* Flag used in context.c.  */
226*18fd37a7SXin LI };
227*18fd37a7SXin LI 
228*18fd37a7SXin LI /* Structures that describe the input files.  */
229*18fd37a7SXin LI 
230*18fd37a7SXin LI /* Data on one input file being compared.  */
231*18fd37a7SXin LI 
232*18fd37a7SXin LI struct file_data {
233*18fd37a7SXin LI     int             desc;	/* File descriptor  */
234*18fd37a7SXin LI     char const      *name;	/* File name  */
235*18fd37a7SXin LI     struct stat     stat;	/* File status */
236*18fd37a7SXin LI 
237*18fd37a7SXin LI     /* Buffer in which text of file is read.  */
238*18fd37a7SXin LI     word *buffer;
239*18fd37a7SXin LI 
240*18fd37a7SXin LI     /* Allocated size of buffer, in bytes.  Always a multiple of
241*18fd37a7SXin LI        sizeof *buffer.  */
242*18fd37a7SXin LI     size_t bufsize;
243*18fd37a7SXin LI 
244*18fd37a7SXin LI     /* Number of valid bytes now in the buffer.  */
245*18fd37a7SXin LI     size_t buffered;
246*18fd37a7SXin LI 
247*18fd37a7SXin LI     /* Array of pointers to lines in the file.  */
248*18fd37a7SXin LI     char const **linbuf;
249*18fd37a7SXin LI 
250*18fd37a7SXin LI     /* linbuf_base <= buffered_lines <= valid_lines <= alloc_lines.
251*18fd37a7SXin LI        linebuf[linbuf_base ... buffered_lines - 1] are possibly differing.
252*18fd37a7SXin LI        linebuf[linbuf_base ... valid_lines - 1] contain valid data.
253*18fd37a7SXin LI        linebuf[linbuf_base ... alloc_lines - 1] are allocated.  */
254*18fd37a7SXin LI     lin linbuf_base, buffered_lines, valid_lines, alloc_lines;
255*18fd37a7SXin LI 
256*18fd37a7SXin LI     /* Pointer to end of prefix of this file to ignore when hashing.  */
257*18fd37a7SXin LI     char const *prefix_end;
258*18fd37a7SXin LI 
259*18fd37a7SXin LI     /* Count of lines in the prefix.
260*18fd37a7SXin LI        There are this many lines in the file before linbuf[0].  */
261*18fd37a7SXin LI     lin prefix_lines;
262*18fd37a7SXin LI 
263*18fd37a7SXin LI     /* Pointer to start of suffix of this file to ignore when hashing.  */
264*18fd37a7SXin LI     char const *suffix_begin;
265*18fd37a7SXin LI 
266*18fd37a7SXin LI     /* Vector, indexed by line number, containing an equivalence code for
267*18fd37a7SXin LI        each line.  It is this vector that is actually compared with that
268*18fd37a7SXin LI        of another file to generate differences.  */
269*18fd37a7SXin LI     lin *equivs;
270*18fd37a7SXin LI 
271*18fd37a7SXin LI     /* Vector, like the previous one except that
272*18fd37a7SXin LI        the elements for discarded lines have been squeezed out.  */
273*18fd37a7SXin LI     lin *undiscarded;
274*18fd37a7SXin LI 
275*18fd37a7SXin LI     /* Vector mapping virtual line numbers (not counting discarded lines)
276*18fd37a7SXin LI        to real ones (counting those lines).  Both are origin-0.  */
277*18fd37a7SXin LI     lin *realindexes;
278*18fd37a7SXin LI 
279*18fd37a7SXin LI     /* Total number of nondiscarded lines.  */
280*18fd37a7SXin LI     lin nondiscarded_lines;
281*18fd37a7SXin LI 
282*18fd37a7SXin LI     /* Vector, indexed by real origin-0 line number,
283*18fd37a7SXin LI        containing 1 for a line that is an insertion or a deletion.
284*18fd37a7SXin LI        The results of comparison are stored here.  */
285*18fd37a7SXin LI     char *changed;
286*18fd37a7SXin LI 
287*18fd37a7SXin LI     /* 1 if file ends in a line with no final newline.  */
288*18fd37a7SXin LI     bool missing_newline;
289*18fd37a7SXin LI 
290*18fd37a7SXin LI     /* 1 if at end of file.  */
291*18fd37a7SXin LI     bool eof;
292*18fd37a7SXin LI 
293*18fd37a7SXin LI     /* 1 more than the maximum equivalence value used for this or its
294*18fd37a7SXin LI        sibling file.  */
295*18fd37a7SXin LI     lin equiv_max;
296*18fd37a7SXin LI };
297*18fd37a7SXin LI 
/* The file buffer, considered as an array of bytes rather than
   as an array of words.  F is a pointer to an object with a `buffer'
   member; the result is that same address viewed as `char *'.  */
#define FILE_BUFFER(f) ((char *) (f)->buffer)
301*18fd37a7SXin LI 
302*18fd37a7SXin LI /* Data on two input files being compared.  */
303*18fd37a7SXin LI 
304*18fd37a7SXin LI struct comparison
305*18fd37a7SXin LI   {
306*18fd37a7SXin LI     struct file_data file[2];
307*18fd37a7SXin LI     struct comparison const *parent;  /* parent, if a recursive comparison */
308*18fd37a7SXin LI   };
309*18fd37a7SXin LI 
310*18fd37a7SXin LI /* Describe the two files currently being compared.  */
311*18fd37a7SXin LI 
312*18fd37a7SXin LI XTERN struct file_data files[2];
313*18fd37a7SXin LI 
314*18fd37a7SXin LI /* Stdio stream to output diffs to.  */
315*18fd37a7SXin LI 
316*18fd37a7SXin LI XTERN FILE *outfile;
317*18fd37a7SXin LI 
318*18fd37a7SXin LI /* Declare various functions.  */
319*18fd37a7SXin LI 
320*18fd37a7SXin LI /* analyze.c */
321*18fd37a7SXin LI int diff_2_files (struct comparison *);
322*18fd37a7SXin LI 
323*18fd37a7SXin LI /* context.c */
324*18fd37a7SXin LI void print_context_header (struct file_data[], bool);
325*18fd37a7SXin LI void print_context_script (struct change *, bool);
326*18fd37a7SXin LI 
327*18fd37a7SXin LI /* dir.c */
328*18fd37a7SXin LI int diff_dirs (struct comparison const *, int (*) (struct comparison const *, char const *, char const *));
329*18fd37a7SXin LI 
330*18fd37a7SXin LI /* ed.c */
331*18fd37a7SXin LI void print_ed_script (struct change *);
332*18fd37a7SXin LI void pr_forward_ed_script (struct change *);
333*18fd37a7SXin LI 
334*18fd37a7SXin LI /* ifdef.c */
335*18fd37a7SXin LI void print_ifdef_script (struct change *);
336*18fd37a7SXin LI 
337*18fd37a7SXin LI /* io.c */
338*18fd37a7SXin LI void file_block_read (struct file_data *, size_t);
339*18fd37a7SXin LI bool read_files (struct file_data[], bool);
340*18fd37a7SXin LI 
341*18fd37a7SXin LI /* normal.c */
342*18fd37a7SXin LI void print_normal_script (struct change *);
343*18fd37a7SXin LI 
344*18fd37a7SXin LI /* rcs.c */
345*18fd37a7SXin LI void print_rcs_script (struct change *);
346*18fd37a7SXin LI 
347*18fd37a7SXin LI /* side.c */
348*18fd37a7SXin LI void print_sdiff_script (struct change *);
349*18fd37a7SXin LI 
350*18fd37a7SXin LI /* util.c */
351*18fd37a7SXin LI extern char const change_letter[4];
352*18fd37a7SXin LI extern char const pr_program[];
353*18fd37a7SXin LI char *concat (char const *, char const *, char const *);
354*18fd37a7SXin LI char *dir_file_pathname (char const *, char const *);
355*18fd37a7SXin LI bool lines_differ (char const *, char const *);
356*18fd37a7SXin LI lin translate_line_number (struct file_data const *, lin);
357*18fd37a7SXin LI struct change *find_change (struct change *);
358*18fd37a7SXin LI struct change *find_reverse_change (struct change *);
359*18fd37a7SXin LI void *zalloc (size_t);
360*18fd37a7SXin LI enum changes analyze_hunk (struct change *, lin *, lin *, lin *, lin *);
361*18fd37a7SXin LI void begin_output (void);
362*18fd37a7SXin LI void debug_script (struct change *);
363*18fd37a7SXin LI void fatal (char const *) __attribute__((noreturn));
364*18fd37a7SXin LI void finish_output (void);
365*18fd37a7SXin LI void message (char const *, char const *, char const *);
366*18fd37a7SXin LI void message5 (char const *, char const *, char const *, char const *, char const *);
367*18fd37a7SXin LI void output_1_line (char const *, char const *, char const *, char const *);
368*18fd37a7SXin LI void perror_with_name (char const *);
369*18fd37a7SXin LI void pfatal_with_name (char const *) __attribute__((noreturn));
370*18fd37a7SXin LI void print_1_line (char const *, char const * const *);
371*18fd37a7SXin LI void print_message_queue (void);
372*18fd37a7SXin LI void print_number_range (char, struct file_data *, lin, lin);
373*18fd37a7SXin LI void print_script (struct change *, struct change * (*) (struct change *), void (*) (struct change *));
374*18fd37a7SXin LI void setup_output (char const *, char const *, bool);
375*18fd37a7SXin LI void translate_range (struct file_data const *, lin, lin, long int *, long int *);
376