1*18fd37a7SXin LI /* Shared definitions for GNU DIFF 2*18fd37a7SXin LI 3*18fd37a7SXin LI Copyright (C) 1988, 1989, 1991, 1992, 1993, 1994, 1995, 1998, 2001, 4*18fd37a7SXin LI 2002, 2004 Free Software Foundation, Inc. 5*18fd37a7SXin LI 6*18fd37a7SXin LI This file is part of GNU DIFF. 7*18fd37a7SXin LI 8*18fd37a7SXin LI GNU DIFF is free software; you can redistribute it and/or modify 9*18fd37a7SXin LI it under the terms of the GNU General Public License as published by 10*18fd37a7SXin LI the Free Software Foundation; either version 2, or (at your option) 11*18fd37a7SXin LI any later version. 12*18fd37a7SXin LI 13*18fd37a7SXin LI GNU DIFF is distributed in the hope that it will be useful, 14*18fd37a7SXin LI but WITHOUT ANY WARRANTY; without even the implied warranty of 15*18fd37a7SXin LI MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16*18fd37a7SXin LI GNU General Public License for more details. 17*18fd37a7SXin LI 18*18fd37a7SXin LI You should have received a copy of the GNU General Public License 19*18fd37a7SXin LI along with this program; see the file COPYING. 20*18fd37a7SXin LI If not, write to the Free Software Foundation, 21*18fd37a7SXin LI 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 22*18fd37a7SXin LI 23*18fd37a7SXin LI #include "system.h" 24*18fd37a7SXin LI #include <regex.h> 25*18fd37a7SXin LI #include <stdio.h> 26*18fd37a7SXin LI #include <unlocked-io.h> 27*18fd37a7SXin LI 28*18fd37a7SXin LI /* What kind of changes a hunk contains. */ 29*18fd37a7SXin LI enum changes 30*18fd37a7SXin LI { 31*18fd37a7SXin LI /* No changes: lines common to both files. */ 32*18fd37a7SXin LI UNCHANGED, 33*18fd37a7SXin LI 34*18fd37a7SXin LI /* Deletes only: lines taken from just the first file. */ 35*18fd37a7SXin LI OLD, 36*18fd37a7SXin LI 37*18fd37a7SXin LI /* Inserts only: lines taken from just the second file. */ 38*18fd37a7SXin LI NEW, 39*18fd37a7SXin LI 40*18fd37a7SXin LI /* Both deletes and inserts: a hunk containing both old and new lines. */ 41*18fd37a7SXin LI CHANGED 42*18fd37a7SXin LI }; 43*18fd37a7SXin LI 44*18fd37a7SXin LI /* Variables for command line options */ 45*18fd37a7SXin LI 46*18fd37a7SXin LI #ifndef GDIFF_MAIN 47*18fd37a7SXin LI # define XTERN extern 48*18fd37a7SXin LI #else 49*18fd37a7SXin LI # define XTERN 50*18fd37a7SXin LI #endif 51*18fd37a7SXin LI 52*18fd37a7SXin LI enum output_style 53*18fd37a7SXin LI { 54*18fd37a7SXin LI /* No output style specified. */ 55*18fd37a7SXin LI OUTPUT_UNSPECIFIED, 56*18fd37a7SXin LI 57*18fd37a7SXin LI /* Default output style. */ 58*18fd37a7SXin LI OUTPUT_NORMAL, 59*18fd37a7SXin LI 60*18fd37a7SXin LI /* Output the differences with lines of context before and after (-c). */ 61*18fd37a7SXin LI OUTPUT_CONTEXT, 62*18fd37a7SXin LI 63*18fd37a7SXin LI /* Output the differences in a unified context diff format (-u). */ 64*18fd37a7SXin LI OUTPUT_UNIFIED, 65*18fd37a7SXin LI 66*18fd37a7SXin LI /* Output the differences as commands suitable for `ed' (-e). */ 67*18fd37a7SXin LI OUTPUT_ED, 68*18fd37a7SXin LI 69*18fd37a7SXin LI /* Output the diff as a forward ed script (-f). */ 70*18fd37a7SXin LI OUTPUT_FORWARD_ED, 71*18fd37a7SXin LI 72*18fd37a7SXin LI /* Like -f, but output a count of changed lines in each "command" (-n). */ 73*18fd37a7SXin LI OUTPUT_RCS, 74*18fd37a7SXin LI 75*18fd37a7SXin LI /* Output merged #ifdef'd file (-D). */ 76*18fd37a7SXin LI OUTPUT_IFDEF, 77*18fd37a7SXin LI 78*18fd37a7SXin LI /* Output sdiff style (-y). */ 79*18fd37a7SXin LI OUTPUT_SDIFF 80*18fd37a7SXin LI }; 81*18fd37a7SXin LI 82*18fd37a7SXin LI /* True for output styles that are robust, 83*18fd37a7SXin LI i.e. can handle a file that ends in a non-newline. */ 84*18fd37a7SXin LI #define ROBUST_OUTPUT_STYLE(S) ((S) != OUTPUT_ED && (S) != OUTPUT_FORWARD_ED) 85*18fd37a7SXin LI 86*18fd37a7SXin LI XTERN enum output_style output_style; 87*18fd37a7SXin LI 88*18fd37a7SXin LI /* Nonzero if output cannot be generated for identical files. */ 89*18fd37a7SXin LI XTERN bool no_diff_means_no_output; 90*18fd37a7SXin LI 91*18fd37a7SXin LI /* Number of lines of context to show in each set of diffs. 92*18fd37a7SXin LI This is zero when context is not to be shown. */ 93*18fd37a7SXin LI XTERN lin context; 94*18fd37a7SXin LI 95*18fd37a7SXin LI /* Consider all files as text files (-a). 96*18fd37a7SXin LI Don't interpret codes over 0177 as implying a "binary file". */ 97*18fd37a7SXin LI XTERN bool text; 98*18fd37a7SXin LI 99*18fd37a7SXin LI /* Number of lines to keep in identical prefix and suffix. */ 100*18fd37a7SXin LI XTERN lin horizon_lines; 101*18fd37a7SXin LI 102*18fd37a7SXin LI /* The significance of white space during comparisons. */ 103*18fd37a7SXin LI XTERN enum 104*18fd37a7SXin LI { 105*18fd37a7SXin LI /* All white space is significant (the default). */ 106*18fd37a7SXin LI IGNORE_NO_WHITE_SPACE, 107*18fd37a7SXin LI 108*18fd37a7SXin LI /* Ignore changes due to tab expansion (-E). */ 109*18fd37a7SXin LI IGNORE_TAB_EXPANSION, 110*18fd37a7SXin LI 111*18fd37a7SXin LI /* Ignore changes in horizontal white space (-b). */ 112*18fd37a7SXin LI IGNORE_SPACE_CHANGE, 113*18fd37a7SXin LI 114*18fd37a7SXin LI /* Ignore all horizontal white space (-w). */ 115*18fd37a7SXin LI IGNORE_ALL_SPACE 116*18fd37a7SXin LI } ignore_white_space; 117*18fd37a7SXin LI 118*18fd37a7SXin LI /* Ignore changes that affect only blank lines (-B). */ 119*18fd37a7SXin LI XTERN bool ignore_blank_lines; 120*18fd37a7SXin LI 121*18fd37a7SXin LI /* Files can be compared byte-by-byte, as if they were binary. 122*18fd37a7SXin LI This depends on various options. */ 123*18fd37a7SXin LI XTERN bool files_can_be_treated_as_binary; 124*18fd37a7SXin LI 125*18fd37a7SXin LI /* Ignore differences in case of letters (-i). */ 126*18fd37a7SXin LI XTERN bool ignore_case; 127*18fd37a7SXin LI 128*18fd37a7SXin LI /* Ignore differences in case of letters in file names. */ 129*18fd37a7SXin LI XTERN bool ignore_file_name_case; 130*18fd37a7SXin LI 131*18fd37a7SXin LI /* File labels for `-c' output headers (--label). */ 132*18fd37a7SXin LI XTERN char *file_label[2]; 133*18fd37a7SXin LI 134*18fd37a7SXin LI /* Regexp to identify function-header lines (-F). */ 135*18fd37a7SXin LI XTERN struct re_pattern_buffer function_regexp; 136*18fd37a7SXin LI 137*18fd37a7SXin LI /* Ignore changes that affect only lines matching this regexp (-I). */ 138*18fd37a7SXin LI XTERN struct re_pattern_buffer ignore_regexp; 139*18fd37a7SXin LI 140*18fd37a7SXin LI /* Say only whether files differ, not how (-q). */ 141*18fd37a7SXin LI XTERN bool brief; 142*18fd37a7SXin LI 143*18fd37a7SXin LI /* Expand tabs in the output so the text lines up properly 144*18fd37a7SXin LI despite the characters added to the front of each line (-t). */ 145*18fd37a7SXin LI XTERN bool expand_tabs; 146*18fd37a7SXin LI 147*18fd37a7SXin LI /* Number of columns between tab stops. */ 148*18fd37a7SXin LI XTERN size_t tabsize; 149*18fd37a7SXin LI 150*18fd37a7SXin LI /* Use a tab in the output, rather than a space, before the text of an 151*18fd37a7SXin LI input line, so as to keep the proper alignment in the input line 152*18fd37a7SXin LI without changing the characters in it (-T). */ 153*18fd37a7SXin LI XTERN bool initial_tab; 154*18fd37a7SXin LI 155*18fd37a7SXin LI /* Remove trailing carriage returns from input. */ 156*18fd37a7SXin LI XTERN bool strip_trailing_cr; 157*18fd37a7SXin LI 158*18fd37a7SXin LI /* In directory comparison, specify file to start with (-S). 159*18fd37a7SXin LI This is used for resuming an aborted comparison. 160*18fd37a7SXin LI All file names less than this name are ignored. */ 161*18fd37a7SXin LI XTERN char const *starting_file; 162*18fd37a7SXin LI 163*18fd37a7SXin LI /* Pipe each file's output through pr (-l). */ 164*18fd37a7SXin LI XTERN bool paginate; 165*18fd37a7SXin LI 166*18fd37a7SXin LI /* Line group formats for unchanged, old, new, and changed groups. */ 167*18fd37a7SXin LI XTERN char const *group_format[CHANGED + 1]; 168*18fd37a7SXin LI 169*18fd37a7SXin LI /* Line formats for unchanged, old, and new lines. */ 170*18fd37a7SXin LI XTERN char const *line_format[NEW + 1]; 171*18fd37a7SXin LI 172*18fd37a7SXin LI /* If using OUTPUT_SDIFF print extra information to help the sdiff filter. */ 173*18fd37a7SXin LI XTERN bool sdiff_merge_assist; 174*18fd37a7SXin LI 175*18fd37a7SXin LI /* Tell OUTPUT_SDIFF to show only the left version of common lines. */ 176*18fd37a7SXin LI XTERN bool left_column; 177*18fd37a7SXin LI 178*18fd37a7SXin LI /* Tell OUTPUT_SDIFF to not show common lines. */ 179*18fd37a7SXin LI XTERN bool suppress_common_lines; 180*18fd37a7SXin LI 181*18fd37a7SXin LI /* The half line width and column 2 offset for OUTPUT_SDIFF. */ 182*18fd37a7SXin LI XTERN size_t sdiff_half_width; 183*18fd37a7SXin LI XTERN size_t sdiff_column2_offset; 184*18fd37a7SXin LI 185*18fd37a7SXin LI /* String containing all the command options diff received, 186*18fd37a7SXin LI with spaces between and at the beginning but none at the end. 187*18fd37a7SXin LI If there were no options given, this string is empty. */ 188*18fd37a7SXin LI XTERN char *switch_string; 189*18fd37a7SXin LI 190*18fd37a7SXin LI /* Use heuristics for better speed with large files with a small 191*18fd37a7SXin LI density of changes. */ 192*18fd37a7SXin LI XTERN bool speed_large_files; 193*18fd37a7SXin LI 194*18fd37a7SXin LI /* Patterns that match file names to be excluded. */ 195*18fd37a7SXin LI XTERN struct exclude *excluded; 196*18fd37a7SXin LI 197*18fd37a7SXin LI /* Don't discard lines. This makes things slower (sometimes much 198*18fd37a7SXin LI slower) but will find a guaranteed minimal set of changes. */ 199*18fd37a7SXin LI XTERN bool minimal; 200*18fd37a7SXin LI 201*18fd37a7SXin LI /* Name of program the user invoked (for error messages). */ 202*18fd37a7SXin LI XTERN char *program_name; 203*18fd37a7SXin LI 204*18fd37a7SXin LI /* The strftime format to use for time strings. */ 205*18fd37a7SXin LI XTERN char const *time_format; 206*18fd37a7SXin LI 207*18fd37a7SXin LI /* The result of comparison is an "edit script": a chain of `struct change'. 208*18fd37a7SXin LI Each `struct change' represents one place where some lines are deleted 209*18fd37a7SXin LI and some are inserted. 210*18fd37a7SXin LI 211*18fd37a7SXin LI LINE0 and LINE1 are the first affected lines in the two files (origin 0). 212*18fd37a7SXin LI DELETED is the number of lines deleted here from file 0. 213*18fd37a7SXin LI INSERTED is the number of lines inserted here in file 1. 214*18fd37a7SXin LI 215*18fd37a7SXin LI If DELETED is 0 then LINE0 is the number of the line before 216*18fd37a7SXin LI which the insertion was done; vice versa for INSERTED and LINE1. */ 217*18fd37a7SXin LI 218*18fd37a7SXin LI struct change 219*18fd37a7SXin LI { 220*18fd37a7SXin LI struct change *link; /* Previous or next edit command */ 221*18fd37a7SXin LI lin inserted; /* # lines of file 1 changed here. */ 222*18fd37a7SXin LI lin deleted; /* # lines of file 0 changed here. */ 223*18fd37a7SXin LI lin line0; /* Line number of 1st deleted line. */ 224*18fd37a7SXin LI lin line1; /* Line number of 1st inserted line. */ 225*18fd37a7SXin LI bool ignore; /* Flag used in context.c. */ 226*18fd37a7SXin LI }; 227*18fd37a7SXin LI 228*18fd37a7SXin LI /* Structures that describe the input files. */ 229*18fd37a7SXin LI 230*18fd37a7SXin LI /* Data on one input file being compared. */ 231*18fd37a7SXin LI 232*18fd37a7SXin LI struct file_data { 233*18fd37a7SXin LI int desc; /* File descriptor */ 234*18fd37a7SXin LI char const *name; /* File name */ 235*18fd37a7SXin LI struct stat stat; /* File status */ 236*18fd37a7SXin LI 237*18fd37a7SXin LI /* Buffer in which text of file is read. */ 238*18fd37a7SXin LI word *buffer; 239*18fd37a7SXin LI 240*18fd37a7SXin LI /* Allocated size of buffer, in bytes. Always a multiple of 241*18fd37a7SXin LI sizeof *buffer. */ 242*18fd37a7SXin LI size_t bufsize; 243*18fd37a7SXin LI 244*18fd37a7SXin LI /* Number of valid bytes now in the buffer. */ 245*18fd37a7SXin LI size_t buffered; 246*18fd37a7SXin LI 247*18fd37a7SXin LI /* Array of pointers to lines in the file. */ 248*18fd37a7SXin LI char const **linbuf; 249*18fd37a7SXin LI 250*18fd37a7SXin LI /* linbuf_base <= buffered_lines <= valid_lines <= alloc_lines. 251*18fd37a7SXin LI linebuf[linbuf_base ... buffered_lines - 1] are possibly differing. 252*18fd37a7SXin LI linebuf[linbuf_base ... valid_lines - 1] contain valid data. 253*18fd37a7SXin LI linebuf[linbuf_base ... alloc_lines - 1] are allocated. */ 254*18fd37a7SXin LI lin linbuf_base, buffered_lines, valid_lines, alloc_lines; 255*18fd37a7SXin LI 256*18fd37a7SXin LI /* Pointer to end of prefix of this file to ignore when hashing. */ 257*18fd37a7SXin LI char const *prefix_end; 258*18fd37a7SXin LI 259*18fd37a7SXin LI /* Count of lines in the prefix. 260*18fd37a7SXin LI There are this many lines in the file before linbuf[0]. */ 261*18fd37a7SXin LI lin prefix_lines; 262*18fd37a7SXin LI 263*18fd37a7SXin LI /* Pointer to start of suffix of this file to ignore when hashing. */ 264*18fd37a7SXin LI char const *suffix_begin; 265*18fd37a7SXin LI 266*18fd37a7SXin LI /* Vector, indexed by line number, containing an equivalence code for 267*18fd37a7SXin LI each line. It is this vector that is actually compared with that 268*18fd37a7SXin LI of another file to generate differences. */ 269*18fd37a7SXin LI lin *equivs; 270*18fd37a7SXin LI 271*18fd37a7SXin LI /* Vector, like the previous one except that 272*18fd37a7SXin LI the elements for discarded lines have been squeezed out. */ 273*18fd37a7SXin LI lin *undiscarded; 274*18fd37a7SXin LI 275*18fd37a7SXin LI /* Vector mapping virtual line numbers (not counting discarded lines) 276*18fd37a7SXin LI to real ones (counting those lines). Both are origin-0. */ 277*18fd37a7SXin LI lin *realindexes; 278*18fd37a7SXin LI 279*18fd37a7SXin LI /* Total number of nondiscarded lines. */ 280*18fd37a7SXin LI lin nondiscarded_lines; 281*18fd37a7SXin LI 282*18fd37a7SXin LI /* Vector, indexed by real origin-0 line number, 283*18fd37a7SXin LI containing 1 for a line that is an insertion or a deletion. 284*18fd37a7SXin LI The results of comparison are stored here. */ 285*18fd37a7SXin LI char *changed; 286*18fd37a7SXin LI 287*18fd37a7SXin LI /* 1 if file ends in a line with no final newline. */ 288*18fd37a7SXin LI bool missing_newline; 289*18fd37a7SXin LI 290*18fd37a7SXin LI /* 1 if at end of file. */ 291*18fd37a7SXin LI bool eof; 292*18fd37a7SXin LI 293*18fd37a7SXin LI /* 1 more than the maximum equivalence value used for this or its 294*18fd37a7SXin LI sibling file. */ 295*18fd37a7SXin LI lin equiv_max; 296*18fd37a7SXin LI }; 297*18fd37a7SXin LI 298*18fd37a7SXin LI /* The file buffer, considered as an array of bytes rather than 299*18fd37a7SXin LI as an array of words. */ 300*18fd37a7SXin LI #define FILE_BUFFER(f) ((char *) (f)->buffer) 301*18fd37a7SXin LI 302*18fd37a7SXin LI /* Data on two input files being compared. */ 303*18fd37a7SXin LI 304*18fd37a7SXin LI struct comparison 305*18fd37a7SXin LI { 306*18fd37a7SXin LI struct file_data file[2]; 307*18fd37a7SXin LI struct comparison const *parent; /* parent, if a recursive comparison */ 308*18fd37a7SXin LI }; 309*18fd37a7SXin LI 310*18fd37a7SXin LI /* Describe the two files currently being compared. */ 311*18fd37a7SXin LI 312*18fd37a7SXin LI XTERN struct file_data files[2]; 313*18fd37a7SXin LI 314*18fd37a7SXin LI /* Stdio stream to output diffs to. */ 315*18fd37a7SXin LI 316*18fd37a7SXin LI XTERN FILE *outfile; 317*18fd37a7SXin LI 318*18fd37a7SXin LI /* Declare various functions. */ 319*18fd37a7SXin LI 320*18fd37a7SXin LI /* analyze.c */ 321*18fd37a7SXin LI int diff_2_files (struct comparison *); 322*18fd37a7SXin LI 323*18fd37a7SXin LI /* context.c */ 324*18fd37a7SXin LI void print_context_header (struct file_data[], bool); 325*18fd37a7SXin LI void print_context_script (struct change *, bool); 326*18fd37a7SXin LI 327*18fd37a7SXin LI /* dir.c */ 328*18fd37a7SXin LI int diff_dirs (struct comparison const *, int (*) (struct comparison const *, char const *, char const *)); 329*18fd37a7SXin LI 330*18fd37a7SXin LI /* ed.c */ 331*18fd37a7SXin LI void print_ed_script (struct change *); 332*18fd37a7SXin LI void pr_forward_ed_script (struct change *); 333*18fd37a7SXin LI 334*18fd37a7SXin LI /* ifdef.c */ 335*18fd37a7SXin LI void print_ifdef_script (struct change *); 336*18fd37a7SXin LI 337*18fd37a7SXin LI /* io.c */ 338*18fd37a7SXin LI void file_block_read (struct file_data *, size_t); 339*18fd37a7SXin LI bool read_files (struct file_data[], bool); 340*18fd37a7SXin LI 341*18fd37a7SXin LI /* normal.c */ 342*18fd37a7SXin LI void print_normal_script (struct change *); 343*18fd37a7SXin LI 344*18fd37a7SXin LI /* rcs.c */ 345*18fd37a7SXin LI void print_rcs_script (struct change *); 346*18fd37a7SXin LI 347*18fd37a7SXin LI /* side.c */ 348*18fd37a7SXin LI void print_sdiff_script (struct change *); 349*18fd37a7SXin LI 350*18fd37a7SXin LI /* util.c */ 351*18fd37a7SXin LI extern char const change_letter[4]; 352*18fd37a7SXin LI extern char const pr_program[]; 353*18fd37a7SXin LI char *concat (char const *, char const *, char const *); 354*18fd37a7SXin LI char *dir_file_pathname (char const *, char const *); 355*18fd37a7SXin LI bool lines_differ (char const *, char const *); 356*18fd37a7SXin LI lin translate_line_number (struct file_data const *, lin); 357*18fd37a7SXin LI struct change *find_change (struct change *); 358*18fd37a7SXin LI struct change *find_reverse_change (struct change *); 359*18fd37a7SXin LI void *zalloc (size_t); 360*18fd37a7SXin LI enum changes analyze_hunk (struct change *, lin *, lin *, lin *, lin *); 361*18fd37a7SXin LI void begin_output (void); 362*18fd37a7SXin LI void debug_script (struct change *); 363*18fd37a7SXin LI void fatal (char const *) __attribute__((noreturn)); 364*18fd37a7SXin LI void finish_output (void); 365*18fd37a7SXin LI void message (char const *, char const *, char const *); 366*18fd37a7SXin LI void message5 (char const *, char const *, char const *, char const *, char const *); 367*18fd37a7SXin LI void output_1_line (char const *, char const *, char const *, char const *); 368*18fd37a7SXin LI void perror_with_name (char const *); 369*18fd37a7SXin LI void pfatal_with_name (char const *) __attribute__((noreturn)); 370*18fd37a7SXin LI void print_1_line (char const *, char const * const *); 371*18fd37a7SXin LI void print_message_queue (void); 372*18fd37a7SXin LI void print_number_range (char, struct file_data *, lin, lin); 373*18fd37a7SXin LI void print_script (struct change *, struct change * (*) (struct change *), void (*) (struct change *)); 374*18fd37a7SXin LI void setup_output (char const *, char const *, bool); 375*18fd37a7SXin LI void translate_range (struct file_data const *, lin, lin, long int *, long int *); 376