xref: /freebsd-src/contrib/libdiff/lib/diff_internal.h (revision 59c8e88e72633afbc47a4ace0d2170d00d51f7dc)
1*59c8e88eSDag-Erling Smørgrav /* Generic infrastructure to implement various diff algorithms. */
2*59c8e88eSDag-Erling Smørgrav /*
3*59c8e88eSDag-Erling Smørgrav  * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
4*59c8e88eSDag-Erling Smørgrav  *
5*59c8e88eSDag-Erling Smørgrav  * Permission to use, copy, modify, and distribute this software for any
6*59c8e88eSDag-Erling Smørgrav  * purpose with or without fee is hereby granted, provided that the above
7*59c8e88eSDag-Erling Smørgrav  * copyright notice and this permission notice appear in all copies.
8*59c8e88eSDag-Erling Smørgrav  *
9*59c8e88eSDag-Erling Smørgrav  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10*59c8e88eSDag-Erling Smørgrav  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11*59c8e88eSDag-Erling Smørgrav  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12*59c8e88eSDag-Erling Smørgrav  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13*59c8e88eSDag-Erling Smørgrav  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14*59c8e88eSDag-Erling Smørgrav  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15*59c8e88eSDag-Erling Smørgrav  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16*59c8e88eSDag-Erling Smørgrav  */
17*59c8e88eSDag-Erling Smørgrav 
18*59c8e88eSDag-Erling Smørgrav #ifndef MAX
19*59c8e88eSDag-Erling Smørgrav #define MAX(A,B) ((A)>(B)?(A):(B))
20*59c8e88eSDag-Erling Smørgrav #endif
21*59c8e88eSDag-Erling Smørgrav #ifndef MIN
22*59c8e88eSDag-Erling Smørgrav #define MIN(A,B) ((A)<(B)?(A):(B))
23*59c8e88eSDag-Erling Smørgrav #endif
24*59c8e88eSDag-Erling Smørgrav 
25*59c8e88eSDag-Erling Smørgrav static inline bool
26*59c8e88eSDag-Erling Smørgrav diff_range_empty(const struct diff_range *r)
27*59c8e88eSDag-Erling Smørgrav {
28*59c8e88eSDag-Erling Smørgrav 	return r->start == r->end;
29*59c8e88eSDag-Erling Smørgrav }
30*59c8e88eSDag-Erling Smørgrav 
31*59c8e88eSDag-Erling Smørgrav static inline bool
32*59c8e88eSDag-Erling Smørgrav diff_ranges_touch(const struct diff_range *a, const struct diff_range *b)
33*59c8e88eSDag-Erling Smørgrav {
34*59c8e88eSDag-Erling Smørgrav 	return (a->end >= b->start) && (a->start <= b->end);
35*59c8e88eSDag-Erling Smørgrav }
36*59c8e88eSDag-Erling Smørgrav 
37*59c8e88eSDag-Erling Smørgrav static inline void
38*59c8e88eSDag-Erling Smørgrav diff_ranges_merge(struct diff_range *a, const struct diff_range *b)
39*59c8e88eSDag-Erling Smørgrav {
40*59c8e88eSDag-Erling Smørgrav 	*a = (struct diff_range){
41*59c8e88eSDag-Erling Smørgrav 		.start = MIN(a->start, b->start),
42*59c8e88eSDag-Erling Smørgrav 		.end = MAX(a->end, b->end),
43*59c8e88eSDag-Erling Smørgrav 	};
44*59c8e88eSDag-Erling Smørgrav }
45*59c8e88eSDag-Erling Smørgrav 
46*59c8e88eSDag-Erling Smørgrav static inline int
47*59c8e88eSDag-Erling Smørgrav diff_range_len(const struct diff_range *r)
48*59c8e88eSDag-Erling Smørgrav {
49*59c8e88eSDag-Erling Smørgrav 	if (!r)
50*59c8e88eSDag-Erling Smørgrav 		return 0;
51*59c8e88eSDag-Erling Smørgrav 	return r->end - r->start;
52*59c8e88eSDag-Erling Smørgrav }
53*59c8e88eSDag-Erling Smørgrav 
54*59c8e88eSDag-Erling Smørgrav /* Indicate whether two given diff atoms match. */
55*59c8e88eSDag-Erling Smørgrav int
56*59c8e88eSDag-Erling Smørgrav diff_atom_same(bool *same,
57*59c8e88eSDag-Erling Smørgrav 	       const struct diff_atom *left,
58*59c8e88eSDag-Erling Smørgrav 	       const struct diff_atom *right);
59*59c8e88eSDag-Erling Smørgrav 
60*59c8e88eSDag-Erling Smørgrav /* A diff chunk represents a set of atoms on the left and/or a set of atoms on
61*59c8e88eSDag-Erling Smørgrav  * the right.
62*59c8e88eSDag-Erling Smørgrav  *
63*59c8e88eSDag-Erling Smørgrav  * If solved == false:
64*59c8e88eSDag-Erling Smørgrav  * The diff algorithm has divided the source file, and this is a chunk that the
65*59c8e88eSDag-Erling Smørgrav  * inner_algo should run on next.
66*59c8e88eSDag-Erling Smørgrav  * The lines on the left should be diffed against the lines on the right.
67*59c8e88eSDag-Erling Smørgrav  * (If there are no left lines or no right lines, it implies solved == true,
68*59c8e88eSDag-Erling Smørgrav  * because there is nothing to diff.)
69*59c8e88eSDag-Erling Smørgrav  *
70*59c8e88eSDag-Erling Smørgrav  * If solved == true:
71*59c8e88eSDag-Erling Smørgrav  * If there are only left atoms, it is a chunk removing atoms from the left ("a
72*59c8e88eSDag-Erling Smørgrav  * minus chunk").
73*59c8e88eSDag-Erling Smørgrav  * If there are only right atoms, it is a chunk adding atoms from the right ("a
74*59c8e88eSDag-Erling Smørgrav  * plus chunk").
75*59c8e88eSDag-Erling Smørgrav  * If there are both left and right lines, it is a chunk of equal content on
76*59c8e88eSDag-Erling Smørgrav  * both sides, and left_count == right_count:
77*59c8e88eSDag-Erling Smørgrav  *
78*59c8e88eSDag-Erling Smørgrav  * - foo  }
79*59c8e88eSDag-Erling Smørgrav  * - bar  }-- diff_chunk{ left_start = &left.atoms.head[0], left_count = 3,
80*59c8e88eSDag-Erling Smørgrav  * - baz  }               right_start = NULL, right_count = 0 }
81*59c8e88eSDag-Erling Smørgrav  *   moo    }
82*59c8e88eSDag-Erling Smørgrav  *   goo    }-- diff_chunk{ left_start = &left.atoms.head[3], left_count = 3,
83*59c8e88eSDag-Erling Smørgrav  *   zoo    }              right_start = &right.atoms.head[0], right_count = 3 }
84*59c8e88eSDag-Erling Smørgrav  *  +loo      }
85*59c8e88eSDag-Erling Smørgrav  *  +roo      }-- diff_chunk{ left_start = NULL, left_count = 0,
86*59c8e88eSDag-Erling Smørgrav  *  +too      }            right_start = &right.atoms.head[3], right_count = 3 }
87*59c8e88eSDag-Erling Smørgrav  *
88*59c8e88eSDag-Erling Smørgrav  */
89*59c8e88eSDag-Erling Smørgrav struct diff_chunk {
90*59c8e88eSDag-Erling Smørgrav 	bool solved;
91*59c8e88eSDag-Erling Smørgrav 	struct diff_atom *left_start;
92*59c8e88eSDag-Erling Smørgrav 	unsigned int left_count;
93*59c8e88eSDag-Erling Smørgrav 	struct diff_atom *right_start;
94*59c8e88eSDag-Erling Smørgrav 	unsigned int right_count;
95*59c8e88eSDag-Erling Smørgrav };
96*59c8e88eSDag-Erling Smørgrav 
97*59c8e88eSDag-Erling Smørgrav #define DIFF_RESULT_ALLOC_BLOCKSIZE 128
98*59c8e88eSDag-Erling Smørgrav 
99*59c8e88eSDag-Erling Smørgrav struct diff_chunk_context;
100*59c8e88eSDag-Erling Smørgrav 
101*59c8e88eSDag-Erling Smørgrav bool
102*59c8e88eSDag-Erling Smørgrav diff_chunk_context_empty(const struct diff_chunk_context *cc);
103*59c8e88eSDag-Erling Smørgrav 
104*59c8e88eSDag-Erling Smørgrav bool
105*59c8e88eSDag-Erling Smørgrav diff_chunk_contexts_touch(const struct diff_chunk_context *cc,
106*59c8e88eSDag-Erling Smørgrav 			  const struct diff_chunk_context *other);
107*59c8e88eSDag-Erling Smørgrav 
108*59c8e88eSDag-Erling Smørgrav void
109*59c8e88eSDag-Erling Smørgrav diff_chunk_contexts_merge(struct diff_chunk_context *cc,
110*59c8e88eSDag-Erling Smørgrav 			  const struct diff_chunk_context *other);
111*59c8e88eSDag-Erling Smørgrav 
112*59c8e88eSDag-Erling Smørgrav struct diff_state {
113*59c8e88eSDag-Erling Smørgrav 	/* The final result passed to the original diff caller. */
114*59c8e88eSDag-Erling Smørgrav 	struct diff_result *result;
115*59c8e88eSDag-Erling Smørgrav 
116*59c8e88eSDag-Erling Smørgrav 	/* The root diff_data is in result->left,right, these are (possibly)
117*59c8e88eSDag-Erling Smørgrav 	 * subsections of the root data. */
118*59c8e88eSDag-Erling Smørgrav 	struct diff_data left;
119*59c8e88eSDag-Erling Smørgrav 	struct diff_data right;
120*59c8e88eSDag-Erling Smørgrav 
121*59c8e88eSDag-Erling Smørgrav 	unsigned int recursion_depth_left;
122*59c8e88eSDag-Erling Smørgrav 
123*59c8e88eSDag-Erling Smørgrav 	/* Remaining chunks from one diff algorithm pass, if any solved == false
124*59c8e88eSDag-Erling Smørgrav 	 * chunks came up. */
125*59c8e88eSDag-Erling Smørgrav 	diff_chunk_arraylist_t temp_result;
126*59c8e88eSDag-Erling Smørgrav 
127*59c8e88eSDag-Erling Smørgrav  	/* State buffer used by Myers algorithm. */
128*59c8e88eSDag-Erling Smørgrav 	int *kd_buf;
129*59c8e88eSDag-Erling Smørgrav 	size_t kd_buf_size; /* in units of sizeof(int), not bytes */
130*59c8e88eSDag-Erling Smørgrav };
131*59c8e88eSDag-Erling Smørgrav 
132*59c8e88eSDag-Erling Smørgrav struct diff_chunk *diff_state_add_chunk(struct diff_state *state, bool solved,
133*59c8e88eSDag-Erling Smørgrav 					struct diff_atom *left_start,
134*59c8e88eSDag-Erling Smørgrav 					unsigned int left_count,
135*59c8e88eSDag-Erling Smørgrav 					struct diff_atom *right_start,
136*59c8e88eSDag-Erling Smørgrav 					unsigned int right_count);
137*59c8e88eSDag-Erling Smørgrav 
138*59c8e88eSDag-Erling Smørgrav struct diff_output_info;
139*59c8e88eSDag-Erling Smørgrav 
140*59c8e88eSDag-Erling Smørgrav int diff_output_lines(struct diff_output_info *output_info, FILE *dest,
141*59c8e88eSDag-Erling Smørgrav 		       const char *prefix, struct diff_atom *start_atom,
142*59c8e88eSDag-Erling Smørgrav 		       unsigned int count);
143*59c8e88eSDag-Erling Smørgrav 
144*59c8e88eSDag-Erling Smørgrav int diff_output_trailing_newline_msg(struct diff_output_info *outinfo,
145*59c8e88eSDag-Erling Smørgrav 				     FILE *dest,
146*59c8e88eSDag-Erling Smørgrav 				     const struct diff_chunk *c);
147*59c8e88eSDag-Erling Smørgrav #define DIFF_FUNCTION_CONTEXT_SIZE	55
148*59c8e88eSDag-Erling Smørgrav int diff_output_match_function_prototype(char *prototype, size_t prototype_size,
149*59c8e88eSDag-Erling Smørgrav 					 int *last_prototype_idx,
150*59c8e88eSDag-Erling Smørgrav 					 const struct diff_result *result,
151*59c8e88eSDag-Erling Smørgrav 					 const struct diff_chunk_context *cc);
152*59c8e88eSDag-Erling Smørgrav 
153*59c8e88eSDag-Erling Smørgrav struct diff_output_info *diff_output_info_alloc(void);
154*59c8e88eSDag-Erling Smørgrav 
155*59c8e88eSDag-Erling Smørgrav void
156*59c8e88eSDag-Erling Smørgrav diff_data_init_subsection(struct diff_data *d, struct diff_data *parent,
157*59c8e88eSDag-Erling Smørgrav 			  struct diff_atom *from_atom, unsigned int atoms_count);
158