xref: /openbsd-src/usr.bin/sdiff/sdiff.c (revision a28daedfc357b214be5c701aa8ba8adb29a7f1c2)
1 /*	$OpenBSD: sdiff.c,v 1.23 2007/09/02 15:19:34 deraadt Exp $ */
2 
3 /*
4  * Written by Raymond Lai <ray@cyth.net>.
5  * Public domain.
6  */
7 
8 #include <sys/param.h>
9 #include <sys/queue.h>
10 #include <sys/stat.h>
11 #include <sys/types.h>
12 #include <sys/wait.h>
13 
14 #include <ctype.h>
15 #include <err.h>
16 #include <errno.h>
17 #include <fcntl.h>
18 #include <getopt.h>
19 #include <limits.h>
20 #include <paths.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <unistd.h>
25 #include <util.h>
26 
27 #include "common.h"
28 #include "extern.h"
29 
30 #define WIDTH 130
31 /*
32  * Each column must be at least one character wide, plus three
33  * characters between the columns (space, [<|>], space).
34  */
35 #define WIDTH_MIN 5
36 
/*
 * A single diff line: one row of side-by-side output.
 * Entries are linked into the global diffhead queue by enqueue() and
 * released, together with both strings, by freediff().
 */
struct diffline {
	SIMPLEQ_ENTRY(diffline) diffentries;	/* link in diffhead */
	char	*left;		/* left column text, or NULL if empty */
	char	 div;		/* divider character printed between columns */
	char	*right;		/* right column text, or NULL if empty */
};
44 
/* Prototypes for the file-local functions defined below. */
static void astrcat(char **, const char *);
static void enqueue(char *, char, char *);
static char *mktmpcpy(const char *);
static void freediff(struct diffline *);
static void int_usage(void);
static int parsecmd(FILE *, FILE *, FILE *);
static void printa(FILE *, size_t);
static void printc(FILE *, size_t, FILE *, size_t);
static void printcol(const char *, size_t *, const size_t);
static void printd(FILE *, size_t);
static void println(const char *, const char, const char *);
static void processq(void);
static void prompt(const char *, const char *);
__dead static void usage(void);
static char *xfgets(FILE *);
60 
/* Queue of pending output rows; filled by enqueue(), drained by processq(). */
SIMPLEQ_HEAD(, diffline) diffhead = SIMPLEQ_HEAD_INITIALIZER(diffhead);
size_t	 line_width;	/* width of a line (two columns and divider) */
size_t	 width;		/* width of each column */
size_t	 file1ln, file2ln;	/* line number of file1 and file2 */
int	 Iflag = 0;	/* ignore sets matching regexp */
int	 lflag;		/* print only left column for identical lines */
int	 sflag;		/* skip identical lines */
FILE	*outfile;	/* file to save changes to; NULL unless -o was given */
const char *tmpdir;	/* TMPDIR or /tmp */
70 
/*
 * Long option table for getopt_long().  Each entry maps a long name to
 * the option character handled by the switch in main().  The values 'F'
 * and 'S' do not appear in main()'s short option string, so
 * --diff-program and --strip-trailing-cr can only be given in long form.
 */
static struct option longopts[] = {
	{ "text",			no_argument,		NULL,	'a' },
	{ "ignore-blank-lines",		no_argument,		NULL,	'B' },
	{ "ignore-space-change",	no_argument,		NULL,	'b' },
	{ "minimal",			no_argument,		NULL,	'd' },
	{ "ignore-tab-expansion",	no_argument,		NULL,	'E' },
	{ "diff-program",		required_argument,	NULL,	'F' },
	{ "speed-large-files",		no_argument,		NULL,	'H' },
	{ "ignore-matching-lines",	required_argument,	NULL,	'I' },
	{ "ignore-case",		no_argument,		NULL,	'i' },
	{ "left-column",		no_argument,		NULL,	'l' },
	{ "output",			required_argument,	NULL,	'o' },
	{ "strip-trailing-cr",		no_argument,		NULL,	'S' },
	{ "suppress-common-lines",	no_argument,		NULL,	's' },
	{ "expand-tabs",		no_argument,		NULL,	't' },
	{ "ignore-all-space",		no_argument,		NULL,	'W' },
	{ "width",			required_argument,	NULL,	'w' },
	/* Terminating all-zero entry. */
	{ NULL,				0,			NULL,	 0  }
};
90 
91 /*
92  * Create temporary file if source_file is not a regular file.
93  * Returns temporary file name if one was malloced, NULL if unnecessary.
94  */
95 static char *
96 mktmpcpy(const char *source_file)
97 {
98 	struct stat sb;
99 	ssize_t rcount;
100 	int ifd, ofd;
101 	u_char buf[BUFSIZ];
102 	char *target_file;
103 
104 	/* Open input and output. */
105 	ifd = open(source_file, O_RDONLY, 0);
106 	/* File was opened successfully. */
107 	if (ifd != -1) {
108 		if (fstat(ifd, &sb) == -1)
109 			err(2, "error getting file status from %s", source_file);
110 
111 		/* Regular file. */
112 		if (S_ISREG(sb.st_mode))
113 			return (NULL);
114 	} else {
115 		/* If ``-'' does not exist the user meant stdin. */
116 		if (errno == ENOENT && strcmp(source_file, "-") == 0)
117 			ifd = STDIN_FILENO;
118 		else
119 			err(2, "error opening %s", source_file);
120 	}
121 
122 	/* Not a regular file, so copy input into temporary file. */
123 	if (asprintf(&target_file, "%s/sdiff.XXXXXXXXXX", tmpdir) == -1)
124 		err(2, "asprintf");
125 	if ((ofd = mkstemp(target_file)) == -1) {
126 		warn("error opening %s", target_file);
127 		goto FAIL;
128 	}
129 	while ((rcount = read(ifd, buf, sizeof(buf))) != -1 &&
130 	    rcount != 0) {
131 		ssize_t wcount;
132 
133 		wcount = write(ofd, buf, (size_t)rcount);
134 		if (-1 == wcount || rcount != wcount) {
135 			warn("error writing to %s", target_file);
136 			goto FAIL;
137 		}
138 	}
139 	if (rcount == -1) {
140 		warn("error reading from %s", source_file);
141 		goto FAIL;
142 	}
143 
144 	close(ifd);
145 	close(ofd);
146 
147 	return (target_file);
148 
149 FAIL:
150 	unlink(target_file);
151 	exit(2);
152 }
153 
154 int
155 main(int argc, char **argv)
156 {
157 	FILE *diffpipe, *file1, *file2;
158 	size_t diffargc = 0, wflag = WIDTH;
159 	int ch, fd[2], status;
160 	pid_t pid;
161 	char **diffargv, *diffprog = "diff", *filename1, *filename2,
162 	    *tmp1, *tmp2, *s1, *s2;
163 
164 	/*
165 	 * Process diff flags.
166 	 */
167 	/*
168 	 * Allocate memory for diff arguments and NULL.
169 	 * Each flag has at most one argument, so doubling argc gives an
170 	 * upper limit of how many diff args can be passed.  argv[0],
171 	 * file1, and file2 won't have arguments so doubling them will
172 	 * waste some memory; however we need an extra space for the
173 	 * NULL at the end, so it sort of works out.
174 	 */
175 	if (!(diffargv = calloc(argc, sizeof(char **) * 2)))
176 		err(2, "main");
177 
178 	/* Add first argument, the program name. */
179 	diffargv[diffargc++] = diffprog;
180 
181 	while ((ch = getopt_long(argc, argv, "aBbdEHI:ilo:stWw:",
182 	    longopts, NULL)) != -1) {
183 		const char *errstr;
184 
185 		switch (ch) {
186 		case 'a':
187 			diffargv[diffargc++] = "-a";
188 			break;
189 		case 'B':
190 			diffargv[diffargc++] = "-B";
191 			break;
192 		case 'b':
193 			diffargv[diffargc++] = "-b";
194 			break;
195 		case 'd':
196 			diffargv[diffargc++] = "-d";
197 			break;
198 		case 'E':
199 			diffargv[diffargc++] = "-E";
200 			break;
201 		case 'F':
202 			diffargv[0] = diffprog = optarg;
203 			break;
204 		case 'H':
205 			diffargv[diffargc++] = "-H";
206 			break;
207 		case 'I':
208 			Iflag = 1;
209 			diffargv[diffargc++] = "-I";
210 			diffargv[diffargc++] = optarg;
211 			break;
212 		case 'i':
213 			diffargv[diffargc++] = "-i";
214 			break;
215 		case 'l':
216 			lflag = 1;
217 			break;
218 		case 'o':
219 			if ((outfile = fopen(optarg, "w")) == NULL)
220 				err(2, "could not open: %s", optarg);
221 			break;
222 		case 'S':
223 			diffargv[diffargc++] = "--strip-trailing-cr";
224 			break;
225 		case 's':
226 			sflag = 1;
227 			break;
228 		case 't':
229 			diffargv[diffargc++] = "-t";
230 			break;
231 		case 'W':
232 			diffargv[diffargc++] = "-w";
233 			break;
234 		case 'w':
235 			wflag = strtonum(optarg, WIDTH_MIN,
236 			    INT_MAX, &errstr);
237 			if (errstr)
238 				errx(2, "width is %s: %s", errstr, optarg);
239 			break;
240 		default:
241 			usage();
242 		}
243 
244 	}
245 	argc -= optind;
246 	argv += optind;
247 
248 	if (argc != 2)
249 		usage();
250 
251 	if ((tmpdir = getenv("TMPDIR")) == NULL)
252 		tmpdir = _PATH_TMP;
253 
254 	filename1 = argv[0];
255 	filename2 = argv[1];
256 
257 	/*
258 	 * Create temporary files for diff and sdiff to share if file1
259 	 * or file2 are not regular files.  This allows sdiff and diff
260 	 * to read the same inputs if one or both inputs are stdin.
261 	 *
262 	 * If any temporary files were created, their names would be
263 	 * saved in tmp1 or tmp2.  tmp1 should never equal tmp2.
264 	 */
265 	tmp1 = tmp2 = NULL;
266 	/* file1 and file2 are the same, so copy to same temp file. */
267 	if (strcmp(filename1, filename2) == 0) {
268 		if ((tmp1 = mktmpcpy(filename1)))
269 			filename1 = filename2 = tmp1;
270 	/* Copy file1 and file2 into separate temp files. */
271 	} else {
272 		if ((tmp1 = mktmpcpy(filename1)))
273 			filename1 = tmp1;
274 		if ((tmp2 = mktmpcpy(filename2)))
275 			filename2 = tmp2;
276 	}
277 
278 	diffargv[diffargc++] = filename1;
279 	diffargv[diffargc++] = filename2;
280 	/* Add NULL to end of array to indicate end of array. */
281 	diffargv[diffargc++] = NULL;
282 
283 	/* Subtract column divider and divide by two. */
284 	width = (wflag - 3) / 2;
285 	/* Make sure line_width can fit in size_t. */
286 	if (width > (SIZE_T_MAX - 3) / 2)
287 		errx(2, "width is too large: %zu", width);
288 	line_width = width * 2 + 3;
289 
290 	if (pipe(fd))
291 		err(2, "pipe");
292 
293 	switch(pid = fork()) {
294 	case 0:
295 		/* child */
296 		/* We don't read from the pipe. */
297 		close(fd[0]);
298 		if (dup2(fd[1], STDOUT_FILENO) == -1)
299 			err(2, "child could not duplicate descriptor");
300 		/* Free unused descriptor. */
301 		close(fd[1]);
302 
303 		execvp(diffprog, diffargv);
304 		err(2, "could not execute diff: %s", diffprog);
305 	case -1:
306 		err(2, "could not fork");
307 	}
308 
309 	/* parent */
310 	/* We don't write to the pipe. */
311 	close(fd[1]);
312 
313 	/* Open pipe to diff command. */
314 	if ((diffpipe = fdopen(fd[0], "r")) == NULL)
315 		err(2, "could not open diff pipe");
316 	if ((file1 = fopen(filename1, "r")) == NULL)
317 		err(2, "could not open %s", filename1);
318 	if ((file2 = fopen(filename2, "r")) == NULL)
319 		err(2, "could not open %s", filename2);
320 
321 	/* Line numbers start at one. */
322 	file1ln = file2ln = 1;
323 
324 	/* Read and parse diff output. */
325 	while (parsecmd(diffpipe, file1, file2) != EOF)
326 		;
327 	fclose(diffpipe);
328 
329 	/* Wait for diff to exit. */
330 	if (waitpid(pid, &status, 0) == -1 || !WIFEXITED(status) ||
331 	    WEXITSTATUS(status) >= 2)
332 		err(2, "diff exited abnormally");
333 
334 	/* Delete and free unneeded temporary files. */
335 	if (tmp1)
336 		if (unlink(tmp1))
337 			warn("error deleting %s", tmp1);
338 	if (tmp2)
339 		if (unlink(tmp2))
340 			warn("error deleting %s", tmp2);
341 	free(tmp1);
342 	free(tmp2);
343 	filename1 = filename2 = tmp1 = tmp2 = NULL;
344 
345 	/* No more diffs, so print common lines. */
346 	if (lflag)
347 		while ((s1 = xfgets(file1)))
348 			enqueue(s1, ' ', NULL);
349 	else
350 		for (;;) {
351 			s1 = xfgets(file1);
352 			s2 = xfgets(file2);
353 			if (s1 || s2)
354 				enqueue(s1, ' ', s2);
355 			else
356 				break;
357 		}
358 	fclose(file1);
359 	fclose(file2);
360 	/* Process unmodified lines. */
361 	processq();
362 
363 	/* Return diff exit status. */
364 	return (WEXITSTATUS(status));
365 }
366 
/*
 * Write s to stdout, stopping at the end of the string or once the
 * cursor reaches col_max.  *col is the current screen column on entry
 * and is advanced for every character written; a tab moves it to the
 * next multiple of eight.  A tab that would overshoot col_max (or
 * overflow the column counter) ends the output without being written.
 */
static void
printcol(const char *s, size_t *col, const size_t col_max)
{
	const char *cp = s;

	while (*cp != '\0' && *col < col_max) {
		if (*cp == '\t') {
			size_t tabstop;

			/* Rounding up must not wrap around. */
			if (*col > SIZE_T_MAX - 8)
				return;

			tabstop = (*col / 8 + 1) * 8;

			/* The tab does not fit in this column. */
			if (tabstop > col_max)
				return;
			*col = tabstop;
		} else {
			*col += 1;
		}

		putchar(*cp);
		cp++;
	}
}
408 
409 /*
410  * Prompts user to either choose between two strings or edit one, both,
411  * or neither.
412  */
413 static void
414 prompt(const char *s1, const char *s2)
415 {
416 	char *cmd;
417 
418 	/* Print command prompt. */
419 	putchar('%');
420 
421 	/* Get user input. */
422 	for (; (cmd = xfgets(stdin)); free(cmd)) {
423 		const char *p;
424 
425 		/* Skip leading whitespace. */
426 		for (p = cmd; isspace(*p); ++p)
427 			;
428 
429 		switch (*p) {
430 		case 'e':
431 			/* Skip `e'. */
432 			++p;
433 
434 			if (eparse(p, s1, s2) == -1)
435 				goto USAGE;
436 			break;
437 
438 		case 'l':
439 		case '1':
440 			/* Choose left column as-is. */
441 			if (s1 != NULL)
442 				fprintf(outfile, "%s\n", s1);
443 
444 			/* End of command parsing. */
445 			break;
446 
447 		case 'q':
448 			goto QUIT;
449 
450 		case 'r':
451 		case '2':
452 			/* Choose right column as-is. */
453 			if (s2 != NULL)
454 				fprintf(outfile, "%s\n", s2);
455 
456 			/* End of command parsing. */
457 			break;
458 
459 		case 's':
460 			sflag = 1;
461 			goto PROMPT;
462 
463 		case 'v':
464 			sflag = 0;
465 			/* FALLTHROUGH */
466 
467 		default:
468 			/* Interactive usage help. */
469 USAGE:
470 			int_usage();
471 PROMPT:
472 			putchar('%');
473 
474 			/* Prompt user again. */
475 			continue;
476 		}
477 
478 		free(cmd);
479 		return;
480 	}
481 
482 	/*
483 	 * If there was no error, we received an EOF from stdin, so we
484 	 * should quit.
485 	 */
486 QUIT:
487 	fclose(outfile);
488 	exit(0);
489 }
490 
491 /*
492  * Takes two strings, separated by a column divider.  NULL strings are
493  * treated as empty columns.  If the divider is the ` ' character, the
494  * second column is not printed (-l flag).  In this case, the second
495  * string must be NULL.  When the second column is NULL, the divider
496  * does not print the trailing space following the divider character.
497  *
498  * Takes into account that tabs can take multiple columns.
499  */
500 static void
501 println(const char *s1, const char div, const char *s2)
502 {
503 	size_t col;
504 
505 	/* Print first column.  Skips if s1 == NULL. */
506 	col = 0;
507 	if (s1) {
508 		/* Skip angle bracket and space. */
509 		printcol(s1, &col, width);
510 
511 	}
512 
513 	/* Only print left column. */
514 	if (div == ' ' && !s2) {
515 		putchar('\n');
516 		return;
517 	}
518 
519 	/* Otherwise, we pad this column up to width. */
520 	for (; col < width; ++col)
521 		putchar(' ');
522 
523 	/*
524 	 * Print column divider.  If there is no second column, we don't
525 	 * need to add the space for padding.
526 	 */
527 	if (!s2) {
528 		printf(" %c\n", div);
529 		return;
530 	}
531 	printf(" %c ", div);
532 	col += 3;
533 
534 	/* Skip angle bracket and space. */
535 	printcol(s2, &col, line_width);
536 
537 	putchar('\n');
538 }
539 
/*
 * Read the next line from file using fparseln().  Returns a string the
 * caller must free, or NULL when no further line could be read without
 * a stream error.  A read error is fatal (exit status 2).
 */
static char *
xfgets(FILE *file)
{
	/* All-NUL delimiters; presumably turns off fparseln's escape,
	 * continuation and comment handling -- see fparseln(3). */
	const char delim[3] = {'\0', '\0', '\0'};
	char *s;

	/* XXX - Is this necessary? */
	clearerr(file);

	s = fparseln(file, NULL, NULL, delim, 0);
	if (s == NULL && ferror(file))
		err(2, "error reading file");

	return (s);
}
563 
564 /*
565  * Parse ed commands from diffpipe and print lines from file1 (lines
566  * to change or delete) or file2 (lines to add or change).
567  * Returns EOF or 0.
568  */
569 static int
570 parsecmd(FILE *diffpipe, FILE *file1, FILE *file2)
571 {
572 	size_t file1start, file1end, file2start, file2end, n;
573 	/* ed command line and pointer to characters in line */
574 	char *line, *p, *q;
575 	const char *errstr;
576 	char c, cmd;
577 
578 	/* Read ed command. */
579 	if (!(line = xfgets(diffpipe)))
580 		return (EOF);
581 
582 	p = line;
583 	/* Go to character after line number. */
584 	while (isdigit(*p))
585 		++p;
586 	c = *p;
587 	*p++ = 0;
588 	file1start = strtonum(line, 0, INT_MAX, &errstr);
589 	if (errstr)
590 		errx(2, "file1 start is %s: %s", errstr, line);
591 
592 	/* A range is specified for file1. */
593 	if (c == ',') {
594 
595 		q = p;
596 		/* Go to character after file2end. */
597 		while (isdigit(*p))
598 			++p;
599 		c = *p;
600 		*p++ = 0;
601 		file1end = strtonum(q, 0, INT_MAX, &errstr);
602 		if (errstr)
603 			errx(2, "file1 end is %s: %s", errstr, line);
604 		if (file1start > file1end)
605 			errx(2, "invalid line range in file1: %s", line);
606 
607 	} else
608 		file1end = file1start;
609 
610 	cmd = c;
611 	/* Check that cmd is valid. */
612 	if (!(cmd == 'a' || cmd == 'c' || cmd == 'd'))
613 		errx(2, "ed command not recognized: %c: %s", cmd, line);
614 
615 	q = p;
616 	/* Go to character after line number. */
617 	while (isdigit(*p))
618 		++p;
619 	c = *p;
620 	*p++ = 0;
621 	file2start = strtonum(q, 0, INT_MAX, &errstr);
622 	if (errstr)
623 		errx(2, "file2 start is %s: %s", errstr, line);
624 
625 	/*
626 	 * There should either be a comma signifying a second line
627 	 * number or the line should just end here.
628 	 */
629 	if (c != ',' && c != '\0')
630 		errx(2, "invalid line range in file2: %c: %s", c, line);
631 
632 	if (c == ',') {
633 
634 		file2end = strtonum(p, 0, INT_MAX, &errstr);
635 		if (errstr)
636 			errx(2, "file2 end is %s: %s", errstr, line);
637 		if (file2start >= file2end)
638 			errx(2, "invalid line range in file2: %s", line);
639 	} else
640 		file2end = file2start;
641 
642 	/* Appends happen _after_ stated line. */
643 	if (cmd == 'a') {
644 		if (file1start != file1end)
645 			errx(2, "append cannot have a file1 range: %s",
646 			    line);
647 		if (file1start == SIZE_T_MAX)
648 			errx(2, "file1 line range too high: %s", line);
649 		file1start = ++file1end;
650 	}
651 	/*
652 	 * I'm not sure what the deal is with the line numbers for
653 	 * deletes, though.
654 	 */
655 	else if (cmd == 'd') {
656 		if (file2start != file2end)
657 			errx(2, "delete cannot have a file2 range: %s",
658 			    line);
659 		if (file2start == SIZE_T_MAX)
660 			errx(2, "file2 line range too high: %s", line);
661 		file2start = ++file2end;
662 	}
663 
664 	/*
665 	 * Continue reading file1 and file2 until we reach line numbers
666 	 * specified by diff.  Should only happen with -I flag.
667 	 */
668 	for (; file1ln < file1start && file2ln < file2start;
669 	    ++file1ln, ++file2ln) {
670 		char *s1, *s2;
671 
672 		if (!(s1 = xfgets(file1)))
673 			errx(2, "file1 shorter than expected");
674 		if (!(s2 = xfgets(file2)))
675 			errx(2, "file2 shorter than expected");
676 
677 		/* If the -l flag was specified, print only left column. */
678 		if (lflag) {
679 			free(s2);
680 			/*
681 			 * XXX - If -l and -I are both specified, all
682 			 * unchanged or ignored lines are shown with a
683 			 * `(' divider.  This matches GNU sdiff, but I
684 			 * believe it is a bug.  Just check out:
685 			 * gsdiff -l -I '^$' samefile samefile.
686 			 */
687 			if (Iflag)
688 				enqueue(s1, '(', NULL);
689 			else
690 				enqueue(s1, ' ', NULL);
691 		} else
692 			enqueue(s1, ' ', s2);
693 	}
694 	/* Ignore deleted lines. */
695 	for (; file1ln < file1start; ++file1ln) {
696 		char *s;
697 
698 		if (!(s = xfgets(file1)))
699 			errx(2, "file1 shorter than expected");
700 
701 		enqueue(s, '(', NULL);
702 	}
703 	/* Ignore added lines. */
704 	for (; file2ln < file2start; ++file2ln) {
705 		char *s;
706 
707 		if (!(s = xfgets(file2)))
708 			errx(2, "file2 shorter than expected");
709 
710 		/* If -l flag was given, don't print right column. */
711 		if (lflag)
712 			free(s);
713 		else
714 			enqueue(NULL, ')', s);
715 	}
716 
717 	/* Process unmodified or skipped lines. */
718 	processq();
719 
720 	switch (cmd) {
721 	case 'a':
722 		printa(file2, file2end);
723 		n = file2end - file2start + 1;
724 		break;
725 
726 	case 'c':
727 		printc(file1, file1end, file2, file2end);
728 		n = file1end - file1start + 1 + 1 + file2end - file2start + 1;
729 		break;
730 
731 	case 'd':
732 		printd(file1, file1end);
733 		n = file1end - file1start + 1;
734 		break;
735 
736 	default:
737 		errx(2, "invalid diff command: %c: %s", cmd, line);
738 	}
739 
740 	/* Skip to next ed line. */
741 	while (n--)
742 		if (!xfgets(diffpipe))
743 			errx(2, "diff ended early");
744 
745 	return (0);
746 }
747 
748 /*
749  * Queues up a diff line.
750  */
751 static void
752 enqueue(char *left, char div, char *right)
753 {
754 	struct diffline *diffp;
755 
756 	if (!(diffp = malloc(sizeof(struct diffline))))
757 		err(2, "enqueue");
758 	diffp->left = left;
759 	diffp->div = div;
760 	diffp->right = right;
761 	SIMPLEQ_INSERT_TAIL(&diffhead, diffp, diffentries);
762 }
763 
764 /*
765  * Free a diffline structure and its elements.
766  */
767 static void
768 freediff(struct diffline *diffp)
769 {
770 	free(diffp->left);
771 	free(diffp->right);
772 	free(diffp);
773 }
774 
/*
 * Append a newline followed by `append' to the malloced string *s,
 * growing it as needed.  If *s is NULL it simply becomes a copy of
 * `append' (no leading newline).
 *
 * The address and length of the string handled by the previous call
 * are remembered, so appending repeatedly to the same string does not
 * rescan it.  Allocation failure is fatal (exit status 2).
 */
static void
astrcat(char **s, const char *append)
{
	/* String handled by the previous call and its length. */
	static const char *oldstr = NULL;
	static size_t offset = 0;
	size_t addlen, newsiz;
	char *newstr;

	addlen = strlen(append);

	/* Nothing to append to yet: start the string with a copy. */
	if (*s == NULL) {
		newstr = malloc(addlen + 1);
		if (newstr == NULL)
			err(2, "astrcat");
		memcpy(newstr, append, addlen + 1);
		*s = newstr;

		offset = addlen;
		oldstr = newstr;
		return;
	}

	/* A different string than last time: measure it afresh. */
	if (*s != oldstr)
		offset = strlen(*s);

	/* Old text + '\n' + appended text + '\0'. */
	newsiz = offset + 1 + addlen + 1;

	newstr = realloc(*s, newsiz);
	if (newstr == NULL)
		err(2, "astrcat");

	/* Write the separator and the new text, terminator included. */
	newstr[offset] = '\n';
	memcpy(newstr + offset + 1, append, addlen + 1);
	*s = newstr;

	/* Remember the result for the next call. */
	offset = newsiz - 1;
	oldstr = newstr;
}
840 
841 /*
842  * Process diff set queue, printing, prompting, and saving each diff
843  * line stored in queue.
844  */
845 static void
846 processq(void)
847 {
848 	struct diffline *diffp;
849 	char divc, *left, *right;
850 
851 	/* Don't process empty queue. */
852 	if (SIMPLEQ_EMPTY(&diffhead))
853 		return;
854 
855 	/* Remember the divider. */
856 	divc = SIMPLEQ_FIRST(&diffhead)->div;
857 
858 	left = NULL;
859 	right = NULL;
860 	/*
861 	 * Go through set of diffs, concatenating each line in left or
862 	 * right column into two long strings, `left' and `right'.
863 	 */
864 	SIMPLEQ_FOREACH(diffp, &diffhead, diffentries) {
865 		/*
866 		 * Print changed lines if -s was given,
867 		 * print all lines if -s was not given.
868 		 */
869 		if (!sflag || diffp->div == '|' || diffp->div == '<' ||
870 		    diffp->div == '>')
871 			println(diffp->left, diffp->div, diffp->right);
872 
873 		/* Append new lines to diff set. */
874 		if (diffp->left)
875 			astrcat(&left, diffp->left);
876 		if (diffp->right)
877 			astrcat(&right, diffp->right);
878 	}
879 
880 	/* Empty queue and free each diff line and its elements. */
881 	while (!SIMPLEQ_EMPTY(&diffhead)) {
882 		diffp = SIMPLEQ_FIRST(&diffhead);
883 		SIMPLEQ_REMOVE_HEAD(&diffhead, diffentries);
884 		freediff(diffp);
885 	}
886 
887 	/* Write to outfile, prompting user if lines are different. */
888 	if (outfile)
889 		switch (divc) {
890 		case ' ': case '(': case ')':
891 			fprintf(outfile, "%s\n", left);
892 			break;
893 		case '|': case '<': case '>':
894 			prompt(left, right);
895 			break;
896 		default:
897 			errx(2, "invalid divider: %c", divc);
898 		}
899 
900 	/* Free left and right. */
901 	free(left);
902 	free(right);
903 }
904 
905 /*
906  * Print lines following an (a)ppend command.
907  */
908 static void
909 printa(FILE *file, size_t line2)
910 {
911 	char *line;
912 
913 	for (; file2ln <= line2; ++file2ln) {
914 		if (!(line = xfgets(file)))
915 			errx(2, "append ended early");
916 		enqueue(NULL, '>', line);
917 	}
918 
919 	processq();
920 }
921 
922 /*
923  * Print lines following a (c)hange command, from file1ln to file1end
924  * and from file2ln to file2end.
925  */
926 static void
927 printc(FILE *file1, size_t file1end, FILE *file2, size_t file2end)
928 {
929 	struct fileline {
930 		SIMPLEQ_ENTRY(fileline)	 fileentries;
931 		char			*line;
932 	};
933 	SIMPLEQ_HEAD(, fileline) delqhead = SIMPLEQ_HEAD_INITIALIZER(delqhead);
934 
935 	/* Read lines to be deleted. */
936 	for (; file1ln <= file1end; ++file1ln) {
937 		struct fileline *linep;
938 		char *line1;
939 
940 		/* Read lines from both. */
941 		if (!(line1 = xfgets(file1)))
942 			errx(2, "error reading file1 in delete in change");
943 
944 		/* Add to delete queue. */
945 		if (!(linep = malloc(sizeof(struct fileline))))
946 			err(2, "printc");
947 		linep->line = line1;
948 		SIMPLEQ_INSERT_TAIL(&delqhead, linep, fileentries);
949 	}
950 
951 	/* Process changed lines.. */
952 	for (; !SIMPLEQ_EMPTY(&delqhead) && file2ln <= file2end;
953 	    ++file2ln) {
954 		struct fileline *del;
955 		char *add;
956 
957 		/* Get add line. */
958 		if (!(add = xfgets(file2)))
959 			errx(2, "error reading add in change");
960 
961 		del = SIMPLEQ_FIRST(&delqhead);
962 		enqueue(del->line, '|', add);
963 		SIMPLEQ_REMOVE_HEAD(&delqhead, fileentries);
964 		/*
965 		 * Free fileline structure but not its elements since
966 		 * they are queued up.
967 		 */
968 		free(del);
969 	}
970 	processq();
971 
972 	/* Process remaining lines to add. */
973 	for (; file2ln <= file2end; ++file2ln) {
974 		char *add;
975 
976 		/* Get add line. */
977 		if (!(add = xfgets(file2)))
978 			errx(2, "error reading add in change");
979 
980 		enqueue(NULL, '>', add);
981 	}
982 	processq();
983 
984 	/* Process remaining lines to delete. */
985 	while (!SIMPLEQ_EMPTY(&delqhead)) {
986 		struct fileline *filep;
987 
988 		filep = SIMPLEQ_FIRST(&delqhead);
989 		enqueue(filep->line, '<', NULL);
990 		SIMPLEQ_REMOVE_HEAD(&delqhead, fileentries);
991 		free(filep);
992 	}
993 	processq();
994 }
995 
996 /*
997  * Print deleted lines from file, from file1ln to file1end.
998  */
999 static void
1000 printd(FILE *file1, size_t file1end)
1001 {
1002 	char *line1;
1003 
1004 	/* Print out lines file1ln to line2. */
1005 	for (; file1ln <= file1end; ++file1ln) {
1006 		/* XXX - Why can't this handle stdin? */
1007 		if (!(line1 = xfgets(file1)))
1008 			errx(2, "file1 ended early in delete");
1009 		enqueue(line1, '<', NULL);
1010 	}
1011 	processq();
1012 }
1013 
/*
 * Print the list of interactive commands understood by prompt() to
 * stdout.
 */
static void
int_usage(void)
{
	static const char help[] =
	    "e:\tedit blank diff\n"
	    "eb:\tedit both diffs concatenated\n"
	    "el:\tedit left diff\n"
	    "er:\tedit right diff\n"
	    "l | 1:\tchoose left diff\n"
	    "r | 2:\tchoose right diff\n"
	    "s:\tsilent mode--don't print identical lines\n"
	    "v:\tverbose mode--print identical lines\n"
	    "q:\tquit";

	/* puts() supplies the final newline. */
	puts(help);
}
1030 
/*
 * Print the command line synopsis to stderr and exit with status 2.
 * Does not return.
 */
static void
usage(void)
{
	extern char *__progname;

	(void)fprintf(stderr,
	    "usage: %s [-abdilstW] [-I regexp] [-o outfile] [-w width] file1 file2\n",
	    __progname);

	exit(2);
}
1041