1 /* $NetBSD: util.c,v 1.2 2020/12/13 00:04:40 roy Exp $ */
2
3 /* Support routines for GNU DIFF.
4
5 Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002
6 Free Software Foundation, Inc.
7
8 This file is part of GNU DIFF.
9
10 GNU DIFF is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 GNU DIFF is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; see the file COPYING.
22 If not, write to the Free Software Foundation,
23 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
24
25 #include "diff.h"
26 #include <dirname.h>
27 #include <error.h>
28 #include <quotesys.h>
29 #include <regex.h>
30 #include <xalloc.h>
31
32 char const pr_program[] = PR_PROGRAM;
33
/* Queue up one-line messages to be printed at the end,
   when -l is specified.  Each message is recorded with a `struct msg'.  */

struct msg
{
  /* Next queued message, or a null pointer at the end of the chain.  */
  struct msg *next;

  /* Format + 4 args, each '\0' terminated, concatenated.  This is a
     flexible array member, so storing more than one byte here is well
     defined: allocate offsetof (struct msg, args) plus the total size
     of the five strings.  */
  char args[];
};

/* Head of the chain of queued messages.  */

static struct msg *msg_chain;

/* Tail of the chain of queued messages: the address of the link
   through which the next message must be stored.  */

static struct msg **msg_chain_end = &msg_chain;
50
/* Report a failed system call: hand NAME (normally the file name)
   and the current errno to `error' with exit status 0.  */

void
perror_with_name (char const *name)
{
  int err = errno;

  error (0, err, "%s", name);
}
59
60 /* Use when a system call returns non-zero status and that is fatal. */
61
62 void
pfatal_with_name(char const * name)63 pfatal_with_name (char const *name)
64 {
65 int e = errno;
66 print_message_queue ();
67 error (EXIT_TROUBLE, e, "%s", name);
68 abort ();
69 }
70
71 /* Print an error message containing MSGID, then exit. */
72
73 void
fatal(char const * msgid)74 fatal (char const *msgid)
75 {
76 print_message_queue ();
77 error (EXIT_TROUBLE, 0, "%s", _(msgid));
78 abort ();
79 }
80
/* Two-argument form of message5: print FORMAT_MSGID with ARG1 and ARG2
   now, or queue it for later when output is being paginated.
   This is used for things like "Only in ...".  */

void
message (char const *format_msgid, char const *arg1, char const *arg2)
{
  char const *const no_arg = 0;

  message5 (format_msgid, arg1, arg2, no_arg, no_arg);
}
89
90 void
message5(char const * format_msgid,char const * arg1,char const * arg2,char const * arg3,char const * arg4)91 message5 (char const *format_msgid, char const *arg1, char const *arg2,
92 char const *arg3, char const *arg4)
93 {
94 if (paginate)
95 {
96 char *p;
97 char const *arg[5];
98 int i;
99 size_t size[5];
100 size_t total_size = offsetof (struct msg, args);
101 struct msg *new;
102
103 arg[0] = format_msgid;
104 arg[1] = arg1;
105 arg[2] = arg2;
106 arg[3] = arg3 ? arg3 : "";
107 arg[4] = arg4 ? arg4 : "";
108
109 for (i = 0; i < 5; i++)
110 total_size += size[i] = strlen (arg[i]) + 1;
111
112 new = xmalloc (total_size);
113
114 for (i = 0, p = new->args; i < 5; p += size[i++])
115 memcpy (p, arg[i], size[i]);
116
117 *msg_chain_end = new;
118 new->next = 0;
119 msg_chain_end = &new->next;
120 }
121 else
122 {
123 if (sdiff_merge_assist)
124 putchar (' ');
125 printf (_(format_msgid), arg1, arg2, arg3, arg4);
126 }
127 }
128
129 /* Output all the messages that were saved up by calls to `message'. */
130
131 void
print_message_queue(void)132 print_message_queue (void)
133 {
134 char const *arg[5];
135 int i;
136 struct msg *m = msg_chain;
137
138 while (m)
139 {
140 struct msg *next = m->next;
141 arg[0] = m->args;
142 for (i = 0; i < 4; i++)
143 arg[i + 1] = arg[i] + strlen (arg[i]) + 1;
144 printf (_(arg[0]), arg[1], arg[2], arg[3], arg[4]);
145 free (m);
146 m = next;
147 }
148 }
149
150 /* Call before outputting the results of comparing files NAME0 and NAME1
151 to set up OUTFILE, the stdio stream for the output to go to.
152
153 Usually, OUTFILE is just stdout. But when -l was specified
154 we fork off a `pr' and make OUTFILE a pipe to it.
155 `pr' then outputs to our stdout. */
156
157 static char const *current_name0;
158 static char const *current_name1;
159 static bool currently_recursive;
160
161 void
setup_output(char const * name0,char const * name1,bool recursive)162 setup_output (char const *name0, char const *name1, bool recursive)
163 {
164 current_name0 = name0;
165 current_name1 = name1;
166 currently_recursive = recursive;
167 outfile = 0;
168 }
169
170 #if HAVE_WORKING_FORK || HAVE_WORKING_VFORK
171 static pid_t pr_pid;
172 #endif
173
174 void
begin_output(void)175 begin_output (void)
176 {
177 char *name;
178
179 if (outfile != 0)
180 return;
181
182 /* Construct the header of this piece of diff. */
183 name = xmalloc (strlen (current_name0) + strlen (current_name1)
184 + strlen (switch_string) + 7);
185
186 /* POSIX 1003.1-2001 specifies this format. But there are some bugs in
187 the standard: it says that we must print only the last component
188 of the pathnames, and it requires two spaces after "diff" if
189 there are no options. These requirements are silly and do not
190 match historical practice. */
191 sprintf (name, "diff%s %s %s", switch_string, current_name0, current_name1);
192
193 if (paginate)
194 {
195 if (fflush (stdout) != 0)
196 pfatal_with_name (_("write failed"));
197
198 /* Make OUTFILE a pipe to a subsidiary `pr'. */
199 {
200 #if HAVE_WORKING_FORK || HAVE_WORKING_VFORK
201 int pipes[2];
202
203 if (pipe (pipes) != 0)
204 pfatal_with_name ("pipe");
205
206 pr_pid = vfork ();
207 if (pr_pid < 0)
208 pfatal_with_name ("fork");
209
210 if (pr_pid == 0)
211 {
212 close (pipes[1]);
213 if (pipes[0] != STDIN_FILENO)
214 {
215 if (dup2 (pipes[0], STDIN_FILENO) < 0)
216 pfatal_with_name ("dup2");
217 close (pipes[0]);
218 }
219
220 execl (pr_program, pr_program, "-h", name, NULL);
221 _exit (errno == ENOEXEC ? 126 : 127);
222 }
223 else
224 {
225 close (pipes[0]);
226 outfile = fdopen (pipes[1], "w");
227 if (!outfile)
228 pfatal_with_name ("fdopen");
229 }
230 #else
231 char *command = xmalloc (sizeof pr_program - 1 + 7
232 + quote_system_arg ((char *) 0, name) + 1);
233 char *p;
234 sprintf (command, "%s -f -h ", pr_program);
235 p = command + sizeof pr_program - 1 + 7;
236 p += quote_system_arg (p, name);
237 *p = 0;
238 errno = 0;
239 outfile = popen (command, "w");
240 if (!outfile)
241 pfatal_with_name (command);
242 free (command);
243 #endif
244 }
245 }
246 else
247 {
248
249 /* If -l was not specified, output the diff straight to `stdout'. */
250
251 outfile = stdout;
252
253 /* If handling multiple files (because scanning a directory),
254 print which files the following output is about. */
255 if (currently_recursive)
256 printf ("%s\n", name);
257 }
258
259 free (name);
260
261 /* A special header is needed at the beginning of context output. */
262 switch (output_style)
263 {
264 case OUTPUT_CONTEXT:
265 print_context_header (files, 0);
266 break;
267
268 case OUTPUT_UNIFIED:
269 print_context_header (files, 1);
270 break;
271
272 default:
273 break;
274 }
275 }
276
277 /* Call after the end of output of diffs for one file.
278 Close OUTFILE and get rid of the `pr' subfork. */
279
280 void
finish_output(void)281 finish_output (void)
282 {
283 if (outfile != 0 && outfile != stdout)
284 {
285 int wstatus;
286 int werrno = 0;
287 if (ferror (outfile))
288 fatal ("write failed");
289 #if ! (HAVE_WORKING_FORK || HAVE_WORKING_VFORK)
290 wstatus = pclose (outfile);
291 if (wstatus == -1)
292 werrno = errno;
293 #else
294 if (fclose (outfile) != 0)
295 pfatal_with_name (_("write failed"));
296 if (waitpid (pr_pid, &wstatus, 0) < 0)
297 pfatal_with_name ("waitpid");
298 #endif
299 if (! werrno && WIFEXITED (wstatus) && WEXITSTATUS (wstatus) == 127)
300 error (EXIT_TROUBLE, 0, _("subsidiary program `%s' not found"),
301 pr_program);
302 if (wstatus != 0)
303 error (EXIT_TROUBLE, werrno, _("subsidiary program `%s' failed"),
304 pr_program);
305 }
306
307 outfile = 0;
308 }
309
310 /* Compare two lines (typically one from each input file)
311 according to the command line options.
312 For efficiency, this is invoked only when the lines do not match exactly
313 but an option like -i might cause us to ignore the difference.
314 Return nonzero if the lines differ. */
315
316 bool
lines_differ(char const * s1,char const * s2)317 lines_differ (char const *s1, char const *s2)
318 {
319 register unsigned char const *t1 = (unsigned char const *) s1;
320 register unsigned char const *t2 = (unsigned char const *) s2;
321 size_t column = 0;
322
323 while (1)
324 {
325 register unsigned char c1 = *t1++;
326 register unsigned char c2 = *t2++;
327
328 /* Test for exact char equality first, since it's a common case. */
329 if (c1 != c2)
330 {
331 switch (ignore_white_space)
332 {
333 case IGNORE_ALL_SPACE:
334 /* For -w, just skip past any white space. */
335 while (ISSPACE (c1) && c1 != '\n') c1 = *t1++;
336 while (ISSPACE (c2) && c2 != '\n') c2 = *t2++;
337 break;
338
339 case IGNORE_SPACE_CHANGE:
340 /* For -b, advance past any sequence of white space in
341 line 1 and consider it just one space, or nothing at
342 all if it is at the end of the line. */
343 if (ISSPACE (c1))
344 {
345 while (c1 != '\n')
346 {
347 c1 = *t1++;
348 if (! ISSPACE (c1))
349 {
350 --t1;
351 c1 = ' ';
352 break;
353 }
354 }
355 }
356
357 /* Likewise for line 2. */
358 if (ISSPACE (c2))
359 {
360 while (c2 != '\n')
361 {
362 c2 = *t2++;
363 if (! ISSPACE (c2))
364 {
365 --t2;
366 c2 = ' ';
367 break;
368 }
369 }
370 }
371
372 if (c1 != c2)
373 {
374 /* If we went too far when doing the simple test
375 for equality, go back to the first non-white-space
376 character in both sides and try again. */
377 if (c2 == ' ' && c1 != '\n'
378 && (unsigned char const *) s1 + 1 < t1
379 && ISSPACE (t1[-2]))
380 {
381 --t1;
382 continue;
383 }
384 if (c1 == ' ' && c2 != '\n'
385 && (unsigned char const *) s2 + 1 < t2
386 && ISSPACE (t2[-2]))
387 {
388 --t2;
389 continue;
390 }
391 }
392
393 break;
394
395 case IGNORE_TAB_EXPANSION:
396 if ((c1 == ' ' && c2 == '\t')
397 || (c1 == '\t' && c2 == ' '))
398 {
399 size_t column2 = column;
400 for (;; c1 = *t1++)
401 {
402 if (c1 == ' ')
403 column++;
404 else if (c1 == '\t')
405 column += TAB_WIDTH - column % TAB_WIDTH;
406 else
407 break;
408 }
409 for (;; c2 = *t2++)
410 {
411 if (c2 == ' ')
412 column2++;
413 else if (c2 == '\t')
414 column2 += TAB_WIDTH - column2 % TAB_WIDTH;
415 else
416 break;
417 }
418 if (column != column2)
419 return 1;
420 }
421 break;
422
423 case IGNORE_NO_WHITE_SPACE:
424 break;
425 }
426
427 /* Lowercase all letters if -i is specified. */
428
429 if (ignore_case)
430 {
431 c1 = TOLOWER (c1);
432 c2 = TOLOWER (c2);
433 }
434
435 if (c1 != c2)
436 break;
437 }
438 if (c1 == '\n')
439 return 0;
440
441 column += c1 == '\t' ? TAB_WIDTH - column % TAB_WIDTH : 1;
442 }
443
444 return 1;
445 }
446
/* Return the last link of the group of changes that starts the script
   START.  This implementation returns START itself, so each change
   forms a group of its own.  */

struct change *
find_change (struct change *start)
{
  struct change *last = start;

  return last;
}
455
/* Counterpart of find_change with the same signature; it likewise
   returns START itself.  */

struct change *
find_reverse_change (struct change *start)
{
  struct change *last = start;

  return last;
}
461
462 /* Divide SCRIPT into pieces by calling HUNKFUN and
463 print each piece with PRINTFUN.
464 Both functions take one arg, an edit script.
465
466 HUNKFUN is called with the tail of the script
467 and returns the last link that belongs together with the start
468 of the tail.
469
470 PRINTFUN takes a subscript which belongs together (with a null
471 link at the end) and prints it. */
472
473 void
print_script(struct change * script,struct change * (* hunkfun)(struct change *),void (* printfun)(struct change *))474 print_script (struct change *script,
475 struct change * (*hunkfun) (struct change *),
476 void (*printfun) (struct change *))
477 {
478 struct change *next = script;
479
480 while (next)
481 {
482 struct change *this, *end;
483
484 /* Find a set of changes that belong together. */
485 this = next;
486 end = (*hunkfun) (next);
487
488 /* Disconnect them from the rest of the changes,
489 making them a hunk, and remember the rest for next iteration. */
490 next = end->link;
491 end->link = 0;
492 #ifdef DEBUG
493 debug_script (this);
494 #endif
495
496 /* Print this hunk. */
497 (*printfun) (this);
498
499 /* Reconnect the script so it will all be freed properly. */
500 end->link = next;
501 }
502 }
503
504 /* Print the text of a single line LINE,
505 flagging it with the characters in LINE_FLAG (which say whether
506 the line is inserted, deleted, changed, etc.). */
507
508 void
print_1_line(char const * line_flag,char const * const * line)509 print_1_line (char const *line_flag, char const *const *line)
510 {
511 char const *base = line[0], *limit = line[1]; /* Help the compiler. */
512 FILE *out = outfile; /* Help the compiler some more. */
513 char const *flag_format = 0;
514
515 /* If -T was specified, use a Tab between the line-flag and the text.
516 Otherwise use a Space (as Unix diff does).
517 Print neither space nor tab if line-flags are empty. */
518
519 if (line_flag && *line_flag)
520 {
521 flag_format = initial_tab ? "%s\t" : "%s ";
522 fprintf (out, flag_format, line_flag);
523 }
524
525 output_1_line (base, limit, flag_format, line_flag);
526
527 if ((!line_flag || line_flag[0]) && limit[-1] != '\n')
528 fprintf (out, "\n\\ %s\n", _("No newline at end of file"));
529 }
530
531 /* Output a line from BASE up to LIMIT.
532 With -t, expand white space characters to spaces, and if FLAG_FORMAT
533 is nonzero, output it with argument LINE_FLAG after every
534 internal carriage return, so that tab stops continue to line up. */
535
536 void
output_1_line(char const * base,char const * limit,char const * flag_format,char const * line_flag)537 output_1_line (char const *base, char const *limit, char const *flag_format,
538 char const *line_flag)
539 {
540 if (!expand_tabs)
541 fwrite (base, limit - base, 1, outfile);
542 else
543 {
544 register FILE *out = outfile;
545 register unsigned char c;
546 register char const *t = base;
547 register unsigned int column = 0;
548
549 while (t < limit)
550 switch ((c = *t++))
551 {
552 case '\t':
553 {
554 unsigned int spaces = TAB_WIDTH - column % TAB_WIDTH;
555 column += spaces;
556 do
557 putc (' ', out);
558 while (--spaces);
559 }
560 break;
561
562 case '\r':
563 putc (c, out);
564 if (flag_format && t < limit && *t != '\n')
565 fprintf (out, flag_format, line_flag);
566 column = 0;
567 break;
568
569 case '\b':
570 if (column == 0)
571 continue;
572 column--;
573 putc (c, out);
574 break;
575
576 default:
577 if (ISPRINT (c))
578 column++;
579 putc (c, out);
580 break;
581 }
582 }
583 }
584
/* Letters 'd', 'a' and 'c' at indices 1, 2 and 3; index 0 holds '\0'.
   NOTE(review): presumably indexed by an `enum changes' value -- confirm
   against the callers.  */
const char change_letter[] = { '\0', 'd', 'a', 'c' };
586
587 /* Translate an internal line number (an index into diff's table of lines)
588 into an actual line number in the input file.
589 The internal line number is I. FILE points to the data on the file.
590
591 Internal line numbers count from 0 starting after the prefix.
592 Actual line numbers count from 1 within the entire file. */
593
594 lin
translate_line_number(struct file_data const * file,lin i)595 translate_line_number (struct file_data const *file, lin i)
596 {
597 return i + file->prefix_lines + 1;
598 }
599
600 /* Translate a line number range. This is always done for printing,
601 so for convenience translate to long rather than lin, so that the
602 caller can use printf with "%ld" without casting. */
603
604 void
translate_range(struct file_data const * file,lin a,lin b,long * aptr,long * bptr)605 translate_range (struct file_data const *file,
606 lin a, lin b,
607 long *aptr, long *bptr)
608 {
609 *aptr = translate_line_number (file, a - 1) + 1;
610 *bptr = translate_line_number (file, b + 1) - 1;
611 }
612
613 /* Print a pair of line numbers with SEPCHAR, translated for file FILE.
614 If the two numbers are identical, print just one number.
615
616 Args A and B are internal line numbers.
617 We print the translated (real) line numbers. */
618
619 void
print_number_range(char sepchar,struct file_data * file,lin a,lin b)620 print_number_range (char sepchar, struct file_data *file, lin a, lin b)
621 {
622 long trans_a, trans_b;
623 translate_range (file, a, b, &trans_a, &trans_b);
624
625 /* Note: we can have B < A in the case of a range of no lines.
626 In this case, we should print the line number before the range,
627 which is B. */
628 if (trans_b > trans_a)
629 fprintf (outfile, "%ld%c%ld", trans_a, sepchar, trans_b);
630 else
631 fprintf (outfile, "%ld", trans_b);
632 }
633
634 /* Look at a hunk of edit script and report the range of lines in each file
635 that it applies to. HUNK is the start of the hunk, which is a chain
636 of `struct change'. The first and last line numbers of file 0 are stored in
637 *FIRST0 and *LAST0, and likewise for file 1 in *FIRST1 and *LAST1.
638 Note that these are internal line numbers that count from 0.
639
640 If no lines from file 0 are deleted, then FIRST0 is LAST0+1.
641
642 Return UNCHANGED if only ignorable lines are inserted or deleted,
643 OLD if lines of file 0 are deleted,
644 NEW if lines of file 1 are inserted,
645 and CHANGED if both kinds of changes are found. */
646
647 enum changes
analyze_hunk(struct change * hunk,lin * first0,lin * last0,lin * first1,lin * last1)648 analyze_hunk (struct change *hunk,
649 lin *first0, lin *last0,
650 lin *first1, lin *last1)
651 {
652 struct change *next;
653 lin l0, l1;
654 lin show_from, show_to;
655 lin i;
656 bool trivial = ignore_blank_lines || ignore_regexp.fastmap;
657 size_t trivial_length = (int) ignore_blank_lines - 1;
658 /* If 0, ignore zero-length lines;
659 if SIZE_MAX, do not ignore lines just because of their length. */
660
661 char const * const *linbuf0 = files[0].linbuf; /* Help the compiler. */
662 char const * const *linbuf1 = files[1].linbuf;
663
664 show_from = show_to = 0;
665
666 *first0 = hunk->line0;
667 *first1 = hunk->line1;
668
669 next = hunk;
670 do
671 {
672 l0 = next->line0 + next->deleted - 1;
673 l1 = next->line1 + next->inserted - 1;
674 show_from += next->deleted;
675 show_to += next->inserted;
676
677 for (i = next->line0; i <= l0 && trivial; i++)
678 {
679 char const *line = linbuf0[i];
680 size_t len = linbuf0[i + 1] - line - 1;
681 if (len != trivial_length
682 && (! ignore_regexp.fastmap
683 || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
684 trivial = 0;
685 }
686
687 for (i = next->line1; i <= l1 && trivial; i++)
688 {
689 char const *line = linbuf1[i];
690 size_t len = linbuf1[i + 1] - line - 1;
691 if (len != trivial_length
692 && (! ignore_regexp.fastmap
693 || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
694 trivial = 0;
695 }
696 }
697 while ((next = next->link) != 0);
698
699 *last0 = l0;
700 *last1 = l1;
701
702 /* If all inserted or deleted lines are ignorable,
703 tell the caller to ignore this hunk. */
704
705 if (trivial)
706 return UNCHANGED;
707
708 return (show_from ? OLD : UNCHANGED) | (show_to ? NEW : UNCHANGED);
709 }
710
/* Return a newly allocated (with xmalloc) string holding S1, S2 and S3
   joined end to end.  */

char *
concat (char const *s1, char const *s2, char const *s3)
{
  size_t len1 = strlen (s1);
  size_t len2 = strlen (s2);
  size_t len3 = strlen (s3);
  char *result = xmalloc (len1 + len2 + len3 + 1);

  memcpy (result, s1, len1);
  memcpy (result + len1, s2, len2);
  /* Copy S3 together with its terminating '\0'.  */
  memcpy (result + len1 + len2, s3, len3 + 1);

  return result;
}
720
/* Allocate SIZE bytes with xmalloc, clear them to zero,
   and return the block.  */

void *
zalloc (size_t size)
{
  void *block = xmalloc (size);

  /* memset returns its first argument.  */
  return memset (block, 0, size);
}
730
/* Yield the newly allocated pathname of the file in DIR whose filename
   is FILE.  A "/" is put between the two unless the base name of DIR
   is empty or already ends in '/'.  */

char *
dir_file_pathname (char const *dir, char const *file)
{
  char const *last = base_name (dir);
  size_t last_len = strlen (last);
  char const *separator;

  if (last_len == 0 || last[last_len - 1] == '/')
    separator = "";
  else
    separator = "/";

  return concat (dir, separator, file);
}
741
742 void
debug_script(struct change * sp)743 debug_script (struct change *sp)
744 {
745 fflush (stdout);
746
747 for (; sp; sp = sp->link)
748 {
749 long line0 = sp->line0;
750 long line1 = sp->line1;
751 long deleted = sp->deleted;
752 long inserted = sp->inserted;
753 fprintf (stderr, "%3ld %3ld delete %ld insert %ld\n",
754 line0, line1, deleted, inserted);
755 }
756
757 fflush (stderr);
758 }
759