xref: /dflybsd-src/contrib/diffutils/src/util.c (revision c0d274d062fd959993bf623f25f7cb6a8a676c4e)
1 /* Support routines for GNU DIFF.
2 
3    Copyright (C) 1988-1989, 1992-1995, 1998, 2001-2002, 2004, 2006, 2009-2010
4    Free Software Foundation, Inc.
5 
6    This file is part of GNU DIFF.
7 
8    This program is free software: you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation, either version 3 of the License, or
11    (at your option) any later version.
12 
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
20 
21 #include "diff.h"
22 #include <dirname.h>
23 #include <error.h>
24 #include <sh-quote.h>
25 #include <xalloc.h>
26 
/* Path of the subsidiary paginator program that output is piped to,
   taken from the build-time macro PR_PROGRAM.  */
char const pr_program[] = PR_PROGRAM;

/* Queue up one-line messages to be printed at the end,
   when -l is specified.  Each message is recorded with a `struct msg'.  */

struct msg
{
  /* Next message in the queue, or null for the last one.  */
  struct msg *next;
  /* Declared with one element, but each node is allocated with enough
     room for the whole concatenated text (see message5).  */
  char args[1]; /* Format + 4 args, each '\0' terminated, concatenated.  */
};

/* Head of the chain of queued messages.  */

static struct msg *msg_chain;

/* Tail of the chain of queued messages: the address of the link through
   which the next message is appended.  It starts out pointing at the
   head, which means the queue is empty.  */

static struct msg **msg_chain_end = &msg_chain;
45 
/* Report a failed system call without exiting.
   NAME, normally the name of the file involved, is handed to `error'
   together with the current value of errno and an exit status of 0.  */

void
perror_with_name (char const *name)
{
  int saved_errno = errno;

  error (0, saved_errno, "%s", name);
}
54 
55 /* Use when a system call returns non-zero status and that is fatal.  */
56 
57 void
58 pfatal_with_name (char const *name)
59 {
60   int e = errno;
61   print_message_queue ();
62   error (EXIT_TROUBLE, e, "%s", name);
63   abort ();
64 }
65 
66 /* Print an error message containing MSGID, then exit.  */
67 
68 void
69 fatal (char const *msgid)
70 {
71   print_message_queue ();
72   error (EXIT_TROUBLE, 0, "%s", _(msgid));
73   abort ();
74 }
75 
/* Two-argument front end to message5: behaves like printf, except that
   when -l is in effect the message is saved and printed later.
   This is used for things like "Only in ...".  */

void
message (char const *format_msgid, char const *arg1, char const *arg2)
{
  char const *no_arg = NULL;

  message5 (format_msgid, arg1, arg2, no_arg, no_arg);
}
84 
85 void
86 message5 (char const *format_msgid, char const *arg1, char const *arg2,
87 	  char const *arg3, char const *arg4)
88 {
89   if (paginate)
90     {
91       char *p;
92       char const *arg[5];
93       int i;
94       size_t size[5];
95       size_t total_size = offsetof (struct msg, args);
96       struct msg *new;
97 
98       arg[0] = format_msgid;
99       arg[1] = arg1;
100       arg[2] = arg2;
101       arg[3] = arg3 ? arg3 : "";
102       arg[4] = arg4 ? arg4 : "";
103 
104       for (i = 0;  i < 5;  i++)
105 	total_size += size[i] = strlen (arg[i]) + 1;
106 
107       new = xmalloc (total_size);
108 
109       for (i = 0, p = new->args;  i < 5;  p += size[i++])
110 	memcpy (p, arg[i], size[i]);
111 
112       *msg_chain_end = new;
113       new->next = 0;
114       msg_chain_end = &new->next;
115     }
116   else
117     {
118       if (sdiff_merge_assist)
119 	putchar (' ');
120       printf (_(format_msgid), arg1, arg2, arg3, arg4);
121     }
122 }
123 
124 /* Output all the messages that were saved up by calls to `message'.  */
125 
126 void
127 print_message_queue (void)
128 {
129   char const *arg[5];
130   int i;
131   struct msg *m = msg_chain;
132 
133   while (m)
134     {
135       struct msg *next = m->next;
136       arg[0] = m->args;
137       for (i = 0;  i < 4;  i++)
138 	arg[i + 1] = arg[i] + strlen (arg[i]) + 1;
139       printf (_(arg[0]), arg[1], arg[2], arg[3], arg[4]);
140       free (m);
141       m = next;
142     }
143 }
144 
145 /* Call before outputting the results of comparing files NAME0 and NAME1
146    to set up OUTFILE, the stdio stream for the output to go to.
147 
148    Usually, OUTFILE is just stdout.  But when -l was specified
149    we fork off a `pr' and make OUTFILE a pipe to it.
150    `pr' then outputs to our stdout.  */
151 
152 static char const *current_name0;
153 static char const *current_name1;
154 static bool currently_recursive;
155 
156 void
157 setup_output (char const *name0, char const *name1, bool recursive)
158 {
159   current_name0 = name0;
160   current_name1 = name1;
161   currently_recursive = recursive;
162   outfile = 0;
163 }
164 
#if HAVE_WORKING_FORK || HAVE_WORKING_VFORK
/* Process ID of the subsidiary `pr' child started by begin_output;
   finish_output waits for it.  */
static pid_t pr_pid;
#endif

/* Start the output for the comparison described by the last call to
   setup_output, unless it has been started already (OUTFILE nonzero).

   Build the header "diff OPTIONS NAME0 NAME1".  When paginating, make
   OUTFILE a pipe to the subsidiary `pr' program and give the header to
   it via -h; otherwise set OUTFILE to stdout and print the header only
   for recursive comparisons.  Finally print the context or unified
   header if the output style calls for one.  Failures are fatal.  */
void
begin_output (void)
{
  char *name;

  /* Output for this comparison has already been started.  */
  if (outfile != 0)
    return;

  /* Construct the header of this piece of diff.  */
  /* The 7 extra bytes are for "diff" (4), the two separating spaces,
     and the terminating null byte.  */
  name = xmalloc (strlen (current_name0) + strlen (current_name1)
		  + strlen (switch_string) + 7);

  /* POSIX 1003.1-2001 specifies this format.  But there are some bugs in
     the standard: it says that we must print only the last component
     of the pathnames, and it requires two spaces after "diff" if
     there are no options.  These requirements are silly and do not
     match historical practice.  */
  sprintf (name, "diff%s %s %s", switch_string, current_name0, current_name1);

  if (paginate)
    {
      /* Flush what is already buffered on stdout before starting the
	 subsidiary program.  */
      if (fflush (stdout) != 0)
	pfatal_with_name (_("write failed"));

      /* Make OUTFILE a pipe to a subsidiary `pr'.  */
      {
#if HAVE_WORKING_FORK || HAVE_WORKING_VFORK
	int pipes[2];

	if (pipe (pipes) != 0)
	  pfatal_with_name ("pipe");

	pr_pid = vfork ();
	if (pr_pid < 0)
	  pfatal_with_name ("fork");

	if (pr_pid == 0)
	  {
	    /* Child: make the read end of the pipe standard input,
	       then run `pr' with the header as its -h argument.  */
	    close (pipes[1]);
	    if (pipes[0] != STDIN_FILENO)
	      {
		/* NOTE(review): pfatal_with_name does stdio work and exits
		   normally; confirm that is acceptable in a vfork child.  */
		if (dup2 (pipes[0], STDIN_FILENO) < 0)
		  pfatal_with_name ("dup2");
		close (pipes[0]);
	      }

	    execl (pr_program, pr_program, "-h", name, (char *) 0);
	    /* Only reached if execl failed.  finish_output reports
	       status 127 as "not found" and 126 as "could not be
	       invoked".  */
	    _exit (errno == ENOENT ? 127 : 126);
	  }
	else
	  {
	    /* Parent: keep only the write end, wrapped in a stdio stream.  */
	    close (pipes[0]);
	    outfile = fdopen (pipes[1], "w");
	    if (!outfile)
	      pfatal_with_name ("fdopen");
	  }
#else
	/* No usable fork: run "PR_PROGRAM -f -h 'HEADER'" through popen.
	   The 7 is the length of " -f -h ".  */
	char *command = xmalloc (sizeof pr_program - 1 + 7
				 + shell_quote_length (name) + 1);
	char *p;
	sprintf (command, "%s -f -h ", pr_program);
	p = command + sizeof pr_program - 1 + 7;
	p = shell_quote_copy (p, name);
	*p = 0;
	errno = 0;
	outfile = popen (command, "w");
	if (!outfile)
	  pfatal_with_name (command);
	free (command);
#endif
      }
    }
  else
    {

      /* If -l was not specified, output the diff straight to `stdout'.  */

      outfile = stdout;

      /* If handling multiple files (because scanning a directory),
	 print which files the following output is about.  */
      if (currently_recursive)
	printf ("%s\n", name);
    }

  free (name);

  /* A special header is needed at the beginning of context output.  */
  switch (output_style)
    {
    case OUTPUT_CONTEXT:
      print_context_header (files, false);
      break;

    case OUTPUT_UNIFIED:
      print_context_header (files, true);
      break;

    default:
      break;
    }
}
271 
272 /* Call after the end of output of diffs for one file.
273    Close OUTFILE and get rid of the `pr' subfork.  */
274 
275 void
276 finish_output (void)
277 {
278   if (outfile != 0 && outfile != stdout)
279     {
280       int status;
281       int wstatus;
282       int werrno = 0;
283       if (ferror (outfile))
284 	fatal ("write failed");
285 #if ! (HAVE_WORKING_FORK || HAVE_WORKING_VFORK)
286       wstatus = pclose (outfile);
287       if (wstatus == -1)
288 	werrno = errno;
289 #else
290       if (fclose (outfile) != 0)
291 	pfatal_with_name (_("write failed"));
292       if (waitpid (pr_pid, &wstatus, 0) < 0)
293 	pfatal_with_name ("waitpid");
294 #endif
295       status = (! werrno && WIFEXITED (wstatus)
296 		? WEXITSTATUS (wstatus)
297 		: INT_MAX);
298       if (status)
299 	error (EXIT_TROUBLE, werrno,
300 	       _(status == 126
301 		 ? "subsidiary program `%s' could not be invoked"
302 		 : status == 127
303 		 ? "subsidiary program `%s' not found"
304 		 : status == INT_MAX
305 		 ? "subsidiary program `%s' failed"
306 		 : "subsidiary program `%s' failed (exit status %d)"),
307 	       pr_program, status);
308     }
309 
310   outfile = 0;
311 }
312 
/* Compare two lines (typically one from each input file)
   according to the command line options.
   For efficiency, this is invoked only when the lines do not match exactly
   but an option like -i might cause us to ignore the difference.
   Return nonzero if the lines differ.

   S1 and S2 point at the starts of the two lines.  Scanning stops only
   at a difference that the options do not excuse or at a newline in
   line 1, so both lines must be terminated by '\n'.  */

bool
lines_differ (char const *s1, char const *s2)
{
  register char const *t1 = s1;
  register char const *t2 = s2;
  /* Output column reached so far in line 1, used for tab expansion.  */
  size_t column = 0;

  while (1)
    {
      register unsigned char c1 = *t1++;
      register unsigned char c2 = *t2++;

      /* Test for exact char equality first, since it's a common case.  */
      if (c1 != c2)
	{
	  switch (ignore_white_space)
	    {
	    case IGNORE_ALL_SPACE:
	      /* For -w, just skip past any white space.  */
	      while (isspace (c1) && c1 != '\n') c1 = *t1++;
	      while (isspace (c2) && c2 != '\n') c2 = *t2++;
	      break;

	    case IGNORE_SPACE_CHANGE:
	      /* For -b, advance past any sequence of white space in
		 line 1 and consider it just one space, or nothing at
		 all if it is at the end of the line.  */
	      if (isspace (c1))
		{
		  while (c1 != '\n')
		    {
		      c1 = *t1++;
		      if (! isspace (c1))
			{
			  /* Step back so that the non-space character is
			     read again on the next iteration.  */
			  --t1;
			  c1 = ' ';
			  break;
			}
		    }
		}

	      /* Likewise for line 2.  */
	      if (isspace (c2))
		{
		  while (c2 != '\n')
		    {
		      c2 = *t2++;
		      if (! isspace (c2))
			{
			  --t2;
			  c2 = ' ';
			  break;
			}
		    }
		}

	      if (c1 != c2)
		{
		  /* If we went too far when doing the simple test
		     for equality, go back to the first non-white-space
		     character in both sides and try again.  */
		  if (c2 == ' ' && c1 != '\n'
		      && s1 + 1 < t1
		      && isspace ((unsigned char) t1[-2]))
		    {
		      --t1;
		      continue;
		    }
		  if (c1 == ' ' && c2 != '\n'
		      && s2 + 1 < t2
		      && isspace ((unsigned char) t2[-2]))
		    {
		      --t2;
		      continue;
		    }
		}

	      break;

	    case IGNORE_TAB_EXPANSION:
	      /* One side has a space where the other has a tab: skip the
		 run of spaces and tabs on each side, and require both
		 runs to end at the same column.  */
	      if ((c1 == ' ' && c2 == '\t')
		  || (c1 == '\t' && c2 == ' '))
		{
		  size_t column2 = column;
		  for (;; c1 = *t1++)
		    {
		      if (c1 == ' ')
			column++;
		      else if (c1 == '\t')
			column += tabsize - column % tabsize;
		      else
			break;
		    }
		  for (;; c2 = *t2++)
		    {
		      if (c2 == ' ')
			column2++;
		      else if (c2 == '\t')
			column2 += tabsize - column2 % tabsize;
		      else
			break;
		    }
		  if (column != column2)
		    return true;
		}
	      break;

	    case IGNORE_NO_WHITE_SPACE:
	      break;
	    }

	  /* Lowercase all letters if -i is specified.  */

	  if (ignore_case)
	    {
	      c1 = tolower (c1);
	      c2 = tolower (c2);
	    }

	  /* Still different after applying the options: leave the loop
	     and report a difference.  */
	  if (c1 != c2)
	    break;
	}
      /* Here C1 and C2 count as equal, so a newline means both lines
	 ended without a difference.  */
      if (c1 == '\n')
	return false;

      column += c1 == '\t' ? tabsize - column % tabsize : 1;
    }

  return true;
}
449 
/* Hunk-finding function for print_script: return the last link of the
   group of changes at the start of the script START.
   This implementation returns START itself, so every change forms a
   group of its own.  */

struct change *
find_change (struct change *start)
{
  struct change *last_of_group = start;

  return last_of_group;
}
458 
/* Counterpart of find_change with the same behavior: return START
   itself, so every change forms a group of its own.  */
struct change *
find_reverse_change (struct change *start)
{
  struct change *last_of_group = start;

  return last_of_group;
}
464 
465 /* Divide SCRIPT into pieces by calling HUNKFUN and
466    print each piece with PRINTFUN.
467    Both functions take one arg, an edit script.
468 
469    HUNKFUN is called with the tail of the script
470    and returns the last link that belongs together with the start
471    of the tail.
472 
473    PRINTFUN takes a subscript which belongs together (with a null
474    link at the end) and prints it.  */
475 
476 void
477 print_script (struct change *script,
478 	      struct change * (*hunkfun) (struct change *),
479 	      void (*printfun) (struct change *))
480 {
481   struct change *next = script;
482 
483   while (next)
484     {
485       struct change *this, *end;
486 
487       /* Find a set of changes that belong together.  */
488       this = next;
489       end = (*hunkfun) (next);
490 
491       /* Disconnect them from the rest of the changes,
492 	 making them a hunk, and remember the rest for next iteration.  */
493       next = end->link;
494       end->link = 0;
495 #ifdef DEBUG
496       debug_script (this);
497 #endif
498 
499       /* Print this hunk.  */
500       (*printfun) (this);
501 
502       /* Reconnect the script so it will all be freed properly.  */
503       end->link = next;
504     }
505 }
506 
507 /* Print the text of a single line LINE,
508    flagging it with the characters in LINE_FLAG (which say whether
509    the line is inserted, deleted, changed, etc.).  LINE_FLAG must not
510    end in a blank, unless it is a single blank.  */
511 
512 void
513 print_1_line (char const *line_flag, char const *const *line)
514 {
515   char const *base = line[0], *limit = line[1]; /* Help the compiler.  */
516   FILE *out = outfile; /* Help the compiler some more.  */
517   char const *flag_format = 0;
518 
519   /* If -T was specified, use a Tab between the line-flag and the text.
520      Otherwise use a Space (as Unix diff does).
521      Print neither space nor tab if line-flags are empty.
522      But omit trailing blanks if requested.  */
523 
524   if (line_flag && *line_flag)
525     {
526       char const *flag_format_1 = flag_format = initial_tab ? "%s\t" : "%s ";
527       char const *line_flag_1 = line_flag;
528 
529       if (suppress_blank_empty && **line == '\n')
530 	{
531 	  flag_format_1 = "%s";
532 
533 	  /* This hack to omit trailing blanks takes advantage of the
534 	     fact that the only way that LINE_FLAG can end in a blank
535 	     is when LINE_FLAG consists of a single blank.  */
536 	  line_flag_1 += *line_flag_1 == ' ';
537 	}
538 
539       fprintf (out, flag_format_1, line_flag_1);
540     }
541 
542   output_1_line (base, limit, flag_format, line_flag);
543 
544   if ((!line_flag || line_flag[0]) && limit[-1] != '\n')
545     fprintf (out, "\n\\ %s\n", _("No newline at end of file"));
546 }
547 
548 /* Output a line from BASE up to LIMIT.
549    With -t, expand white space characters to spaces, and if FLAG_FORMAT
550    is nonzero, output it with argument LINE_FLAG after every
551    internal carriage return, so that tab stops continue to line up.  */
552 
553 void
554 output_1_line (char const *base, char const *limit, char const *flag_format,
555 	       char const *line_flag)
556 {
557   if (!expand_tabs)
558     fwrite (base, sizeof (char), limit - base, outfile);
559   else
560     {
561       register FILE *out = outfile;
562       register unsigned char c;
563       register char const *t = base;
564       register size_t column = 0;
565       size_t tab_size = tabsize;
566 
567       while (t < limit)
568 	switch ((c = *t++))
569 	  {
570 	  case '\t':
571 	    {
572 	      size_t spaces = tab_size - column % tab_size;
573 	      column += spaces;
574 	      do
575 		putc (' ', out);
576 	      while (--spaces);
577 	    }
578 	    break;
579 
580 	  case '\r':
581 	    putc (c, out);
582 	    if (flag_format && t < limit && *t != '\n')
583 	      fprintf (out, flag_format, line_flag);
584 	    column = 0;
585 	    break;
586 
587 	  case '\b':
588 	    if (column == 0)
589 	      continue;
590 	    column--;
591 	    putc (c, out);
592 	    break;
593 
594 	  default:
595 	    column += isprint (c) != 0;
596 	    putc (c, out);
597 	    break;
598 	  }
599     }
600 }
601 
/* One-character codes for kinds of change: 0, 'd', 'a', 'c'.
   NOTE(review): presumably indexed by `enum changes' (UNCHANGED, OLD,
   NEW, CHANGED), with the letters standing for delete, add and change;
   confirm against the declaration of that enum.  */
char const change_letter[] = { 0, 'd', 'a', 'c' };
603 
604 /* Translate an internal line number (an index into diff's table of lines)
605    into an actual line number in the input file.
606    The internal line number is I.  FILE points to the data on the file.
607 
608    Internal line numbers count from 0 starting after the prefix.
609    Actual line numbers count from 1 within the entire file.  */
610 
611 lin
612 translate_line_number (struct file_data const *file, lin i)
613 {
614   return i + file->prefix_lines + 1;
615 }
616 
617 /* Translate a line number range.  This is always done for printing,
618    so for convenience translate to long int rather than lin, so that the
619    caller can use printf with "%ld" without casting.  */
620 
621 void
622 translate_range (struct file_data const *file,
623 		 lin a, lin b,
624 		 long int *aptr, long int *bptr)
625 {
626   *aptr = translate_line_number (file, a - 1) + 1;
627   *bptr = translate_line_number (file, b + 1) - 1;
628 }
629 
630 /* Print a pair of line numbers with SEPCHAR, translated for file FILE.
631    If the two numbers are identical, print just one number.
632 
633    Args A and B are internal line numbers.
634    We print the translated (real) line numbers.  */
635 
636 void
637 print_number_range (char sepchar, struct file_data *file, lin a, lin b)
638 {
639   long int trans_a, trans_b;
640   translate_range (file, a, b, &trans_a, &trans_b);
641 
642   /* Note: we can have B < A in the case of a range of no lines.
643      In this case, we should print the line number before the range,
644      which is B.  */
645   if (trans_b > trans_a)
646     fprintf (outfile, "%ld%c%ld", trans_a, sepchar, trans_b);
647   else
648     fprintf (outfile, "%ld", trans_b);
649 }
650 
651 /* Look at a hunk of edit script and report the range of lines in each file
652    that it applies to.  HUNK is the start of the hunk, which is a chain
653    of `struct change'.  The first and last line numbers of file 0 are stored in
654    *FIRST0 and *LAST0, and likewise for file 1 in *FIRST1 and *LAST1.
655    Note that these are internal line numbers that count from 0.
656 
657    If no lines from file 0 are deleted, then FIRST0 is LAST0+1.
658 
659    Return UNCHANGED if only ignorable lines are inserted or deleted,
660    OLD if lines of file 0 are deleted,
661    NEW if lines of file 1 are inserted,
662    and CHANGED if both kinds of changes are found. */
663 
664 enum changes
665 analyze_hunk (struct change *hunk,
666 	      lin *first0, lin *last0,
667 	      lin *first1, lin *last1)
668 {
669   struct change *next;
670   lin l0, l1;
671   lin show_from, show_to;
672   lin i;
673   bool trivial = ignore_blank_lines || ignore_regexp.fastmap;
674   size_t trivial_length = ignore_blank_lines - 1;
675     /* If 0, ignore zero-length lines;
676        if SIZE_MAX, do not ignore lines just because of their length.  */
677   bool skip_leading_white_space =
678     (ignore_blank_lines && IGNORE_SPACE_CHANGE <= ignore_white_space);
679 
680   char const * const *linbuf0 = files[0].linbuf;  /* Help the compiler.  */
681   char const * const *linbuf1 = files[1].linbuf;
682 
683   show_from = show_to = 0;
684 
685   *first0 = hunk->line0;
686   *first1 = hunk->line1;
687 
688   next = hunk;
689   do
690     {
691       l0 = next->line0 + next->deleted - 1;
692       l1 = next->line1 + next->inserted - 1;
693       show_from += next->deleted;
694       show_to += next->inserted;
695 
696       for (i = next->line0; i <= l0 && trivial; i++)
697 	{
698 	  char const *line = linbuf0[i];
699 	  char const *newline = linbuf0[i + 1] - 1;
700 	  size_t len = newline - line;
701 	  char const *p = line;
702 	  if (skip_leading_white_space)
703 	    while (isspace ((unsigned char) *p) && *p != '\n')
704 	      p++;
705 	  if (newline - p != trivial_length
706 	      && (! ignore_regexp.fastmap
707 		  || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
708 	    trivial = 0;
709 	}
710 
711       for (i = next->line1; i <= l1 && trivial; i++)
712 	{
713 	  char const *line = linbuf1[i];
714 	  char const *newline = linbuf1[i + 1] - 1;
715 	  size_t len = newline - line;
716 	  char const *p = line;
717 	  if (skip_leading_white_space)
718 	    while (isspace ((unsigned char) *p) && *p != '\n')
719 	      p++;
720 	  if (newline - p != trivial_length
721 	      && (! ignore_regexp.fastmap
722 		  || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
723 	    trivial = 0;
724 	}
725     }
726   while ((next = next->link) != 0);
727 
728   *last0 = l0;
729   *last1 = l1;
730 
731   /* If all inserted or deleted lines are ignorable,
732      tell the caller to ignore this hunk.  */
733 
734   if (trivial)
735     return UNCHANGED;
736 
737   return (show_from ? OLD : UNCHANGED) | (show_to ? NEW : UNCHANGED);
738 }
739 
/* Return a newly allocated string holding S1, S2 and S3 joined end to
   end.  The storage comes from xmalloc.  */

char *
concat (char const *s1, char const *s2, char const *s3)
{
  size_t len1 = strlen (s1);
  size_t len2 = strlen (s2);
  size_t len3 = strlen (s3);
  char *joined = xmalloc (len1 + len2 + len3 + 1);
  char *tail = joined;

  memcpy (tail, s1, len1);
  tail += len1;
  memcpy (tail, s2, len2);
  tail += len2;
  /* The last copy also brings over the terminating null byte.  */
  memcpy (tail, s3, len3 + 1);

  return joined;
}
749 
/* Allocate SIZE bytes with xmalloc, clear them to zero, and return
   the block.  */

void *
zalloc (size_t size)
{
  /* memset returns its first argument, i.e. the new block.  */
  return memset (xmalloc (size), 0, size);
}
759 
/* Return the newly allocated pathname of the file named FILE in the
   directory DIR.  A "/" is put between the two unless the last
   component of DIR is empty or already ends in a slash.  */

char *
dir_file_pathname (char const *dir, char const *file)
{
  char const *last = last_component (dir);
  size_t last_len = base_len (last);
  char const *separator = "/";

  if (last_len == 0 || last[last_len - 1] == '/')
    separator = "";

  return concat (dir, separator, file);
}
771 
772 void
773 debug_script (struct change *sp)
774 {
775   fflush (stdout);
776 
777   for (; sp; sp = sp->link)
778     {
779       long int line0 = sp->line0;
780       long int line1 = sp->line1;
781       long int deleted = sp->deleted;
782       long int inserted = sp->inserted;
783       fprintf (stderr, "%3ld %3ld delete %ld insert %ld\n",
784 	       line0, line1, deleted, inserted);
785     }
786 
787   fflush (stderr);
788 }
789