xref: /openbsd-src/gnu/usr.bin/cvs/diff/util.c (revision 443998a44aec1b782e28ea78ab54ad82e5be4c96)
1 /* Support routines for GNU DIFF.
2    Copyright (C) 1988, 1989, 1992, 1993, 1994, 1997, 1998 Free Software Foundation, Inc.
3 
4 This file is part of GNU DIFF.
5 
6 GNU DIFF is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10 
11 GNU DIFF is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GNU DIFF; see the file COPYING.  If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
19 
20 #include "diff.h"
21 
22 #ifdef __STDC__
23 #include <stdarg.h>
24 #else
25 #include <varargs.h>
26 #endif
27 
28 #ifndef strerror
29 extern char *strerror ();
30 #endif
31 
/* One-line messages are queued, to be printed at the end, when -l is
   specified.  Each queued message is one `struct msg' in a singly
   linked list.  */

struct msg
{
  struct msg *next;     /* Following message in the queue, or 0.  */
  char const *format;   /* printf-style format, stored as passed in.  */
  char const *arg1;     /* First argument for FORMAT.  */
  char const *arg2;     /* Second argument for FORMAT.  */
  char const *arg3;     /* Third argument for FORMAT, or 0.  */
  char const *arg4;     /* Fourth argument for FORMAT, or 0.  */
};
44 
/* Head of the chain of queued messages; 0 while the queue is empty.  */

static struct msg *msg_chain;

/* Tail of the chain of queued messages: the place where the next
   message is to be linked in.  It starts out addressing MSG_CHAIN.  */

static struct msg **msg_chain_end = &msg_chain;
52 
53 /* Use when a system call returns non-zero status.
54    TEXT should normally be the file name.  */
55 
56 void
57 perror_with_name (text)
58      char const *text;
59 {
60   int e = errno;
61 
62   if (callbacks && callbacks->error)
63     (*callbacks->error) ("%s: %s", text, strerror (e));
64   else
65     {
66       fprintf (stderr, "%s: ", diff_program_name);
67       errno = e;
68       perror (text);
69     }
70 }
71 
/* Use when a system call returns non-zero status and that is fatal.
   Prints any queued messages, reports TEXT together with the message
   for the current `errno' exactly as perror_with_name does, and then
   invokes DIFF_ABORT (2).  */

void
pfatal_with_name (text)
     char const *text;
{
  /* Capture errno before printing the queue can disturb it.  */
  int saved_errno = errno;

  print_message_queue ();

  /* perror_with_name reads errno on entry, so hand it the saved value.  */
  errno = saved_errno;
  perror_with_name (text);

  DIFF_ABORT (2);
}
90 
91 /* Print an error message from the format-string FORMAT
92    with args ARG1 and ARG2.  */
93 
94 void
95 diff_error (format, arg, arg1)
96      char const *format, *arg, *arg1;
97 {
98   if (callbacks && callbacks->error)
99     (*callbacks->error) (format, arg, arg1);
100   else
101     {
102       fprintf (stderr, "%s: ", diff_program_name);
103       fprintf (stderr, format, arg, arg1);
104       fprintf (stderr, "\n");
105     }
106 }
107 
/* Print any queued messages, then an error message containing the
   string M, then invoke DIFF_ABORT (2).  */

void
fatal (m)
     char const *m;
{
  print_message_queue ();

  /* Pass M through "%s" so that it is never interpreted as a format.  */
  diff_error ("%s", m, (char const *) 0);

  DIFF_ABORT (2);
}
118 
/* Like printf, except if -l in effect then save the message and print later.
   This is used for things like "binary files differ" and "Only in ...".
   FORMAT takes the two string arguments ARG1 and ARG2; this is
   message5 with its last two arguments null.  */

void
message (format, arg1, arg2)
     char const *format, *arg1, *arg2;
{
  char const *unused = 0;

  message5 (format, arg1, arg2, unused, unused);
}
128 
129 void
130 message5 (format, arg1, arg2, arg3, arg4)
131      char const *format, *arg1, *arg2, *arg3, *arg4;
132 {
133   if (paginate_flag)
134     {
135       struct msg *new = (struct msg *) xmalloc (sizeof (struct msg));
136       new->format = format;
137       new->arg1 = concat (arg1, "", "");
138       new->arg2 = concat (arg2, "", "");
139       new->arg3 = arg3 ? concat (arg3, "", "") : 0;
140       new->arg4 = arg4 ? concat (arg4, "", "") : 0;
141       new->next = 0;
142       *msg_chain_end = new;
143       msg_chain_end = &new->next;
144     }
145   else
146     {
147       if (sdiff_help_sdiff)
148 	write_output (" ", 1);
149       printf_output (format, arg1, arg2, arg3, arg4);
150     }
151 }
152 
153 /* Output all the messages that were saved up by calls to `message'.  */
154 
155 void
156 print_message_queue ()
157 {
158   struct msg *m;
159 
160   for (m = msg_chain; m; m = m->next)
161     printf_output (m->format, m->arg1, m->arg2, m->arg3, m->arg4);
162 }
163 
/* The two file names and the depth most recently passed to
   setup_output; begin_output reads them.  */

static char const *current_name0;
static char const *current_name1;
static int current_depth;

/* Nonzero from the time begin_output starts the output until
   finish_output is called.  */

static int output_in_progress = 0;

/* Call before outputting the results of comparing files NAME0 and NAME1
   to set up OUTFILE, the stdio stream for the output to go to.
   This function only records NAME0, NAME1 and DEPTH; the stream
   itself is prepared by begin_output.

   Usually, OUTFILE is just stdout.  But when -l was specified
   we fork off a `pr' and make OUTFILE a pipe to it.
   `pr' then outputs to our stdout.  */

void
setup_output (name0, name1, depth)
     char const *name0, *name1;
     int depth;
{
  current_depth = depth;
  current_name0 = name0;
  current_name1 = name1;
}
186 
187 #if HAVE_FORK && defined (PR_PROGRAM)
188 static pid_t pr_pid;
189 #endif
190 
191 void
192 begin_output ()
193 {
194   char *name;
195 
196   if (output_in_progress)
197     return;
198   output_in_progress = 1;
199 
200   /* Construct the header of this piece of diff.  */
201   name = xmalloc (strlen (current_name0) + strlen (current_name1)
202 		  + strlen (switch_string) + 7);
203   /* Posix.2 section 4.17.6.1.1 specifies this format.  But there is a
204      bug in the first printing (IEEE Std 1003.2-1992 p 251 l 3304):
205      it says that we must print only the last component of the pathnames.
206      This requirement is silly and does not match historical practice.  */
207   sprintf (name, "diff%s %s %s", switch_string, current_name0, current_name1);
208 
209   if (paginate_flag && callbacks && callbacks->write_output)
210     fatal ("can't paginate when using library callbacks");
211 
212   if (paginate_flag)
213     {
214       /* Make OUTFILE a pipe to a subsidiary `pr'.  */
215 
216 #ifdef PR_PROGRAM
217 
218 # if HAVE_FORK
219       int pipes[2];
220 
221       if (pipe (pipes) != 0)
222 	pfatal_with_name ("pipe");
223 
224       fflush (stdout);
225 
226       pr_pid = vfork ();
227       if (pr_pid < 0)
228 	pfatal_with_name ("vfork");
229 
230       if (pr_pid == 0)
231 	{
232 	  close (pipes[1]);
233 	  if (pipes[0] != STDIN_FILENO)
234 	    {
235 	      if (dup2 (pipes[0], STDIN_FILENO) < 0)
236 		pfatal_with_name ("dup2");
237 	      close (pipes[0]);
238 	    }
239 
240 	  execl (PR_PROGRAM, PR_PROGRAM, "-f", "-h", name, 0);
241 	  pfatal_with_name (PR_PROGRAM);
242 	}
243       else
244 	{
245 	  close (pipes[0]);
246 	  outfile = fdopen (pipes[1], "w");
247 	  if (!outfile)
248 	    pfatal_with_name ("fdopen");
249 	}
250 # else /* ! HAVE_FORK */
251       char *command = xmalloc (4 * strlen (name) + strlen (PR_PROGRAM) + 10);
252       char *p;
253       char const *a = name;
254       sprintf (command, "%s -f -h ", PR_PROGRAM);
255       p = command + strlen (command);
256       SYSTEM_QUOTE_ARG (p, a);
257       *p = 0;
258       outfile = popen (command, "w");
259       if (!outfile)
260 	pfatal_with_name (command);
261       free (command);
262 # endif /* ! HAVE_FORK */
263 #else
264       fatal ("This port does not support the --paginate option to diff.");
265 #endif
266     }
267   else
268     {
269 
270       /* If -l was not specified, output the diff straight to `stdout'.  */
271 
272       /* If handling multiple files (because scanning a directory),
273 	 print which files the following output is about.  */
274       if (current_depth > 0)
275 	printf_output ("%s\n", name);
276     }
277 
278   free (name);
279 
280   /* A special header is needed at the beginning of context output.  */
281   switch (output_style)
282     {
283     case OUTPUT_CONTEXT:
284       print_context_header (files, 0);
285       break;
286 
287     case OUTPUT_UNIFIED:
288       print_context_header (files, 1);
289       break;
290 
291     default:
292       break;
293     }
294 }
295 
296 /* Call after the end of output of diffs for one file.
297    If -l was given, close OUTFILE and get rid of the `pr' subfork.  */
298 
299 void
300 finish_output ()
301 {
302   if (paginate_flag && outfile != 0 && outfile != stdout)
303     {
304 #ifdef PR_PROGRAM
305       int wstatus;
306       if (ferror (outfile))
307 	fatal ("write error");
308 # if ! HAVE_FORK
309       wstatus = pclose (outfile);
310 # else /* HAVE_FORK */
311       if (fclose (outfile) != 0)
312 	pfatal_with_name ("write error");
313       if (waitpid (pr_pid, &wstatus, 0) < 0)
314 	pfatal_with_name ("waitpid");
315 # endif /* HAVE_FORK */
316       if (wstatus != 0)
317 	fatal ("subsidiary pr failed");
318 #else
319       fatal ("internal error in finish_output");
320 #endif
321     }
322 
323   output_in_progress = 0;
324 }
325 
326 /* Write something to the output file.  */
327 
328 void
329 write_output (text, len)
330      char const *text;
331      size_t len;
332 {
333   if (callbacks && callbacks->write_output)
334     (*callbacks->write_output) (text, len);
335   else if (len == 1)
336     putc (*text, outfile);
337   else
338     fwrite (text, sizeof (char), len, outfile);
339 }
340 
341 /* Printf something to the output file.  */
342 
343 #ifdef __STDC__
344 #define VA_START(args, lastarg) va_start(args, lastarg)
345 #else /* ! __STDC__ */
346 #define VA_START(args, lastarg) va_start(args)
347 #endif /* __STDC__ */
348 
349 void
350 #if defined (__STDC__)
351 printf_output (const char *format, ...)
352 #else
353 printf_output (format, va_alist)
354      char const *format;
355      va_dcl
356 #endif
357 {
358   va_list args;
359 
360   VA_START (args, format);
361   if (callbacks && callbacks->write_output)
362     {
363       char *p;
364 
365       p = NULL;
366       vasprintf (&p, format, args);
367       if (p == NULL)
368 	fatal ("out of memory");
369       (*callbacks->write_output) (p, strlen (p));
370       free (p);
371     }
372   else
373     vfprintf (outfile, format, args);
374   va_end (args);
375 }
376 
377 /* Flush the output file.  */
378 
379 void
380 flush_output ()
381 {
382   if (callbacks && callbacks->flush_output)
383     (*callbacks->flush_output) ();
384   else
385     fflush (outfile);
386 }
387 
388 /* Compare two lines (typically one from each input file)
389    according to the command line options.
390    For efficiency, this is invoked only when the lines do not match exactly
391    but an option like -i might cause us to ignore the difference.
392    Return nonzero if the lines differ.  */
393 
394 int
395 line_cmp (s1, s2)
396      char const *s1, *s2;
397 {
398   register unsigned char const *t1 = (unsigned char const *) s1;
399   register unsigned char const *t2 = (unsigned char const *) s2;
400 
401   while (1)
402     {
403       register unsigned char c1 = *t1++;
404       register unsigned char c2 = *t2++;
405 
406       /* Test for exact char equality first, since it's a common case.  */
407       if (c1 != c2)
408 	{
409 	  /* Ignore horizontal white space if -b or -w is specified.  */
410 
411 	  if (ignore_all_space_flag)
412 	    {
413 	      /* For -w, just skip past any white space.  */
414 	      while (ISSPACE (c1) && c1 != '\n') c1 = *t1++;
415 	      while (ISSPACE (c2) && c2 != '\n') c2 = *t2++;
416 	    }
417 	  else if (ignore_space_change_flag)
418 	    {
419 	      /* For -b, advance past any sequence of white space in line 1
420 		 and consider it just one Space, or nothing at all
421 		 if it is at the end of the line.  */
422 	      if (ISSPACE (c1))
423 		{
424 		  while (c1 != '\n')
425 		    {
426 		      c1 = *t1++;
427 		      if (! ISSPACE (c1))
428 			{
429 			  --t1;
430 			  c1 = ' ';
431 			  break;
432 			}
433 		    }
434 		}
435 
436 	      /* Likewise for line 2.  */
437 	      if (ISSPACE (c2))
438 		{
439 		  while (c2 != '\n')
440 		    {
441 		      c2 = *t2++;
442 		      if (! ISSPACE (c2))
443 			{
444 			  --t2;
445 			  c2 = ' ';
446 			  break;
447 			}
448 		    }
449 		}
450 
451 	      if (c1 != c2)
452 		{
453 		  /* If we went too far when doing the simple test
454 		     for equality, go back to the first non-white-space
455 		     character in both sides and try again.  */
456 		  if (c2 == ' ' && c1 != '\n'
457 		      && (unsigned char const *) s1 + 1 < t1
458 		      && ISSPACE(t1[-2]))
459 		    {
460 		      --t1;
461 		      continue;
462 		    }
463 		  if (c1 == ' ' && c2 != '\n'
464 		      && (unsigned char const *) s2 + 1 < t2
465 		      && ISSPACE(t2[-2]))
466 		    {
467 		      --t2;
468 		      continue;
469 		    }
470 		}
471 	    }
472 
473 	  /* Lowercase all letters if -i is specified.  */
474 
475 	  if (ignore_case_flag)
476 	    {
477 	      if (ISUPPER (c1))
478 		c1 = tolower (c1);
479 	      if (ISUPPER (c2))
480 		c2 = tolower (c2);
481 	    }
482 
483 	  if (c1 != c2)
484 	    break;
485 	}
486       if (c1 == '\n')
487 	return 0;
488     }
489 
490   return (1);
491 }
492 
/* Find the consecutive changes at the start of the script START.
   Return the last link before the first gap.
   As written, that link is START itself.  */

struct change *
find_change (start)
     struct change *start;
{
  struct change *last_link = start;

  return last_link;
}
502 
/* Counterpart of find_change with the same interface.
   As written, it likewise returns START itself.  */

struct change *
find_reverse_change (start)
     struct change *start;
{
  struct change *last_link = start;

  return last_link;
}
509 
510 /* Divide SCRIPT into pieces by calling HUNKFUN and
511    print each piece with PRINTFUN.
512    Both functions take one arg, an edit script.
513 
514    HUNKFUN is called with the tail of the script
515    and returns the last link that belongs together with the start
516    of the tail.
517 
518    PRINTFUN takes a subscript which belongs together (with a null
519    link at the end) and prints it.  */
520 
521 void
522 print_script (script, hunkfun, printfun)
523      struct change *script;
524      struct change * (*hunkfun) PARAMS((struct change *));
525      void (*printfun) PARAMS((struct change *));
526 {
527   struct change *next = script;
528 
529   while (next)
530     {
531       struct change *this, *end;
532 
533       /* Find a set of changes that belong together.  */
534       this = next;
535       end = (*hunkfun) (next);
536 
537       /* Disconnect them from the rest of the changes,
538 	 making them a hunk, and remember the rest for next iteration.  */
539       next = end->link;
540       end->link = 0;
541 #ifdef DEBUG
542       debug_script (this);
543 #endif
544 
545       /* Print this hunk.  */
546       (*printfun) (this);
547 
548       /* Reconnect the script so it will all be freed properly.  */
549       end->link = next;
550     }
551 }
552 
553 /* Print the text of a single line LINE,
554    flagging it with the characters in LINE_FLAG (which say whether
555    the line is inserted, deleted, changed, etc.).  */
556 
557 void
558 print_1_line (line_flag, line)
559      char const *line_flag;
560      char const * const *line;
561 {
562   char const *text = line[0], *limit = line[1]; /* Help the compiler.  */
563   char const *flag_format = 0;
564 
565   /* If -T was specified, use a Tab between the line-flag and the text.
566      Otherwise use a Space (as Unix diff does).
567      Print neither space nor tab if line-flags are empty.  */
568 
569   if (line_flag && *line_flag)
570     {
571       flag_format = tab_align_flag ? "%s\t" : "%s ";
572       printf_output (flag_format, line_flag);
573     }
574 
575   output_1_line (text, limit, flag_format, line_flag);
576 
577   if ((!line_flag || line_flag[0]) && limit[-1] != '\n')
578     printf_output ("\n\\ No newline at end of file\n");
579 }
580 
581 /* Output a line from TEXT up to LIMIT.  Without -t, output verbatim.
582    With -t, expand white space characters to spaces, and if FLAG_FORMAT
583    is nonzero, output it with argument LINE_FLAG after every
584    internal carriage return, so that tab stops continue to line up.  */
585 
586 void
587 output_1_line (text, limit, flag_format, line_flag)
588      char const *text, *limit, *flag_format, *line_flag;
589 {
590   if (!tab_expand_flag)
591     write_output (text, limit - text);
592   else
593     {
594       register unsigned char c;
595       register char const *t = text;
596       register unsigned column = 0;
597       /* CC is used to avoid taking the address of the register
598          variable C.  */
599       char cc;
600 
601       while (t < limit)
602 	switch ((c = *t++))
603 	  {
604 	  case '\t':
605 	    {
606 	      unsigned spaces = TAB_WIDTH - column % TAB_WIDTH;
607 	      column += spaces;
608 	      do
609 		write_output (" ", 1);
610 	      while (--spaces);
611 	    }
612 	    break;
613 
614 	  case '\r':
615 	    write_output ("\r", 1);
616 	    if (flag_format && t < limit && *t != '\n')
617 	      printf_output (flag_format, line_flag);
618 	    column = 0;
619 	    break;
620 
621 	  case '\b':
622 	    if (column == 0)
623 	      continue;
624 	    column--;
625 	    write_output ("\b", 1);
626 	    break;
627 
628 	  default:
629 	    if (ISPRINT (c))
630 	      column++;
631 	    cc = c;
632 	    write_output (&cc, 1);
633 	    break;
634 	  }
635     }
636 }
637 
/* Return the letter for a change, given whether it INSERTS and
   whether it DELETES lines (each nonzero for yes):
   'd' when nothing is inserted, otherwise 'a' when nothing is
   deleted, otherwise 'c'.  */

int
change_letter (inserts, deletes)
     int inserts, deletes;
{
  return !inserts ? 'd' : !deletes ? 'a' : 'c';
}
649 
650 /* Translate an internal line number (an index into diff's table of lines)
651    into an actual line number in the input file.
652    The internal line number is LNUM.  FILE points to the data on the file.
653 
654    Internal line numbers count from 0 starting after the prefix.
655    Actual line numbers count from 1 within the entire file.  */
656 
657 int
658 translate_line_number (file, lnum)
659      struct file_data const *file;
660      int lnum;
661 {
662   return lnum + file->prefix_lines + 1;
663 }
664 
/* Translate the internal line numbers A and B of FILE into actual
   line numbers, storing them in *APTR and *BPTR respectively.
   Each end is translated from its neighbor outside the range (A - 1
   and B + 1) and then moved back by one.  */

void
translate_range (file, a, b, aptr, bptr)
     struct file_data const *file;
     int a, b;
     int *aptr, *bptr;
{
  int before_first = translate_line_number (file, a - 1);
  int after_last = translate_line_number (file, b + 1);

  *aptr = before_first + 1;
  *bptr = after_last - 1;
}
674 
/* Print a pair of line numbers with SEPCHAR, translated for file FILE.
   If the two numbers are identical, print just one number.

   Args A and B are internal line numbers.
   We print the translated (real) line numbers.  */

void
print_number_range (sepchar, file, a, b)
     int sepchar;
     struct file_data *file;
     int a, b;
{
  int first, last;

  translate_range (file, a, b, &first, &last);

  /* Note: we can have B < A in the case of a range of no lines.
     In this case, we should print the line number before the range,
     which is B.  */
  if (last <= first)
    printf_output ("%d", last);
  else
    printf_output ("%d%c%d", first, sepchar, last);
}
698 
699 /* Look at a hunk of edit script and report the range of lines in each file
700    that it applies to.  HUNK is the start of the hunk, which is a chain
701    of `struct change'.  The first and last line numbers of file 0 are stored in
702    *FIRST0 and *LAST0, and likewise for file 1 in *FIRST1 and *LAST1.
703    Note that these are internal line numbers that count from 0.
704 
705    If no lines from file 0 are deleted, then FIRST0 is LAST0+1.
706 
707    Also set *DELETES nonzero if any lines of file 0 are deleted
708    and set *INSERTS nonzero if any lines of file 1 are inserted.
709    If only ignorable lines are inserted or deleted, both are
710    set to 0.  */
711 
712 void
713 analyze_hunk (hunk, first0, last0, first1, last1, deletes, inserts)
714      struct change *hunk;
715      int *first0, *last0, *first1, *last1;
716      int *deletes, *inserts;
717 {
718   int l0, l1, show_from, show_to;
719   int i;
720   int trivial = ignore_blank_lines_flag || ignore_regexp_list;
721   struct change *next;
722 
723   show_from = show_to = 0;
724 
725   *first0 = hunk->line0;
726   *first1 = hunk->line1;
727 
728   next = hunk;
729   do
730     {
731       l0 = next->line0 + next->deleted - 1;
732       l1 = next->line1 + next->inserted - 1;
733       show_from += next->deleted;
734       show_to += next->inserted;
735 
736       for (i = next->line0; i <= l0 && trivial; i++)
737 	if (!ignore_blank_lines_flag || files[0].linbuf[i][0] != '\n')
738 	  {
739 	    struct regexp_list *r;
740 	    char const *line = files[0].linbuf[i];
741 	    int len = files[0].linbuf[i + 1] - line;
742 
743 	    for (r = ignore_regexp_list; r; r = r->next)
744 	      if (0 <= re_search (&r->buf, line, len, 0, len, 0))
745 		break;	/* Found a match.  Ignore this line.  */
746 	    /* If we got all the way through the regexp list without
747 	       finding a match, then it's nontrivial.  */
748 	    if (!r)
749 	      trivial = 0;
750 	  }
751 
752       for (i = next->line1; i <= l1 && trivial; i++)
753 	if (!ignore_blank_lines_flag || files[1].linbuf[i][0] != '\n')
754 	  {
755 	    struct regexp_list *r;
756 	    char const *line = files[1].linbuf[i];
757 	    int len = files[1].linbuf[i + 1] - line;
758 
759 	    for (r = ignore_regexp_list; r; r = r->next)
760 	      if (0 <= re_search (&r->buf, line, len, 0, len, 0))
761 		break;	/* Found a match.  Ignore this line.  */
762 	    /* If we got all the way through the regexp list without
763 	       finding a match, then it's nontrivial.  */
764 	    if (!r)
765 	      trivial = 0;
766 	  }
767     }
768   while ((next = next->link) != 0);
769 
770   *last0 = l0;
771   *last1 = l1;
772 
773   /* If all inserted or deleted lines are ignorable,
774      tell the caller to ignore this hunk.  */
775 
776   if (trivial)
777     show_from = show_to = 0;
778 
779   *deletes = show_from;
780   *inserts = show_to;
781 }
782 
/* Concatenate the three strings S1, S2 and S3, returning the result
   in a newly allocated string obtained from xmalloc.  */

char *
concat (s1, s2, s3)
     char const *s1, *s2, *s3;
{
  size_t n1 = strlen (s1);
  size_t n2 = strlen (s2);
  size_t n3 = strlen (s3);
  char *result = xmalloc (n1 + n2 + n3 + 1);

  /* Each copy overwrites the terminator left by the one before it;
     the last one supplies the final terminator.  */
  strcpy (result, s1);
  strcpy (result + n1, s2);
  strcpy (result + n1 + n2, s3);
  return result;
}
794 
/* Yield the newly malloc'd pathname
   of the file in DIR whose filename is FILE.
   A "/" is put between DIR and FILE, except when filename_lastdirchar
   returns a pointer to the final character of DIR (presumably
   meaning that DIR already ends in a directory separator).  */

char *
dir_file_pathname (dir, file)
     char const *dir, *file;
{
  char const *p = filename_lastdirchar (dir);
  char const *separator = "/";

  /* P is the last character of DIR when the character after it is
     the terminating null.  */
  if (p && !p[1])
    separator = "";

  return concat (dir, separator, file);
}
805 
806 void
807 debug_script (sp)
808      struct change *sp;
809 {
810   fflush (stdout);
811   for (; sp; sp = sp->link)
812     fprintf (stderr, "%3d %3d delete %d insert %d\n",
813 	     sp->line0, sp->line1, sp->deleted, sp->inserted);
814   fflush (stderr);
815 }
816