xref: /plan9/sys/src/ape/cmd/diff/util.c (revision 0b459c2cb92b7c9d88818e9a2f72e678e5bc4553)
/* Support routines for GNU DIFF.
   Copyright (C) 1988, 1989, 1992, 1993, 1994 Free Software Foundation, Inc.

This file is part of GNU DIFF.

GNU DIFF is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU DIFF is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU DIFF; see the file COPYING.  If not, write to
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */

/* $FreeBSD: src/contrib/diff/util.c,v 1.2.6.2 2000/09/20 02:24:32 jkh Exp $ */
21 
22 #include "diff.h"
23 
/* Program run to paginate the output when -l is given; the build
   may override it by defining PR_PROGRAM beforehand.  */
#ifndef PR_PROGRAM
#define PR_PROGRAM "/bin/pr"
#endif
27 
/* Queue up one-line messages to be printed at the end,
   when -l is specified.  Each message is recorded with a `struct msg'.  */

struct msg
{
  struct msg *next;     /* Following message in the chain, or 0 at the tail.  */
  char const *format;   /* printf-style format used to print the message.  */
  char const *arg1;     /* Arguments handed to FORMAT, in this order.  */
  char const *arg2;
  char const *arg3;     /* May be 0 when the message has fewer arguments.  */
  char const *arg4;     /* Likewise.  */
};
40 
/* Head of the chain of queued messages.  */

static struct msg *msg_chain;

/* Tail of the chain of queued messages: the address of the link
   through which the next message will be attached.  It starts out
   pointing at the head, i.e. the chain is empty.  */

static struct msg **msg_chain_end = &msg_chain;
48 
49 /* Use when a system call returns non-zero status.
50    TEXT should normally be the file name.  */
51 
52 void
perror_with_name(text)53 perror_with_name (text)
54      char const *text;
55 {
56   int e = errno;
57   fprintf (stderr, "%s: ", program_name);
58   errno = e;
59   perror (text);
60 }
61 
62 /* Use when a system call returns non-zero status and that is fatal.  */
63 
64 void
pfatal_with_name(text)65 pfatal_with_name (text)
66      char const *text;
67 {
68   int e = errno;
69   print_message_queue ();
70   fprintf (stderr, "%s: ", program_name);
71   errno = e;
72   perror (text);
73   exit (2);
74 }
75 
76 /* Print an error message from the format-string FORMAT
77    with args ARG1 and ARG2.  */
78 
79 void
error(format,arg,arg1)80 error (format, arg, arg1)
81      char const *format, *arg, *arg1;
82 {
83   fprintf (stderr, "%s: ", program_name);
84   fprintf (stderr, format, arg, arg1);
85   fprintf (stderr, "\n");
86 }
87 
/* Print an error message containing the string M, then exit with
   status 2.  Queued messages are printed before the error.  */

void
fatal (m)
     char const *m;
{
  print_message_queue ();
  /* M is passed as data for "%s", never as a format.  The unused
     second argument is an explicit null pointer so that the call is
     correct even when no prototype for `error' is in scope.  */
  error ("%s", m, (char const *) 0);
  exit (2);
}
98 
/* Like printf, except if -l in effect then save the message and print later.
   This is used for things like "binary files differ" and "Only in ...".
   This is the two-argument form of message5.  */

void
message (format, arg1, arg2)
     char const *format, *arg1, *arg2;
{
  /* The two unused slots are explicit null pointers so that the call is
     correct even when no prototype for message5 is in scope.  */
  message5 (format, arg1, arg2, (char const *) 0, (char const *) 0);
}
108 
109 void
message5(format,arg1,arg2,arg3,arg4)110 message5 (format, arg1, arg2, arg3, arg4)
111      char const *format, *arg1, *arg2, *arg3, *arg4;
112 {
113   if (paginate_flag)
114     {
115       struct msg *new = (struct msg *) xmalloc (sizeof (struct msg));
116       new->format = format;
117       new->arg1 = concat (arg1, "", "");
118       new->arg2 = concat (arg2, "", "");
119       new->arg3 = arg3 ? concat (arg3, "", "") : 0;
120       new->arg4 = arg4 ? concat (arg4, "", "") : 0;
121       new->next = 0;
122       *msg_chain_end = new;
123       msg_chain_end = &new->next;
124     }
125   else
126     {
127       if (sdiff_help_sdiff)
128 	putchar (' ');
129       printf (format, arg1, arg2, arg3, arg4);
130     }
131 }
132 
133 /* Output all the messages that were saved up by calls to `message'.  */
134 
135 void
print_message_queue()136 print_message_queue ()
137 {
138   struct msg *m;
139 
140   for (m = msg_chain; m; m = m->next)
141     printf (m->format, m->arg1, m->arg2, m->arg3, m->arg4);
142 }
143 
144 /* Call before outputting the results of comparing files NAME0 and NAME1
145    to set up OUTFILE, the stdio stream for the output to go to.
146 
147    Usually, OUTFILE is just stdout.  But when -l was specified
148    we fork off a `pr' and make OUTFILE a pipe to it.
149    `pr' then outputs to our stdout.  */
150 
151 static char const *current_name0;
152 static char const *current_name1;
153 static int current_depth;
154 
155 void
setup_output(name0,name1,depth)156 setup_output (name0, name1, depth)
157      char const *name0, *name1;
158      int depth;
159 {
160   current_name0 = name0;
161   current_name1 = name1;
162   current_depth = depth;
163   outfile = 0;
164 }
165 
166 #if HAVE_FORK
167 static pid_t pr_pid;
168 #endif
169 
170 void
begin_output()171 begin_output ()
172 {
173   char *name;
174 
175   if (outfile != 0)
176     return;
177 
178   /* Construct the header of this piece of diff.  */
179   name = xmalloc (strlen (current_name0) + strlen (current_name1)
180 		  + strlen (switch_string) + 7);
181   /* Posix.2 section 4.17.6.1.1 specifies this format.  But there is a
182      bug in the first printing (IEEE Std 1003.2-1992 p 251 l 3304):
183      it says that we must print only the last component of the pathnames.
184      This requirement is silly and does not match historical practice.  */
185   sprintf (name, "diff%s %s %s", switch_string, current_name0, current_name1);
186 
187   if (paginate_flag)
188     {
189       /* Make OUTFILE a pipe to a subsidiary `pr'.  */
190 
191 #if HAVE_FORK
192       int pipes[2];
193 
194       if (pipe (pipes) != 0)
195 	pfatal_with_name ("pipe");
196 
197       fflush (stdout);
198 
199       pr_pid = fork ();
200       if (pr_pid < 0)
201 	pfatal_with_name ("vfork");
202 
203       if (pr_pid == 0)
204 	{
205 	  close (pipes[1]);
206 	  if (pipes[0] != STDIN_FILENO)
207 	    {
208 	      if (dup2 (pipes[0], STDIN_FILENO) < 0)
209 		pfatal_with_name ("dup2");
210 	      close (pipes[0]);
211 	    }
212 #ifdef __FreeBSD__
213 	  execl (PR_PROGRAM, PR_PROGRAM, "-F", "-h", name, 0);
214 #else
215 	  execl (PR_PROGRAM, PR_PROGRAM, "-f", "-h", name, 0);
216 #endif
217 	  pfatal_with_name (PR_PROGRAM);
218 	}
219       else
220 	{
221 	  close (pipes[0]);
222 	  outfile = fdopen (pipes[1], "w");
223 	  if (!outfile)
224 	    pfatal_with_name ("fdopen");
225 	}
226 #else /* ! HAVE_FORK */
227       char *command = xmalloc (4 * strlen (name) + strlen (PR_PROGRAM) + 10);
228       char *p;
229       char const *a = name;
230       sprintf (command, "%s -f -h ", PR_PROGRAM);
231       p = command + strlen (command);
232       SYSTEM_QUOTE_ARG (p, a);
233       *p = 0;
234       outfile = popen (command, "w");
235       if (!outfile)
236 	pfatal_with_name (command);
237       free (command);
238 #endif /* ! HAVE_FORK */
239     }
240   else
241     {
242 
243       /* If -l was not specified, output the diff straight to `stdout'.  */
244 
245       outfile = stdout;
246 
247       /* If handling multiple files (because scanning a directory),
248 	 print which files the following output is about.  */
249       if (current_depth > 0)
250 	printf ("%s\n", name);
251     }
252 
253   free (name);
254 
255   /* A special header is needed at the beginning of context output.  */
256   switch (output_style)
257     {
258     case OUTPUT_CONTEXT:
259       print_context_header (files, 0);
260       break;
261 
262     case OUTPUT_UNIFIED:
263       print_context_header (files, 1);
264       break;
265 
266     default:
267       break;
268     }
269 }
270 
271 /* Call after the end of output of diffs for one file.
272    Close OUTFILE and get rid of the `pr' subfork.  */
273 
274 void
finish_output()275 finish_output ()
276 {
277   if (outfile != 0 && outfile != stdout)
278     {
279       int wstatus;
280       if (ferror (outfile))
281 	fatal ("write error");
282 #if ! HAVE_FORK
283       wstatus = pclose (outfile);
284 #else /* HAVE_FORK */
285       if (fclose (outfile) != 0)
286 	pfatal_with_name ("write error");
287       if (waitpid (pr_pid, &wstatus, 0) < 0)
288 	pfatal_with_name ("waitpid");
289 #endif /* HAVE_FORK */
290       if (wstatus != 0)
291 	fatal ("subsidiary pr failed");
292     }
293 
294   outfile = 0;
295 }
296 
297 /* Compare two lines (typically one from each input file)
298    according to the command line options.
299    For efficiency, this is invoked only when the lines do not match exactly
300    but an option like -i might cause us to ignore the difference.
301    Return nonzero if the lines differ.  */
302 
303 int
line_cmp(s1,s2)304 line_cmp (s1, s2)
305      char const *s1, *s2;
306 {
307   register unsigned char const *t1 = (unsigned char const *) s1;
308   register unsigned char const *t2 = (unsigned char const *) s2;
309 
310   while (1)
311     {
312       register unsigned char c1 = *t1++;
313       register unsigned char c2 = *t2++;
314 
315       /* Test for exact char equality first, since it's a common case.  */
316       if (c1 != c2)
317 	{
318 	  /* Ignore horizontal white space if -b or -w is specified.  */
319 
320 	  if (ignore_all_space_flag)
321 	    {
322 	      /* For -w, just skip past any white space.  */
323 	      while (ISSPACE (c1) && c1 != '\n') c1 = *t1++;
324 	      while (ISSPACE (c2) && c2 != '\n') c2 = *t2++;
325 	    }
326 	  else if (ignore_space_change_flag)
327 	    {
328 	      /* For -b, advance past any sequence of white space in line 1
329 		 and consider it just one Space, or nothing at all
330 		 if it is at the end of the line.  */
331 	      if (ISSPACE (c1))
332 		{
333 		  while (c1 != '\n')
334 		    {
335 		      c1 = *t1++;
336 		      if (! ISSPACE (c1))
337 			{
338 			  --t1;
339 			  c1 = ' ';
340 			  break;
341 			}
342 		    }
343 		}
344 
345 	      /* Likewise for line 2.  */
346 	      if (ISSPACE (c2))
347 		{
348 		  while (c2 != '\n')
349 		    {
350 		      c2 = *t2++;
351 		      if (! ISSPACE (c2))
352 			{
353 			  --t2;
354 			  c2 = ' ';
355 			  break;
356 			}
357 		    }
358 		}
359 
360 	      if (c1 != c2)
361 		{
362 		  /* If we went too far when doing the simple test
363 		     for equality, go back to the first non-white-space
364 		     character in both sides and try again.  */
365 		  if (c2 == ' ' && c1 != '\n'
366 		      && (unsigned char const *) s1 + 1 < t1
367 		      && ISSPACE(t1[-2]))
368 		    {
369 		      --t1;
370 		      continue;
371 		    }
372 		  if (c1 == ' ' && c2 != '\n'
373 		      && (unsigned char const *) s2 + 1 < t2
374 		      && ISSPACE(t2[-2]))
375 		    {
376 		      --t2;
377 		      continue;
378 		    }
379 		}
380 	    }
381 
382 	  /* Lowercase all letters if -i is specified.  */
383 
384 	  if (ignore_case_flag)
385 	    {
386 	      if (ISUPPER (c1))
387 		c1 = tolower (c1);
388 	      if (ISUPPER (c2))
389 		c2 = tolower (c2);
390 	    }
391 
392 	  if (c1 != c2)
393 	    break;
394 	}
395       if (c1 == '\n')
396 	return 0;
397     }
398 
399   return (1);
400 }
401 
/* Hunk-finding function for print_script: return the last change
   belonging to the hunk that begins at START.

   This implementation does no grouping; it returns START itself, so
   each change forms a hunk of its own.  */

struct change *
find_change (start)
     struct change *start;
{
  return start;
}
411 
/* Like find_change: return START itself, so that each change is
   treated as a hunk of its own.  */

struct change *
find_reverse_change (start)
     struct change *start;
{
  return start;
}
418 
419 /* Divide SCRIPT into pieces by calling HUNKFUN and
420    print each piece with PRINTFUN.
421    Both functions take one arg, an edit script.
422 
423    HUNKFUN is called with the tail of the script
424    and returns the last link that belongs together with the start
425    of the tail.
426 
427    PRINTFUN takes a subscript which belongs together (with a null
428    link at the end) and prints it.  */
429 
430 void
print_script(script,hunkfun,printfun)431 print_script (script, hunkfun, printfun)
432      struct change *script;
433      struct change * (*hunkfun) PARAMS((struct change *));
434      void (*printfun) PARAMS((struct change *));
435 {
436   struct change *next = script;
437 
438   while (next)
439     {
440       struct change *this, *end;
441 
442       /* Find a set of changes that belong together.  */
443       this = next;
444       end = (*hunkfun) (next);
445 
446       /* Disconnect them from the rest of the changes,
447 	 making them a hunk, and remember the rest for next iteration.  */
448       next = end->link;
449       end->link = 0;
450 #ifdef DEBUG
451       debug_script (this);
452 #endif
453 
454       /* Print this hunk.  */
455       (*printfun) (this);
456 
457       /* Reconnect the script so it will all be freed properly.  */
458       end->link = next;
459     }
460 }
461 
462 /* Print the text of a single line LINE,
463    flagging it with the characters in LINE_FLAG (which say whether
464    the line is inserted, deleted, changed, etc.).  */
465 
466 void
print_1_line(line_flag,line)467 print_1_line (line_flag, line)
468      char const *line_flag;
469      char const * const *line;
470 {
471   char const *text = line[0], *limit = line[1]; /* Help the compiler.  */
472   FILE *out = outfile; /* Help the compiler some more.  */
473   char const *flag_format = 0;
474 
475   /* If -T was specified, use a Tab between the line-flag and the text.
476      Otherwise use a Space (as Unix diff does).
477      Print neither space nor tab if line-flags are empty.  */
478 
479   if (line_flag && *line_flag)
480     {
481       flag_format = tab_align_flag ? "%s\t" : "%s ";
482       fprintf (out, flag_format, line_flag);
483     }
484 
485   output_1_line (text, limit, flag_format, line_flag);
486 
487   if ((!line_flag || line_flag[0]) && limit[-1] != '\n')
488     fputc ('\n', out);
489 }
490 
491 /* Output a line from TEXT up to LIMIT.  Without -t, output verbatim.
492    With -t, expand white space characters to spaces, and if FLAG_FORMAT
493    is nonzero, output it with argument LINE_FLAG after every
494    internal carriage return, so that tab stops continue to line up.  */
495 
496 void
output_1_line(text,limit,flag_format,line_flag)497 output_1_line (text, limit, flag_format, line_flag)
498      char const *text, *limit, *flag_format, *line_flag;
499 {
500   if (!tab_expand_flag)
501     fwrite (text, sizeof (char), limit - text, outfile);
502   else
503     {
504       register FILE *out = outfile;
505       register unsigned char c;
506       register char const *t = text;
507       register unsigned column = 0;
508 
509       while (t < limit)
510 	switch ((c = *t++))
511 	  {
512 	  case '\t':
513 	    {
514 	      unsigned spaces = TAB_WIDTH - column % TAB_WIDTH;
515 	      column += spaces;
516 	      do
517 		putc (' ', out);
518 	      while (--spaces);
519 	    }
520 	    break;
521 
522 	  case '\r':
523 	    putc (c, out);
524 	    if (flag_format && t < limit && *t != '\n')
525 	      fprintf (out, flag_format, line_flag);
526 	    column = 0;
527 	    break;
528 
529 	  case '\b':
530 	    if (column == 0)
531 	      continue;
532 	    column--;
533 	    putc (c, out);
534 	    break;
535 
536 	  default:
537 	    if (ISPRINT (c))
538 	      column++;
539 	    putc (c, out);
540 	    break;
541 	  }
542     }
543 }
544 
/* Return the letter that describes a change with the given counts:
   'd' when INSERTS is zero, 'a' when INSERTS is nonzero and DELETES
   is zero, and 'c' when both are nonzero.  */

int
change_letter (inserts, deletes)
     int inserts, deletes;
{
  if (inserts && deletes)
    return 'c';
  return inserts ? 'a' : 'd';
}
556 
557 /* Translate an internal line number (an index into diff's table of lines)
558    into an actual line number in the input file.
559    The internal line number is LNUM.  FILE points to the data on the file.
560 
561    Internal line numbers count from 0 starting after the prefix.
562    Actual line numbers count from 1 within the entire file.  */
563 
564 int
translate_line_number(file,lnum)565 translate_line_number (file, lnum)
566      struct file_data const *file;
567      int lnum;
568 {
569   return lnum + file->prefix_lines + 1;
570 }
571 
/* Translate the range of internal line numbers A..B of FILE into
   actual line numbers, stored in *APTR and *BPTR.

   Each end is obtained by translating the line just outside the range
   (A - 1 and B + 1) and stepping one line back inside.  */

void
translate_range (file, a, b, aptr, bptr)
     struct file_data const *file;
     int a, b;
     int *aptr, *bptr;
{
  int before_first = translate_line_number (file, a - 1);
  int after_last = translate_line_number (file, b + 1);

  *aptr = before_first + 1;
  *bptr = after_last - 1;
}
581 
582 /* Print a pair of line numbers with SEPCHAR, translated for file FILE.
583    If the two numbers are identical, print just one number.
584 
585    Args A and B are internal line numbers.
586    We print the translated (real) line numbers.  */
587 
588 void
print_number_range(sepchar,file,a,b)589 print_number_range (sepchar, file, a, b)
590      int sepchar;
591      struct file_data *file;
592      int a, b;
593 {
594   int trans_a, trans_b;
595   translate_range (file, a, b, &trans_a, &trans_b);
596 
597   /* Note: we can have B < A in the case of a range of no lines.
598      In this case, we should print the line number before the range,
599      which is B.  */
600   if (trans_b > trans_a)
601     fprintf (outfile, "%d%c%d", trans_a, sepchar, trans_b);
602   else
603     fprintf (outfile, "%d", trans_b);
604 }
605 
606 /* Look at a hunk of edit script and report the range of lines in each file
607    that it applies to.  HUNK is the start of the hunk, which is a chain
608    of `struct change'.  The first and last line numbers of file 0 are stored in
609    *FIRST0 and *LAST0, and likewise for file 1 in *FIRST1 and *LAST1.
610    Note that these are internal line numbers that count from 0.
611 
612    If no lines from file 0 are deleted, then FIRST0 is LAST0+1.
613 
614    Also set *DELETES nonzero if any lines of file 0 are deleted
615    and set *INSERTS nonzero if any lines of file 1 are inserted.
616    If only ignorable lines are inserted or deleted, both are
617    set to 0.  */
618 
619 void
analyze_hunk(hunk,first0,last0,first1,last1,deletes,inserts)620 analyze_hunk (hunk, first0, last0, first1, last1, deletes, inserts)
621      struct change *hunk;
622      int *first0, *last0, *first1, *last1;
623      int *deletes, *inserts;
624 {
625   int l0, l1, show_from, show_to;
626   int i;
627   int trivial = ignore_blank_lines_flag || ignore_regexp_list;
628   struct change *next;
629 
630   show_from = show_to = 0;
631 
632   *first0 = hunk->line0;
633   *first1 = hunk->line1;
634 
635   next = hunk;
636   do
637     {
638       l0 = next->line0 + next->deleted - 1;
639       l1 = next->line1 + next->inserted - 1;
640       show_from += next->deleted;
641       show_to += next->inserted;
642 
643       for (i = next->line0; i <= l0 && trivial; i++)
644 	if (!ignore_blank_lines_flag || files[0].linbuf[i][0] != '\n')
645 	  {
646 	    struct regexp_list *r;
647 	    char const *line = files[0].linbuf[i];
648 	    int len = files[0].linbuf[i + 1] - line;
649 
650 	    for (r = ignore_regexp_list; r; r = r->next)
651 	      if (0 <= re_search (&r->buf, line, len, 0, len, 0))
652 		break;	/* Found a match.  Ignore this line.  */
653 	    /* If we got all the way through the regexp list without
654 	       finding a match, then it's nontrivial.  */
655 	    if (!r)
656 	      trivial = 0;
657 	  }
658 
659       for (i = next->line1; i <= l1 && trivial; i++)
660 	if (!ignore_blank_lines_flag || files[1].linbuf[i][0] != '\n')
661 	  {
662 	    struct regexp_list *r;
663 	    char const *line = files[1].linbuf[i];
664 	    int len = files[1].linbuf[i + 1] - line;
665 
666 	    for (r = ignore_regexp_list; r; r = r->next)
667 	      if (0 <= re_search (&r->buf, line, len, 0, len, 0))
668 		break;	/* Found a match.  Ignore this line.  */
669 	    /* If we got all the way through the regexp list without
670 	       finding a match, then it's nontrivial.  */
671 	    if (!r)
672 	      trivial = 0;
673 	  }
674     }
675   while ((next = next->link) != 0);
676 
677   *last0 = l0;
678   *last1 = l1;
679 
680   /* If all inserted or deleted lines are ignorable,
681      tell the caller to ignore this hunk.  */
682 
683   if (trivial)
684     show_from = show_to = 0;
685 
686   *deletes = show_from;
687   *inserts = show_to;
688 }
689 
690 /* malloc a block of memory, with fatal error message if we can't do it. */
691 
692 VOID *
xmalloc(size)693 xmalloc (size)
694      size_t size;
695 {
696   register VOID *value;
697 
698   if (size == 0)
699     size = 1;
700 
701   value = (VOID *) malloc (size);
702 
703   if (!value)
704     fatal ("memory exhausted");
705   return value;
706 }
707 
708 /* realloc a block of memory, with fatal error message if we can't do it. */
709 
710 VOID *
xrealloc(old,size)711 xrealloc (old, size)
712      VOID *old;
713      size_t size;
714 {
715   register VOID *value;
716 
717   if (size == 0)
718     size = 1;
719 
720   value = (VOID *) realloc (old, size);
721 
722   if (!value)
723     fatal ("memory exhausted");
724   return value;
725 }
726 
/* Concatenate three strings, returning a newly malloc'd string.
   None of S1, S2, S3 may be null.  The storage comes from xmalloc.  */

char *
concat (s1, s2, s3)
     char const *s1, *s2, *s3;
{
  size_t total = strlen (s1) + strlen (s2) + strlen (s3);
  char *result = xmalloc (total + 1);   /* + 1 for the terminating NUL.  */

  sprintf (result, "%s%s%s", s1, s2, s3);
  return result;
}
738 
/* Yield the newly malloc'd pathname
   of the file in DIR whose filename is FILE.

   A "/" is placed between DIR and FILE, except when
   filename_lastdirchar reports a character that is the very last
   character of DIR (presumably a trailing directory separator --
   confirm against filename_lastdirchar).  */

char *
dir_file_pathname (dir, file)
     char const *dir, *file;
{
  char const *last = filename_lastdirchar (dir);
  char const *separator = "/";

  /* LAST[1] == 0 means LAST points at the final character of DIR.  */
  if (last && !last[1])
    separator = "";

  return concat (dir, separator, file);
}
749 
750 void
debug_script(sp)751 debug_script (sp)
752      struct change *sp;
753 {
754   fflush (stdout);
755   for (; sp; sp = sp->link)
756     fprintf (stderr, "%3d %3d delete %d insert %d\n",
757 	     sp->line0, sp->line1, sp->deleted, sp->inserted);
758   fflush (stderr);
759 }
760