xref: /netbsd-src/external/gpl2/diffutils/dist/src/util.c (revision 372807758056e79bf39280a322290dcebdfd4c30)
1 /*	$NetBSD: util.c,v 1.2 2020/12/13 00:04:40 roy Exp $	*/
2 
3 /* Support routines for GNU DIFF.
4 
5    Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002
6    Free Software Foundation, Inc.
7 
8    This file is part of GNU DIFF.
9 
10    GNU DIFF is free software; you can redistribute it and/or modify
11    it under the terms of the GNU General Public License as published by
12    the Free Software Foundation; either version 2, or (at your option)
13    any later version.
14 
15    GNU DIFF is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; see the file COPYING.
22    If not, write to the Free Software Foundation,
23    59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
24 
25 #include "diff.h"
26 #include <dirname.h>
27 #include <error.h>
28 #include <quotesys.h>
29 #include <regex.h>
30 #include <xalloc.h>
31 
/* Name of the subsidiary `pr' program, taken from PR_PROGRAM.
   begin_output runs it when output is paginated, and finish_output
   names it in its diagnostics.  */
char const pr_program[] = PR_PROGRAM;
33 
/* Queue up one-line messages to be printed at the end,
   when -l is specified.  Each message is recorded with a `struct msg'.  */

struct msg
{
  struct msg *next; /* Next queued message, or null for the last one.  */
  /* message5 allocates each node with enough room after ARGS to hold
     all five strings, so ARGS really extends past its declared size.  */
  char args[1]; /* Format + 4 args, each '\0' terminated, concatenated.  */
};

/* Head of the chain of queued messages.  */

static struct msg *msg_chain;

/* Tail of the chain of queued messages: the address of the link into
   which the next queued message is stored.  */

static struct msg **msg_chain_end = &msg_chain;
50 
/* Report a failed system call without exiting.
   NAME, normally the file name involved, is printed together with the
   description of the current errno value.  */

void
perror_with_name (char const *name)
{
  int call_errno = errno;

  error (0, call_errno, "%s", name);
}
59 
60 /* Use when a system call returns non-zero status and that is fatal.  */
61 
62 void
pfatal_with_name(char const * name)63 pfatal_with_name (char const *name)
64 {
65   int e = errno;
66   print_message_queue ();
67   error (EXIT_TROUBLE, e, "%s", name);
68   abort ();
69 }
70 
71 /* Print an error message containing MSGID, then exit.  */
72 
73 void
fatal(char const * msgid)74 fatal (char const *msgid)
75 {
76   print_message_queue ();
77   error (EXIT_TROUBLE, 0, "%s", _(msgid));
78   abort ();
79 }
80 
/* Two-argument front end to message5: behaves like printf, except that
   when -l is in effect the message is saved and printed later.
   This is used for things like "Only in ...".  */

void
message (char const *format_msgid, char const *arg1, char const *arg2)
{
  char const *const unused = 0;

  message5 (format_msgid, arg1, arg2, unused, unused);
}
89 
90 void
message5(char const * format_msgid,char const * arg1,char const * arg2,char const * arg3,char const * arg4)91 message5 (char const *format_msgid, char const *arg1, char const *arg2,
92 	  char const *arg3, char const *arg4)
93 {
94   if (paginate)
95     {
96       char *p;
97       char const *arg[5];
98       int i;
99       size_t size[5];
100       size_t total_size = offsetof (struct msg, args);
101       struct msg *new;
102 
103       arg[0] = format_msgid;
104       arg[1] = arg1;
105       arg[2] = arg2;
106       arg[3] = arg3 ? arg3 : "";
107       arg[4] = arg4 ? arg4 : "";
108 
109       for (i = 0;  i < 5;  i++)
110 	total_size += size[i] = strlen (arg[i]) + 1;
111 
112       new = xmalloc (total_size);
113 
114       for (i = 0, p = new->args;  i < 5;  p += size[i++])
115 	memcpy (p, arg[i], size[i]);
116 
117       *msg_chain_end = new;
118       new->next = 0;
119       msg_chain_end = &new->next;
120     }
121   else
122     {
123       if (sdiff_merge_assist)
124 	putchar (' ');
125       printf (_(format_msgid), arg1, arg2, arg3, arg4);
126     }
127 }
128 
129 /* Output all the messages that were saved up by calls to `message'.  */
130 
131 void
print_message_queue(void)132 print_message_queue (void)
133 {
134   char const *arg[5];
135   int i;
136   struct msg *m = msg_chain;
137 
138   while (m)
139     {
140       struct msg *next = m->next;
141       arg[0] = m->args;
142       for (i = 0;  i < 4;  i++)
143 	arg[i + 1] = arg[i] + strlen (arg[i]) + 1;
144       printf (_(arg[0]), arg[1], arg[2], arg[3], arg[4]);
145       free (m);
146       m = next;
147     }
148 }
149 
150 /* Call before outputting the results of comparing files NAME0 and NAME1
151    to set up OUTFILE, the stdio stream for the output to go to.
152 
153    Usually, OUTFILE is just stdout.  But when -l was specified
154    we fork off a `pr' and make OUTFILE a pipe to it.
155    `pr' then outputs to our stdout.  */
156 
157 static char const *current_name0;
158 static char const *current_name1;
159 static bool currently_recursive;
160 
161 void
setup_output(char const * name0,char const * name1,bool recursive)162 setup_output (char const *name0, char const *name1, bool recursive)
163 {
164   current_name0 = name0;
165   current_name1 = name1;
166   currently_recursive = recursive;
167   outfile = 0;
168 }
169 
170 #if HAVE_WORKING_FORK || HAVE_WORKING_VFORK
171 static pid_t pr_pid;
172 #endif
173 
174 void
begin_output(void)175 begin_output (void)
176 {
177   char *name;
178 
179   if (outfile != 0)
180     return;
181 
182   /* Construct the header of this piece of diff.  */
183   name = xmalloc (strlen (current_name0) + strlen (current_name1)
184 		  + strlen (switch_string) + 7);
185 
186   /* POSIX 1003.1-2001 specifies this format.  But there are some bugs in
187      the standard: it says that we must print only the last component
188      of the pathnames, and it requires two spaces after "diff" if
189      there are no options.  These requirements are silly and do not
190      match historical practice.  */
191   sprintf (name, "diff%s %s %s", switch_string, current_name0, current_name1);
192 
193   if (paginate)
194     {
195       if (fflush (stdout) != 0)
196 	pfatal_with_name (_("write failed"));
197 
198       /* Make OUTFILE a pipe to a subsidiary `pr'.  */
199       {
200 #if HAVE_WORKING_FORK || HAVE_WORKING_VFORK
201 	int pipes[2];
202 
203 	if (pipe (pipes) != 0)
204 	  pfatal_with_name ("pipe");
205 
206 	pr_pid = vfork ();
207 	if (pr_pid < 0)
208 	  pfatal_with_name ("fork");
209 
210 	if (pr_pid == 0)
211 	  {
212 	    close (pipes[1]);
213 	    if (pipes[0] != STDIN_FILENO)
214 	      {
215 		if (dup2 (pipes[0], STDIN_FILENO) < 0)
216 		  pfatal_with_name ("dup2");
217 		close (pipes[0]);
218 	      }
219 
220 	    execl (pr_program, pr_program, "-h", name, NULL);
221 	    _exit (errno == ENOEXEC ? 126 : 127);
222 	  }
223 	else
224 	  {
225 	    close (pipes[0]);
226 	    outfile = fdopen (pipes[1], "w");
227 	    if (!outfile)
228 	      pfatal_with_name ("fdopen");
229 	  }
230 #else
231 	char *command = xmalloc (sizeof pr_program - 1 + 7
232 				 + quote_system_arg ((char *) 0, name) + 1);
233 	char *p;
234 	sprintf (command, "%s -f -h ", pr_program);
235 	p = command + sizeof pr_program - 1 + 7;
236 	p += quote_system_arg (p, name);
237 	*p = 0;
238 	errno = 0;
239 	outfile = popen (command, "w");
240 	if (!outfile)
241 	  pfatal_with_name (command);
242 	free (command);
243 #endif
244       }
245     }
246   else
247     {
248 
249       /* If -l was not specified, output the diff straight to `stdout'.  */
250 
251       outfile = stdout;
252 
253       /* If handling multiple files (because scanning a directory),
254 	 print which files the following output is about.  */
255       if (currently_recursive)
256 	printf ("%s\n", name);
257     }
258 
259   free (name);
260 
261   /* A special header is needed at the beginning of context output.  */
262   switch (output_style)
263     {
264     case OUTPUT_CONTEXT:
265       print_context_header (files, 0);
266       break;
267 
268     case OUTPUT_UNIFIED:
269       print_context_header (files, 1);
270       break;
271 
272     default:
273       break;
274     }
275 }
276 
277 /* Call after the end of output of diffs for one file.
278    Close OUTFILE and get rid of the `pr' subfork.  */
279 
280 void
finish_output(void)281 finish_output (void)
282 {
283   if (outfile != 0 && outfile != stdout)
284     {
285       int wstatus;
286       int werrno = 0;
287       if (ferror (outfile))
288 	fatal ("write failed");
289 #if ! (HAVE_WORKING_FORK || HAVE_WORKING_VFORK)
290       wstatus = pclose (outfile);
291       if (wstatus == -1)
292 	werrno = errno;
293 #else
294       if (fclose (outfile) != 0)
295 	pfatal_with_name (_("write failed"));
296       if (waitpid (pr_pid, &wstatus, 0) < 0)
297 	pfatal_with_name ("waitpid");
298 #endif
299       if (! werrno && WIFEXITED (wstatus) && WEXITSTATUS (wstatus) == 127)
300 	error (EXIT_TROUBLE, 0, _("subsidiary program `%s' not found"),
301 	       pr_program);
302       if (wstatus != 0)
303 	error (EXIT_TROUBLE, werrno, _("subsidiary program `%s' failed"),
304 	       pr_program);
305     }
306 
307   outfile = 0;
308 }
309 
310 /* Compare two lines (typically one from each input file)
311    according to the command line options.
312    For efficiency, this is invoked only when the lines do not match exactly
313    but an option like -i might cause us to ignore the difference.
314    Return nonzero if the lines differ.  */
315 
316 bool
lines_differ(char const * s1,char const * s2)317 lines_differ (char const *s1, char const *s2)
318 {
319   register unsigned char const *t1 = (unsigned char const *) s1;
320   register unsigned char const *t2 = (unsigned char const *) s2;
321   size_t column = 0;
322 
323   while (1)
324     {
325       register unsigned char c1 = *t1++;
326       register unsigned char c2 = *t2++;
327 
328       /* Test for exact char equality first, since it's a common case.  */
329       if (c1 != c2)
330 	{
331 	  switch (ignore_white_space)
332 	    {
333 	    case IGNORE_ALL_SPACE:
334 	      /* For -w, just skip past any white space.  */
335 	      while (ISSPACE (c1) && c1 != '\n') c1 = *t1++;
336 	      while (ISSPACE (c2) && c2 != '\n') c2 = *t2++;
337 	      break;
338 
339 	    case IGNORE_SPACE_CHANGE:
340 	      /* For -b, advance past any sequence of white space in
341 		 line 1 and consider it just one space, or nothing at
342 		 all if it is at the end of the line.  */
343 	      if (ISSPACE (c1))
344 		{
345 		  while (c1 != '\n')
346 		    {
347 		      c1 = *t1++;
348 		      if (! ISSPACE (c1))
349 			{
350 			  --t1;
351 			  c1 = ' ';
352 			  break;
353 			}
354 		    }
355 		}
356 
357 	      /* Likewise for line 2.  */
358 	      if (ISSPACE (c2))
359 		{
360 		  while (c2 != '\n')
361 		    {
362 		      c2 = *t2++;
363 		      if (! ISSPACE (c2))
364 			{
365 			  --t2;
366 			  c2 = ' ';
367 			  break;
368 			}
369 		    }
370 		}
371 
372 	      if (c1 != c2)
373 		{
374 		  /* If we went too far when doing the simple test
375 		     for equality, go back to the first non-white-space
376 		     character in both sides and try again.  */
377 		  if (c2 == ' ' && c1 != '\n'
378 		      && (unsigned char const *) s1 + 1 < t1
379 		      && ISSPACE (t1[-2]))
380 		    {
381 		      --t1;
382 		      continue;
383 		    }
384 		  if (c1 == ' ' && c2 != '\n'
385 		      && (unsigned char const *) s2 + 1 < t2
386 		      && ISSPACE (t2[-2]))
387 		    {
388 		      --t2;
389 		      continue;
390 		    }
391 		}
392 
393 	      break;
394 
395 	    case IGNORE_TAB_EXPANSION:
396 	      if ((c1 == ' ' && c2 == '\t')
397 		  || (c1 == '\t' && c2 == ' '))
398 		{
399 		  size_t column2 = column;
400 		  for (;; c1 = *t1++)
401 		    {
402 		      if (c1 == ' ')
403 			column++;
404 		      else if (c1 == '\t')
405 			column += TAB_WIDTH - column % TAB_WIDTH;
406 		      else
407 			break;
408 		    }
409 		  for (;; c2 = *t2++)
410 		    {
411 		      if (c2 == ' ')
412 			column2++;
413 		      else if (c2 == '\t')
414 			column2 += TAB_WIDTH - column2 % TAB_WIDTH;
415 		      else
416 			break;
417 		    }
418 		  if (column != column2)
419 		    return 1;
420 		}
421 	      break;
422 
423 	    case IGNORE_NO_WHITE_SPACE:
424 	      break;
425 	    }
426 
427 	  /* Lowercase all letters if -i is specified.  */
428 
429 	  if (ignore_case)
430 	    {
431 	      c1 = TOLOWER (c1);
432 	      c2 = TOLOWER (c2);
433 	    }
434 
435 	  if (c1 != c2)
436 	    break;
437 	}
438       if (c1 == '\n')
439 	return 0;
440 
441       column += c1 == '\t' ? TAB_WIDTH - column % TAB_WIDTH : 1;
442     }
443 
444   return 1;
445 }
446 
/* Hunk-finding function for use with print_script.
   This implementation does no grouping: it yields START itself, so
   each change is the last link of its own hunk.  */

struct change *
find_change (struct change *start)
{
  struct change *last_in_hunk = start;

  return last_in_hunk;
}
455 
/* Counterpart of find_change with the same behavior: yields START
   itself, so each change forms a hunk of its own.  */

struct change *
find_reverse_change (struct change *start)
{
  struct change *last_in_hunk = start;

  return last_in_hunk;
}
461 
462 /* Divide SCRIPT into pieces by calling HUNKFUN and
463    print each piece with PRINTFUN.
464    Both functions take one arg, an edit script.
465 
466    HUNKFUN is called with the tail of the script
467    and returns the last link that belongs together with the start
468    of the tail.
469 
470    PRINTFUN takes a subscript which belongs together (with a null
471    link at the end) and prints it.  */
472 
473 void
print_script(struct change * script,struct change * (* hunkfun)(struct change *),void (* printfun)(struct change *))474 print_script (struct change *script,
475 	      struct change * (*hunkfun) (struct change *),
476 	      void (*printfun) (struct change *))
477 {
478   struct change *next = script;
479 
480   while (next)
481     {
482       struct change *this, *end;
483 
484       /* Find a set of changes that belong together.  */
485       this = next;
486       end = (*hunkfun) (next);
487 
488       /* Disconnect them from the rest of the changes,
489 	 making them a hunk, and remember the rest for next iteration.  */
490       next = end->link;
491       end->link = 0;
492 #ifdef DEBUG
493       debug_script (this);
494 #endif
495 
496       /* Print this hunk.  */
497       (*printfun) (this);
498 
499       /* Reconnect the script so it will all be freed properly.  */
500       end->link = next;
501     }
502 }
503 
504 /* Print the text of a single line LINE,
505    flagging it with the characters in LINE_FLAG (which say whether
506    the line is inserted, deleted, changed, etc.).  */
507 
508 void
print_1_line(char const * line_flag,char const * const * line)509 print_1_line (char const *line_flag, char const *const *line)
510 {
511   char const *base = line[0], *limit = line[1]; /* Help the compiler.  */
512   FILE *out = outfile; /* Help the compiler some more.  */
513   char const *flag_format = 0;
514 
515   /* If -T was specified, use a Tab between the line-flag and the text.
516      Otherwise use a Space (as Unix diff does).
517      Print neither space nor tab if line-flags are empty.  */
518 
519   if (line_flag && *line_flag)
520     {
521       flag_format = initial_tab ? "%s\t" : "%s ";
522       fprintf (out, flag_format, line_flag);
523     }
524 
525   output_1_line (base, limit, flag_format, line_flag);
526 
527   if ((!line_flag || line_flag[0]) && limit[-1] != '\n')
528     fprintf (out, "\n\\ %s\n", _("No newline at end of file"));
529 }
530 
531 /* Output a line from BASE up to LIMIT.
532    With -t, expand white space characters to spaces, and if FLAG_FORMAT
533    is nonzero, output it with argument LINE_FLAG after every
534    internal carriage return, so that tab stops continue to line up.  */
535 
536 void
output_1_line(char const * base,char const * limit,char const * flag_format,char const * line_flag)537 output_1_line (char const *base, char const *limit, char const *flag_format,
538 	       char const *line_flag)
539 {
540   if (!expand_tabs)
541     fwrite (base, limit - base, 1, outfile);
542   else
543     {
544       register FILE *out = outfile;
545       register unsigned char c;
546       register char const *t = base;
547       register unsigned int column = 0;
548 
549       while (t < limit)
550 	switch ((c = *t++))
551 	  {
552 	  case '\t':
553 	    {
554 	      unsigned int spaces = TAB_WIDTH - column % TAB_WIDTH;
555 	      column += spaces;
556 	      do
557 		putc (' ', out);
558 	      while (--spaces);
559 	    }
560 	    break;
561 
562 	  case '\r':
563 	    putc (c, out);
564 	    if (flag_format && t < limit && *t != '\n')
565 	      fprintf (out, flag_format, line_flag);
566 	    column = 0;
567 	    break;
568 
569 	  case '\b':
570 	    if (column == 0)
571 	      continue;
572 	    column--;
573 	    putc (c, out);
574 	    break;
575 
576 	  default:
577 	    if (ISPRINT (c))
578 	      column++;
579 	    putc (c, out);
580 	    break;
581 	  }
582     }
583 }
584 
/* Letters for the kinds of change: 'd', 'a' and 'c' at indices 1, 2
   and 3, with 0 at index 0.
   NOTE(review): presumably indexed by `enum changes' (OLD, NEW,
   CHANGED) -- confirm against the callers.  */
char const change_letter[] = { 0, 'd', 'a', 'c' };
586 
587 /* Translate an internal line number (an index into diff's table of lines)
588    into an actual line number in the input file.
589    The internal line number is I.  FILE points to the data on the file.
590 
591    Internal line numbers count from 0 starting after the prefix.
592    Actual line numbers count from 1 within the entire file.  */
593 
594 lin
translate_line_number(struct file_data const * file,lin i)595 translate_line_number (struct file_data const *file, lin i)
596 {
597   return i + file->prefix_lines + 1;
598 }
599 
600 /* Translate a line number range.  This is always done for printing,
601    so for convenience translate to long rather than lin, so that the
602    caller can use printf with "%ld" without casting.  */
603 
604 void
translate_range(struct file_data const * file,lin a,lin b,long * aptr,long * bptr)605 translate_range (struct file_data const *file,
606 		 lin a, lin b,
607 		 long *aptr, long *bptr)
608 {
609   *aptr = translate_line_number (file, a - 1) + 1;
610   *bptr = translate_line_number (file, b + 1) - 1;
611 }
612 
613 /* Print a pair of line numbers with SEPCHAR, translated for file FILE.
614    If the two numbers are identical, print just one number.
615 
616    Args A and B are internal line numbers.
617    We print the translated (real) line numbers.  */
618 
619 void
print_number_range(char sepchar,struct file_data * file,lin a,lin b)620 print_number_range (char sepchar, struct file_data *file, lin a, lin b)
621 {
622   long trans_a, trans_b;
623   translate_range (file, a, b, &trans_a, &trans_b);
624 
625   /* Note: we can have B < A in the case of a range of no lines.
626      In this case, we should print the line number before the range,
627      which is B.  */
628   if (trans_b > trans_a)
629     fprintf (outfile, "%ld%c%ld", trans_a, sepchar, trans_b);
630   else
631     fprintf (outfile, "%ld", trans_b);
632 }
633 
634 /* Look at a hunk of edit script and report the range of lines in each file
635    that it applies to.  HUNK is the start of the hunk, which is a chain
636    of `struct change'.  The first and last line numbers of file 0 are stored in
637    *FIRST0 and *LAST0, and likewise for file 1 in *FIRST1 and *LAST1.
638    Note that these are internal line numbers that count from 0.
639 
640    If no lines from file 0 are deleted, then FIRST0 is LAST0+1.
641 
642    Return UNCHANGED if only ignorable lines are inserted or deleted,
643    OLD if lines of file 0 are deleted,
644    NEW if lines of file 1 are inserted,
645    and CHANGED if both kinds of changes are found. */
646 
647 enum changes
analyze_hunk(struct change * hunk,lin * first0,lin * last0,lin * first1,lin * last1)648 analyze_hunk (struct change *hunk,
649 	      lin *first0, lin *last0,
650 	      lin *first1, lin *last1)
651 {
652   struct change *next;
653   lin l0, l1;
654   lin show_from, show_to;
655   lin i;
656   bool trivial = ignore_blank_lines || ignore_regexp.fastmap;
657   size_t trivial_length = (int) ignore_blank_lines - 1;
658     /* If 0, ignore zero-length lines;
659        if SIZE_MAX, do not ignore lines just because of their length.  */
660 
661   char const * const *linbuf0 = files[0].linbuf;  /* Help the compiler.  */
662   char const * const *linbuf1 = files[1].linbuf;
663 
664   show_from = show_to = 0;
665 
666   *first0 = hunk->line0;
667   *first1 = hunk->line1;
668 
669   next = hunk;
670   do
671     {
672       l0 = next->line0 + next->deleted - 1;
673       l1 = next->line1 + next->inserted - 1;
674       show_from += next->deleted;
675       show_to += next->inserted;
676 
677       for (i = next->line0; i <= l0 && trivial; i++)
678 	{
679 	  char const *line = linbuf0[i];
680 	  size_t len = linbuf0[i + 1] - line - 1;
681 	  if (len != trivial_length
682 	      && (! ignore_regexp.fastmap
683 		  || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
684 	    trivial = 0;
685 	}
686 
687       for (i = next->line1; i <= l1 && trivial; i++)
688 	{
689 	  char const *line = linbuf1[i];
690 	  size_t len = linbuf1[i + 1] - line - 1;
691 	  if (len != trivial_length
692 	      && (! ignore_regexp.fastmap
693 		  || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
694 	    trivial = 0;
695 	}
696     }
697   while ((next = next->link) != 0);
698 
699   *last0 = l0;
700   *last1 = l1;
701 
702   /* If all inserted or deleted lines are ignorable,
703      tell the caller to ignore this hunk.  */
704 
705   if (trivial)
706     return UNCHANGED;
707 
708   return (show_from ? OLD : UNCHANGED) | (show_to ? NEW : UNCHANGED);
709 }
710 
/* Return a newly allocated string holding S1, S2 and S3 joined end to
   end.  The storage comes from xmalloc.  */

char *
concat (char const *s1, char const *s2, char const *s3)
{
  size_t const len1 = strlen (s1);
  size_t const len2 = strlen (s2);
  size_t const len3 = strlen (s3);
  char *joined = xmalloc (len1 + len2 + len3 + 1);

  memcpy (joined, s1, len1);
  memcpy (joined + len1, s2, len2);
  /* Copy the terminating NUL of S3 along with its text.  */
  memcpy (joined + len1 + len2, s3, len3 + 1);

  return joined;
}
720 
/* Allocate SIZE bytes with xmalloc and return them cleared to zero.  */

void *
zalloc (size_t size)
{
  /* memset returns the pointer it was given.  */
  return memset (xmalloc (size), 0, size);
}
730 
/* Yield the newly allocated pathname of the file in DIR whose filename
   is FILE.  A '/' is put between the two unless the base name of DIR
   is empty or already ends in '/'.  */

char *
dir_file_pathname (char const *dir, char const *file)
{
  char const *dir_base = base_name (dir);
  char const *separator = "/";

  if (!*dir_base || dir_base[strlen (dir_base) - 1] == '/')
    separator = "";

  return concat (dir, separator, file);
}
741 
742 void
debug_script(struct change * sp)743 debug_script (struct change *sp)
744 {
745   fflush (stdout);
746 
747   for (; sp; sp = sp->link)
748     {
749       long line0 = sp->line0;
750       long line1 = sp->line1;
751       long deleted = sp->deleted;
752       long inserted = sp->inserted;
753       fprintf (stderr, "%3ld %3ld delete %ld insert %ld\n",
754 	       line0, line1, deleted, inserted);
755     }
756 
757   fflush (stderr);
758 }
759