xref: /netbsd-src/external/gpl2/grep/dist/src/ansi2knr.c (revision fb69a85ab0bac94047f5be60bf0f33641f617669)
1 /*	$NetBSD: ansi2knr.c,v 1.2 2016/01/10 22:16:40 christos Exp $	*/
2 
3 /* Copyright (C) 1989, 1997, 1998, 1999 Aladdin Enterprises.  All rights reserved. */
4 
5 /*Id: ansi2knr.c,v 1.1 2000/01/21 00:54:15 alainm Exp */
6 /* Convert ANSI C function definitions to K&R ("traditional C") syntax */
7 
8 /*
9 ansi2knr is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY.  No author or distributor accepts responsibility to anyone for the
11 consequences of using it or for whether it serves any particular purpose or
12 works at all, unless he says so in writing.  Refer to the GNU General Public
13 License (the "GPL") for full details.
14 
15 Everyone is granted permission to copy, modify and redistribute ansi2knr,
16 but only under the conditions described in the GPL.  A copy of this license
17 is supposed to have been given to you along with ansi2knr so you can know
18 your rights and responsibilities.  It should be in a file named COPYLEFT,
19 or, if there is no file named COPYLEFT, a file named COPYING.  Among other
20 things, the copyright notice and this notice must be preserved on all
21 copies.
22 
23 We explicitly state here what we believe is already implied by the GPL: if
24 the ansi2knr program is distributed as a separate set of sources and a
25 separate executable file which are aggregated on a storage medium together
26 with another program, this in itself does not bring the other program under
27 the GPL, nor does the mere fact that such a program or the procedures for
28 constructing it invoke the ansi2knr executable bring any other part of the
29 program under the GPL.
30 */
31 
32 /*
33  * Usage:
34 	ansi2knr [--filename FILENAME] [INPUT_FILE [OUTPUT_FILE]]
35  * --filename provides the file name for the #line directive in the output,
36  * overriding input_file (if present).
37  * If no input_file is supplied, input is read from stdin.
38  * If no output_file is supplied, output goes to stdout.
39  * There are no error messages.
40  *
41  * ansi2knr recognizes function definitions by seeing a non-keyword
42  * identifier at the left margin, followed by a left parenthesis,
43  * with a right parenthesis as the last character on the line,
44  * and with a left brace as the first token on the following line
45  * (ignoring possible intervening comments), except that a line
46  * consisting of only
47  *	identifier1(identifier2)
48  * will not be considered a function definition unless identifier2 is
49  * the word "void", and a line consisting of
50  *	identifier1(identifier2, <<arbitrary>>)
51  * will not be considered a function definition.
52  * ansi2knr will recognize a multi-line header provided
53  * that no intervening line ends with a left or right brace or a semicolon.
54  * These algorithms ignore whitespace and comments, except that
55  * the function name must be the first thing on the line.
56  * The following constructs will confuse it:
57  *	- Any other construct that starts at the left margin and
58  *	    follows the above syntax (such as a macro or function call).
59  *	- Some macros that tinker with the syntax of function headers.
60  */
61 
62 /*
63  * The original and principal author of ansi2knr is L. Peter Deutsch
64  * <ghost@aladdin.com>.  Other authors are noted in the change history
65  * that follows (in reverse chronological order):
66 	lpd 1999-04-12 added minor fixes from Pavel Roskin
67 		<pavel_roskin@geocities.com> for clean compilation with
68 		gcc -W -Wall
69 	lpd 1999-03-22 added hack to recognize lines consisting of
70 		identifier1(identifier2, xxx) as *not* being procedures
71 	lpd 1999-02-03 made indentation of preprocessor commands consistent
72 	lpd 1999-01-28 fixed two bugs: a '/' in an argument list caused an
73 		endless loop; quoted strings within an argument list
74 		confused the parser
75 	lpd 1999-01-24 added a check for write errors on the output,
76 		suggested by Jim Meyering <meyering@ascend.com>
77 	lpd 1998-11-09 added further hack to recognize identifier(void)
78 		as being a procedure
79 	lpd 1998-10-23 added hack to recognize lines consisting of
80 		identifier1(identifier2) as *not* being procedures
81 	lpd 1997-12-08 made input_file optional; only closes input and/or
82 		output file if not stdin or stdout respectively; prints
83 		usage message on stderr rather than stdout; adds
84 		--filename switch (changes suggested by
85 		<ceder@lysator.liu.se>)
86 	lpd 1996-01-21 added code to cope with not HAVE_CONFIG_H and with
87 		compilers that don't understand void, as suggested by
88 		Tom Lane
89 	lpd 1996-01-15 changed to require that the first non-comment token
90 		on the line following a function header be a left brace,
91 		to reduce sensitivity to macros, as suggested by Tom Lane
92 		<tgl@sss.pgh.pa.us>
93 	lpd 1995-06-22 removed #ifndefs whose sole purpose was to define
94 		undefined preprocessor symbols as 0; changed all #ifdefs
95 		for configuration symbols to #ifs
96 	lpd 1995-04-05 changed copyright notice to make it clear that
97 		including ansi2knr in a program does not bring the entire
98 		program under the GPL
99 	lpd 1994-12-18 added conditionals for systems where ctype macros
100 		don't handle 8-bit characters properly, suggested by
101 		Francois Pinard <pinard@iro.umontreal.ca>;
102 		removed --varargs switch (this is now the default)
103 	lpd 1994-10-10 removed CONFIG_BROKETS conditional
104 	lpd 1994-07-16 added some conditionals to help GNU `configure',
105 		suggested by Francois Pinard <pinard@iro.umontreal.ca>;
106 		properly erase prototype args in function parameters,
107 		contributed by Jim Avera <jima@netcom.com>;
108 		correct error in writeblanks (it shouldn't erase EOLs)
109 	lpd 1989-xx-xx original version
110  */
111 
112 /* Most of the conditionals here are to make ansi2knr work with */
113 /* or without the GNU configure machinery. */
114 
115 #if HAVE_CONFIG_H
116 # include <config.h>
117 #endif
118 
119 #include <stdio.h>
120 #include <ctype.h>
121 
122 #if HAVE_CONFIG_H
123 
124 /*
125    For properly autoconfiguring ansi2knr, use AC_CONFIG_HEADER(config.h).
126    This will define HAVE_CONFIG_H and so, activate the following lines.
127  */
128 
129 # if STDC_HEADERS || HAVE_STRING_H
130 #  include <string.h>
131 # else
132 #  include <strings.h>
133 # endif
134 
135 #else /* not HAVE_CONFIG_H */
136 
137 /* Otherwise do it the hard way */
138 
139 # ifdef BSD
140 #  include <strings.h>
141 # else
142 #  ifdef VMS
143     extern int strlen(), strncmp();
144 #  else
145 #   include <string.h>
146 #  endif
147 # endif
148 
149 #endif /* not HAVE_CONFIG_H */
150 
151 #if STDC_HEADERS
152 # include <stdlib.h>
153 #else
154 /*
155    malloc and free should be declared in stdlib.h,
156    but if you've got a K&R compiler, they probably aren't.
157  */
158 # ifdef MSDOS
159 #  include <malloc.h>
160 # else
161 #  ifdef VMS
162      extern char *malloc();
163      extern void free();
164 #  else
165      extern char *malloc();
166      extern int free();
167 #  endif
168 # endif
169 
170 #endif
171 
172 /* Define NULL (for *very* old compilers). */
173 #ifndef NULL
174 # define NULL (0)
175 #endif
176 
/*
 * The ctype macros don't always handle 8-bit characters correctly.
 * Compensate for this here.
 *
 * Two separate hazards are covered:
 *  - on pre-ANSI systems that provide isascii(), the ctype macros may
 *    only be valid for ASCII arguments, so is_ascii() screens them;
 *  - everywhere else is_ascii() is the constant 1, and a plain char
 *    holding a high-bit character may be negative, which is not a
 *    valid argument for isspace()/isalpha()/isalnum().  The argument
 *    is therefore converted to unsigned char before the ctype call.
 */
#ifdef isascii
# undef HAVE_ISASCII		/* just in case */
# define HAVE_ISASCII 1
#else
#endif
#if STDC_HEADERS || !HAVE_ISASCII
# define is_ascii(c) 1
#else
# define is_ascii(c) isascii(c)
#endif

/* Value of c in the range the ctype macros are defined for. */
#define ctype_arg(c) ((unsigned char)(c))

#define is_space(c) (is_ascii(c) && isspace(ctype_arg(c)))
#define is_alpha(c) (is_ascii(c) && isalpha(ctype_arg(c)))
#define is_alnum(c) (is_ascii(c) && isalnum(ctype_arg(c)))

/* Scanning macros */
/* isidchar: ch may appear inside a C identifier. */
#define isidchar(ch) (is_alnum(ch) || (ch) == '_')
/* isidfirstchar: ch may start a C identifier. */
#define isidfirstchar(ch) (is_alpha(ch) || (ch) == '_')
199 
200 /* Forward references */
201 char *skipspace();
202 char *scanstring();
203 int writeblanks();
204 int test1();
205 int convert1();
206 
207 /* The main program */
208 int
main(argc,argv)209 main(argc, argv)
210     int argc;
211     char *argv[];
212 {	FILE *in = stdin;
213 	FILE *out = stdout;
214 	char *filename = 0;
215 	char *program_name = argv[0];
216 	char *output_name = 0;
217 #define bufsize 5000			/* arbitrary size */
218 	char *buf;
219 	char *line;
220 	char *more;
221 	char *usage =
222 	  "Usage: ansi2knr [--filename FILENAME] [INPUT_FILE [OUTPUT_FILE]]\n";
223 	/*
224 	 * In previous versions, ansi2knr recognized a --varargs switch.
225 	 * If this switch was supplied, ansi2knr would attempt to convert
226 	 * a ... argument to va_alist and va_dcl; if this switch was not
227 	 * supplied, ansi2knr would simply drop any such arguments.
228 	 * Now, ansi2knr always does this conversion, and we only
229 	 * check for this switch for backward compatibility.
230 	 */
231 	int convert_varargs = 1;
232 	int output_error;
233 
234 	while ( argc > 1 && argv[1][0] == '-' ) {
235 	  if ( !strcmp(argv[1], "--varargs") ) {
236 	    convert_varargs = 1;
237 	    argc--;
238 	    argv++;
239 	    continue;
240 	  }
241 	  if ( !strcmp(argv[1], "--filename") && argc > 2 ) {
242 	    filename = argv[2];
243 	    argc -= 2;
244 	    argv += 2;
245 	    continue;
246 	  }
247 	  fprintf(stderr, "%s: Unrecognized switch: %s\n", program_name,
248 		  argv[1]);
249 	  fprintf(stderr, usage);
250 	  exit(1);
251 	}
252 	switch ( argc )
253 	   {
254 	default:
255 		fprintf(stderr, usage);
256 		exit(0);
257 	case 3:
258 		output_name = argv[2];
259 		out = fopen(output_name, "w");
260 		if ( out == NULL ) {
261 		  fprintf(stderr, "%s: Cannot open output file %s\n",
262 			  program_name, output_name);
263 		  exit(1);
264 		}
265 		/* falls through */
266 	case 2:
267 		in = fopen(argv[1], "r");
268 		if ( in == NULL ) {
269 		  fprintf(stderr, "%s: Cannot open input file %s\n",
270 			  program_name, argv[1]);
271 		  exit(1);
272 		}
273 		if ( filename == 0 )
274 		  filename = argv[1];
275 		/* falls through */
276 	case 1:
277 		break;
278 	   }
279 	if ( filename )
280 	  fprintf(out, "#line 1 \"%s\"\n", filename);
281 	buf = malloc(bufsize);
282 	if ( buf == NULL )
283 	   {
284 		fprintf(stderr, "Unable to allocate read buffer!\n");
285 		exit(1);
286 	   }
287 	line = buf;
288 	while ( fgets(line, (unsigned)(buf + bufsize - line), in) != NULL )
289 	   {
290 test:		line += strlen(line);
291 		switch ( test1(buf) )
292 		   {
293 		case 2:			/* a function header */
294 			convert1(buf, out, 1, convert_varargs);
295 			break;
296 		case 1:			/* a function */
297 			/* Check for a { at the start of the next line. */
298 			more = ++line;
299 f:			if ( line >= buf + (bufsize - 1) ) /* overflow check */
300 			  goto wl;
301 			if ( fgets(line, (unsigned)(buf + bufsize - line), in) == NULL )
302 			  goto wl;
303 			switch ( *skipspace(more, 1) )
304 			  {
305 			  case '{':
306 			    /* Definitely a function header. */
307 			    convert1(buf, out, 0, convert_varargs);
308 			    fputs(more, out);
309 			    break;
310 			  case 0:
311 			    /* The next line was blank or a comment: */
312 			    /* keep scanning for a non-comment. */
313 			    line += strlen(line);
314 			    goto f;
315 			  default:
316 			    /* buf isn't a function header, but */
317 			    /* more might be. */
318 			    fputs(buf, out);
319 			    strcpy(buf, more);
320 			    line = buf;
321 			    goto test;
322 			  }
323 			break;
324 		case -1:		/* maybe the start of a function */
325 			if ( line != buf + (bufsize - 1) ) /* overflow check */
326 			  continue;
327 			/* falls through */
328 		default:		/* not a function */
329 wl:			fputs(buf, out);
330 			break;
331 		   }
332 		line = buf;
333 	   }
334 	if ( line != buf )
335 	  fputs(buf, out);
336 	free(buf);
337 	if ( output_name ) {
338 	  output_error = ferror(out);
339 	  output_error |= fclose(out);
340 	} else {		/* out == stdout */
341 	  fflush(out);
342 	  output_error = ferror(out);
343 	}
344 	if ( output_error ) {
345 	  fprintf(stderr, "%s: error writing to %s\n", program_name,
346 		  (output_name ? output_name : "stdout"));
347 	  exit(1);
348 	}
349 	if ( in != stdin )
350 	  fclose(in);
351 	return 0;
352 }
353 
/*
 * Step over any run of whitespace and C comments starting at p.
 * dir selects the direction of travel: 1 scans forward, -1 backward.
 * Returns a pointer to the first character that is neither whitespace
 * nor part of a comment.  If a NUL is met while inside a comment
 * (the comment continues on another line), the pointer to that NUL
 * is returned instead.
 */
char *
skipspace(p, dir)
    register char *p;
    register int dir;			/* 1 for forward, -1 for backward */
{
	for ( ; ; )
	   {	/* Consume a run of blanks. */
		for ( ; is_space(*p); p += dir )
		  ;
		/* Stop unless a comment delimiter starts here. */
		if ( *p != '/' || p[dir] != '*' )
		  return p;
		/* Step over the delimiter, then look for its partner. */
		p += 2 * dir;
		for ( ; *p != '*' || p[dir] != '/'; p += dir )
		  if ( *p == 0 )
		    return p;	/* multi-line comment?? */
		p += 2 * dir;
	   }
	/* NOTREACHED */
}
374 
/*
 * Scan over a quoted string, in either direction.
 *
 * p points at one of the string's double quotes: the opening one when
 * dir is 1, the closing one when dir is -1.  The return value points
 * just beyond the quote at the other end of the string, in the
 * direction of travel.
 *
 * A quote is treated as escaped only when it is preceded by an odd
 * number of backslashes, so "\\" is recognized as a complete string.
 *
 * Forward scans stop at the terminating NUL of an unterminated string
 * and return a pointer to that NUL.  Backward scans have no such
 * sentinel: the caller must guarantee that the opening quote exists
 * and is not the first character of the buffer.
 */
char *
scanstring(p, dir)
    register char *p;
    register int dir;
{
    if (dir > 0) {
	for (p++; *p != 0; p++) {
	    if (*p == '\\') {
		/* Skip whatever the backslash escapes. */
		if (p[1] != 0)
		    p++;
	    } else if (*p == '"')
		return p + 1;
	}
	return p;		/* unterminated string */
    }
    for (p += dir; ; p += dir) {
	if (*p == '"') {
	    /* Count the backslashes immediately before the quote. */
	    register char *q = p;
	    int nslash = 0;

	    while (q[-1] == '\\') {
		q--;
		nslash++;
	    }
	    if ((nslash & 1) == 0)
		return p + dir;
	}
    }
}
385 
/*
 * Replace every character in [start, end) with a space, except for
 * carriage returns and newlines, which are left in place so that the
 * line structure of the text is unchanged.
 * Always returns 0.
 */
int
writeblanks(start, end)
    char *start;
    char *end;
{
	register char *cur = start;

	while ( cur < end )
	   {	switch ( *cur )
		   {
		   case '\r':
		   case '\n':
			break;		/* keep end-of-line characters */
		   default:
			*cur = ' ';
			break;
		   }
		cur++;
	   }
	return 0;
}
400 
/*
 * Test whether the string in buf is a function definition.
 * The string may contain and/or end with a newline.
 * Return as follows:
 *	0 - definitely not a function definition;
 *	1 - definitely a function definition;
 *	2 - definitely a function prototype (NOT USED: a text ending
 *		in a semicolon currently yields 0);
 *	-1 - may be the beginning of a function definition,
 *		append another line and look again.
 * The reason we don't attempt to convert function prototypes is that
 * Ghostscript's declaration-generating macros look too much like
 * prototypes, and confuse the algorithms.
 *
 * Lengths are kept in plain int/unsigned rather than ptrdiff_t:
 * this file never includes <stddef.h>, and that header does not exist
 * on the pre-ANSI systems ansi2knr is meant to be built on.
 */
int
test1(buf)
    char *buf;
{	register char *p = buf;
	char *bend;
	char *endfn;
	int contin;

	if ( !isidfirstchar(*p) )
	  return 0;		/* no name at left margin */
	/* Classify the text by its last significant character. */
	bend = skipspace(buf + strlen(buf) - 1, -1);
	switch ( *bend )
	   {
	   case ';': contin = 0 /*2*/; break;
	   case ')': contin = 1; break;
	   case '{': return 0;		/* not a function */
	   case '}': return 0;		/* not a function */
	   default: contin = -1;
	   }
	/* The leading identifier must be followed by a ( ... */
	while ( isidchar(*p) )
	  p++;
	endfn = p;
	p = skipspace(p, 1);
	if ( *p++ != '(' )
	  return 0;		/* not a function */
	/* ... and by a non-empty parameter list. */
	p = skipspace(p, 1);
	if ( *p == ')' )
	  return 0;		/* no parameters */
	/* Check that the apparent function name isn't a keyword. */
	/* We only need to check for keywords that could be followed */
	/* by a left parenthesis (which, unfortunately, is most of them). */
	   {	static char *words[] =
		   {	"asm", "auto", "case", "char", "const", "double",
			"extern", "float", "for", "if", "int", "long",
			"register", "return", "short", "signed", "sizeof",
			"static", "switch", "typedef", "unsigned",
			"void", "volatile", "while", 0
		   };
		char **key = words;
		char *kp;
		unsigned len = (unsigned)(endfn - buf);

		while ( (kp = *key) != 0 )
		   {	if ( strlen(kp) == len && !strncmp(kp, buf, len) )
			  return 0;	/* name is a keyword */
			key++;
		   }
	   }
	   {
	       char *id = p;
	       int len;
	       /*
		* Check for identifier1(identifier2) and not
		* identifier1(void), or identifier1(identifier2, xxxx).
		*/

	       while ( isidchar(*p) )
		   p++;
	       len = (int)(p - id);
	       p = skipspace(p, 1);
	       if (*p == ',' ||
		   (*p == ')' && (len != 4 || strncmp(id, "void", 4)))
		   )
		   return 0;	/* not a function */
	   }
	/*
	 * If the last significant character was a ), we need to count
	 * parentheses, because it might be part of a formal parameter
	 * that is a procedure.
	 */
	if (contin > 0) {
	    int level = 0;

	    for (p = skipspace(buf, 1); *p; p = skipspace(p + 1, 1))
		level += (*p == '(' ? 1 : *p == ')' ? -1 : 0);
	    /* Still inside an open parenthesis: the header continues. */
	    if (level > 0)
		contin = -1;
	}
	return contin;
}
494 
495 /* Convert a recognized function definition or header to K&R syntax. */
496 int
convert1(buf,out,header,convert_varargs)497 convert1(buf, out, header, convert_varargs)
498     char *buf;
499     FILE *out;
500     int header;			/* Boolean */
501     int convert_varargs;	/* Boolean */
502 {	char *endfn;
503 	register char *p;
504 	/*
505 	 * The breaks table contains pointers to the beginning and end
506 	 * of each argument.
507 	 */
508 	char **breaks;
509 	unsigned num_breaks = 2;	/* for testing */
510 	char **btop;
511 	char **bp;
512 	char **ap;
513 	char *vararg = 0;
514 
515 	/* Pre-ANSI implementations don't agree on whether strchr */
516 	/* is called strchr or index, so we open-code it here. */
517 	for ( endfn = buf; *(endfn++) != '('; )
518 	  ;
519 top:	p = endfn;
520 	breaks = (char **)malloc(sizeof(char *) * num_breaks * 2);
521 	if ( breaks == NULL )
522 	   {	/* Couldn't allocate break table, give up */
523 		fprintf(stderr, "Unable to allocate break table!\n");
524 		fputs(buf, out);
525 		return -1;
526 	   }
527 	btop = breaks + num_breaks * 2 - 2;
528 	bp = breaks;
529 	/* Parse the argument list */
530 	do
531 	   {	int level = 0;
532 		char *lp = NULL;
533 		char *rp = NULL;
534 		char *end = NULL;
535 
536 		if ( bp >= btop )
537 		   {	/* Filled up break table. */
538 			/* Allocate a bigger one and start over. */
539 			free((char *)breaks);
540 			num_breaks <<= 1;
541 			goto top;
542 		   }
543 		*bp++ = p;
544 		/* Find the end of the argument */
545 		for ( ; end == NULL; p++ )
546 		   {	switch(*p)
547 			   {
548 			   case ',':
549 				if ( !level ) end = p;
550 				break;
551 			   case '(':
552 				if ( !level ) lp = p;
553 				level++;
554 				break;
555 			   case ')':
556 				if ( --level < 0 ) end = p;
557 				else rp = p;
558 				break;
559 			   case '/':
560 				if (p[1] == '*')
561 				    p = skipspace(p, 1) - 1;
562 				break;
563 			   case '"':
564 			       p = scanstring(p, 1) - 1;
565 			       break;
566 			   default:
567 				;
568 			   }
569 		   }
570 		/* Erase any embedded prototype parameters. */
571 		if ( lp && rp )
572 		  writeblanks(lp + 1, rp);
573 		p--;			/* back up over terminator */
574 		/* Find the name being declared. */
575 		/* This is complicated because of procedure and */
576 		/* array modifiers. */
577 		for ( ; ; )
578 		   {	p = skipspace(p - 1, -1);
579 			switch ( *p )
580 			   {
581 			   case ']':	/* skip array dimension(s) */
582 			   case ')':	/* skip procedure args OR name */
583 			   {	int level = 1;
584 				while ( level )
585 				 switch ( *--p )
586 				   {
587 				   case ']': case ')':
588 				       level++;
589 				       break;
590 				   case '[': case '(':
591 				       level--;
592 				       break;
593 				   case '/':
594 				       if (p > buf && p[-1] == '*')
595 					   p = skipspace(p, -1) + 1;
596 				       break;
597 				   case '"':
598 				       p = scanstring(p, -1) + 1;
599 				       break;
600 				   default: ;
601 				   }
602 			   }
603 				if ( *p == '(' && *skipspace(p + 1, 1) == '*' )
604 				   {	/* We found the name being declared */
605 					while ( !isidfirstchar(*p) )
606 					  p = skipspace(p, 1) + 1;
607 					goto found;
608 				   }
609 				break;
610 			   default:
611 				goto found;
612 			   }
613 		   }
614 found:		if ( *p == '.' && p[-1] == '.' && p[-2] == '.' )
615 		  {	if ( convert_varargs )
616 			  {	*bp++ = "va_alist";
617 				vararg = p-2;
618 			  }
619 			else
620 			  {	p++;
621 				if ( bp == breaks + 1 )	/* sole argument */
622 				  writeblanks(breaks[0], p);
623 				else
624 				  writeblanks(bp[-1] - 1, p);
625 				bp--;
626 			  }
627 		   }
628 		else
629 		   {	while ( isidchar(*p) ) p--;
630 			*bp++ = p+1;
631 		   }
632 		p = end;
633 	   }
634 	while ( *p++ == ',' );
635 	*bp = p;
636 	/* Make a special check for 'void' arglist */
637 	if ( bp == breaks+2 )
638 	   {	p = skipspace(breaks[0], 1);
639 		if ( !strncmp(p, "void", 4) )
640 		   {	p = skipspace(p+4, 1);
641 			if ( p == breaks[2] - 1 )
642 			   {	bp = breaks;	/* yup, pretend arglist is empty */
643 				writeblanks(breaks[0], p + 1);
644 			   }
645 		   }
646 	   }
647 	/* Put out the function name and left parenthesis. */
648 	p = buf;
649 	while ( p != endfn ) putc(*p, out), p++;
650 	/* Put out the declaration. */
651 	if ( header )
652 	  {	fputs(");", out);
653 		for ( p = breaks[0]; *p; p++ )
654 		  if ( *p == '\r' || *p == '\n' )
655 		    putc(*p, out);
656 	  }
657 	else
658 	  {	for ( ap = breaks+1; ap < bp; ap += 2 )
659 		  {	p = *ap;
660 			while ( isidchar(*p) )
661 			  putc(*p, out), p++;
662 			if ( ap < bp - 1 )
663 			  fputs(", ", out);
664 		  }
665 		fputs(")  ", out);
666 		/* Put out the argument declarations */
667 		for ( ap = breaks+2; ap <= bp; ap += 2 )
668 		  (*ap)[-1] = ';';
669 		if ( vararg != 0 )
670 		  {	*vararg = 0;
671 			fputs(breaks[0], out);		/* any prior args */
672 			fputs("va_dcl", out);		/* the final arg */
673 			fputs(bp[0], out);
674 		  }
675 		else
676 		  fputs(breaks[0], out);
677 	  }
678 	free((char *)breaks);
679 	return 0;
680 }
681