xref: /netbsd-src/usr.bin/ctags/C.c (revision 7027866a09786f44d29080aa3a48badc640ddfe2)
1 /*	$NetBSD: C.c,v 1.19 2009/07/13 19:05:40 roy Exp $	*/
2 
3 /*
4  * Copyright (c) 1987, 1993, 1994
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #if HAVE_NBTOOL_CONFIG_H
33 #include "nbtool_config.h"
34 #endif
35 
36 #include <sys/cdefs.h>
37 #if defined(__RCSID) && !defined(lint)
38 #if 0
39 static char sccsid[] = "@(#)C.c	8.4 (Berkeley) 4/2/94";
40 #else
41 __RCSID("$NetBSD: C.c,v 1.19 2009/07/13 19:05:40 roy Exp $");
42 #endif
43 #endif /* not lint */
44 
45 #include <limits.h>
46 #include <stddef.h>
47 #include <stdio.h>
48 #include <string.h>
49 
50 #include "ctags.h"
51 
52 static int	func_entry(void);
53 static void	hash_entry(void);
54 static void	skip_string(int);
55 static int	str_entry(int);
56 
57 /*
58  * c_entries --
59  *	read .c and .h files and call appropriate routines
60  */
61 void
c_entries(void)62 c_entries(void)
63 {
64 	int	c;			/* current character */
65 	int	level;			/* brace level */
66 	int	token;			/* if reading a token */
67 	int	t_def;			/* if reading a typedef */
68 	int	t_level;		/* typedef's brace level */
69 	char	*sp;			/* buffer pointer */
70 	char	tok[MAXTOKEN];		/* token buffer */
71 
72 	lineftell = ftell(inf);
73 	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
74 	while (GETC(!=, EOF)) {
75 		switch (c) {
76 		/*
77 		 * Here's where it DOESN'T handle: {
78 		 *	foo(a)
79 		 *	{
80 		 *	#ifdef notdef
81 		 *		}
82 		 *	#endif
83 		 *		if (a)
84 		 *			puts("hello, world");
85 		 *	}
86 		 */
87 		case '{':
88 			++level;
89 			goto endtok;
90 		case '}':
91 			/*
92 			 * if level goes below zero, try and fix
93 			 * it, even though we've already messed up
94 			 */
95 			if (--level < 0)
96 				level = 0;
97 			goto endtok;
98 
99 		case '\n':
100 			SETLINE;
101 			/*
102 			 * the above 3 cases are similar in that they
103 			 * are special characters that also end tokens.
104 			 */
105 	endtok:			if (sp > tok) {
106 				*sp = EOS;
107 				token = YES;
108 				sp = tok;
109 			}
110 			else
111 				token = NO;
112 			continue;
113 
114 		/*
115 		 * We ignore quoted strings and character constants
116 		 * completely.
117 		 */
118 		case '"':
119 		case '\'':
120 			(void)skip_string(c);
121 			break;
122 
123 		/*
124 		 * comments can be fun; note the state is unchanged after
125 		 * return, in case we found:
126 		 *	"foo() XX comment XX { int bar; }"
127 		 */
128 		case '/':
129 			if (GETC(==, '*')) {
130 				skip_comment(c);
131 				continue;
132 			} else if (c == '/') {
133 				skip_comment(c);
134 				continue;
135 			}
136 			(void)ungetc(c, inf);
137 			c = '/';
138 			goto storec;
139 
140 		/* hash marks flag #define's. */
141 		case '#':
142 			if (sp == tok) {
143 				hash_entry();
144 				break;
145 			}
146 			goto storec;
147 
148 		/*
149 		 * if we have a current token, parenthesis on
150 		 * level zero indicates a function.
151 		 */
152 		case '(':
153 			do c = getc(inf);
154 			while (c != EOF && iswhite(c));
155 			if (c == '*')
156 				break;
157 			if (c != EOF)
158 				ungetc(c, inf);
159 			if (!level && token) {
160 				int	curline;
161 
162 				if (sp != tok)
163 					*sp = EOS;
164 				/*
165 				 * grab the line immediately, we may
166 				 * already be wrong, for example,
167 				 *	foo\n
168 				 *	(arg1,
169 				 */
170 				get_line();
171 				curline = lineno;
172 				if (func_entry()) {
173 					++level;
174 					pfnote(tok, curline);
175 				}
176 				break;
177 			}
178 			goto storec;
179 
180 		/*
181 		 * semi-colons indicate the end of a typedef; if we find a
182 		 * typedef we search for the next semi-colon of the same
183 		 * level as the typedef.  Ignoring "structs", they are
184 		 * tricky, since you can find:
185 		 *
186 		 *	"typedef long time_t;"
187 		 *	"typedef unsigned int u_int;"
188 		 *	"typedef unsigned int u_int [10];"
189 		 *
190 		 * If looking at a typedef, we save a copy of the last token
191 		 * found.  Then, when we find the ';' we take the current
192 		 * token if it starts with a valid token name, else we take
193 		 * the one we saved.  There's probably some reasonable
194 		 * alternative to this...
195 		 */
196 		case ';':
197 			if (t_def && level == t_level) {
198 				t_def = NO;
199 				get_line();
200 				if (sp != tok)
201 					*sp = EOS;
202 				pfnote(tok, lineno);
203 				break;
204 			}
205 			goto storec;
206 
207 		/*
208 		 * store characters until one that can't be part of a token
209 		 * comes along; check the current token against certain
210 		 * reserved words.
211 		 */
212 		default:
213 	storec:		if (c == EOF)
214 				break;
215 			if (!intoken(c)) {
216 				if (sp == tok)
217 					break;
218 				*sp = EOS;
219 				if (tflag) {
220 					/* no typedefs inside typedefs */
221 					if (!t_def &&
222 						   !memcmp(tok, "typedef",8)) {
223 						t_def = YES;
224 						t_level = level;
225 						break;
226 					}
227 					/* catch "typedef struct" */
228 					if ((!t_def || t_level <= level)
229 					    && (!memcmp(tok, "struct", 7)
230 					    || !memcmp(tok, "union", 6)
231 					    || !memcmp(tok, "enum", 5))) {
232 						/*
233 						 * get line immediately;
234 						 * may change before '{'
235 						 */
236 						get_line();
237 						if (str_entry(c))
238 							++level;
239 						break;
240 						/* } */
241 					}
242 				}
243 				sp = tok;
244 			}
245 			else if (sp != tok || begtoken(c)) {
246 				if (sp < tok + sizeof tok)
247 					*sp++ = c;
248 				token = YES;
249 			}
250 			continue;
251 		}
252 
253 		sp = tok;
254 		token = NO;
255 	}
256 }
257 
258 /*
259  * func_entry --
260  *	handle a function reference
261  */
262 static int
func_entry(void)263 func_entry(void)
264 {
265 	int	c;			/* current character */
266 	int	level = 0;		/* for matching '()' */
267 	static char attribute[] = "__attribute__";
268 	char	maybe_attribute[sizeof attribute + 1],
269 		*anext;
270 
271 	/*
272 	 * Find the end of the assumed function declaration.
273 	 * Note that ANSI C functions can have type definitions so keep
274 	 * track of the parentheses nesting level.
275 	 */
276 	while (GETC(!=, EOF)) {
277 		switch (c) {
278 		case '\'':
279 		case '"':
280 			/* skip strings and character constants */
281 			skip_string(c);
282 			break;
283 		case '/':
284 			/* skip comments */
285 			if (GETC(==, '*'))
286 				skip_comment(c);
287 			else if (c == '/')
288 				skip_comment(c);
289 			break;
290 		case '(':
291 			level++;
292 			break;
293 		case ')':
294 			if (level == 0)
295 				goto fnd;
296 			level--;
297 			break;
298 		case '\n':
299 			SETLINE;
300 		}
301 	}
302 	return (NO);
303 fnd:
304 	/*
305 	 * we assume that the character after a function's right paren
306 	 * is a token character if it's a function and a non-token
307 	 * character if it's a declaration.  Comments don't count...
308 	 */
309 	for (anext = maybe_attribute;;) {
310 		while (GETC(!=, EOF) && iswhite(c))
311 			if (c == '\n')
312 				SETLINE;
313 		if (c == EOF)
314 			return NO;
315 		/*
316 		 * Recognize the gnu __attribute__ extension, which would
317 		 * otherwise make the heuristic test DTWT
318 		 */
319 		if (anext == maybe_attribute) {
320 			if (intoken(c)) {
321 				*anext++ = c;
322 				continue;
323 			}
324 		} else {
325 			if (intoken(c)) {
326 				if (anext - maybe_attribute
327 				 < (ptrdiff_t)(sizeof attribute - 1))
328 					*anext++ = c;
329 				else	break;
330 				continue;
331 			} else {
332 				*anext++ = '\0';
333 				if (strcmp(maybe_attribute, attribute) == 0) {
334 					(void)ungetc(c, inf);
335 					return NO;
336 				}
337 				break;
338 			}
339 		}
340 		if (intoken(c) || c == '{')
341 			break;
342 		if (c == '/' && GETC(==, '*'))
343 			skip_comment(c);
344 		else if (c == '/')
345 			skip_comment(c);
346 		else {				/* don't ever "read" '/' */
347 			(void)ungetc(c, inf);
348 			return (NO);
349 		}
350 	}
351 	if (c != '{')
352 		(void)skip_key('{');
353 	return (YES);
354 }
355 
356 /*
357  * hash_entry --
358  *	handle a line starting with a '#'
359  */
360 static void
hash_entry(void)361 hash_entry(void)
362 {
363 	int	c;			/* character read */
364 	int	curline;		/* line started on */
365 	char	*sp;			/* buffer pointer */
366 	char	tok[MAXTOKEN];		/* storage buffer */
367 
368 	curline = lineno;
369 	do if (GETC(==, EOF))
370 		return;
371 	while(c != '\n' && iswhite(c));
372 	ungetc(c, inf);
373 	for (sp = tok;;) {		/* get next token */
374 		if (GETC(==, EOF))
375 			return;
376 		if (iswhite(c))
377 			break;
378 		if (sp < tok + sizeof tok)
379 			*sp++ = c;
380 	}
381 	if(sp >= tok + sizeof tok)
382 		--sp;
383 	*sp = EOS;
384 	if (memcmp(tok, "define", 6))	/* only interested in #define's */
385 		goto skip;
386 	for (;;) {			/* this doesn't handle "#define \n" */
387 		if (GETC(==, EOF))
388 			return;
389 		if (!iswhite(c))
390 			break;
391 	}
392 	for (sp = tok;;) {		/* get next token */
393 		if(sp < tok + sizeof tok)
394 			*sp++ = c;
395 		if (GETC(==, EOF))
396 			return;
397 		/*
398 		 * this is where it DOESN'T handle
399 		 * "#define \n"
400 		 */
401 		if (!intoken(c))
402 			break;
403 	}
404 	if(sp >= tok + sizeof tok)
405 		--sp;
406 	*sp = EOS;
407 	if (dflag || c == '(') {	/* only want macros */
408 		get_line();
409 		pfnote(tok, curline);
410 	}
411 skip:	if (c == '\n') {		/* get rid of rest of define */
412 		SETLINE
413 		if (*(sp - 1) != '\\')
414 			return;
415 	}
416 	(void)skip_key('\n');
417 }
418 
419 /*
420  * str_entry --
421  *	handle a struct, union or enum entry
422  */
423 static int
str_entry(int c)424 str_entry(int c /* current character */)
425 {
426 	int	curline;		/* line started on */
427 	char	*sp;			/* buffer pointer */
428 	char	tok[LINE_MAX];		/* storage buffer */
429 
430 	curline = lineno;
431 	while (iswhite(c))
432 		if (GETC(==, EOF))
433 			return (NO);
434 	if (c == '{')		/* it was "struct {" */
435 		return (YES);
436 	for (sp = tok;;) {		/* get next token */
437 		*sp++ = c;
438 		if (GETC(==, EOF))
439 			return (NO);
440 		if (!intoken(c))
441 			break;
442 	}
443 	switch (c) {
444 		case '{':		/* it was "struct foo{" */
445 			--sp;
446 			break;
447 		case '\n':		/* it was "struct foo\n" */
448 			SETLINE;
449 			/*FALLTHROUGH*/
450 		default:		/* probably "struct foo " */
451 			while (GETC(!=, EOF))
452 				if (!iswhite(c))
453 					break;
454 			if (c != '{') {
455 				(void)ungetc(c, inf);
456 				return (NO);
457 			}
458 	}
459 	*sp = EOS;
460 	pfnote(tok, curline);
461 	return (YES);
462 }
463 
464 /*
465  * skip_comment --
466  *	skip over comment
467  */
468 void
skip_comment(int commenttype)469 skip_comment(int commenttype)
470 {
471 	int	c;			/* character read */
472 	int	star;			/* '*' flag */
473 
474 	for (star = 0; GETC(!=, EOF);)
475 		switch(c) {
476 		/* comments don't nest, nor can they be escaped. */
477 		case '*':
478 			star = YES;
479 			break;
480 		case '/':
481 			if (commenttype == '*' && star)
482 				return;
483 			break;
484 		case '\n':
485 			if (commenttype == '/') {
486 				/*
487 				 * we don't really parse C, so sometimes it
488 				 * is necessary to see the newline
489 				 */
490 				ungetc(c, inf);
491 				return;
492 			}
493 			SETLINE;
494 			/*FALLTHROUGH*/
495 		default:
496 			star = NO;
497 			break;
498 		}
499 }
500 
501 /*
502  * skip_string --
503  *	skip to the end of a string or character constant.
504  */
505 void
skip_string(int key)506 skip_string(int key)
507 {
508 	int	c,
509 		skip;
510 
511 	for (skip = NO; GETC(!=, EOF); )
512 		switch (c) {
513 		case '\\':		/* a backslash escapes anything */
514 			skip = !skip;	/* we toggle in case it's "\\" */
515 			break;
516 		case '\n':
517 			SETLINE;
518 			/*FALLTHROUGH*/
519 		default:
520 			if (c == key && !skip)
521 				return;
522 			skip = NO;
523 		}
524 }
525 
526 /*
527  * skip_key --
528  *	skip to next char "key"
529  */
530 int
skip_key(int key)531 skip_key(int key)
532 {
533 	int	c,
534 		skip,
535 		retval;
536 
537 	for (skip = retval = NO; GETC(!=, EOF);)
538 		switch(c) {
539 		case '\\':		/* a backslash escapes anything */
540 			skip = !skip;	/* we toggle in case it's "\\" */
541 			break;
542 		case ';':		/* special case for yacc; if one */
543 		case '|':		/* of these chars occurs, we may */
544 			retval = YES;	/* have moved out of the rule */
545 			break;		/* not used by C */
546 		case '\'':
547 		case '"':
548 			/* skip strings and character constants */
549 			skip_string(c);
550 			break;
551 		case '/':
552 			/* skip comments */
553 			if (GETC(==, '*')) {
554 				skip_comment(c);
555 				break;
556 			} else if (c == '/') {
557 				skip_comment(c);
558 				break;
559 			}
560 			(void)ungetc(c, inf);
561 			c = '/';
562 			goto norm;
563 		case '\n':
564 			SETLINE;
565 			/*FALLTHROUGH*/
566 		default:
567 		norm:
568 			if (c == key && !skip)
569 				return (retval);
570 			skip = NO;
571 		}
572 	return (retval);
573 }
574