/* xref: /openbsd-src/usr.bin/ctags/C.c (revision c659113e58a376753dbb937283e5d39207e1008f) */
/*	$OpenBSD: C.c,v 1.15 2014/12/08 03:58:56 jsg Exp $	*/
/*	$NetBSD: C.c,v 1.3 1995/03/26 20:14:02 glass Exp $	*/

/*
 * Copyright (c) 1987, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
32 
#include <limits.h>
#include <stdio.h>
#include <string.h>

#include "ctags.h"
38 
/* Forward declarations of the helpers that are private to this file. */
static int	func_entry(void);
static void	hash_entry(void);
static void	skip_string(int);
static int	str_entry(int);
43 
44 /*
45  * c_entries --
46  *	read .c and .h files and call appropriate routines
47  */
48 void
c_entries(void)49 c_entries(void)
50 {
51 	int	c;			/* current character */
52 	int	level;			/* brace level */
53 	int	token;			/* if reading a token */
54 	int	t_def;			/* if reading a typedef */
55 	int	t_level;		/* typedef's brace level */
56 	char	*sp;			/* buffer pointer */
57 	char	tok[MAXTOKEN];		/* token buffer */
58 
59 	lineftell = ftell(inf);
60 	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
61 	while (GETC(!=, EOF)) {
62 		switch (c) {
63 		/*
64 		 * Here's where it DOESN'T handle: {
65 		 *	foo(a)
66 		 *	{
67 		 *	#ifdef notdef
68 		 *		}
69 		 *	#endif
70 		 *		if (a)
71 		 *			puts("hello, world");
72 		 *	}
73 		 */
74 		case '{':
75 			++level;
76 			goto endtok;
77 		case '}':
78 			/*
79 			 * if level goes below zero, try and fix
80 			 * it, even though we've already messed up
81 			 */
82 			if (--level < 0)
83 				level = 0;
84 			goto endtok;
85 
86 		case '\n':
87 			SETLINE;
88 			/*
89 			 * the above 3 cases are similar in that they
90 			 * are special characters that also end tokens.
91 			 */
92 endtok:			if (sp > tok) {
93 				*sp = EOS;
94 				token = YES;
95 				sp = tok;
96 			}
97 			else
98 				token = NO;
99 			continue;
100 
101 		/*
102 		 * We ignore quoted strings and character constants
103 		 * completely.
104 		 */
105 		case '"':
106 		case '\'':
107 			(void)skip_string(c);
108 			break;
109 
110 		/*
111 		 * comments can be fun; note the state is unchanged after
112 		 * return, in case we found:
113 		 *	"foo() XX comment XX { int bar; }"
114 		 */
115 		case '/':
116 			if (GETC(==, '*')) {
117 				skip_comment(c);
118 				continue;
119 			} else if (c == '/') {
120 				skip_comment(c);
121 				continue;
122 			}
123 			(void)ungetc(c, inf);
124 			c = '/';
125 			goto storec;
126 
127 		/* hash marks flag #define's. */
128 		case '#':
129 			if (sp == tok) {
130 				hash_entry();
131 				break;
132 			}
133 			goto storec;
134 
135 		/*
136 		 * if we have a current token, parenthesis on
137 		 * level zero indicates a function.
138 		 */
139 		case '(':
140 			do {
141 				if (GETC(==, EOF))
142 					return;
143 			} while (iswhite(c));
144 			if (c == '*')
145 				break;
146 			else
147 				ungetc(c, inf);
148 			if (!level && token) {
149 				int	curline;
150 
151 				if (sp != tok)
152 					*sp = EOS;
153 				/*
154 				 * grab the line immediately, we may
155 				 * already be wrong, for example,
156 				 *	foo\n
157 				 *	(arg1,
158 				 */
159 				get_line();
160 				curline = lineno;
161 				if (func_entry()) {
162 					++level;
163 					pfnote(tok, curline);
164 				}
165 				break;
166 			}
167 			goto storec;
168 
169 		/*
170 		 * semi-colons indicate the end of a typedef; if we find a
171 		 * typedef we search for the next semi-colon of the same
172 		 * level as the typedef.  Ignoring "structs", they are
173 		 * tricky, since you can find:
174 		 *
175 		 *	"typedef int time_t;"
176 		 *	"typedef unsigned int u_int;"
177 		 *	"typedef unsigned int u_int [10];"
178 		 *
179 		 * If looking at a typedef, we save a copy of the last token
180 		 * found.  Then, when we find the ';' we take the current
181 		 * token if it starts with a valid token name, else we take
182 		 * the one we saved.  There's probably some reasonable
183 		 * alternative to this...
184 		 */
185 		case ';':
186 			if (t_def && level == t_level) {
187 				t_def = NO;
188 				get_line();
189 				if (sp != tok)
190 					*sp = EOS;
191 				pfnote(tok, lineno);
192 				break;
193 			}
194 			goto storec;
195 
196 		/*
197 		 * store characters until one that can't be part of a token
198 		 * comes along; check the current token against certain
199 		 * reserved words.
200 		 */
201 		default:
202 			/*
203 			 * to treat following function.
204 			 * func      (arg) {
205 			 * ....
206 			 * }
207 			 */
208 			if (c == ' ' || c == '\t') {
209 				int save = c;
210 				while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
211 					;
212 				if (c == EOF)
213 					return;
214 				(void)ungetc(c, inf);
215 				c = save;
216 			}
217 	storec:		if (!intoken(c)) {
218 				if (sp == tok)
219 					break;
220 				*sp = EOS;
221 				/* no typedefs inside typedefs */
222 				if (!t_def &&
223 					   !memcmp(tok, "typedef",8)) {
224 					t_def = YES;
225 					t_level = level;
226 					break;
227 				}
228 				/* catch "typedef struct" */
229 				if ((!t_def || t_level < level)
230 				    && (!memcmp(tok, "struct", 7)
231 				    || !memcmp(tok, "union", 6)
232 				    || !memcmp(tok, "enum", 5))) {
233 					/*
234 					 * get line immediately;
235 					 * may change before '{'
236 					 */
237 					get_line();
238 					if (str_entry(c))
239 						++level;
240 					break;
241 					/* } */
242 				}
243 				sp = tok;
244 			}
245 			else if (sp != tok || begtoken(c)) {
246 				/* hell... truncate it */
247 				if (sp == tok + sizeof tok - 1)
248 					*sp = EOS;
249 				else
250 					*sp++ = c;
251 				token = YES;
252 			}
253 			continue;
254 		}
255 
256 		sp = tok;
257 		token = NO;
258 	}
259 }
260 
261 /*
262  * func_entry --
263  *	handle a function reference
264  */
265 static int
func_entry(void)266 func_entry(void)
267 {
268 	int	c;			/* current character */
269 	int	level = 0;		/* for matching '()' */
270 	static char attribute[] = "__attribute__";
271 	char maybe_attribute[sizeof attribute + 1];
272 	char *anext;
273 
274 	/*
275 	 * Find the end of the assumed function declaration.
276 	 * Note that ANSI C functions can have type definitions so keep
277 	 * track of the parentheses nesting level.
278 	 */
279 	while (GETC(!=, EOF)) {
280 		switch (c) {
281 		case '\'':
282 		case '"':
283 			/* skip strings and character constants */
284 			skip_string(c);
285 			break;
286 		case '/':
287 			/* skip comments */
288 			if (GETC(==, '*'))
289 				skip_comment(c);
290 			else if (c == '/')
291 				skip_comment(c);
292 			break;
293 		case '(':
294 			level++;
295 			break;
296 		case ')':
297 			if (level == 0)
298 				goto fnd;
299 			level--;
300 			break;
301 		case '\n':
302 			SETLINE;
303 		}
304 	}
305 	return (NO);
306 fnd:
307 	/*
308 	 * we assume that the character after a function's right paren
309 	 * is a token character if it's a function and a non-token
310 	 * character if it's a declaration.  Comments don't count...
311 	 */
312 	for (anext = maybe_attribute;;) {
313 		while (GETC(!=, EOF) && iswhite(c))
314 			if (c == '\n')
315 				SETLINE;
316 		if (c == EOF)
317 			return NO;
318 		/*
319 		 * Recognize the GNU __attribute__ extension, which would
320 		 * otherwise make the heuristic test DTWT
321 		 */
322 		if (anext == maybe_attribute) {
323 			if (intoken(c)) {
324 				*anext++ = c;
325 				continue;
326 			}
327 		} else {
328 			if (intoken(c)) {
329 				if (anext - maybe_attribute < (int)(sizeof attribute - 1))
330 					*anext++ = c;
331 				else
332 					break;
333 				continue;
334 			} else {
335 				*anext++ = '\0';
336 				if (strcmp(maybe_attribute, attribute) == 0) {
337 					(void)ungetc(c, inf);
338 					return NO;
339 				}
340 				break;
341 			}
342 		}
343 		if (intoken(c) || c == '{')
344 			break;
345 		if (c == '/' && GETC(==, '*'))
346 			skip_comment(c);
347 		else if (c == '/')
348 			skip_comment(c);
349 		else {				/* don't ever "read" '/' */
350 			(void)ungetc(c, inf);
351 			return (NO);
352 		}
353 	}
354 	if (c != '{')
355 		(void)skip_key('{');
356 	return (YES);
357 }
358 
359 /*
360  * hash_entry --
361  *	handle a line starting with a '#'
362  */
363 static void
hash_entry(void)364 hash_entry(void)
365 {
366 	int	c;			/* character read */
367 	int	curline;		/* line started on */
368 	char	*sp;			/* buffer pointer */
369 	char	tok[MAXTOKEN];		/* storage buffer */
370 
371 	/*
372 	 * to treat following macro.
373 	 * #     macro(arg)        ....
374 	 */
375 	while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
376 		;
377 	(void)ungetc(c, inf);
378 
379 	curline = lineno;
380 	for (sp = tok;;) {		/* get next token */
381 		if (GETC(==, EOF))
382 			return;
383 		if (iswhite(c))
384 			break;
385 		/* hell... truncate it */
386 		if (sp == tok + sizeof tok - 1)
387 			*sp = EOS;
388 		else
389 			*sp++ = c;
390 	}
391 	*sp = EOS;
392 	if (memcmp(tok, "define", 6))	/* only interested in #define's */
393 		goto skip;
394 	for (;;) {			/* this doesn't handle "#define \n" */
395 		if (GETC(==, EOF))
396 			return;
397 		if (!iswhite(c))
398 			break;
399 	}
400 	for (sp = tok;;) {		/* get next token */
401 		/* hell... truncate it */
402 		if (sp == tok + sizeof tok - 1)
403 			*sp = EOS;
404 		else
405 			*sp++ = c;
406 		if (GETC(==, EOF))
407 			return;
408 		/*
409 		 * this is where it DOESN'T handle
410 		 * "#define \n"
411 		 */
412 		if (!intoken(c))
413 			break;
414 	}
415 	*sp = EOS;
416 	if (dflag || c == '(') {	/* only want macros */
417 		get_line();
418 		pfnote(tok, curline);
419 	}
420 skip:	if (c == '\n') {		/* get rid of rest of define */
421 		SETLINE
422 		if (*(sp - 1) != '\\')
423 			return;
424 	}
425 	(void)skip_key('\n');
426 }
427 
428 /*
429  * str_entry --
430  *	handle a struct, union or enum entry
431  */
432 static int
str_entry(int c)433 str_entry(int c)
434 {
435 	int	curline;		/* line started on */
436 	char	*sp;			/* buffer pointer */
437 	char	tok[LINE_MAX];		/* storage buffer */
438 
439 	curline = lineno;
440 	while (iswhite(c))
441 		if (GETC(==, EOF))
442 			return (NO);
443 	if (c == '{')		/* it was "struct {" */
444 		return (YES);
445 	for (sp = tok;;) {		/* get next token */
446 		/* hell... truncate it */
447 		if (sp == tok + sizeof tok - 1)
448 			*sp = EOS;
449 		else
450 			*sp++ = c;
451 		if (GETC(==, EOF))
452 			return (NO);
453 		if (!intoken(c))
454 			break;
455 	}
456 	switch (c) {
457 		case '{':		/* it was "struct foo{" */
458 			--sp;
459 			break;
460 		case '\n':		/* it was "struct foo\n" */
461 			SETLINE;
462 			/*FALLTHROUGH*/
463 		default:		/* probably "struct foo " */
464 			while (GETC(!=, EOF))
465 				if (!iswhite(c))
466 					break;
467 			if (c != '{') {
468 				(void)ungetc(c, inf);
469 				return (NO);
470 			}
471 	}
472 	*sp = EOS;
473 	pfnote(tok, curline);
474 	return (YES);
475 }
476 
477 /*
478  * skip_comment --
479  *	skip over comment
480  */
481 void
skip_comment(int commenttype)482 skip_comment(int commenttype)
483 {
484 	int	c;			/* character read */
485 	int	star;			/* '*' flag */
486 
487 	for (star = 0; GETC(!=, EOF);)
488 		switch(c) {
489 		/* comments don't nest, nor can they be escaped. */
490 		case '*':
491 			star = YES;
492 			break;
493 		case '/':
494 			if (commenttype == '*' && star)
495 				return;
496 			break;
497 		case '\n':
498 			if (commenttype == '/') {
499 				/* We don't really parse C, so sometimes it
500 				 * is necessary to see the newline
501 				 */
502 				ungetc(c, inf);
503 				return;
504 			}
505 			SETLINE;
506 			/*FALLTHROUGH*/
507 		default:
508 			star = NO;
509 			break;
510 		}
511 }
512 
513 /*
514  * skip_string --
515  *	skip to the end of a string or character constant.
516  */
517 static void
skip_string(int key)518 skip_string(int key)
519 {
520 	int	c,
521 		skip;
522 
523 	for (skip = NO; GETC(!=, EOF); )
524 		switch (c) {
525 		case '\\':		/* a backslash escapes anything */
526 			skip = !skip;	/* we toggle in case it's "\\" */
527 			break;
528 		case '\n':
529 			SETLINE;
530 			/*FALLTHROUGH*/
531 		default:
532 			if (c == key && !skip)
533 				return;
534 			skip = NO;
535 		}
536 }
537 
538 /*
539  * skip_key --
540  *	skip to next char "key"
541  */
542 int
skip_key(int key)543 skip_key(int key)
544 {
545 	int	c,
546 		skip,
547 		retval;
548 
549 	for (skip = retval = NO; GETC(!=, EOF);)
550 		switch(c) {
551 		case '\\':		/* a backslash escapes anything */
552 			skip = !skip;	/* we toggle in case it's "\\" */
553 			break;
554 		case ';':		/* special case for yacc; if one */
555 		case '|':		/* of these chars occurs, we may */
556 			retval = YES;	/* have moved out of the rule */
557 			break;		/* not used by C */
558 		case '\'':
559 		case '"':
560 			/* skip strings and character constants */
561 			skip_string(c);
562 			break;
563 		case '/':
564 			/* skip comments */
565 			if (GETC(==, '*')) {
566 				skip_comment(c);
567 				break;
568 			} else if (c == '/') {
569 				skip_comment(c);
570 				break;
571 			}
572 			(void)ungetc(c, inf);
573 			c = '/';
574 			goto norm;
575 		case '\n':
576 			SETLINE;
577 			/*FALLTHROUGH*/
578 		default:
579 		norm:
580 			if (c == key && !skip)
581 				return (retval);
582 			skip = NO;
583 		}
584 	return (retval);
585 }
586