1 /* $OpenBSD: C.c,v 1.15 2014/12/08 03:58:56 jsg Exp $ */
2 /* $NetBSD: C.c,v 1.3 1995/03/26 20:14:02 glass Exp $ */
3
4 /*
5 * Copyright (c) 1987, 1993, 1994
6 * The Regents of the University of California. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 #include <limits.h>
34 #include <stdio.h>
35 #include <string.h>
36
37 #include "ctags.h"
38
39 static int func_entry(void);
40 static void hash_entry(void);
41 static void skip_string(int);
42 static int str_entry(int);
43
44 /*
45 * c_entries --
46 * read .c and .h files and call appropriate routines
47 */
48 void
c_entries(void)49 c_entries(void)
50 {
51 int c; /* current character */
52 int level; /* brace level */
53 int token; /* if reading a token */
54 int t_def; /* if reading a typedef */
55 int t_level; /* typedef's brace level */
56 char *sp; /* buffer pointer */
57 char tok[MAXTOKEN]; /* token buffer */
58
59 lineftell = ftell(inf);
60 sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
61 while (GETC(!=, EOF)) {
62 switch (c) {
63 /*
64 * Here's where it DOESN'T handle: {
65 * foo(a)
66 * {
67 * #ifdef notdef
68 * }
69 * #endif
70 * if (a)
71 * puts("hello, world");
72 * }
73 */
74 case '{':
75 ++level;
76 goto endtok;
77 case '}':
78 /*
79 * if level goes below zero, try and fix
80 * it, even though we've already messed up
81 */
82 if (--level < 0)
83 level = 0;
84 goto endtok;
85
86 case '\n':
87 SETLINE;
88 /*
89 * the above 3 cases are similar in that they
90 * are special characters that also end tokens.
91 */
92 endtok: if (sp > tok) {
93 *sp = EOS;
94 token = YES;
95 sp = tok;
96 }
97 else
98 token = NO;
99 continue;
100
101 /*
102 * We ignore quoted strings and character constants
103 * completely.
104 */
105 case '"':
106 case '\'':
107 (void)skip_string(c);
108 break;
109
110 /*
111 * comments can be fun; note the state is unchanged after
112 * return, in case we found:
113 * "foo() XX comment XX { int bar; }"
114 */
115 case '/':
116 if (GETC(==, '*')) {
117 skip_comment(c);
118 continue;
119 } else if (c == '/') {
120 skip_comment(c);
121 continue;
122 }
123 (void)ungetc(c, inf);
124 c = '/';
125 goto storec;
126
127 /* hash marks flag #define's. */
128 case '#':
129 if (sp == tok) {
130 hash_entry();
131 break;
132 }
133 goto storec;
134
135 /*
136 * if we have a current token, parenthesis on
137 * level zero indicates a function.
138 */
139 case '(':
140 do {
141 if (GETC(==, EOF))
142 return;
143 } while (iswhite(c));
144 if (c == '*')
145 break;
146 else
147 ungetc(c, inf);
148 if (!level && token) {
149 int curline;
150
151 if (sp != tok)
152 *sp = EOS;
153 /*
154 * grab the line immediately, we may
155 * already be wrong, for example,
156 * foo\n
157 * (arg1,
158 */
159 get_line();
160 curline = lineno;
161 if (func_entry()) {
162 ++level;
163 pfnote(tok, curline);
164 }
165 break;
166 }
167 goto storec;
168
169 /*
170 * semi-colons indicate the end of a typedef; if we find a
171 * typedef we search for the next semi-colon of the same
172 * level as the typedef. Ignoring "structs", they are
173 * tricky, since you can find:
174 *
175 * "typedef int time_t;"
176 * "typedef unsigned int u_int;"
177 * "typedef unsigned int u_int [10];"
178 *
179 * If looking at a typedef, we save a copy of the last token
180 * found. Then, when we find the ';' we take the current
181 * token if it starts with a valid token name, else we take
182 * the one we saved. There's probably some reasonable
183 * alternative to this...
184 */
185 case ';':
186 if (t_def && level == t_level) {
187 t_def = NO;
188 get_line();
189 if (sp != tok)
190 *sp = EOS;
191 pfnote(tok, lineno);
192 break;
193 }
194 goto storec;
195
196 /*
197 * store characters until one that can't be part of a token
198 * comes along; check the current token against certain
199 * reserved words.
200 */
201 default:
202 /*
203 * to treat following function.
204 * func (arg) {
205 * ....
206 * }
207 */
208 if (c == ' ' || c == '\t') {
209 int save = c;
210 while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
211 ;
212 if (c == EOF)
213 return;
214 (void)ungetc(c, inf);
215 c = save;
216 }
217 storec: if (!intoken(c)) {
218 if (sp == tok)
219 break;
220 *sp = EOS;
221 /* no typedefs inside typedefs */
222 if (!t_def &&
223 !memcmp(tok, "typedef",8)) {
224 t_def = YES;
225 t_level = level;
226 break;
227 }
228 /* catch "typedef struct" */
229 if ((!t_def || t_level < level)
230 && (!memcmp(tok, "struct", 7)
231 || !memcmp(tok, "union", 6)
232 || !memcmp(tok, "enum", 5))) {
233 /*
234 * get line immediately;
235 * may change before '{'
236 */
237 get_line();
238 if (str_entry(c))
239 ++level;
240 break;
241 /* } */
242 }
243 sp = tok;
244 }
245 else if (sp != tok || begtoken(c)) {
246 /* hell... truncate it */
247 if (sp == tok + sizeof tok - 1)
248 *sp = EOS;
249 else
250 *sp++ = c;
251 token = YES;
252 }
253 continue;
254 }
255
256 sp = tok;
257 token = NO;
258 }
259 }
260
261 /*
262 * func_entry --
263 * handle a function reference
264 */
265 static int
func_entry(void)266 func_entry(void)
267 {
268 int c; /* current character */
269 int level = 0; /* for matching '()' */
270 static char attribute[] = "__attribute__";
271 char maybe_attribute[sizeof attribute + 1];
272 char *anext;
273
274 /*
275 * Find the end of the assumed function declaration.
276 * Note that ANSI C functions can have type definitions so keep
277 * track of the parentheses nesting level.
278 */
279 while (GETC(!=, EOF)) {
280 switch (c) {
281 case '\'':
282 case '"':
283 /* skip strings and character constants */
284 skip_string(c);
285 break;
286 case '/':
287 /* skip comments */
288 if (GETC(==, '*'))
289 skip_comment(c);
290 else if (c == '/')
291 skip_comment(c);
292 break;
293 case '(':
294 level++;
295 break;
296 case ')':
297 if (level == 0)
298 goto fnd;
299 level--;
300 break;
301 case '\n':
302 SETLINE;
303 }
304 }
305 return (NO);
306 fnd:
307 /*
308 * we assume that the character after a function's right paren
309 * is a token character if it's a function and a non-token
310 * character if it's a declaration. Comments don't count...
311 */
312 for (anext = maybe_attribute;;) {
313 while (GETC(!=, EOF) && iswhite(c))
314 if (c == '\n')
315 SETLINE;
316 if (c == EOF)
317 return NO;
318 /*
319 * Recognize the GNU __attribute__ extension, which would
320 * otherwise make the heuristic test DTWT
321 */
322 if (anext == maybe_attribute) {
323 if (intoken(c)) {
324 *anext++ = c;
325 continue;
326 }
327 } else {
328 if (intoken(c)) {
329 if (anext - maybe_attribute < (int)(sizeof attribute - 1))
330 *anext++ = c;
331 else
332 break;
333 continue;
334 } else {
335 *anext++ = '\0';
336 if (strcmp(maybe_attribute, attribute) == 0) {
337 (void)ungetc(c, inf);
338 return NO;
339 }
340 break;
341 }
342 }
343 if (intoken(c) || c == '{')
344 break;
345 if (c == '/' && GETC(==, '*'))
346 skip_comment(c);
347 else if (c == '/')
348 skip_comment(c);
349 else { /* don't ever "read" '/' */
350 (void)ungetc(c, inf);
351 return (NO);
352 }
353 }
354 if (c != '{')
355 (void)skip_key('{');
356 return (YES);
357 }
358
359 /*
360 * hash_entry --
361 * handle a line starting with a '#'
362 */
363 static void
hash_entry(void)364 hash_entry(void)
365 {
366 int c; /* character read */
367 int curline; /* line started on */
368 char *sp; /* buffer pointer */
369 char tok[MAXTOKEN]; /* storage buffer */
370
371 /*
372 * to treat following macro.
373 * # macro(arg) ....
374 */
375 while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
376 ;
377 (void)ungetc(c, inf);
378
379 curline = lineno;
380 for (sp = tok;;) { /* get next token */
381 if (GETC(==, EOF))
382 return;
383 if (iswhite(c))
384 break;
385 /* hell... truncate it */
386 if (sp == tok + sizeof tok - 1)
387 *sp = EOS;
388 else
389 *sp++ = c;
390 }
391 *sp = EOS;
392 if (memcmp(tok, "define", 6)) /* only interested in #define's */
393 goto skip;
394 for (;;) { /* this doesn't handle "#define \n" */
395 if (GETC(==, EOF))
396 return;
397 if (!iswhite(c))
398 break;
399 }
400 for (sp = tok;;) { /* get next token */
401 /* hell... truncate it */
402 if (sp == tok + sizeof tok - 1)
403 *sp = EOS;
404 else
405 *sp++ = c;
406 if (GETC(==, EOF))
407 return;
408 /*
409 * this is where it DOESN'T handle
410 * "#define \n"
411 */
412 if (!intoken(c))
413 break;
414 }
415 *sp = EOS;
416 if (dflag || c == '(') { /* only want macros */
417 get_line();
418 pfnote(tok, curline);
419 }
420 skip: if (c == '\n') { /* get rid of rest of define */
421 SETLINE
422 if (*(sp - 1) != '\\')
423 return;
424 }
425 (void)skip_key('\n');
426 }
427
428 /*
429 * str_entry --
430 * handle a struct, union or enum entry
431 */
432 static int
str_entry(int c)433 str_entry(int c)
434 {
435 int curline; /* line started on */
436 char *sp; /* buffer pointer */
437 char tok[LINE_MAX]; /* storage buffer */
438
439 curline = lineno;
440 while (iswhite(c))
441 if (GETC(==, EOF))
442 return (NO);
443 if (c == '{') /* it was "struct {" */
444 return (YES);
445 for (sp = tok;;) { /* get next token */
446 /* hell... truncate it */
447 if (sp == tok + sizeof tok - 1)
448 *sp = EOS;
449 else
450 *sp++ = c;
451 if (GETC(==, EOF))
452 return (NO);
453 if (!intoken(c))
454 break;
455 }
456 switch (c) {
457 case '{': /* it was "struct foo{" */
458 --sp;
459 break;
460 case '\n': /* it was "struct foo\n" */
461 SETLINE;
462 /*FALLTHROUGH*/
463 default: /* probably "struct foo " */
464 while (GETC(!=, EOF))
465 if (!iswhite(c))
466 break;
467 if (c != '{') {
468 (void)ungetc(c, inf);
469 return (NO);
470 }
471 }
472 *sp = EOS;
473 pfnote(tok, curline);
474 return (YES);
475 }
476
477 /*
478 * skip_comment --
479 * skip over comment
480 */
481 void
skip_comment(int commenttype)482 skip_comment(int commenttype)
483 {
484 int c; /* character read */
485 int star; /* '*' flag */
486
487 for (star = 0; GETC(!=, EOF);)
488 switch(c) {
489 /* comments don't nest, nor can they be escaped. */
490 case '*':
491 star = YES;
492 break;
493 case '/':
494 if (commenttype == '*' && star)
495 return;
496 break;
497 case '\n':
498 if (commenttype == '/') {
499 /* We don't really parse C, so sometimes it
500 * is necessary to see the newline
501 */
502 ungetc(c, inf);
503 return;
504 }
505 SETLINE;
506 /*FALLTHROUGH*/
507 default:
508 star = NO;
509 break;
510 }
511 }
512
513 /*
514 * skip_string --
515 * skip to the end of a string or character constant.
516 */
517 static void
skip_string(int key)518 skip_string(int key)
519 {
520 int c,
521 skip;
522
523 for (skip = NO; GETC(!=, EOF); )
524 switch (c) {
525 case '\\': /* a backslash escapes anything */
526 skip = !skip; /* we toggle in case it's "\\" */
527 break;
528 case '\n':
529 SETLINE;
530 /*FALLTHROUGH*/
531 default:
532 if (c == key && !skip)
533 return;
534 skip = NO;
535 }
536 }
537
538 /*
539 * skip_key --
540 * skip to next char "key"
541 */
542 int
skip_key(int key)543 skip_key(int key)
544 {
545 int c,
546 skip,
547 retval;
548
549 for (skip = retval = NO; GETC(!=, EOF);)
550 switch(c) {
551 case '\\': /* a backslash escapes anything */
552 skip = !skip; /* we toggle in case it's "\\" */
553 break;
554 case ';': /* special case for yacc; if one */
555 case '|': /* of these chars occurs, we may */
556 retval = YES; /* have moved out of the rule */
557 break; /* not used by C */
558 case '\'':
559 case '"':
560 /* skip strings and character constants */
561 skip_string(c);
562 break;
563 case '/':
564 /* skip comments */
565 if (GETC(==, '*')) {
566 skip_comment(c);
567 break;
568 } else if (c == '/') {
569 skip_comment(c);
570 break;
571 }
572 (void)ungetc(c, inf);
573 c = '/';
574 goto norm;
575 case '\n':
576 SETLINE;
577 /*FALLTHROUGH*/
578 default:
579 norm:
580 if (c == key && !skip)
581 return (retval);
582 skip = NO;
583 }
584 return (retval);
585 }
586