xref: /netbsd-src/usr.bin/xlint/lint1/emit1.c (revision a644e605c1637fe174dffe39af8df4c1c51acbe0)
1 /* $NetBSD: emit1.c,v 1.96 2024/08/29 20:35:19 rillig Exp $ */
2 
3 /*
4  * Copyright (c) 1996 Christopher G. Demetriou.  All Rights Reserved.
5  * Copyright (c) 1994, 1995 Jochen Pohl
6  * All Rights Reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by Jochen Pohl for
19  *	The NetBSD Project.
20  * 4. The name of the author may not be used to endorse or promote products
21  *    derived from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #if HAVE_NBTOOL_CONFIG_H
36 #include "nbtool_config.h"
37 #endif
38 
39 #include <sys/cdefs.h>
40 #if defined(__RCSID)
41 __RCSID("$NetBSD: emit1.c,v 1.96 2024/08/29 20:35:19 rillig Exp $");
42 #endif
43 
44 #include <stdlib.h>
45 
46 #include "lint1.h"
47 
48 static void outtt(sym_t *, sym_t *);
49 static void outfstrg(const char *);
50 
51 /*
52  * Write type into the output file, encoded as follows:
53  *	const			c
54  *	volatile		v
55  *	_Bool			B
56  *	_Complex float		s X
57  *	_Complex double		X
58  *	_Complex long double	l X
59  *	char			C
60  *	signed char		s C
61  *	unsigned char		u C
62  *	short			S
63  *	unsigned short		u S
64  *	int			I
65  *	unsigned int		u I
66  *	long			L
67  *	unsigned long		u L
68  *	long long		Q
69  *	unsigned long long	u Q
70  *	float			s D
71  *	double			D
72  *	long double		l D
73  *	void			V
74  *	*			P
75  *	[n]			A n
76  *	()			F
77  *	(void)			F 0
78  *	(n parameters)		F n arg1 arg2 ... argn
79  *	(n parameters, ...)	F n arg1 arg2 ... argn E
80  *	enum tag		e T tag_or_typename
81  *	struct tag		s T tag_or_typename
82  *	union tag		u T tag_or_typename
83  *
84  *	tag_or_typename		0 (obsolete)		no tag or type name
85  *				1 n tag			tagged type
86  *				2 n typename		only typedef name
87  *				3 line.file.uniq	anonymous types
88  */
89 void
90 outtype(const type_t *tp)
91 {
92 	/* Available letters: ------GH--K-MNO--R--U-W-YZ */
93 #ifdef INT128_SIZE
94 	static const char tt[NTSPEC] = "???BCCCSSIILLQQJJDDD?XXXV?TTTPAF";
95 	static const char ss[NTSPEC] = "???  su u u u u us l?s l ?sue   ";
96 #else
97 	static const char tt[NTSPEC] = "???BCCCSSIILLQQDDD?XXXV?TTTPAF";
98 	static const char ss[NTSPEC] = "???  su u u u us l?s l ?sue   ";
99 #endif
100 	int na;
101 	tspec_t ts;
102 
103 	while (tp != NULL) {
104 		if ((ts = tp->t_tspec) == INT && tp->t_is_enum)
105 			ts = ENUM;
106 		lint_assert(tt[ts] != '?' && ss[ts] != '?');
107 		if (tp->t_const)
108 			outchar('c');
109 		if (tp->t_volatile)
110 			outchar('v');
111 		if (ss[ts] != ' ')
112 			outchar(ss[ts]);
113 		outchar(tt[ts]);
114 
115 		if (ts == ARRAY) {
116 			outint(tp->u.dimension);
117 		} else if (ts == ENUM) {
118 			outtt(tp->u.enumer->en_tag,
119 			    tp->u.enumer->en_first_typedef);
120 		} else if (is_struct_or_union(ts)) {
121 			outtt(tp->u.sou->sou_tag,
122 			    tp->u.sou->sou_first_typedef);
123 		} else if (ts == FUNC && tp->t_proto) {
124 			na = 0;
125 			for (const sym_t *param = tp->u.params;
126 			    param != NULL; param = param->s_next)
127 				na++;
128 			if (tp->t_vararg)
129 				na++;
130 			outint(na);
131 			for (const sym_t *param = tp->u.params;
132 			    param != NULL; param = param->s_next)
133 				outtype(param->s_type);
134 			if (tp->t_vararg)
135 				outchar('E');
136 		}
137 		tp = tp->t_subt;
138 	}
139 }
140 
141 /*
142  * write the name of a tag or typename
143  *
144  * if the tag is named, the name of the tag is written,
145  * otherwise, if a typename exists which refers to this tag,
146  * this typename is written
147  */
148 static void
149 outtt(sym_t *tag, sym_t *tdef)
150 {
151 
152 	/* 0 is no longer used. */
153 
154 	if (tag->s_name != unnamed) {
155 		outint(1);
156 		outname(tag->s_name);
157 	} else if (tdef != NULL) {
158 		outint(2);
159 		outname(tdef->s_name);
160 	} else {
161 		outint(3);
162 		outint(tag->s_def_pos.p_line);
163 		outchar('.');
164 		outint(get_filename_id(tag->s_def_pos.p_file));
165 		outchar('.');
166 		outint(tag->s_def_pos.p_uniq);
167 	}
168 }
169 
170 /*
171  * write information about a globally declared/defined symbol
172  * with storage class extern
173  *
174  * information about function definitions are written in outfdef(),
175  * not here
176  */
177 void
178 outsym(const sym_t *sym, scl_t sc, def_t def)
179 {
180 
181 	/*
182 	 * Static function declarations must also be written to the output
183 	 * file. Compatibility of function declarations (for both static and
184 	 * extern functions) must be checked in lint2. Lint1 can't do this,
185 	 * especially not if functions are declared at block level before their
186 	 * first declaration at level 0.
187 	 */
188 	if (sc != EXTERN && !(sc == STATIC && sym->s_type->t_tspec == FUNC))
189 		return;
190 	if (ch_isdigit(sym->s_name[0]))	/* see mktempsym */
191 		return;
192 
193 	outint(csrc_pos.p_line);
194 	outchar('d');		/* declaration */
195 	outint(get_filename_id(sym->s_def_pos.p_file));
196 	outchar('.');
197 	outint(sym->s_def_pos.p_line);
198 
199 	/* flags */
200 
201 	if (def == DEF)
202 		outchar('d');	/* defined */
203 	else if (def == TDEF)
204 		outchar('t');	/* tentative defined */
205 	else {
206 		lint_assert(def == DECL);
207 		outchar('e');	/* declared */
208 	}
209 
210 	if (llibflg && def != DECL) {
211 		/*
212 		 * mark it as used so lint2 does not complain about unused
213 		 * symbols in libraries
214 		 */
215 		outchar('u');
216 	}
217 
218 	if (sc == STATIC)
219 		outchar('s');
220 
221 	outname(sym->s_name);
222 
223 	if (sym->s_rename != NULL) {
224 		outchar('r');
225 		outname(sym->s_rename);
226 	}
227 
228 	outtype(sym->s_type);
229 	outchar('\n');
230 }
231 
232 /*
233  * Write information about a function definition. This is also done for static
234  * functions, to later check if they are called with proper argument types.
235  */
236 void
237 outfdef(const sym_t *fsym, const pos_t *posp, bool rval, bool osdef,
238 	const sym_t *args)
239 {
240 	int narg;
241 
242 	if (posp->p_file == csrc_pos.p_file) {
243 		outint(posp->p_line);
244 	} else {
245 		outint(csrc_pos.p_line);
246 	}
247 	outchar('d');		/* declaration */
248 	outint(get_filename_id(posp->p_file));
249 	outchar('.');
250 	outint(posp->p_line);
251 
252 	/* both SCANFLIKE and PRINTFLIKE imply VARARGS */
253 	if (printflike_argnum != -1) {
254 		nvararg = printflike_argnum;
255 	} else if (scanflike_argnum != -1) {
256 		nvararg = scanflike_argnum;
257 	}
258 
259 	if (nvararg != -1) {
260 		outchar('v');
261 		outint(nvararg);
262 	}
263 	if (scanflike_argnum != -1) {
264 		outchar('S');
265 		outint(scanflike_argnum);
266 	}
267 	if (printflike_argnum != -1) {
268 		outchar('P');
269 		outint(printflike_argnum);
270 	}
271 	nvararg = printflike_argnum = scanflike_argnum = -1;
272 
273 	outchar('d');
274 
275 	if (rval)
276 		outchar('r');	/* has return value */
277 
278 	if (llibflg)
279 		/*
280 		 * mark it as used so lint2 does not complain about unused
281 		 * symbols in libraries
282 		 */
283 		outchar('u');
284 
285 	if (osdef)
286 		outchar('o');	/* old-style function definition */
287 
288 	if (fsym->s_inline)
289 		outchar('i');
290 
291 	if (fsym->s_scl == STATIC)
292 		outchar('s');
293 
294 	outname(fsym->s_name);
295 
296 	if (fsym->s_rename != NULL) {
297 		outchar('r');
298 		outname(fsym->s_rename);
299 	}
300 
301 	/* parameter types and return value */
302 	if (osdef) {
303 		narg = 0;
304 		for (const sym_t *arg = args; arg != NULL; arg = arg->s_next)
305 			narg++;
306 		outchar('f');
307 		outint(narg);
308 		for (const sym_t *arg = args; arg != NULL; arg = arg->s_next)
309 			outtype(arg->s_type);
310 		outtype(fsym->s_type->t_subt);
311 	} else {
312 		outtype(fsym->s_type);
313 	}
314 	outchar('\n');
315 }
316 
317 /*
318  * write out all information necessary for lint2 to check function
319  * calls
320  *
321  * retval_used is set if the return value is used (assigned to a variable)
322  * retval_discarded is set if the return value is neither used nor ignored
323  * (that is, cast to void)
324  */
325 void
326 outcall(const tnode_t *tn, bool retval_used, bool retval_discarded)
327 {
328 	outint(csrc_pos.p_line);
329 	outchar('c');		/* function call */
330 	outint(get_filename_id(curr_pos.p_file));
331 	outchar('.');
332 	outint(curr_pos.p_line);
333 
334 	/*
335 	 * flags; 'u' and 'i' must be last to make sure a letter is between the
336 	 * numeric argument of a flag and the name of the function
337 	 */
338 	const function_call *call = tn->u.call;
339 
340 	for (size_t i = 0, n = call->args_len; i < n; i++) {
341 		const tnode_t *arg = call->args[i];
342 		if (arg->tn_op == CON) {
343 			tspec_t t = arg->tn_type->t_tspec;
344 			if (is_integer(t)) {
345 				/*
346 				 * XXX it would probably be better to
347 				 * explicitly test the sign
348 				 */
349 				int64_t si = arg->u.value.u.integer;
350 				if (si == 0)
351 					/* zero constant */
352 					outchar('z');
353 				else if (!msb(si, t))
354 					/* positive if cast to signed */
355 					outchar('p');
356 				else
357 					/* negative if cast to signed */
358 					outchar('n');
359 				outint((int)i + 1);
360 			}
361 		} else if (arg->tn_op == ADDR &&
362 		    arg->u.ops.left->tn_op == STRING &&
363 		    arg->u.ops.left->u.str_literals->data != NULL) {
364 			buffer buf;
365 			buf_init(&buf);
366 			quoted_iterator it = { .end = 0 };
367 			while (quoted_next(arg->u.ops.left->u.str_literals, &it))
368 				buf_add_char(&buf, (char)it.value);
369 
370 			/* string literal, write all format specifiers */
371 			outchar('s');
372 			outint((int)i + 1);
373 			outfstrg(buf.data);
374 			free(buf.data);
375 		}
376 	}
377 	outchar((char)(retval_discarded ? 'd' : retval_used ? 'u' : 'i'));
378 
379 	outname(call->func->u.ops.left->u.sym->s_name);
380 
381 	/* types of arguments */
382 	outchar('f');
383 	outint((int)call->args_len);
384 	for (size_t i = 0, n = call->args_len; i < n; i++)
385 		outtype(call->args[i]->tn_type);
386 	/* expected type of return value */
387 	outtype(tn->tn_type);
388 	outchar('\n');
389 }
390 
/*
 * Write a character to the output file, quoted if necessary.
 *
 * Printable characters other than backslash, double quote and single quote
 * are written as they are. Everything else is written as a backslash
 * followed by either the usual escape letter or, if there is none, three
 * octal digits.
 */
static void
outqchar(char c)
{
	char letter;

	if (!(!ch_isprint(c) || c == '\\' || c == '"' || c == '\'')) {
		outchar(c);
		return;
	}

	/* Pick the escape letter; '\0' means "use the octal form". */
	switch (c) {
	case '\\':
	case '"':
	case '\'':
		letter = c;
		break;
	case '\b':
		letter = 'b';
		break;
	case '\t':
		letter = 't';
		break;
	case '\n':
		letter = 'n';
		break;
	case '\f':
		letter = 'f';
		break;
	case '\r':
		letter = 'r';
		break;
	case '\v':
		letter = 'v';
		break;
	case '\a':
		letter = 'a';
		break;
	default:
		letter = '\0';
		break;
	}

	outchar('\\');
	if (letter != '\0') {
		outchar(letter);
		return;
	}

	/* three octal digits, most significant first */
	const unsigned char uc = (unsigned char)c;
	for (int shift = 6; shift >= 0; shift -= 3)
		outchar((char)(((uc >> shift) & 07) + '0'));
}
440 
/*
 * Extract the potential format specifiers for printf() and scanf() from
 * the string cp and write them, enclosed in "" and quoted if necessary, to
 * the output file. All text outside of format specifiers is dropped.
 *
 * For each '%', the following parts are copied in this order: flag
 * characters and '*', a numeric field width, a precision ('.' followed by
 * '*' or digits), one of the length modifiers h, l, L or q, and the
 * character after that.
 */
static void
outfstrg(const char *cp)
{

	outchar('"');

	/* Throughout the loop, *cp is the character currently examined. */
	while (*cp != '\0') {

		/* skip ordinary text up to the next '%' */
		if (*cp != '%') {
			cp++;
			continue;
		}

		outchar('%');
		cp++;

		/* flags for printf and scanf and *-fieldwidth for printf */
		for (;;) {
			if (*cp != '-' && *cp != '+' && *cp != ' ' &&
			    *cp != '#' && *cp != '0' && *cp != '*')
				break;
			outchar(*cp);
			cp++;
		}

		/* numeric field width */
		for (; ch_isdigit(*cp); cp++)
			outchar(*cp);

		/* precision for printf */
		if (*cp == '.') {
			outchar(*cp);
			cp++;
			if (*cp == '*') {
				outchar(*cp);
				cp++;
			} else {
				for (; ch_isdigit(*cp); cp++)
					outchar(*cp);
			}
		}

		/* h, l, L and q flags for printf and scanf */
		switch (*cp) {
		case 'h':
		case 'l':
		case 'L':
		case 'q':
			outchar(*cp);
			cp++;
			break;
		default:
			break;
		}

		/*
		 * The last character. It is always written, so we can detect
		 * invalid format specifiers.
		 */
		if (*cp == '\0')
			continue;
		const char conv = *cp;
		outqchar(conv);
		cp++;

		if (conv != '[')
			continue;

		/*
		 * handle [ for scanf. [-] means that a minus sign was
		 * found at an undefined position.
		 */
		if (*cp == '^')
			cp++;
		/* a ']' right at the start does not end the set */
		if (*cp == ']')
			cp++;
		for (bool first = true; *cp != '\0' && *cp != ']';
		    first = false, cp++) {
			/*
			 * A minus sign is only reported if it is neither
			 * the first character examined here nor directly
			 * followed by the closing bracket.
			 */
			if (*cp == '-' && !first && cp[1] != ']')
				outchar(*cp);
		}
		if (*cp == ']') {
			outchar(*cp);
			cp++;
		}
	}

	outchar('"');
}
532 
533 /* writes a record if sym was used */
534 void
535 outusg(const sym_t *sym)
536 {
537 	if (ch_isdigit(sym->s_name[0]))	/* see mktempsym */
538 		return;
539 
540 	outint(csrc_pos.p_line);
541 	outchar('u');		/* used */
542 	outint(get_filename_id(curr_pos.p_file));
543 	outchar('.');
544 	outint(curr_pos.p_line);
545 	outchar('x');		/* separate the two numbers */
546 	outname(sym->s_name);
547 	outchar('\n');
548 }
549