xref: /openbsd-src/usr.bin/make/lowparse.c (revision 50b7afb2c2c0993b0894d4e34bf857cb13ed9c80)
1 /*	$OpenBSD: lowparse.c,v 1.32 2012/11/07 14:18:41 espie Exp $ */
2 
3 /* low-level parsing functions. */
4 
5 /*
6  * Copyright (c) 1999,2000 Marc Espie.
7  *
8  * Extensive code changes for the OpenBSD project.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE OPENBSD PROJECT AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OPENBSD
23  * PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <assert.h>
33 #include <stddef.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include "config.h"
39 #include "defines.h"
40 #include "buf.h"
41 #include "lowparse.h"
42 #include "error.h"
43 #include "lst.h"
44 #include "memory.h"
45 #include "pathnames.h"
46 #ifndef LOCATION_TYPE
47 #include "location.h"
48 #endif
49 
50 
51 /* XXX check whether we can free filenames at the end, for a proper
52  * definition of `end'. */
53 
54 #if 0
55 static LIST	    fileNames;	/* file names to free at end */
56 #endif
57 
58 /* Input stream structure: file or string.
59  * Files have str == NULL, F != NULL.
60  * Strings have F == NULL, str != NULL.
61  */
62 struct input_stream {
63 	Location origin;	/* Name of file and line number */
64 	FILE *F;		/* Open stream, or NULL if pure string. */
65 	char *str;		/* Input string, if F == NULL. */
66 
67 	/* Line buffer. */
68 	char *ptr;		/* Where we are. */
69 	char *end;		/* Don't overdo it. */
70 };
71 
72 static struct input_stream *current;	/* the input_stream being parsed. */
73 
74 static LIST input_stack;	/* Stack of input_stream waiting to be parsed
75 				 * (includes and loop reparses) */
76 
77 /* record gnode location for proper reporting at runtime */
78 static Location *post_parse = NULL;
79 
80 /* input_stream ctors.
81  *
82  * obj = new_input_file(filename, filehandle);
83  *	Create input stream from filename, filehandle. */
84 static struct input_stream *new_input_file(const char *, FILE *);
85 /* obj = new_input_string(str, origin);
86  *	Create input stream from str, origin. */
87 static struct input_stream *new_input_string(char *, const Location *);
88 /* free_input_stream(obj);
89  *	Discard consumed input stream, closing files, freeing memory.  */
90 static void free_input_stream(struct input_stream *);
91 
92 
/* Handling basic character reading.
 * c = read_char();
 *	New character c from current input stream, or EOF at end of stream.
 *
 *	Buffered characters are converted through unsigned char, so a byte
 *	with the high bit set (0xff in particular) never compares equal to
 *	EOF.  The whole expansion is parenthesized so that it can be used
 *	safely inside a larger expression. */
#define read_char()	\
    (current->ptr < current->end ? \
	(int)(unsigned char)*current->ptr++ : grab_new_line_and_readchar())
98 /* char = grab_new_line_and_readchar();
99  *	Guts for read_char. Grabs a new line off fgetln when we have
100  *	consumed the current line and returns the first char, or EOF at end of
101  *	stream.  */
102 static int grab_new_line_and_readchar(void);
103 /* c = skip_to_end_of_line();
104  *	Skips to the end of the current line, returns either '\n' or EOF.  */
105 static int skip_to_end_of_line(void);
106 
107 
108 /* Helper functions to handle basic parsing. */
109 /* read_logical_line(buffer, firstchar);
110  *	Grabs logical line into buffer, the first character has already been
111  *	read into firstchar.  */
112 static void read_logical_line(Buffer, int);
113 
/* firstchar = skip_empty_lines_and_read_char(buffer);
 *	Scans lines, skipping empty lines. May put some characters into
 *	buffer, returns the first character useful to continue parsing
 *	(e.g., not a backslash or a space). */
118 static int skip_empty_lines_and_read_char(Buffer);
119 
/* Directory prefix stripped from file names by simplify(), and its length
 * (cached so that it is not recomputed for every file). */
const char *curdir;
size_t curdir_len;

/* Parse_setcurdir(dir);
 *	Records dir as the current directory prefix.  Only the pointer is
 *	stored: dir is not copied.  */
void
Parse_setcurdir(const char *dir)
{
	curdir_len = strlen(dir);
	curdir = dir;
}
129 
/* bool = startswith(f, s, len);
 *	True if the first len characters of f match s and the character
 *	that follows them in f is a '/', i.e., s (of length len) is a
 *	leading directory component of path f.  */
static bool
startswith(const char *f, const char *s, size_t len)
{
	if (strncmp(f, s, len) != 0)
		return false;
	return f[len] == '/';
}
135 
136 static const char *
137 simplify(const char *filename)
138 {
139 	if (startswith(filename, curdir, curdir_len))
140 		return filename + curdir_len + 1;
141 	else if (startswith(filename, _PATH_DEFSYSPATH,
142 	    sizeof(_PATH_DEFSYSPATH)-1)) {
143 	    	size_t sz;
144 		char *buf;
145 		sz = strlen(filename) - sizeof(_PATH_DEFSYSPATH)+3;
146 		buf = emalloc(sz);
147 		snprintf(buf, sz, "<%s>", filename+sizeof(_PATH_DEFSYSPATH));
148 		return buf;
149 	} else
150 		return filename;
151 }
152 
153 static struct input_stream *
154 new_input_file(const char *name, FILE *stream)
155 {
156 	struct input_stream *istream;
157 #if 0
158 	Lst_AtEnd(&fileNames, name);
159 #endif
160 
161 	istream = emalloc(sizeof(*istream));
162 	istream->origin.fname = simplify(name);
163 	istream->str = NULL;
164 	/* Naturally enough, we start reading at line 0. */
165 	istream->origin.lineno = 0;
166 	istream->F = stream;
167 	istream->ptr = istream->end = NULL;
168 	return istream;
169 }
170 
171 static void
172 free_input_stream(struct input_stream *istream)
173 {
174 	if (istream->F && fileno(istream->F) != STDIN_FILENO)
175 		(void)fclose(istream->F);
176 	free(istream->str);
177 	/* Note we can't free the file names yet, as they are embedded in GN
178 	 * for error reports. */
179 	free(istream);
180 }
181 
182 static struct input_stream *
183 new_input_string(char *str, const Location *origin)
184 {
185 	struct input_stream *istream;
186 
187 	istream = emalloc(sizeof(*istream));
188 	/* No malloc, name is always taken from an already existing istream
189 	 * and strings are used in for loops, so we need to reset the line counter
190          * to an appropriate value. */
191 	istream->origin = *origin;
192 	istream->F = NULL;
193 	istream->ptr = istream->str = str;
194 	istream->end = str + strlen(str);
195 	return istream;
196 }
197 
198 
199 void
200 Parse_FromString(char *str, unsigned long lineno)
201 {
202 	Location origin;
203 
204 	origin.fname = current->origin.fname;
205 	origin.lineno = lineno;
206 	if (DEBUG(FOR))
207 		(void)fprintf(stderr, "%s\n----\n", str);
208 
209 	Lst_Push(&input_stack, current);
210 	assert(current != NULL);
211 	current = new_input_string(str, &origin);
212 }
213 
214 
215 void
216 Parse_FromFile(const char *name, FILE *stream)
217 {
218 	if (current != NULL)
219 		Lst_Push(&input_stack, current);
220 	current = new_input_file(name, stream);
221 }
222 
223 bool
224 Parse_NextFile(void)
225 {
226 	if (current != NULL)
227 		free_input_stream(current);
228 	current = (struct input_stream *)Lst_Pop(&input_stack);
229 	return current != NULL;
230 }
231 
232 static int
233 grab_new_line_and_readchar(void)
234 {
235 	size_t len;
236 
237 	if (current->F) {
238 		current->ptr = fgetln(current->F, &len);
239 		if (current->ptr) {
240 			current->end = current->ptr + len;
241 			return *current->ptr++;
242 		} else {
243 			current->end = NULL;
244 		}
245 	}
246 	return EOF;
247 }
248 
249 static int
250 skip_to_end_of_line(void)
251 {
252 	if (current->F) {
253 		if (current->end - current->ptr > 1)
254 			current->ptr = current->end - 1;
255 		if (*current->ptr == '\n')
256 			return *current->ptr++;
257 		return EOF;
258 	} else {
259 		int c;
260 
261 		do {
262 			c = read_char();
263 		} while (c != '\n' && c != EOF);
264 		return c;
265 	}
266 }
267 
268 
269 char *
270 Parse_ReadNextConditionalLine(Buffer linebuf)
271 {
272 	int c;
273 
274 	/* If first char isn't dot, skip to end of line, handling \ */
275 	while ((c = read_char()) != '.') {
276 		for (;c != '\n'; c = read_char()) {
277 			if (c == '\\') {
278 				c = read_char();
279 				if (c == '\n')
280 					current->origin.lineno++;
281 			}
282 			if (c == EOF)
283 				/* Unclosed conditional, reported by cond.c */
284 				return NULL;
285 		}
286 		current->origin.lineno++;
287 	}
288 
289 	/* This is the line we need to copy */
290 	return Parse_ReadUnparsedLine(linebuf, "conditional");
291 }
292 
293 static void
294 read_logical_line(Buffer linebuf, int c)
295 {
296 	for (;;) {
297 		if (c == '\n') {
298 			current->origin.lineno++;
299 			break;
300 		}
301 		if (c == EOF)
302 			break;
303 		Buf_AddChar(linebuf, c);
304 		c = read_char();
305 		while (c == '\\') {
306 			c = read_char();
307 			if (c == '\n') {
308 				Buf_AddSpace(linebuf);
309 				current->origin.lineno++;
310 				do {
311 					c = read_char();
312 				} while (c == ' ' || c == '\t');
313 			} else {
314 				Buf_AddChar(linebuf, '\\');
315 				if (c == '\\') {
316 					Buf_AddChar(linebuf, '\\');
317 					c = read_char();
318 				}
319 				break;
320 			}
321 		}
322 	}
323 }
324 
325 char *
326 Parse_ReadUnparsedLine(Buffer linebuf, const char *type)
327 {
328 	int c;
329 
330 	Buf_Reset(linebuf);
331 	c = read_char();
332 	if (c == EOF) {
333 		Parse_Error(PARSE_FATAL, "Unclosed %s", type);
334 		return NULL;
335 	}
336 
337 	/* Handle '\' at beginning of line, since \\n needs special treatment */
338 	while (c == '\\') {
339 		c = read_char();
340 		if (c == '\n') {
341 			current->origin.lineno++;
342 			do {
343 				c = read_char();
344 			} while (c == ' ' || c == '\t');
345 		} else {
346 			Buf_AddChar(linebuf, '\\');
347 			if (c == '\\') {
348 				Buf_AddChar(linebuf, '\\');
349 				c = read_char();
350 			}
351 			break;
352 		}
353 	}
354 	read_logical_line(linebuf, c);
355 
356 	return Buf_Retrieve(linebuf);
357 }
358 
359 /* This is a fairly complex function, but without it, we could not skip
360  * blocks of comments without reading them. */
361 static int
362 skip_empty_lines_and_read_char(Buffer linebuf)
363 {
364 	int c;		/* the current character */
365 
366 	for (;;) {
367 		Buf_Reset(linebuf);
368 		c = read_char();
369 		/* Strip leading spaces, fold on '\n' */
370 		if (c == ' ') {
371 			do {
372 				c = read_char();
373 			} while (c == ' ' || c == '\t');
374 			while (c == '\\') {
375 				c = read_char();
376 				if (c == '\n') {
377 					current->origin.lineno++;
378 					do {
379 						c = read_char();
380 					} while (c == ' ' || c == '\t');
381 				} else {
382 					Buf_AddChar(linebuf, '\\');
383 					if (c == '\\') {
384 						Buf_AddChar(linebuf, '\\');
385 						c = read_char();
386 					}
387 					if (c == EOF)
388 						return '\n';
389 					else
390 						return c;
391 				}
392 			}
393 			assert(c != '\t');
394 		}
395 		if (c == '#')
396 			c = skip_to_end_of_line();
397 		/* Almost identical to spaces, except this occurs after
398 		 * comments have been taken care of, and we keep the tab
399 		 * itself.  */
400 		if (c == '\t') {
401 			Buf_AddChar(linebuf, '\t');
402 			do {
403 				c = read_char();
404 			} while (c == ' ' || c == '\t');
405 			while (c == '\\') {
406 				c = read_char();
407 				if (c == '\n') {
408 					current->origin.lineno++;
409 					do {
410 						c = read_char();
411 					} while (c == ' ' || c == '\t');
412 				} else {
413 					Buf_AddChar(linebuf, '\\');
414 					if (c == '\\') {
415 						Buf_AddChar(linebuf, '\\');
416 						c = read_char();
417 					}
418 					if (c == EOF)
419 						return '\n';
420 					else
421 						return c;
422 				}
423 			}
424 		}
425 		if (c == '\n')
426 			current->origin.lineno++;
427 		else
428 			return c;
429 	}
430 }
431 
432 /* Parse_ReadNormalLine removes beginning and trailing blanks (but keeps
433  * the first tab), handles escaped newlines, and skips over uninteresting
434  * lines.
435  *
436  * The line number is incremented, which implies that continuation
437  * lines are numbered with the last line number (we could do better, at a
438  * price).
439  *
440  * Trivial comments are also removed, but we can't do more, as
441  * we don't know which lines are shell commands or not.  */
442 char *
443 Parse_ReadNormalLine(Buffer linebuf)
444 {
445 	int c;		/* the current character */
446 
447 	c = skip_empty_lines_and_read_char(linebuf);
448 
449 	if (c == EOF)
450 		return NULL;
451 	else {
452 		read_logical_line(linebuf, c);
453 		return Buf_Retrieve(linebuf);
454 	}
455 }
456 
457 unsigned long
458 Parse_Getlineno(void)
459 {
460 	return current ? current->origin.lineno : 0;
461 }
462 
463 const char *
464 Parse_Getfilename(void)
465 {
466 	return current ? current->origin.fname : NULL;
467 }
468 
469 void
470 Parse_SetLocation(Location *origin)
471 {
472 	post_parse = origin;
473 }
474 
475 void
476 Parse_FillLocation(Location *origin)
477 {
478 	if (post_parse) {
479 		*origin = *post_parse;
480 	} else {
481 		origin->lineno = Parse_Getlineno();
482 		origin->fname = Parse_Getfilename();
483 	}
484 }
485 
486 void
487 Parse_ReportErrors(void)
488 {
489 	if (fatal_errors)
490 		exit(1);
491 	else
492 		assert(current == NULL);
493 }
494