xref: /openbsd-src/usr.bin/make/lowparse.c (revision 4c1e55dc91edd6e69ccc60ce855900fbc12cf34f)
1 /*	$OpenBSD: lowparse.c,v 1.27 2012/03/22 13:50:30 espie Exp $ */
2 
3 /* low-level parsing functions. */
4 
5 /*
6  * Copyright (c) 1999,2000 Marc Espie.
7  *
8  * Extensive code changes for the OpenBSD project.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE OPENBSD PROJECT AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OPENBSD
23  * PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <assert.h>
33 #include <stddef.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include "config.h"
39 #include "defines.h"
40 #include "buf.h"
41 #include "lowparse.h"
42 #include "error.h"
43 #include "lst.h"
44 #include "memory.h"
45 #ifndef LOCATION_TYPE
46 #include "location.h"
47 #endif
48 
49 
50 /* XXX check whether we can free filenames at the end, for a proper
51  * definition of `end'. */
52 
53 #if 0
54 static LIST	    fileNames;	/* file names to free at end */
55 #endif
56 
/* Input stream structure: file or string.
 * Files have str == NULL, F != NULL.
 * Strings have F == NULL, str != NULL.
 *
 * In both cases [ptr, end) delimits the characters that read_char() can
 * still hand out without refilling: for a file this is what remains of the
 * line last returned by fgetln(), for a string it is what remains of str.
 */
struct input_stream {
	Location origin;	/* Name of file and line number */
	FILE *F;		/* Open stream, or NULL if pure string. */
	char *str;		/* Input string, if F == NULL; released by
				 * free_input_stream(). */

	/* Line buffer. */
	char *ptr;		/* Where we are: next character to read. */
	char *end;		/* Don't overdo it: one past the last
				 * available character. */
};
70 
static struct input_stream *current;	/* the input_stream being parsed,
					 * NULL when there is none. */

static LIST input_stack;	/* Stack of input_stream waiting to be parsed
				 * (includes and loop reparses); pushed by
				 * Parse_FromFile/Parse_FromString, popped by
				 * Parse_NextFile. */
75 
/* input_stream ctors.
 *
 * obj = new_input_file(filename, filehandle);
 *	Create input stream from filename, filehandle.  The file name is
 *	stored by reference, it is not copied. */
static struct input_stream *new_input_file(const char *, FILE *);
/* obj = new_input_string(str, origin);
 *	Create input stream from str, origin.  The stream keeps str itself
 *	(no copy is made) and free_input_stream() releases it. */
static struct input_stream *new_input_string(char *, const Location *);
/* free_input_stream(obj);
 *	Discard consumed input stream, closing files (stdin excepted) and
 *	freeing memory.  */
static void free_input_stream(struct input_stream *);
87 
88 
/* Handling basic character reading.
 * c = read_char();
 *	New character c from current input stream, or EOF at end of stream.
 *
 *	The expansion is fully parenthesized so that it behaves as a single
 *	expression wherever it is used.  Characters are converted through
 *	unsigned char, as getc() does, so that a byte with the high bit set
 *	(0xff in particular) is never mistaken for EOF on platforms where
 *	plain char is signed. */
#define read_char()	\
    (current->ptr < current->end ? \
	(int)(unsigned char)*current->ptr++ : grab_new_line_and_readchar())
/* c = grab_new_line_and_readchar();
 *	Guts for read_char. Grabs a new line with fgetln when we have
 *	consumed the current line and returns the first char, or EOF at end of
 *	stream.  String streams are never refilled: they always yield EOF.  */
static int grab_new_line_and_readchar(void);
/* c = skip_to_end_of_line();
 *	Skips to the end of the current line, returns either '\n' or EOF.  */
static int skip_to_end_of_line(void);
102 
103 
/* Helper functions to handle basic parsing. */
/* read_logical_line(buffer, firstchar);
 *	Grabs logical line into buffer, the first character has already been
 *	read into firstchar.  */
static void read_logical_line(Buffer, int);

/* firstchar = skip_empty_lines_and_read_char(buffer);
 *	Scans lines, skipping empty lines. May put some characters into
 *	buffer, returns the first character useful to continue parsing
 *	(e.g., not a backslash or a space), or EOF at end of stream.  */
static int skip_empty_lines_and_read_char(Buffer);
115 
116 static struct input_stream *
117 new_input_file(const char *name, FILE *stream)
118 {
119 	struct input_stream *istream;
120 #if 0
121 	Lst_AtEnd(&fileNames, name);
122 #endif
123 
124 	istream = emalloc(sizeof(*istream));
125 	istream->origin.fname = name;
126 	istream->str = NULL;
127 	/* Naturally enough, we start reading at line 0. */
128 	istream->origin.lineno = 0;
129 	istream->F = stream;
130 	istream->ptr = istream->end = NULL;
131 	return istream;
132 }
133 
134 static void
135 free_input_stream(struct input_stream *istream)
136 {
137 	if (istream->F && fileno(istream->F) != STDIN_FILENO)
138 		(void)fclose(istream->F);
139 	free(istream->str);
140 	/* Note we can't free the file names yet, as they are embedded in GN
141 	 * for error reports. */
142 	free(istream);
143 }
144 
145 static struct input_stream *
146 new_input_string(char *str, const Location *origin)
147 {
148 	struct input_stream *istream;
149 
150 	istream = emalloc(sizeof(*istream));
151 	/* No malloc, name is always taken from an already existing istream
152 	 * and strings are used in for loops, so we need to reset the line counter
153          * to an appropriate value. */
154 	istream->origin = *origin;
155 	istream->F = NULL;
156 	istream->ptr = istream->str = str;
157 	istream->end = str + strlen(str);
158 	return istream;
159 }
160 
161 
162 void
163 Parse_FromString(char *str, unsigned long lineno)
164 {
165 	Location origin;
166 
167 	origin.fname = current->origin.fname;
168 	origin.lineno = lineno;
169 	if (DEBUG(FOR))
170 		(void)fprintf(stderr, "%s\n----\n", str);
171 
172 	Lst_Push(&input_stack, current);
173 	assert(current != NULL);
174 	current = new_input_string(str, &origin);
175 }
176 
177 
178 void
179 Parse_FromFile(const char *name, FILE *stream)
180 {
181 	if (current != NULL)
182 		Lst_Push(&input_stack, current);
183 	current = new_input_file(name, stream);
184 }
185 
186 bool
187 Parse_NextFile(void)
188 {
189 	if (current != NULL)
190 		free_input_stream(current);
191 	current = (struct input_stream *)Lst_Pop(&input_stack);
192 	return current != NULL;
193 }
194 
195 static int
196 grab_new_line_and_readchar(void)
197 {
198 	size_t len;
199 
200 	if (current->F) {
201 		current->ptr = fgetln(current->F, &len);
202 		if (current->ptr) {
203 			current->end = current->ptr + len;
204 			return *current->ptr++;
205 		} else {
206 			current->end = NULL;
207 		}
208 	}
209 	return EOF;
210 }
211 
212 static int
213 skip_to_end_of_line(void)
214 {
215 	if (current->F) {
216 		if (current->end - current->ptr > 1)
217 			current->ptr = current->end - 1;
218 		if (*current->ptr == '\n')
219 			return *current->ptr++;
220 		return EOF;
221 	} else {
222 		int c;
223 
224 		do {
225 			c = read_char();
226 		} while (c != '\n' && c != EOF);
227 		return c;
228 	}
229 }
230 
231 
232 char *
233 Parse_ReadNextConditionalLine(Buffer linebuf)
234 {
235 	int c;
236 
237 	/* If first char isn't dot, skip to end of line, handling \ */
238 	while ((c = read_char()) != '.') {
239 		for (;c != '\n'; c = read_char()) {
240 			if (c == '\\') {
241 				c = read_char();
242 				if (c == '\n')
243 					current->origin.lineno++;
244 			}
245 			if (c == EOF) {
246 				Parse_Error(PARSE_FATAL,
247 				    "Unclosed conditional");
248 				return NULL;
249 			}
250 		}
251 		current->origin.lineno++;
252 	}
253 
254 	/* This is the line we need to copy */
255 	return Parse_ReadUnparsedLine(linebuf, "conditional");
256 }
257 
258 static void
259 read_logical_line(Buffer linebuf, int c)
260 {
261 	for (;;) {
262 		if (c == '\n') {
263 			current->origin.lineno++;
264 			break;
265 		}
266 		if (c == EOF)
267 			break;
268 		Buf_AddChar(linebuf, c);
269 		c = read_char();
270 		while (c == '\\') {
271 			c = read_char();
272 			if (c == '\n') {
273 				Buf_AddSpace(linebuf);
274 				current->origin.lineno++;
275 				do {
276 					c = read_char();
277 				} while (c == ' ' || c == '\t');
278 			} else {
279 				Buf_AddChar(linebuf, '\\');
280 				if (c == '\\') {
281 					Buf_AddChar(linebuf, '\\');
282 					c = read_char();
283 				}
284 				break;
285 			}
286 		}
287 	}
288 }
289 
290 char *
291 Parse_ReadUnparsedLine(Buffer linebuf, const char *type)
292 {
293 	int c;
294 
295 	Buf_Reset(linebuf);
296 	c = read_char();
297 	if (c == EOF) {
298 		Parse_Error(PARSE_FATAL, "Unclosed %s", type);
299 		return NULL;
300 	}
301 
302 	/* Handle '\' at beginning of line, since \\n needs special treatment */
303 	while (c == '\\') {
304 		c = read_char();
305 		if (c == '\n') {
306 			current->origin.lineno++;
307 			do {
308 				c = read_char();
309 			} while (c == ' ' || c == '\t');
310 		} else {
311 			Buf_AddChar(linebuf, '\\');
312 			if (c == '\\') {
313 				Buf_AddChar(linebuf, '\\');
314 				c = read_char();
315 			}
316 			break;
317 		}
318 	}
319 	read_logical_line(linebuf, c);
320 
321 	return Buf_Retrieve(linebuf);
322 }
323 
324 /* This is a fairly complex function, but without it, we could not skip
325  * blocks of comments without reading them. */
326 static int
327 skip_empty_lines_and_read_char(Buffer linebuf)
328 {
329 	int c;		/* the current character */
330 
331 	for (;;) {
332 		Buf_Reset(linebuf);
333 		c = read_char();
334 		/* Strip leading spaces, fold on '\n' */
335 		if (c == ' ') {
336 			do {
337 				c = read_char();
338 			} while (c == ' ' || c == '\t');
339 			while (c == '\\') {
340 				c = read_char();
341 				if (c == '\n') {
342 					current->origin.lineno++;
343 					do {
344 						c = read_char();
345 					} while (c == ' ' || c == '\t');
346 				} else {
347 					Buf_AddChar(linebuf, '\\');
348 					if (c == '\\') {
349 						Buf_AddChar(linebuf, '\\');
350 						c = read_char();
351 					}
352 					if (c == EOF)
353 						return '\n';
354 					else
355 						return c;
356 				}
357 			}
358 			assert(c != '\t');
359 		}
360 		if (c == '#')
361 			c = skip_to_end_of_line();
362 		/* Almost identical to spaces, except this occurs after
363 		 * comments have been taken care of, and we keep the tab
364 		 * itself.  */
365 		if (c == '\t') {
366 			Buf_AddChar(linebuf, '\t');
367 			do {
368 				c = read_char();
369 			} while (c == ' ' || c == '\t');
370 			while (c == '\\') {
371 				c = read_char();
372 				if (c == '\n') {
373 					current->origin.lineno++;
374 					do {
375 						c = read_char();
376 					} while (c == ' ' || c == '\t');
377 				} else {
378 					Buf_AddChar(linebuf, '\\');
379 					if (c == '\\') {
380 						Buf_AddChar(linebuf, '\\');
381 						c = read_char();
382 					}
383 					if (c == EOF)
384 						return '\n';
385 					else
386 						return c;
387 				}
388 			}
389 		}
390 		if (c == '\n')
391 			current->origin.lineno++;
392 		else
393 			return c;
394 	}
395 }
396 
397 /* Parse_ReadNormalLine removes beginning and trailing blanks (but keeps
398  * the first tab), handles escaped newlines, and skips over uninteresting
399  * lines.
400  *
401  * The line number is incremented, which implies that continuation
402  * lines are numbered with the last line number (we could do better, at a
403  * price).
404  *
405  * Trivial comments are also removed, but we can't do more, as
406  * we don't know which lines are shell commands or not.  */
407 char *
408 Parse_ReadNormalLine(Buffer linebuf)
409 {
410 	int c;		/* the current character */
411 
412 	c = skip_empty_lines_and_read_char(linebuf);
413 
414 	if (c == EOF)
415 		return NULL;
416 	else {
417 		read_logical_line(linebuf, c);
418 		Buf_KillTrailingSpaces(linebuf);
419 		return Buf_Retrieve(linebuf);
420 	}
421 }
422 
423 unsigned long
424 Parse_Getlineno(void)
425 {
426 	return current ? current->origin.lineno : 0;
427 }
428 
429 const char *
430 Parse_Getfilename(void)
431 {
432 	return current ? current->origin.fname : NULL;
433 }
434 
435 void
436 Parse_FillLocation(Location *origin)
437 {
438 	origin->lineno = Parse_Getlineno();
439 	origin->fname = Parse_Getfilename();
440 }
441 
#ifdef CLEANUP
/* Set up the module: no stream being parsed, initialized input stack. */
void
LowParse_Init(void)
{
	current = NULL;
	Static_Lst_Init(&input_stack);
}

/* Tear down the module.  The input stack is expected to be empty by now,
 * so its elements are not freed. */
void
LowParse_End(void)
{
	Lst_Destroy(&input_stack, NOFREE);
#if 0
	Lst_Destroy(&fileNames, (SimpleProc)free);
#endif
}
#endif
459 
460 
461 void
462 Parse_ReportErrors(void)
463 {
464 	if (fatal_errors) {
465 #ifdef CLEANUP
466 		while (Parse_NextFile())
467 			;
468 #endif
469 		fprintf(stderr,
470 		    "Fatal errors encountered -- cannot continue\n");
471 		exit(1);
472 	} else
473 		assert(current == NULL);
474 }
475