1 /* $OpenBSD: lowparse.c,v 1.32 2012/11/07 14:18:41 espie Exp $ */ 2 3 /* low-level parsing functions. */ 4 5 /* 6 * Copyright (c) 1999,2000 Marc Espie. 7 * 8 * Extensive code changes for the OpenBSD project. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE OPENBSD PROJECT AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBSD 23 * PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <assert.h> 33 #include <stddef.h> 34 #include <stdio.h> 35 #include <stdlib.h> 36 #include <string.h> 37 #include <unistd.h> 38 #include "config.h" 39 #include "defines.h" 40 #include "buf.h" 41 #include "lowparse.h" 42 #include "error.h" 43 #include "lst.h" 44 #include "memory.h" 45 #include "pathnames.h" 46 #ifndef LOCATION_TYPE 47 #include "location.h" 48 #endif 49 50 51 /* XXX check whether we can free filenames at the end, for a proper 52 * definition of `end'. */ 53 54 #if 0 55 static LIST fileNames; /* file names to free at end */ 56 #endif 57 58 /* Input stream structure: file or string. 59 * Files have str == NULL, F != NULL. 60 * Strings have F == NULL, str != NULL. 61 */ 62 struct input_stream { 63 Location origin; /* Name of file and line number */ 64 FILE *F; /* Open stream, or NULL if pure string. */ 65 char *str; /* Input string, if F == NULL. */ 66 67 /* Line buffer. */ 68 char *ptr; /* Where we are. */ 69 char *end; /* Don't overdo it. */ 70 }; 71 72 static struct input_stream *current; /* the input_stream being parsed. */ 73 74 static LIST input_stack; /* Stack of input_stream waiting to be parsed 75 * (includes and loop reparses) */ 76 77 /* record gnode location for proper reporting at runtime */ 78 static Location *post_parse = NULL; 79 80 /* input_stream ctors. 81 * 82 * obj = new_input_file(filename, filehandle); 83 * Create input stream from filename, filehandle. */ 84 static struct input_stream *new_input_file(const char *, FILE *); 85 /* obj = new_input_string(str, origin); 86 * Create input stream from str, origin. */ 87 static struct input_stream *new_input_string(char *, const Location *); 88 /* free_input_stream(obj); 89 * Discard consumed input stream, closing files, freeing memory. */ 90 static void free_input_stream(struct input_stream *); 91 92 93 /* Handling basic character reading. 94 * c = read_char(); 95 * New character c from current input stream, or EOF at end of stream. */ 96 #define read_char() \ 97 current->ptr < current->end ? *current->ptr++ : grab_new_line_and_readchar() 98 /* char = grab_new_line_and_readchar(); 99 * Guts for read_char. Grabs a new line off fgetln when we have 100 * consumed the current line and returns the first char, or EOF at end of 101 * stream. */ 102 static int grab_new_line_and_readchar(void); 103 /* c = skip_to_end_of_line(); 104 * Skips to the end of the current line, returns either '\n' or EOF. */ 105 static int skip_to_end_of_line(void); 106 107 108 /* Helper functions to handle basic parsing. */ 109 /* read_logical_line(buffer, firstchar); 110 * Grabs logical line into buffer, the first character has already been 111 * read into firstchar. */ 112 static void read_logical_line(Buffer, int); 113 114 /* firstchar = ParseSkipEmptyLines(buffer); 115 * Scans lines, skipping empty lines. May put some characters into 116 * buffer, returns the first character useful to continue parsing 117 * (e.g., not a backslash or a space. */ 118 static int skip_empty_lines_and_read_char(Buffer); 119 120 const char *curdir; 121 size_t curdir_len; 122 123 void 124 Parse_setcurdir(const char *dir) 125 { 126 curdir = dir; 127 curdir_len = strlen(dir); 128 } 129 130 static bool 131 startswith(const char *f, const char *s, size_t len) 132 { 133 return strncmp(f, s, len) == 0 && f[len] == '/'; 134 } 135 136 static const char * 137 simplify(const char *filename) 138 { 139 if (startswith(filename, curdir, curdir_len)) 140 return filename + curdir_len + 1; 141 else if (startswith(filename, _PATH_DEFSYSPATH, 142 sizeof(_PATH_DEFSYSPATH)-1)) { 143 size_t sz; 144 char *buf; 145 sz = strlen(filename) - sizeof(_PATH_DEFSYSPATH)+3; 146 buf = emalloc(sz); 147 snprintf(buf, sz, "<%s>", filename+sizeof(_PATH_DEFSYSPATH)); 148 return buf; 149 } else 150 return filename; 151 } 152 153 static struct input_stream * 154 new_input_file(const char *name, FILE *stream) 155 { 156 struct input_stream *istream; 157 #if 0 158 Lst_AtEnd(&fileNames, name); 159 #endif 160 161 istream = emalloc(sizeof(*istream)); 162 istream->origin.fname = simplify(name); 163 istream->str = NULL; 164 /* Naturally enough, we start reading at line 0. */ 165 istream->origin.lineno = 0; 166 istream->F = stream; 167 istream->ptr = istream->end = NULL; 168 return istream; 169 } 170 171 static void 172 free_input_stream(struct input_stream *istream) 173 { 174 if (istream->F && fileno(istream->F) != STDIN_FILENO) 175 (void)fclose(istream->F); 176 free(istream->str); 177 /* Note we can't free the file names yet, as they are embedded in GN 178 * for error reports. */ 179 free(istream); 180 } 181 182 static struct input_stream * 183 new_input_string(char *str, const Location *origin) 184 { 185 struct input_stream *istream; 186 187 istream = emalloc(sizeof(*istream)); 188 /* No malloc, name is always taken from an already existing istream 189 * and strings are used in for loops, so we need to reset the line counter 190 * to an appropriate value. */ 191 istream->origin = *origin; 192 istream->F = NULL; 193 istream->ptr = istream->str = str; 194 istream->end = str + strlen(str); 195 return istream; 196 } 197 198 199 void 200 Parse_FromString(char *str, unsigned long lineno) 201 { 202 Location origin; 203 204 origin.fname = current->origin.fname; 205 origin.lineno = lineno; 206 if (DEBUG(FOR)) 207 (void)fprintf(stderr, "%s\n----\n", str); 208 209 Lst_Push(&input_stack, current); 210 assert(current != NULL); 211 current = new_input_string(str, &origin); 212 } 213 214 215 void 216 Parse_FromFile(const char *name, FILE *stream) 217 { 218 if (current != NULL) 219 Lst_Push(&input_stack, current); 220 current = new_input_file(name, stream); 221 } 222 223 bool 224 Parse_NextFile(void) 225 { 226 if (current != NULL) 227 free_input_stream(current); 228 current = (struct input_stream *)Lst_Pop(&input_stack); 229 return current != NULL; 230 } 231 232 static int 233 grab_new_line_and_readchar(void) 234 { 235 size_t len; 236 237 if (current->F) { 238 current->ptr = fgetln(current->F, &len); 239 if (current->ptr) { 240 current->end = current->ptr + len; 241 return *current->ptr++; 242 } else { 243 current->end = NULL; 244 } 245 } 246 return EOF; 247 } 248 249 static int 250 skip_to_end_of_line(void) 251 { 252 if (current->F) { 253 if (current->end - current->ptr > 1) 254 current->ptr = current->end - 1; 255 if (*current->ptr == '\n') 256 return *current->ptr++; 257 return EOF; 258 } else { 259 int c; 260 261 do { 262 c = read_char(); 263 } while (c != '\n' && c != EOF); 264 return c; 265 } 266 } 267 268 269 char * 270 Parse_ReadNextConditionalLine(Buffer linebuf) 271 { 272 int c; 273 274 /* If first char isn't dot, skip to end of line, handling \ */ 275 while ((c = read_char()) != '.') { 276 for (;c != '\n'; c = read_char()) { 277 if (c == '\\') { 278 c = read_char(); 279 if (c == '\n') 280 current->origin.lineno++; 281 } 282 if (c == EOF) 283 /* Unclosed conditional, reported by cond.c */ 284 return NULL; 285 } 286 current->origin.lineno++; 287 } 288 289 /* This is the line we need to copy */ 290 return Parse_ReadUnparsedLine(linebuf, "conditional"); 291 } 292 293 static void 294 read_logical_line(Buffer linebuf, int c) 295 { 296 for (;;) { 297 if (c == '\n') { 298 current->origin.lineno++; 299 break; 300 } 301 if (c == EOF) 302 break; 303 Buf_AddChar(linebuf, c); 304 c = read_char(); 305 while (c == '\\') { 306 c = read_char(); 307 if (c == '\n') { 308 Buf_AddSpace(linebuf); 309 current->origin.lineno++; 310 do { 311 c = read_char(); 312 } while (c == ' ' || c == '\t'); 313 } else { 314 Buf_AddChar(linebuf, '\\'); 315 if (c == '\\') { 316 Buf_AddChar(linebuf, '\\'); 317 c = read_char(); 318 } 319 break; 320 } 321 } 322 } 323 } 324 325 char * 326 Parse_ReadUnparsedLine(Buffer linebuf, const char *type) 327 { 328 int c; 329 330 Buf_Reset(linebuf); 331 c = read_char(); 332 if (c == EOF) { 333 Parse_Error(PARSE_FATAL, "Unclosed %s", type); 334 return NULL; 335 } 336 337 /* Handle '\' at beginning of line, since \\n needs special treatment */ 338 while (c == '\\') { 339 c = read_char(); 340 if (c == '\n') { 341 current->origin.lineno++; 342 do { 343 c = read_char(); 344 } while (c == ' ' || c == '\t'); 345 } else { 346 Buf_AddChar(linebuf, '\\'); 347 if (c == '\\') { 348 Buf_AddChar(linebuf, '\\'); 349 c = read_char(); 350 } 351 break; 352 } 353 } 354 read_logical_line(linebuf, c); 355 356 return Buf_Retrieve(linebuf); 357 } 358 359 /* This is a fairly complex function, but without it, we could not skip 360 * blocks of comments without reading them. */ 361 static int 362 skip_empty_lines_and_read_char(Buffer linebuf) 363 { 364 int c; /* the current character */ 365 366 for (;;) { 367 Buf_Reset(linebuf); 368 c = read_char(); 369 /* Strip leading spaces, fold on '\n' */ 370 if (c == ' ') { 371 do { 372 c = read_char(); 373 } while (c == ' ' || c == '\t'); 374 while (c == '\\') { 375 c = read_char(); 376 if (c == '\n') { 377 current->origin.lineno++; 378 do { 379 c = read_char(); 380 } while (c == ' ' || c == '\t'); 381 } else { 382 Buf_AddChar(linebuf, '\\'); 383 if (c == '\\') { 384 Buf_AddChar(linebuf, '\\'); 385 c = read_char(); 386 } 387 if (c == EOF) 388 return '\n'; 389 else 390 return c; 391 } 392 } 393 assert(c != '\t'); 394 } 395 if (c == '#') 396 c = skip_to_end_of_line(); 397 /* Almost identical to spaces, except this occurs after 398 * comments have been taken care of, and we keep the tab 399 * itself. */ 400 if (c == '\t') { 401 Buf_AddChar(linebuf, '\t'); 402 do { 403 c = read_char(); 404 } while (c == ' ' || c == '\t'); 405 while (c == '\\') { 406 c = read_char(); 407 if (c == '\n') { 408 current->origin.lineno++; 409 do { 410 c = read_char(); 411 } while (c == ' ' || c == '\t'); 412 } else { 413 Buf_AddChar(linebuf, '\\'); 414 if (c == '\\') { 415 Buf_AddChar(linebuf, '\\'); 416 c = read_char(); 417 } 418 if (c == EOF) 419 return '\n'; 420 else 421 return c; 422 } 423 } 424 } 425 if (c == '\n') 426 current->origin.lineno++; 427 else 428 return c; 429 } 430 } 431 432 /* Parse_ReadNormalLine removes beginning and trailing blanks (but keeps 433 * the first tab), handles escaped newlines, and skips over uninteresting 434 * lines. 435 * 436 * The line number is incremented, which implies that continuation 437 * lines are numbered with the last line number (we could do better, at a 438 * price). 439 * 440 * Trivial comments are also removed, but we can't do more, as 441 * we don't know which lines are shell commands or not. */ 442 char * 443 Parse_ReadNormalLine(Buffer linebuf) 444 { 445 int c; /* the current character */ 446 447 c = skip_empty_lines_and_read_char(linebuf); 448 449 if (c == EOF) 450 return NULL; 451 else { 452 read_logical_line(linebuf, c); 453 return Buf_Retrieve(linebuf); 454 } 455 } 456 457 unsigned long 458 Parse_Getlineno(void) 459 { 460 return current ? current->origin.lineno : 0; 461 } 462 463 const char * 464 Parse_Getfilename(void) 465 { 466 return current ? current->origin.fname : NULL; 467 } 468 469 void 470 Parse_SetLocation(Location *origin) 471 { 472 post_parse = origin; 473 } 474 475 void 476 Parse_FillLocation(Location *origin) 477 { 478 if (post_parse) { 479 *origin = *post_parse; 480 } else { 481 origin->lineno = Parse_Getlineno(); 482 origin->fname = Parse_Getfilename(); 483 } 484 } 485 486 void 487 Parse_ReportErrors(void) 488 { 489 if (fatal_errors) 490 exit(1); 491 else 492 assert(current == NULL); 493 } 494