1 /* $OpenBSD: lowparse.c,v 1.36 2023/09/04 11:35:11 espie Exp $ */ 2 3 /* low-level parsing functions. */ 4 5 /* 6 * Copyright (c) 1999,2000 Marc Espie. 7 * 8 * Extensive code changes for the OpenBSD project. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE OPENBSD PROJECT AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBSD 23 * PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <assert.h> 33 #include <stddef.h> 34 #include <stdio.h> 35 #include <stdlib.h> 36 #include <string.h> 37 #include <unistd.h> 38 #include "defines.h" 39 #include "buf.h" 40 #include "lowparse.h" 41 #include "error.h" 42 #include "lst.h" 43 #include "memory.h" 44 #include "pathnames.h" 45 #ifndef LOCATION_TYPE 46 #include "location.h" 47 #endif 48 #include "var.h" 49 50 51 #define READ_MAKEFILES "MAKEFILE_LIST" 52 53 /* Input stream structure: file or string. 54 * Files have str == NULL, F != NULL. 55 * Strings have F == NULL, str != NULL. 56 */ 57 struct input_stream { 58 Location origin; /* Name of file and line number */ 59 FILE *F; /* Open stream, or NULL if pure string. */ 60 char *str; /* Input string, if F == NULL. */ 61 62 /* Line buffer. */ 63 char *ptr; /* Where we are. */ 64 char *end; /* Don't overdo it. */ 65 }; 66 67 static struct input_stream *current; /* the input_stream being parsed. */ 68 69 static LIST input_stack; /* Stack of input_stream waiting to be parsed 70 * (includes and loop reparses) */ 71 72 /* record gnode location for proper reporting at runtime */ 73 static Location *post_parse = NULL; 74 75 /* input_stream ctors. 76 * 77 * obj = new_input_file(filename, filehandle); 78 * Create input stream from filename, filehandle. */ 79 static struct input_stream *new_input_file(const char *, FILE *); 80 /* obj = new_input_string(str, origin); 81 * Create input stream from str, origin. */ 82 static struct input_stream *new_input_string(char *, const Location *); 83 /* free_input_stream(obj); 84 * Discard consumed input stream, closing files, freeing memory. */ 85 static void free_input_stream(struct input_stream *); 86 87 88 /* Handling basic character reading. 89 * c = read_char(); 90 * New character c from current input stream, or EOF at end of stream. */ 91 #define read_char() \ 92 current->ptr < current->end ? *current->ptr++ : grab_new_line_and_readchar() 93 /* char = grab_new_line_and_readchar(); 94 * Guts for read_char. Grabs a new line off fgetln when we have 95 * consumed the current line and returns the first char, or EOF at end of 96 * stream. */ 97 static int grab_new_line_and_readchar(void); 98 /* c = skip_to_end_of_line(); 99 * Skips to the end of the current line, returns either '\n' or EOF. */ 100 static int skip_to_end_of_line(void); 101 102 103 /* Helper functions to handle basic parsing. */ 104 /* read_logical_line(buffer, firstchar); 105 * Grabs logical line into buffer, the first character has already been 106 * read into firstchar. */ 107 static void read_logical_line(Buffer, int); 108 109 /* firstchar = ParseSkipEmptyLines(buffer); 110 * Scans lines, skipping empty lines. May put some characters into 111 * buffer, returns the first character useful to continue parsing 112 * (e.g., not a backslash or a space. */ 113 static int skip_empty_lines_and_read_char(Buffer); 114 115 const char *curdir; 116 size_t curdir_len; 117 118 void 119 Parse_setcurdir(const char *dir) 120 { 121 curdir = dir; 122 curdir_len = strlen(dir); 123 } 124 125 static bool 126 startswith(const char *f, const char *s, size_t len) 127 { 128 return strncmp(f, s, len) == 0 && f[len] == '/'; 129 } 130 131 static const char * 132 simplify(const char *filename) 133 { 134 if (startswith(filename, curdir, curdir_len)) 135 return filename + curdir_len + 1; 136 else if (startswith(filename, _PATH_DEFSYSPATH, 137 sizeof(_PATH_DEFSYSPATH)-1)) { 138 size_t sz; 139 char *buf; 140 sz = strlen(filename) - sizeof(_PATH_DEFSYSPATH)+3; 141 buf = emalloc(sz); 142 snprintf(buf, sz, "<%s>", filename+sizeof(_PATH_DEFSYSPATH)); 143 return buf; 144 } else 145 return filename; 146 } 147 148 static struct input_stream * 149 new_input_file(const char *name, FILE *stream) 150 { 151 struct input_stream *istream; 152 153 istream = emalloc(sizeof(*istream)); 154 istream->origin.fname = simplify(name); 155 Var_Append(READ_MAKEFILES, name); 156 istream->str = NULL; 157 /* Naturally enough, we start reading at line 0. */ 158 istream->origin.lineno = 0; 159 istream->F = stream; 160 istream->ptr = istream->end = NULL; 161 return istream; 162 } 163 164 static void 165 free_input_stream(struct input_stream *istream) 166 { 167 if (istream->F) { 168 if (ferror(istream->F)) 169 Parse_Error(PARSE_FATAL, "Read error"); 170 if (fileno(istream->F) != STDIN_FILENO) 171 (void)fclose(istream->F); 172 } 173 free(istream->str); 174 /* Note we can't free the file names, as they are embedded in GN 175 * for error reports. */ 176 free(istream); 177 } 178 179 static struct input_stream * 180 new_input_string(char *str, const Location *origin) 181 { 182 struct input_stream *istream; 183 184 istream = emalloc(sizeof(*istream)); 185 /* No malloc, name is always taken from an already existing istream 186 * and strings are used in for loops, so we need to reset the line 187 * counter to an appropriate value. */ 188 istream->origin = *origin; 189 istream->F = NULL; 190 istream->ptr = istream->str = str; 191 istream->end = str + strlen(str); 192 return istream; 193 } 194 195 196 void 197 Parse_FromString(char *str, unsigned long lineno) 198 { 199 Location origin; 200 201 origin.fname = current->origin.fname; 202 origin.lineno = lineno; 203 if (DEBUG(FOR)) 204 (void)fprintf(stderr, "%s\n----\n", str); 205 206 Lst_Push(&input_stack, current); 207 assert(current != NULL); 208 current = new_input_string(str, &origin); 209 } 210 211 212 void 213 Parse_FromFile(const char *name, FILE *stream) 214 { 215 if (current != NULL) 216 Lst_Push(&input_stack, current); 217 current = new_input_file(name, stream); 218 } 219 220 bool 221 Parse_NextFile(void) 222 { 223 if (current != NULL) 224 free_input_stream(current); 225 current = Lst_Pop(&input_stack); 226 return current != NULL; 227 } 228 229 static int 230 grab_new_line_and_readchar(void) 231 { 232 size_t len; 233 234 if (current->F) { 235 current->ptr = fgetln(current->F, &len); 236 if (current->ptr) { 237 current->end = current->ptr + len; 238 return *current->ptr++; 239 } else { 240 current->end = NULL; 241 } 242 } 243 return EOF; 244 } 245 246 static int 247 skip_to_end_of_line(void) 248 { 249 if (current->F) { 250 if (current->end - current->ptr > 1) 251 current->ptr = current->end - 1; 252 if (*current->ptr == '\n') 253 return *current->ptr++; 254 return EOF; 255 } else { 256 int c; 257 258 do { 259 c = read_char(); 260 } while (c != '\n' && c != EOF); 261 return c; 262 } 263 } 264 265 266 char * 267 Parse_ReadNextConditionalLine(Buffer linebuf) 268 { 269 int c; 270 271 /* If first char isn't dot, skip to end of line, handling \ */ 272 while ((c = read_char()) != '.') { 273 for (;c != '\n'; c = read_char()) { 274 if (c == '\\') { 275 c = read_char(); 276 if (c == '\n') 277 current->origin.lineno++; 278 } 279 if (c == EOF) 280 /* Unclosed conditional, reported by cond.c */ 281 return NULL; 282 } 283 current->origin.lineno++; 284 } 285 286 /* This is the line we need to copy */ 287 return Parse_ReadUnparsedLine(linebuf, "conditional"); 288 } 289 290 static void 291 read_logical_line(Buffer linebuf, int c) 292 { 293 for (;;) { 294 if (c == '\n') { 295 current->origin.lineno++; 296 break; 297 } 298 if (c == EOF) 299 break; 300 Buf_AddChar(linebuf, c); 301 c = read_char(); 302 while (c == '\\') { 303 c = read_char(); 304 if (c == '\n') { 305 Buf_AddSpace(linebuf); 306 current->origin.lineno++; 307 do { 308 c = read_char(); 309 } while (c == ' ' || c == '\t'); 310 } else { 311 Buf_AddChar(linebuf, '\\'); 312 if (c == '\\') { 313 Buf_AddChar(linebuf, '\\'); 314 c = read_char(); 315 } 316 break; 317 } 318 } 319 } 320 } 321 322 char * 323 Parse_ReadUnparsedLine(Buffer linebuf, const char *type) 324 { 325 int c; 326 327 Buf_Reset(linebuf); 328 c = read_char(); 329 if (c == EOF) { 330 Parse_Error(PARSE_FATAL, "Unclosed %s", type); 331 return NULL; 332 } 333 334 /* Handle '\' at beginning of line, since \\n needs special treatment */ 335 while (c == '\\') { 336 c = read_char(); 337 if (c == '\n') { 338 current->origin.lineno++; 339 do { 340 c = read_char(); 341 } while (c == ' ' || c == '\t'); 342 } else { 343 Buf_AddChar(linebuf, '\\'); 344 if (c == '\\') { 345 Buf_AddChar(linebuf, '\\'); 346 c = read_char(); 347 } 348 break; 349 } 350 } 351 read_logical_line(linebuf, c); 352 353 return Buf_Retrieve(linebuf); 354 } 355 356 /* This is a fairly complex function, but without it, we could not skip 357 * blocks of comments without reading them. */ 358 static int 359 skip_empty_lines_and_read_char(Buffer linebuf) 360 { 361 int c; /* the current character */ 362 363 for (;;) { 364 Buf_Reset(linebuf); 365 c = read_char(); 366 /* Strip leading spaces, fold on '\n' */ 367 if (c == ' ') { 368 do { 369 c = read_char(); 370 } while (c == ' ' || c == '\t'); 371 while (c == '\\') { 372 c = read_char(); 373 if (c == '\n') { 374 current->origin.lineno++; 375 do { 376 c = read_char(); 377 } while (c == ' ' || c == '\t'); 378 } else { 379 Buf_AddChar(linebuf, '\\'); 380 if (c == '\\') { 381 Buf_AddChar(linebuf, '\\'); 382 c = read_char(); 383 } 384 if (c == EOF) 385 return '\n'; 386 else 387 return c; 388 } 389 } 390 assert(c != '\t'); 391 } 392 if (c == '#') 393 c = skip_to_end_of_line(); 394 /* Almost identical to spaces, except this occurs after 395 * comments have been taken care of, and we keep the tab 396 * itself. */ 397 if (c == '\t') { 398 Buf_AddChar(linebuf, '\t'); 399 do { 400 c = read_char(); 401 } while (c == ' ' || c == '\t'); 402 while (c == '\\') { 403 c = read_char(); 404 if (c == '\n') { 405 current->origin.lineno++; 406 do { 407 c = read_char(); 408 } while (c == ' ' || c == '\t'); 409 } else { 410 Buf_AddChar(linebuf, '\\'); 411 if (c == '\\') { 412 Buf_AddChar(linebuf, '\\'); 413 c = read_char(); 414 } 415 if (c == EOF) 416 return '\n'; 417 else 418 return c; 419 } 420 } 421 } 422 if (c == '\n') 423 current->origin.lineno++; 424 else 425 return c; 426 } 427 } 428 429 /* Parse_ReadNormalLine removes beginning and trailing blanks (but keeps 430 * the first tab), handles escaped newlines, and skips over uninteresting 431 * lines. 432 * 433 * The line number is incremented, which implies that continuation 434 * lines are numbered with the last line number (we could do better, at a 435 * price). 436 * 437 * Trivial comments are also removed, but we can't do more, as 438 * we don't know which lines are shell commands or not. */ 439 char * 440 Parse_ReadNormalLine(Buffer linebuf) 441 { 442 int c; /* the current character */ 443 444 c = skip_empty_lines_and_read_char(linebuf); 445 446 if (c == EOF) 447 return NULL; 448 else { 449 read_logical_line(linebuf, c); 450 return Buf_Retrieve(linebuf); 451 } 452 } 453 454 unsigned long 455 Parse_Getlineno(void) 456 { 457 return current ? current->origin.lineno : 0; 458 } 459 460 const char * 461 Parse_Getfilename(void) 462 { 463 return current ? current->origin.fname : NULL; 464 } 465 466 void 467 Parse_SetLocation(Location *origin) 468 { 469 post_parse = origin; 470 } 471 472 void 473 Parse_FillLocation(Location *origin) 474 { 475 if (post_parse) { 476 *origin = *post_parse; 477 } else { 478 origin->lineno = Parse_Getlineno(); 479 origin->fname = Parse_Getfilename(); 480 } 481 } 482 483 void 484 Parse_ReportErrors(void) 485 { 486 if (fatal_errors) 487 exit(1); 488 else 489 assert(current == NULL); 490 } 491