// Copyright 2012 Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
//   notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
//   notice, this list of conditions and the following disclaimer in the
//   documentation and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors
//   may be used to endorse or promote products derived from this software
//   without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 29 #include "utils/text/templates.hpp" 30 31 #include <algorithm> 32 #include <fstream> 33 #include <sstream> 34 #include <stack> 35 36 #include "utils/format/macros.hpp" 37 #include "utils/noncopyable.hpp" 38 #include "utils/sanity.hpp" 39 #include "utils/text/exceptions.hpp" 40 #include "utils/text/operations.ipp" 41 42 namespace text = utils::text; 43 44 45 namespace { 46 47 48 /// Definition of a template statement. 49 /// 50 /// A template statement is a particular line in the input file that is 51 /// preceeded by a template marker. This class provides a high-level 52 /// representation of the contents of such statement and a mechanism to parse 53 /// the textual line into this high-level representation. 54 class statement_def { 55 public: 56 /// Types of the known statements. 57 enum statement_type { 58 /// Alternative clause of a conditional. 59 /// 60 /// Takes no arguments. 61 type_else, 62 63 /// End of conditional marker. 64 /// 65 /// Takes no arguments. 66 type_endif, 67 68 /// End of loop marker. 69 /// 70 /// Takes no arguments. 71 type_endloop, 72 73 /// Beginning of a conditional. 74 /// 75 /// Takes a single argument, which denotes the name of the variable or 76 /// vector to check for existence. This is the only expression 77 /// supported. 78 type_if, 79 80 /// Beginning of a loop over all the elements of a vector. 81 /// 82 /// Takes two arguments: the name of the vector over which to iterate 83 /// and the name of the iterator to later index this vector. 84 type_loop, 85 }; 86 87 private: 88 /// Internal data describing the structure of a particular statement type. 89 struct type_descriptor { 90 /// The native type of the statement. 91 statement_type type; 92 93 /// The expected number of arguments. 94 unsigned int n_arguments; 95 96 /// Constructs a new type descriptor. 97 /// 98 /// \param type_ The native type of the statement. 99 /// \param n_arguments_ The expected number of arguments. 
100 type_descriptor(const statement_type type_, 101 const unsigned int n_arguments_) 102 : type(type_), n_arguments(n_arguments_) 103 { 104 } 105 }; 106 107 /// Mapping of statement type names to their definitions. 108 typedef std::map< std::string, type_descriptor > types_map; 109 110 /// Description of the different statement types. 111 /// 112 /// This static map is initialized once and reused later for any statement 113 /// lookup. Unfortunately, we cannot perform this initialization in a 114 /// static manner without C++11. 115 static types_map _types; 116 117 /// Generates a new types definition map. 118 /// 119 /// \return A new types definition map, to be assigned to _types. 120 static types_map 121 generate_types_map(void) 122 { 123 // If you change this, please edit the comments in the enum above. 124 types_map types; 125 types.insert(types_map::value_type( 126 "else", type_descriptor(type_else, 0))); 127 types.insert(types_map::value_type( 128 "endif", type_descriptor(type_endif, 0))); 129 types.insert(types_map::value_type( 130 "endloop", type_descriptor(type_endloop, 0))); 131 types.insert(types_map::value_type( 132 "if", type_descriptor(type_if, 1))); 133 types.insert(types_map::value_type( 134 "loop", type_descriptor(type_loop, 2))); 135 return types; 136 } 137 138 public: 139 /// The type of the statement. 140 statement_type type; 141 142 /// The arguments to the statement, in textual form. 143 const std::vector< std::string > arguments; 144 145 /// Creates a new statement. 146 /// 147 /// \param type_ The type of the statement. 148 /// \param arguments_ The arguments to the statement. 
149 statement_def(const statement_type& type_, 150 const std::vector< std::string >& arguments_) : 151 type(type_), arguments(arguments_) 152 { 153 #if !defined(NDEBUG) 154 for (types_map::const_iterator iter = _types.begin(); 155 iter != _types.end(); ++iter) { 156 const type_descriptor& descriptor = (*iter).second; 157 if (descriptor.type == type_) { 158 PRE(descriptor.n_arguments == arguments_.size()); 159 return; 160 } 161 } 162 UNREACHABLE; 163 #endif 164 } 165 166 /// Parses a statement. 167 /// 168 /// \param line The textual representation of the statement without any 169 /// prefix. 170 /// 171 /// \return The parsed statement. 172 /// 173 /// \throw text::syntax_error If the statement is not correctly defined. 174 static statement_def 175 parse(const std::string& line) 176 { 177 if (_types.empty()) 178 _types = generate_types_map(); 179 180 const std::vector< std::string > words = text::split(line, ' '); 181 if (words.empty()) 182 throw text::syntax_error("Empty statement"); 183 184 const types_map::const_iterator iter = _types.find(words[0]); 185 if (iter == _types.end()) 186 throw text::syntax_error(F("Unknown statement '%s'") % words[0]); 187 const type_descriptor& descriptor = (*iter).second; 188 189 if (words.size() - 1 != descriptor.n_arguments) 190 throw text::syntax_error(F("Invalid number of arguments for " 191 "statement '%s'") % words[0]); 192 193 std::vector< std::string > new_arguments; 194 new_arguments.resize(words.size() - 1); 195 std::copy(words.begin() + 1, words.end(), new_arguments.begin()); 196 197 return statement_def(descriptor.type, new_arguments); 198 } 199 }; 200 201 202 statement_def::types_map statement_def::_types; 203 204 205 /// Definition of a loop. 206 /// 207 /// This simple structure is used to keep track of the parameters of a loop. 208 struct loop_def { 209 /// The name of the vector over which this loop is iterating. 210 std::string vector; 211 212 /// The name of the iterator defined by this loop. 
213 std::string iterator; 214 215 /// Position in the input to which to rewind to on looping. 216 /// 217 /// This position points to the line after the loop statement, not the loop 218 /// itself. This is one of the reasons why we have this structure, so that 219 /// we can maintain the data about the loop without having to re-process it. 220 std::istream::pos_type position; 221 222 /// Constructs a new loop definition. 223 /// 224 /// \param vector_ The name of the vector (first argument). 225 /// \param iterator_ The name of the iterator (second argumnet). 226 /// \param position_ Position of the next line after the loop statement. 227 loop_def(const std::string& vector_, const std::string& iterator_, 228 const std::istream::pos_type position_) : 229 vector(vector_), iterator(iterator_), position(position_) 230 { 231 } 232 }; 233 234 235 /// Stateful class to instantiate the templates in an input stream. 236 /// 237 /// The goal of this parser is to scan the input once and not buffer anything in 238 /// memory. The only exception are loops: loops are reinterpreted on every 239 /// iteration from the same input file by rewidining the stream to the 240 /// appropriate position. 241 class templates_parser : utils::noncopyable { 242 /// The templates to apply. 243 /// 244 /// Note that this is not const because the parser has to have write access 245 /// to the templates. In particular, it needs to be able to define the 246 /// iterators as regular variables. 247 text::templates_def _templates; 248 249 /// Prefix that marks a line as a statement. 250 const std::string _prefix; 251 252 /// Delimiter to surround an expression instantiation. 253 const std::string _delimiter; 254 255 /// Whether to skip incoming lines or not. 256 /// 257 /// The top of the stack is true whenever we encounter a conditional that 258 /// evaluates to false or a loop that does not have any iterations left. 
259 /// Under these circumstances, we need to continue scanning the input stream 260 /// until we find the matching closing endif or endloop construct. 261 /// 262 /// This is a stack rather than a plain boolean to allow us deal with 263 /// if-else clauses. 264 std::stack< bool > _skip; 265 266 /// Current count of nested conditionals. 267 unsigned int _if_level; 268 269 /// Level of the top-most conditional that evaluated to false. 270 unsigned int _exit_if_level; 271 272 /// Current count of nested loops. 273 unsigned int _loop_level; 274 275 /// Level of the top-most loop that does not have any iterations left. 276 unsigned int _exit_loop_level; 277 278 /// Information about all the nested loops up to the current point. 279 std::stack< loop_def > _loops; 280 281 /// Checks if a line is a statement or not. 282 /// 283 /// \param line The line to validate. 284 /// 285 /// \return True if the line looks like a statement, which is determined by 286 /// checking if the line starts by the predefined prefix. 287 bool 288 is_statement(const std::string& line) 289 { 290 return ((line.length() >= _prefix.length() && 291 line.substr(0, _prefix.length()) == _prefix) && 292 (line.length() < _delimiter.length() || 293 line.substr(0, _delimiter.length()) != _delimiter)); 294 } 295 296 /// Parses a given statement line into a statement definition. 297 /// 298 /// \param line The line to validate; it must be a valid statement. 299 /// 300 /// \return The parsed statement. 301 /// 302 /// \throw text::syntax_error If the input is not a valid statement. 303 statement_def 304 parse_statement(const std::string& line) 305 { 306 PRE(is_statement(line)); 307 return statement_def::parse(line.substr(_prefix.length())); 308 } 309 310 /// Processes a line from the input when not in skip mode. 311 /// 312 /// \param line The line to be processed. 313 /// \param input The input stream from which the line was read. 
The current 314 /// position in the stream must be after the line being processed. 315 /// \param output The output stream into which to write the results. 316 /// 317 /// \throw text::syntax_error If the input is not valid. 318 void 319 handle_normal(const std::string& line, std::istream& input, 320 std::ostream& output) 321 { 322 if (!is_statement(line)) { 323 // Fast path. Mostly to avoid an indentation level for the big 324 // chunk of code below. 325 output << line << '\n'; 326 return; 327 } 328 329 const statement_def statement = parse_statement(line); 330 331 switch (statement.type) { 332 case statement_def::type_else: 333 _skip.top() = !_skip.top(); 334 break; 335 336 case statement_def::type_endif: 337 _if_level--; 338 break; 339 340 case statement_def::type_endloop: { 341 PRE(_loops.size() == _loop_level); 342 loop_def& loop = _loops.top(); 343 344 const std::size_t next_index = 1 + text::to_type< std::size_t >( 345 _templates.get_variable(loop.iterator)); 346 347 if (next_index < _templates.get_vector(loop.vector).size()) { 348 _templates.add_variable(loop.iterator, F("%s") % next_index); 349 input.seekg(loop.position); 350 } else { 351 _loop_level--; 352 _loops.pop(); 353 _templates.remove_variable(loop.iterator); 354 } 355 } break; 356 357 case statement_def::type_if: { 358 _if_level++; 359 const std::string value = _templates.evaluate( 360 statement.arguments[0]); 361 if (value.empty() || value == "0" || value == "false") { 362 _exit_if_level = _if_level; 363 _skip.push(true); 364 } else { 365 _skip.push(false); 366 } 367 } break; 368 369 case statement_def::type_loop: { 370 _loop_level++; 371 372 const loop_def loop(statement.arguments[0], statement.arguments[1], 373 input.tellg()); 374 if (_templates.get_vector(loop.vector).empty()) { 375 _exit_loop_level = _loop_level; 376 _skip.push(true); 377 } else { 378 _templates.add_variable(loop.iterator, "0"); 379 _loops.push(loop); 380 _skip.push(false); 381 } 382 } break; 383 } 384 } 385 386 /// Processes 
a line from the input when in skip mode. 387 /// 388 /// \param line The line to be processed. 389 /// 390 /// \throw text::syntax_error If the input is not valid. 391 void 392 handle_skip(const std::string& line) 393 { 394 PRE(_skip.top()); 395 396 if (!is_statement(line)) 397 return; 398 399 const statement_def statement = parse_statement(line); 400 switch (statement.type) { 401 case statement_def::type_else: 402 if (_exit_if_level == _if_level) 403 _skip.top() = !_skip.top(); 404 break; 405 406 case statement_def::type_endif: 407 INV(_if_level >= _exit_if_level); 408 if (_if_level == _exit_if_level) 409 _skip.top() = false; 410 _if_level--; 411 _skip.pop(); 412 break; 413 414 case statement_def::type_endloop: 415 INV(_loop_level >= _exit_loop_level); 416 if (_loop_level == _exit_loop_level) 417 _skip.top() = false; 418 _loop_level--; 419 _skip.pop(); 420 break; 421 422 case statement_def::type_if: 423 _if_level++; 424 _skip.push(true); 425 break; 426 427 case statement_def::type_loop: 428 _loop_level++; 429 _skip.push(true); 430 break; 431 432 default: 433 break; 434 } 435 } 436 437 /// Evaluates expressions on a given input line. 438 /// 439 /// An expression is surrounded by _delimiter on both sides. We scan the 440 /// string from left to right finding any expressions that may appear, yank 441 /// them out and call templates_def::evaluate() to get their value. 442 /// 443 /// Lonely or unbalanced appearances of _delimiter on the input line are 444 /// not considered an error, given that the user may actually want to supply 445 /// that character sequence without being interpreted as a template. 446 /// 447 /// \param in_line The input line from which to evaluate expressions. 448 /// 449 /// \return The evaluated line. 450 /// 451 /// \throw text::syntax_error If the expressions in the line are malformed. 
452 std::string 453 evaluate(const std::string& in_line) 454 { 455 std::string out_line; 456 457 std::string::size_type last_pos = 0; 458 while (last_pos != std::string::npos) { 459 const std::string::size_type open_pos = in_line.find( 460 _delimiter, last_pos); 461 if (open_pos == std::string::npos) { 462 out_line += in_line.substr(last_pos); 463 last_pos = std::string::npos; 464 } else { 465 const std::string::size_type close_pos = in_line.find( 466 _delimiter, open_pos + _delimiter.length()); 467 if (close_pos == std::string::npos) { 468 out_line += in_line.substr(last_pos); 469 last_pos = std::string::npos; 470 } else { 471 out_line += in_line.substr(last_pos, open_pos - last_pos); 472 out_line += _templates.evaluate(in_line.substr( 473 open_pos + _delimiter.length(), 474 close_pos - open_pos - _delimiter.length())); 475 last_pos = close_pos + _delimiter.length(); 476 } 477 } 478 } 479 480 return out_line; 481 } 482 483 public: 484 /// Constructs a new template parser. 485 /// 486 /// \param templates_ The templates to apply to the processed file. 487 /// \param prefix_ The prefix that identifies lines as statements. 488 /// \param delimiter_ Delimiter to surround a variable instantiation. 489 templates_parser(const text::templates_def& templates_, 490 const std::string& prefix_, 491 const std::string& delimiter_) : 492 _templates(templates_), 493 _prefix(prefix_), 494 _delimiter(delimiter_), 495 _if_level(0), 496 _exit_if_level(0), 497 _loop_level(0), 498 _exit_loop_level(0) 499 { 500 } 501 502 /// Applies the templates to a given input. 503 /// 504 /// \param input The stream to which to apply the templates. 505 /// \param output The stream into which to write the results. 506 /// 507 /// \throw text::syntax_error If the input is not valid. Note that the 508 /// is not guaranteed to be unmodified on exit if an error is 509 /// encountered. 
510 void 511 instantiate(std::istream& input, std::ostream& output) 512 { 513 std::string line; 514 while (std::getline(input, line).good()) { 515 if (!_skip.empty() && _skip.top()) 516 handle_skip(line); 517 else 518 handle_normal(evaluate(line), input, output); 519 } 520 } 521 }; 522 523 524 } // anonymous namespace 525 526 527 /// Constructs an empty templates definition. 528 text::templates_def::templates_def(void) 529 { 530 } 531 532 533 /// Sets a string variable in the templates. 534 /// 535 /// If the variable already exists, its value is replaced. This behavior is 536 /// required to implement iterators, but client code should really not be 537 /// redefining variables. 538 /// 539 /// \pre The variable must not already exist as a vector. 540 /// 541 /// \param name The name of the variable to set. 542 /// \param value The value to set the given variable to. 543 void 544 text::templates_def::add_variable(const std::string& name, 545 const std::string& value) 546 { 547 PRE(_vectors.find(name) == _vectors.end()); 548 _variables[name] = value; 549 } 550 551 552 /// Unsets a string variable from the templates. 553 /// 554 /// Client code has no reason to use this. This is only required to implement 555 /// proper scoping of loop iterators. 556 /// 557 /// \pre The variable must exist. 558 /// 559 /// \param name The name of the variable to remove from the templates. 560 void 561 text::templates_def::remove_variable(const std::string& name) 562 { 563 PRE(_variables.find(name) != _variables.end()); 564 _variables.erase(_variables.find(name)); 565 } 566 567 568 /// Creates a new vector in the templates. 569 /// 570 /// If the vector already exists, it is cleared. Client code should really not 571 /// be redefining variables. 572 /// 573 /// \pre The vector must not already exist as a variable. 574 /// 575 /// \param name The name of the vector to set. 
576 void 577 text::templates_def::add_vector(const std::string& name) 578 { 579 PRE(_variables.find(name) == _variables.end()); 580 _vectors[name] = strings_vector(); 581 } 582 583 584 /// Adds a value to an existing vector in the templates. 585 /// 586 /// \pre name The vector must exist. 587 /// 588 /// \param name The name of the vector to append the value to. 589 /// \param value The textual value to append to the vector. 590 void 591 text::templates_def::add_to_vector(const std::string& name, 592 const std::string& value) 593 { 594 PRE(_variables.find(name) == _variables.end()); 595 PRE(_vectors.find(name) != _vectors.end()); 596 _vectors[name].push_back(value); 597 } 598 599 600 /// Checks whether a given identifier exists as a variable or a vector. 601 /// 602 /// This is used to implement the evaluation of conditions in if clauses. 603 /// 604 /// \param name The name of the variable or vector. 605 /// 606 /// \return True if the given name exists as a variable or a vector; false 607 /// otherwise. 608 bool 609 text::templates_def::exists(const std::string& name) const 610 { 611 return (_variables.find(name) != _variables.end() || 612 _vectors.find(name) != _vectors.end()); 613 } 614 615 616 /// Gets the value of a variable. 617 /// 618 /// \param name The name of the variable. 619 /// 620 /// \return The value of the requested variable. 621 /// 622 /// \throw text::syntax_error If the variable does not exist. 623 const std::string& 624 text::templates_def::get_variable(const std::string& name) const 625 { 626 const variables_map::const_iterator iter = _variables.find(name); 627 if (iter == _variables.end()) 628 throw text::syntax_error(F("Unknown variable '%s'") % name); 629 return (*iter).second; 630 } 631 632 633 /// Gets a vector. 634 /// 635 /// \param name The name of the vector. 636 /// 637 /// \return A reference to the requested vector. 638 /// 639 /// \throw text::syntax_error If the vector does not exist. 
640 const text::templates_def::strings_vector& 641 text::templates_def::get_vector(const std::string& name) const 642 { 643 const vectors_map::const_iterator iter = _vectors.find(name); 644 if (iter == _vectors.end()) 645 throw text::syntax_error(F("Unknown vector '%s'") % name); 646 return (*iter).second; 647 } 648 649 650 /// Indexes a vector and gets the value. 651 /// 652 /// \param name The name of the vector to index. 653 /// \param index_name The name of a variable representing the index to use. 654 /// This must be convertible to a natural. 655 /// 656 /// \return The value of the vector at the given index. 657 /// 658 /// \throw text::syntax_error If the vector does not existor if the index is out 659 /// of range. 660 const std::string& 661 text::templates_def::get_vector(const std::string& name, 662 const std::string& index_name) const 663 { 664 const strings_vector& vector = get_vector(name); 665 const std::string& index_str = get_variable(index_name); 666 667 std::size_t index; 668 try { 669 index = text::to_type< std::size_t >(index_str); 670 } catch (const text::syntax_error& e) { 671 throw text::syntax_error(F("Index '%s' not an integer, value '%s'") % 672 index_name % index_str); 673 } 674 if (index >= vector.size()) 675 throw text::syntax_error(F("Index '%s' out of range at position '%s'") % 676 index_name % index); 677 678 return vector[index]; 679 } 680 681 682 /// Evaluates a expression using these templates. 683 /// 684 /// An expression is a query on the current templates to fetch a particular 685 /// value. The value is always returned as a string, as this is how templates 686 /// are internally stored. 687 /// 688 /// \param expression The expression to evaluate. This should not include any 689 /// of the delimiters used in the user input, as otherwise the expression 690 /// will not be evaluated properly. 691 /// 692 /// \return The result of the expression evaluation as a string. 
693 /// 694 /// \throw text::syntax_error If there is any problem while evaluating the 695 /// expression. 696 std::string 697 text::templates_def::evaluate(const std::string& expression) const 698 { 699 const std::string::size_type paren_open = expression.find('('); 700 if (paren_open == std::string::npos) { 701 return get_variable(expression); 702 } else { 703 const std::string::size_type paren_close = expression.find( 704 ')', paren_open); 705 if (paren_close == std::string::npos) 706 throw text::syntax_error(F("Expected ')' in expression '%s')") % 707 expression); 708 if (paren_close != expression.length() - 1) 709 throw text::syntax_error(F("Unexpected text found after ')' in " 710 "expression '%s'") % expression); 711 712 const std::string arg0 = expression.substr(0, paren_open); 713 const std::string arg1 = expression.substr( 714 paren_open + 1, paren_close - paren_open - 1); 715 if (arg0 == "defined") { 716 return exists(arg1) ? "true" : "false"; 717 } else if (arg0 == "length") { 718 return F("%s") % get_vector(arg1).size(); 719 } else { 720 return get_vector(arg0, arg1); 721 } 722 } 723 } 724 725 726 /// Applies a set of templates to an input stream. 727 /// 728 /// \param templates The templates to use. 729 /// \param input The input to process. 730 /// \param output The stream to which to write the processed text. 731 /// 732 /// \throw text::syntax_error If there is any problem processing the input. 733 void 734 text::instantiate(const templates_def& templates, 735 std::istream& input, std::ostream& output) 736 { 737 templates_parser parser(templates, "%", "%%"); 738 parser.instantiate(input, output); 739 } 740 741 742 /// Applies a set of templates to an input file and writes an output file. 743 /// 744 /// \param templates The templates to use. 745 /// \param input_file The path to the input to process. 746 /// \param output_file The path to the file into which to write the output. 
747 /// 748 /// \throw text::error If the input or output files cannot be opened. 749 /// \throw text::syntax_error If there is any problem processing the input. 750 void 751 text::instantiate(const templates_def& templates, 752 const fs::path& input_file, const fs::path& output_file) 753 { 754 std::ifstream input(input_file.c_str()); 755 if (!input) 756 throw text::error(F("Failed to open %s for read") % input_file); 757 758 std::ofstream output(output_file.c_str()); 759 if (!output) 760 throw text::error(F("Failed to open %s for write") % output_file); 761 762 instantiate(templates, input, output); 763 } 764