1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // FileCheck does a line-by line check of a file that validates whether it 11 // contains the expected content. This is useful for regression tests etc. 12 // 13 // This program exits with an error status of 2 on error, exit status of 0 if 14 // the file matched the expected contents, and exit status of 1 if it did not 15 // contain the expected contents. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #include "llvm/ADT/SmallString.h" 20 #include "llvm/ADT/StringExtras.h" 21 #include "llvm/ADT/StringMap.h" 22 #include "llvm/ADT/StringSet.h" 23 #include "llvm/Support/CommandLine.h" 24 #include "llvm/Support/MemoryBuffer.h" 25 #include "llvm/Support/PrettyStackTrace.h" 26 #include "llvm/Support/Regex.h" 27 #include "llvm/Support/Signals.h" 28 #include "llvm/Support/SourceMgr.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include <algorithm> 31 #include <cctype> 32 #include <map> 33 #include <string> 34 #include <system_error> 35 #include <vector> 36 using namespace llvm; 37 38 static cl::opt<std::string> 39 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required); 40 41 static cl::opt<std::string> 42 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"), 43 cl::init("-"), cl::value_desc("filename")); 44 45 static cl::list<std::string> 46 CheckPrefixes("check-prefix", 47 cl::desc("Prefix to use from check file (defaults to 'CHECK')")); 48 49 static cl::opt<bool> 50 NoCanonicalizeWhiteSpace("strict-whitespace", 51 cl::desc("Do not treat all horizontal whitespace as equivalent")); 52 53 static cl::list<std::string> ImplicitCheckNot( 54 "implicit-check-not", 55 cl::desc("Add an implicit negative check with this pattern to every\n" 56 "positive check. This can be used to ensure that no instances of\n" 57 "this pattern occur which are not matched by a positive pattern"), 58 cl::value_desc("pattern")); 59 60 static cl::opt<bool> AllowEmptyInput( 61 "allow-empty", cl::init(false), 62 cl::desc("Allow the input file to be empty. This is useful when making\n" 63 "checks that some error message does not occur, for example.")); 64 65 typedef cl::list<std::string>::const_iterator prefix_iterator; 66 67 //===----------------------------------------------------------------------===// 68 // Pattern Handling Code. 69 //===----------------------------------------------------------------------===// 70 71 namespace Check { 72 enum CheckType { 73 CheckNone = 0, 74 CheckPlain, 75 CheckNext, 76 CheckNot, 77 CheckDAG, 78 CheckLabel, 79 80 /// MatchEOF - When set, this pattern only matches the end of file. This is 81 /// used for trailing CHECK-NOTs. 82 CheckEOF 83 }; 84 } 85 86 class Pattern { 87 SMLoc PatternLoc; 88 89 Check::CheckType CheckTy; 90 91 /// FixedStr - If non-empty, this pattern is a fixed string match with the 92 /// specified fixed string. 93 StringRef FixedStr; 94 95 /// RegEx - If non-empty, this is a regex pattern. 96 std::string RegExStr; 97 98 /// \brief Contains the number of line this pattern is in. 99 unsigned LineNumber; 100 101 /// VariableUses - Entries in this vector map to uses of a variable in the 102 /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain 103 /// "foobaz" and we'll get an entry in this vector that tells us to insert the 104 /// value of bar at offset 3. 105 std::vector<std::pair<StringRef, unsigned> > VariableUses; 106 107 /// VariableDefs - Maps definitions of variables to their parenthesized 108 /// capture numbers. 109 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1. 110 std::map<StringRef, unsigned> VariableDefs; 111 112 public: 113 114 Pattern(Check::CheckType Ty) 115 : CheckTy(Ty) { } 116 117 /// getLoc - Return the location in source code. 118 SMLoc getLoc() const { return PatternLoc; } 119 120 /// ParsePattern - Parse the given string into the Pattern. Prefix provides 121 /// which prefix is being matched, SM provides the SourceMgr used for error 122 /// reports, and LineNumber is the line number in the input file from which 123 /// the pattern string was read. Returns true in case of an error, false 124 /// otherwise. 125 bool ParsePattern(StringRef PatternStr, 126 StringRef Prefix, 127 SourceMgr &SM, 128 unsigned LineNumber); 129 130 /// Match - Match the pattern string against the input buffer Buffer. This 131 /// returns the position that is matched or npos if there is no match. If 132 /// there is a match, the size of the matched string is returned in MatchLen. 133 /// 134 /// The VariableTable StringMap provides the current values of filecheck 135 /// variables and is updated if this match defines new values. 136 size_t Match(StringRef Buffer, size_t &MatchLen, 137 StringMap<StringRef> &VariableTable) const; 138 139 /// PrintFailureInfo - Print additional information about a failure to match 140 /// involving this pattern. 141 void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 142 const StringMap<StringRef> &VariableTable) const; 143 144 bool hasVariable() const { return !(VariableUses.empty() && 145 VariableDefs.empty()); } 146 147 Check::CheckType getCheckTy() const { return CheckTy; } 148 149 private: 150 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); 151 void AddBackrefToRegEx(unsigned BackrefNum); 152 153 /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of 154 /// matching this pattern at the start of \arg Buffer; a distance of zero 155 /// should correspond to a perfect match. 156 unsigned ComputeMatchDistance(StringRef Buffer, 157 const StringMap<StringRef> &VariableTable) const; 158 159 /// \brief Evaluates expression and stores the result to \p Value. 160 /// \return true on success. false when the expression has invalid syntax. 161 bool EvaluateExpression(StringRef Expr, std::string &Value) const; 162 163 /// \brief Finds the closing sequence of a regex variable usage or 164 /// definition. Str has to point in the beginning of the definition 165 /// (right after the opening sequence). 166 /// \return offset of the closing sequence within Str, or npos if it was not 167 /// found. 168 size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); 169 }; 170 171 172 bool Pattern::ParsePattern(StringRef PatternStr, 173 StringRef Prefix, 174 SourceMgr &SM, 175 unsigned LineNumber) { 176 this->LineNumber = LineNumber; 177 PatternLoc = SMLoc::getFromPointer(PatternStr.data()); 178 179 // Ignore trailing whitespace. 180 while (!PatternStr.empty() && 181 (PatternStr.back() == ' ' || PatternStr.back() == '\t')) 182 PatternStr = PatternStr.substr(0, PatternStr.size()-1); 183 184 // Check that there is something on the line. 185 if (PatternStr.empty()) { 186 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error, 187 "found empty check string with prefix '" + 188 Prefix + ":'"); 189 return true; 190 } 191 192 // Check to see if this is a fixed string, or if it has regex pieces. 193 if (PatternStr.size() < 2 || 194 (PatternStr.find("{{") == StringRef::npos && 195 PatternStr.find("[[") == StringRef::npos)) { 196 FixedStr = PatternStr; 197 return false; 198 } 199 200 // Paren value #0 is for the fully matched string. Any new parenthesized 201 // values add from there. 202 unsigned CurParen = 1; 203 204 // Otherwise, there is at least one regex piece. Build up the regex pattern 205 // by escaping scary characters in fixed strings, building up one big regex. 206 while (!PatternStr.empty()) { 207 // RegEx matches. 208 if (PatternStr.startswith("{{")) { 209 // This is the start of a regex match. Scan for the }}. 210 size_t End = PatternStr.find("}}"); 211 if (End == StringRef::npos) { 212 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 213 SourceMgr::DK_Error, 214 "found start of regex string with no end '}}'"); 215 return true; 216 } 217 218 // Enclose {{}} patterns in parens just like [[]] even though we're not 219 // capturing the result for any purpose. This is required in case the 220 // expression contains an alternation like: CHECK: abc{{x|z}}def. We 221 // want this to turn into: "abc(x|z)def" not "abcx|zdef". 222 RegExStr += '('; 223 ++CurParen; 224 225 if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM)) 226 return true; 227 RegExStr += ')'; 228 229 PatternStr = PatternStr.substr(End+2); 230 continue; 231 } 232 233 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .* 234 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The 235 // second form is [[foo]] which is a reference to foo. The variable name 236 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject 237 // it. This is to catch some common errors. 238 if (PatternStr.startswith("[[")) { 239 // Find the closing bracket pair ending the match. End is going to be an 240 // offset relative to the beginning of the match string. 241 size_t End = FindRegexVarEnd(PatternStr.substr(2), SM); 242 243 if (End == StringRef::npos) { 244 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 245 SourceMgr::DK_Error, 246 "invalid named regex reference, no ]] found"); 247 return true; 248 } 249 250 StringRef MatchStr = PatternStr.substr(2, End); 251 PatternStr = PatternStr.substr(End+4); 252 253 // Get the regex name (e.g. "foo"). 254 size_t NameEnd = MatchStr.find(':'); 255 StringRef Name = MatchStr.substr(0, NameEnd); 256 257 if (Name.empty()) { 258 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 259 "invalid name in named regex: empty name"); 260 return true; 261 } 262 263 // Verify that the name/expression is well formed. FileCheck currently 264 // supports @LINE, @LINE+number, @LINE-number expressions. The check here 265 // is relaxed, more strict check is performed in \c EvaluateExpression. 266 bool IsExpression = false; 267 for (unsigned i = 0, e = Name.size(); i != e; ++i) { 268 if (i == 0 && Name[i] == '@') { 269 if (NameEnd != StringRef::npos) { 270 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 271 SourceMgr::DK_Error, 272 "invalid name in named regex definition"); 273 return true; 274 } 275 IsExpression = true; 276 continue; 277 } 278 if (Name[i] != '_' && !isalnum(Name[i]) && 279 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) { 280 SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i), 281 SourceMgr::DK_Error, "invalid name in named regex"); 282 return true; 283 } 284 } 285 286 // Name can't start with a digit. 287 if (isdigit(static_cast<unsigned char>(Name[0]))) { 288 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 289 "invalid name in named regex"); 290 return true; 291 } 292 293 // Handle [[foo]]. 294 if (NameEnd == StringRef::npos) { 295 // Handle variables that were defined earlier on the same line by 296 // emitting a backreference. 297 if (VariableDefs.find(Name) != VariableDefs.end()) { 298 unsigned VarParenNum = VariableDefs[Name]; 299 if (VarParenNum < 1 || VarParenNum > 9) { 300 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 301 SourceMgr::DK_Error, 302 "Can't back-reference more than 9 variables"); 303 return true; 304 } 305 AddBackrefToRegEx(VarParenNum); 306 } else { 307 VariableUses.push_back(std::make_pair(Name, RegExStr.size())); 308 } 309 continue; 310 } 311 312 // Handle [[foo:.*]]. 313 VariableDefs[Name] = CurParen; 314 RegExStr += '('; 315 ++CurParen; 316 317 if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM)) 318 return true; 319 320 RegExStr += ')'; 321 } 322 323 // Handle fixed string matches. 324 // Find the end, which is the start of the next regex. 325 size_t FixedMatchEnd = PatternStr.find("{{"); 326 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); 327 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); 328 PatternStr = PatternStr.substr(FixedMatchEnd); 329 } 330 331 return false; 332 } 333 334 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, 335 SourceMgr &SM) { 336 Regex R(RS); 337 std::string Error; 338 if (!R.isValid(Error)) { 339 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, 340 "invalid regex: " + Error); 341 return true; 342 } 343 344 RegExStr += RS.str(); 345 CurParen += R.getNumMatches(); 346 return false; 347 } 348 349 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { 350 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); 351 std::string Backref = std::string("\\") + 352 std::string(1, '0' + BackrefNum); 353 RegExStr += Backref; 354 } 355 356 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const { 357 // The only supported expression is @LINE([\+-]\d+)? 358 if (!Expr.startswith("@LINE")) 359 return false; 360 Expr = Expr.substr(StringRef("@LINE").size()); 361 int Offset = 0; 362 if (!Expr.empty()) { 363 if (Expr[0] == '+') 364 Expr = Expr.substr(1); 365 else if (Expr[0] != '-') 366 return false; 367 if (Expr.getAsInteger(10, Offset)) 368 return false; 369 } 370 Value = llvm::itostr(LineNumber + Offset); 371 return true; 372 } 373 374 /// Match - Match the pattern string against the input buffer Buffer. This 375 /// returns the position that is matched or npos if there is no match. If 376 /// there is a match, the size of the matched string is returned in MatchLen. 377 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, 378 StringMap<StringRef> &VariableTable) const { 379 // If this is the EOF pattern, match it immediately. 380 if (CheckTy == Check::CheckEOF) { 381 MatchLen = 0; 382 return Buffer.size(); 383 } 384 385 // If this is a fixed string pattern, just match it now. 386 if (!FixedStr.empty()) { 387 MatchLen = FixedStr.size(); 388 return Buffer.find(FixedStr); 389 } 390 391 // Regex match. 392 393 // If there are variable uses, we need to create a temporary string with the 394 // actual value. 395 StringRef RegExToMatch = RegExStr; 396 std::string TmpStr; 397 if (!VariableUses.empty()) { 398 TmpStr = RegExStr; 399 400 unsigned InsertOffset = 0; 401 for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) { 402 std::string Value; 403 404 if (VariableUses[i].first[0] == '@') { 405 if (!EvaluateExpression(VariableUses[i].first, Value)) 406 return StringRef::npos; 407 } else { 408 StringMap<StringRef>::iterator it = 409 VariableTable.find(VariableUses[i].first); 410 // If the variable is undefined, return an error. 411 if (it == VariableTable.end()) 412 return StringRef::npos; 413 414 // Look up the value and escape it so that we can put it into the regex. 415 Value += Regex::escape(it->second); 416 } 417 418 // Plop it into the regex at the adjusted offset. 419 TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset, 420 Value.begin(), Value.end()); 421 InsertOffset += Value.size(); 422 } 423 424 // Match the newly constructed regex. 425 RegExToMatch = TmpStr; 426 } 427 428 429 SmallVector<StringRef, 4> MatchInfo; 430 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) 431 return StringRef::npos; 432 433 // Successful regex match. 434 assert(!MatchInfo.empty() && "Didn't get any match"); 435 StringRef FullMatch = MatchInfo[0]; 436 437 // If this defines any variables, remember their values. 438 for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(), 439 E = VariableDefs.end(); 440 I != E; ++I) { 441 assert(I->second < MatchInfo.size() && "Internal paren error"); 442 VariableTable[I->first] = MatchInfo[I->second]; 443 } 444 445 MatchLen = FullMatch.size(); 446 return FullMatch.data()-Buffer.data(); 447 } 448 449 unsigned Pattern::ComputeMatchDistance(StringRef Buffer, 450 const StringMap<StringRef> &VariableTable) const { 451 // Just compute the number of matching characters. For regular expressions, we 452 // just compare against the regex itself and hope for the best. 453 // 454 // FIXME: One easy improvement here is have the regex lib generate a single 455 // example regular expression which matches, and use that as the example 456 // string. 457 StringRef ExampleString(FixedStr); 458 if (ExampleString.empty()) 459 ExampleString = RegExStr; 460 461 // Only compare up to the first line in the buffer, or the string size. 462 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); 463 BufferPrefix = BufferPrefix.split('\n').first; 464 return BufferPrefix.edit_distance(ExampleString); 465 } 466 467 void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 468 const StringMap<StringRef> &VariableTable) const{ 469 // If this was a regular expression using variables, print the current 470 // variable values. 471 if (!VariableUses.empty()) { 472 for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) { 473 SmallString<256> Msg; 474 raw_svector_ostream OS(Msg); 475 StringRef Var = VariableUses[i].first; 476 if (Var[0] == '@') { 477 std::string Value; 478 if (EvaluateExpression(Var, Value)) { 479 OS << "with expression \""; 480 OS.write_escaped(Var) << "\" equal to \""; 481 OS.write_escaped(Value) << "\""; 482 } else { 483 OS << "uses incorrect expression \""; 484 OS.write_escaped(Var) << "\""; 485 } 486 } else { 487 StringMap<StringRef>::const_iterator it = VariableTable.find(Var); 488 489 // Check for undefined variable references. 490 if (it == VariableTable.end()) { 491 OS << "uses undefined variable \""; 492 OS.write_escaped(Var) << "\""; 493 } else { 494 OS << "with variable \""; 495 OS.write_escaped(Var) << "\" equal to \""; 496 OS.write_escaped(it->second) << "\""; 497 } 498 } 499 500 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 501 OS.str()); 502 } 503 } 504 505 // Attempt to find the closest/best fuzzy match. Usually an error happens 506 // because some string in the output didn't exactly match. In these cases, we 507 // would like to show the user a best guess at what "should have" matched, to 508 // save them having to actually check the input manually. 509 size_t NumLinesForward = 0; 510 size_t Best = StringRef::npos; 511 double BestQuality = 0; 512 513 // Use an arbitrary 4k limit on how far we will search. 514 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { 515 if (Buffer[i] == '\n') 516 ++NumLinesForward; 517 518 // Patterns have leading whitespace stripped, so skip whitespace when 519 // looking for something which looks like a pattern. 520 if (Buffer[i] == ' ' || Buffer[i] == '\t') 521 continue; 522 523 // Compute the "quality" of this match as an arbitrary combination of the 524 // match distance and the number of lines skipped to get to this match. 525 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable); 526 double Quality = Distance + (NumLinesForward / 100.); 527 528 if (Quality < BestQuality || Best == StringRef::npos) { 529 Best = i; 530 BestQuality = Quality; 531 } 532 } 533 534 // Print the "possible intended match here" line if we found something 535 // reasonable and not equal to what we showed in the "scanning from here" 536 // line. 537 if (Best && Best != StringRef::npos && BestQuality < 50) { 538 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best), 539 SourceMgr::DK_Note, "possible intended match here"); 540 541 // FIXME: If we wanted to be really friendly we would show why the match 542 // failed, as it can be hard to spot simple one character differences. 543 } 544 } 545 546 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { 547 // Offset keeps track of the current offset within the input Str 548 size_t Offset = 0; 549 // [...] Nesting depth 550 size_t BracketDepth = 0; 551 552 while (!Str.empty()) { 553 if (Str.startswith("]]") && BracketDepth == 0) 554 return Offset; 555 if (Str[0] == '\\') { 556 // Backslash escapes the next char within regexes, so skip them both. 557 Str = Str.substr(2); 558 Offset += 2; 559 } else { 560 switch (Str[0]) { 561 default: 562 break; 563 case '[': 564 BracketDepth++; 565 break; 566 case ']': 567 if (BracketDepth == 0) { 568 SM.PrintMessage(SMLoc::getFromPointer(Str.data()), 569 SourceMgr::DK_Error, 570 "missing closing \"]\" for regex variable"); 571 exit(1); 572 } 573 BracketDepth--; 574 break; 575 } 576 Str = Str.substr(1); 577 Offset++; 578 } 579 } 580 581 return StringRef::npos; 582 } 583 584 585 //===----------------------------------------------------------------------===// 586 // Check Strings. 587 //===----------------------------------------------------------------------===// 588 589 /// CheckString - This is a check that we found in the input file. 590 struct CheckString { 591 /// Pat - The pattern to match. 592 Pattern Pat; 593 594 /// Prefix - Which prefix name this check matched. 595 StringRef Prefix; 596 597 /// Loc - The location in the match file that the check string was specified. 598 SMLoc Loc; 599 600 /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive, 601 /// as opposed to a CHECK: directive. 602 Check::CheckType CheckTy; 603 604 /// DagNotStrings - These are all of the strings that are disallowed from 605 /// occurring between this match string and the previous one (or start of 606 /// file). 607 std::vector<Pattern> DagNotStrings; 608 609 610 CheckString(const Pattern &P, 611 StringRef S, 612 SMLoc L, 613 Check::CheckType Ty) 614 : Pat(P), Prefix(S), Loc(L), CheckTy(Ty) {} 615 616 /// Check - Match check string and its "not strings" and/or "dag strings". 617 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, 618 size_t &MatchLen, StringMap<StringRef> &VariableTable) const; 619 620 /// CheckNext - Verify there is a single line in the given buffer. 621 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; 622 623 /// CheckNot - Verify there's no "not strings" in the given buffer. 624 bool CheckNot(const SourceMgr &SM, StringRef Buffer, 625 const std::vector<const Pattern *> &NotStrings, 626 StringMap<StringRef> &VariableTable) const; 627 628 /// CheckDag - Match "dag strings" and their mixed "not strings". 629 size_t CheckDag(const SourceMgr &SM, StringRef Buffer, 630 std::vector<const Pattern *> &NotStrings, 631 StringMap<StringRef> &VariableTable) const; 632 }; 633 634 /// Canonicalize whitespaces in the input file. Line endings are replaced 635 /// with UNIX-style '\n'. 636 /// 637 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace 638 /// characters to a single space. 639 static std::unique_ptr<MemoryBuffer> 640 CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB, 641 bool PreserveHorizontal) { 642 SmallString<128> NewFile; 643 NewFile.reserve(MB->getBufferSize()); 644 645 for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd(); 646 Ptr != End; ++Ptr) { 647 // Eliminate trailing dosish \r. 648 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { 649 continue; 650 } 651 652 // If current char is not a horizontal whitespace or if horizontal 653 // whitespace canonicalization is disabled, dump it to output as is. 654 if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) { 655 NewFile.push_back(*Ptr); 656 continue; 657 } 658 659 // Otherwise, add one space and advance over neighboring space. 660 NewFile.push_back(' '); 661 while (Ptr+1 != End && 662 (Ptr[1] == ' ' || Ptr[1] == '\t')) 663 ++Ptr; 664 } 665 666 return std::unique_ptr<MemoryBuffer>( 667 MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier())); 668 } 669 670 static bool IsPartOfWord(char c) { 671 return (isalnum(c) || c == '-' || c == '_'); 672 } 673 674 // Get the size of the prefix extension. 675 static size_t CheckTypeSize(Check::CheckType Ty) { 676 switch (Ty) { 677 case Check::CheckNone: 678 return 0; 679 680 case Check::CheckPlain: 681 return sizeof(":") - 1; 682 683 case Check::CheckNext: 684 return sizeof("-NEXT:") - 1; 685 686 case Check::CheckNot: 687 return sizeof("-NOT:") - 1; 688 689 case Check::CheckDAG: 690 return sizeof("-DAG:") - 1; 691 692 case Check::CheckLabel: 693 return sizeof("-LABEL:") - 1; 694 695 case Check::CheckEOF: 696 llvm_unreachable("Should not be using EOF size"); 697 } 698 699 llvm_unreachable("Bad check type"); 700 } 701 702 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) { 703 char NextChar = Buffer[Prefix.size()]; 704 705 // Verify that the : is present after the prefix. 706 if (NextChar == ':') 707 return Check::CheckPlain; 708 709 if (NextChar != '-') 710 return Check::CheckNone; 711 712 StringRef Rest = Buffer.drop_front(Prefix.size() + 1); 713 if (Rest.startswith("NEXT:")) 714 return Check::CheckNext; 715 716 if (Rest.startswith("NOT:")) 717 return Check::CheckNot; 718 719 if (Rest.startswith("DAG:")) 720 return Check::CheckDAG; 721 722 if (Rest.startswith("LABEL:")) 723 return Check::CheckLabel; 724 725 return Check::CheckNone; 726 } 727 728 // From the given position, find the next character after the word. 729 static size_t SkipWord(StringRef Str, size_t Loc) { 730 while (Loc < Str.size() && IsPartOfWord(Str[Loc])) 731 ++Loc; 732 return Loc; 733 } 734 735 // Try to find the first match in buffer for any prefix. If a valid match is 736 // found, return that prefix and set its type and location. If there are almost 737 // matches (e.g. the actual prefix string is found, but is not an actual check 738 // string), but no valid match, return an empty string and set the position to 739 // resume searching from. If no partial matches are found, return an empty 740 // string and the location will be StringRef::npos. If one prefix is a substring 741 // of another, the maximal match should be found. e.g. if "A" and "AA" are 742 // prefixes then AA-CHECK: should match the second one. 743 static StringRef FindFirstCandidateMatch(StringRef &Buffer, 744 Check::CheckType &CheckTy, 745 size_t &CheckLoc) { 746 StringRef FirstPrefix; 747 size_t FirstLoc = StringRef::npos; 748 size_t SearchLoc = StringRef::npos; 749 Check::CheckType FirstTy = Check::CheckNone; 750 751 CheckTy = Check::CheckNone; 752 CheckLoc = StringRef::npos; 753 754 for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end(); 755 I != E; ++I) { 756 StringRef Prefix(*I); 757 size_t PrefixLoc = Buffer.find(Prefix); 758 759 if (PrefixLoc == StringRef::npos) 760 continue; 761 762 // Track where we are searching for invalid prefixes that look almost right. 763 // We need to only advance to the first partial match on the next attempt 764 // since a partial match could be a substring of a later, valid prefix. 765 // Need to skip to the end of the word, otherwise we could end up 766 // matching a prefix in a substring later. 767 if (PrefixLoc < SearchLoc) 768 SearchLoc = SkipWord(Buffer, PrefixLoc); 769 770 // We only want to find the first match to avoid skipping some. 771 if (PrefixLoc > FirstLoc) 772 continue; 773 // If one matching check-prefix is a prefix of another, choose the 774 // longer one. 775 if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size()) 776 continue; 777 778 StringRef Rest = Buffer.drop_front(PrefixLoc); 779 // Make sure we have actually found the prefix, and not a word containing 780 // it. This should also prevent matching the wrong prefix when one is a 781 // substring of another. 782 if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1])) 783 FirstTy = Check::CheckNone; 784 else 785 FirstTy = FindCheckType(Rest, Prefix); 786 787 FirstLoc = PrefixLoc; 788 FirstPrefix = Prefix; 789 } 790 791 // If the first prefix is invalid, we should continue the search after it. 792 if (FirstTy == Check::CheckNone) { 793 CheckLoc = SearchLoc; 794 return ""; 795 } 796 797 CheckTy = FirstTy; 798 CheckLoc = FirstLoc; 799 return FirstPrefix; 800 } 801 802 static StringRef FindFirstMatchingPrefix(StringRef &Buffer, 803 unsigned &LineNumber, 804 Check::CheckType &CheckTy, 805 size_t &CheckLoc) { 806 while (!Buffer.empty()) { 807 StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc); 808 // If we found a real match, we are done. 809 if (!Prefix.empty()) { 810 LineNumber += Buffer.substr(0, CheckLoc).count('\n'); 811 return Prefix; 812 } 813 814 // We didn't find any almost matches either, we are also done. 815 if (CheckLoc == StringRef::npos) 816 return StringRef(); 817 818 LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n'); 819 820 // Advance to the last possible match we found and try again. 821 Buffer = Buffer.drop_front(CheckLoc + 1); 822 } 823 824 return StringRef(); 825 } 826 827 /// ReadCheckFile - Read the check file, which specifies the sequence of 828 /// expected strings. The strings are added to the CheckStrings vector. 829 /// Returns true in case of an error, false otherwise. 830 static bool ReadCheckFile(SourceMgr &SM, 831 std::vector<CheckString> &CheckStrings) { 832 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = 833 MemoryBuffer::getFileOrSTDIN(CheckFilename); 834 if (std::error_code EC = FileOrErr.getError()) { 835 errs() << "Could not open check file '" << CheckFilename 836 << "': " << EC.message() << '\n'; 837 return true; 838 } 839 840 // If we want to canonicalize whitespace, strip excess whitespace from the 841 // buffer containing the CHECK lines. Remove DOS style line endings. 842 std::unique_ptr<MemoryBuffer> F = 843 CanonicalizeInputFile(std::move(*FileOrErr), NoCanonicalizeWhiteSpace); 844 845 // Find all instances of CheckPrefix followed by : in the file. 846 StringRef Buffer = F->getBuffer(); 847 848 SM.AddNewSourceBuffer(std::move(F), SMLoc()); 849 850 std::vector<Pattern> ImplicitNegativeChecks; 851 for (const auto &PatternString : ImplicitCheckNot) { 852 // Create a buffer with fake command line content in order to display the 853 // command line option responsible for the specific implicit CHECK-NOT. 854 std::string Prefix = std::string("-") + ImplicitCheckNot.ArgStr + "='"; 855 std::string Suffix = "'"; 856 std::unique_ptr<MemoryBuffer> CmdLine(MemoryBuffer::getMemBufferCopy( 857 Prefix + PatternString + Suffix, "command line")); 858 StringRef PatternInBuffer = 859 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); 860 SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); 861 862 ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot)); 863 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer, 864 "IMPLICIT-CHECK", SM, 0); 865 } 866 867 868 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks; 869 870 // LineNumber keeps track of the line on which CheckPrefix instances are 871 // found. 872 unsigned LineNumber = 1; 873 874 while (1) { 875 Check::CheckType CheckTy; 876 size_t PrefixLoc; 877 878 // See if a prefix occurs in the memory buffer. 879 StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer, 880 LineNumber, 881 CheckTy, 882 PrefixLoc); 883 if (UsedPrefix.empty()) 884 break; 885 886 Buffer = Buffer.drop_front(PrefixLoc); 887 888 // Location to use for error messages. 889 const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1); 890 891 // PrefixLoc is to the start of the prefix. Skip to the end. 892 Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy)); 893 894 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore 895 // leading and trailing whitespace. 896 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 897 898 // Scan ahead to the end of line. 899 size_t EOL = Buffer.find_first_of("\n\r"); 900 901 // Remember the location of the start of the pattern, for diagnostics. 902 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); 903 904 // Parse the pattern. 905 Pattern P(CheckTy); 906 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber)) 907 return true; 908 909 // Verify that CHECK-LABEL lines do not define or use variables 910 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { 911 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 912 SourceMgr::DK_Error, 913 "found '" + UsedPrefix + "-LABEL:'" 914 " with variable definition or use"); 915 return true; 916 } 917 918 Buffer = Buffer.substr(EOL); 919 920 // Verify that CHECK-NEXT lines have at least one CHECK line before them. 921 if ((CheckTy == Check::CheckNext) && CheckStrings.empty()) { 922 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 923 SourceMgr::DK_Error, 924 "found '" + UsedPrefix + "-NEXT:' without previous '" 925 + UsedPrefix + ": line"); 926 return true; 927 } 928 929 // Handle CHECK-DAG/-NOT. 930 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { 931 DagNotMatches.push_back(P); 932 continue; 933 } 934 935 // Okay, add the string we captured to the output vector and move on. 936 CheckStrings.push_back(CheckString(P, 937 UsedPrefix, 938 PatternLoc, 939 CheckTy)); 940 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 941 DagNotMatches = ImplicitNegativeChecks; 942 } 943 944 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first 945 // prefix as a filler for the error message. 946 if (!DagNotMatches.empty()) { 947 CheckStrings.push_back(CheckString(Pattern(Check::CheckEOF), 948 CheckPrefixes[0], 949 SMLoc::getFromPointer(Buffer.data()), 950 Check::CheckEOF)); 951 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 952 } 953 954 if (CheckStrings.empty()) { 955 errs() << "error: no check strings found with prefix" 956 << (CheckPrefixes.size() > 1 ? "es " : " "); 957 for (size_t I = 0, N = CheckPrefixes.size(); I != N; ++I) { 958 StringRef Prefix(CheckPrefixes[I]); 959 errs() << '\'' << Prefix << ":'"; 960 if (I != N - 1) 961 errs() << ", "; 962 } 963 964 errs() << '\n'; 965 return true; 966 } 967 968 return false; 969 } 970 971 static void PrintCheckFailed(const SourceMgr &SM, const SMLoc &Loc, 972 const Pattern &Pat, StringRef Buffer, 973 StringMap<StringRef> &VariableTable) { 974 // Otherwise, we have an error, emit an error message. 975 SM.PrintMessage(Loc, SourceMgr::DK_Error, 976 "expected string not found in input"); 977 978 // Print the "scanning from here" line. If the current position is at the 979 // end of a line, advance to the start of the next line. 980 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); 981 982 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 983 "scanning from here"); 984 985 // Allow the pattern to print additional information if desired. 986 Pat.PrintFailureInfo(SM, Buffer, VariableTable); 987 } 988 989 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr, 990 StringRef Buffer, 991 StringMap<StringRef> &VariableTable) { 992 PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable); 993 } 994 995 /// CountNumNewlinesBetween - Count the number of newlines in the specified 996 /// range. 997 static unsigned CountNumNewlinesBetween(StringRef Range, 998 const char *&FirstNewLine) { 999 unsigned NumNewLines = 0; 1000 while (1) { 1001 // Scan for newline. 1002 Range = Range.substr(Range.find_first_of("\n\r")); 1003 if (Range.empty()) return NumNewLines; 1004 1005 ++NumNewLines; 1006 1007 // Handle \n\r and \r\n as a single newline. 1008 if (Range.size() > 1 && 1009 (Range[1] == '\n' || Range[1] == '\r') && 1010 (Range[0] != Range[1])) 1011 Range = Range.substr(1); 1012 Range = Range.substr(1); 1013 1014 if (NumNewLines == 1) 1015 FirstNewLine = Range.begin(); 1016 } 1017 } 1018 1019 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer, 1020 bool IsLabelScanMode, size_t &MatchLen, 1021 StringMap<StringRef> &VariableTable) const { 1022 size_t LastPos = 0; 1023 std::vector<const Pattern *> NotStrings; 1024 1025 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL 1026 // bounds; we have not processed variable definitions within the bounded block 1027 // yet so cannot handle any final CHECK-DAG yet; this is handled when going 1028 // over the block again (including the last CHECK-LABEL) in normal mode. 1029 if (!IsLabelScanMode) { 1030 // Match "dag strings" (with mixed "not strings" if any). 1031 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable); 1032 if (LastPos == StringRef::npos) 1033 return StringRef::npos; 1034 } 1035 1036 // Match itself from the last position after matching CHECK-DAG. 1037 StringRef MatchBuffer = Buffer.substr(LastPos); 1038 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1039 if (MatchPos == StringRef::npos) { 1040 PrintCheckFailed(SM, *this, MatchBuffer, VariableTable); 1041 return StringRef::npos; 1042 } 1043 MatchPos += LastPos; 1044 1045 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT 1046 // or CHECK-NOT 1047 if (!IsLabelScanMode) { 1048 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1049 1050 // If this check is a "CHECK-NEXT", verify that the previous match was on 1051 // the previous line (i.e. that there is one newline between them). 1052 if (CheckNext(SM, SkippedRegion)) 1053 return StringRef::npos; 1054 1055 // If this match had "not strings", verify that they don't exist in the 1056 // skipped region. 1057 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1058 return StringRef::npos; 1059 } 1060 1061 return MatchPos; 1062 } 1063 1064 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { 1065 if (CheckTy != Check::CheckNext) 1066 return false; 1067 1068 // Count the number of newlines between the previous match and this one. 1069 assert(Buffer.data() != 1070 SM.getMemoryBuffer( 1071 SM.FindBufferContainingLoc( 1072 SMLoc::getFromPointer(Buffer.data())))->getBufferStart() && 1073 "CHECK-NEXT can't be the first check in a file"); 1074 1075 const char *FirstNewLine = nullptr; 1076 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1077 1078 if (NumNewLines == 0) { 1079 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix + 1080 "-NEXT: is on the same line as previous match"); 1081 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), 1082 SourceMgr::DK_Note, "'next' match was here"); 1083 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1084 "previous match ended here"); 1085 return true; 1086 } 1087 1088 if (NumNewLines != 1) { 1089 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix + 1090 "-NEXT: is not on the line after the previous match"); 1091 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), 1092 SourceMgr::DK_Note, "'next' match was here"); 1093 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1094 "previous match ended here"); 1095 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, 1096 "non-matching line after previous match is here"); 1097 return true; 1098 } 1099 1100 return false; 1101 } 1102 1103 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer, 1104 const std::vector<const Pattern *> &NotStrings, 1105 StringMap<StringRef> &VariableTable) const { 1106 for (unsigned ChunkNo = 0, e = NotStrings.size(); 1107 ChunkNo != e; ++ChunkNo) { 1108 const Pattern *Pat = NotStrings[ChunkNo]; 1109 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); 1110 1111 size_t MatchLen = 0; 1112 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable); 1113 1114 if (Pos == StringRef::npos) continue; 1115 1116 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos), 1117 SourceMgr::DK_Error, 1118 Prefix + "-NOT: string occurred!"); 1119 SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note, 1120 Prefix + "-NOT: pattern specified here"); 1121 return true; 1122 } 1123 1124 return false; 1125 } 1126 1127 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, 1128 std::vector<const Pattern *> &NotStrings, 1129 StringMap<StringRef> &VariableTable) const { 1130 if (DagNotStrings.empty()) 1131 return 0; 1132 1133 size_t LastPos = 0; 1134 size_t StartPos = LastPos; 1135 1136 for (unsigned ChunkNo = 0, e = DagNotStrings.size(); 1137 ChunkNo != e; ++ChunkNo) { 1138 const Pattern &Pat = DagNotStrings[ChunkNo]; 1139 1140 assert((Pat.getCheckTy() == Check::CheckDAG || 1141 Pat.getCheckTy() == Check::CheckNot) && 1142 "Invalid CHECK-DAG or CHECK-NOT!"); 1143 1144 if (Pat.getCheckTy() == Check::CheckNot) { 1145 NotStrings.push_back(&Pat); 1146 continue; 1147 } 1148 1149 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); 1150 1151 size_t MatchLen = 0, MatchPos; 1152 1153 // CHECK-DAG always matches from the start. 1154 StringRef MatchBuffer = Buffer.substr(StartPos); 1155 MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1156 // With a group of CHECK-DAGs, a single mismatching means the match on 1157 // that group of CHECK-DAGs fails immediately. 1158 if (MatchPos == StringRef::npos) { 1159 PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable); 1160 return StringRef::npos; 1161 } 1162 // Re-calc it as the offset relative to the start of the original string. 1163 MatchPos += StartPos; 1164 1165 if (!NotStrings.empty()) { 1166 if (MatchPos < LastPos) { 1167 // Reordered? 1168 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos), 1169 SourceMgr::DK_Error, 1170 Prefix + "-DAG: found a match of CHECK-DAG" 1171 " reordering across a CHECK-NOT"); 1172 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos), 1173 SourceMgr::DK_Note, 1174 Prefix + "-DAG: the farthest match of CHECK-DAG" 1175 " is found here"); 1176 SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note, 1177 Prefix + "-NOT: the crossed pattern specified" 1178 " here"); 1179 SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note, 1180 Prefix + "-DAG: the reordered pattern specified" 1181 " here"); 1182 return StringRef::npos; 1183 } 1184 // All subsequent CHECK-DAGs should be matched from the farthest 1185 // position of all precedent CHECK-DAGs (including this one.) 1186 StartPos = LastPos; 1187 // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to 1188 // CHECK-DAG, verify that there's no 'not' strings occurred in that 1189 // region. 1190 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1191 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1192 return StringRef::npos; 1193 // Clear "not strings". 1194 NotStrings.clear(); 1195 } 1196 1197 // Update the last position with CHECK-DAG matches. 1198 LastPos = std::max(MatchPos + MatchLen, LastPos); 1199 } 1200 1201 return LastPos; 1202 } 1203 1204 // A check prefix must contain only alphanumeric, hyphens and underscores. 1205 static bool ValidateCheckPrefix(StringRef CheckPrefix) { 1206 Regex Validator("^[a-zA-Z0-9_-]*$"); 1207 return Validator.match(CheckPrefix); 1208 } 1209 1210 static bool ValidateCheckPrefixes() { 1211 StringSet<> PrefixSet; 1212 1213 for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end(); 1214 I != E; ++I) { 1215 StringRef Prefix(*I); 1216 1217 // Reject empty prefixes. 1218 if (Prefix == "") 1219 return false; 1220 1221 if (!PrefixSet.insert(Prefix)) 1222 return false; 1223 1224 if (!ValidateCheckPrefix(Prefix)) 1225 return false; 1226 } 1227 1228 return true; 1229 } 1230 1231 // I don't think there's a way to specify an initial value for cl::list, 1232 // so if nothing was specified, add the default 1233 static void AddCheckPrefixIfNeeded() { 1234 if (CheckPrefixes.empty()) 1235 CheckPrefixes.push_back("CHECK"); 1236 } 1237 1238 int main(int argc, char **argv) { 1239 sys::PrintStackTraceOnErrorSignal(); 1240 PrettyStackTraceProgram X(argc, argv); 1241 cl::ParseCommandLineOptions(argc, argv); 1242 1243 if (!ValidateCheckPrefixes()) { 1244 errs() << "Supplied check-prefix is invalid! Prefixes must be unique and " 1245 "start with a letter and contain only alphanumeric characters, " 1246 "hyphens and underscores\n"; 1247 return 2; 1248 } 1249 1250 AddCheckPrefixIfNeeded(); 1251 1252 SourceMgr SM; 1253 1254 // Read the expected strings from the check file. 1255 std::vector<CheckString> CheckStrings; 1256 if (ReadCheckFile(SM, CheckStrings)) 1257 return 2; 1258 1259 // Open the file to check and add it to SourceMgr. 1260 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = 1261 MemoryBuffer::getFileOrSTDIN(InputFilename); 1262 if (std::error_code EC = FileOrErr.getError()) { 1263 errs() << "Could not open input file '" << InputFilename 1264 << "': " << EC.message() << '\n'; 1265 return 2; 1266 } 1267 std::unique_ptr<MemoryBuffer> &File = FileOrErr.get(); 1268 1269 if (File->getBufferSize() == 0 && !AllowEmptyInput) { 1270 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; 1271 return 2; 1272 } 1273 1274 // Remove duplicate spaces in the input file if requested. 1275 // Remove DOS style line endings. 1276 std::unique_ptr<MemoryBuffer> F = 1277 CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace); 1278 1279 // Check that we have all of the expected strings, in order, in the input 1280 // file. 1281 StringRef Buffer = F->getBuffer(); 1282 1283 SM.AddNewSourceBuffer(std::move(F), SMLoc()); 1284 1285 /// VariableTable - This holds all the current filecheck variables. 1286 StringMap<StringRef> VariableTable; 1287 1288 bool hasError = false; 1289 1290 unsigned i = 0, j = 0, e = CheckStrings.size(); 1291 1292 while (true) { 1293 StringRef CheckRegion; 1294 if (j == e) { 1295 CheckRegion = Buffer; 1296 } else { 1297 const CheckString &CheckLabelStr = CheckStrings[j]; 1298 if (CheckLabelStr.CheckTy != Check::CheckLabel) { 1299 ++j; 1300 continue; 1301 } 1302 1303 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG 1304 size_t MatchLabelLen = 0; 1305 size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true, 1306 MatchLabelLen, VariableTable); 1307 if (MatchLabelPos == StringRef::npos) { 1308 hasError = true; 1309 break; 1310 } 1311 1312 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); 1313 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); 1314 ++j; 1315 } 1316 1317 for ( ; i != j; ++i) { 1318 const CheckString &CheckStr = CheckStrings[i]; 1319 1320 // Check each string within the scanned region, including a second check 1321 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) 1322 size_t MatchLen = 0; 1323 size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen, 1324 VariableTable); 1325 1326 if (MatchPos == StringRef::npos) { 1327 hasError = true; 1328 i = j; 1329 break; 1330 } 1331 1332 CheckRegion = CheckRegion.substr(MatchPos + MatchLen); 1333 } 1334 1335 if (j == e) 1336 break; 1337 } 1338 1339 return hasError ? 1 : 0; 1340 } 1341