1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // FileCheck does a line-by line check of a file that validates whether it 11 // contains the expected content. This is useful for regression tests etc. 12 // 13 // This program exits with an error status of 2 on error, exit status of 0 if 14 // the file matched the expected contents, and exit status of 1 if it did not 15 // contain the expected contents. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #include "llvm/ADT/SmallString.h" 20 #include "llvm/ADT/StringExtras.h" 21 #include "llvm/ADT/StringMap.h" 22 #include "llvm/ADT/StringSet.h" 23 #include "llvm/Support/CommandLine.h" 24 #include "llvm/Support/MemoryBuffer.h" 25 #include "llvm/Support/PrettyStackTrace.h" 26 #include "llvm/Support/Regex.h" 27 #include "llvm/Support/Signals.h" 28 #include "llvm/Support/SourceMgr.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include <algorithm> 31 #include <cctype> 32 #include <map> 33 #include <string> 34 #include <system_error> 35 #include <vector> 36 using namespace llvm; 37 38 static cl::opt<std::string> 39 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required); 40 41 static cl::opt<std::string> 42 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"), 43 cl::init("-"), cl::value_desc("filename")); 44 45 static cl::list<std::string> 46 CheckPrefixes("check-prefix", 47 cl::desc("Prefix to use from check file (defaults to 'CHECK')")); 48 49 static cl::opt<bool> 50 NoCanonicalizeWhiteSpace("strict-whitespace", 51 cl::desc("Do not treat all horizontal whitespace as equivalent")); 52 53 static cl::list<std::string> ImplicitCheckNot( 54 "implicit-check-not", 55 cl::desc("Add an implicit negative check with this pattern to every\n" 56 "positive check. This can be used to ensure that no instances of\n" 57 "this pattern occur which are not matched by a positive pattern"), 58 cl::value_desc("pattern")); 59 60 static cl::opt<bool> AllowEmptyInput( 61 "allow-empty", cl::init(false), 62 cl::desc("Allow the input file to be empty. This is useful when making\n" 63 "checks that some error message does not occur, for example.")); 64 65 static cl::opt<bool> MatchFullLines( 66 "match-full-lines", cl::init(false), 67 cl::desc("Require all positive matches to cover an entire input line.\n" 68 "Allows leading and trailing whitespace if --strict-whitespace\n" 69 "is not also passed.")); 70 71 typedef cl::list<std::string>::const_iterator prefix_iterator; 72 73 //===----------------------------------------------------------------------===// 74 // Pattern Handling Code. 75 //===----------------------------------------------------------------------===// 76 77 namespace Check { 78 enum CheckType { 79 CheckNone = 0, 80 CheckPlain, 81 CheckNext, 82 CheckSame, 83 CheckNot, 84 CheckDAG, 85 CheckLabel, 86 87 /// MatchEOF - When set, this pattern only matches the end of file. This is 88 /// used for trailing CHECK-NOTs. 89 CheckEOF 90 }; 91 } 92 93 class Pattern { 94 SMLoc PatternLoc; 95 96 Check::CheckType CheckTy; 97 98 /// FixedStr - If non-empty, this pattern is a fixed string match with the 99 /// specified fixed string. 100 StringRef FixedStr; 101 102 /// RegEx - If non-empty, this is a regex pattern. 103 std::string RegExStr; 104 105 /// \brief Contains the number of line this pattern is in. 106 unsigned LineNumber; 107 108 /// VariableUses - Entries in this vector map to uses of a variable in the 109 /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain 110 /// "foobaz" and we'll get an entry in this vector that tells us to insert the 111 /// value of bar at offset 3. 112 std::vector<std::pair<StringRef, unsigned> > VariableUses; 113 114 /// VariableDefs - Maps definitions of variables to their parenthesized 115 /// capture numbers. 116 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1. 117 std::map<StringRef, unsigned> VariableDefs; 118 119 public: 120 121 Pattern(Check::CheckType Ty) 122 : CheckTy(Ty) { } 123 124 /// getLoc - Return the location in source code. 125 SMLoc getLoc() const { return PatternLoc; } 126 127 /// ParsePattern - Parse the given string into the Pattern. Prefix provides 128 /// which prefix is being matched, SM provides the SourceMgr used for error 129 /// reports, and LineNumber is the line number in the input file from which 130 /// the pattern string was read. Returns true in case of an error, false 131 /// otherwise. 132 bool ParsePattern(StringRef PatternStr, 133 StringRef Prefix, 134 SourceMgr &SM, 135 unsigned LineNumber); 136 137 /// Match - Match the pattern string against the input buffer Buffer. This 138 /// returns the position that is matched or npos if there is no match. If 139 /// there is a match, the size of the matched string is returned in MatchLen. 140 /// 141 /// The VariableTable StringMap provides the current values of filecheck 142 /// variables and is updated if this match defines new values. 143 size_t Match(StringRef Buffer, size_t &MatchLen, 144 StringMap<StringRef> &VariableTable) const; 145 146 /// PrintFailureInfo - Print additional information about a failure to match 147 /// involving this pattern. 148 void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 149 const StringMap<StringRef> &VariableTable) const; 150 151 bool hasVariable() const { return !(VariableUses.empty() && 152 VariableDefs.empty()); } 153 154 Check::CheckType getCheckTy() const { return CheckTy; } 155 156 private: 157 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); 158 void AddBackrefToRegEx(unsigned BackrefNum); 159 160 /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of 161 /// matching this pattern at the start of \arg Buffer; a distance of zero 162 /// should correspond to a perfect match. 163 unsigned ComputeMatchDistance(StringRef Buffer, 164 const StringMap<StringRef> &VariableTable) const; 165 166 /// \brief Evaluates expression and stores the result to \p Value. 167 /// \return true on success. false when the expression has invalid syntax. 168 bool EvaluateExpression(StringRef Expr, std::string &Value) const; 169 170 /// \brief Finds the closing sequence of a regex variable usage or 171 /// definition. Str has to point in the beginning of the definition 172 /// (right after the opening sequence). 173 /// \return offset of the closing sequence within Str, or npos if it was not 174 /// found. 175 size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); 176 }; 177 178 179 bool Pattern::ParsePattern(StringRef PatternStr, 180 StringRef Prefix, 181 SourceMgr &SM, 182 unsigned LineNumber) { 183 bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot; 184 185 this->LineNumber = LineNumber; 186 PatternLoc = SMLoc::getFromPointer(PatternStr.data()); 187 188 // Ignore trailing whitespace. 189 while (!PatternStr.empty() && 190 (PatternStr.back() == ' ' || PatternStr.back() == '\t')) 191 PatternStr = PatternStr.substr(0, PatternStr.size()-1); 192 193 // Check that there is something on the line. 194 if (PatternStr.empty()) { 195 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error, 196 "found empty check string with prefix '" + 197 Prefix + ":'"); 198 return true; 199 } 200 201 // Check to see if this is a fixed string, or if it has regex pieces. 202 if (!MatchFullLinesHere && 203 (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos && 204 PatternStr.find("[[") == StringRef::npos))) { 205 FixedStr = PatternStr; 206 return false; 207 } 208 209 if (MatchFullLinesHere) { 210 RegExStr += '^'; 211 if (!NoCanonicalizeWhiteSpace) 212 RegExStr += " *"; 213 } 214 215 // Paren value #0 is for the fully matched string. Any new parenthesized 216 // values add from there. 217 unsigned CurParen = 1; 218 219 // Otherwise, there is at least one regex piece. Build up the regex pattern 220 // by escaping scary characters in fixed strings, building up one big regex. 221 while (!PatternStr.empty()) { 222 // RegEx matches. 223 if (PatternStr.startswith("{{")) { 224 // This is the start of a regex match. Scan for the }}. 225 size_t End = PatternStr.find("}}"); 226 if (End == StringRef::npos) { 227 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 228 SourceMgr::DK_Error, 229 "found start of regex string with no end '}}'"); 230 return true; 231 } 232 233 // Enclose {{}} patterns in parens just like [[]] even though we're not 234 // capturing the result for any purpose. This is required in case the 235 // expression contains an alternation like: CHECK: abc{{x|z}}def. We 236 // want this to turn into: "abc(x|z)def" not "abcx|zdef". 237 RegExStr += '('; 238 ++CurParen; 239 240 if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM)) 241 return true; 242 RegExStr += ')'; 243 244 PatternStr = PatternStr.substr(End+2); 245 continue; 246 } 247 248 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .* 249 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The 250 // second form is [[foo]] which is a reference to foo. The variable name 251 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject 252 // it. This is to catch some common errors. 253 if (PatternStr.startswith("[[")) { 254 // Find the closing bracket pair ending the match. End is going to be an 255 // offset relative to the beginning of the match string. 256 size_t End = FindRegexVarEnd(PatternStr.substr(2), SM); 257 258 if (End == StringRef::npos) { 259 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 260 SourceMgr::DK_Error, 261 "invalid named regex reference, no ]] found"); 262 return true; 263 } 264 265 StringRef MatchStr = PatternStr.substr(2, End); 266 PatternStr = PatternStr.substr(End+4); 267 268 // Get the regex name (e.g. "foo"). 269 size_t NameEnd = MatchStr.find(':'); 270 StringRef Name = MatchStr.substr(0, NameEnd); 271 272 if (Name.empty()) { 273 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 274 "invalid name in named regex: empty name"); 275 return true; 276 } 277 278 // Verify that the name/expression is well formed. FileCheck currently 279 // supports @LINE, @LINE+number, @LINE-number expressions. The check here 280 // is relaxed, more strict check is performed in \c EvaluateExpression. 281 bool IsExpression = false; 282 for (unsigned i = 0, e = Name.size(); i != e; ++i) { 283 if (i == 0 && Name[i] == '@') { 284 if (NameEnd != StringRef::npos) { 285 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 286 SourceMgr::DK_Error, 287 "invalid name in named regex definition"); 288 return true; 289 } 290 IsExpression = true; 291 continue; 292 } 293 if (Name[i] != '_' && !isalnum(Name[i]) && 294 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) { 295 SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i), 296 SourceMgr::DK_Error, "invalid name in named regex"); 297 return true; 298 } 299 } 300 301 // Name can't start with a digit. 302 if (isdigit(static_cast<unsigned char>(Name[0]))) { 303 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 304 "invalid name in named regex"); 305 return true; 306 } 307 308 // Handle [[foo]]. 309 if (NameEnd == StringRef::npos) { 310 // Handle variables that were defined earlier on the same line by 311 // emitting a backreference. 312 if (VariableDefs.find(Name) != VariableDefs.end()) { 313 unsigned VarParenNum = VariableDefs[Name]; 314 if (VarParenNum < 1 || VarParenNum > 9) { 315 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 316 SourceMgr::DK_Error, 317 "Can't back-reference more than 9 variables"); 318 return true; 319 } 320 AddBackrefToRegEx(VarParenNum); 321 } else { 322 VariableUses.push_back(std::make_pair(Name, RegExStr.size())); 323 } 324 continue; 325 } 326 327 // Handle [[foo:.*]]. 328 VariableDefs[Name] = CurParen; 329 RegExStr += '('; 330 ++CurParen; 331 332 if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM)) 333 return true; 334 335 RegExStr += ')'; 336 } 337 338 // Handle fixed string matches. 339 // Find the end, which is the start of the next regex. 340 size_t FixedMatchEnd = PatternStr.find("{{"); 341 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); 342 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); 343 PatternStr = PatternStr.substr(FixedMatchEnd); 344 } 345 346 if (MatchFullLinesHere) { 347 if (!NoCanonicalizeWhiteSpace) 348 RegExStr += " *"; 349 RegExStr += '$'; 350 } 351 352 return false; 353 } 354 355 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, 356 SourceMgr &SM) { 357 Regex R(RS); 358 std::string Error; 359 if (!R.isValid(Error)) { 360 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, 361 "invalid regex: " + Error); 362 return true; 363 } 364 365 RegExStr += RS.str(); 366 CurParen += R.getNumMatches(); 367 return false; 368 } 369 370 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { 371 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); 372 std::string Backref = std::string("\\") + 373 std::string(1, '0' + BackrefNum); 374 RegExStr += Backref; 375 } 376 377 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const { 378 // The only supported expression is @LINE([\+-]\d+)? 379 if (!Expr.startswith("@LINE")) 380 return false; 381 Expr = Expr.substr(StringRef("@LINE").size()); 382 int Offset = 0; 383 if (!Expr.empty()) { 384 if (Expr[0] == '+') 385 Expr = Expr.substr(1); 386 else if (Expr[0] != '-') 387 return false; 388 if (Expr.getAsInteger(10, Offset)) 389 return false; 390 } 391 Value = llvm::itostr(LineNumber + Offset); 392 return true; 393 } 394 395 /// Match - Match the pattern string against the input buffer Buffer. This 396 /// returns the position that is matched or npos if there is no match. If 397 /// there is a match, the size of the matched string is returned in MatchLen. 398 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, 399 StringMap<StringRef> &VariableTable) const { 400 // If this is the EOF pattern, match it immediately. 401 if (CheckTy == Check::CheckEOF) { 402 MatchLen = 0; 403 return Buffer.size(); 404 } 405 406 // If this is a fixed string pattern, just match it now. 407 if (!FixedStr.empty()) { 408 MatchLen = FixedStr.size(); 409 return Buffer.find(FixedStr); 410 } 411 412 // Regex match. 413 414 // If there are variable uses, we need to create a temporary string with the 415 // actual value. 416 StringRef RegExToMatch = RegExStr; 417 std::string TmpStr; 418 if (!VariableUses.empty()) { 419 TmpStr = RegExStr; 420 421 unsigned InsertOffset = 0; 422 for (const auto &VariableUse : VariableUses) { 423 std::string Value; 424 425 if (VariableUse.first[0] == '@') { 426 if (!EvaluateExpression(VariableUse.first, Value)) 427 return StringRef::npos; 428 } else { 429 StringMap<StringRef>::iterator it = 430 VariableTable.find(VariableUse.first); 431 // If the variable is undefined, return an error. 432 if (it == VariableTable.end()) 433 return StringRef::npos; 434 435 // Look up the value and escape it so that we can put it into the regex. 436 Value += Regex::escape(it->second); 437 } 438 439 // Plop it into the regex at the adjusted offset. 440 TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset, 441 Value.begin(), Value.end()); 442 InsertOffset += Value.size(); 443 } 444 445 // Match the newly constructed regex. 446 RegExToMatch = TmpStr; 447 } 448 449 450 SmallVector<StringRef, 4> MatchInfo; 451 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) 452 return StringRef::npos; 453 454 // Successful regex match. 455 assert(!MatchInfo.empty() && "Didn't get any match"); 456 StringRef FullMatch = MatchInfo[0]; 457 458 // If this defines any variables, remember their values. 459 for (const auto &VariableDef : VariableDefs) { 460 assert(VariableDef.second < MatchInfo.size() && "Internal paren error"); 461 VariableTable[VariableDef.first] = MatchInfo[VariableDef.second]; 462 } 463 464 MatchLen = FullMatch.size(); 465 return FullMatch.data()-Buffer.data(); 466 } 467 468 unsigned Pattern::ComputeMatchDistance(StringRef Buffer, 469 const StringMap<StringRef> &VariableTable) const { 470 // Just compute the number of matching characters. For regular expressions, we 471 // just compare against the regex itself and hope for the best. 472 // 473 // FIXME: One easy improvement here is have the regex lib generate a single 474 // example regular expression which matches, and use that as the example 475 // string. 476 StringRef ExampleString(FixedStr); 477 if (ExampleString.empty()) 478 ExampleString = RegExStr; 479 480 // Only compare up to the first line in the buffer, or the string size. 481 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); 482 BufferPrefix = BufferPrefix.split('\n').first; 483 return BufferPrefix.edit_distance(ExampleString); 484 } 485 486 void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 487 const StringMap<StringRef> &VariableTable) const{ 488 // If this was a regular expression using variables, print the current 489 // variable values. 490 if (!VariableUses.empty()) { 491 for (const auto &VariableUse : VariableUses) { 492 SmallString<256> Msg; 493 raw_svector_ostream OS(Msg); 494 StringRef Var = VariableUse.first; 495 if (Var[0] == '@') { 496 std::string Value; 497 if (EvaluateExpression(Var, Value)) { 498 OS << "with expression \""; 499 OS.write_escaped(Var) << "\" equal to \""; 500 OS.write_escaped(Value) << "\""; 501 } else { 502 OS << "uses incorrect expression \""; 503 OS.write_escaped(Var) << "\""; 504 } 505 } else { 506 StringMap<StringRef>::const_iterator it = VariableTable.find(Var); 507 508 // Check for undefined variable references. 509 if (it == VariableTable.end()) { 510 OS << "uses undefined variable \""; 511 OS.write_escaped(Var) << "\""; 512 } else { 513 OS << "with variable \""; 514 OS.write_escaped(Var) << "\" equal to \""; 515 OS.write_escaped(it->second) << "\""; 516 } 517 } 518 519 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 520 OS.str()); 521 } 522 } 523 524 // Attempt to find the closest/best fuzzy match. Usually an error happens 525 // because some string in the output didn't exactly match. In these cases, we 526 // would like to show the user a best guess at what "should have" matched, to 527 // save them having to actually check the input manually. 528 size_t NumLinesForward = 0; 529 size_t Best = StringRef::npos; 530 double BestQuality = 0; 531 532 // Use an arbitrary 4k limit on how far we will search. 533 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { 534 if (Buffer[i] == '\n') 535 ++NumLinesForward; 536 537 // Patterns have leading whitespace stripped, so skip whitespace when 538 // looking for something which looks like a pattern. 539 if (Buffer[i] == ' ' || Buffer[i] == '\t') 540 continue; 541 542 // Compute the "quality" of this match as an arbitrary combination of the 543 // match distance and the number of lines skipped to get to this match. 544 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable); 545 double Quality = Distance + (NumLinesForward / 100.); 546 547 if (Quality < BestQuality || Best == StringRef::npos) { 548 Best = i; 549 BestQuality = Quality; 550 } 551 } 552 553 // Print the "possible intended match here" line if we found something 554 // reasonable and not equal to what we showed in the "scanning from here" 555 // line. 556 if (Best && Best != StringRef::npos && BestQuality < 50) { 557 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best), 558 SourceMgr::DK_Note, "possible intended match here"); 559 560 // FIXME: If we wanted to be really friendly we would show why the match 561 // failed, as it can be hard to spot simple one character differences. 562 } 563 } 564 565 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { 566 // Offset keeps track of the current offset within the input Str 567 size_t Offset = 0; 568 // [...] Nesting depth 569 size_t BracketDepth = 0; 570 571 while (!Str.empty()) { 572 if (Str.startswith("]]") && BracketDepth == 0) 573 return Offset; 574 if (Str[0] == '\\') { 575 // Backslash escapes the next char within regexes, so skip them both. 576 Str = Str.substr(2); 577 Offset += 2; 578 } else { 579 switch (Str[0]) { 580 default: 581 break; 582 case '[': 583 BracketDepth++; 584 break; 585 case ']': 586 if (BracketDepth == 0) { 587 SM.PrintMessage(SMLoc::getFromPointer(Str.data()), 588 SourceMgr::DK_Error, 589 "missing closing \"]\" for regex variable"); 590 exit(1); 591 } 592 BracketDepth--; 593 break; 594 } 595 Str = Str.substr(1); 596 Offset++; 597 } 598 } 599 600 return StringRef::npos; 601 } 602 603 604 //===----------------------------------------------------------------------===// 605 // Check Strings. 606 //===----------------------------------------------------------------------===// 607 608 /// CheckString - This is a check that we found in the input file. 609 struct CheckString { 610 /// Pat - The pattern to match. 611 Pattern Pat; 612 613 /// Prefix - Which prefix name this check matched. 614 StringRef Prefix; 615 616 /// Loc - The location in the match file that the check string was specified. 617 SMLoc Loc; 618 619 /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive, 620 /// as opposed to a CHECK: directive. 621 // Check::CheckType CheckTy; 622 623 /// DagNotStrings - These are all of the strings that are disallowed from 624 /// occurring between this match string and the previous one (or start of 625 /// file). 626 std::vector<Pattern> DagNotStrings; 627 628 CheckString(const Pattern &P, StringRef S, SMLoc L) 629 : Pat(P), Prefix(S), Loc(L) {} 630 631 /// Check - Match check string and its "not strings" and/or "dag strings". 632 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, 633 size_t &MatchLen, StringMap<StringRef> &VariableTable) const; 634 635 /// CheckNext - Verify there is a single line in the given buffer. 636 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; 637 638 /// CheckSame - Verify there is no newline in the given buffer. 639 bool CheckSame(const SourceMgr &SM, StringRef Buffer) const; 640 641 /// CheckNot - Verify there's no "not strings" in the given buffer. 642 bool CheckNot(const SourceMgr &SM, StringRef Buffer, 643 const std::vector<const Pattern *> &NotStrings, 644 StringMap<StringRef> &VariableTable) const; 645 646 /// CheckDag - Match "dag strings" and their mixed "not strings". 647 size_t CheckDag(const SourceMgr &SM, StringRef Buffer, 648 std::vector<const Pattern *> &NotStrings, 649 StringMap<StringRef> &VariableTable) const; 650 }; 651 652 /// Canonicalize whitespaces in the input file. Line endings are replaced 653 /// with UNIX-style '\n'. 654 /// 655 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace 656 /// characters to a single space. 657 static std::unique_ptr<MemoryBuffer> 658 CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB, 659 bool PreserveHorizontal) { 660 SmallString<128> NewFile; 661 NewFile.reserve(MB->getBufferSize()); 662 663 for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd(); 664 Ptr != End; ++Ptr) { 665 // Eliminate trailing dosish \r. 666 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { 667 continue; 668 } 669 670 // If current char is not a horizontal whitespace or if horizontal 671 // whitespace canonicalization is disabled, dump it to output as is. 672 if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) { 673 NewFile.push_back(*Ptr); 674 continue; 675 } 676 677 // Otherwise, add one space and advance over neighboring space. 678 NewFile.push_back(' '); 679 while (Ptr+1 != End && 680 (Ptr[1] == ' ' || Ptr[1] == '\t')) 681 ++Ptr; 682 } 683 684 return std::unique_ptr<MemoryBuffer>( 685 MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier())); 686 } 687 688 static bool IsPartOfWord(char c) { 689 return (isalnum(c) || c == '-' || c == '_'); 690 } 691 692 // Get the size of the prefix extension. 693 static size_t CheckTypeSize(Check::CheckType Ty) { 694 switch (Ty) { 695 case Check::CheckNone: 696 return 0; 697 698 case Check::CheckPlain: 699 return sizeof(":") - 1; 700 701 case Check::CheckNext: 702 return sizeof("-NEXT:") - 1; 703 704 case Check::CheckSame: 705 return sizeof("-SAME:") - 1; 706 707 case Check::CheckNot: 708 return sizeof("-NOT:") - 1; 709 710 case Check::CheckDAG: 711 return sizeof("-DAG:") - 1; 712 713 case Check::CheckLabel: 714 return sizeof("-LABEL:") - 1; 715 716 case Check::CheckEOF: 717 llvm_unreachable("Should not be using EOF size"); 718 } 719 720 llvm_unreachable("Bad check type"); 721 } 722 723 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) { 724 char NextChar = Buffer[Prefix.size()]; 725 726 // Verify that the : is present after the prefix. 727 if (NextChar == ':') 728 return Check::CheckPlain; 729 730 if (NextChar != '-') 731 return Check::CheckNone; 732 733 StringRef Rest = Buffer.drop_front(Prefix.size() + 1); 734 if (Rest.startswith("NEXT:")) 735 return Check::CheckNext; 736 737 if (Rest.startswith("SAME:")) 738 return Check::CheckSame; 739 740 if (Rest.startswith("NOT:")) 741 return Check::CheckNot; 742 743 if (Rest.startswith("DAG:")) 744 return Check::CheckDAG; 745 746 if (Rest.startswith("LABEL:")) 747 return Check::CheckLabel; 748 749 return Check::CheckNone; 750 } 751 752 // From the given position, find the next character after the word. 753 static size_t SkipWord(StringRef Str, size_t Loc) { 754 while (Loc < Str.size() && IsPartOfWord(Str[Loc])) 755 ++Loc; 756 return Loc; 757 } 758 759 // Try to find the first match in buffer for any prefix. If a valid match is 760 // found, return that prefix and set its type and location. If there are almost 761 // matches (e.g. the actual prefix string is found, but is not an actual check 762 // string), but no valid match, return an empty string and set the position to 763 // resume searching from. If no partial matches are found, return an empty 764 // string and the location will be StringRef::npos. If one prefix is a substring 765 // of another, the maximal match should be found. e.g. if "A" and "AA" are 766 // prefixes then AA-CHECK: should match the second one. 767 static StringRef FindFirstCandidateMatch(StringRef &Buffer, 768 Check::CheckType &CheckTy, 769 size_t &CheckLoc) { 770 StringRef FirstPrefix; 771 size_t FirstLoc = StringRef::npos; 772 size_t SearchLoc = StringRef::npos; 773 Check::CheckType FirstTy = Check::CheckNone; 774 775 CheckTy = Check::CheckNone; 776 CheckLoc = StringRef::npos; 777 778 for (StringRef Prefix : CheckPrefixes) { 779 size_t PrefixLoc = Buffer.find(Prefix); 780 781 if (PrefixLoc == StringRef::npos) 782 continue; 783 784 // Track where we are searching for invalid prefixes that look almost right. 785 // We need to only advance to the first partial match on the next attempt 786 // since a partial match could be a substring of a later, valid prefix. 787 // Need to skip to the end of the word, otherwise we could end up 788 // matching a prefix in a substring later. 789 if (PrefixLoc < SearchLoc) 790 SearchLoc = SkipWord(Buffer, PrefixLoc); 791 792 // We only want to find the first match to avoid skipping some. 793 if (PrefixLoc > FirstLoc) 794 continue; 795 // If one matching check-prefix is a prefix of another, choose the 796 // longer one. 797 if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size()) 798 continue; 799 800 StringRef Rest = Buffer.drop_front(PrefixLoc); 801 // Make sure we have actually found the prefix, and not a word containing 802 // it. This should also prevent matching the wrong prefix when one is a 803 // substring of another. 804 if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1])) 805 FirstTy = Check::CheckNone; 806 else 807 FirstTy = FindCheckType(Rest, Prefix); 808 809 FirstLoc = PrefixLoc; 810 FirstPrefix = Prefix; 811 } 812 813 // If the first prefix is invalid, we should continue the search after it. 814 if (FirstTy == Check::CheckNone) { 815 CheckLoc = SearchLoc; 816 return ""; 817 } 818 819 CheckTy = FirstTy; 820 CheckLoc = FirstLoc; 821 return FirstPrefix; 822 } 823 824 static StringRef FindFirstMatchingPrefix(StringRef &Buffer, 825 unsigned &LineNumber, 826 Check::CheckType &CheckTy, 827 size_t &CheckLoc) { 828 while (!Buffer.empty()) { 829 StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc); 830 // If we found a real match, we are done. 831 if (!Prefix.empty()) { 832 LineNumber += Buffer.substr(0, CheckLoc).count('\n'); 833 return Prefix; 834 } 835 836 // We didn't find any almost matches either, we are also done. 837 if (CheckLoc == StringRef::npos) 838 return StringRef(); 839 840 LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n'); 841 842 // Advance to the last possible match we found and try again. 843 Buffer = Buffer.drop_front(CheckLoc + 1); 844 } 845 846 return StringRef(); 847 } 848 849 /// ReadCheckFile - Read the check file, which specifies the sequence of 850 /// expected strings. The strings are added to the CheckStrings vector. 851 /// Returns true in case of an error, false otherwise. 852 static bool ReadCheckFile(SourceMgr &SM, 853 std::vector<CheckString> &CheckStrings) { 854 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = 855 MemoryBuffer::getFileOrSTDIN(CheckFilename); 856 if (std::error_code EC = FileOrErr.getError()) { 857 errs() << "Could not open check file '" << CheckFilename 858 << "': " << EC.message() << '\n'; 859 return true; 860 } 861 862 // If we want to canonicalize whitespace, strip excess whitespace from the 863 // buffer containing the CHECK lines. Remove DOS style line endings. 864 std::unique_ptr<MemoryBuffer> F = CanonicalizeInputFile( 865 std::move(FileOrErr.get()), NoCanonicalizeWhiteSpace); 866 867 // Find all instances of CheckPrefix followed by : in the file. 868 StringRef Buffer = F->getBuffer(); 869 870 SM.AddNewSourceBuffer(std::move(F), SMLoc()); 871 872 std::vector<Pattern> ImplicitNegativeChecks; 873 for (const auto &PatternString : ImplicitCheckNot) { 874 // Create a buffer with fake command line content in order to display the 875 // command line option responsible for the specific implicit CHECK-NOT. 876 std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str(); 877 std::string Suffix = "'"; 878 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy( 879 Prefix + PatternString + Suffix, "command line"); 880 881 StringRef PatternInBuffer = 882 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); 883 SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); 884 885 ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot)); 886 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer, 887 "IMPLICIT-CHECK", SM, 0); 888 } 889 890 891 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks; 892 893 // LineNumber keeps track of the line on which CheckPrefix instances are 894 // found. 895 unsigned LineNumber = 1; 896 897 while (1) { 898 Check::CheckType CheckTy; 899 size_t PrefixLoc; 900 901 // See if a prefix occurs in the memory buffer. 902 StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer, 903 LineNumber, 904 CheckTy, 905 PrefixLoc); 906 if (UsedPrefix.empty()) 907 break; 908 909 Buffer = Buffer.drop_front(PrefixLoc); 910 911 // Location to use for error messages. 912 const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1); 913 914 // PrefixLoc is to the start of the prefix. Skip to the end. 915 Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy)); 916 917 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore 918 // leading and trailing whitespace. 919 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 920 921 // Scan ahead to the end of line. 922 size_t EOL = Buffer.find_first_of("\n\r"); 923 924 // Remember the location of the start of the pattern, for diagnostics. 925 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); 926 927 // Parse the pattern. 928 Pattern P(CheckTy); 929 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber)) 930 return true; 931 932 // Verify that CHECK-LABEL lines do not define or use variables 933 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { 934 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 935 SourceMgr::DK_Error, 936 "found '" + UsedPrefix + "-LABEL:'" 937 " with variable definition or use"); 938 return true; 939 } 940 941 Buffer = Buffer.substr(EOL); 942 943 // Verify that CHECK-NEXT lines have at least one CHECK line before them. 944 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) && 945 CheckStrings.empty()) { 946 StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME"; 947 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 948 SourceMgr::DK_Error, 949 "found '" + UsedPrefix + "-" + Type + "' without previous '" 950 + UsedPrefix + ": line"); 951 return true; 952 } 953 954 // Handle CHECK-DAG/-NOT. 955 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { 956 DagNotMatches.push_back(P); 957 continue; 958 } 959 960 // Okay, add the string we captured to the output vector and move on. 961 CheckStrings.emplace_back(P, UsedPrefix, PatternLoc); 962 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 963 DagNotMatches = ImplicitNegativeChecks; 964 } 965 966 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first 967 // prefix as a filler for the error message. 968 if (!DagNotMatches.empty()) { 969 CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(), 970 SMLoc::getFromPointer(Buffer.data())); 971 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 972 } 973 974 if (CheckStrings.empty()) { 975 errs() << "error: no check strings found with prefix" 976 << (CheckPrefixes.size() > 1 ? "es " : " "); 977 prefix_iterator I = CheckPrefixes.begin(); 978 prefix_iterator E = CheckPrefixes.end(); 979 if (I != E) { 980 errs() << "\'" << *I << ":'"; 981 ++I; 982 } 983 for (; I != E; ++I) 984 errs() << ", \'" << *I << ":'"; 985 986 errs() << '\n'; 987 return true; 988 } 989 990 return false; 991 } 992 993 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, 994 const Pattern &Pat, StringRef Buffer, 995 StringMap<StringRef> &VariableTable) { 996 // Otherwise, we have an error, emit an error message. 997 SM.PrintMessage(Loc, SourceMgr::DK_Error, 998 "expected string not found in input"); 999 1000 // Print the "scanning from here" line. If the current position is at the 1001 // end of a line, advance to the start of the next line. 1002 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); 1003 1004 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1005 "scanning from here"); 1006 1007 // Allow the pattern to print additional information if desired. 1008 Pat.PrintFailureInfo(SM, Buffer, VariableTable); 1009 } 1010 1011 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr, 1012 StringRef Buffer, 1013 StringMap<StringRef> &VariableTable) { 1014 PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable); 1015 } 1016 1017 /// CountNumNewlinesBetween - Count the number of newlines in the specified 1018 /// range. 1019 static unsigned CountNumNewlinesBetween(StringRef Range, 1020 const char *&FirstNewLine) { 1021 unsigned NumNewLines = 0; 1022 while (1) { 1023 // Scan for newline. 1024 Range = Range.substr(Range.find_first_of("\n\r")); 1025 if (Range.empty()) return NumNewLines; 1026 1027 ++NumNewLines; 1028 1029 // Handle \n\r and \r\n as a single newline. 1030 if (Range.size() > 1 && 1031 (Range[1] == '\n' || Range[1] == '\r') && 1032 (Range[0] != Range[1])) 1033 Range = Range.substr(1); 1034 Range = Range.substr(1); 1035 1036 if (NumNewLines == 1) 1037 FirstNewLine = Range.begin(); 1038 } 1039 } 1040 1041 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer, 1042 bool IsLabelScanMode, size_t &MatchLen, 1043 StringMap<StringRef> &VariableTable) const { 1044 size_t LastPos = 0; 1045 std::vector<const Pattern *> NotStrings; 1046 1047 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL 1048 // bounds; we have not processed variable definitions within the bounded block 1049 // yet so cannot handle any final CHECK-DAG yet; this is handled when going 1050 // over the block again (including the last CHECK-LABEL) in normal mode. 1051 if (!IsLabelScanMode) { 1052 // Match "dag strings" (with mixed "not strings" if any). 1053 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable); 1054 if (LastPos == StringRef::npos) 1055 return StringRef::npos; 1056 } 1057 1058 // Match itself from the last position after matching CHECK-DAG. 1059 StringRef MatchBuffer = Buffer.substr(LastPos); 1060 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1061 if (MatchPos == StringRef::npos) { 1062 PrintCheckFailed(SM, *this, MatchBuffer, VariableTable); 1063 return StringRef::npos; 1064 } 1065 1066 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT 1067 // or CHECK-NOT 1068 if (!IsLabelScanMode) { 1069 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1070 1071 // If this check is a "CHECK-NEXT", verify that the previous match was on 1072 // the previous line (i.e. that there is one newline between them). 1073 if (CheckNext(SM, SkippedRegion)) 1074 return StringRef::npos; 1075 1076 // If this check is a "CHECK-SAME", verify that the previous match was on 1077 // the same line (i.e. that there is no newline between them). 1078 if (CheckSame(SM, SkippedRegion)) 1079 return StringRef::npos; 1080 1081 // If this match had "not strings", verify that they don't exist in the 1082 // skipped region. 1083 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1084 return StringRef::npos; 1085 } 1086 1087 return LastPos + MatchPos; 1088 } 1089 1090 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { 1091 if (Pat.getCheckTy() != Check::CheckNext) 1092 return false; 1093 1094 // Count the number of newlines between the previous match and this one. 1095 assert(Buffer.data() != 1096 SM.getMemoryBuffer( 1097 SM.FindBufferContainingLoc( 1098 SMLoc::getFromPointer(Buffer.data())))->getBufferStart() && 1099 "CHECK-NEXT can't be the first check in a file"); 1100 1101 const char *FirstNewLine = nullptr; 1102 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1103 1104 if (NumNewLines == 0) { 1105 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix + 1106 "-NEXT: is on the same line as previous match"); 1107 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), 1108 SourceMgr::DK_Note, "'next' match was here"); 1109 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1110 "previous match ended here"); 1111 return true; 1112 } 1113 1114 if (NumNewLines != 1) { 1115 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix + 1116 "-NEXT: is not on the line after the previous match"); 1117 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), 1118 SourceMgr::DK_Note, "'next' match was here"); 1119 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1120 "previous match ended here"); 1121 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, 1122 "non-matching line after previous match is here"); 1123 return true; 1124 } 1125 1126 return false; 1127 } 1128 1129 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const { 1130 if (Pat.getCheckTy() != Check::CheckSame) 1131 return false; 1132 1133 // Count the number of newlines between the previous match and this one. 1134 assert(Buffer.data() != 1135 SM.getMemoryBuffer(SM.FindBufferContainingLoc( 1136 SMLoc::getFromPointer(Buffer.data()))) 1137 ->getBufferStart() && 1138 "CHECK-SAME can't be the first check in a file"); 1139 1140 const char *FirstNewLine = nullptr; 1141 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1142 1143 if (NumNewLines != 0) { 1144 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1145 Prefix + 1146 "-SAME: is not on the same line as the previous match"); 1147 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1148 "'next' match was here"); 1149 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1150 "previous match ended here"); 1151 return true; 1152 } 1153 1154 return false; 1155 } 1156 1157 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer, 1158 const std::vector<const Pattern *> &NotStrings, 1159 StringMap<StringRef> &VariableTable) const { 1160 for (const Pattern *Pat : NotStrings) { 1161 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); 1162 1163 size_t MatchLen = 0; 1164 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable); 1165 1166 if (Pos == StringRef::npos) continue; 1167 1168 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos), 1169 SourceMgr::DK_Error, 1170 Prefix + "-NOT: string occurred!"); 1171 SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note, 1172 Prefix + "-NOT: pattern specified here"); 1173 return true; 1174 } 1175 1176 return false; 1177 } 1178 1179 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, 1180 std::vector<const Pattern *> &NotStrings, 1181 StringMap<StringRef> &VariableTable) const { 1182 if (DagNotStrings.empty()) 1183 return 0; 1184 1185 size_t LastPos = 0; 1186 size_t StartPos = LastPos; 1187 1188 for (const Pattern &Pat : DagNotStrings) { 1189 assert((Pat.getCheckTy() == Check::CheckDAG || 1190 Pat.getCheckTy() == Check::CheckNot) && 1191 "Invalid CHECK-DAG or CHECK-NOT!"); 1192 1193 if (Pat.getCheckTy() == Check::CheckNot) { 1194 NotStrings.push_back(&Pat); 1195 continue; 1196 } 1197 1198 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); 1199 1200 size_t MatchLen = 0, MatchPos; 1201 1202 // CHECK-DAG always matches from the start. 1203 StringRef MatchBuffer = Buffer.substr(StartPos); 1204 MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1205 // With a group of CHECK-DAGs, a single mismatching means the match on 1206 // that group of CHECK-DAGs fails immediately. 1207 if (MatchPos == StringRef::npos) { 1208 PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable); 1209 return StringRef::npos; 1210 } 1211 // Re-calc it as the offset relative to the start of the original string. 1212 MatchPos += StartPos; 1213 1214 if (!NotStrings.empty()) { 1215 if (MatchPos < LastPos) { 1216 // Reordered? 1217 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos), 1218 SourceMgr::DK_Error, 1219 Prefix + "-DAG: found a match of CHECK-DAG" 1220 " reordering across a CHECK-NOT"); 1221 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos), 1222 SourceMgr::DK_Note, 1223 Prefix + "-DAG: the farthest match of CHECK-DAG" 1224 " is found here"); 1225 SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note, 1226 Prefix + "-NOT: the crossed pattern specified" 1227 " here"); 1228 SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note, 1229 Prefix + "-DAG: the reordered pattern specified" 1230 " here"); 1231 return StringRef::npos; 1232 } 1233 // All subsequent CHECK-DAGs should be matched from the farthest 1234 // position of all precedent CHECK-DAGs (including this one.) 1235 StartPos = LastPos; 1236 // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to 1237 // CHECK-DAG, verify that there's no 'not' strings occurred in that 1238 // region. 1239 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1240 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1241 return StringRef::npos; 1242 // Clear "not strings". 1243 NotStrings.clear(); 1244 } 1245 1246 // Update the last position with CHECK-DAG matches. 1247 LastPos = std::max(MatchPos + MatchLen, LastPos); 1248 } 1249 1250 return LastPos; 1251 } 1252 1253 // A check prefix must contain only alphanumeric, hyphens and underscores. 1254 static bool ValidateCheckPrefix(StringRef CheckPrefix) { 1255 Regex Validator("^[a-zA-Z0-9_-]*$"); 1256 return Validator.match(CheckPrefix); 1257 } 1258 1259 static bool ValidateCheckPrefixes() { 1260 StringSet<> PrefixSet; 1261 1262 for (StringRef Prefix : CheckPrefixes) { 1263 // Reject empty prefixes. 1264 if (Prefix == "") 1265 return false; 1266 1267 if (!PrefixSet.insert(Prefix).second) 1268 return false; 1269 1270 if (!ValidateCheckPrefix(Prefix)) 1271 return false; 1272 } 1273 1274 return true; 1275 } 1276 1277 // I don't think there's a way to specify an initial value for cl::list, 1278 // so if nothing was specified, add the default 1279 static void AddCheckPrefixIfNeeded() { 1280 if (CheckPrefixes.empty()) 1281 CheckPrefixes.push_back("CHECK"); 1282 } 1283 1284 int main(int argc, char **argv) { 1285 sys::PrintStackTraceOnErrorSignal(); 1286 PrettyStackTraceProgram X(argc, argv); 1287 cl::ParseCommandLineOptions(argc, argv); 1288 1289 if (!ValidateCheckPrefixes()) { 1290 errs() << "Supplied check-prefix is invalid! Prefixes must be unique and " 1291 "start with a letter and contain only alphanumeric characters, " 1292 "hyphens and underscores\n"; 1293 return 2; 1294 } 1295 1296 AddCheckPrefixIfNeeded(); 1297 1298 SourceMgr SM; 1299 1300 // Read the expected strings from the check file. 1301 std::vector<CheckString> CheckStrings; 1302 if (ReadCheckFile(SM, CheckStrings)) 1303 return 2; 1304 1305 // Open the file to check and add it to SourceMgr. 1306 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = 1307 MemoryBuffer::getFileOrSTDIN(InputFilename); 1308 if (std::error_code EC = FileOrErr.getError()) { 1309 errs() << "Could not open input file '" << InputFilename 1310 << "': " << EC.message() << '\n'; 1311 return 2; 1312 } 1313 std::unique_ptr<MemoryBuffer> &File = FileOrErr.get(); 1314 1315 if (File->getBufferSize() == 0 && !AllowEmptyInput) { 1316 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; 1317 return 2; 1318 } 1319 1320 // Remove duplicate spaces in the input file if requested. 1321 // Remove DOS style line endings. 1322 std::unique_ptr<MemoryBuffer> F = 1323 CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace); 1324 1325 // Check that we have all of the expected strings, in order, in the input 1326 // file. 1327 StringRef Buffer = F->getBuffer(); 1328 1329 SM.AddNewSourceBuffer(std::move(F), SMLoc()); 1330 1331 /// VariableTable - This holds all the current filecheck variables. 1332 StringMap<StringRef> VariableTable; 1333 1334 bool hasError = false; 1335 1336 unsigned i = 0, j = 0, e = CheckStrings.size(); 1337 1338 while (true) { 1339 StringRef CheckRegion; 1340 if (j == e) { 1341 CheckRegion = Buffer; 1342 } else { 1343 const CheckString &CheckLabelStr = CheckStrings[j]; 1344 if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) { 1345 ++j; 1346 continue; 1347 } 1348 1349 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG 1350 size_t MatchLabelLen = 0; 1351 size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true, 1352 MatchLabelLen, VariableTable); 1353 if (MatchLabelPos == StringRef::npos) { 1354 hasError = true; 1355 break; 1356 } 1357 1358 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); 1359 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); 1360 ++j; 1361 } 1362 1363 for ( ; i != j; ++i) { 1364 const CheckString &CheckStr = CheckStrings[i]; 1365 1366 // Check each string within the scanned region, including a second check 1367 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) 1368 size_t MatchLen = 0; 1369 size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen, 1370 VariableTable); 1371 1372 if (MatchPos == StringRef::npos) { 1373 hasError = true; 1374 i = j; 1375 break; 1376 } 1377 1378 CheckRegion = CheckRegion.substr(MatchPos + MatchLen); 1379 } 1380 1381 if (j == e) 1382 break; 1383 } 1384 1385 return hasError ? 1 : 0; 1386 } 1387