1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // FileCheck does a line-by line check of a file that validates whether it 11 // contains the expected content. This is useful for regression tests etc. 12 // 13 // This program exits with an error status of 2 on error, exit status of 0 if 14 // the file matched the expected contents, and exit status of 1 if it did not 15 // contain the expected contents. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #include "llvm/ADT/SmallString.h" 20 #include "llvm/ADT/StringExtras.h" 21 #include "llvm/ADT/StringMap.h" 22 #include "llvm/ADT/StringSet.h" 23 #include "llvm/Support/CommandLine.h" 24 #include "llvm/Support/MemoryBuffer.h" 25 #include "llvm/Support/PrettyStackTrace.h" 26 #include "llvm/Support/Regex.h" 27 #include "llvm/Support/Signals.h" 28 #include "llvm/Support/SourceMgr.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include <algorithm> 31 #include <cctype> 32 #include <map> 33 #include <string> 34 #include <system_error> 35 #include <vector> 36 using namespace llvm; 37 38 static cl::opt<std::string> 39 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required); 40 41 static cl::opt<std::string> 42 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"), 43 cl::init("-"), cl::value_desc("filename")); 44 45 static cl::list<std::string> 46 CheckPrefixes("check-prefix", 47 cl::desc("Prefix to use from check file (defaults to 'CHECK')")); 48 static cl::alias CheckPrefixesAlias( 49 "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated, 50 cl::NotHidden, 51 cl::desc( 52 "Alias for -check-prefix permitting multiple comma separated values")); 53 54 static cl::opt<bool> 55 NoCanonicalizeWhiteSpace("strict-whitespace", 56 cl::desc("Do not treat all horizontal whitespace as equivalent")); 57 58 static cl::list<std::string> ImplicitCheckNot( 59 "implicit-check-not", 60 cl::desc("Add an implicit negative check with this pattern to every\n" 61 "positive check. This can be used to ensure that no instances of\n" 62 "this pattern occur which are not matched by a positive pattern"), 63 cl::value_desc("pattern")); 64 65 static cl::opt<bool> AllowEmptyInput( 66 "allow-empty", cl::init(false), 67 cl::desc("Allow the input file to be empty. This is useful when making\n" 68 "checks that some error message does not occur, for example.")); 69 70 static cl::opt<bool> MatchFullLines( 71 "match-full-lines", cl::init(false), 72 cl::desc("Require all positive matches to cover an entire input line.\n" 73 "Allows leading and trailing whitespace if --strict-whitespace\n" 74 "is not also passed.")); 75 76 typedef cl::list<std::string>::const_iterator prefix_iterator; 77 78 //===----------------------------------------------------------------------===// 79 // Pattern Handling Code. 80 //===----------------------------------------------------------------------===// 81 82 namespace Check { 83 enum CheckType { 84 CheckNone = 0, 85 CheckPlain, 86 CheckNext, 87 CheckSame, 88 CheckNot, 89 CheckDAG, 90 CheckLabel, 91 92 /// MatchEOF - When set, this pattern only matches the end of file. This is 93 /// used for trailing CHECK-NOTs. 94 CheckEOF, 95 /// CheckBadNot - Found -NOT combined with another CHECK suffix. 96 CheckBadNot 97 }; 98 } 99 100 class Pattern { 101 SMLoc PatternLoc; 102 103 /// FixedStr - If non-empty, this pattern is a fixed string match with the 104 /// specified fixed string. 105 StringRef FixedStr; 106 107 /// RegEx - If non-empty, this is a regex pattern. 108 std::string RegExStr; 109 110 /// VariableUses - Entries in this vector map to uses of a variable in the 111 /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain 112 /// "foobaz" and we'll get an entry in this vector that tells us to insert the 113 /// value of bar at offset 3. 114 std::vector<std::pair<StringRef, unsigned> > VariableUses; 115 116 /// VariableDefs - Maps definitions of variables to their parenthesized 117 /// capture numbers. 118 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1. 119 std::map<StringRef, unsigned> VariableDefs; 120 121 Check::CheckType CheckTy; 122 123 /// \brief Contains the number of line this pattern is in. 124 unsigned LineNumber; 125 126 public: 127 explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {} 128 129 /// getLoc - Return the location in source code. 130 SMLoc getLoc() const { return PatternLoc; } 131 132 /// ParsePattern - Parse the given string into the Pattern. Prefix provides 133 /// which prefix is being matched, SM provides the SourceMgr used for error 134 /// reports, and LineNumber is the line number in the input file from which 135 /// the pattern string was read. Returns true in case of an error, false 136 /// otherwise. 137 bool ParsePattern(StringRef PatternStr, 138 StringRef Prefix, 139 SourceMgr &SM, 140 unsigned LineNumber); 141 142 /// Match - Match the pattern string against the input buffer Buffer. This 143 /// returns the position that is matched or npos if there is no match. If 144 /// there is a match, the size of the matched string is returned in MatchLen. 145 /// 146 /// The VariableTable StringMap provides the current values of filecheck 147 /// variables and is updated if this match defines new values. 148 size_t Match(StringRef Buffer, size_t &MatchLen, 149 StringMap<StringRef> &VariableTable) const; 150 151 /// PrintFailureInfo - Print additional information about a failure to match 152 /// involving this pattern. 153 void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 154 const StringMap<StringRef> &VariableTable) const; 155 156 bool hasVariable() const { return !(VariableUses.empty() && 157 VariableDefs.empty()); } 158 159 Check::CheckType getCheckTy() const { return CheckTy; } 160 161 private: 162 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); 163 void AddBackrefToRegEx(unsigned BackrefNum); 164 165 /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of 166 /// matching this pattern at the start of \arg Buffer; a distance of zero 167 /// should correspond to a perfect match. 168 unsigned ComputeMatchDistance(StringRef Buffer, 169 const StringMap<StringRef> &VariableTable) const; 170 171 /// \brief Evaluates expression and stores the result to \p Value. 172 /// \return true on success. false when the expression has invalid syntax. 173 bool EvaluateExpression(StringRef Expr, std::string &Value) const; 174 175 /// \brief Finds the closing sequence of a regex variable usage or 176 /// definition. Str has to point in the beginning of the definition 177 /// (right after the opening sequence). 178 /// \return offset of the closing sequence within Str, or npos if it was not 179 /// found. 180 size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); 181 }; 182 183 184 bool Pattern::ParsePattern(StringRef PatternStr, 185 StringRef Prefix, 186 SourceMgr &SM, 187 unsigned LineNumber) { 188 bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot; 189 190 this->LineNumber = LineNumber; 191 PatternLoc = SMLoc::getFromPointer(PatternStr.data()); 192 193 // Ignore trailing whitespace. 194 while (!PatternStr.empty() && 195 (PatternStr.back() == ' ' || PatternStr.back() == '\t')) 196 PatternStr = PatternStr.substr(0, PatternStr.size()-1); 197 198 // Check that there is something on the line. 199 if (PatternStr.empty()) { 200 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error, 201 "found empty check string with prefix '" + 202 Prefix + ":'"); 203 return true; 204 } 205 206 // Check to see if this is a fixed string, or if it has regex pieces. 207 if (!MatchFullLinesHere && 208 (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos && 209 PatternStr.find("[[") == StringRef::npos))) { 210 FixedStr = PatternStr; 211 return false; 212 } 213 214 if (MatchFullLinesHere) { 215 RegExStr += '^'; 216 if (!NoCanonicalizeWhiteSpace) 217 RegExStr += " *"; 218 } 219 220 // Paren value #0 is for the fully matched string. Any new parenthesized 221 // values add from there. 222 unsigned CurParen = 1; 223 224 // Otherwise, there is at least one regex piece. Build up the regex pattern 225 // by escaping scary characters in fixed strings, building up one big regex. 226 while (!PatternStr.empty()) { 227 // RegEx matches. 228 if (PatternStr.startswith("{{")) { 229 // This is the start of a regex match. Scan for the }}. 230 size_t End = PatternStr.find("}}"); 231 if (End == StringRef::npos) { 232 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 233 SourceMgr::DK_Error, 234 "found start of regex string with no end '}}'"); 235 return true; 236 } 237 238 // Enclose {{}} patterns in parens just like [[]] even though we're not 239 // capturing the result for any purpose. This is required in case the 240 // expression contains an alternation like: CHECK: abc{{x|z}}def. We 241 // want this to turn into: "abc(x|z)def" not "abcx|zdef". 242 RegExStr += '('; 243 ++CurParen; 244 245 if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM)) 246 return true; 247 RegExStr += ')'; 248 249 PatternStr = PatternStr.substr(End+2); 250 continue; 251 } 252 253 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .* 254 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The 255 // second form is [[foo]] which is a reference to foo. The variable name 256 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject 257 // it. This is to catch some common errors. 258 if (PatternStr.startswith("[[")) { 259 // Find the closing bracket pair ending the match. End is going to be an 260 // offset relative to the beginning of the match string. 261 size_t End = FindRegexVarEnd(PatternStr.substr(2), SM); 262 263 if (End == StringRef::npos) { 264 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 265 SourceMgr::DK_Error, 266 "invalid named regex reference, no ]] found"); 267 return true; 268 } 269 270 StringRef MatchStr = PatternStr.substr(2, End); 271 PatternStr = PatternStr.substr(End+4); 272 273 // Get the regex name (e.g. "foo"). 274 size_t NameEnd = MatchStr.find(':'); 275 StringRef Name = MatchStr.substr(0, NameEnd); 276 277 if (Name.empty()) { 278 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 279 "invalid name in named regex: empty name"); 280 return true; 281 } 282 283 // Verify that the name/expression is well formed. FileCheck currently 284 // supports @LINE, @LINE+number, @LINE-number expressions. The check here 285 // is relaxed, more strict check is performed in \c EvaluateExpression. 286 bool IsExpression = false; 287 for (unsigned i = 0, e = Name.size(); i != e; ++i) { 288 if (i == 0 && Name[i] == '@') { 289 if (NameEnd != StringRef::npos) { 290 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 291 SourceMgr::DK_Error, 292 "invalid name in named regex definition"); 293 return true; 294 } 295 IsExpression = true; 296 continue; 297 } 298 if (Name[i] != '_' && !isalnum(Name[i]) && 299 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) { 300 SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i), 301 SourceMgr::DK_Error, "invalid name in named regex"); 302 return true; 303 } 304 } 305 306 // Name can't start with a digit. 307 if (isdigit(static_cast<unsigned char>(Name[0]))) { 308 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 309 "invalid name in named regex"); 310 return true; 311 } 312 313 // Handle [[foo]]. 314 if (NameEnd == StringRef::npos) { 315 // Handle variables that were defined earlier on the same line by 316 // emitting a backreference. 317 if (VariableDefs.find(Name) != VariableDefs.end()) { 318 unsigned VarParenNum = VariableDefs[Name]; 319 if (VarParenNum < 1 || VarParenNum > 9) { 320 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 321 SourceMgr::DK_Error, 322 "Can't back-reference more than 9 variables"); 323 return true; 324 } 325 AddBackrefToRegEx(VarParenNum); 326 } else { 327 VariableUses.push_back(std::make_pair(Name, RegExStr.size())); 328 } 329 continue; 330 } 331 332 // Handle [[foo:.*]]. 333 VariableDefs[Name] = CurParen; 334 RegExStr += '('; 335 ++CurParen; 336 337 if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM)) 338 return true; 339 340 RegExStr += ')'; 341 } 342 343 // Handle fixed string matches. 344 // Find the end, which is the start of the next regex. 345 size_t FixedMatchEnd = PatternStr.find("{{"); 346 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); 347 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); 348 PatternStr = PatternStr.substr(FixedMatchEnd); 349 } 350 351 if (MatchFullLinesHere) { 352 if (!NoCanonicalizeWhiteSpace) 353 RegExStr += " *"; 354 RegExStr += '$'; 355 } 356 357 return false; 358 } 359 360 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, 361 SourceMgr &SM) { 362 Regex R(RS); 363 std::string Error; 364 if (!R.isValid(Error)) { 365 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, 366 "invalid regex: " + Error); 367 return true; 368 } 369 370 RegExStr += RS.str(); 371 CurParen += R.getNumMatches(); 372 return false; 373 } 374 375 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { 376 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); 377 std::string Backref = std::string("\\") + 378 std::string(1, '0' + BackrefNum); 379 RegExStr += Backref; 380 } 381 382 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const { 383 // The only supported expression is @LINE([\+-]\d+)? 384 if (!Expr.startswith("@LINE")) 385 return false; 386 Expr = Expr.substr(StringRef("@LINE").size()); 387 int Offset = 0; 388 if (!Expr.empty()) { 389 if (Expr[0] == '+') 390 Expr = Expr.substr(1); 391 else if (Expr[0] != '-') 392 return false; 393 if (Expr.getAsInteger(10, Offset)) 394 return false; 395 } 396 Value = llvm::itostr(LineNumber + Offset); 397 return true; 398 } 399 400 /// Match - Match the pattern string against the input buffer Buffer. This 401 /// returns the position that is matched or npos if there is no match. If 402 /// there is a match, the size of the matched string is returned in MatchLen. 403 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, 404 StringMap<StringRef> &VariableTable) const { 405 // If this is the EOF pattern, match it immediately. 406 if (CheckTy == Check::CheckEOF) { 407 MatchLen = 0; 408 return Buffer.size(); 409 } 410 411 // If this is a fixed string pattern, just match it now. 412 if (!FixedStr.empty()) { 413 MatchLen = FixedStr.size(); 414 return Buffer.find(FixedStr); 415 } 416 417 // Regex match. 418 419 // If there are variable uses, we need to create a temporary string with the 420 // actual value. 421 StringRef RegExToMatch = RegExStr; 422 std::string TmpStr; 423 if (!VariableUses.empty()) { 424 TmpStr = RegExStr; 425 426 unsigned InsertOffset = 0; 427 for (const auto &VariableUse : VariableUses) { 428 std::string Value; 429 430 if (VariableUse.first[0] == '@') { 431 if (!EvaluateExpression(VariableUse.first, Value)) 432 return StringRef::npos; 433 } else { 434 StringMap<StringRef>::iterator it = 435 VariableTable.find(VariableUse.first); 436 // If the variable is undefined, return an error. 437 if (it == VariableTable.end()) 438 return StringRef::npos; 439 440 // Look up the value and escape it so that we can put it into the regex. 441 Value += Regex::escape(it->second); 442 } 443 444 // Plop it into the regex at the adjusted offset. 445 TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset, 446 Value.begin(), Value.end()); 447 InsertOffset += Value.size(); 448 } 449 450 // Match the newly constructed regex. 451 RegExToMatch = TmpStr; 452 } 453 454 455 SmallVector<StringRef, 4> MatchInfo; 456 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) 457 return StringRef::npos; 458 459 // Successful regex match. 460 assert(!MatchInfo.empty() && "Didn't get any match"); 461 StringRef FullMatch = MatchInfo[0]; 462 463 // If this defines any variables, remember their values. 464 for (const auto &VariableDef : VariableDefs) { 465 assert(VariableDef.second < MatchInfo.size() && "Internal paren error"); 466 VariableTable[VariableDef.first] = MatchInfo[VariableDef.second]; 467 } 468 469 MatchLen = FullMatch.size(); 470 return FullMatch.data()-Buffer.data(); 471 } 472 473 unsigned Pattern::ComputeMatchDistance(StringRef Buffer, 474 const StringMap<StringRef> &VariableTable) const { 475 // Just compute the number of matching characters. For regular expressions, we 476 // just compare against the regex itself and hope for the best. 477 // 478 // FIXME: One easy improvement here is have the regex lib generate a single 479 // example regular expression which matches, and use that as the example 480 // string. 481 StringRef ExampleString(FixedStr); 482 if (ExampleString.empty()) 483 ExampleString = RegExStr; 484 485 // Only compare up to the first line in the buffer, or the string size. 486 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); 487 BufferPrefix = BufferPrefix.split('\n').first; 488 return BufferPrefix.edit_distance(ExampleString); 489 } 490 491 void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 492 const StringMap<StringRef> &VariableTable) const{ 493 // If this was a regular expression using variables, print the current 494 // variable values. 495 if (!VariableUses.empty()) { 496 for (const auto &VariableUse : VariableUses) { 497 SmallString<256> Msg; 498 raw_svector_ostream OS(Msg); 499 StringRef Var = VariableUse.first; 500 if (Var[0] == '@') { 501 std::string Value; 502 if (EvaluateExpression(Var, Value)) { 503 OS << "with expression \""; 504 OS.write_escaped(Var) << "\" equal to \""; 505 OS.write_escaped(Value) << "\""; 506 } else { 507 OS << "uses incorrect expression \""; 508 OS.write_escaped(Var) << "\""; 509 } 510 } else { 511 StringMap<StringRef>::const_iterator it = VariableTable.find(Var); 512 513 // Check for undefined variable references. 514 if (it == VariableTable.end()) { 515 OS << "uses undefined variable \""; 516 OS.write_escaped(Var) << "\""; 517 } else { 518 OS << "with variable \""; 519 OS.write_escaped(Var) << "\" equal to \""; 520 OS.write_escaped(it->second) << "\""; 521 } 522 } 523 524 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 525 OS.str()); 526 } 527 } 528 529 // Attempt to find the closest/best fuzzy match. Usually an error happens 530 // because some string in the output didn't exactly match. In these cases, we 531 // would like to show the user a best guess at what "should have" matched, to 532 // save them having to actually check the input manually. 533 size_t NumLinesForward = 0; 534 size_t Best = StringRef::npos; 535 double BestQuality = 0; 536 537 // Use an arbitrary 4k limit on how far we will search. 538 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { 539 if (Buffer[i] == '\n') 540 ++NumLinesForward; 541 542 // Patterns have leading whitespace stripped, so skip whitespace when 543 // looking for something which looks like a pattern. 544 if (Buffer[i] == ' ' || Buffer[i] == '\t') 545 continue; 546 547 // Compute the "quality" of this match as an arbitrary combination of the 548 // match distance and the number of lines skipped to get to this match. 549 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable); 550 double Quality = Distance + (NumLinesForward / 100.); 551 552 if (Quality < BestQuality || Best == StringRef::npos) { 553 Best = i; 554 BestQuality = Quality; 555 } 556 } 557 558 // Print the "possible intended match here" line if we found something 559 // reasonable and not equal to what we showed in the "scanning from here" 560 // line. 561 if (Best && Best != StringRef::npos && BestQuality < 50) { 562 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best), 563 SourceMgr::DK_Note, "possible intended match here"); 564 565 // FIXME: If we wanted to be really friendly we would show why the match 566 // failed, as it can be hard to spot simple one character differences. 567 } 568 } 569 570 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { 571 // Offset keeps track of the current offset within the input Str 572 size_t Offset = 0; 573 // [...] Nesting depth 574 size_t BracketDepth = 0; 575 576 while (!Str.empty()) { 577 if (Str.startswith("]]") && BracketDepth == 0) 578 return Offset; 579 if (Str[0] == '\\') { 580 // Backslash escapes the next char within regexes, so skip them both. 581 Str = Str.substr(2); 582 Offset += 2; 583 } else { 584 switch (Str[0]) { 585 default: 586 break; 587 case '[': 588 BracketDepth++; 589 break; 590 case ']': 591 if (BracketDepth == 0) { 592 SM.PrintMessage(SMLoc::getFromPointer(Str.data()), 593 SourceMgr::DK_Error, 594 "missing closing \"]\" for regex variable"); 595 exit(1); 596 } 597 BracketDepth--; 598 break; 599 } 600 Str = Str.substr(1); 601 Offset++; 602 } 603 } 604 605 return StringRef::npos; 606 } 607 608 609 //===----------------------------------------------------------------------===// 610 // Check Strings. 611 //===----------------------------------------------------------------------===// 612 613 /// CheckString - This is a check that we found in the input file. 614 struct CheckString { 615 /// Pat - The pattern to match. 616 Pattern Pat; 617 618 /// Prefix - Which prefix name this check matched. 619 StringRef Prefix; 620 621 /// Loc - The location in the match file that the check string was specified. 622 SMLoc Loc; 623 624 /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive, 625 /// as opposed to a CHECK: directive. 626 // Check::CheckType CheckTy; 627 628 /// DagNotStrings - These are all of the strings that are disallowed from 629 /// occurring between this match string and the previous one (or start of 630 /// file). 631 std::vector<Pattern> DagNotStrings; 632 633 CheckString(const Pattern &P, StringRef S, SMLoc L) 634 : Pat(P), Prefix(S), Loc(L) {} 635 636 /// Check - Match check string and its "not strings" and/or "dag strings". 637 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, 638 size_t &MatchLen, StringMap<StringRef> &VariableTable) const; 639 640 /// CheckNext - Verify there is a single line in the given buffer. 641 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; 642 643 /// CheckSame - Verify there is no newline in the given buffer. 644 bool CheckSame(const SourceMgr &SM, StringRef Buffer) const; 645 646 /// CheckNot - Verify there's no "not strings" in the given buffer. 647 bool CheckNot(const SourceMgr &SM, StringRef Buffer, 648 const std::vector<const Pattern *> &NotStrings, 649 StringMap<StringRef> &VariableTable) const; 650 651 /// CheckDag - Match "dag strings" and their mixed "not strings". 652 size_t CheckDag(const SourceMgr &SM, StringRef Buffer, 653 std::vector<const Pattern *> &NotStrings, 654 StringMap<StringRef> &VariableTable) const; 655 }; 656 657 /// Canonicalize whitespaces in the input file. Line endings are replaced 658 /// with UNIX-style '\n'. 659 /// 660 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace 661 /// characters to a single space. 662 static std::unique_ptr<MemoryBuffer> 663 CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB, 664 bool PreserveHorizontal) { 665 SmallString<128> NewFile; 666 NewFile.reserve(MB->getBufferSize()); 667 668 for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd(); 669 Ptr != End; ++Ptr) { 670 // Eliminate trailing dosish \r. 671 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { 672 continue; 673 } 674 675 // If current char is not a horizontal whitespace or if horizontal 676 // whitespace canonicalization is disabled, dump it to output as is. 677 if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) { 678 NewFile.push_back(*Ptr); 679 continue; 680 } 681 682 // Otherwise, add one space and advance over neighboring space. 683 NewFile.push_back(' '); 684 while (Ptr+1 != End && 685 (Ptr[1] == ' ' || Ptr[1] == '\t')) 686 ++Ptr; 687 } 688 689 return std::unique_ptr<MemoryBuffer>( 690 MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier())); 691 } 692 693 static bool IsPartOfWord(char c) { 694 return (isalnum(c) || c == '-' || c == '_'); 695 } 696 697 // Get the size of the prefix extension. 698 static size_t CheckTypeSize(Check::CheckType Ty) { 699 switch (Ty) { 700 case Check::CheckNone: 701 case Check::CheckBadNot: 702 return 0; 703 704 case Check::CheckPlain: 705 return sizeof(":") - 1; 706 707 case Check::CheckNext: 708 return sizeof("-NEXT:") - 1; 709 710 case Check::CheckSame: 711 return sizeof("-SAME:") - 1; 712 713 case Check::CheckNot: 714 return sizeof("-NOT:") - 1; 715 716 case Check::CheckDAG: 717 return sizeof("-DAG:") - 1; 718 719 case Check::CheckLabel: 720 return sizeof("-LABEL:") - 1; 721 722 case Check::CheckEOF: 723 llvm_unreachable("Should not be using EOF size"); 724 } 725 726 llvm_unreachable("Bad check type"); 727 } 728 729 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) { 730 char NextChar = Buffer[Prefix.size()]; 731 732 // Verify that the : is present after the prefix. 733 if (NextChar == ':') 734 return Check::CheckPlain; 735 736 if (NextChar != '-') 737 return Check::CheckNone; 738 739 StringRef Rest = Buffer.drop_front(Prefix.size() + 1); 740 if (Rest.startswith("NEXT:")) 741 return Check::CheckNext; 742 743 if (Rest.startswith("SAME:")) 744 return Check::CheckSame; 745 746 if (Rest.startswith("NOT:")) 747 return Check::CheckNot; 748 749 if (Rest.startswith("DAG:")) 750 return Check::CheckDAG; 751 752 if (Rest.startswith("LABEL:")) 753 return Check::CheckLabel; 754 755 // You can't combine -NOT with another suffix. 756 if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") || 757 Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") || 758 Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:")) 759 return Check::CheckBadNot; 760 761 return Check::CheckNone; 762 } 763 764 // From the given position, find the next character after the word. 765 static size_t SkipWord(StringRef Str, size_t Loc) { 766 while (Loc < Str.size() && IsPartOfWord(Str[Loc])) 767 ++Loc; 768 return Loc; 769 } 770 771 // Try to find the first match in buffer for any prefix. If a valid match is 772 // found, return that prefix and set its type and location. If there are almost 773 // matches (e.g. the actual prefix string is found, but is not an actual check 774 // string), but no valid match, return an empty string and set the position to 775 // resume searching from. If no partial matches are found, return an empty 776 // string and the location will be StringRef::npos. If one prefix is a substring 777 // of another, the maximal match should be found. e.g. if "A" and "AA" are 778 // prefixes then AA-CHECK: should match the second one. 779 static StringRef FindFirstCandidateMatch(StringRef &Buffer, 780 Check::CheckType &CheckTy, 781 size_t &CheckLoc) { 782 StringRef FirstPrefix; 783 size_t FirstLoc = StringRef::npos; 784 size_t SearchLoc = StringRef::npos; 785 Check::CheckType FirstTy = Check::CheckNone; 786 787 CheckTy = Check::CheckNone; 788 CheckLoc = StringRef::npos; 789 790 for (StringRef Prefix : CheckPrefixes) { 791 size_t PrefixLoc = Buffer.find(Prefix); 792 793 if (PrefixLoc == StringRef::npos) 794 continue; 795 796 // Track where we are searching for invalid prefixes that look almost right. 797 // We need to only advance to the first partial match on the next attempt 798 // since a partial match could be a substring of a later, valid prefix. 799 // Need to skip to the end of the word, otherwise we could end up 800 // matching a prefix in a substring later. 801 if (PrefixLoc < SearchLoc) 802 SearchLoc = SkipWord(Buffer, PrefixLoc); 803 804 // We only want to find the first match to avoid skipping some. 805 if (PrefixLoc > FirstLoc) 806 continue; 807 // If one matching check-prefix is a prefix of another, choose the 808 // longer one. 809 if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size()) 810 continue; 811 812 StringRef Rest = Buffer.drop_front(PrefixLoc); 813 // Make sure we have actually found the prefix, and not a word containing 814 // it. This should also prevent matching the wrong prefix when one is a 815 // substring of another. 816 if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1])) 817 FirstTy = Check::CheckNone; 818 else 819 FirstTy = FindCheckType(Rest, Prefix); 820 821 FirstLoc = PrefixLoc; 822 FirstPrefix = Prefix; 823 } 824 825 // If the first prefix is invalid, we should continue the search after it. 826 if (FirstTy == Check::CheckNone) { 827 CheckLoc = SearchLoc; 828 return ""; 829 } 830 831 CheckTy = FirstTy; 832 CheckLoc = FirstLoc; 833 return FirstPrefix; 834 } 835 836 static StringRef FindFirstMatchingPrefix(StringRef &Buffer, 837 unsigned &LineNumber, 838 Check::CheckType &CheckTy, 839 size_t &CheckLoc) { 840 while (!Buffer.empty()) { 841 StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc); 842 // If we found a real match, we are done. 843 if (!Prefix.empty()) { 844 LineNumber += Buffer.substr(0, CheckLoc).count('\n'); 845 return Prefix; 846 } 847 848 // We didn't find any almost matches either, we are also done. 849 if (CheckLoc == StringRef::npos) 850 return StringRef(); 851 852 LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n'); 853 854 // Advance to the last possible match we found and try again. 855 Buffer = Buffer.drop_front(CheckLoc + 1); 856 } 857 858 return StringRef(); 859 } 860 861 /// ReadCheckFile - Read the check file, which specifies the sequence of 862 /// expected strings. The strings are added to the CheckStrings vector. 863 /// Returns true in case of an error, false otherwise. 864 static bool ReadCheckFile(SourceMgr &SM, 865 std::vector<CheckString> &CheckStrings) { 866 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = 867 MemoryBuffer::getFileOrSTDIN(CheckFilename); 868 if (std::error_code EC = FileOrErr.getError()) { 869 errs() << "Could not open check file '" << CheckFilename 870 << "': " << EC.message() << '\n'; 871 return true; 872 } 873 874 // If we want to canonicalize whitespace, strip excess whitespace from the 875 // buffer containing the CHECK lines. Remove DOS style line endings. 876 std::unique_ptr<MemoryBuffer> F = CanonicalizeInputFile( 877 std::move(FileOrErr.get()), NoCanonicalizeWhiteSpace); 878 879 // Find all instances of CheckPrefix followed by : in the file. 880 StringRef Buffer = F->getBuffer(); 881 882 SM.AddNewSourceBuffer(std::move(F), SMLoc()); 883 884 std::vector<Pattern> ImplicitNegativeChecks; 885 for (const auto &PatternString : ImplicitCheckNot) { 886 // Create a buffer with fake command line content in order to display the 887 // command line option responsible for the specific implicit CHECK-NOT. 888 std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str(); 889 std::string Suffix = "'"; 890 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy( 891 Prefix + PatternString + Suffix, "command line"); 892 893 StringRef PatternInBuffer = 894 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); 895 SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); 896 897 ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot)); 898 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer, 899 "IMPLICIT-CHECK", SM, 0); 900 } 901 902 903 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks; 904 905 // LineNumber keeps track of the line on which CheckPrefix instances are 906 // found. 907 unsigned LineNumber = 1; 908 909 while (1) { 910 Check::CheckType CheckTy; 911 size_t PrefixLoc; 912 913 // See if a prefix occurs in the memory buffer. 914 StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer, 915 LineNumber, 916 CheckTy, 917 PrefixLoc); 918 if (UsedPrefix.empty()) 919 break; 920 921 Buffer = Buffer.drop_front(PrefixLoc); 922 923 // Location to use for error messages. 924 const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1); 925 926 // PrefixLoc is to the start of the prefix. Skip to the end. 927 Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy)); 928 929 // Complain about useful-looking but unsupported suffixes. 930 if (CheckTy == Check::CheckBadNot) { 931 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), 932 SourceMgr::DK_Error, 933 "unsupported -NOT combo on prefix '" + UsedPrefix + "'"); 934 return true; 935 } 936 937 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore 938 // leading and trailing whitespace. 939 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 940 941 // Scan ahead to the end of line. 942 size_t EOL = Buffer.find_first_of("\n\r"); 943 944 // Remember the location of the start of the pattern, for diagnostics. 945 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); 946 947 // Parse the pattern. 948 Pattern P(CheckTy); 949 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber)) 950 return true; 951 952 // Verify that CHECK-LABEL lines do not define or use variables 953 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { 954 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 955 SourceMgr::DK_Error, 956 "found '" + UsedPrefix + "-LABEL:'" 957 " with variable definition or use"); 958 return true; 959 } 960 961 Buffer = Buffer.substr(EOL); 962 963 // Verify that CHECK-NEXT lines have at least one CHECK line before them. 964 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) && 965 CheckStrings.empty()) { 966 StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME"; 967 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 968 SourceMgr::DK_Error, 969 "found '" + UsedPrefix + "-" + Type + "' without previous '" 970 + UsedPrefix + ": line"); 971 return true; 972 } 973 974 // Handle CHECK-DAG/-NOT. 975 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { 976 DagNotMatches.push_back(P); 977 continue; 978 } 979 980 // Okay, add the string we captured to the output vector and move on. 981 CheckStrings.emplace_back(P, UsedPrefix, PatternLoc); 982 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 983 DagNotMatches = ImplicitNegativeChecks; 984 } 985 986 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first 987 // prefix as a filler for the error message. 988 if (!DagNotMatches.empty()) { 989 CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(), 990 SMLoc::getFromPointer(Buffer.data())); 991 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 992 } 993 994 if (CheckStrings.empty()) { 995 errs() << "error: no check strings found with prefix" 996 << (CheckPrefixes.size() > 1 ? "es " : " "); 997 prefix_iterator I = CheckPrefixes.begin(); 998 prefix_iterator E = CheckPrefixes.end(); 999 if (I != E) { 1000 errs() << "\'" << *I << ":'"; 1001 ++I; 1002 } 1003 for (; I != E; ++I) 1004 errs() << ", \'" << *I << ":'"; 1005 1006 errs() << '\n'; 1007 return true; 1008 } 1009 1010 return false; 1011 } 1012 1013 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, 1014 const Pattern &Pat, StringRef Buffer, 1015 StringMap<StringRef> &VariableTable) { 1016 // Otherwise, we have an error, emit an error message. 1017 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1018 "expected string not found in input"); 1019 1020 // Print the "scanning from here" line. If the current position is at the 1021 // end of a line, advance to the start of the next line. 1022 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); 1023 1024 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1025 "scanning from here"); 1026 1027 // Allow the pattern to print additional information if desired. 1028 Pat.PrintFailureInfo(SM, Buffer, VariableTable); 1029 } 1030 1031 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr, 1032 StringRef Buffer, 1033 StringMap<StringRef> &VariableTable) { 1034 PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable); 1035 } 1036 1037 /// CountNumNewlinesBetween - Count the number of newlines in the specified 1038 /// range. 1039 static unsigned CountNumNewlinesBetween(StringRef Range, 1040 const char *&FirstNewLine) { 1041 unsigned NumNewLines = 0; 1042 while (1) { 1043 // Scan for newline. 1044 Range = Range.substr(Range.find_first_of("\n\r")); 1045 if (Range.empty()) return NumNewLines; 1046 1047 ++NumNewLines; 1048 1049 // Handle \n\r and \r\n as a single newline. 1050 if (Range.size() > 1 && 1051 (Range[1] == '\n' || Range[1] == '\r') && 1052 (Range[0] != Range[1])) 1053 Range = Range.substr(1); 1054 Range = Range.substr(1); 1055 1056 if (NumNewLines == 1) 1057 FirstNewLine = Range.begin(); 1058 } 1059 } 1060 1061 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer, 1062 bool IsLabelScanMode, size_t &MatchLen, 1063 StringMap<StringRef> &VariableTable) const { 1064 size_t LastPos = 0; 1065 std::vector<const Pattern *> NotStrings; 1066 1067 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL 1068 // bounds; we have not processed variable definitions within the bounded block 1069 // yet so cannot handle any final CHECK-DAG yet; this is handled when going 1070 // over the block again (including the last CHECK-LABEL) in normal mode. 1071 if (!IsLabelScanMode) { 1072 // Match "dag strings" (with mixed "not strings" if any). 1073 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable); 1074 if (LastPos == StringRef::npos) 1075 return StringRef::npos; 1076 } 1077 1078 // Match itself from the last position after matching CHECK-DAG. 1079 StringRef MatchBuffer = Buffer.substr(LastPos); 1080 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1081 if (MatchPos == StringRef::npos) { 1082 PrintCheckFailed(SM, *this, MatchBuffer, VariableTable); 1083 return StringRef::npos; 1084 } 1085 1086 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT 1087 // or CHECK-NOT 1088 if (!IsLabelScanMode) { 1089 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1090 1091 // If this check is a "CHECK-NEXT", verify that the previous match was on 1092 // the previous line (i.e. that there is one newline between them). 1093 if (CheckNext(SM, SkippedRegion)) 1094 return StringRef::npos; 1095 1096 // If this check is a "CHECK-SAME", verify that the previous match was on 1097 // the same line (i.e. that there is no newline between them). 1098 if (CheckSame(SM, SkippedRegion)) 1099 return StringRef::npos; 1100 1101 // If this match had "not strings", verify that they don't exist in the 1102 // skipped region. 1103 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1104 return StringRef::npos; 1105 } 1106 1107 return LastPos + MatchPos; 1108 } 1109 1110 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { 1111 if (Pat.getCheckTy() != Check::CheckNext) 1112 return false; 1113 1114 // Count the number of newlines between the previous match and this one. 1115 assert(Buffer.data() != 1116 SM.getMemoryBuffer( 1117 SM.FindBufferContainingLoc( 1118 SMLoc::getFromPointer(Buffer.data())))->getBufferStart() && 1119 "CHECK-NEXT can't be the first check in a file"); 1120 1121 const char *FirstNewLine = nullptr; 1122 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1123 1124 if (NumNewLines == 0) { 1125 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix + 1126 "-NEXT: is on the same line as previous match"); 1127 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), 1128 SourceMgr::DK_Note, "'next' match was here"); 1129 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1130 "previous match ended here"); 1131 return true; 1132 } 1133 1134 if (NumNewLines != 1) { 1135 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix + 1136 "-NEXT: is not on the line after the previous match"); 1137 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), 1138 SourceMgr::DK_Note, "'next' match was here"); 1139 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1140 "previous match ended here"); 1141 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, 1142 "non-matching line after previous match is here"); 1143 return true; 1144 } 1145 1146 return false; 1147 } 1148 1149 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const { 1150 if (Pat.getCheckTy() != Check::CheckSame) 1151 return false; 1152 1153 // Count the number of newlines between the previous match and this one. 1154 assert(Buffer.data() != 1155 SM.getMemoryBuffer(SM.FindBufferContainingLoc( 1156 SMLoc::getFromPointer(Buffer.data()))) 1157 ->getBufferStart() && 1158 "CHECK-SAME can't be the first check in a file"); 1159 1160 const char *FirstNewLine = nullptr; 1161 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1162 1163 if (NumNewLines != 0) { 1164 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1165 Prefix + 1166 "-SAME: is not on the same line as the previous match"); 1167 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1168 "'next' match was here"); 1169 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1170 "previous match ended here"); 1171 return true; 1172 } 1173 1174 return false; 1175 } 1176 1177 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer, 1178 const std::vector<const Pattern *> &NotStrings, 1179 StringMap<StringRef> &VariableTable) const { 1180 for (const Pattern *Pat : NotStrings) { 1181 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); 1182 1183 size_t MatchLen = 0; 1184 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable); 1185 1186 if (Pos == StringRef::npos) continue; 1187 1188 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos), 1189 SourceMgr::DK_Error, 1190 Prefix + "-NOT: string occurred!"); 1191 SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note, 1192 Prefix + "-NOT: pattern specified here"); 1193 return true; 1194 } 1195 1196 return false; 1197 } 1198 1199 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, 1200 std::vector<const Pattern *> &NotStrings, 1201 StringMap<StringRef> &VariableTable) const { 1202 if (DagNotStrings.empty()) 1203 return 0; 1204 1205 size_t LastPos = 0; 1206 size_t StartPos = LastPos; 1207 1208 for (const Pattern &Pat : DagNotStrings) { 1209 assert((Pat.getCheckTy() == Check::CheckDAG || 1210 Pat.getCheckTy() == Check::CheckNot) && 1211 "Invalid CHECK-DAG or CHECK-NOT!"); 1212 1213 if (Pat.getCheckTy() == Check::CheckNot) { 1214 NotStrings.push_back(&Pat); 1215 continue; 1216 } 1217 1218 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); 1219 1220 size_t MatchLen = 0, MatchPos; 1221 1222 // CHECK-DAG always matches from the start. 1223 StringRef MatchBuffer = Buffer.substr(StartPos); 1224 MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1225 // With a group of CHECK-DAGs, a single mismatching means the match on 1226 // that group of CHECK-DAGs fails immediately. 1227 if (MatchPos == StringRef::npos) { 1228 PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable); 1229 return StringRef::npos; 1230 } 1231 // Re-calc it as the offset relative to the start of the original string. 1232 MatchPos += StartPos; 1233 1234 if (!NotStrings.empty()) { 1235 if (MatchPos < LastPos) { 1236 // Reordered? 1237 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos), 1238 SourceMgr::DK_Error, 1239 Prefix + "-DAG: found a match of CHECK-DAG" 1240 " reordering across a CHECK-NOT"); 1241 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos), 1242 SourceMgr::DK_Note, 1243 Prefix + "-DAG: the farthest match of CHECK-DAG" 1244 " is found here"); 1245 SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note, 1246 Prefix + "-NOT: the crossed pattern specified" 1247 " here"); 1248 SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note, 1249 Prefix + "-DAG: the reordered pattern specified" 1250 " here"); 1251 return StringRef::npos; 1252 } 1253 // All subsequent CHECK-DAGs should be matched from the farthest 1254 // position of all precedent CHECK-DAGs (including this one.) 1255 StartPos = LastPos; 1256 // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to 1257 // CHECK-DAG, verify that there's no 'not' strings occurred in that 1258 // region. 1259 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1260 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1261 return StringRef::npos; 1262 // Clear "not strings". 1263 NotStrings.clear(); 1264 } 1265 1266 // Update the last position with CHECK-DAG matches. 1267 LastPos = std::max(MatchPos + MatchLen, LastPos); 1268 } 1269 1270 return LastPos; 1271 } 1272 1273 // A check prefix must contain only alphanumeric, hyphens and underscores. 1274 static bool ValidateCheckPrefix(StringRef CheckPrefix) { 1275 Regex Validator("^[a-zA-Z0-9_-]*$"); 1276 return Validator.match(CheckPrefix); 1277 } 1278 1279 static bool ValidateCheckPrefixes() { 1280 StringSet<> PrefixSet; 1281 1282 for (StringRef Prefix : CheckPrefixes) { 1283 // Reject empty prefixes. 1284 if (Prefix == "") 1285 return false; 1286 1287 if (!PrefixSet.insert(Prefix).second) 1288 return false; 1289 1290 if (!ValidateCheckPrefix(Prefix)) 1291 return false; 1292 } 1293 1294 return true; 1295 } 1296 1297 // I don't think there's a way to specify an initial value for cl::list, 1298 // so if nothing was specified, add the default 1299 static void AddCheckPrefixIfNeeded() { 1300 if (CheckPrefixes.empty()) 1301 CheckPrefixes.push_back("CHECK"); 1302 } 1303 1304 static void DumpCommandLine(int argc, char **argv) { 1305 errs() << "FileCheck command line: "; 1306 for (int I = 0; I < argc; I++) 1307 errs() << " " << argv[I]; 1308 errs() << "\n"; 1309 } 1310 1311 int main(int argc, char **argv) { 1312 sys::PrintStackTraceOnErrorSignal(argv[0]); 1313 PrettyStackTraceProgram X(argc, argv); 1314 cl::ParseCommandLineOptions(argc, argv); 1315 1316 if (!ValidateCheckPrefixes()) { 1317 errs() << "Supplied check-prefix is invalid! Prefixes must be unique and " 1318 "start with a letter and contain only alphanumeric characters, " 1319 "hyphens and underscores\n"; 1320 return 2; 1321 } 1322 1323 AddCheckPrefixIfNeeded(); 1324 1325 SourceMgr SM; 1326 1327 // Read the expected strings from the check file. 1328 std::vector<CheckString> CheckStrings; 1329 if (ReadCheckFile(SM, CheckStrings)) 1330 return 2; 1331 1332 // Open the file to check and add it to SourceMgr. 1333 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = 1334 MemoryBuffer::getFileOrSTDIN(InputFilename); 1335 if (std::error_code EC = FileOrErr.getError()) { 1336 errs() << "Could not open input file '" << InputFilename 1337 << "': " << EC.message() << '\n'; 1338 return 2; 1339 } 1340 std::unique_ptr<MemoryBuffer> &File = FileOrErr.get(); 1341 1342 if (File->getBufferSize() == 0 && !AllowEmptyInput) { 1343 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; 1344 DumpCommandLine(argc, argv); 1345 return 2; 1346 } 1347 1348 // Remove duplicate spaces in the input file if requested. 1349 // Remove DOS style line endings. 1350 std::unique_ptr<MemoryBuffer> F = 1351 CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace); 1352 1353 // Check that we have all of the expected strings, in order, in the input 1354 // file. 1355 StringRef Buffer = F->getBuffer(); 1356 1357 SM.AddNewSourceBuffer(std::move(F), SMLoc()); 1358 1359 /// VariableTable - This holds all the current filecheck variables. 1360 StringMap<StringRef> VariableTable; 1361 1362 bool hasError = false; 1363 1364 unsigned i = 0, j = 0, e = CheckStrings.size(); 1365 1366 while (true) { 1367 StringRef CheckRegion; 1368 if (j == e) { 1369 CheckRegion = Buffer; 1370 } else { 1371 const CheckString &CheckLabelStr = CheckStrings[j]; 1372 if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) { 1373 ++j; 1374 continue; 1375 } 1376 1377 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG 1378 size_t MatchLabelLen = 0; 1379 size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true, 1380 MatchLabelLen, VariableTable); 1381 if (MatchLabelPos == StringRef::npos) { 1382 hasError = true; 1383 break; 1384 } 1385 1386 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); 1387 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); 1388 ++j; 1389 } 1390 1391 for ( ; i != j; ++i) { 1392 const CheckString &CheckStr = CheckStrings[i]; 1393 1394 // Check each string within the scanned region, including a second check 1395 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) 1396 size_t MatchLen = 0; 1397 size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen, 1398 VariableTable); 1399 1400 if (MatchPos == StringRef::npos) { 1401 hasError = true; 1402 i = j; 1403 break; 1404 } 1405 1406 CheckRegion = CheckRegion.substr(MatchPos + MatchLen); 1407 } 1408 1409 if (j == e) 1410 break; 1411 } 1412 1413 return hasError ? 1 : 0; 1414 } 1415