1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // FileCheck does a line-by line check of a file that validates whether it 11 // contains the expected content. This is useful for regression tests etc. 12 // 13 // This program exits with an error status of 2 on error, exit status of 0 if 14 // the file matched the expected contents, and exit status of 1 if it did not 15 // contain the expected contents. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #include "llvm/ADT/SmallString.h" 20 #include "llvm/ADT/StringExtras.h" 21 #include "llvm/ADT/StringMap.h" 22 #include "llvm/ADT/StringSet.h" 23 #include "llvm/Support/CommandLine.h" 24 #include "llvm/Support/MemoryBuffer.h" 25 #include "llvm/Support/PrettyStackTrace.h" 26 #include "llvm/Support/Regex.h" 27 #include "llvm/Support/Signals.h" 28 #include "llvm/Support/SourceMgr.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include <algorithm> 31 #include <cctype> 32 #include <map> 33 #include <string> 34 #include <system_error> 35 #include <vector> 36 using namespace llvm; 37 38 static cl::opt<std::string> 39 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required); 40 41 static cl::opt<std::string> 42 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"), 43 cl::init("-"), cl::value_desc("filename")); 44 45 static cl::list<std::string> 46 CheckPrefixes("check-prefix", 47 cl::desc("Prefix to use from check file (defaults to 'CHECK')")); 48 49 static cl::opt<bool> 50 NoCanonicalizeWhiteSpace("strict-whitespace", 51 cl::desc("Do not treat all horizontal whitespace as equivalent")); 52 53 static cl::list<std::string> ImplicitCheckNot( 54 "implicit-check-not", 55 cl::desc("Add an implicit negative check with this pattern to every\n" 56 "positive check. This can be used to ensure that no instances of\n" 57 "this pattern occur which are not matched by a positive pattern"), 58 cl::value_desc("pattern")); 59 60 static cl::opt<bool> AllowEmptyInput( 61 "allow-empty", cl::init(false), 62 cl::desc("Allow the input file to be empty. This is useful when making\n" 63 "checks that some error message does not occur, for example.")); 64 65 static cl::opt<bool> MatchFullLines( 66 "match-full-lines", cl::init(false), 67 cl::desc("Require all positive matches to cover an entire input line.\n" 68 "Allows leading and trailing whitespace if --strict-whitespace\n" 69 "is not also passed.")); 70 71 typedef cl::list<std::string>::const_iterator prefix_iterator; 72 73 //===----------------------------------------------------------------------===// 74 // Pattern Handling Code. 75 //===----------------------------------------------------------------------===// 76 77 namespace Check { 78 enum CheckType { 79 CheckNone = 0, 80 CheckPlain, 81 CheckNext, 82 CheckSame, 83 CheckNot, 84 CheckDAG, 85 CheckLabel, 86 87 /// MatchEOF - When set, this pattern only matches the end of file. This is 88 /// used for trailing CHECK-NOTs. 89 CheckEOF, 90 /// CheckBadNot - Found -NOT combined with another CHECK suffix. 91 CheckBadNot 92 }; 93 } 94 95 class Pattern { 96 SMLoc PatternLoc; 97 98 Check::CheckType CheckTy; 99 100 /// FixedStr - If non-empty, this pattern is a fixed string match with the 101 /// specified fixed string. 102 StringRef FixedStr; 103 104 /// RegEx - If non-empty, this is a regex pattern. 105 std::string RegExStr; 106 107 /// \brief Contains the number of line this pattern is in. 108 unsigned LineNumber; 109 110 /// VariableUses - Entries in this vector map to uses of a variable in the 111 /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain 112 /// "foobaz" and we'll get an entry in this vector that tells us to insert the 113 /// value of bar at offset 3. 114 std::vector<std::pair<StringRef, unsigned> > VariableUses; 115 116 /// VariableDefs - Maps definitions of variables to their parenthesized 117 /// capture numbers. 118 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1. 119 std::map<StringRef, unsigned> VariableDefs; 120 121 public: 122 123 Pattern(Check::CheckType Ty) 124 : CheckTy(Ty) { } 125 126 /// getLoc - Return the location in source code. 127 SMLoc getLoc() const { return PatternLoc; } 128 129 /// ParsePattern - Parse the given string into the Pattern. Prefix provides 130 /// which prefix is being matched, SM provides the SourceMgr used for error 131 /// reports, and LineNumber is the line number in the input file from which 132 /// the pattern string was read. Returns true in case of an error, false 133 /// otherwise. 134 bool ParsePattern(StringRef PatternStr, 135 StringRef Prefix, 136 SourceMgr &SM, 137 unsigned LineNumber); 138 139 /// Match - Match the pattern string against the input buffer Buffer. This 140 /// returns the position that is matched or npos if there is no match. If 141 /// there is a match, the size of the matched string is returned in MatchLen. 142 /// 143 /// The VariableTable StringMap provides the current values of filecheck 144 /// variables and is updated if this match defines new values. 145 size_t Match(StringRef Buffer, size_t &MatchLen, 146 StringMap<StringRef> &VariableTable) const; 147 148 /// PrintFailureInfo - Print additional information about a failure to match 149 /// involving this pattern. 150 void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 151 const StringMap<StringRef> &VariableTable) const; 152 153 bool hasVariable() const { return !(VariableUses.empty() && 154 VariableDefs.empty()); } 155 156 Check::CheckType getCheckTy() const { return CheckTy; } 157 158 private: 159 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); 160 void AddBackrefToRegEx(unsigned BackrefNum); 161 162 /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of 163 /// matching this pattern at the start of \arg Buffer; a distance of zero 164 /// should correspond to a perfect match. 165 unsigned ComputeMatchDistance(StringRef Buffer, 166 const StringMap<StringRef> &VariableTable) const; 167 168 /// \brief Evaluates expression and stores the result to \p Value. 169 /// \return true on success. false when the expression has invalid syntax. 170 bool EvaluateExpression(StringRef Expr, std::string &Value) const; 171 172 /// \brief Finds the closing sequence of a regex variable usage or 173 /// definition. Str has to point in the beginning of the definition 174 /// (right after the opening sequence). 175 /// \return offset of the closing sequence within Str, or npos if it was not 176 /// found. 177 size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); 178 }; 179 180 181 bool Pattern::ParsePattern(StringRef PatternStr, 182 StringRef Prefix, 183 SourceMgr &SM, 184 unsigned LineNumber) { 185 bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot; 186 187 this->LineNumber = LineNumber; 188 PatternLoc = SMLoc::getFromPointer(PatternStr.data()); 189 190 // Ignore trailing whitespace. 191 while (!PatternStr.empty() && 192 (PatternStr.back() == ' ' || PatternStr.back() == '\t')) 193 PatternStr = PatternStr.substr(0, PatternStr.size()-1); 194 195 // Check that there is something on the line. 196 if (PatternStr.empty()) { 197 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error, 198 "found empty check string with prefix '" + 199 Prefix + ":'"); 200 return true; 201 } 202 203 // Check to see if this is a fixed string, or if it has regex pieces. 204 if (!MatchFullLinesHere && 205 (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos && 206 PatternStr.find("[[") == StringRef::npos))) { 207 FixedStr = PatternStr; 208 return false; 209 } 210 211 if (MatchFullLinesHere) { 212 RegExStr += '^'; 213 if (!NoCanonicalizeWhiteSpace) 214 RegExStr += " *"; 215 } 216 217 // Paren value #0 is for the fully matched string. Any new parenthesized 218 // values add from there. 219 unsigned CurParen = 1; 220 221 // Otherwise, there is at least one regex piece. Build up the regex pattern 222 // by escaping scary characters in fixed strings, building up one big regex. 223 while (!PatternStr.empty()) { 224 // RegEx matches. 225 if (PatternStr.startswith("{{")) { 226 // This is the start of a regex match. Scan for the }}. 227 size_t End = PatternStr.find("}}"); 228 if (End == StringRef::npos) { 229 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 230 SourceMgr::DK_Error, 231 "found start of regex string with no end '}}'"); 232 return true; 233 } 234 235 // Enclose {{}} patterns in parens just like [[]] even though we're not 236 // capturing the result for any purpose. This is required in case the 237 // expression contains an alternation like: CHECK: abc{{x|z}}def. We 238 // want this to turn into: "abc(x|z)def" not "abcx|zdef". 239 RegExStr += '('; 240 ++CurParen; 241 242 if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM)) 243 return true; 244 RegExStr += ')'; 245 246 PatternStr = PatternStr.substr(End+2); 247 continue; 248 } 249 250 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .* 251 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The 252 // second form is [[foo]] which is a reference to foo. The variable name 253 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject 254 // it. This is to catch some common errors. 255 if (PatternStr.startswith("[[")) { 256 // Find the closing bracket pair ending the match. End is going to be an 257 // offset relative to the beginning of the match string. 258 size_t End = FindRegexVarEnd(PatternStr.substr(2), SM); 259 260 if (End == StringRef::npos) { 261 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 262 SourceMgr::DK_Error, 263 "invalid named regex reference, no ]] found"); 264 return true; 265 } 266 267 StringRef MatchStr = PatternStr.substr(2, End); 268 PatternStr = PatternStr.substr(End+4); 269 270 // Get the regex name (e.g. "foo"). 271 size_t NameEnd = MatchStr.find(':'); 272 StringRef Name = MatchStr.substr(0, NameEnd); 273 274 if (Name.empty()) { 275 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 276 "invalid name in named regex: empty name"); 277 return true; 278 } 279 280 // Verify that the name/expression is well formed. FileCheck currently 281 // supports @LINE, @LINE+number, @LINE-number expressions. The check here 282 // is relaxed, more strict check is performed in \c EvaluateExpression. 283 bool IsExpression = false; 284 for (unsigned i = 0, e = Name.size(); i != e; ++i) { 285 if (i == 0 && Name[i] == '@') { 286 if (NameEnd != StringRef::npos) { 287 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 288 SourceMgr::DK_Error, 289 "invalid name in named regex definition"); 290 return true; 291 } 292 IsExpression = true; 293 continue; 294 } 295 if (Name[i] != '_' && !isalnum(Name[i]) && 296 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) { 297 SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i), 298 SourceMgr::DK_Error, "invalid name in named regex"); 299 return true; 300 } 301 } 302 303 // Name can't start with a digit. 304 if (isdigit(static_cast<unsigned char>(Name[0]))) { 305 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 306 "invalid name in named regex"); 307 return true; 308 } 309 310 // Handle [[foo]]. 311 if (NameEnd == StringRef::npos) { 312 // Handle variables that were defined earlier on the same line by 313 // emitting a backreference. 314 if (VariableDefs.find(Name) != VariableDefs.end()) { 315 unsigned VarParenNum = VariableDefs[Name]; 316 if (VarParenNum < 1 || VarParenNum > 9) { 317 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 318 SourceMgr::DK_Error, 319 "Can't back-reference more than 9 variables"); 320 return true; 321 } 322 AddBackrefToRegEx(VarParenNum); 323 } else { 324 VariableUses.push_back(std::make_pair(Name, RegExStr.size())); 325 } 326 continue; 327 } 328 329 // Handle [[foo:.*]]. 330 VariableDefs[Name] = CurParen; 331 RegExStr += '('; 332 ++CurParen; 333 334 if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM)) 335 return true; 336 337 RegExStr += ')'; 338 } 339 340 // Handle fixed string matches. 341 // Find the end, which is the start of the next regex. 342 size_t FixedMatchEnd = PatternStr.find("{{"); 343 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); 344 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); 345 PatternStr = PatternStr.substr(FixedMatchEnd); 346 } 347 348 if (MatchFullLinesHere) { 349 if (!NoCanonicalizeWhiteSpace) 350 RegExStr += " *"; 351 RegExStr += '$'; 352 } 353 354 return false; 355 } 356 357 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, 358 SourceMgr &SM) { 359 Regex R(RS); 360 std::string Error; 361 if (!R.isValid(Error)) { 362 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, 363 "invalid regex: " + Error); 364 return true; 365 } 366 367 RegExStr += RS.str(); 368 CurParen += R.getNumMatches(); 369 return false; 370 } 371 372 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { 373 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); 374 std::string Backref = std::string("\\") + 375 std::string(1, '0' + BackrefNum); 376 RegExStr += Backref; 377 } 378 379 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const { 380 // The only supported expression is @LINE([\+-]\d+)? 381 if (!Expr.startswith("@LINE")) 382 return false; 383 Expr = Expr.substr(StringRef("@LINE").size()); 384 int Offset = 0; 385 if (!Expr.empty()) { 386 if (Expr[0] == '+') 387 Expr = Expr.substr(1); 388 else if (Expr[0] != '-') 389 return false; 390 if (Expr.getAsInteger(10, Offset)) 391 return false; 392 } 393 Value = llvm::itostr(LineNumber + Offset); 394 return true; 395 } 396 397 /// Match - Match the pattern string against the input buffer Buffer. This 398 /// returns the position that is matched or npos if there is no match. If 399 /// there is a match, the size of the matched string is returned in MatchLen. 400 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, 401 StringMap<StringRef> &VariableTable) const { 402 // If this is the EOF pattern, match it immediately. 403 if (CheckTy == Check::CheckEOF) { 404 MatchLen = 0; 405 return Buffer.size(); 406 } 407 408 // If this is a fixed string pattern, just match it now. 409 if (!FixedStr.empty()) { 410 MatchLen = FixedStr.size(); 411 return Buffer.find(FixedStr); 412 } 413 414 // Regex match. 415 416 // If there are variable uses, we need to create a temporary string with the 417 // actual value. 418 StringRef RegExToMatch = RegExStr; 419 std::string TmpStr; 420 if (!VariableUses.empty()) { 421 TmpStr = RegExStr; 422 423 unsigned InsertOffset = 0; 424 for (const auto &VariableUse : VariableUses) { 425 std::string Value; 426 427 if (VariableUse.first[0] == '@') { 428 if (!EvaluateExpression(VariableUse.first, Value)) 429 return StringRef::npos; 430 } else { 431 StringMap<StringRef>::iterator it = 432 VariableTable.find(VariableUse.first); 433 // If the variable is undefined, return an error. 434 if (it == VariableTable.end()) 435 return StringRef::npos; 436 437 // Look up the value and escape it so that we can put it into the regex. 438 Value += Regex::escape(it->second); 439 } 440 441 // Plop it into the regex at the adjusted offset. 442 TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset, 443 Value.begin(), Value.end()); 444 InsertOffset += Value.size(); 445 } 446 447 // Match the newly constructed regex. 448 RegExToMatch = TmpStr; 449 } 450 451 452 SmallVector<StringRef, 4> MatchInfo; 453 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) 454 return StringRef::npos; 455 456 // Successful regex match. 457 assert(!MatchInfo.empty() && "Didn't get any match"); 458 StringRef FullMatch = MatchInfo[0]; 459 460 // If this defines any variables, remember their values. 461 for (const auto &VariableDef : VariableDefs) { 462 assert(VariableDef.second < MatchInfo.size() && "Internal paren error"); 463 VariableTable[VariableDef.first] = MatchInfo[VariableDef.second]; 464 } 465 466 MatchLen = FullMatch.size(); 467 return FullMatch.data()-Buffer.data(); 468 } 469 470 unsigned Pattern::ComputeMatchDistance(StringRef Buffer, 471 const StringMap<StringRef> &VariableTable) const { 472 // Just compute the number of matching characters. For regular expressions, we 473 // just compare against the regex itself and hope for the best. 474 // 475 // FIXME: One easy improvement here is have the regex lib generate a single 476 // example regular expression which matches, and use that as the example 477 // string. 478 StringRef ExampleString(FixedStr); 479 if (ExampleString.empty()) 480 ExampleString = RegExStr; 481 482 // Only compare up to the first line in the buffer, or the string size. 483 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); 484 BufferPrefix = BufferPrefix.split('\n').first; 485 return BufferPrefix.edit_distance(ExampleString); 486 } 487 488 void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 489 const StringMap<StringRef> &VariableTable) const{ 490 // If this was a regular expression using variables, print the current 491 // variable values. 492 if (!VariableUses.empty()) { 493 for (const auto &VariableUse : VariableUses) { 494 SmallString<256> Msg; 495 raw_svector_ostream OS(Msg); 496 StringRef Var = VariableUse.first; 497 if (Var[0] == '@') { 498 std::string Value; 499 if (EvaluateExpression(Var, Value)) { 500 OS << "with expression \""; 501 OS.write_escaped(Var) << "\" equal to \""; 502 OS.write_escaped(Value) << "\""; 503 } else { 504 OS << "uses incorrect expression \""; 505 OS.write_escaped(Var) << "\""; 506 } 507 } else { 508 StringMap<StringRef>::const_iterator it = VariableTable.find(Var); 509 510 // Check for undefined variable references. 511 if (it == VariableTable.end()) { 512 OS << "uses undefined variable \""; 513 OS.write_escaped(Var) << "\""; 514 } else { 515 OS << "with variable \""; 516 OS.write_escaped(Var) << "\" equal to \""; 517 OS.write_escaped(it->second) << "\""; 518 } 519 } 520 521 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 522 OS.str()); 523 } 524 } 525 526 // Attempt to find the closest/best fuzzy match. Usually an error happens 527 // because some string in the output didn't exactly match. In these cases, we 528 // would like to show the user a best guess at what "should have" matched, to 529 // save them having to actually check the input manually. 530 size_t NumLinesForward = 0; 531 size_t Best = StringRef::npos; 532 double BestQuality = 0; 533 534 // Use an arbitrary 4k limit on how far we will search. 535 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { 536 if (Buffer[i] == '\n') 537 ++NumLinesForward; 538 539 // Patterns have leading whitespace stripped, so skip whitespace when 540 // looking for something which looks like a pattern. 541 if (Buffer[i] == ' ' || Buffer[i] == '\t') 542 continue; 543 544 // Compute the "quality" of this match as an arbitrary combination of the 545 // match distance and the number of lines skipped to get to this match. 546 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable); 547 double Quality = Distance + (NumLinesForward / 100.); 548 549 if (Quality < BestQuality || Best == StringRef::npos) { 550 Best = i; 551 BestQuality = Quality; 552 } 553 } 554 555 // Print the "possible intended match here" line if we found something 556 // reasonable and not equal to what we showed in the "scanning from here" 557 // line. 558 if (Best && Best != StringRef::npos && BestQuality < 50) { 559 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best), 560 SourceMgr::DK_Note, "possible intended match here"); 561 562 // FIXME: If we wanted to be really friendly we would show why the match 563 // failed, as it can be hard to spot simple one character differences. 564 } 565 } 566 567 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { 568 // Offset keeps track of the current offset within the input Str 569 size_t Offset = 0; 570 // [...] Nesting depth 571 size_t BracketDepth = 0; 572 573 while (!Str.empty()) { 574 if (Str.startswith("]]") && BracketDepth == 0) 575 return Offset; 576 if (Str[0] == '\\') { 577 // Backslash escapes the next char within regexes, so skip them both. 578 Str = Str.substr(2); 579 Offset += 2; 580 } else { 581 switch (Str[0]) { 582 default: 583 break; 584 case '[': 585 BracketDepth++; 586 break; 587 case ']': 588 if (BracketDepth == 0) { 589 SM.PrintMessage(SMLoc::getFromPointer(Str.data()), 590 SourceMgr::DK_Error, 591 "missing closing \"]\" for regex variable"); 592 exit(1); 593 } 594 BracketDepth--; 595 break; 596 } 597 Str = Str.substr(1); 598 Offset++; 599 } 600 } 601 602 return StringRef::npos; 603 } 604 605 606 //===----------------------------------------------------------------------===// 607 // Check Strings. 608 //===----------------------------------------------------------------------===// 609 610 /// CheckString - This is a check that we found in the input file. 611 struct CheckString { 612 /// Pat - The pattern to match. 613 Pattern Pat; 614 615 /// Prefix - Which prefix name this check matched. 616 StringRef Prefix; 617 618 /// Loc - The location in the match file that the check string was specified. 619 SMLoc Loc; 620 621 /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive, 622 /// as opposed to a CHECK: directive. 623 // Check::CheckType CheckTy; 624 625 /// DagNotStrings - These are all of the strings that are disallowed from 626 /// occurring between this match string and the previous one (or start of 627 /// file). 628 std::vector<Pattern> DagNotStrings; 629 630 CheckString(const Pattern &P, StringRef S, SMLoc L) 631 : Pat(P), Prefix(S), Loc(L) {} 632 633 /// Check - Match check string and its "not strings" and/or "dag strings". 634 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, 635 size_t &MatchLen, StringMap<StringRef> &VariableTable) const; 636 637 /// CheckNext - Verify there is a single line in the given buffer. 638 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; 639 640 /// CheckSame - Verify there is no newline in the given buffer. 641 bool CheckSame(const SourceMgr &SM, StringRef Buffer) const; 642 643 /// CheckNot - Verify there's no "not strings" in the given buffer. 644 bool CheckNot(const SourceMgr &SM, StringRef Buffer, 645 const std::vector<const Pattern *> &NotStrings, 646 StringMap<StringRef> &VariableTable) const; 647 648 /// CheckDag - Match "dag strings" and their mixed "not strings". 649 size_t CheckDag(const SourceMgr &SM, StringRef Buffer, 650 std::vector<const Pattern *> &NotStrings, 651 StringMap<StringRef> &VariableTable) const; 652 }; 653 654 /// Canonicalize whitespaces in the input file. Line endings are replaced 655 /// with UNIX-style '\n'. 656 /// 657 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace 658 /// characters to a single space. 659 static std::unique_ptr<MemoryBuffer> 660 CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB, 661 bool PreserveHorizontal) { 662 SmallString<128> NewFile; 663 NewFile.reserve(MB->getBufferSize()); 664 665 for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd(); 666 Ptr != End; ++Ptr) { 667 // Eliminate trailing dosish \r. 668 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { 669 continue; 670 } 671 672 // If current char is not a horizontal whitespace or if horizontal 673 // whitespace canonicalization is disabled, dump it to output as is. 674 if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) { 675 NewFile.push_back(*Ptr); 676 continue; 677 } 678 679 // Otherwise, add one space and advance over neighboring space. 680 NewFile.push_back(' '); 681 while (Ptr+1 != End && 682 (Ptr[1] == ' ' || Ptr[1] == '\t')) 683 ++Ptr; 684 } 685 686 return std::unique_ptr<MemoryBuffer>( 687 MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier())); 688 } 689 690 static bool IsPartOfWord(char c) { 691 return (isalnum(c) || c == '-' || c == '_'); 692 } 693 694 // Get the size of the prefix extension. 695 static size_t CheckTypeSize(Check::CheckType Ty) { 696 switch (Ty) { 697 case Check::CheckNone: 698 case Check::CheckBadNot: 699 return 0; 700 701 case Check::CheckPlain: 702 return sizeof(":") - 1; 703 704 case Check::CheckNext: 705 return sizeof("-NEXT:") - 1; 706 707 case Check::CheckSame: 708 return sizeof("-SAME:") - 1; 709 710 case Check::CheckNot: 711 return sizeof("-NOT:") - 1; 712 713 case Check::CheckDAG: 714 return sizeof("-DAG:") - 1; 715 716 case Check::CheckLabel: 717 return sizeof("-LABEL:") - 1; 718 719 case Check::CheckEOF: 720 llvm_unreachable("Should not be using EOF size"); 721 } 722 723 llvm_unreachable("Bad check type"); 724 } 725 726 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) { 727 char NextChar = Buffer[Prefix.size()]; 728 729 // Verify that the : is present after the prefix. 730 if (NextChar == ':') 731 return Check::CheckPlain; 732 733 if (NextChar != '-') 734 return Check::CheckNone; 735 736 StringRef Rest = Buffer.drop_front(Prefix.size() + 1); 737 if (Rest.startswith("NEXT:")) 738 return Check::CheckNext; 739 740 if (Rest.startswith("SAME:")) 741 return Check::CheckSame; 742 743 if (Rest.startswith("NOT:")) 744 return Check::CheckNot; 745 746 if (Rest.startswith("DAG:")) 747 return Check::CheckDAG; 748 749 if (Rest.startswith("LABEL:")) 750 return Check::CheckLabel; 751 752 // You can't combine -NOT with another suffix. 753 if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") || 754 Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") || 755 Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:")) 756 return Check::CheckBadNot; 757 758 return Check::CheckNone; 759 } 760 761 // From the given position, find the next character after the word. 762 static size_t SkipWord(StringRef Str, size_t Loc) { 763 while (Loc < Str.size() && IsPartOfWord(Str[Loc])) 764 ++Loc; 765 return Loc; 766 } 767 768 // Try to find the first match in buffer for any prefix. If a valid match is 769 // found, return that prefix and set its type and location. If there are almost 770 // matches (e.g. the actual prefix string is found, but is not an actual check 771 // string), but no valid match, return an empty string and set the position to 772 // resume searching from. If no partial matches are found, return an empty 773 // string and the location will be StringRef::npos. If one prefix is a substring 774 // of another, the maximal match should be found. e.g. if "A" and "AA" are 775 // prefixes then AA-CHECK: should match the second one. 776 static StringRef FindFirstCandidateMatch(StringRef &Buffer, 777 Check::CheckType &CheckTy, 778 size_t &CheckLoc) { 779 StringRef FirstPrefix; 780 size_t FirstLoc = StringRef::npos; 781 size_t SearchLoc = StringRef::npos; 782 Check::CheckType FirstTy = Check::CheckNone; 783 784 CheckTy = Check::CheckNone; 785 CheckLoc = StringRef::npos; 786 787 for (StringRef Prefix : CheckPrefixes) { 788 size_t PrefixLoc = Buffer.find(Prefix); 789 790 if (PrefixLoc == StringRef::npos) 791 continue; 792 793 // Track where we are searching for invalid prefixes that look almost right. 794 // We need to only advance to the first partial match on the next attempt 795 // since a partial match could be a substring of a later, valid prefix. 796 // Need to skip to the end of the word, otherwise we could end up 797 // matching a prefix in a substring later. 798 if (PrefixLoc < SearchLoc) 799 SearchLoc = SkipWord(Buffer, PrefixLoc); 800 801 // We only want to find the first match to avoid skipping some. 802 if (PrefixLoc > FirstLoc) 803 continue; 804 // If one matching check-prefix is a prefix of another, choose the 805 // longer one. 806 if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size()) 807 continue; 808 809 StringRef Rest = Buffer.drop_front(PrefixLoc); 810 // Make sure we have actually found the prefix, and not a word containing 811 // it. This should also prevent matching the wrong prefix when one is a 812 // substring of another. 813 if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1])) 814 FirstTy = Check::CheckNone; 815 else 816 FirstTy = FindCheckType(Rest, Prefix); 817 818 FirstLoc = PrefixLoc; 819 FirstPrefix = Prefix; 820 } 821 822 // If the first prefix is invalid, we should continue the search after it. 823 if (FirstTy == Check::CheckNone) { 824 CheckLoc = SearchLoc; 825 return ""; 826 } 827 828 CheckTy = FirstTy; 829 CheckLoc = FirstLoc; 830 return FirstPrefix; 831 } 832 833 static StringRef FindFirstMatchingPrefix(StringRef &Buffer, 834 unsigned &LineNumber, 835 Check::CheckType &CheckTy, 836 size_t &CheckLoc) { 837 while (!Buffer.empty()) { 838 StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc); 839 // If we found a real match, we are done. 840 if (!Prefix.empty()) { 841 LineNumber += Buffer.substr(0, CheckLoc).count('\n'); 842 return Prefix; 843 } 844 845 // We didn't find any almost matches either, we are also done. 846 if (CheckLoc == StringRef::npos) 847 return StringRef(); 848 849 LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n'); 850 851 // Advance to the last possible match we found and try again. 852 Buffer = Buffer.drop_front(CheckLoc + 1); 853 } 854 855 return StringRef(); 856 } 857 858 /// ReadCheckFile - Read the check file, which specifies the sequence of 859 /// expected strings. The strings are added to the CheckStrings vector. 860 /// Returns true in case of an error, false otherwise. 861 static bool ReadCheckFile(SourceMgr &SM, 862 std::vector<CheckString> &CheckStrings) { 863 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = 864 MemoryBuffer::getFileOrSTDIN(CheckFilename); 865 if (std::error_code EC = FileOrErr.getError()) { 866 errs() << "Could not open check file '" << CheckFilename 867 << "': " << EC.message() << '\n'; 868 return true; 869 } 870 871 // If we want to canonicalize whitespace, strip excess whitespace from the 872 // buffer containing the CHECK lines. Remove DOS style line endings. 873 std::unique_ptr<MemoryBuffer> F = CanonicalizeInputFile( 874 std::move(FileOrErr.get()), NoCanonicalizeWhiteSpace); 875 876 // Find all instances of CheckPrefix followed by : in the file. 877 StringRef Buffer = F->getBuffer(); 878 879 SM.AddNewSourceBuffer(std::move(F), SMLoc()); 880 881 std::vector<Pattern> ImplicitNegativeChecks; 882 for (const auto &PatternString : ImplicitCheckNot) { 883 // Create a buffer with fake command line content in order to display the 884 // command line option responsible for the specific implicit CHECK-NOT. 885 std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str(); 886 std::string Suffix = "'"; 887 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy( 888 Prefix + PatternString + Suffix, "command line"); 889 890 StringRef PatternInBuffer = 891 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); 892 SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); 893 894 ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot)); 895 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer, 896 "IMPLICIT-CHECK", SM, 0); 897 } 898 899 900 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks; 901 902 // LineNumber keeps track of the line on which CheckPrefix instances are 903 // found. 904 unsigned LineNumber = 1; 905 906 while (1) { 907 Check::CheckType CheckTy; 908 size_t PrefixLoc; 909 910 // See if a prefix occurs in the memory buffer. 911 StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer, 912 LineNumber, 913 CheckTy, 914 PrefixLoc); 915 if (UsedPrefix.empty()) 916 break; 917 918 Buffer = Buffer.drop_front(PrefixLoc); 919 920 // Location to use for error messages. 921 const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1); 922 923 // PrefixLoc is to the start of the prefix. Skip to the end. 924 Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy)); 925 926 // Complain about useful-looking but unsupported suffixes. 927 if (CheckTy == Check::CheckBadNot) { 928 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), 929 SourceMgr::DK_Error, 930 "unsupported -NOT combo on prefix '" + UsedPrefix + "'"); 931 return true; 932 } 933 934 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore 935 // leading and trailing whitespace. 936 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 937 938 // Scan ahead to the end of line. 939 size_t EOL = Buffer.find_first_of("\n\r"); 940 941 // Remember the location of the start of the pattern, for diagnostics. 942 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); 943 944 // Parse the pattern. 945 Pattern P(CheckTy); 946 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber)) 947 return true; 948 949 // Verify that CHECK-LABEL lines do not define or use variables 950 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { 951 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 952 SourceMgr::DK_Error, 953 "found '" + UsedPrefix + "-LABEL:'" 954 " with variable definition or use"); 955 return true; 956 } 957 958 Buffer = Buffer.substr(EOL); 959 960 // Verify that CHECK-NEXT lines have at least one CHECK line before them. 961 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) && 962 CheckStrings.empty()) { 963 StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME"; 964 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 965 SourceMgr::DK_Error, 966 "found '" + UsedPrefix + "-" + Type + "' without previous '" 967 + UsedPrefix + ": line"); 968 return true; 969 } 970 971 // Handle CHECK-DAG/-NOT. 972 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { 973 DagNotMatches.push_back(P); 974 continue; 975 } 976 977 // Okay, add the string we captured to the output vector and move on. 978 CheckStrings.emplace_back(P, UsedPrefix, PatternLoc); 979 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 980 DagNotMatches = ImplicitNegativeChecks; 981 } 982 983 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first 984 // prefix as a filler for the error message. 985 if (!DagNotMatches.empty()) { 986 CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(), 987 SMLoc::getFromPointer(Buffer.data())); 988 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 989 } 990 991 if (CheckStrings.empty()) { 992 errs() << "error: no check strings found with prefix" 993 << (CheckPrefixes.size() > 1 ? "es " : " "); 994 prefix_iterator I = CheckPrefixes.begin(); 995 prefix_iterator E = CheckPrefixes.end(); 996 if (I != E) { 997 errs() << "\'" << *I << ":'"; 998 ++I; 999 } 1000 for (; I != E; ++I) 1001 errs() << ", \'" << *I << ":'"; 1002 1003 errs() << '\n'; 1004 return true; 1005 } 1006 1007 return false; 1008 } 1009 1010 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, 1011 const Pattern &Pat, StringRef Buffer, 1012 StringMap<StringRef> &VariableTable) { 1013 // Otherwise, we have an error, emit an error message. 1014 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1015 "expected string not found in input"); 1016 1017 // Print the "scanning from here" line. If the current position is at the 1018 // end of a line, advance to the start of the next line. 1019 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); 1020 1021 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1022 "scanning from here"); 1023 1024 // Allow the pattern to print additional information if desired. 1025 Pat.PrintFailureInfo(SM, Buffer, VariableTable); 1026 } 1027 1028 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr, 1029 StringRef Buffer, 1030 StringMap<StringRef> &VariableTable) { 1031 PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable); 1032 } 1033 1034 /// CountNumNewlinesBetween - Count the number of newlines in the specified 1035 /// range. 1036 static unsigned CountNumNewlinesBetween(StringRef Range, 1037 const char *&FirstNewLine) { 1038 unsigned NumNewLines = 0; 1039 while (1) { 1040 // Scan for newline. 1041 Range = Range.substr(Range.find_first_of("\n\r")); 1042 if (Range.empty()) return NumNewLines; 1043 1044 ++NumNewLines; 1045 1046 // Handle \n\r and \r\n as a single newline. 1047 if (Range.size() > 1 && 1048 (Range[1] == '\n' || Range[1] == '\r') && 1049 (Range[0] != Range[1])) 1050 Range = Range.substr(1); 1051 Range = Range.substr(1); 1052 1053 if (NumNewLines == 1) 1054 FirstNewLine = Range.begin(); 1055 } 1056 } 1057 1058 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer, 1059 bool IsLabelScanMode, size_t &MatchLen, 1060 StringMap<StringRef> &VariableTable) const { 1061 size_t LastPos = 0; 1062 std::vector<const Pattern *> NotStrings; 1063 1064 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL 1065 // bounds; we have not processed variable definitions within the bounded block 1066 // yet so cannot handle any final CHECK-DAG yet; this is handled when going 1067 // over the block again (including the last CHECK-LABEL) in normal mode. 1068 if (!IsLabelScanMode) { 1069 // Match "dag strings" (with mixed "not strings" if any). 1070 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable); 1071 if (LastPos == StringRef::npos) 1072 return StringRef::npos; 1073 } 1074 1075 // Match itself from the last position after matching CHECK-DAG. 1076 StringRef MatchBuffer = Buffer.substr(LastPos); 1077 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1078 if (MatchPos == StringRef::npos) { 1079 PrintCheckFailed(SM, *this, MatchBuffer, VariableTable); 1080 return StringRef::npos; 1081 } 1082 1083 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT 1084 // or CHECK-NOT 1085 if (!IsLabelScanMode) { 1086 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1087 1088 // If this check is a "CHECK-NEXT", verify that the previous match was on 1089 // the previous line (i.e. that there is one newline between them). 1090 if (CheckNext(SM, SkippedRegion)) 1091 return StringRef::npos; 1092 1093 // If this check is a "CHECK-SAME", verify that the previous match was on 1094 // the same line (i.e. that there is no newline between them). 1095 if (CheckSame(SM, SkippedRegion)) 1096 return StringRef::npos; 1097 1098 // If this match had "not strings", verify that they don't exist in the 1099 // skipped region. 1100 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1101 return StringRef::npos; 1102 } 1103 1104 return LastPos + MatchPos; 1105 } 1106 1107 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { 1108 if (Pat.getCheckTy() != Check::CheckNext) 1109 return false; 1110 1111 // Count the number of newlines between the previous match and this one. 1112 assert(Buffer.data() != 1113 SM.getMemoryBuffer( 1114 SM.FindBufferContainingLoc( 1115 SMLoc::getFromPointer(Buffer.data())))->getBufferStart() && 1116 "CHECK-NEXT can't be the first check in a file"); 1117 1118 const char *FirstNewLine = nullptr; 1119 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1120 1121 if (NumNewLines == 0) { 1122 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix + 1123 "-NEXT: is on the same line as previous match"); 1124 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), 1125 SourceMgr::DK_Note, "'next' match was here"); 1126 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1127 "previous match ended here"); 1128 return true; 1129 } 1130 1131 if (NumNewLines != 1) { 1132 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix + 1133 "-NEXT: is not on the line after the previous match"); 1134 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), 1135 SourceMgr::DK_Note, "'next' match was here"); 1136 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1137 "previous match ended here"); 1138 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, 1139 "non-matching line after previous match is here"); 1140 return true; 1141 } 1142 1143 return false; 1144 } 1145 1146 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const { 1147 if (Pat.getCheckTy() != Check::CheckSame) 1148 return false; 1149 1150 // Count the number of newlines between the previous match and this one. 1151 assert(Buffer.data() != 1152 SM.getMemoryBuffer(SM.FindBufferContainingLoc( 1153 SMLoc::getFromPointer(Buffer.data()))) 1154 ->getBufferStart() && 1155 "CHECK-SAME can't be the first check in a file"); 1156 1157 const char *FirstNewLine = nullptr; 1158 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1159 1160 if (NumNewLines != 0) { 1161 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1162 Prefix + 1163 "-SAME: is not on the same line as the previous match"); 1164 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1165 "'next' match was here"); 1166 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1167 "previous match ended here"); 1168 return true; 1169 } 1170 1171 return false; 1172 } 1173 1174 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer, 1175 const std::vector<const Pattern *> &NotStrings, 1176 StringMap<StringRef> &VariableTable) const { 1177 for (const Pattern *Pat : NotStrings) { 1178 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); 1179 1180 size_t MatchLen = 0; 1181 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable); 1182 1183 if (Pos == StringRef::npos) continue; 1184 1185 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos), 1186 SourceMgr::DK_Error, 1187 Prefix + "-NOT: string occurred!"); 1188 SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note, 1189 Prefix + "-NOT: pattern specified here"); 1190 return true; 1191 } 1192 1193 return false; 1194 } 1195 1196 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, 1197 std::vector<const Pattern *> &NotStrings, 1198 StringMap<StringRef> &VariableTable) const { 1199 if (DagNotStrings.empty()) 1200 return 0; 1201 1202 size_t LastPos = 0; 1203 size_t StartPos = LastPos; 1204 1205 for (const Pattern &Pat : DagNotStrings) { 1206 assert((Pat.getCheckTy() == Check::CheckDAG || 1207 Pat.getCheckTy() == Check::CheckNot) && 1208 "Invalid CHECK-DAG or CHECK-NOT!"); 1209 1210 if (Pat.getCheckTy() == Check::CheckNot) { 1211 NotStrings.push_back(&Pat); 1212 continue; 1213 } 1214 1215 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); 1216 1217 size_t MatchLen = 0, MatchPos; 1218 1219 // CHECK-DAG always matches from the start. 1220 StringRef MatchBuffer = Buffer.substr(StartPos); 1221 MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1222 // With a group of CHECK-DAGs, a single mismatching means the match on 1223 // that group of CHECK-DAGs fails immediately. 1224 if (MatchPos == StringRef::npos) { 1225 PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable); 1226 return StringRef::npos; 1227 } 1228 // Re-calc it as the offset relative to the start of the original string. 1229 MatchPos += StartPos; 1230 1231 if (!NotStrings.empty()) { 1232 if (MatchPos < LastPos) { 1233 // Reordered? 1234 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos), 1235 SourceMgr::DK_Error, 1236 Prefix + "-DAG: found a match of CHECK-DAG" 1237 " reordering across a CHECK-NOT"); 1238 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos), 1239 SourceMgr::DK_Note, 1240 Prefix + "-DAG: the farthest match of CHECK-DAG" 1241 " is found here"); 1242 SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note, 1243 Prefix + "-NOT: the crossed pattern specified" 1244 " here"); 1245 SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note, 1246 Prefix + "-DAG: the reordered pattern specified" 1247 " here"); 1248 return StringRef::npos; 1249 } 1250 // All subsequent CHECK-DAGs should be matched from the farthest 1251 // position of all precedent CHECK-DAGs (including this one.) 1252 StartPos = LastPos; 1253 // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to 1254 // CHECK-DAG, verify that there's no 'not' strings occurred in that 1255 // region. 1256 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1257 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1258 return StringRef::npos; 1259 // Clear "not strings". 1260 NotStrings.clear(); 1261 } 1262 1263 // Update the last position with CHECK-DAG matches. 1264 LastPos = std::max(MatchPos + MatchLen, LastPos); 1265 } 1266 1267 return LastPos; 1268 } 1269 1270 // A check prefix must contain only alphanumeric, hyphens and underscores. 1271 static bool ValidateCheckPrefix(StringRef CheckPrefix) { 1272 Regex Validator("^[a-zA-Z0-9_-]*$"); 1273 return Validator.match(CheckPrefix); 1274 } 1275 1276 static bool ValidateCheckPrefixes() { 1277 StringSet<> PrefixSet; 1278 1279 for (StringRef Prefix : CheckPrefixes) { 1280 // Reject empty prefixes. 1281 if (Prefix == "") 1282 return false; 1283 1284 if (!PrefixSet.insert(Prefix).second) 1285 return false; 1286 1287 if (!ValidateCheckPrefix(Prefix)) 1288 return false; 1289 } 1290 1291 return true; 1292 } 1293 1294 // I don't think there's a way to specify an initial value for cl::list, 1295 // so if nothing was specified, add the default 1296 static void AddCheckPrefixIfNeeded() { 1297 if (CheckPrefixes.empty()) 1298 CheckPrefixes.push_back("CHECK"); 1299 } 1300 1301 static void DumpCommandLine(int argc, char **argv) { 1302 errs() << "FileCheck command line: "; 1303 for (int I = 0; I < argc; I++) 1304 errs() << " " << argv[I]; 1305 errs() << "\n"; 1306 } 1307 1308 int main(int argc, char **argv) { 1309 sys::PrintStackTraceOnErrorSignal(argv[0]); 1310 PrettyStackTraceProgram X(argc, argv); 1311 cl::ParseCommandLineOptions(argc, argv); 1312 1313 if (!ValidateCheckPrefixes()) { 1314 errs() << "Supplied check-prefix is invalid! Prefixes must be unique and " 1315 "start with a letter and contain only alphanumeric characters, " 1316 "hyphens and underscores\n"; 1317 return 2; 1318 } 1319 1320 AddCheckPrefixIfNeeded(); 1321 1322 SourceMgr SM; 1323 1324 // Read the expected strings from the check file. 1325 std::vector<CheckString> CheckStrings; 1326 if (ReadCheckFile(SM, CheckStrings)) 1327 return 2; 1328 1329 // Open the file to check and add it to SourceMgr. 1330 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = 1331 MemoryBuffer::getFileOrSTDIN(InputFilename); 1332 if (std::error_code EC = FileOrErr.getError()) { 1333 errs() << "Could not open input file '" << InputFilename 1334 << "': " << EC.message() << '\n'; 1335 return 2; 1336 } 1337 std::unique_ptr<MemoryBuffer> &File = FileOrErr.get(); 1338 1339 if (File->getBufferSize() == 0 && !AllowEmptyInput) { 1340 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; 1341 DumpCommandLine(argc, argv); 1342 return 2; 1343 } 1344 1345 // Remove duplicate spaces in the input file if requested. 1346 // Remove DOS style line endings. 1347 std::unique_ptr<MemoryBuffer> F = 1348 CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace); 1349 1350 // Check that we have all of the expected strings, in order, in the input 1351 // file. 1352 StringRef Buffer = F->getBuffer(); 1353 1354 SM.AddNewSourceBuffer(std::move(F), SMLoc()); 1355 1356 /// VariableTable - This holds all the current filecheck variables. 1357 StringMap<StringRef> VariableTable; 1358 1359 bool hasError = false; 1360 1361 unsigned i = 0, j = 0, e = CheckStrings.size(); 1362 1363 while (true) { 1364 StringRef CheckRegion; 1365 if (j == e) { 1366 CheckRegion = Buffer; 1367 } else { 1368 const CheckString &CheckLabelStr = CheckStrings[j]; 1369 if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) { 1370 ++j; 1371 continue; 1372 } 1373 1374 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG 1375 size_t MatchLabelLen = 0; 1376 size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true, 1377 MatchLabelLen, VariableTable); 1378 if (MatchLabelPos == StringRef::npos) { 1379 hasError = true; 1380 break; 1381 } 1382 1383 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); 1384 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); 1385 ++j; 1386 } 1387 1388 for ( ; i != j; ++i) { 1389 const CheckString &CheckStr = CheckStrings[i]; 1390 1391 // Check each string within the scanned region, including a second check 1392 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) 1393 size_t MatchLen = 0; 1394 size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen, 1395 VariableTable); 1396 1397 if (MatchPos == StringRef::npos) { 1398 hasError = true; 1399 i = j; 1400 break; 1401 } 1402 1403 CheckRegion = CheckRegion.substr(MatchPos + MatchLen); 1404 } 1405 1406 if (j == e) 1407 break; 1408 } 1409 1410 return hasError ? 1 : 0; 1411 } 1412