1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // FileCheck does a line-by line check of a file that validates whether it 11 // contains the expected content. This is useful for regression tests etc. 12 // 13 // This program exits with an error status of 2 on error, exit status of 0 if 14 // the file matched the expected contents, and exit status of 1 if it did not 15 // contain the expected contents. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #include "llvm/ADT/SmallString.h" 20 #include "llvm/ADT/StringExtras.h" 21 #include "llvm/ADT/StringMap.h" 22 #include "llvm/ADT/StringSet.h" 23 #include "llvm/Support/CommandLine.h" 24 #include "llvm/Support/MemoryBuffer.h" 25 #include "llvm/Support/PrettyStackTrace.h" 26 #include "llvm/Support/Regex.h" 27 #include "llvm/Support/Signals.h" 28 #include "llvm/Support/SourceMgr.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include <algorithm> 31 #include <cctype> 32 #include <map> 33 #include <string> 34 #include <system_error> 35 #include <vector> 36 using namespace llvm; 37 38 static cl::opt<std::string> 39 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required); 40 41 static cl::opt<std::string> 42 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"), 43 cl::init("-"), cl::value_desc("filename")); 44 45 static cl::list<std::string> CheckPrefixes( 46 "check-prefix", 47 cl::desc("Prefix to use from check file (defaults to 'CHECK')")); 48 static cl::alias CheckPrefixesAlias( 49 "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated, 50 cl::NotHidden, 51 cl::desc( 52 "Alias for -check-prefix permitting multiple comma separated values")); 53 54 static cl::opt<bool> NoCanonicalizeWhiteSpace( 55 "strict-whitespace", 56 cl::desc("Do not treat all horizontal whitespace as equivalent")); 57 58 static cl::list<std::string> ImplicitCheckNot( 59 "implicit-check-not", 60 cl::desc("Add an implicit negative check with this pattern to every\n" 61 "positive check. This can be used to ensure that no instances of\n" 62 "this pattern occur which are not matched by a positive pattern"), 63 cl::value_desc("pattern")); 64 65 static cl::opt<bool> AllowEmptyInput( 66 "allow-empty", cl::init(false), 67 cl::desc("Allow the input file to be empty. This is useful when making\n" 68 "checks that some error message does not occur, for example.")); 69 70 static cl::opt<bool> MatchFullLines( 71 "match-full-lines", cl::init(false), 72 cl::desc("Require all positive matches to cover an entire input line.\n" 73 "Allows leading and trailing whitespace if --strict-whitespace\n" 74 "is not also passed.")); 75 76 typedef cl::list<std::string>::const_iterator prefix_iterator; 77 78 //===----------------------------------------------------------------------===// 79 // Pattern Handling Code. 80 //===----------------------------------------------------------------------===// 81 82 namespace Check { 83 enum CheckType { 84 CheckNone = 0, 85 CheckPlain, 86 CheckNext, 87 CheckSame, 88 CheckNot, 89 CheckDAG, 90 CheckLabel, 91 92 /// MatchEOF - When set, this pattern only matches the end of file. This is 93 /// used for trailing CHECK-NOTs. 94 CheckEOF, 95 /// CheckBadNot - Found -NOT combined with another CHECK suffix. 96 CheckBadNot 97 }; 98 } 99 100 class Pattern { 101 SMLoc PatternLoc; 102 103 /// FixedStr - If non-empty, this pattern is a fixed string match with the 104 /// specified fixed string. 105 StringRef FixedStr; 106 107 /// RegEx - If non-empty, this is a regex pattern. 108 std::string RegExStr; 109 110 /// VariableUses - Entries in this vector map to uses of a variable in the 111 /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain 112 /// "foobaz" and we'll get an entry in this vector that tells us to insert the 113 /// value of bar at offset 3. 114 std::vector<std::pair<StringRef, unsigned>> VariableUses; 115 116 /// VariableDefs - Maps definitions of variables to their parenthesized 117 /// capture numbers. 118 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1. 119 std::map<StringRef, unsigned> VariableDefs; 120 121 Check::CheckType CheckTy; 122 123 /// \brief Contains the number of line this pattern is in. 124 unsigned LineNumber; 125 126 public: 127 explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {} 128 129 /// getLoc - Return the location in source code. 130 SMLoc getLoc() const { return PatternLoc; } 131 132 /// ParsePattern - Parse the given string into the Pattern. Prefix provides 133 /// which prefix is being matched, SM provides the SourceMgr used for error 134 /// reports, and LineNumber is the line number in the input file from which 135 /// the pattern string was read. Returns true in case of an error, false 136 /// otherwise. 137 bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM, 138 unsigned LineNumber); 139 140 /// Match - Match the pattern string against the input buffer Buffer. This 141 /// returns the position that is matched or npos if there is no match. If 142 /// there is a match, the size of the matched string is returned in MatchLen. 143 /// 144 /// The VariableTable StringMap provides the current values of filecheck 145 /// variables and is updated if this match defines new values. 146 size_t Match(StringRef Buffer, size_t &MatchLen, 147 StringMap<StringRef> &VariableTable) const; 148 149 /// PrintFailureInfo - Print additional information about a failure to match 150 /// involving this pattern. 151 void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 152 const StringMap<StringRef> &VariableTable) const; 153 154 bool hasVariable() const { 155 return !(VariableUses.empty() && VariableDefs.empty()); 156 } 157 158 Check::CheckType getCheckTy() const { return CheckTy; } 159 160 private: 161 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); 162 void AddBackrefToRegEx(unsigned BackrefNum); 163 164 /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of 165 /// matching this pattern at the start of \arg Buffer; a distance of zero 166 /// should correspond to a perfect match. 167 unsigned 168 ComputeMatchDistance(StringRef Buffer, 169 const StringMap<StringRef> &VariableTable) const; 170 171 /// \brief Evaluates expression and stores the result to \p Value. 172 /// \return true on success. false when the expression has invalid syntax. 173 bool EvaluateExpression(StringRef Expr, std::string &Value) const; 174 175 /// \brief Finds the closing sequence of a regex variable usage or 176 /// definition. Str has to point in the beginning of the definition 177 /// (right after the opening sequence). 178 /// \return offset of the closing sequence within Str, or npos if it was not 179 /// found. 180 size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); 181 }; 182 183 bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix, 184 SourceMgr &SM, unsigned LineNumber) { 185 bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot; 186 187 this->LineNumber = LineNumber; 188 PatternLoc = SMLoc::getFromPointer(PatternStr.data()); 189 190 // Ignore trailing whitespace. 191 while (!PatternStr.empty() && 192 (PatternStr.back() == ' ' || PatternStr.back() == '\t')) 193 PatternStr = PatternStr.substr(0, PatternStr.size() - 1); 194 195 // Check that there is something on the line. 196 if (PatternStr.empty()) { 197 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error, 198 "found empty check string with prefix '" + Prefix + ":'"); 199 return true; 200 } 201 202 // Check to see if this is a fixed string, or if it has regex pieces. 203 if (!MatchFullLinesHere && 204 (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos && 205 PatternStr.find("[[") == StringRef::npos))) { 206 FixedStr = PatternStr; 207 return false; 208 } 209 210 if (MatchFullLinesHere) { 211 RegExStr += '^'; 212 if (!NoCanonicalizeWhiteSpace) 213 RegExStr += " *"; 214 } 215 216 // Paren value #0 is for the fully matched string. Any new parenthesized 217 // values add from there. 218 unsigned CurParen = 1; 219 220 // Otherwise, there is at least one regex piece. Build up the regex pattern 221 // by escaping scary characters in fixed strings, building up one big regex. 222 while (!PatternStr.empty()) { 223 // RegEx matches. 224 if (PatternStr.startswith("{{")) { 225 // This is the start of a regex match. Scan for the }}. 226 size_t End = PatternStr.find("}}"); 227 if (End == StringRef::npos) { 228 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 229 SourceMgr::DK_Error, 230 "found start of regex string with no end '}}'"); 231 return true; 232 } 233 234 // Enclose {{}} patterns in parens just like [[]] even though we're not 235 // capturing the result for any purpose. This is required in case the 236 // expression contains an alternation like: CHECK: abc{{x|z}}def. We 237 // want this to turn into: "abc(x|z)def" not "abcx|zdef". 238 RegExStr += '('; 239 ++CurParen; 240 241 if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM)) 242 return true; 243 RegExStr += ')'; 244 245 PatternStr = PatternStr.substr(End + 2); 246 continue; 247 } 248 249 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .* 250 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The 251 // second form is [[foo]] which is a reference to foo. The variable name 252 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject 253 // it. This is to catch some common errors. 254 if (PatternStr.startswith("[[")) { 255 // Find the closing bracket pair ending the match. End is going to be an 256 // offset relative to the beginning of the match string. 257 size_t End = FindRegexVarEnd(PatternStr.substr(2), SM); 258 259 if (End == StringRef::npos) { 260 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 261 SourceMgr::DK_Error, 262 "invalid named regex reference, no ]] found"); 263 return true; 264 } 265 266 StringRef MatchStr = PatternStr.substr(2, End); 267 PatternStr = PatternStr.substr(End + 4); 268 269 // Get the regex name (e.g. "foo"). 270 size_t NameEnd = MatchStr.find(':'); 271 StringRef Name = MatchStr.substr(0, NameEnd); 272 273 if (Name.empty()) { 274 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 275 "invalid name in named regex: empty name"); 276 return true; 277 } 278 279 // Verify that the name/expression is well formed. FileCheck currently 280 // supports @LINE, @LINE+number, @LINE-number expressions. The check here 281 // is relaxed, more strict check is performed in \c EvaluateExpression. 282 bool IsExpression = false; 283 for (unsigned i = 0, e = Name.size(); i != e; ++i) { 284 if (i == 0 && Name[i] == '@') { 285 if (NameEnd != StringRef::npos) { 286 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 287 SourceMgr::DK_Error, 288 "invalid name in named regex definition"); 289 return true; 290 } 291 IsExpression = true; 292 continue; 293 } 294 if (Name[i] != '_' && !isalnum(Name[i]) && 295 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) { 296 SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i), 297 SourceMgr::DK_Error, "invalid name in named regex"); 298 return true; 299 } 300 } 301 302 // Name can't start with a digit. 303 if (isdigit(static_cast<unsigned char>(Name[0]))) { 304 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 305 "invalid name in named regex"); 306 return true; 307 } 308 309 // Handle [[foo]]. 310 if (NameEnd == StringRef::npos) { 311 // Handle variables that were defined earlier on the same line by 312 // emitting a backreference. 313 if (VariableDefs.find(Name) != VariableDefs.end()) { 314 unsigned VarParenNum = VariableDefs[Name]; 315 if (VarParenNum < 1 || VarParenNum > 9) { 316 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 317 SourceMgr::DK_Error, 318 "Can't back-reference more than 9 variables"); 319 return true; 320 } 321 AddBackrefToRegEx(VarParenNum); 322 } else { 323 VariableUses.push_back(std::make_pair(Name, RegExStr.size())); 324 } 325 continue; 326 } 327 328 // Handle [[foo:.*]]. 329 VariableDefs[Name] = CurParen; 330 RegExStr += '('; 331 ++CurParen; 332 333 if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM)) 334 return true; 335 336 RegExStr += ')'; 337 } 338 339 // Handle fixed string matches. 340 // Find the end, which is the start of the next regex. 341 size_t FixedMatchEnd = PatternStr.find("{{"); 342 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); 343 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); 344 PatternStr = PatternStr.substr(FixedMatchEnd); 345 } 346 347 if (MatchFullLinesHere) { 348 if (!NoCanonicalizeWhiteSpace) 349 RegExStr += " *"; 350 RegExStr += '$'; 351 } 352 353 return false; 354 } 355 356 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) { 357 Regex R(RS); 358 std::string Error; 359 if (!R.isValid(Error)) { 360 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, 361 "invalid regex: " + Error); 362 return true; 363 } 364 365 RegExStr += RS.str(); 366 CurParen += R.getNumMatches(); 367 return false; 368 } 369 370 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { 371 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); 372 std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum); 373 RegExStr += Backref; 374 } 375 376 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const { 377 // The only supported expression is @LINE([\+-]\d+)? 378 if (!Expr.startswith("@LINE")) 379 return false; 380 Expr = Expr.substr(StringRef("@LINE").size()); 381 int Offset = 0; 382 if (!Expr.empty()) { 383 if (Expr[0] == '+') 384 Expr = Expr.substr(1); 385 else if (Expr[0] != '-') 386 return false; 387 if (Expr.getAsInteger(10, Offset)) 388 return false; 389 } 390 Value = llvm::itostr(LineNumber + Offset); 391 return true; 392 } 393 394 /// Match - Match the pattern string against the input buffer Buffer. This 395 /// returns the position that is matched or npos if there is no match. If 396 /// there is a match, the size of the matched string is returned in MatchLen. 397 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, 398 StringMap<StringRef> &VariableTable) const { 399 // If this is the EOF pattern, match it immediately. 400 if (CheckTy == Check::CheckEOF) { 401 MatchLen = 0; 402 return Buffer.size(); 403 } 404 405 // If this is a fixed string pattern, just match it now. 406 if (!FixedStr.empty()) { 407 MatchLen = FixedStr.size(); 408 return Buffer.find(FixedStr); 409 } 410 411 // Regex match. 412 413 // If there are variable uses, we need to create a temporary string with the 414 // actual value. 415 StringRef RegExToMatch = RegExStr; 416 std::string TmpStr; 417 if (!VariableUses.empty()) { 418 TmpStr = RegExStr; 419 420 unsigned InsertOffset = 0; 421 for (const auto &VariableUse : VariableUses) { 422 std::string Value; 423 424 if (VariableUse.first[0] == '@') { 425 if (!EvaluateExpression(VariableUse.first, Value)) 426 return StringRef::npos; 427 } else { 428 StringMap<StringRef>::iterator it = 429 VariableTable.find(VariableUse.first); 430 // If the variable is undefined, return an error. 431 if (it == VariableTable.end()) 432 return StringRef::npos; 433 434 // Look up the value and escape it so that we can put it into the regex. 435 Value += Regex::escape(it->second); 436 } 437 438 // Plop it into the regex at the adjusted offset. 439 TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset, 440 Value.begin(), Value.end()); 441 InsertOffset += Value.size(); 442 } 443 444 // Match the newly constructed regex. 445 RegExToMatch = TmpStr; 446 } 447 448 SmallVector<StringRef, 4> MatchInfo; 449 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) 450 return StringRef::npos; 451 452 // Successful regex match. 453 assert(!MatchInfo.empty() && "Didn't get any match"); 454 StringRef FullMatch = MatchInfo[0]; 455 456 // If this defines any variables, remember their values. 457 for (const auto &VariableDef : VariableDefs) { 458 assert(VariableDef.second < MatchInfo.size() && "Internal paren error"); 459 VariableTable[VariableDef.first] = MatchInfo[VariableDef.second]; 460 } 461 462 MatchLen = FullMatch.size(); 463 return FullMatch.data() - Buffer.data(); 464 } 465 466 unsigned 467 Pattern::ComputeMatchDistance(StringRef Buffer, 468 const StringMap<StringRef> &VariableTable) const { 469 // Just compute the number of matching characters. For regular expressions, we 470 // just compare against the regex itself and hope for the best. 471 // 472 // FIXME: One easy improvement here is have the regex lib generate a single 473 // example regular expression which matches, and use that as the example 474 // string. 475 StringRef ExampleString(FixedStr); 476 if (ExampleString.empty()) 477 ExampleString = RegExStr; 478 479 // Only compare up to the first line in the buffer, or the string size. 480 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); 481 BufferPrefix = BufferPrefix.split('\n').first; 482 return BufferPrefix.edit_distance(ExampleString); 483 } 484 485 void Pattern::PrintFailureInfo( 486 const SourceMgr &SM, StringRef Buffer, 487 const StringMap<StringRef> &VariableTable) const { 488 // If this was a regular expression using variables, print the current 489 // variable values. 490 if (!VariableUses.empty()) { 491 for (const auto &VariableUse : VariableUses) { 492 SmallString<256> Msg; 493 raw_svector_ostream OS(Msg); 494 StringRef Var = VariableUse.first; 495 if (Var[0] == '@') { 496 std::string Value; 497 if (EvaluateExpression(Var, Value)) { 498 OS << "with expression \""; 499 OS.write_escaped(Var) << "\" equal to \""; 500 OS.write_escaped(Value) << "\""; 501 } else { 502 OS << "uses incorrect expression \""; 503 OS.write_escaped(Var) << "\""; 504 } 505 } else { 506 StringMap<StringRef>::const_iterator it = VariableTable.find(Var); 507 508 // Check for undefined variable references. 509 if (it == VariableTable.end()) { 510 OS << "uses undefined variable \""; 511 OS.write_escaped(Var) << "\""; 512 } else { 513 OS << "with variable \""; 514 OS.write_escaped(Var) << "\" equal to \""; 515 OS.write_escaped(it->second) << "\""; 516 } 517 } 518 519 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 520 OS.str()); 521 } 522 } 523 524 // Attempt to find the closest/best fuzzy match. Usually an error happens 525 // because some string in the output didn't exactly match. In these cases, we 526 // would like to show the user a best guess at what "should have" matched, to 527 // save them having to actually check the input manually. 528 size_t NumLinesForward = 0; 529 size_t Best = StringRef::npos; 530 double BestQuality = 0; 531 532 // Use an arbitrary 4k limit on how far we will search. 533 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { 534 if (Buffer[i] == '\n') 535 ++NumLinesForward; 536 537 // Patterns have leading whitespace stripped, so skip whitespace when 538 // looking for something which looks like a pattern. 539 if (Buffer[i] == ' ' || Buffer[i] == '\t') 540 continue; 541 542 // Compute the "quality" of this match as an arbitrary combination of the 543 // match distance and the number of lines skipped to get to this match. 544 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable); 545 double Quality = Distance + (NumLinesForward / 100.); 546 547 if (Quality < BestQuality || Best == StringRef::npos) { 548 Best = i; 549 BestQuality = Quality; 550 } 551 } 552 553 // Print the "possible intended match here" line if we found something 554 // reasonable and not equal to what we showed in the "scanning from here" 555 // line. 556 if (Best && Best != StringRef::npos && BestQuality < 50) { 557 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best), 558 SourceMgr::DK_Note, "possible intended match here"); 559 560 // FIXME: If we wanted to be really friendly we would show why the match 561 // failed, as it can be hard to spot simple one character differences. 562 } 563 } 564 565 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { 566 // Offset keeps track of the current offset within the input Str 567 size_t Offset = 0; 568 // [...] Nesting depth 569 size_t BracketDepth = 0; 570 571 while (!Str.empty()) { 572 if (Str.startswith("]]") && BracketDepth == 0) 573 return Offset; 574 if (Str[0] == '\\') { 575 // Backslash escapes the next char within regexes, so skip them both. 576 Str = Str.substr(2); 577 Offset += 2; 578 } else { 579 switch (Str[0]) { 580 default: 581 break; 582 case '[': 583 BracketDepth++; 584 break; 585 case ']': 586 if (BracketDepth == 0) { 587 SM.PrintMessage(SMLoc::getFromPointer(Str.data()), 588 SourceMgr::DK_Error, 589 "missing closing \"]\" for regex variable"); 590 exit(1); 591 } 592 BracketDepth--; 593 break; 594 } 595 Str = Str.substr(1); 596 Offset++; 597 } 598 } 599 600 return StringRef::npos; 601 } 602 603 //===----------------------------------------------------------------------===// 604 // Check Strings. 605 //===----------------------------------------------------------------------===// 606 607 /// CheckString - This is a check that we found in the input file. 608 struct CheckString { 609 /// Pat - The pattern to match. 610 Pattern Pat; 611 612 /// Prefix - Which prefix name this check matched. 613 StringRef Prefix; 614 615 /// Loc - The location in the match file that the check string was specified. 616 SMLoc Loc; 617 618 /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive, 619 /// as opposed to a CHECK: directive. 620 // Check::CheckType CheckTy; 621 622 /// DagNotStrings - These are all of the strings that are disallowed from 623 /// occurring between this match string and the previous one (or start of 624 /// file). 625 std::vector<Pattern> DagNotStrings; 626 627 CheckString(const Pattern &P, StringRef S, SMLoc L) 628 : Pat(P), Prefix(S), Loc(L) {} 629 630 /// Check - Match check string and its "not strings" and/or "dag strings". 631 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, 632 size_t &MatchLen, StringMap<StringRef> &VariableTable) const; 633 634 /// CheckNext - Verify there is a single line in the given buffer. 635 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; 636 637 /// CheckSame - Verify there is no newline in the given buffer. 638 bool CheckSame(const SourceMgr &SM, StringRef Buffer) const; 639 640 /// CheckNot - Verify there's no "not strings" in the given buffer. 641 bool CheckNot(const SourceMgr &SM, StringRef Buffer, 642 const std::vector<const Pattern *> &NotStrings, 643 StringMap<StringRef> &VariableTable) const; 644 645 /// CheckDag - Match "dag strings" and their mixed "not strings". 646 size_t CheckDag(const SourceMgr &SM, StringRef Buffer, 647 std::vector<const Pattern *> &NotStrings, 648 StringMap<StringRef> &VariableTable) const; 649 }; 650 651 /// Canonicalize whitespaces in the file. Line endings are replaced with 652 /// UNIX-style '\n'. 653 /// 654 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace 655 /// characters to a single space. 656 static StringRef CanonicalizeFile(MemoryBuffer &MB, bool PreserveHorizontal, 657 SmallVectorImpl<char> &OutputBuffer) { 658 OutputBuffer.reserve(MB.getBufferSize()); 659 660 for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd(); 661 Ptr != End; ++Ptr) { 662 // Eliminate trailing dosish \r. 663 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { 664 continue; 665 } 666 667 // If current char is not a horizontal whitespace or if horizontal 668 // whitespace canonicalization is disabled, dump it to output as is. 669 if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) { 670 OutputBuffer.push_back(*Ptr); 671 continue; 672 } 673 674 // Otherwise, add one space and advance over neighboring space. 675 OutputBuffer.push_back(' '); 676 while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t')) 677 ++Ptr; 678 } 679 680 // Add a null byte and then return all but that byte. 681 OutputBuffer.push_back('\0'); 682 return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1); 683 } 684 685 static bool IsPartOfWord(char c) { 686 return (isalnum(c) || c == '-' || c == '_'); 687 } 688 689 // Get the size of the prefix extension. 690 static size_t CheckTypeSize(Check::CheckType Ty) { 691 switch (Ty) { 692 case Check::CheckNone: 693 case Check::CheckBadNot: 694 return 0; 695 696 case Check::CheckPlain: 697 return sizeof(":") - 1; 698 699 case Check::CheckNext: 700 return sizeof("-NEXT:") - 1; 701 702 case Check::CheckSame: 703 return sizeof("-SAME:") - 1; 704 705 case Check::CheckNot: 706 return sizeof("-NOT:") - 1; 707 708 case Check::CheckDAG: 709 return sizeof("-DAG:") - 1; 710 711 case Check::CheckLabel: 712 return sizeof("-LABEL:") - 1; 713 714 case Check::CheckEOF: 715 llvm_unreachable("Should not be using EOF size"); 716 } 717 718 llvm_unreachable("Bad check type"); 719 } 720 721 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) { 722 char NextChar = Buffer[Prefix.size()]; 723 724 // Verify that the : is present after the prefix. 725 if (NextChar == ':') 726 return Check::CheckPlain; 727 728 if (NextChar != '-') 729 return Check::CheckNone; 730 731 StringRef Rest = Buffer.drop_front(Prefix.size() + 1); 732 if (Rest.startswith("NEXT:")) 733 return Check::CheckNext; 734 735 if (Rest.startswith("SAME:")) 736 return Check::CheckSame; 737 738 if (Rest.startswith("NOT:")) 739 return Check::CheckNot; 740 741 if (Rest.startswith("DAG:")) 742 return Check::CheckDAG; 743 744 if (Rest.startswith("LABEL:")) 745 return Check::CheckLabel; 746 747 // You can't combine -NOT with another suffix. 748 if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") || 749 Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") || 750 Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:")) 751 return Check::CheckBadNot; 752 753 return Check::CheckNone; 754 } 755 756 // From the given position, find the next character after the word. 757 static size_t SkipWord(StringRef Str, size_t Loc) { 758 while (Loc < Str.size() && IsPartOfWord(Str[Loc])) 759 ++Loc; 760 return Loc; 761 } 762 763 // Try to find the first match in buffer for any prefix. If a valid match is 764 // found, return that prefix and set its type and location. If there are almost 765 // matches (e.g. the actual prefix string is found, but is not an actual check 766 // string), but no valid match, return an empty string and set the position to 767 // resume searching from. If no partial matches are found, return an empty 768 // string and the location will be StringRef::npos. If one prefix is a substring 769 // of another, the maximal match should be found. e.g. if "A" and "AA" are 770 // prefixes then AA-CHECK: should match the second one. 771 static StringRef FindFirstCandidateMatch(StringRef &Buffer, 772 Check::CheckType &CheckTy, 773 size_t &CheckLoc) { 774 StringRef FirstPrefix; 775 size_t FirstLoc = StringRef::npos; 776 size_t SearchLoc = StringRef::npos; 777 Check::CheckType FirstTy = Check::CheckNone; 778 779 CheckTy = Check::CheckNone; 780 CheckLoc = StringRef::npos; 781 782 for (StringRef Prefix : CheckPrefixes) { 783 size_t PrefixLoc = Buffer.find(Prefix); 784 785 if (PrefixLoc == StringRef::npos) 786 continue; 787 788 // Track where we are searching for invalid prefixes that look almost right. 789 // We need to only advance to the first partial match on the next attempt 790 // since a partial match could be a substring of a later, valid prefix. 791 // Need to skip to the end of the word, otherwise we could end up 792 // matching a prefix in a substring later. 793 if (PrefixLoc < SearchLoc) 794 SearchLoc = SkipWord(Buffer, PrefixLoc); 795 796 // We only want to find the first match to avoid skipping some. 797 if (PrefixLoc > FirstLoc) 798 continue; 799 // If one matching check-prefix is a prefix of another, choose the 800 // longer one. 801 if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size()) 802 continue; 803 804 StringRef Rest = Buffer.drop_front(PrefixLoc); 805 // Make sure we have actually found the prefix, and not a word containing 806 // it. This should also prevent matching the wrong prefix when one is a 807 // substring of another. 808 if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1])) 809 FirstTy = Check::CheckNone; 810 else 811 FirstTy = FindCheckType(Rest, Prefix); 812 813 FirstLoc = PrefixLoc; 814 FirstPrefix = Prefix; 815 } 816 817 // If the first prefix is invalid, we should continue the search after it. 818 if (FirstTy == Check::CheckNone) { 819 CheckLoc = SearchLoc; 820 return ""; 821 } 822 823 CheckTy = FirstTy; 824 CheckLoc = FirstLoc; 825 return FirstPrefix; 826 } 827 828 static StringRef FindFirstMatchingPrefix(StringRef &Buffer, 829 unsigned &LineNumber, 830 Check::CheckType &CheckTy, 831 size_t &CheckLoc) { 832 while (!Buffer.empty()) { 833 StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc); 834 // If we found a real match, we are done. 835 if (!Prefix.empty()) { 836 LineNumber += Buffer.substr(0, CheckLoc).count('\n'); 837 return Prefix; 838 } 839 840 // We didn't find any almost matches either, we are also done. 841 if (CheckLoc == StringRef::npos) 842 return StringRef(); 843 844 LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n'); 845 846 // Advance to the last possible match we found and try again. 847 Buffer = Buffer.drop_front(CheckLoc + 1); 848 } 849 850 return StringRef(); 851 } 852 853 /// ReadCheckFile - Read the check file, which specifies the sequence of 854 /// expected strings. The strings are added to the CheckStrings vector. 855 /// Returns true in case of an error, false otherwise. 856 static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, 857 std::vector<CheckString> &CheckStrings) { 858 std::vector<Pattern> ImplicitNegativeChecks; 859 for (const auto &PatternString : ImplicitCheckNot) { 860 // Create a buffer with fake command line content in order to display the 861 // command line option responsible for the specific implicit CHECK-NOT. 862 std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str(); 863 std::string Suffix = "'"; 864 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy( 865 Prefix + PatternString + Suffix, "command line"); 866 867 StringRef PatternInBuffer = 868 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); 869 SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); 870 871 ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot)); 872 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer, 873 "IMPLICIT-CHECK", SM, 0); 874 } 875 876 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks; 877 878 // LineNumber keeps track of the line on which CheckPrefix instances are 879 // found. 880 unsigned LineNumber = 1; 881 882 while (1) { 883 Check::CheckType CheckTy; 884 size_t PrefixLoc; 885 886 // See if a prefix occurs in the memory buffer. 887 StringRef UsedPrefix = 888 FindFirstMatchingPrefix(Buffer, LineNumber, CheckTy, PrefixLoc); 889 if (UsedPrefix.empty()) 890 break; 891 892 Buffer = Buffer.drop_front(PrefixLoc); 893 894 // Location to use for error messages. 895 const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1); 896 897 // PrefixLoc is to the start of the prefix. Skip to the end. 898 Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy)); 899 900 // Complain about useful-looking but unsupported suffixes. 901 if (CheckTy == Check::CheckBadNot) { 902 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, 903 "unsupported -NOT combo on prefix '" + UsedPrefix + "'"); 904 return true; 905 } 906 907 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore 908 // leading and trailing whitespace. 909 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 910 911 // Scan ahead to the end of line. 912 size_t EOL = Buffer.find_first_of("\n\r"); 913 914 // Remember the location of the start of the pattern, for diagnostics. 915 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); 916 917 // Parse the pattern. 918 Pattern P(CheckTy); 919 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber)) 920 return true; 921 922 // Verify that CHECK-LABEL lines do not define or use variables 923 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { 924 SM.PrintMessage( 925 SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error, 926 "found '" + UsedPrefix + "-LABEL:'" 927 " with variable definition or use"); 928 return true; 929 } 930 931 Buffer = Buffer.substr(EOL); 932 933 // Verify that CHECK-NEXT lines have at least one CHECK line before them. 934 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) && 935 CheckStrings.empty()) { 936 StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME"; 937 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 938 SourceMgr::DK_Error, 939 "found '" + UsedPrefix + "-" + Type + 940 "' without previous '" + UsedPrefix + ": line"); 941 return true; 942 } 943 944 // Handle CHECK-DAG/-NOT. 945 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { 946 DagNotMatches.push_back(P); 947 continue; 948 } 949 950 // Okay, add the string we captured to the output vector and move on. 951 CheckStrings.emplace_back(P, UsedPrefix, PatternLoc); 952 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 953 DagNotMatches = ImplicitNegativeChecks; 954 } 955 956 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first 957 // prefix as a filler for the error message. 958 if (!DagNotMatches.empty()) { 959 CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(), 960 SMLoc::getFromPointer(Buffer.data())); 961 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 962 } 963 964 if (CheckStrings.empty()) { 965 errs() << "error: no check strings found with prefix" 966 << (CheckPrefixes.size() > 1 ? "es " : " "); 967 prefix_iterator I = CheckPrefixes.begin(); 968 prefix_iterator E = CheckPrefixes.end(); 969 if (I != E) { 970 errs() << "\'" << *I << ":'"; 971 ++I; 972 } 973 for (; I != E; ++I) 974 errs() << ", \'" << *I << ":'"; 975 976 errs() << '\n'; 977 return true; 978 } 979 980 return false; 981 } 982 983 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, const Pattern &Pat, 984 StringRef Buffer, 985 StringMap<StringRef> &VariableTable) { 986 // Otherwise, we have an error, emit an error message. 987 SM.PrintMessage(Loc, SourceMgr::DK_Error, 988 "expected string not found in input"); 989 990 // Print the "scanning from here" line. If the current position is at the 991 // end of a line, advance to the start of the next line. 992 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); 993 994 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 995 "scanning from here"); 996 997 // Allow the pattern to print additional information if desired. 998 Pat.PrintFailureInfo(SM, Buffer, VariableTable); 999 } 1000 1001 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr, 1002 StringRef Buffer, 1003 StringMap<StringRef> &VariableTable) { 1004 PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable); 1005 } 1006 1007 /// CountNumNewlinesBetween - Count the number of newlines in the specified 1008 /// range. 1009 static unsigned CountNumNewlinesBetween(StringRef Range, 1010 const char *&FirstNewLine) { 1011 unsigned NumNewLines = 0; 1012 while (1) { 1013 // Scan for newline. 1014 Range = Range.substr(Range.find_first_of("\n\r")); 1015 if (Range.empty()) 1016 return NumNewLines; 1017 1018 ++NumNewLines; 1019 1020 // Handle \n\r and \r\n as a single newline. 1021 if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') && 1022 (Range[0] != Range[1])) 1023 Range = Range.substr(1); 1024 Range = Range.substr(1); 1025 1026 if (NumNewLines == 1) 1027 FirstNewLine = Range.begin(); 1028 } 1029 } 1030 1031 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer, 1032 bool IsLabelScanMode, size_t &MatchLen, 1033 StringMap<StringRef> &VariableTable) const { 1034 size_t LastPos = 0; 1035 std::vector<const Pattern *> NotStrings; 1036 1037 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL 1038 // bounds; we have not processed variable definitions within the bounded block 1039 // yet so cannot handle any final CHECK-DAG yet; this is handled when going 1040 // over the block again (including the last CHECK-LABEL) in normal mode. 1041 if (!IsLabelScanMode) { 1042 // Match "dag strings" (with mixed "not strings" if any). 1043 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable); 1044 if (LastPos == StringRef::npos) 1045 return StringRef::npos; 1046 } 1047 1048 // Match itself from the last position after matching CHECK-DAG. 1049 StringRef MatchBuffer = Buffer.substr(LastPos); 1050 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1051 if (MatchPos == StringRef::npos) { 1052 PrintCheckFailed(SM, *this, MatchBuffer, VariableTable); 1053 return StringRef::npos; 1054 } 1055 1056 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT 1057 // or CHECK-NOT 1058 if (!IsLabelScanMode) { 1059 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1060 1061 // If this check is a "CHECK-NEXT", verify that the previous match was on 1062 // the previous line (i.e. that there is one newline between them). 1063 if (CheckNext(SM, SkippedRegion)) 1064 return StringRef::npos; 1065 1066 // If this check is a "CHECK-SAME", verify that the previous match was on 1067 // the same line (i.e. that there is no newline between them). 1068 if (CheckSame(SM, SkippedRegion)) 1069 return StringRef::npos; 1070 1071 // If this match had "not strings", verify that they don't exist in the 1072 // skipped region. 1073 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1074 return StringRef::npos; 1075 } 1076 1077 return LastPos + MatchPos; 1078 } 1079 1080 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { 1081 if (Pat.getCheckTy() != Check::CheckNext) 1082 return false; 1083 1084 // Count the number of newlines between the previous match and this one. 1085 assert(Buffer.data() != 1086 SM.getMemoryBuffer(SM.FindBufferContainingLoc( 1087 SMLoc::getFromPointer(Buffer.data()))) 1088 ->getBufferStart() && 1089 "CHECK-NEXT can't be the first check in a file"); 1090 1091 const char *FirstNewLine = nullptr; 1092 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1093 1094 if (NumNewLines == 0) { 1095 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1096 Prefix + "-NEXT: is on the same line as previous match"); 1097 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1098 "'next' match was here"); 1099 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1100 "previous match ended here"); 1101 return true; 1102 } 1103 1104 if (NumNewLines != 1) { 1105 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1106 Prefix + 1107 "-NEXT: is not on the line after the previous match"); 1108 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1109 "'next' match was here"); 1110 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1111 "previous match ended here"); 1112 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, 1113 "non-matching line after previous match is here"); 1114 return true; 1115 } 1116 1117 return false; 1118 } 1119 1120 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const { 1121 if (Pat.getCheckTy() != Check::CheckSame) 1122 return false; 1123 1124 // Count the number of newlines between the previous match and this one. 1125 assert(Buffer.data() != 1126 SM.getMemoryBuffer(SM.FindBufferContainingLoc( 1127 SMLoc::getFromPointer(Buffer.data()))) 1128 ->getBufferStart() && 1129 "CHECK-SAME can't be the first check in a file"); 1130 1131 const char *FirstNewLine = nullptr; 1132 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1133 1134 if (NumNewLines != 0) { 1135 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1136 Prefix + 1137 "-SAME: is not on the same line as the previous match"); 1138 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1139 "'next' match was here"); 1140 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1141 "previous match ended here"); 1142 return true; 1143 } 1144 1145 return false; 1146 } 1147 1148 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer, 1149 const std::vector<const Pattern *> &NotStrings, 1150 StringMap<StringRef> &VariableTable) const { 1151 for (const Pattern *Pat : NotStrings) { 1152 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); 1153 1154 size_t MatchLen = 0; 1155 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable); 1156 1157 if (Pos == StringRef::npos) 1158 continue; 1159 1160 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Pos), 1161 SourceMgr::DK_Error, Prefix + "-NOT: string occurred!"); 1162 SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note, 1163 Prefix + "-NOT: pattern specified here"); 1164 return true; 1165 } 1166 1167 return false; 1168 } 1169 1170 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, 1171 std::vector<const Pattern *> &NotStrings, 1172 StringMap<StringRef> &VariableTable) const { 1173 if (DagNotStrings.empty()) 1174 return 0; 1175 1176 size_t LastPos = 0; 1177 size_t StartPos = LastPos; 1178 1179 for (const Pattern &Pat : DagNotStrings) { 1180 assert((Pat.getCheckTy() == Check::CheckDAG || 1181 Pat.getCheckTy() == Check::CheckNot) && 1182 "Invalid CHECK-DAG or CHECK-NOT!"); 1183 1184 if (Pat.getCheckTy() == Check::CheckNot) { 1185 NotStrings.push_back(&Pat); 1186 continue; 1187 } 1188 1189 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); 1190 1191 size_t MatchLen = 0, MatchPos; 1192 1193 // CHECK-DAG always matches from the start. 1194 StringRef MatchBuffer = Buffer.substr(StartPos); 1195 MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1196 // With a group of CHECK-DAGs, a single mismatching means the match on 1197 // that group of CHECK-DAGs fails immediately. 1198 if (MatchPos == StringRef::npos) { 1199 PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable); 1200 return StringRef::npos; 1201 } 1202 // Re-calc it as the offset relative to the start of the original string. 1203 MatchPos += StartPos; 1204 1205 if (!NotStrings.empty()) { 1206 if (MatchPos < LastPos) { 1207 // Reordered? 1208 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos), 1209 SourceMgr::DK_Error, 1210 Prefix + "-DAG: found a match of CHECK-DAG" 1211 " reordering across a CHECK-NOT"); 1212 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos), 1213 SourceMgr::DK_Note, 1214 Prefix + "-DAG: the farthest match of CHECK-DAG" 1215 " is found here"); 1216 SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note, 1217 Prefix + "-NOT: the crossed pattern specified" 1218 " here"); 1219 SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note, 1220 Prefix + "-DAG: the reordered pattern specified" 1221 " here"); 1222 return StringRef::npos; 1223 } 1224 // All subsequent CHECK-DAGs should be matched from the farthest 1225 // position of all precedent CHECK-DAGs (including this one.) 1226 StartPos = LastPos; 1227 // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to 1228 // CHECK-DAG, verify that there's no 'not' strings occurred in that 1229 // region. 1230 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1231 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1232 return StringRef::npos; 1233 // Clear "not strings". 1234 NotStrings.clear(); 1235 } 1236 1237 // Update the last position with CHECK-DAG matches. 1238 LastPos = std::max(MatchPos + MatchLen, LastPos); 1239 } 1240 1241 return LastPos; 1242 } 1243 1244 // A check prefix must contain only alphanumeric, hyphens and underscores. 1245 static bool ValidateCheckPrefix(StringRef CheckPrefix) { 1246 Regex Validator("^[a-zA-Z0-9_-]*$"); 1247 return Validator.match(CheckPrefix); 1248 } 1249 1250 static bool ValidateCheckPrefixes() { 1251 StringSet<> PrefixSet; 1252 1253 for (StringRef Prefix : CheckPrefixes) { 1254 // Reject empty prefixes. 1255 if (Prefix == "") 1256 return false; 1257 1258 if (!PrefixSet.insert(Prefix).second) 1259 return false; 1260 1261 if (!ValidateCheckPrefix(Prefix)) 1262 return false; 1263 } 1264 1265 return true; 1266 } 1267 1268 // I don't think there's a way to specify an initial value for cl::list, 1269 // so if nothing was specified, add the default 1270 static void AddCheckPrefixIfNeeded() { 1271 if (CheckPrefixes.empty()) 1272 CheckPrefixes.push_back("CHECK"); 1273 } 1274 1275 static void DumpCommandLine(int argc, char **argv) { 1276 errs() << "FileCheck command line: "; 1277 for (int I = 0; I < argc; I++) 1278 errs() << " " << argv[I]; 1279 errs() << "\n"; 1280 } 1281 1282 /// Check the input to FileCheck provided in the \p Buffer against the \p 1283 /// CheckStrings read from the check file. 1284 /// 1285 /// Returns false if the input fails to satisfy the checks. 1286 bool CheckInput(SourceMgr &SM, StringRef Buffer, 1287 ArrayRef<CheckString> CheckStrings) { 1288 bool ChecksFailed = false; 1289 1290 /// VariableTable - This holds all the current filecheck variables. 1291 StringMap<StringRef> VariableTable; 1292 1293 unsigned i = 0, j = 0, e = CheckStrings.size(); 1294 while (true) { 1295 StringRef CheckRegion; 1296 if (j == e) { 1297 CheckRegion = Buffer; 1298 } else { 1299 const CheckString &CheckLabelStr = CheckStrings[j]; 1300 if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) { 1301 ++j; 1302 continue; 1303 } 1304 1305 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG 1306 size_t MatchLabelLen = 0; 1307 size_t MatchLabelPos = 1308 CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable); 1309 if (MatchLabelPos == StringRef::npos) 1310 // Immediately bail of CHECK-LABEL fails, nothing else we can do. 1311 return false; 1312 1313 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); 1314 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); 1315 ++j; 1316 } 1317 1318 for (; i != j; ++i) { 1319 const CheckString &CheckStr = CheckStrings[i]; 1320 1321 // Check each string within the scanned region, including a second check 1322 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) 1323 size_t MatchLen = 0; 1324 size_t MatchPos = 1325 CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable); 1326 1327 if (MatchPos == StringRef::npos) { 1328 ChecksFailed = true; 1329 i = j; 1330 break; 1331 } 1332 1333 CheckRegion = CheckRegion.substr(MatchPos + MatchLen); 1334 } 1335 1336 if (j == e) 1337 break; 1338 } 1339 1340 // Success if no checks failed. 1341 return !ChecksFailed; 1342 } 1343 1344 int main(int argc, char **argv) { 1345 sys::PrintStackTraceOnErrorSignal(argv[0]); 1346 PrettyStackTraceProgram X(argc, argv); 1347 cl::ParseCommandLineOptions(argc, argv); 1348 1349 if (!ValidateCheckPrefixes()) { 1350 errs() << "Supplied check-prefix is invalid! Prefixes must be unique and " 1351 "start with a letter and contain only alphanumeric characters, " 1352 "hyphens and underscores\n"; 1353 return 2; 1354 } 1355 1356 AddCheckPrefixIfNeeded(); 1357 1358 SourceMgr SM; 1359 1360 // Read the expected strings from the check file. 1361 ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr = 1362 MemoryBuffer::getFileOrSTDIN(CheckFilename); 1363 if (std::error_code EC = CheckFileOrErr.getError()) { 1364 errs() << "Could not open check file '" << CheckFilename 1365 << "': " << EC.message() << '\n'; 1366 return 2; 1367 } 1368 MemoryBuffer &CheckFile = *CheckFileOrErr.get(); 1369 1370 SmallString<4096> CheckFileBuffer; 1371 StringRef CheckFileText = 1372 CanonicalizeFile(CheckFile, NoCanonicalizeWhiteSpace, CheckFileBuffer); 1373 1374 SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( 1375 CheckFileText, CheckFile.getBufferIdentifier()), 1376 SMLoc()); 1377 1378 std::vector<CheckString> CheckStrings; 1379 if (ReadCheckFile(SM, CheckFileText, CheckStrings)) 1380 return 2; 1381 1382 // Open the file to check and add it to SourceMgr. 1383 ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr = 1384 MemoryBuffer::getFileOrSTDIN(InputFilename); 1385 if (std::error_code EC = InputFileOrErr.getError()) { 1386 errs() << "Could not open input file '" << InputFilename 1387 << "': " << EC.message() << '\n'; 1388 return 2; 1389 } 1390 MemoryBuffer &InputFile = *InputFileOrErr.get(); 1391 1392 if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) { 1393 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; 1394 DumpCommandLine(argc, argv); 1395 return 2; 1396 } 1397 1398 SmallString<4096> InputFileBuffer; 1399 StringRef InputFileText = 1400 CanonicalizeFile(InputFile, NoCanonicalizeWhiteSpace, InputFileBuffer); 1401 1402 SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( 1403 InputFileText, InputFile.getBufferIdentifier()), 1404 SMLoc()); 1405 1406 return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1; 1407 } 1408