1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // FileCheck does a line-by line check of a file that validates whether it 11 // contains the expected content. This is useful for regression tests etc. 12 // 13 // This program exits with an exit status of 2 on error, exit status of 0 if 14 // the file matched the expected contents, and exit status of 1 if it did not 15 // contain the expected contents. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #include "llvm/ADT/SmallString.h" 20 #include "llvm/ADT/StringExtras.h" 21 #include "llvm/ADT/StringMap.h" 22 #include "llvm/ADT/StringSet.h" 23 #include "llvm/Support/CommandLine.h" 24 #include "llvm/Support/InitLLVM.h" 25 #include "llvm/Support/MemoryBuffer.h" 26 #include "llvm/Support/Regex.h" 27 #include "llvm/Support/SourceMgr.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include <algorithm> 30 #include <cctype> 31 #include <list> 32 #include <map> 33 #include <string> 34 #include <system_error> 35 #include <vector> 36 using namespace llvm; 37 38 static cl::opt<std::string> 39 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required); 40 41 static cl::opt<std::string> 42 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"), 43 cl::init("-"), cl::value_desc("filename")); 44 45 static cl::list<std::string> CheckPrefixes( 46 "check-prefix", 47 cl::desc("Prefix to use from check file (defaults to 'CHECK')")); 48 static cl::alias CheckPrefixesAlias( 49 "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated, 50 cl::NotHidden, 51 cl::desc( 52 "Alias for -check-prefix permitting multiple comma separated values")); 53 54 static cl::opt<bool> NoCanonicalizeWhiteSpace( 55 "strict-whitespace", 56 cl::desc("Do not treat all horizontal whitespace as equivalent")); 57 58 static cl::list<std::string> ImplicitCheckNot( 59 "implicit-check-not", 60 cl::desc("Add an implicit negative check with this pattern to every\n" 61 "positive check. This can be used to ensure that no instances of\n" 62 "this pattern occur which are not matched by a positive pattern"), 63 cl::value_desc("pattern")); 64 65 static cl::list<std::string> GlobalDefines("D", cl::Prefix, 66 cl::desc("Define a variable to be used in capture patterns."), 67 cl::value_desc("VAR=VALUE")); 68 69 static cl::opt<bool> AllowEmptyInput( 70 "allow-empty", cl::init(false), 71 cl::desc("Allow the input file to be empty. This is useful when making\n" 72 "checks that some error message does not occur, for example.")); 73 74 static cl::opt<bool> MatchFullLines( 75 "match-full-lines", cl::init(false), 76 cl::desc("Require all positive matches to cover an entire input line.\n" 77 "Allows leading and trailing whitespace if --strict-whitespace\n" 78 "is not also passed.")); 79 80 static cl::opt<bool> EnableVarScope( 81 "enable-var-scope", cl::init(false), 82 cl::desc("Enables scope for regex variables. Variables with names that\n" 83 "do not start with '$' will be reset at the beginning of\n" 84 "each CHECK-LABEL block.")); 85 86 static cl::opt<bool> AllowDeprecatedDagOverlap( 87 "allow-deprecated-dag-overlap", cl::init(false), 88 cl::desc("Enable overlapping among matches in a group of consecutive\n" 89 "CHECK-DAG directives. This option is deprecated and is only\n" 90 "provided for convenience as old tests are migrated to the new\n" 91 "non-overlapping CHECK-DAG implementation.\n")); 92 93 typedef cl::list<std::string>::const_iterator prefix_iterator; 94 95 //===----------------------------------------------------------------------===// 96 // Pattern Handling Code. 97 //===----------------------------------------------------------------------===// 98 99 namespace Check { 100 enum CheckType { 101 CheckNone = 0, 102 CheckPlain, 103 CheckNext, 104 CheckSame, 105 CheckNot, 106 CheckDAG, 107 CheckLabel, 108 CheckEmpty, 109 110 /// Indicates the pattern only matches the end of file. This is used for 111 /// trailing CHECK-NOTs. 112 CheckEOF, 113 114 /// Marks when parsing found a -NOT check combined with another CHECK suffix. 115 CheckBadNot 116 }; 117 } 118 119 class Pattern { 120 SMLoc PatternLoc; 121 122 /// A fixed string to match as the pattern or empty if this pattern requires 123 /// a regex match. 124 StringRef FixedStr; 125 126 /// A regex string to match as the pattern or empty if this pattern requires 127 /// a fixed string to match. 128 std::string RegExStr; 129 130 /// Entries in this vector map to uses of a variable in the pattern, e.g. 131 /// "foo[[bar]]baz". In this case, the RegExStr will contain "foobaz" and 132 /// we'll get an entry in this vector that tells us to insert the value of 133 /// bar at offset 3. 134 std::vector<std::pair<StringRef, unsigned>> VariableUses; 135 136 /// Maps definitions of variables to their parenthesized capture numbers. 137 /// 138 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 139 /// 1. 140 std::map<StringRef, unsigned> VariableDefs; 141 142 Check::CheckType CheckTy; 143 144 /// Contains the number of line this pattern is in. 145 unsigned LineNumber; 146 147 public: 148 explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {} 149 150 /// Returns the location in source code. 151 SMLoc getLoc() const { return PatternLoc; } 152 153 bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM, 154 unsigned LineNumber); 155 size_t Match(StringRef Buffer, size_t &MatchLen, 156 StringMap<StringRef> &VariableTable) const; 157 void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 158 const StringMap<StringRef> &VariableTable) const; 159 160 bool hasVariable() const { 161 return !(VariableUses.empty() && VariableDefs.empty()); 162 } 163 164 Check::CheckType getCheckTy() const { return CheckTy; } 165 166 private: 167 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); 168 void AddBackrefToRegEx(unsigned BackrefNum); 169 unsigned 170 ComputeMatchDistance(StringRef Buffer, 171 const StringMap<StringRef> &VariableTable) const; 172 bool EvaluateExpression(StringRef Expr, std::string &Value) const; 173 size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); 174 }; 175 176 /// Parses the given string into the Pattern. 177 /// 178 /// \p Prefix provides which prefix is being matched, \p SM provides the 179 /// SourceMgr used for error reports, and \p LineNumber is the line number in 180 /// the input file from which the pattern string was read. Returns true in 181 /// case of an error, false otherwise. 182 bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix, 183 SourceMgr &SM, unsigned LineNumber) { 184 bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot; 185 186 this->LineNumber = LineNumber; 187 PatternLoc = SMLoc::getFromPointer(PatternStr.data()); 188 189 if (!(NoCanonicalizeWhiteSpace && MatchFullLines)) 190 // Ignore trailing whitespace. 191 while (!PatternStr.empty() && 192 (PatternStr.back() == ' ' || PatternStr.back() == '\t')) 193 PatternStr = PatternStr.substr(0, PatternStr.size() - 1); 194 195 // Check that there is something on the line. 196 if (PatternStr.empty() && CheckTy != Check::CheckEmpty) { 197 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error, 198 "found empty check string with prefix '" + Prefix + ":'"); 199 return true; 200 } 201 202 if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) { 203 SM.PrintMessage( 204 PatternLoc, SourceMgr::DK_Error, 205 "found non-empty check string for empty check with prefix '" + Prefix + 206 ":'"); 207 return true; 208 } 209 210 if (CheckTy == Check::CheckEmpty) { 211 RegExStr = "(\n$)"; 212 return false; 213 } 214 215 // Check to see if this is a fixed string, or if it has regex pieces. 216 if (!MatchFullLinesHere && 217 (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos && 218 PatternStr.find("[[") == StringRef::npos))) { 219 FixedStr = PatternStr; 220 return false; 221 } 222 223 if (MatchFullLinesHere) { 224 RegExStr += '^'; 225 if (!NoCanonicalizeWhiteSpace) 226 RegExStr += " *"; 227 } 228 229 // Paren value #0 is for the fully matched string. Any new parenthesized 230 // values add from there. 231 unsigned CurParen = 1; 232 233 // Otherwise, there is at least one regex piece. Build up the regex pattern 234 // by escaping scary characters in fixed strings, building up one big regex. 235 while (!PatternStr.empty()) { 236 // RegEx matches. 237 if (PatternStr.startswith("{{")) { 238 // This is the start of a regex match. Scan for the }}. 239 size_t End = PatternStr.find("}}"); 240 if (End == StringRef::npos) { 241 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 242 SourceMgr::DK_Error, 243 "found start of regex string with no end '}}'"); 244 return true; 245 } 246 247 // Enclose {{}} patterns in parens just like [[]] even though we're not 248 // capturing the result for any purpose. This is required in case the 249 // expression contains an alternation like: CHECK: abc{{x|z}}def. We 250 // want this to turn into: "abc(x|z)def" not "abcx|zdef". 251 RegExStr += '('; 252 ++CurParen; 253 254 if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM)) 255 return true; 256 RegExStr += ')'; 257 258 PatternStr = PatternStr.substr(End + 2); 259 continue; 260 } 261 262 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .* 263 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The 264 // second form is [[foo]] which is a reference to foo. The variable name 265 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject 266 // it. This is to catch some common errors. 267 if (PatternStr.startswith("[[")) { 268 // Find the closing bracket pair ending the match. End is going to be an 269 // offset relative to the beginning of the match string. 270 size_t End = FindRegexVarEnd(PatternStr.substr(2), SM); 271 272 if (End == StringRef::npos) { 273 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 274 SourceMgr::DK_Error, 275 "invalid named regex reference, no ]] found"); 276 return true; 277 } 278 279 StringRef MatchStr = PatternStr.substr(2, End); 280 PatternStr = PatternStr.substr(End + 4); 281 282 // Get the regex name (e.g. "foo"). 283 size_t NameEnd = MatchStr.find(':'); 284 StringRef Name = MatchStr.substr(0, NameEnd); 285 286 if (Name.empty()) { 287 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 288 "invalid name in named regex: empty name"); 289 return true; 290 } 291 292 // Verify that the name/expression is well formed. FileCheck currently 293 // supports @LINE, @LINE+number, @LINE-number expressions. The check here 294 // is relaxed, more strict check is performed in \c EvaluateExpression. 295 bool IsExpression = false; 296 for (unsigned i = 0, e = Name.size(); i != e; ++i) { 297 if (i == 0) { 298 if (Name[i] == '$') // Global vars start with '$' 299 continue; 300 if (Name[i] == '@') { 301 if (NameEnd != StringRef::npos) { 302 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 303 SourceMgr::DK_Error, 304 "invalid name in named regex definition"); 305 return true; 306 } 307 IsExpression = true; 308 continue; 309 } 310 } 311 if (Name[i] != '_' && !isalnum(Name[i]) && 312 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) { 313 SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i), 314 SourceMgr::DK_Error, "invalid name in named regex"); 315 return true; 316 } 317 } 318 319 // Name can't start with a digit. 320 if (isdigit(static_cast<unsigned char>(Name[0]))) { 321 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 322 "invalid name in named regex"); 323 return true; 324 } 325 326 // Handle [[foo]]. 327 if (NameEnd == StringRef::npos) { 328 // Handle variables that were defined earlier on the same line by 329 // emitting a backreference. 330 if (VariableDefs.find(Name) != VariableDefs.end()) { 331 unsigned VarParenNum = VariableDefs[Name]; 332 if (VarParenNum < 1 || VarParenNum > 9) { 333 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 334 SourceMgr::DK_Error, 335 "Can't back-reference more than 9 variables"); 336 return true; 337 } 338 AddBackrefToRegEx(VarParenNum); 339 } else { 340 VariableUses.push_back(std::make_pair(Name, RegExStr.size())); 341 } 342 continue; 343 } 344 345 // Handle [[foo:.*]]. 346 VariableDefs[Name] = CurParen; 347 RegExStr += '('; 348 ++CurParen; 349 350 if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM)) 351 return true; 352 353 RegExStr += ')'; 354 } 355 356 // Handle fixed string matches. 357 // Find the end, which is the start of the next regex. 358 size_t FixedMatchEnd = PatternStr.find("{{"); 359 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); 360 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); 361 PatternStr = PatternStr.substr(FixedMatchEnd); 362 } 363 364 if (MatchFullLinesHere) { 365 if (!NoCanonicalizeWhiteSpace) 366 RegExStr += " *"; 367 RegExStr += '$'; 368 } 369 370 return false; 371 } 372 373 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) { 374 Regex R(RS); 375 std::string Error; 376 if (!R.isValid(Error)) { 377 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, 378 "invalid regex: " + Error); 379 return true; 380 } 381 382 RegExStr += RS.str(); 383 CurParen += R.getNumMatches(); 384 return false; 385 } 386 387 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { 388 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); 389 std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum); 390 RegExStr += Backref; 391 } 392 393 /// Evaluates expression and stores the result to \p Value. 394 /// 395 /// Returns true on success and false when the expression has invalid syntax. 396 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const { 397 // The only supported expression is @LINE([\+-]\d+)? 398 if (!Expr.startswith("@LINE")) 399 return false; 400 Expr = Expr.substr(StringRef("@LINE").size()); 401 int Offset = 0; 402 if (!Expr.empty()) { 403 if (Expr[0] == '+') 404 Expr = Expr.substr(1); 405 else if (Expr[0] != '-') 406 return false; 407 if (Expr.getAsInteger(10, Offset)) 408 return false; 409 } 410 Value = llvm::itostr(LineNumber + Offset); 411 return true; 412 } 413 414 /// Matches the pattern string against the input buffer \p Buffer 415 /// 416 /// This returns the position that is matched or npos if there is no match. If 417 /// there is a match, the size of the matched string is returned in \p 418 /// MatchLen. 419 /// 420 /// The \p VariableTable StringMap provides the current values of filecheck 421 /// variables and is updated if this match defines new values. 422 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, 423 StringMap<StringRef> &VariableTable) const { 424 // If this is the EOF pattern, match it immediately. 425 if (CheckTy == Check::CheckEOF) { 426 MatchLen = 0; 427 return Buffer.size(); 428 } 429 430 // If this is a fixed string pattern, just match it now. 431 if (!FixedStr.empty()) { 432 MatchLen = FixedStr.size(); 433 return Buffer.find(FixedStr); 434 } 435 436 // Regex match. 437 438 // If there are variable uses, we need to create a temporary string with the 439 // actual value. 440 StringRef RegExToMatch = RegExStr; 441 std::string TmpStr; 442 if (!VariableUses.empty()) { 443 TmpStr = RegExStr; 444 445 unsigned InsertOffset = 0; 446 for (const auto &VariableUse : VariableUses) { 447 std::string Value; 448 449 if (VariableUse.first[0] == '@') { 450 if (!EvaluateExpression(VariableUse.first, Value)) 451 return StringRef::npos; 452 } else { 453 StringMap<StringRef>::iterator it = 454 VariableTable.find(VariableUse.first); 455 // If the variable is undefined, return an error. 456 if (it == VariableTable.end()) 457 return StringRef::npos; 458 459 // Look up the value and escape it so that we can put it into the regex. 460 Value += Regex::escape(it->second); 461 } 462 463 // Plop it into the regex at the adjusted offset. 464 TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset, 465 Value.begin(), Value.end()); 466 InsertOffset += Value.size(); 467 } 468 469 // Match the newly constructed regex. 470 RegExToMatch = TmpStr; 471 } 472 473 SmallVector<StringRef, 4> MatchInfo; 474 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) 475 return StringRef::npos; 476 477 // Successful regex match. 478 assert(!MatchInfo.empty() && "Didn't get any match"); 479 StringRef FullMatch = MatchInfo[0]; 480 481 // If this defines any variables, remember their values. 482 for (const auto &VariableDef : VariableDefs) { 483 assert(VariableDef.second < MatchInfo.size() && "Internal paren error"); 484 VariableTable[VariableDef.first] = MatchInfo[VariableDef.second]; 485 } 486 487 MatchLen = FullMatch.size(); 488 return FullMatch.data() - Buffer.data(); 489 } 490 491 492 /// Computes an arbitrary estimate for the quality of matching this pattern at 493 /// the start of \p Buffer; a distance of zero should correspond to a perfect 494 /// match. 495 unsigned 496 Pattern::ComputeMatchDistance(StringRef Buffer, 497 const StringMap<StringRef> &VariableTable) const { 498 // Just compute the number of matching characters. For regular expressions, we 499 // just compare against the regex itself and hope for the best. 500 // 501 // FIXME: One easy improvement here is have the regex lib generate a single 502 // example regular expression which matches, and use that as the example 503 // string. 504 StringRef ExampleString(FixedStr); 505 if (ExampleString.empty()) 506 ExampleString = RegExStr; 507 508 // Only compare up to the first line in the buffer, or the string size. 509 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); 510 BufferPrefix = BufferPrefix.split('\n').first; 511 return BufferPrefix.edit_distance(ExampleString); 512 } 513 514 /// Prints additional information about a failure to match involving this 515 /// pattern. 516 void Pattern::PrintFailureInfo( 517 const SourceMgr &SM, StringRef Buffer, 518 const StringMap<StringRef> &VariableTable) const { 519 // If this was a regular expression using variables, print the current 520 // variable values. 521 if (!VariableUses.empty()) { 522 for (const auto &VariableUse : VariableUses) { 523 SmallString<256> Msg; 524 raw_svector_ostream OS(Msg); 525 StringRef Var = VariableUse.first; 526 if (Var[0] == '@') { 527 std::string Value; 528 if (EvaluateExpression(Var, Value)) { 529 OS << "with expression \""; 530 OS.write_escaped(Var) << "\" equal to \""; 531 OS.write_escaped(Value) << "\""; 532 } else { 533 OS << "uses incorrect expression \""; 534 OS.write_escaped(Var) << "\""; 535 } 536 } else { 537 StringMap<StringRef>::const_iterator it = VariableTable.find(Var); 538 539 // Check for undefined variable references. 540 if (it == VariableTable.end()) { 541 OS << "uses undefined variable \""; 542 OS.write_escaped(Var) << "\""; 543 } else { 544 OS << "with variable \""; 545 OS.write_escaped(Var) << "\" equal to \""; 546 OS.write_escaped(it->second) << "\""; 547 } 548 } 549 550 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 551 OS.str()); 552 } 553 } 554 555 // Attempt to find the closest/best fuzzy match. Usually an error happens 556 // because some string in the output didn't exactly match. In these cases, we 557 // would like to show the user a best guess at what "should have" matched, to 558 // save them having to actually check the input manually. 559 size_t NumLinesForward = 0; 560 size_t Best = StringRef::npos; 561 double BestQuality = 0; 562 563 // Use an arbitrary 4k limit on how far we will search. 564 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { 565 if (Buffer[i] == '\n') 566 ++NumLinesForward; 567 568 // Patterns have leading whitespace stripped, so skip whitespace when 569 // looking for something which looks like a pattern. 570 if (Buffer[i] == ' ' || Buffer[i] == '\t') 571 continue; 572 573 // Compute the "quality" of this match as an arbitrary combination of the 574 // match distance and the number of lines skipped to get to this match. 575 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable); 576 double Quality = Distance + (NumLinesForward / 100.); 577 578 if (Quality < BestQuality || Best == StringRef::npos) { 579 Best = i; 580 BestQuality = Quality; 581 } 582 } 583 584 // Print the "possible intended match here" line if we found something 585 // reasonable and not equal to what we showed in the "scanning from here" 586 // line. 587 if (Best && Best != StringRef::npos && BestQuality < 50) { 588 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best), 589 SourceMgr::DK_Note, "possible intended match here"); 590 591 // FIXME: If we wanted to be really friendly we would show why the match 592 // failed, as it can be hard to spot simple one character differences. 593 } 594 } 595 596 /// Finds the closing sequence of a regex variable usage or definition. 597 /// 598 /// \p Str has to point in the beginning of the definition (right after the 599 /// opening sequence). Returns the offset of the closing sequence within Str, 600 /// or npos if it was not found. 601 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { 602 // Offset keeps track of the current offset within the input Str 603 size_t Offset = 0; 604 // [...] Nesting depth 605 size_t BracketDepth = 0; 606 607 while (!Str.empty()) { 608 if (Str.startswith("]]") && BracketDepth == 0) 609 return Offset; 610 if (Str[0] == '\\') { 611 // Backslash escapes the next char within regexes, so skip them both. 612 Str = Str.substr(2); 613 Offset += 2; 614 } else { 615 switch (Str[0]) { 616 default: 617 break; 618 case '[': 619 BracketDepth++; 620 break; 621 case ']': 622 if (BracketDepth == 0) { 623 SM.PrintMessage(SMLoc::getFromPointer(Str.data()), 624 SourceMgr::DK_Error, 625 "missing closing \"]\" for regex variable"); 626 exit(1); 627 } 628 BracketDepth--; 629 break; 630 } 631 Str = Str.substr(1); 632 Offset++; 633 } 634 } 635 636 return StringRef::npos; 637 } 638 639 //===----------------------------------------------------------------------===// 640 // Check Strings. 641 //===----------------------------------------------------------------------===// 642 643 /// A check that we found in the input file. 644 struct CheckString { 645 /// The pattern to match. 646 Pattern Pat; 647 648 /// Which prefix name this check matched. 649 StringRef Prefix; 650 651 /// The location in the match file that the check string was specified. 652 SMLoc Loc; 653 654 /// All of the strings that are disallowed from occurring between this match 655 /// string and the previous one (or start of file). 656 std::vector<Pattern> DagNotStrings; 657 658 CheckString(const Pattern &P, StringRef S, SMLoc L) 659 : Pat(P), Prefix(S), Loc(L) {} 660 661 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, 662 size_t &MatchLen, StringMap<StringRef> &VariableTable) const; 663 664 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; 665 bool CheckSame(const SourceMgr &SM, StringRef Buffer) const; 666 bool CheckNot(const SourceMgr &SM, StringRef Buffer, 667 const std::vector<const Pattern *> &NotStrings, 668 StringMap<StringRef> &VariableTable) const; 669 size_t CheckDag(const SourceMgr &SM, StringRef Buffer, 670 std::vector<const Pattern *> &NotStrings, 671 StringMap<StringRef> &VariableTable) const; 672 }; 673 674 /// Canonicalize whitespaces in the file. Line endings are replaced with 675 /// UNIX-style '\n'. 676 static StringRef CanonicalizeFile(MemoryBuffer &MB, 677 SmallVectorImpl<char> &OutputBuffer) { 678 OutputBuffer.reserve(MB.getBufferSize()); 679 680 for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd(); 681 Ptr != End; ++Ptr) { 682 // Eliminate trailing dosish \r. 683 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { 684 continue; 685 } 686 687 // If current char is not a horizontal whitespace or if horizontal 688 // whitespace canonicalization is disabled, dump it to output as is. 689 if (NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) { 690 OutputBuffer.push_back(*Ptr); 691 continue; 692 } 693 694 // Otherwise, add one space and advance over neighboring space. 695 OutputBuffer.push_back(' '); 696 while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t')) 697 ++Ptr; 698 } 699 700 // Add a null byte and then return all but that byte. 701 OutputBuffer.push_back('\0'); 702 return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1); 703 } 704 705 static bool IsPartOfWord(char c) { 706 return (isalnum(c) || c == '-' || c == '_'); 707 } 708 709 // Get the size of the prefix extension. 710 static size_t CheckTypeSize(Check::CheckType Ty) { 711 switch (Ty) { 712 case Check::CheckNone: 713 case Check::CheckBadNot: 714 return 0; 715 716 case Check::CheckPlain: 717 return sizeof(":") - 1; 718 719 case Check::CheckNext: 720 return sizeof("-NEXT:") - 1; 721 722 case Check::CheckSame: 723 return sizeof("-SAME:") - 1; 724 725 case Check::CheckNot: 726 return sizeof("-NOT:") - 1; 727 728 case Check::CheckDAG: 729 return sizeof("-DAG:") - 1; 730 731 case Check::CheckLabel: 732 return sizeof("-LABEL:") - 1; 733 734 case Check::CheckEmpty: 735 return sizeof("-EMPTY:") - 1; 736 737 case Check::CheckEOF: 738 llvm_unreachable("Should not be using EOF size"); 739 } 740 741 llvm_unreachable("Bad check type"); 742 } 743 744 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) { 745 if (Buffer.size() <= Prefix.size()) 746 return Check::CheckNone; 747 748 char NextChar = Buffer[Prefix.size()]; 749 750 // Verify that the : is present after the prefix. 751 if (NextChar == ':') 752 return Check::CheckPlain; 753 754 if (NextChar != '-') 755 return Check::CheckNone; 756 757 StringRef Rest = Buffer.drop_front(Prefix.size() + 1); 758 if (Rest.startswith("NEXT:")) 759 return Check::CheckNext; 760 761 if (Rest.startswith("SAME:")) 762 return Check::CheckSame; 763 764 if (Rest.startswith("NOT:")) 765 return Check::CheckNot; 766 767 if (Rest.startswith("DAG:")) 768 return Check::CheckDAG; 769 770 if (Rest.startswith("LABEL:")) 771 return Check::CheckLabel; 772 773 if (Rest.startswith("EMPTY:")) 774 return Check::CheckEmpty; 775 776 // You can't combine -NOT with another suffix. 777 if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") || 778 Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") || 779 Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") || 780 Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:")) 781 return Check::CheckBadNot; 782 783 return Check::CheckNone; 784 } 785 786 // From the given position, find the next character after the word. 787 static size_t SkipWord(StringRef Str, size_t Loc) { 788 while (Loc < Str.size() && IsPartOfWord(Str[Loc])) 789 ++Loc; 790 return Loc; 791 } 792 793 /// Search the buffer for the first prefix in the prefix regular expression. 794 /// 795 /// This searches the buffer using the provided regular expression, however it 796 /// enforces constraints beyond that: 797 /// 1) The found prefix must not be a suffix of something that looks like 798 /// a valid prefix. 799 /// 2) The found prefix must be followed by a valid check type suffix using \c 800 /// FindCheckType above. 801 /// 802 /// The first match of the regular expression to satisfy these two is returned, 803 /// otherwise an empty StringRef is returned to indicate failure. 804 /// 805 /// If this routine returns a valid prefix, it will also shrink \p Buffer to 806 /// start at the beginning of the returned prefix, increment \p LineNumber for 807 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of 808 /// check found by examining the suffix. 809 /// 810 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy 811 /// is unspecified. 812 static StringRef FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer, 813 unsigned &LineNumber, 814 Check::CheckType &CheckTy) { 815 SmallVector<StringRef, 2> Matches; 816 817 while (!Buffer.empty()) { 818 // Find the first (longest) match using the RE. 819 if (!PrefixRE.match(Buffer, &Matches)) 820 // No match at all, bail. 821 return StringRef(); 822 823 StringRef Prefix = Matches[0]; 824 Matches.clear(); 825 826 assert(Prefix.data() >= Buffer.data() && 827 Prefix.data() < Buffer.data() + Buffer.size() && 828 "Prefix doesn't start inside of buffer!"); 829 size_t Loc = Prefix.data() - Buffer.data(); 830 StringRef Skipped = Buffer.substr(0, Loc); 831 Buffer = Buffer.drop_front(Loc); 832 LineNumber += Skipped.count('\n'); 833 834 // Check that the matched prefix isn't a suffix of some other check-like 835 // word. 836 // FIXME: This is a very ad-hoc check. it would be better handled in some 837 // other way. Among other things it seems hard to distinguish between 838 // intentional and unintentional uses of this feature. 839 if (Skipped.empty() || !IsPartOfWord(Skipped.back())) { 840 // Now extract the type. 841 CheckTy = FindCheckType(Buffer, Prefix); 842 843 // If we've found a valid check type for this prefix, we're done. 844 if (CheckTy != Check::CheckNone) 845 return Prefix; 846 } 847 848 // If we didn't successfully find a prefix, we need to skip this invalid 849 // prefix and continue scanning. We directly skip the prefix that was 850 // matched and any additional parts of that check-like word. 851 Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size())); 852 } 853 854 // We ran out of buffer while skipping partial matches so give up. 855 return StringRef(); 856 } 857 858 /// Read the check file, which specifies the sequence of expected strings. 859 /// 860 /// The strings are added to the CheckStrings vector. Returns true in case of 861 /// an error, false otherwise. 862 static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, 863 std::vector<CheckString> &CheckStrings) { 864 std::vector<Pattern> ImplicitNegativeChecks; 865 for (const auto &PatternString : ImplicitCheckNot) { 866 // Create a buffer with fake command line content in order to display the 867 // command line option responsible for the specific implicit CHECK-NOT. 868 std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str(); 869 std::string Suffix = "'"; 870 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy( 871 Prefix + PatternString + Suffix, "command line"); 872 873 StringRef PatternInBuffer = 874 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); 875 SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); 876 877 ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot)); 878 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer, 879 "IMPLICIT-CHECK", SM, 0); 880 } 881 882 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks; 883 884 // LineNumber keeps track of the line on which CheckPrefix instances are 885 // found. 886 unsigned LineNumber = 1; 887 888 while (1) { 889 Check::CheckType CheckTy; 890 891 // See if a prefix occurs in the memory buffer. 892 StringRef UsedPrefix = FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber, 893 CheckTy); 894 if (UsedPrefix.empty()) 895 break; 896 assert(UsedPrefix.data() == Buffer.data() && 897 "Failed to move Buffer's start forward, or pointed prefix outside " 898 "of the buffer!"); 899 900 // Location to use for error messages. 901 const char *UsedPrefixStart = UsedPrefix.data(); 902 903 // Skip the buffer to the end. 904 Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy)); 905 906 // Complain about useful-looking but unsupported suffixes. 907 if (CheckTy == Check::CheckBadNot) { 908 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, 909 "unsupported -NOT combo on prefix '" + UsedPrefix + "'"); 910 return true; 911 } 912 913 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore 914 // leading whitespace. 915 if (!(NoCanonicalizeWhiteSpace && MatchFullLines)) 916 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 917 918 // Scan ahead to the end of line. 919 size_t EOL = Buffer.find_first_of("\n\r"); 920 921 // Remember the location of the start of the pattern, for diagnostics. 922 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); 923 924 // Parse the pattern. 925 Pattern P(CheckTy); 926 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber)) 927 return true; 928 929 // Verify that CHECK-LABEL lines do not define or use variables 930 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { 931 SM.PrintMessage( 932 SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error, 933 "found '" + UsedPrefix + "-LABEL:'" 934 " with variable definition or use"); 935 return true; 936 } 937 938 Buffer = Buffer.substr(EOL); 939 940 // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them. 941 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame || 942 CheckTy == Check::CheckEmpty) && 943 CheckStrings.empty()) { 944 StringRef Type = CheckTy == Check::CheckNext 945 ? "NEXT" 946 : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME"; 947 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 948 SourceMgr::DK_Error, 949 "found '" + UsedPrefix + "-" + Type + 950 "' without previous '" + UsedPrefix + ": line"); 951 return true; 952 } 953 954 // Handle CHECK-DAG/-NOT. 955 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { 956 DagNotMatches.push_back(P); 957 continue; 958 } 959 960 // Okay, add the string we captured to the output vector and move on. 961 CheckStrings.emplace_back(P, UsedPrefix, PatternLoc); 962 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 963 DagNotMatches = ImplicitNegativeChecks; 964 } 965 966 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first 967 // prefix as a filler for the error message. 968 if (!DagNotMatches.empty()) { 969 CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(), 970 SMLoc::getFromPointer(Buffer.data())); 971 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 972 } 973 974 if (CheckStrings.empty()) { 975 errs() << "error: no check strings found with prefix" 976 << (CheckPrefixes.size() > 1 ? "es " : " "); 977 prefix_iterator I = CheckPrefixes.begin(); 978 prefix_iterator E = CheckPrefixes.end(); 979 if (I != E) { 980 errs() << "\'" << *I << ":'"; 981 ++I; 982 } 983 for (; I != E; ++I) 984 errs() << ", \'" << *I << ":'"; 985 986 errs() << '\n'; 987 return true; 988 } 989 990 return false; 991 } 992 993 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, const Pattern &Pat, 994 StringRef Buffer, 995 StringMap<StringRef> &VariableTable) { 996 // Otherwise, we have an error, emit an error message. 997 SM.PrintMessage(Loc, SourceMgr::DK_Error, 998 "expected string not found in input"); 999 1000 // Print the "scanning from here" line. If the current position is at the 1001 // end of a line, advance to the start of the next line. 1002 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); 1003 1004 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1005 "scanning from here"); 1006 1007 // Allow the pattern to print additional information if desired. 1008 Pat.PrintFailureInfo(SM, Buffer, VariableTable); 1009 } 1010 1011 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr, 1012 StringRef Buffer, 1013 StringMap<StringRef> &VariableTable) { 1014 PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable); 1015 } 1016 1017 /// Count the number of newlines in the specified range. 1018 static unsigned CountNumNewlinesBetween(StringRef Range, 1019 const char *&FirstNewLine) { 1020 unsigned NumNewLines = 0; 1021 while (1) { 1022 // Scan for newline. 1023 Range = Range.substr(Range.find_first_of("\n\r")); 1024 if (Range.empty()) 1025 return NumNewLines; 1026 1027 ++NumNewLines; 1028 1029 // Handle \n\r and \r\n as a single newline. 1030 if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') && 1031 (Range[0] != Range[1])) 1032 Range = Range.substr(1); 1033 Range = Range.substr(1); 1034 1035 if (NumNewLines == 1) 1036 FirstNewLine = Range.begin(); 1037 } 1038 } 1039 1040 /// Match check string and its "not strings" and/or "dag strings". 1041 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer, 1042 bool IsLabelScanMode, size_t &MatchLen, 1043 StringMap<StringRef> &VariableTable) const { 1044 size_t LastPos = 0; 1045 std::vector<const Pattern *> NotStrings; 1046 1047 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL 1048 // bounds; we have not processed variable definitions within the bounded block 1049 // yet so cannot handle any final CHECK-DAG yet; this is handled when going 1050 // over the block again (including the last CHECK-LABEL) in normal mode. 1051 if (!IsLabelScanMode) { 1052 // Match "dag strings" (with mixed "not strings" if any). 1053 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable); 1054 if (LastPos == StringRef::npos) 1055 return StringRef::npos; 1056 } 1057 1058 // Match itself from the last position after matching CHECK-DAG. 1059 StringRef MatchBuffer = Buffer.substr(LastPos); 1060 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1061 if (MatchPos == StringRef::npos) { 1062 PrintCheckFailed(SM, *this, MatchBuffer, VariableTable); 1063 return StringRef::npos; 1064 } 1065 1066 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT 1067 // or CHECK-NOT 1068 if (!IsLabelScanMode) { 1069 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1070 1071 // If this check is a "CHECK-NEXT", verify that the previous match was on 1072 // the previous line (i.e. that there is one newline between them). 1073 if (CheckNext(SM, SkippedRegion)) 1074 return StringRef::npos; 1075 1076 // If this check is a "CHECK-SAME", verify that the previous match was on 1077 // the same line (i.e. that there is no newline between them). 1078 if (CheckSame(SM, SkippedRegion)) 1079 return StringRef::npos; 1080 1081 // If this match had "not strings", verify that they don't exist in the 1082 // skipped region. 1083 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1084 return StringRef::npos; 1085 } 1086 1087 return LastPos + MatchPos; 1088 } 1089 1090 /// Verify there is a single line in the given buffer. 1091 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { 1092 if (Pat.getCheckTy() != Check::CheckNext && 1093 Pat.getCheckTy() != Check::CheckEmpty) 1094 return false; 1095 1096 Twine CheckName = 1097 Prefix + 1098 Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT"); 1099 1100 // Count the number of newlines between the previous match and this one. 1101 assert(Buffer.data() != 1102 SM.getMemoryBuffer(SM.FindBufferContainingLoc( 1103 SMLoc::getFromPointer(Buffer.data()))) 1104 ->getBufferStart() && 1105 "CHECK-NEXT and CHECK-EMPTY can't be the first check in a file"); 1106 1107 const char *FirstNewLine = nullptr; 1108 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1109 1110 // For CHECK-EMPTY, the preceding new line is consumed by the pattern, so 1111 // this needs to be re-added. 1112 if (Pat.getCheckTy() == Check::CheckEmpty) 1113 ++NumNewLines; 1114 1115 if (NumNewLines == 0) { 1116 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1117 CheckName + ": is on the same line as previous match"); 1118 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1119 "'next' match was here"); 1120 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1121 "previous match ended here"); 1122 return true; 1123 } 1124 1125 if (NumNewLines != 1) { 1126 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1127 CheckName + 1128 ": is not on the line after the previous match"); 1129 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1130 "'next' match was here"); 1131 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1132 "previous match ended here"); 1133 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, 1134 "non-matching line after previous match is here"); 1135 return true; 1136 } 1137 1138 return false; 1139 } 1140 1141 /// Verify there is no newline in the given buffer. 1142 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const { 1143 if (Pat.getCheckTy() != Check::CheckSame) 1144 return false; 1145 1146 // Count the number of newlines between the previous match and this one. 1147 assert(Buffer.data() != 1148 SM.getMemoryBuffer(SM.FindBufferContainingLoc( 1149 SMLoc::getFromPointer(Buffer.data()))) 1150 ->getBufferStart() && 1151 "CHECK-SAME can't be the first check in a file"); 1152 1153 const char *FirstNewLine = nullptr; 1154 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1155 1156 if (NumNewLines != 0) { 1157 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1158 Prefix + 1159 "-SAME: is not on the same line as the previous match"); 1160 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1161 "'next' match was here"); 1162 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1163 "previous match ended here"); 1164 return true; 1165 } 1166 1167 return false; 1168 } 1169 1170 /// Verify there's no "not strings" in the given buffer. 1171 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer, 1172 const std::vector<const Pattern *> &NotStrings, 1173 StringMap<StringRef> &VariableTable) const { 1174 for (const Pattern *Pat : NotStrings) { 1175 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); 1176 1177 size_t MatchLen = 0; 1178 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable); 1179 1180 if (Pos == StringRef::npos) 1181 continue; 1182 1183 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Pos), 1184 SourceMgr::DK_Error, Prefix + "-NOT: string occurred!"); 1185 SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note, 1186 Prefix + "-NOT: pattern specified here"); 1187 return true; 1188 } 1189 1190 return false; 1191 } 1192 1193 /// Match "dag strings" and their mixed "not strings". 1194 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, 1195 std::vector<const Pattern *> &NotStrings, 1196 StringMap<StringRef> &VariableTable) const { 1197 if (DagNotStrings.empty()) 1198 return 0; 1199 1200 size_t LastPos = 0; 1201 size_t StartPos = LastPos; 1202 1203 // A sorted list of ranges for non-overlapping dag matches. 1204 struct Match { 1205 size_t Pos; 1206 size_t End; 1207 }; 1208 std::list<Match> Matches; 1209 1210 for (const Pattern &Pat : DagNotStrings) { 1211 assert((Pat.getCheckTy() == Check::CheckDAG || 1212 Pat.getCheckTy() == Check::CheckNot) && 1213 "Invalid CHECK-DAG or CHECK-NOT!"); 1214 1215 if (Pat.getCheckTy() == Check::CheckNot) { 1216 NotStrings.push_back(&Pat); 1217 continue; 1218 } 1219 1220 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); 1221 1222 // CHECK-DAG always matches from the start. 1223 size_t MatchLen = 0, MatchPos = StartPos; 1224 1225 // Search for a match that doesn't overlap a previous match in this 1226 // CHECK-DAG group. 1227 for (auto MI = Matches.begin(), ME = Matches.end(); true; ++MI) { 1228 StringRef MatchBuffer = Buffer.substr(MatchPos); 1229 size_t MatchPosBuf = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1230 // With a group of CHECK-DAGs, a single mismatching means the match on 1231 // that group of CHECK-DAGs fails immediately. 1232 if (MatchPosBuf == StringRef::npos) { 1233 PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable); 1234 return StringRef::npos; 1235 } 1236 // Re-calc it as the offset relative to the start of the original string. 1237 MatchPos += MatchPosBuf; 1238 if (AllowDeprecatedDagOverlap) 1239 break; 1240 // Iterate previous matches until overlapping match or insertion point. 1241 Match M{MatchPos, MatchPos + MatchLen}; 1242 bool Overlap = false; 1243 for (; MI != ME; ++MI) { 1244 if (M.Pos < MI->End) { 1245 // !Overlap => New match has no overlap and is before this old match. 1246 // Overlap => New match overlaps this old match. 1247 Overlap = MI->Pos < M.End; 1248 break; 1249 } 1250 } 1251 if (!Overlap) { 1252 // Insert non-overlapping match into list. 1253 Matches.insert(MI, M); 1254 break; 1255 } 1256 MatchPos = MI->End; 1257 } 1258 1259 if (!NotStrings.empty()) { 1260 if (MatchPos < LastPos) { 1261 // Reordered? 1262 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos), 1263 SourceMgr::DK_Error, 1264 Prefix + "-DAG: found a match of CHECK-DAG" 1265 " reordering across a CHECK-NOT"); 1266 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos), 1267 SourceMgr::DK_Note, 1268 Prefix + "-DAG: the farthest match of CHECK-DAG" 1269 " is found here"); 1270 SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note, 1271 Prefix + "-NOT: the crossed pattern specified" 1272 " here"); 1273 SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note, 1274 Prefix + "-DAG: the reordered pattern specified" 1275 " here"); 1276 return StringRef::npos; 1277 } 1278 // All subsequent CHECK-DAGs should be matched from the farthest 1279 // position of all precedent CHECK-DAGs (not including this one). 1280 StartPos = LastPos; 1281 // Don't waste time checking for (impossible) overlaps before that. 1282 Matches.clear(); 1283 Matches.push_back(Match{MatchPos, MatchPos + MatchLen}); 1284 // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to 1285 // CHECK-DAG, verify that there's no 'not' strings occurred in that 1286 // region. 1287 StringRef SkippedRegion = Buffer.slice(LastPos, MatchPos); 1288 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1289 return StringRef::npos; 1290 // Clear "not strings". 1291 NotStrings.clear(); 1292 } 1293 1294 // Update the last position with CHECK-DAG matches. 1295 LastPos = std::max(MatchPos + MatchLen, LastPos); 1296 } 1297 1298 return LastPos; 1299 } 1300 1301 // A check prefix must contain only alphanumeric, hyphens and underscores. 1302 static bool ValidateCheckPrefix(StringRef CheckPrefix) { 1303 Regex Validator("^[a-zA-Z0-9_-]*$"); 1304 return Validator.match(CheckPrefix); 1305 } 1306 1307 static bool ValidateCheckPrefixes() { 1308 StringSet<> PrefixSet; 1309 1310 for (StringRef Prefix : CheckPrefixes) { 1311 // Reject empty prefixes. 1312 if (Prefix == "") 1313 return false; 1314 1315 if (!PrefixSet.insert(Prefix).second) 1316 return false; 1317 1318 if (!ValidateCheckPrefix(Prefix)) 1319 return false; 1320 } 1321 1322 return true; 1323 } 1324 1325 // Combines the check prefixes into a single regex so that we can efficiently 1326 // scan for any of the set. 1327 // 1328 // The semantics are that the longest-match wins which matches our regex 1329 // library. 1330 static Regex buildCheckPrefixRegex() { 1331 // I don't think there's a way to specify an initial value for cl::list, 1332 // so if nothing was specified, add the default 1333 if (CheckPrefixes.empty()) 1334 CheckPrefixes.push_back("CHECK"); 1335 1336 // We already validated the contents of CheckPrefixes so just concatenate 1337 // them as alternatives. 1338 SmallString<32> PrefixRegexStr; 1339 for (StringRef Prefix : CheckPrefixes) { 1340 if (Prefix != CheckPrefixes.front()) 1341 PrefixRegexStr.push_back('|'); 1342 1343 PrefixRegexStr.append(Prefix); 1344 } 1345 1346 return Regex(PrefixRegexStr); 1347 } 1348 1349 static void DumpCommandLine(int argc, char **argv) { 1350 errs() << "FileCheck command line: "; 1351 for (int I = 0; I < argc; I++) 1352 errs() << " " << argv[I]; 1353 errs() << "\n"; 1354 } 1355 1356 // Remove local variables from \p VariableTable. Global variables 1357 // (start with '$') are preserved. 1358 static void ClearLocalVars(StringMap<StringRef> &VariableTable) { 1359 SmallVector<StringRef, 16> LocalVars; 1360 for (const auto &Var : VariableTable) 1361 if (Var.first()[0] != '$') 1362 LocalVars.push_back(Var.first()); 1363 1364 for (const auto &Var : LocalVars) 1365 VariableTable.erase(Var); 1366 } 1367 1368 /// Check the input to FileCheck provided in the \p Buffer against the \p 1369 /// CheckStrings read from the check file. 1370 /// 1371 /// Returns false if the input fails to satisfy the checks. 1372 bool CheckInput(SourceMgr &SM, StringRef Buffer, 1373 ArrayRef<CheckString> CheckStrings) { 1374 bool ChecksFailed = false; 1375 1376 /// VariableTable - This holds all the current filecheck variables. 1377 StringMap<StringRef> VariableTable; 1378 1379 for (const auto& Def : GlobalDefines) 1380 VariableTable.insert(StringRef(Def).split('=')); 1381 1382 unsigned i = 0, j = 0, e = CheckStrings.size(); 1383 while (true) { 1384 StringRef CheckRegion; 1385 if (j == e) { 1386 CheckRegion = Buffer; 1387 } else { 1388 const CheckString &CheckLabelStr = CheckStrings[j]; 1389 if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) { 1390 ++j; 1391 continue; 1392 } 1393 1394 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG 1395 size_t MatchLabelLen = 0; 1396 size_t MatchLabelPos = 1397 CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable); 1398 if (MatchLabelPos == StringRef::npos) 1399 // Immediately bail of CHECK-LABEL fails, nothing else we can do. 1400 return false; 1401 1402 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); 1403 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); 1404 ++j; 1405 } 1406 1407 if (EnableVarScope) 1408 ClearLocalVars(VariableTable); 1409 1410 for (; i != j; ++i) { 1411 const CheckString &CheckStr = CheckStrings[i]; 1412 1413 // Check each string within the scanned region, including a second check 1414 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) 1415 size_t MatchLen = 0; 1416 size_t MatchPos = 1417 CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable); 1418 1419 if (MatchPos == StringRef::npos) { 1420 ChecksFailed = true; 1421 i = j; 1422 break; 1423 } 1424 1425 CheckRegion = CheckRegion.substr(MatchPos + MatchLen); 1426 } 1427 1428 if (j == e) 1429 break; 1430 } 1431 1432 // Success if no checks failed. 1433 return !ChecksFailed; 1434 } 1435 1436 int main(int argc, char **argv) { 1437 InitLLVM X(argc, argv); 1438 cl::ParseCommandLineOptions(argc, argv); 1439 1440 if (!ValidateCheckPrefixes()) { 1441 errs() << "Supplied check-prefix is invalid! Prefixes must be unique and " 1442 "start with a letter and contain only alphanumeric characters, " 1443 "hyphens and underscores\n"; 1444 return 2; 1445 } 1446 1447 Regex PrefixRE = buildCheckPrefixRegex(); 1448 std::string REError; 1449 if (!PrefixRE.isValid(REError)) { 1450 errs() << "Unable to combine check-prefix strings into a prefix regular " 1451 "expression! This is likely a bug in FileCheck's verification of " 1452 "the check-prefix strings. Regular expression parsing failed " 1453 "with the following error: " 1454 << REError << "\n"; 1455 return 2; 1456 } 1457 1458 SourceMgr SM; 1459 1460 // Read the expected strings from the check file. 1461 ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr = 1462 MemoryBuffer::getFileOrSTDIN(CheckFilename); 1463 if (std::error_code EC = CheckFileOrErr.getError()) { 1464 errs() << "Could not open check file '" << CheckFilename 1465 << "': " << EC.message() << '\n'; 1466 return 2; 1467 } 1468 MemoryBuffer &CheckFile = *CheckFileOrErr.get(); 1469 1470 SmallString<4096> CheckFileBuffer; 1471 StringRef CheckFileText = CanonicalizeFile(CheckFile, CheckFileBuffer); 1472 1473 SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( 1474 CheckFileText, CheckFile.getBufferIdentifier()), 1475 SMLoc()); 1476 1477 std::vector<CheckString> CheckStrings; 1478 if (ReadCheckFile(SM, CheckFileText, PrefixRE, CheckStrings)) 1479 return 2; 1480 1481 // Open the file to check and add it to SourceMgr. 1482 ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr = 1483 MemoryBuffer::getFileOrSTDIN(InputFilename); 1484 if (std::error_code EC = InputFileOrErr.getError()) { 1485 errs() << "Could not open input file '" << InputFilename 1486 << "': " << EC.message() << '\n'; 1487 return 2; 1488 } 1489 MemoryBuffer &InputFile = *InputFileOrErr.get(); 1490 1491 if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) { 1492 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; 1493 DumpCommandLine(argc, argv); 1494 return 2; 1495 } 1496 1497 SmallString<4096> InputFileBuffer; 1498 StringRef InputFileText = CanonicalizeFile(InputFile, InputFileBuffer); 1499 1500 SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( 1501 InputFileText, InputFile.getBufferIdentifier()), 1502 SMLoc()); 1503 1504 return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1; 1505 } 1506