1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // FileCheck does a line-by line check of a file that validates whether it 11 // contains the expected content. This is useful for regression tests etc. 12 // 13 // This program exits with an exit status of 2 on error, exit status of 0 if 14 // the file matched the expected contents, and exit status of 1 if it did not 15 // contain the expected contents. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #include "llvm/ADT/SmallString.h" 20 #include "llvm/ADT/StringExtras.h" 21 #include "llvm/ADT/StringMap.h" 22 #include "llvm/ADT/StringSet.h" 23 #include "llvm/Support/CommandLine.h" 24 #include "llvm/Support/InitLLVM.h" 25 #include "llvm/Support/MemoryBuffer.h" 26 #include "llvm/Support/Regex.h" 27 #include "llvm/Support/SourceMgr.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include <algorithm> 30 #include <cctype> 31 #include <map> 32 #include <string> 33 #include <system_error> 34 #include <vector> 35 using namespace llvm; 36 37 static cl::opt<std::string> 38 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required); 39 40 static cl::opt<std::string> 41 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"), 42 cl::init("-"), cl::value_desc("filename")); 43 44 static cl::list<std::string> CheckPrefixes( 45 "check-prefix", 46 cl::desc("Prefix to use from check file (defaults to 'CHECK')")); 47 static cl::alias CheckPrefixesAlias( 48 "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated, 49 cl::NotHidden, 50 cl::desc( 51 "Alias for -check-prefix permitting multiple comma separated values")); 52 53 static cl::opt<bool> NoCanonicalizeWhiteSpace( 54 "strict-whitespace", 55 cl::desc("Do not treat all horizontal whitespace as equivalent")); 56 57 static cl::list<std::string> ImplicitCheckNot( 58 "implicit-check-not", 59 cl::desc("Add an implicit negative check with this pattern to every\n" 60 "positive check. This can be used to ensure that no instances of\n" 61 "this pattern occur which are not matched by a positive pattern"), 62 cl::value_desc("pattern")); 63 64 static cl::list<std::string> GlobalDefines("D", cl::Prefix, 65 cl::desc("Define a variable to be used in capture patterns."), 66 cl::value_desc("VAR=VALUE")); 67 68 static cl::opt<bool> AllowEmptyInput( 69 "allow-empty", cl::init(false), 70 cl::desc("Allow the input file to be empty. This is useful when making\n" 71 "checks that some error message does not occur, for example.")); 72 73 static cl::opt<bool> MatchFullLines( 74 "match-full-lines", cl::init(false), 75 cl::desc("Require all positive matches to cover an entire input line.\n" 76 "Allows leading and trailing whitespace if --strict-whitespace\n" 77 "is not also passed.")); 78 79 static cl::opt<bool> EnableVarScope( 80 "enable-var-scope", cl::init(false), 81 cl::desc("Enables scope for regex variables. Variables with names that\n" 82 "do not start with '$' will be reset at the beginning of\n" 83 "each CHECK-LABEL block.")); 84 85 typedef cl::list<std::string>::const_iterator prefix_iterator; 86 87 //===----------------------------------------------------------------------===// 88 // Pattern Handling Code. 89 //===----------------------------------------------------------------------===// 90 91 namespace Check { 92 enum CheckType { 93 CheckNone = 0, 94 CheckPlain, 95 CheckNext, 96 CheckSame, 97 CheckNot, 98 CheckDAG, 99 CheckLabel, 100 CheckEmpty, 101 102 /// Indicates the pattern only matches the end of file. This is used for 103 /// trailing CHECK-NOTs. 104 CheckEOF, 105 106 /// Marks when parsing found a -NOT check combined with another CHECK suffix. 107 CheckBadNot 108 }; 109 } 110 111 class Pattern { 112 SMLoc PatternLoc; 113 114 /// A fixed string to match as the pattern or empty if this pattern requires 115 /// a regex match. 116 StringRef FixedStr; 117 118 /// A regex string to match as the pattern or empty if this pattern requires 119 /// a fixed string to match. 120 std::string RegExStr; 121 122 /// Entries in this vector map to uses of a variable in the pattern, e.g. 123 /// "foo[[bar]]baz". In this case, the RegExStr will contain "foobaz" and 124 /// we'll get an entry in this vector that tells us to insert the value of 125 /// bar at offset 3. 126 std::vector<std::pair<StringRef, unsigned>> VariableUses; 127 128 /// Maps definitions of variables to their parenthesized capture numbers. 129 /// 130 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 131 /// 1. 132 std::map<StringRef, unsigned> VariableDefs; 133 134 Check::CheckType CheckTy; 135 136 /// Contains the number of line this pattern is in. 137 unsigned LineNumber; 138 139 public: 140 explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {} 141 142 /// Returns the location in source code. 143 SMLoc getLoc() const { return PatternLoc; } 144 145 bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM, 146 unsigned LineNumber); 147 size_t Match(StringRef Buffer, size_t &MatchLen, 148 StringMap<StringRef> &VariableTable) const; 149 void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 150 const StringMap<StringRef> &VariableTable) const; 151 152 bool hasVariable() const { 153 return !(VariableUses.empty() && VariableDefs.empty()); 154 } 155 156 Check::CheckType getCheckTy() const { return CheckTy; } 157 158 private: 159 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); 160 void AddBackrefToRegEx(unsigned BackrefNum); 161 unsigned 162 ComputeMatchDistance(StringRef Buffer, 163 const StringMap<StringRef> &VariableTable) const; 164 bool EvaluateExpression(StringRef Expr, std::string &Value) const; 165 size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); 166 }; 167 168 /// Parses the given string into the Pattern. 169 /// 170 /// \p Prefix provides which prefix is being matched, \p SM provides the 171 /// SourceMgr used for error reports, and \p LineNumber is the line number in 172 /// the input file from which the pattern string was read. Returns true in 173 /// case of an error, false otherwise. 174 bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix, 175 SourceMgr &SM, unsigned LineNumber) { 176 bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot; 177 178 this->LineNumber = LineNumber; 179 PatternLoc = SMLoc::getFromPointer(PatternStr.data()); 180 181 if (!(NoCanonicalizeWhiteSpace && MatchFullLines)) 182 // Ignore trailing whitespace. 183 while (!PatternStr.empty() && 184 (PatternStr.back() == ' ' || PatternStr.back() == '\t')) 185 PatternStr = PatternStr.substr(0, PatternStr.size() - 1); 186 187 // Check that there is something on the line. 188 if (PatternStr.empty() && CheckTy != Check::CheckEmpty) { 189 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error, 190 "found empty check string with prefix '" + Prefix + ":'"); 191 return true; 192 } 193 194 if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) { 195 SM.PrintMessage( 196 PatternLoc, SourceMgr::DK_Error, 197 "found non-empty check string for empty check with prefix '" + Prefix + 198 ":'"); 199 return true; 200 } 201 202 if (CheckTy == Check::CheckEmpty) { 203 RegExStr = "(\n$)"; 204 return false; 205 } 206 207 // Check to see if this is a fixed string, or if it has regex pieces. 208 if (!MatchFullLinesHere && 209 (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos && 210 PatternStr.find("[[") == StringRef::npos))) { 211 FixedStr = PatternStr; 212 return false; 213 } 214 215 if (MatchFullLinesHere) { 216 RegExStr += '^'; 217 if (!NoCanonicalizeWhiteSpace) 218 RegExStr += " *"; 219 } 220 221 // Paren value #0 is for the fully matched string. Any new parenthesized 222 // values add from there. 223 unsigned CurParen = 1; 224 225 // Otherwise, there is at least one regex piece. Build up the regex pattern 226 // by escaping scary characters in fixed strings, building up one big regex. 227 while (!PatternStr.empty()) { 228 // RegEx matches. 229 if (PatternStr.startswith("{{")) { 230 // This is the start of a regex match. Scan for the }}. 231 size_t End = PatternStr.find("}}"); 232 if (End == StringRef::npos) { 233 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 234 SourceMgr::DK_Error, 235 "found start of regex string with no end '}}'"); 236 return true; 237 } 238 239 // Enclose {{}} patterns in parens just like [[]] even though we're not 240 // capturing the result for any purpose. This is required in case the 241 // expression contains an alternation like: CHECK: abc{{x|z}}def. We 242 // want this to turn into: "abc(x|z)def" not "abcx|zdef". 243 RegExStr += '('; 244 ++CurParen; 245 246 if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM)) 247 return true; 248 RegExStr += ')'; 249 250 PatternStr = PatternStr.substr(End + 2); 251 continue; 252 } 253 254 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .* 255 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The 256 // second form is [[foo]] which is a reference to foo. The variable name 257 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject 258 // it. This is to catch some common errors. 259 if (PatternStr.startswith("[[")) { 260 // Find the closing bracket pair ending the match. End is going to be an 261 // offset relative to the beginning of the match string. 262 size_t End = FindRegexVarEnd(PatternStr.substr(2), SM); 263 264 if (End == StringRef::npos) { 265 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 266 SourceMgr::DK_Error, 267 "invalid named regex reference, no ]] found"); 268 return true; 269 } 270 271 StringRef MatchStr = PatternStr.substr(2, End); 272 PatternStr = PatternStr.substr(End + 4); 273 274 // Get the regex name (e.g. "foo"). 275 size_t NameEnd = MatchStr.find(':'); 276 StringRef Name = MatchStr.substr(0, NameEnd); 277 278 if (Name.empty()) { 279 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 280 "invalid name in named regex: empty name"); 281 return true; 282 } 283 284 // Verify that the name/expression is well formed. FileCheck currently 285 // supports @LINE, @LINE+number, @LINE-number expressions. The check here 286 // is relaxed, more strict check is performed in \c EvaluateExpression. 287 bool IsExpression = false; 288 for (unsigned i = 0, e = Name.size(); i != e; ++i) { 289 if (i == 0) { 290 if (Name[i] == '$') // Global vars start with '$' 291 continue; 292 if (Name[i] == '@') { 293 if (NameEnd != StringRef::npos) { 294 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 295 SourceMgr::DK_Error, 296 "invalid name in named regex definition"); 297 return true; 298 } 299 IsExpression = true; 300 continue; 301 } 302 } 303 if (Name[i] != '_' && !isalnum(Name[i]) && 304 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) { 305 SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i), 306 SourceMgr::DK_Error, "invalid name in named regex"); 307 return true; 308 } 309 } 310 311 // Name can't start with a digit. 312 if (isdigit(static_cast<unsigned char>(Name[0]))) { 313 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 314 "invalid name in named regex"); 315 return true; 316 } 317 318 // Handle [[foo]]. 319 if (NameEnd == StringRef::npos) { 320 // Handle variables that were defined earlier on the same line by 321 // emitting a backreference. 322 if (VariableDefs.find(Name) != VariableDefs.end()) { 323 unsigned VarParenNum = VariableDefs[Name]; 324 if (VarParenNum < 1 || VarParenNum > 9) { 325 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 326 SourceMgr::DK_Error, 327 "Can't back-reference more than 9 variables"); 328 return true; 329 } 330 AddBackrefToRegEx(VarParenNum); 331 } else { 332 VariableUses.push_back(std::make_pair(Name, RegExStr.size())); 333 } 334 continue; 335 } 336 337 // Handle [[foo:.*]]. 338 VariableDefs[Name] = CurParen; 339 RegExStr += '('; 340 ++CurParen; 341 342 if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM)) 343 return true; 344 345 RegExStr += ')'; 346 } 347 348 // Handle fixed string matches. 349 // Find the end, which is the start of the next regex. 350 size_t FixedMatchEnd = PatternStr.find("{{"); 351 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); 352 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); 353 PatternStr = PatternStr.substr(FixedMatchEnd); 354 } 355 356 if (MatchFullLinesHere) { 357 if (!NoCanonicalizeWhiteSpace) 358 RegExStr += " *"; 359 RegExStr += '$'; 360 } 361 362 return false; 363 } 364 365 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) { 366 Regex R(RS); 367 std::string Error; 368 if (!R.isValid(Error)) { 369 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, 370 "invalid regex: " + Error); 371 return true; 372 } 373 374 RegExStr += RS.str(); 375 CurParen += R.getNumMatches(); 376 return false; 377 } 378 379 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { 380 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); 381 std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum); 382 RegExStr += Backref; 383 } 384 385 /// Evaluates expression and stores the result to \p Value. 386 /// 387 /// Returns true on success and false when the expression has invalid syntax. 388 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const { 389 // The only supported expression is @LINE([\+-]\d+)? 390 if (!Expr.startswith("@LINE")) 391 return false; 392 Expr = Expr.substr(StringRef("@LINE").size()); 393 int Offset = 0; 394 if (!Expr.empty()) { 395 if (Expr[0] == '+') 396 Expr = Expr.substr(1); 397 else if (Expr[0] != '-') 398 return false; 399 if (Expr.getAsInteger(10, Offset)) 400 return false; 401 } 402 Value = llvm::itostr(LineNumber + Offset); 403 return true; 404 } 405 406 /// Matches the pattern string against the input buffer \p Buffer 407 /// 408 /// This returns the position that is matched or npos if there is no match. If 409 /// there is a match, the size of the matched string is returned in \p 410 /// MatchLen. 411 /// 412 /// The \p VariableTable StringMap provides the current values of filecheck 413 /// variables and is updated if this match defines new values. 414 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, 415 StringMap<StringRef> &VariableTable) const { 416 // If this is the EOF pattern, match it immediately. 417 if (CheckTy == Check::CheckEOF) { 418 MatchLen = 0; 419 return Buffer.size(); 420 } 421 422 // If this is a fixed string pattern, just match it now. 423 if (!FixedStr.empty()) { 424 MatchLen = FixedStr.size(); 425 return Buffer.find(FixedStr); 426 } 427 428 // Regex match. 429 430 // If there are variable uses, we need to create a temporary string with the 431 // actual value. 432 StringRef RegExToMatch = RegExStr; 433 std::string TmpStr; 434 if (!VariableUses.empty()) { 435 TmpStr = RegExStr; 436 437 unsigned InsertOffset = 0; 438 for (const auto &VariableUse : VariableUses) { 439 std::string Value; 440 441 if (VariableUse.first[0] == '@') { 442 if (!EvaluateExpression(VariableUse.first, Value)) 443 return StringRef::npos; 444 } else { 445 StringMap<StringRef>::iterator it = 446 VariableTable.find(VariableUse.first); 447 // If the variable is undefined, return an error. 448 if (it == VariableTable.end()) 449 return StringRef::npos; 450 451 // Look up the value and escape it so that we can put it into the regex. 452 Value += Regex::escape(it->second); 453 } 454 455 // Plop it into the regex at the adjusted offset. 456 TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset, 457 Value.begin(), Value.end()); 458 InsertOffset += Value.size(); 459 } 460 461 // Match the newly constructed regex. 462 RegExToMatch = TmpStr; 463 } 464 465 SmallVector<StringRef, 4> MatchInfo; 466 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) 467 return StringRef::npos; 468 469 // Successful regex match. 470 assert(!MatchInfo.empty() && "Didn't get any match"); 471 StringRef FullMatch = MatchInfo[0]; 472 473 // If this defines any variables, remember their values. 474 for (const auto &VariableDef : VariableDefs) { 475 assert(VariableDef.second < MatchInfo.size() && "Internal paren error"); 476 VariableTable[VariableDef.first] = MatchInfo[VariableDef.second]; 477 } 478 479 MatchLen = FullMatch.size(); 480 return FullMatch.data() - Buffer.data(); 481 } 482 483 484 /// Computes an arbitrary estimate for the quality of matching this pattern at 485 /// the start of \p Buffer; a distance of zero should correspond to a perfect 486 /// match. 487 unsigned 488 Pattern::ComputeMatchDistance(StringRef Buffer, 489 const StringMap<StringRef> &VariableTable) const { 490 // Just compute the number of matching characters. For regular expressions, we 491 // just compare against the regex itself and hope for the best. 492 // 493 // FIXME: One easy improvement here is have the regex lib generate a single 494 // example regular expression which matches, and use that as the example 495 // string. 496 StringRef ExampleString(FixedStr); 497 if (ExampleString.empty()) 498 ExampleString = RegExStr; 499 500 // Only compare up to the first line in the buffer, or the string size. 501 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); 502 BufferPrefix = BufferPrefix.split('\n').first; 503 return BufferPrefix.edit_distance(ExampleString); 504 } 505 506 /// Prints additional information about a failure to match involving this 507 /// pattern. 508 void Pattern::PrintFailureInfo( 509 const SourceMgr &SM, StringRef Buffer, 510 const StringMap<StringRef> &VariableTable) const { 511 // If this was a regular expression using variables, print the current 512 // variable values. 513 if (!VariableUses.empty()) { 514 for (const auto &VariableUse : VariableUses) { 515 SmallString<256> Msg; 516 raw_svector_ostream OS(Msg); 517 StringRef Var = VariableUse.first; 518 if (Var[0] == '@') { 519 std::string Value; 520 if (EvaluateExpression(Var, Value)) { 521 OS << "with expression \""; 522 OS.write_escaped(Var) << "\" equal to \""; 523 OS.write_escaped(Value) << "\""; 524 } else { 525 OS << "uses incorrect expression \""; 526 OS.write_escaped(Var) << "\""; 527 } 528 } else { 529 StringMap<StringRef>::const_iterator it = VariableTable.find(Var); 530 531 // Check for undefined variable references. 532 if (it == VariableTable.end()) { 533 OS << "uses undefined variable \""; 534 OS.write_escaped(Var) << "\""; 535 } else { 536 OS << "with variable \""; 537 OS.write_escaped(Var) << "\" equal to \""; 538 OS.write_escaped(it->second) << "\""; 539 } 540 } 541 542 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 543 OS.str()); 544 } 545 } 546 547 // Attempt to find the closest/best fuzzy match. Usually an error happens 548 // because some string in the output didn't exactly match. In these cases, we 549 // would like to show the user a best guess at what "should have" matched, to 550 // save them having to actually check the input manually. 551 size_t NumLinesForward = 0; 552 size_t Best = StringRef::npos; 553 double BestQuality = 0; 554 555 // Use an arbitrary 4k limit on how far we will search. 556 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { 557 if (Buffer[i] == '\n') 558 ++NumLinesForward; 559 560 // Patterns have leading whitespace stripped, so skip whitespace when 561 // looking for something which looks like a pattern. 562 if (Buffer[i] == ' ' || Buffer[i] == '\t') 563 continue; 564 565 // Compute the "quality" of this match as an arbitrary combination of the 566 // match distance and the number of lines skipped to get to this match. 567 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable); 568 double Quality = Distance + (NumLinesForward / 100.); 569 570 if (Quality < BestQuality || Best == StringRef::npos) { 571 Best = i; 572 BestQuality = Quality; 573 } 574 } 575 576 // Print the "possible intended match here" line if we found something 577 // reasonable and not equal to what we showed in the "scanning from here" 578 // line. 579 if (Best && Best != StringRef::npos && BestQuality < 50) { 580 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best), 581 SourceMgr::DK_Note, "possible intended match here"); 582 583 // FIXME: If we wanted to be really friendly we would show why the match 584 // failed, as it can be hard to spot simple one character differences. 585 } 586 } 587 588 /// Finds the closing sequence of a regex variable usage or definition. 589 /// 590 /// \p Str has to point in the beginning of the definition (right after the 591 /// opening sequence). Returns the offset of the closing sequence within Str, 592 /// or npos if it was not found. 593 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { 594 // Offset keeps track of the current offset within the input Str 595 size_t Offset = 0; 596 // [...] Nesting depth 597 size_t BracketDepth = 0; 598 599 while (!Str.empty()) { 600 if (Str.startswith("]]") && BracketDepth == 0) 601 return Offset; 602 if (Str[0] == '\\') { 603 // Backslash escapes the next char within regexes, so skip them both. 604 Str = Str.substr(2); 605 Offset += 2; 606 } else { 607 switch (Str[0]) { 608 default: 609 break; 610 case '[': 611 BracketDepth++; 612 break; 613 case ']': 614 if (BracketDepth == 0) { 615 SM.PrintMessage(SMLoc::getFromPointer(Str.data()), 616 SourceMgr::DK_Error, 617 "missing closing \"]\" for regex variable"); 618 exit(1); 619 } 620 BracketDepth--; 621 break; 622 } 623 Str = Str.substr(1); 624 Offset++; 625 } 626 } 627 628 return StringRef::npos; 629 } 630 631 //===----------------------------------------------------------------------===// 632 // Check Strings. 633 //===----------------------------------------------------------------------===// 634 635 /// A check that we found in the input file. 636 struct CheckString { 637 /// The pattern to match. 638 Pattern Pat; 639 640 /// Which prefix name this check matched. 641 StringRef Prefix; 642 643 /// The location in the match file that the check string was specified. 644 SMLoc Loc; 645 646 /// All of the strings that are disallowed from occurring between this match 647 /// string and the previous one (or start of file). 648 std::vector<Pattern> DagNotStrings; 649 650 CheckString(const Pattern &P, StringRef S, SMLoc L) 651 : Pat(P), Prefix(S), Loc(L) {} 652 653 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, 654 size_t &MatchLen, StringMap<StringRef> &VariableTable) const; 655 656 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; 657 bool CheckSame(const SourceMgr &SM, StringRef Buffer) const; 658 bool CheckNot(const SourceMgr &SM, StringRef Buffer, 659 const std::vector<const Pattern *> &NotStrings, 660 StringMap<StringRef> &VariableTable) const; 661 size_t CheckDag(const SourceMgr &SM, StringRef Buffer, 662 std::vector<const Pattern *> &NotStrings, 663 StringMap<StringRef> &VariableTable) const; 664 }; 665 666 /// Canonicalize whitespaces in the file. Line endings are replaced with 667 /// UNIX-style '\n'. 668 static StringRef CanonicalizeFile(MemoryBuffer &MB, 669 SmallVectorImpl<char> &OutputBuffer) { 670 OutputBuffer.reserve(MB.getBufferSize()); 671 672 for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd(); 673 Ptr != End; ++Ptr) { 674 // Eliminate trailing dosish \r. 675 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { 676 continue; 677 } 678 679 // If current char is not a horizontal whitespace or if horizontal 680 // whitespace canonicalization is disabled, dump it to output as is. 681 if (NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) { 682 OutputBuffer.push_back(*Ptr); 683 continue; 684 } 685 686 // Otherwise, add one space and advance over neighboring space. 687 OutputBuffer.push_back(' '); 688 while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t')) 689 ++Ptr; 690 } 691 692 // Add a null byte and then return all but that byte. 693 OutputBuffer.push_back('\0'); 694 return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1); 695 } 696 697 static bool IsPartOfWord(char c) { 698 return (isalnum(c) || c == '-' || c == '_'); 699 } 700 701 // Get the size of the prefix extension. 702 static size_t CheckTypeSize(Check::CheckType Ty) { 703 switch (Ty) { 704 case Check::CheckNone: 705 case Check::CheckBadNot: 706 return 0; 707 708 case Check::CheckPlain: 709 return sizeof(":") - 1; 710 711 case Check::CheckNext: 712 return sizeof("-NEXT:") - 1; 713 714 case Check::CheckSame: 715 return sizeof("-SAME:") - 1; 716 717 case Check::CheckNot: 718 return sizeof("-NOT:") - 1; 719 720 case Check::CheckDAG: 721 return sizeof("-DAG:") - 1; 722 723 case Check::CheckLabel: 724 return sizeof("-LABEL:") - 1; 725 726 case Check::CheckEmpty: 727 return sizeof("-EMPTY:") - 1; 728 729 case Check::CheckEOF: 730 llvm_unreachable("Should not be using EOF size"); 731 } 732 733 llvm_unreachable("Bad check type"); 734 } 735 736 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) { 737 if (Buffer.size() <= Prefix.size()) 738 return Check::CheckNone; 739 740 char NextChar = Buffer[Prefix.size()]; 741 742 // Verify that the : is present after the prefix. 743 if (NextChar == ':') 744 return Check::CheckPlain; 745 746 if (NextChar != '-') 747 return Check::CheckNone; 748 749 StringRef Rest = Buffer.drop_front(Prefix.size() + 1); 750 if (Rest.startswith("NEXT:")) 751 return Check::CheckNext; 752 753 if (Rest.startswith("SAME:")) 754 return Check::CheckSame; 755 756 if (Rest.startswith("NOT:")) 757 return Check::CheckNot; 758 759 if (Rest.startswith("DAG:")) 760 return Check::CheckDAG; 761 762 if (Rest.startswith("LABEL:")) 763 return Check::CheckLabel; 764 765 if (Rest.startswith("EMPTY:")) 766 return Check::CheckEmpty; 767 768 // You can't combine -NOT with another suffix. 769 if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") || 770 Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") || 771 Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") || 772 Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:")) 773 return Check::CheckBadNot; 774 775 return Check::CheckNone; 776 } 777 778 // From the given position, find the next character after the word. 779 static size_t SkipWord(StringRef Str, size_t Loc) { 780 while (Loc < Str.size() && IsPartOfWord(Str[Loc])) 781 ++Loc; 782 return Loc; 783 } 784 785 /// Search the buffer for the first prefix in the prefix regular expression. 786 /// 787 /// This searches the buffer using the provided regular expression, however it 788 /// enforces constraints beyond that: 789 /// 1) The found prefix must not be a suffix of something that looks like 790 /// a valid prefix. 791 /// 2) The found prefix must be followed by a valid check type suffix using \c 792 /// FindCheckType above. 793 /// 794 /// The first match of the regular expression to satisfy these two is returned, 795 /// otherwise an empty StringRef is returned to indicate failure. 796 /// 797 /// If this routine returns a valid prefix, it will also shrink \p Buffer to 798 /// start at the beginning of the returned prefix, increment \p LineNumber for 799 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of 800 /// check found by examining the suffix. 801 /// 802 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy 803 /// is unspecified. 804 static StringRef FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer, 805 unsigned &LineNumber, 806 Check::CheckType &CheckTy) { 807 SmallVector<StringRef, 2> Matches; 808 809 while (!Buffer.empty()) { 810 // Find the first (longest) match using the RE. 811 if (!PrefixRE.match(Buffer, &Matches)) 812 // No match at all, bail. 813 return StringRef(); 814 815 StringRef Prefix = Matches[0]; 816 Matches.clear(); 817 818 assert(Prefix.data() >= Buffer.data() && 819 Prefix.data() < Buffer.data() + Buffer.size() && 820 "Prefix doesn't start inside of buffer!"); 821 size_t Loc = Prefix.data() - Buffer.data(); 822 StringRef Skipped = Buffer.substr(0, Loc); 823 Buffer = Buffer.drop_front(Loc); 824 LineNumber += Skipped.count('\n'); 825 826 // Check that the matched prefix isn't a suffix of some other check-like 827 // word. 828 // FIXME: This is a very ad-hoc check. it would be better handled in some 829 // other way. Among other things it seems hard to distinguish between 830 // intentional and unintentional uses of this feature. 831 if (Skipped.empty() || !IsPartOfWord(Skipped.back())) { 832 // Now extract the type. 833 CheckTy = FindCheckType(Buffer, Prefix); 834 835 // If we've found a valid check type for this prefix, we're done. 836 if (CheckTy != Check::CheckNone) 837 return Prefix; 838 } 839 840 // If we didn't successfully find a prefix, we need to skip this invalid 841 // prefix and continue scanning. We directly skip the prefix that was 842 // matched and any additional parts of that check-like word. 843 Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size())); 844 } 845 846 // We ran out of buffer while skipping partial matches so give up. 847 return StringRef(); 848 } 849 850 /// Read the check file, which specifies the sequence of expected strings. 851 /// 852 /// The strings are added to the CheckStrings vector. Returns true in case of 853 /// an error, false otherwise. 854 static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, 855 std::vector<CheckString> &CheckStrings) { 856 std::vector<Pattern> ImplicitNegativeChecks; 857 for (const auto &PatternString : ImplicitCheckNot) { 858 // Create a buffer with fake command line content in order to display the 859 // command line option responsible for the specific implicit CHECK-NOT. 860 std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str(); 861 std::string Suffix = "'"; 862 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy( 863 Prefix + PatternString + Suffix, "command line"); 864 865 StringRef PatternInBuffer = 866 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); 867 SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); 868 869 ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot)); 870 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer, 871 "IMPLICIT-CHECK", SM, 0); 872 } 873 874 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks; 875 876 // LineNumber keeps track of the line on which CheckPrefix instances are 877 // found. 878 unsigned LineNumber = 1; 879 880 while (1) { 881 Check::CheckType CheckTy; 882 883 // See if a prefix occurs in the memory buffer. 884 StringRef UsedPrefix = FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber, 885 CheckTy); 886 if (UsedPrefix.empty()) 887 break; 888 assert(UsedPrefix.data() == Buffer.data() && 889 "Failed to move Buffer's start forward, or pointed prefix outside " 890 "of the buffer!"); 891 892 // Location to use for error messages. 893 const char *UsedPrefixStart = UsedPrefix.data(); 894 895 // Skip the buffer to the end. 896 Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy)); 897 898 // Complain about useful-looking but unsupported suffixes. 899 if (CheckTy == Check::CheckBadNot) { 900 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, 901 "unsupported -NOT combo on prefix '" + UsedPrefix + "'"); 902 return true; 903 } 904 905 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore 906 // leading whitespace. 907 if (!(NoCanonicalizeWhiteSpace && MatchFullLines)) 908 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 909 910 // Scan ahead to the end of line. 911 size_t EOL = Buffer.find_first_of("\n\r"); 912 913 // Remember the location of the start of the pattern, for diagnostics. 914 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); 915 916 // Parse the pattern. 917 Pattern P(CheckTy); 918 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber)) 919 return true; 920 921 // Verify that CHECK-LABEL lines do not define or use variables 922 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { 923 SM.PrintMessage( 924 SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error, 925 "found '" + UsedPrefix + "-LABEL:'" 926 " with variable definition or use"); 927 return true; 928 } 929 930 Buffer = Buffer.substr(EOL); 931 932 // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them. 933 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame || 934 CheckTy == Check::CheckEmpty) && 935 CheckStrings.empty()) { 936 StringRef Type = CheckTy == Check::CheckNext 937 ? "NEXT" 938 : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME"; 939 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 940 SourceMgr::DK_Error, 941 "found '" + UsedPrefix + "-" + Type + 942 "' without previous '" + UsedPrefix + ": line"); 943 return true; 944 } 945 946 // Handle CHECK-DAG/-NOT. 947 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { 948 DagNotMatches.push_back(P); 949 continue; 950 } 951 952 // Okay, add the string we captured to the output vector and move on. 953 CheckStrings.emplace_back(P, UsedPrefix, PatternLoc); 954 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 955 DagNotMatches = ImplicitNegativeChecks; 956 } 957 958 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first 959 // prefix as a filler for the error message. 960 if (!DagNotMatches.empty()) { 961 CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(), 962 SMLoc::getFromPointer(Buffer.data())); 963 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 964 } 965 966 if (CheckStrings.empty()) { 967 errs() << "error: no check strings found with prefix" 968 << (CheckPrefixes.size() > 1 ? "es " : " "); 969 prefix_iterator I = CheckPrefixes.begin(); 970 prefix_iterator E = CheckPrefixes.end(); 971 if (I != E) { 972 errs() << "\'" << *I << ":'"; 973 ++I; 974 } 975 for (; I != E; ++I) 976 errs() << ", \'" << *I << ":'"; 977 978 errs() << '\n'; 979 return true; 980 } 981 982 return false; 983 } 984 985 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, const Pattern &Pat, 986 StringRef Buffer, 987 StringMap<StringRef> &VariableTable) { 988 // Otherwise, we have an error, emit an error message. 989 SM.PrintMessage(Loc, SourceMgr::DK_Error, 990 "expected string not found in input"); 991 992 // Print the "scanning from here" line. If the current position is at the 993 // end of a line, advance to the start of the next line. 994 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); 995 996 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 997 "scanning from here"); 998 999 // Allow the pattern to print additional information if desired. 1000 Pat.PrintFailureInfo(SM, Buffer, VariableTable); 1001 } 1002 1003 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr, 1004 StringRef Buffer, 1005 StringMap<StringRef> &VariableTable) { 1006 PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable); 1007 } 1008 1009 /// Count the number of newlines in the specified range. 1010 static unsigned CountNumNewlinesBetween(StringRef Range, 1011 const char *&FirstNewLine) { 1012 unsigned NumNewLines = 0; 1013 while (1) { 1014 // Scan for newline. 1015 Range = Range.substr(Range.find_first_of("\n\r")); 1016 if (Range.empty()) 1017 return NumNewLines; 1018 1019 ++NumNewLines; 1020 1021 // Handle \n\r and \r\n as a single newline. 1022 if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') && 1023 (Range[0] != Range[1])) 1024 Range = Range.substr(1); 1025 Range = Range.substr(1); 1026 1027 if (NumNewLines == 1) 1028 FirstNewLine = Range.begin(); 1029 } 1030 } 1031 1032 /// Match check string and its "not strings" and/or "dag strings". 1033 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer, 1034 bool IsLabelScanMode, size_t &MatchLen, 1035 StringMap<StringRef> &VariableTable) const { 1036 size_t LastPos = 0; 1037 std::vector<const Pattern *> NotStrings; 1038 1039 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL 1040 // bounds; we have not processed variable definitions within the bounded block 1041 // yet so cannot handle any final CHECK-DAG yet; this is handled when going 1042 // over the block again (including the last CHECK-LABEL) in normal mode. 1043 if (!IsLabelScanMode) { 1044 // Match "dag strings" (with mixed "not strings" if any). 1045 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable); 1046 if (LastPos == StringRef::npos) 1047 return StringRef::npos; 1048 } 1049 1050 // Match itself from the last position after matching CHECK-DAG. 1051 StringRef MatchBuffer = Buffer.substr(LastPos); 1052 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1053 if (MatchPos == StringRef::npos) { 1054 PrintCheckFailed(SM, *this, MatchBuffer, VariableTable); 1055 return StringRef::npos; 1056 } 1057 1058 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT 1059 // or CHECK-NOT 1060 if (!IsLabelScanMode) { 1061 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1062 1063 // If this check is a "CHECK-NEXT", verify that the previous match was on 1064 // the previous line (i.e. that there is one newline between them). 1065 if (CheckNext(SM, SkippedRegion)) 1066 return StringRef::npos; 1067 1068 // If this check is a "CHECK-SAME", verify that the previous match was on 1069 // the same line (i.e. that there is no newline between them). 1070 if (CheckSame(SM, SkippedRegion)) 1071 return StringRef::npos; 1072 1073 // If this match had "not strings", verify that they don't exist in the 1074 // skipped region. 1075 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1076 return StringRef::npos; 1077 } 1078 1079 return LastPos + MatchPos; 1080 } 1081 1082 /// Verify there is a single line in the given buffer. 1083 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { 1084 if (Pat.getCheckTy() != Check::CheckNext && 1085 Pat.getCheckTy() != Check::CheckEmpty) 1086 return false; 1087 1088 Twine CheckName = 1089 Prefix + 1090 Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT"); 1091 1092 // Count the number of newlines between the previous match and this one. 1093 assert(Buffer.data() != 1094 SM.getMemoryBuffer(SM.FindBufferContainingLoc( 1095 SMLoc::getFromPointer(Buffer.data()))) 1096 ->getBufferStart() && 1097 "CHECK-NEXT and CHECK-EMPTY can't be the first check in a file"); 1098 1099 const char *FirstNewLine = nullptr; 1100 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1101 1102 // For CHECK-EMPTY, the preceding new line is consumed by the pattern, so 1103 // this needs to be re-added. 1104 if (Pat.getCheckTy() == Check::CheckEmpty) 1105 ++NumNewLines; 1106 1107 if (NumNewLines == 0) { 1108 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1109 CheckName + ": is on the same line as previous match"); 1110 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1111 "'next' match was here"); 1112 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1113 "previous match ended here"); 1114 return true; 1115 } 1116 1117 if (NumNewLines != 1) { 1118 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1119 CheckName + 1120 ": is not on the line after the previous match"); 1121 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1122 "'next' match was here"); 1123 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1124 "previous match ended here"); 1125 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, 1126 "non-matching line after previous match is here"); 1127 return true; 1128 } 1129 1130 return false; 1131 } 1132 1133 /// Verify there is no newline in the given buffer. 1134 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const { 1135 if (Pat.getCheckTy() != Check::CheckSame) 1136 return false; 1137 1138 // Count the number of newlines between the previous match and this one. 1139 assert(Buffer.data() != 1140 SM.getMemoryBuffer(SM.FindBufferContainingLoc( 1141 SMLoc::getFromPointer(Buffer.data()))) 1142 ->getBufferStart() && 1143 "CHECK-SAME can't be the first check in a file"); 1144 1145 const char *FirstNewLine = nullptr; 1146 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1147 1148 if (NumNewLines != 0) { 1149 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1150 Prefix + 1151 "-SAME: is not on the same line as the previous match"); 1152 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1153 "'next' match was here"); 1154 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1155 "previous match ended here"); 1156 return true; 1157 } 1158 1159 return false; 1160 } 1161 1162 /// Verify there's no "not strings" in the given buffer. 1163 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer, 1164 const std::vector<const Pattern *> &NotStrings, 1165 StringMap<StringRef> &VariableTable) const { 1166 for (const Pattern *Pat : NotStrings) { 1167 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); 1168 1169 size_t MatchLen = 0; 1170 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable); 1171 1172 if (Pos == StringRef::npos) 1173 continue; 1174 1175 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Pos), 1176 SourceMgr::DK_Error, Prefix + "-NOT: string occurred!"); 1177 SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note, 1178 Prefix + "-NOT: pattern specified here"); 1179 return true; 1180 } 1181 1182 return false; 1183 } 1184 1185 /// Match "dag strings" and their mixed "not strings". 1186 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, 1187 std::vector<const Pattern *> &NotStrings, 1188 StringMap<StringRef> &VariableTable) const { 1189 if (DagNotStrings.empty()) 1190 return 0; 1191 1192 size_t LastPos = 0; 1193 size_t StartPos = LastPos; 1194 1195 for (const Pattern &Pat : DagNotStrings) { 1196 assert((Pat.getCheckTy() == Check::CheckDAG || 1197 Pat.getCheckTy() == Check::CheckNot) && 1198 "Invalid CHECK-DAG or CHECK-NOT!"); 1199 1200 if (Pat.getCheckTy() == Check::CheckNot) { 1201 NotStrings.push_back(&Pat); 1202 continue; 1203 } 1204 1205 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); 1206 1207 size_t MatchLen = 0, MatchPos; 1208 1209 // CHECK-DAG always matches from the start. 1210 StringRef MatchBuffer = Buffer.substr(StartPos); 1211 MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1212 // With a group of CHECK-DAGs, a single mismatching means the match on 1213 // that group of CHECK-DAGs fails immediately. 1214 if (MatchPos == StringRef::npos) { 1215 PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable); 1216 return StringRef::npos; 1217 } 1218 // Re-calc it as the offset relative to the start of the original string. 1219 MatchPos += StartPos; 1220 1221 if (!NotStrings.empty()) { 1222 if (MatchPos < LastPos) { 1223 // Reordered? 1224 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos), 1225 SourceMgr::DK_Error, 1226 Prefix + "-DAG: found a match of CHECK-DAG" 1227 " reordering across a CHECK-NOT"); 1228 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos), 1229 SourceMgr::DK_Note, 1230 Prefix + "-DAG: the farthest match of CHECK-DAG" 1231 " is found here"); 1232 SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note, 1233 Prefix + "-NOT: the crossed pattern specified" 1234 " here"); 1235 SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note, 1236 Prefix + "-DAG: the reordered pattern specified" 1237 " here"); 1238 return StringRef::npos; 1239 } 1240 // All subsequent CHECK-DAGs should be matched from the farthest 1241 // position of all precedent CHECK-DAGs (including this one.) 1242 StartPos = LastPos; 1243 // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to 1244 // CHECK-DAG, verify that there's no 'not' strings occurred in that 1245 // region. 1246 StringRef SkippedRegion = Buffer.slice(LastPos, MatchPos); 1247 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1248 return StringRef::npos; 1249 // Clear "not strings". 1250 NotStrings.clear(); 1251 } 1252 1253 // Update the last position with CHECK-DAG matches. 1254 LastPos = std::max(MatchPos + MatchLen, LastPos); 1255 } 1256 1257 return LastPos; 1258 } 1259 1260 // A check prefix must contain only alphanumeric, hyphens and underscores. 1261 static bool ValidateCheckPrefix(StringRef CheckPrefix) { 1262 Regex Validator("^[a-zA-Z0-9_-]*$"); 1263 return Validator.match(CheckPrefix); 1264 } 1265 1266 static bool ValidateCheckPrefixes() { 1267 StringSet<> PrefixSet; 1268 1269 for (StringRef Prefix : CheckPrefixes) { 1270 // Reject empty prefixes. 1271 if (Prefix == "") 1272 return false; 1273 1274 if (!PrefixSet.insert(Prefix).second) 1275 return false; 1276 1277 if (!ValidateCheckPrefix(Prefix)) 1278 return false; 1279 } 1280 1281 return true; 1282 } 1283 1284 // Combines the check prefixes into a single regex so that we can efficiently 1285 // scan for any of the set. 1286 // 1287 // The semantics are that the longest-match wins which matches our regex 1288 // library. 1289 static Regex buildCheckPrefixRegex() { 1290 // I don't think there's a way to specify an initial value for cl::list, 1291 // so if nothing was specified, add the default 1292 if (CheckPrefixes.empty()) 1293 CheckPrefixes.push_back("CHECK"); 1294 1295 // We already validated the contents of CheckPrefixes so just concatenate 1296 // them as alternatives. 1297 SmallString<32> PrefixRegexStr; 1298 for (StringRef Prefix : CheckPrefixes) { 1299 if (Prefix != CheckPrefixes.front()) 1300 PrefixRegexStr.push_back('|'); 1301 1302 PrefixRegexStr.append(Prefix); 1303 } 1304 1305 return Regex(PrefixRegexStr); 1306 } 1307 1308 static void DumpCommandLine(int argc, char **argv) { 1309 errs() << "FileCheck command line: "; 1310 for (int I = 0; I < argc; I++) 1311 errs() << " " << argv[I]; 1312 errs() << "\n"; 1313 } 1314 1315 // Remove local variables from \p VariableTable. Global variables 1316 // (start with '$') are preserved. 1317 static void ClearLocalVars(StringMap<StringRef> &VariableTable) { 1318 SmallVector<StringRef, 16> LocalVars; 1319 for (const auto &Var : VariableTable) 1320 if (Var.first()[0] != '$') 1321 LocalVars.push_back(Var.first()); 1322 1323 for (const auto &Var : LocalVars) 1324 VariableTable.erase(Var); 1325 } 1326 1327 /// Check the input to FileCheck provided in the \p Buffer against the \p 1328 /// CheckStrings read from the check file. 1329 /// 1330 /// Returns false if the input fails to satisfy the checks. 1331 bool CheckInput(SourceMgr &SM, StringRef Buffer, 1332 ArrayRef<CheckString> CheckStrings) { 1333 bool ChecksFailed = false; 1334 1335 /// VariableTable - This holds all the current filecheck variables. 1336 StringMap<StringRef> VariableTable; 1337 1338 for (const auto& Def : GlobalDefines) 1339 VariableTable.insert(StringRef(Def).split('=')); 1340 1341 unsigned i = 0, j = 0, e = CheckStrings.size(); 1342 while (true) { 1343 StringRef CheckRegion; 1344 if (j == e) { 1345 CheckRegion = Buffer; 1346 } else { 1347 const CheckString &CheckLabelStr = CheckStrings[j]; 1348 if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) { 1349 ++j; 1350 continue; 1351 } 1352 1353 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG 1354 size_t MatchLabelLen = 0; 1355 size_t MatchLabelPos = 1356 CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable); 1357 if (MatchLabelPos == StringRef::npos) 1358 // Immediately bail of CHECK-LABEL fails, nothing else we can do. 1359 return false; 1360 1361 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); 1362 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); 1363 ++j; 1364 } 1365 1366 if (EnableVarScope) 1367 ClearLocalVars(VariableTable); 1368 1369 for (; i != j; ++i) { 1370 const CheckString &CheckStr = CheckStrings[i]; 1371 1372 // Check each string within the scanned region, including a second check 1373 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) 1374 size_t MatchLen = 0; 1375 size_t MatchPos = 1376 CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable); 1377 1378 if (MatchPos == StringRef::npos) { 1379 ChecksFailed = true; 1380 i = j; 1381 break; 1382 } 1383 1384 CheckRegion = CheckRegion.substr(MatchPos + MatchLen); 1385 } 1386 1387 if (j == e) 1388 break; 1389 } 1390 1391 // Success if no checks failed. 1392 return !ChecksFailed; 1393 } 1394 1395 int main(int argc, char **argv) { 1396 InitLLVM X(argc, argv); 1397 cl::ParseCommandLineOptions(argc, argv); 1398 1399 if (!ValidateCheckPrefixes()) { 1400 errs() << "Supplied check-prefix is invalid! Prefixes must be unique and " 1401 "start with a letter and contain only alphanumeric characters, " 1402 "hyphens and underscores\n"; 1403 return 2; 1404 } 1405 1406 Regex PrefixRE = buildCheckPrefixRegex(); 1407 std::string REError; 1408 if (!PrefixRE.isValid(REError)) { 1409 errs() << "Unable to combine check-prefix strings into a prefix regular " 1410 "expression! This is likely a bug in FileCheck's verification of " 1411 "the check-prefix strings. Regular expression parsing failed " 1412 "with the following error: " 1413 << REError << "\n"; 1414 return 2; 1415 } 1416 1417 SourceMgr SM; 1418 1419 // Read the expected strings from the check file. 1420 ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr = 1421 MemoryBuffer::getFileOrSTDIN(CheckFilename); 1422 if (std::error_code EC = CheckFileOrErr.getError()) { 1423 errs() << "Could not open check file '" << CheckFilename 1424 << "': " << EC.message() << '\n'; 1425 return 2; 1426 } 1427 MemoryBuffer &CheckFile = *CheckFileOrErr.get(); 1428 1429 SmallString<4096> CheckFileBuffer; 1430 StringRef CheckFileText = CanonicalizeFile(CheckFile, CheckFileBuffer); 1431 1432 SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( 1433 CheckFileText, CheckFile.getBufferIdentifier()), 1434 SMLoc()); 1435 1436 std::vector<CheckString> CheckStrings; 1437 if (ReadCheckFile(SM, CheckFileText, PrefixRE, CheckStrings)) 1438 return 2; 1439 1440 // Open the file to check and add it to SourceMgr. 1441 ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr = 1442 MemoryBuffer::getFileOrSTDIN(InputFilename); 1443 if (std::error_code EC = InputFileOrErr.getError()) { 1444 errs() << "Could not open input file '" << InputFilename 1445 << "': " << EC.message() << '\n'; 1446 return 2; 1447 } 1448 MemoryBuffer &InputFile = *InputFileOrErr.get(); 1449 1450 if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) { 1451 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; 1452 DumpCommandLine(argc, argv); 1453 return 2; 1454 } 1455 1456 SmallString<4096> InputFileBuffer; 1457 StringRef InputFileText = CanonicalizeFile(InputFile, InputFileBuffer); 1458 1459 SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( 1460 InputFileText, InputFile.getBufferIdentifier()), 1461 SMLoc()); 1462 1463 return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1; 1464 } 1465