//===- FileCheck.cpp - Check that File's Contents match what is expected --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// FileCheck does a line-by-line check of a file that validates whether it
// contains the expected content. This is useful for regression tests etc.
//
// This program exits with an error status of 2 on error, exit status of 0 if
// the file matched the expected contents, and exit status of 1 if it did not
// contain the expected contents.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cctype>
#include <map>
#include <string>
#include <system_error>
#include <vector>
using namespace llvm;

/// Required positional argument: the file that contains the check directives.
static cl::opt<std::string>
    CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);

/// -input-file: the file whose contents are verified. The default value "-"
/// is described to the user as standard input.
static cl::opt<std::string>
    InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
                  cl::init("-"), cl::value_desc("filename"));

/// -check-prefix: the directive prefixes to look for in the check file. The
/// option may be given several times; each occurrence adds one prefix.
static cl::list<std::string> CheckPrefixes(
    "check-prefix",
    cl::desc("Prefix to use from check file (defaults to 'CHECK')"));

/// -check-prefixes: alias of -check-prefix that additionally accepts a
/// comma-separated list of prefixes in a single occurrence.
static cl::alias CheckPrefixesAlias(
    "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated,
    cl::NotHidden,
    cl::desc(
        "Alias for -check-prefix permitting multiple comma separated values"));
53 54 static cl::opt<bool> NoCanonicalizeWhiteSpace( 55 "strict-whitespace", 56 cl::desc("Do not treat all horizontal whitespace as equivalent")); 57 58 static cl::list<std::string> ImplicitCheckNot( 59 "implicit-check-not", 60 cl::desc("Add an implicit negative check with this pattern to every\n" 61 "positive check. This can be used to ensure that no instances of\n" 62 "this pattern occur which are not matched by a positive pattern"), 63 cl::value_desc("pattern")); 64 65 static cl::opt<bool> AllowEmptyInput( 66 "allow-empty", cl::init(false), 67 cl::desc("Allow the input file to be empty. This is useful when making\n" 68 "checks that some error message does not occur, for example.")); 69 70 static cl::opt<bool> MatchFullLines( 71 "match-full-lines", cl::init(false), 72 cl::desc("Require all positive matches to cover an entire input line.\n" 73 "Allows leading and trailing whitespace if --strict-whitespace\n" 74 "is not also passed.")); 75 76 typedef cl::list<std::string>::const_iterator prefix_iterator; 77 78 //===----------------------------------------------------------------------===// 79 // Pattern Handling Code. 80 //===----------------------------------------------------------------------===// 81 82 namespace Check { 83 enum CheckType { 84 CheckNone = 0, 85 CheckPlain, 86 CheckNext, 87 CheckSame, 88 CheckNot, 89 CheckDAG, 90 CheckLabel, 91 92 /// Indicates the pattern only matches the end of file. This is used for 93 /// trailing CHECK-NOTs. 94 CheckEOF, 95 96 /// Marks when parsing found a -NOT check combined with another CHECK suffix. 97 CheckBadNot 98 }; 99 } 100 101 class Pattern { 102 SMLoc PatternLoc; 103 104 /// A fixed string to match as the pattern or empty if this pattern requires 105 /// a regex match. 106 StringRef FixedStr; 107 108 /// A regex string to match as the pattern or empty if this pattern requires 109 /// a fixed string to match. 110 std::string RegExStr; 111 112 /// Entries in this vector map to uses of a variable in the pattern, e.g. 
113 /// "foo[[bar]]baz". In this case, the RegExStr will contain "foobaz" and 114 /// we'll get an entry in this vector that tells us to insert the value of 115 /// bar at offset 3. 116 std::vector<std::pair<StringRef, unsigned>> VariableUses; 117 118 /// Maps definitions of variables to their parenthesized capture numbers. 119 /// 120 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 121 /// 1. 122 std::map<StringRef, unsigned> VariableDefs; 123 124 Check::CheckType CheckTy; 125 126 /// Contains the number of line this pattern is in. 127 unsigned LineNumber; 128 129 public: 130 explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {} 131 132 /// Returns the location in source code. 133 SMLoc getLoc() const { return PatternLoc; } 134 135 bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM, 136 unsigned LineNumber); 137 size_t Match(StringRef Buffer, size_t &MatchLen, 138 StringMap<StringRef> &VariableTable) const; 139 void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 140 const StringMap<StringRef> &VariableTable) const; 141 142 bool hasVariable() const { 143 return !(VariableUses.empty() && VariableDefs.empty()); 144 } 145 146 Check::CheckType getCheckTy() const { return CheckTy; } 147 148 private: 149 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); 150 void AddBackrefToRegEx(unsigned BackrefNum); 151 unsigned 152 ComputeMatchDistance(StringRef Buffer, 153 const StringMap<StringRef> &VariableTable) const; 154 bool EvaluateExpression(StringRef Expr, std::string &Value) const; 155 size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); 156 }; 157 158 /// Parses the given string into the Pattern. 159 /// 160 /// \p Prefix provides which prefix is being matched, \p SM provides the 161 /// SourceMgr used for error reports, and \p LineNumber is the line number in 162 /// the input file from which the pattern string was read. Returns true in 163 /// case of an error, false otherwise. 
164 bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix, 165 SourceMgr &SM, unsigned LineNumber) { 166 bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot; 167 168 this->LineNumber = LineNumber; 169 PatternLoc = SMLoc::getFromPointer(PatternStr.data()); 170 171 if (!(NoCanonicalizeWhiteSpace && MatchFullLines)) 172 // Ignore trailing whitespace. 173 while (!PatternStr.empty() && 174 (PatternStr.back() == ' ' || PatternStr.back() == '\t')) 175 PatternStr = PatternStr.substr(0, PatternStr.size() - 1); 176 177 // Check that there is something on the line. 178 if (PatternStr.empty()) { 179 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error, 180 "found empty check string with prefix '" + Prefix + ":'"); 181 return true; 182 } 183 184 // Check to see if this is a fixed string, or if it has regex pieces. 185 if (!MatchFullLinesHere && 186 (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos && 187 PatternStr.find("[[") == StringRef::npos))) { 188 FixedStr = PatternStr; 189 return false; 190 } 191 192 if (MatchFullLinesHere) { 193 RegExStr += '^'; 194 if (!NoCanonicalizeWhiteSpace) 195 RegExStr += " *"; 196 } 197 198 // Paren value #0 is for the fully matched string. Any new parenthesized 199 // values add from there. 200 unsigned CurParen = 1; 201 202 // Otherwise, there is at least one regex piece. Build up the regex pattern 203 // by escaping scary characters in fixed strings, building up one big regex. 204 while (!PatternStr.empty()) { 205 // RegEx matches. 206 if (PatternStr.startswith("{{")) { 207 // This is the start of a regex match. Scan for the }}. 208 size_t End = PatternStr.find("}}"); 209 if (End == StringRef::npos) { 210 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 211 SourceMgr::DK_Error, 212 "found start of regex string with no end '}}'"); 213 return true; 214 } 215 216 // Enclose {{}} patterns in parens just like [[]] even though we're not 217 // capturing the result for any purpose. 
This is required in case the 218 // expression contains an alternation like: CHECK: abc{{x|z}}def. We 219 // want this to turn into: "abc(x|z)def" not "abcx|zdef". 220 RegExStr += '('; 221 ++CurParen; 222 223 if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM)) 224 return true; 225 RegExStr += ')'; 226 227 PatternStr = PatternStr.substr(End + 2); 228 continue; 229 } 230 231 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .* 232 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The 233 // second form is [[foo]] which is a reference to foo. The variable name 234 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject 235 // it. This is to catch some common errors. 236 if (PatternStr.startswith("[[")) { 237 // Find the closing bracket pair ending the match. End is going to be an 238 // offset relative to the beginning of the match string. 239 size_t End = FindRegexVarEnd(PatternStr.substr(2), SM); 240 241 if (End == StringRef::npos) { 242 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 243 SourceMgr::DK_Error, 244 "invalid named regex reference, no ]] found"); 245 return true; 246 } 247 248 StringRef MatchStr = PatternStr.substr(2, End); 249 PatternStr = PatternStr.substr(End + 4); 250 251 // Get the regex name (e.g. "foo"). 252 size_t NameEnd = MatchStr.find(':'); 253 StringRef Name = MatchStr.substr(0, NameEnd); 254 255 if (Name.empty()) { 256 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 257 "invalid name in named regex: empty name"); 258 return true; 259 } 260 261 // Verify that the name/expression is well formed. FileCheck currently 262 // supports @LINE, @LINE+number, @LINE-number expressions. The check here 263 // is relaxed, more strict check is performed in \c EvaluateExpression. 
264 bool IsExpression = false; 265 for (unsigned i = 0, e = Name.size(); i != e; ++i) { 266 if (i == 0 && Name[i] == '@') { 267 if (NameEnd != StringRef::npos) { 268 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 269 SourceMgr::DK_Error, 270 "invalid name in named regex definition"); 271 return true; 272 } 273 IsExpression = true; 274 continue; 275 } 276 if (Name[i] != '_' && !isalnum(Name[i]) && 277 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) { 278 SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i), 279 SourceMgr::DK_Error, "invalid name in named regex"); 280 return true; 281 } 282 } 283 284 // Name can't start with a digit. 285 if (isdigit(static_cast<unsigned char>(Name[0]))) { 286 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 287 "invalid name in named regex"); 288 return true; 289 } 290 291 // Handle [[foo]]. 292 if (NameEnd == StringRef::npos) { 293 // Handle variables that were defined earlier on the same line by 294 // emitting a backreference. 295 if (VariableDefs.find(Name) != VariableDefs.end()) { 296 unsigned VarParenNum = VariableDefs[Name]; 297 if (VarParenNum < 1 || VarParenNum > 9) { 298 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 299 SourceMgr::DK_Error, 300 "Can't back-reference more than 9 variables"); 301 return true; 302 } 303 AddBackrefToRegEx(VarParenNum); 304 } else { 305 VariableUses.push_back(std::make_pair(Name, RegExStr.size())); 306 } 307 continue; 308 } 309 310 // Handle [[foo:.*]]. 311 VariableDefs[Name] = CurParen; 312 RegExStr += '('; 313 ++CurParen; 314 315 if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM)) 316 return true; 317 318 RegExStr += ')'; 319 } 320 321 // Handle fixed string matches. 322 // Find the end, which is the start of the next regex. 
323 size_t FixedMatchEnd = PatternStr.find("{{"); 324 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); 325 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); 326 PatternStr = PatternStr.substr(FixedMatchEnd); 327 } 328 329 if (MatchFullLinesHere) { 330 if (!NoCanonicalizeWhiteSpace) 331 RegExStr += " *"; 332 RegExStr += '$'; 333 } 334 335 return false; 336 } 337 338 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) { 339 Regex R(RS); 340 std::string Error; 341 if (!R.isValid(Error)) { 342 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, 343 "invalid regex: " + Error); 344 return true; 345 } 346 347 RegExStr += RS.str(); 348 CurParen += R.getNumMatches(); 349 return false; 350 } 351 352 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { 353 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); 354 std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum); 355 RegExStr += Backref; 356 } 357 358 /// Evaluates expression and stores the result to \p Value. 359 /// 360 /// Returns true on success and false when the expression has invalid syntax. 361 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const { 362 // The only supported expression is @LINE([\+-]\d+)? 363 if (!Expr.startswith("@LINE")) 364 return false; 365 Expr = Expr.substr(StringRef("@LINE").size()); 366 int Offset = 0; 367 if (!Expr.empty()) { 368 if (Expr[0] == '+') 369 Expr = Expr.substr(1); 370 else if (Expr[0] != '-') 371 return false; 372 if (Expr.getAsInteger(10, Offset)) 373 return false; 374 } 375 Value = llvm::itostr(LineNumber + Offset); 376 return true; 377 } 378 379 /// Matches the pattern string against the input buffer \p Buffer 380 /// 381 /// This returns the position that is matched or npos if there is no match. If 382 /// there is a match, the size of the matched string is returned in \p 383 /// MatchLen. 
384 /// 385 /// The \p VariableTable StringMap provides the current values of filecheck 386 /// variables and is updated if this match defines new values. 387 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, 388 StringMap<StringRef> &VariableTable) const { 389 // If this is the EOF pattern, match it immediately. 390 if (CheckTy == Check::CheckEOF) { 391 MatchLen = 0; 392 return Buffer.size(); 393 } 394 395 // If this is a fixed string pattern, just match it now. 396 if (!FixedStr.empty()) { 397 MatchLen = FixedStr.size(); 398 return Buffer.find(FixedStr); 399 } 400 401 // Regex match. 402 403 // If there are variable uses, we need to create a temporary string with the 404 // actual value. 405 StringRef RegExToMatch = RegExStr; 406 std::string TmpStr; 407 if (!VariableUses.empty()) { 408 TmpStr = RegExStr; 409 410 unsigned InsertOffset = 0; 411 for (const auto &VariableUse : VariableUses) { 412 std::string Value; 413 414 if (VariableUse.first[0] == '@') { 415 if (!EvaluateExpression(VariableUse.first, Value)) 416 return StringRef::npos; 417 } else { 418 StringMap<StringRef>::iterator it = 419 VariableTable.find(VariableUse.first); 420 // If the variable is undefined, return an error. 421 if (it == VariableTable.end()) 422 return StringRef::npos; 423 424 // Look up the value and escape it so that we can put it into the regex. 425 Value += Regex::escape(it->second); 426 } 427 428 // Plop it into the regex at the adjusted offset. 429 TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset, 430 Value.begin(), Value.end()); 431 InsertOffset += Value.size(); 432 } 433 434 // Match the newly constructed regex. 435 RegExToMatch = TmpStr; 436 } 437 438 SmallVector<StringRef, 4> MatchInfo; 439 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) 440 return StringRef::npos; 441 442 // Successful regex match. 
443 assert(!MatchInfo.empty() && "Didn't get any match"); 444 StringRef FullMatch = MatchInfo[0]; 445 446 // If this defines any variables, remember their values. 447 for (const auto &VariableDef : VariableDefs) { 448 assert(VariableDef.second < MatchInfo.size() && "Internal paren error"); 449 VariableTable[VariableDef.first] = MatchInfo[VariableDef.second]; 450 } 451 452 MatchLen = FullMatch.size(); 453 return FullMatch.data() - Buffer.data(); 454 } 455 456 457 /// Computes an arbitrary estimate for the quality of matching this pattern at 458 /// the start of \p Buffer; a distance of zero should correspond to a perfect 459 /// match. 460 unsigned 461 Pattern::ComputeMatchDistance(StringRef Buffer, 462 const StringMap<StringRef> &VariableTable) const { 463 // Just compute the number of matching characters. For regular expressions, we 464 // just compare against the regex itself and hope for the best. 465 // 466 // FIXME: One easy improvement here is have the regex lib generate a single 467 // example regular expression which matches, and use that as the example 468 // string. 469 StringRef ExampleString(FixedStr); 470 if (ExampleString.empty()) 471 ExampleString = RegExStr; 472 473 // Only compare up to the first line in the buffer, or the string size. 474 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); 475 BufferPrefix = BufferPrefix.split('\n').first; 476 return BufferPrefix.edit_distance(ExampleString); 477 } 478 479 /// Prints additional information about a failure to match involving this 480 /// pattern. 481 void Pattern::PrintFailureInfo( 482 const SourceMgr &SM, StringRef Buffer, 483 const StringMap<StringRef> &VariableTable) const { 484 // If this was a regular expression using variables, print the current 485 // variable values. 
486 if (!VariableUses.empty()) { 487 for (const auto &VariableUse : VariableUses) { 488 SmallString<256> Msg; 489 raw_svector_ostream OS(Msg); 490 StringRef Var = VariableUse.first; 491 if (Var[0] == '@') { 492 std::string Value; 493 if (EvaluateExpression(Var, Value)) { 494 OS << "with expression \""; 495 OS.write_escaped(Var) << "\" equal to \""; 496 OS.write_escaped(Value) << "\""; 497 } else { 498 OS << "uses incorrect expression \""; 499 OS.write_escaped(Var) << "\""; 500 } 501 } else { 502 StringMap<StringRef>::const_iterator it = VariableTable.find(Var); 503 504 // Check for undefined variable references. 505 if (it == VariableTable.end()) { 506 OS << "uses undefined variable \""; 507 OS.write_escaped(Var) << "\""; 508 } else { 509 OS << "with variable \""; 510 OS.write_escaped(Var) << "\" equal to \""; 511 OS.write_escaped(it->second) << "\""; 512 } 513 } 514 515 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 516 OS.str()); 517 } 518 } 519 520 // Attempt to find the closest/best fuzzy match. Usually an error happens 521 // because some string in the output didn't exactly match. In these cases, we 522 // would like to show the user a best guess at what "should have" matched, to 523 // save them having to actually check the input manually. 524 size_t NumLinesForward = 0; 525 size_t Best = StringRef::npos; 526 double BestQuality = 0; 527 528 // Use an arbitrary 4k limit on how far we will search. 529 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { 530 if (Buffer[i] == '\n') 531 ++NumLinesForward; 532 533 // Patterns have leading whitespace stripped, so skip whitespace when 534 // looking for something which looks like a pattern. 535 if (Buffer[i] == ' ' || Buffer[i] == '\t') 536 continue; 537 538 // Compute the "quality" of this match as an arbitrary combination of the 539 // match distance and the number of lines skipped to get to this match. 
540 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable); 541 double Quality = Distance + (NumLinesForward / 100.); 542 543 if (Quality < BestQuality || Best == StringRef::npos) { 544 Best = i; 545 BestQuality = Quality; 546 } 547 } 548 549 // Print the "possible intended match here" line if we found something 550 // reasonable and not equal to what we showed in the "scanning from here" 551 // line. 552 if (Best && Best != StringRef::npos && BestQuality < 50) { 553 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best), 554 SourceMgr::DK_Note, "possible intended match here"); 555 556 // FIXME: If we wanted to be really friendly we would show why the match 557 // failed, as it can be hard to spot simple one character differences. 558 } 559 } 560 561 /// Finds the closing sequence of a regex variable usage or definition. 562 /// 563 /// \p Str has to point in the beginning of the definition (right after the 564 /// opening sequence). Returns the offset of the closing sequence within Str, 565 /// or npos if it was not found. 566 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { 567 // Offset keeps track of the current offset within the input Str 568 size_t Offset = 0; 569 // [...] Nesting depth 570 size_t BracketDepth = 0; 571 572 while (!Str.empty()) { 573 if (Str.startswith("]]") && BracketDepth == 0) 574 return Offset; 575 if (Str[0] == '\\') { 576 // Backslash escapes the next char within regexes, so skip them both. 
577 Str = Str.substr(2); 578 Offset += 2; 579 } else { 580 switch (Str[0]) { 581 default: 582 break; 583 case '[': 584 BracketDepth++; 585 break; 586 case ']': 587 if (BracketDepth == 0) { 588 SM.PrintMessage(SMLoc::getFromPointer(Str.data()), 589 SourceMgr::DK_Error, 590 "missing closing \"]\" for regex variable"); 591 exit(1); 592 } 593 BracketDepth--; 594 break; 595 } 596 Str = Str.substr(1); 597 Offset++; 598 } 599 } 600 601 return StringRef::npos; 602 } 603 604 //===----------------------------------------------------------------------===// 605 // Check Strings. 606 //===----------------------------------------------------------------------===// 607 608 /// A check that we found in the input file. 609 struct CheckString { 610 /// The pattern to match. 611 Pattern Pat; 612 613 /// Which prefix name this check matched. 614 StringRef Prefix; 615 616 /// The location in the match file that the check string was specified. 617 SMLoc Loc; 618 619 /// All of the strings that are disallowed from occurring between this match 620 /// string and the previous one (or start of file). 621 std::vector<Pattern> DagNotStrings; 622 623 CheckString(const Pattern &P, StringRef S, SMLoc L) 624 : Pat(P), Prefix(S), Loc(L) {} 625 626 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, 627 size_t &MatchLen, StringMap<StringRef> &VariableTable) const; 628 629 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; 630 bool CheckSame(const SourceMgr &SM, StringRef Buffer) const; 631 bool CheckNot(const SourceMgr &SM, StringRef Buffer, 632 const std::vector<const Pattern *> &NotStrings, 633 StringMap<StringRef> &VariableTable) const; 634 size_t CheckDag(const SourceMgr &SM, StringRef Buffer, 635 std::vector<const Pattern *> &NotStrings, 636 StringMap<StringRef> &VariableTable) const; 637 }; 638 639 /// Canonicalize whitespaces in the file. Line endings are replaced with 640 /// UNIX-style '\n'. 
641 static StringRef CanonicalizeFile(MemoryBuffer &MB, 642 SmallVectorImpl<char> &OutputBuffer) { 643 OutputBuffer.reserve(MB.getBufferSize()); 644 645 for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd(); 646 Ptr != End; ++Ptr) { 647 // Eliminate trailing dosish \r. 648 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { 649 continue; 650 } 651 652 // If current char is not a horizontal whitespace or if horizontal 653 // whitespace canonicalization is disabled, dump it to output as is. 654 if (NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) { 655 OutputBuffer.push_back(*Ptr); 656 continue; 657 } 658 659 // Otherwise, add one space and advance over neighboring space. 660 OutputBuffer.push_back(' '); 661 while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t')) 662 ++Ptr; 663 } 664 665 // Add a null byte and then return all but that byte. 666 OutputBuffer.push_back('\0'); 667 return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1); 668 } 669 670 static bool IsPartOfWord(char c) { 671 return (isalnum(c) || c == '-' || c == '_'); 672 } 673 674 // Get the size of the prefix extension. 675 static size_t CheckTypeSize(Check::CheckType Ty) { 676 switch (Ty) { 677 case Check::CheckNone: 678 case Check::CheckBadNot: 679 return 0; 680 681 case Check::CheckPlain: 682 return sizeof(":") - 1; 683 684 case Check::CheckNext: 685 return sizeof("-NEXT:") - 1; 686 687 case Check::CheckSame: 688 return sizeof("-SAME:") - 1; 689 690 case Check::CheckNot: 691 return sizeof("-NOT:") - 1; 692 693 case Check::CheckDAG: 694 return sizeof("-DAG:") - 1; 695 696 case Check::CheckLabel: 697 return sizeof("-LABEL:") - 1; 698 699 case Check::CheckEOF: 700 llvm_unreachable("Should not be using EOF size"); 701 } 702 703 llvm_unreachable("Bad check type"); 704 } 705 706 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) { 707 char NextChar = Buffer[Prefix.size()]; 708 709 // Verify that the : is present after the prefix. 
710 if (NextChar == ':') 711 return Check::CheckPlain; 712 713 if (NextChar != '-') 714 return Check::CheckNone; 715 716 StringRef Rest = Buffer.drop_front(Prefix.size() + 1); 717 if (Rest.startswith("NEXT:")) 718 return Check::CheckNext; 719 720 if (Rest.startswith("SAME:")) 721 return Check::CheckSame; 722 723 if (Rest.startswith("NOT:")) 724 return Check::CheckNot; 725 726 if (Rest.startswith("DAG:")) 727 return Check::CheckDAG; 728 729 if (Rest.startswith("LABEL:")) 730 return Check::CheckLabel; 731 732 // You can't combine -NOT with another suffix. 733 if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") || 734 Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") || 735 Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:")) 736 return Check::CheckBadNot; 737 738 return Check::CheckNone; 739 } 740 741 // From the given position, find the next character after the word. 742 static size_t SkipWord(StringRef Str, size_t Loc) { 743 while (Loc < Str.size() && IsPartOfWord(Str[Loc])) 744 ++Loc; 745 return Loc; 746 } 747 748 /// Search the buffer for the first prefix in the prefix regular expression. 749 /// 750 /// This searches the buffer using the provided regular expression, however it 751 /// enforces constraints beyond that: 752 /// 1) The found prefix must not be a suffix of something that looks like 753 /// a valid prefix. 754 /// 2) The found prefix must be followed by a valid check type suffix using \c 755 /// FindCheckType above. 756 /// 757 /// The first match of the regular expression to satisfy these two is returned, 758 /// otherwise an empty StringRef is returned to indicate failure. 759 /// 760 /// If this routine returns a valid prefix, it will also shrink \p Buffer to 761 /// start at the beginning of the returned prefix, increment \p LineNumber for 762 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of 763 /// check found by examining the suffix. 
764 /// 765 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy 766 /// is unspecified. 767 static StringRef FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer, 768 unsigned &LineNumber, 769 Check::CheckType &CheckTy) { 770 SmallVector<StringRef, 2> Matches; 771 772 while (!Buffer.empty()) { 773 // Find the first (longest) match using the RE. 774 if (!PrefixRE.match(Buffer, &Matches)) 775 // No match at all, bail. 776 return StringRef(); 777 778 StringRef Prefix = Matches[0]; 779 Matches.clear(); 780 781 assert(Prefix.data() >= Buffer.data() && 782 Prefix.data() < Buffer.data() + Buffer.size() && 783 "Prefix doesn't start inside of buffer!"); 784 size_t Loc = Prefix.data() - Buffer.data(); 785 StringRef Skipped = Buffer.substr(0, Loc); 786 Buffer = Buffer.drop_front(Loc); 787 LineNumber += Skipped.count('\n'); 788 789 // Check that the matched prefix isn't a suffix of some other check-like 790 // word. 791 // FIXME: This is a very ad-hoc check. it would be better handled in some 792 // other way. Among other things it seems hard to distinguish between 793 // intentional and unintentional uses of this feature. 794 if (Skipped.empty() || !IsPartOfWord(Skipped.back())) { 795 // Now extract the type. 796 CheckTy = FindCheckType(Buffer, Prefix); 797 798 // If we've found a valid check type for this prefix, we're done. 799 if (CheckTy != Check::CheckNone) 800 return Prefix; 801 } 802 803 // If we didn't successfully find a prefix, we need to skip this invalid 804 // prefix and continue scanning. We directly skip the prefix that was 805 // matched and any additional parts of that check-like word. 806 Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size())); 807 } 808 809 // We ran out of buffer while skipping partial matches so give up. 810 return StringRef(); 811 } 812 813 /// Read the check file, which specifies the sequence of expected strings. 814 /// 815 /// The strings are added to the CheckStrings vector. 
Returns true in case of 816 /// an error, false otherwise. 817 static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, 818 std::vector<CheckString> &CheckStrings) { 819 std::vector<Pattern> ImplicitNegativeChecks; 820 for (const auto &PatternString : ImplicitCheckNot) { 821 // Create a buffer with fake command line content in order to display the 822 // command line option responsible for the specific implicit CHECK-NOT. 823 std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str(); 824 std::string Suffix = "'"; 825 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy( 826 Prefix + PatternString + Suffix, "command line"); 827 828 StringRef PatternInBuffer = 829 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); 830 SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); 831 832 ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot)); 833 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer, 834 "IMPLICIT-CHECK", SM, 0); 835 } 836 837 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks; 838 839 // LineNumber keeps track of the line on which CheckPrefix instances are 840 // found. 841 unsigned LineNumber = 1; 842 843 while (1) { 844 Check::CheckType CheckTy; 845 846 // See if a prefix occurs in the memory buffer. 847 StringRef UsedPrefix = FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber, 848 CheckTy); 849 if (UsedPrefix.empty()) 850 break; 851 assert(UsedPrefix.data() == Buffer.data() && 852 "Failed to move Buffer's start forward, or pointed prefix outside " 853 "of the buffer!"); 854 855 // Location to use for error messages. 856 const char *UsedPrefixStart = UsedPrefix.data(); 857 858 // Skip the buffer to the end. 859 Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy)); 860 861 // Complain about useful-looking but unsupported suffixes. 
862 if (CheckTy == Check::CheckBadNot) { 863 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, 864 "unsupported -NOT combo on prefix '" + UsedPrefix + "'"); 865 return true; 866 } 867 868 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore 869 // leading whitespace. 870 if (!(NoCanonicalizeWhiteSpace && MatchFullLines)) 871 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 872 873 // Scan ahead to the end of line. 874 size_t EOL = Buffer.find_first_of("\n\r"); 875 876 // Remember the location of the start of the pattern, for diagnostics. 877 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); 878 879 // Parse the pattern. 880 Pattern P(CheckTy); 881 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber)) 882 return true; 883 884 // Verify that CHECK-LABEL lines do not define or use variables 885 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { 886 SM.PrintMessage( 887 SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error, 888 "found '" + UsedPrefix + "-LABEL:'" 889 " with variable definition or use"); 890 return true; 891 } 892 893 Buffer = Buffer.substr(EOL); 894 895 // Verify that CHECK-NEXT lines have at least one CHECK line before them. 896 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) && 897 CheckStrings.empty()) { 898 StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME"; 899 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 900 SourceMgr::DK_Error, 901 "found '" + UsedPrefix + "-" + Type + 902 "' without previous '" + UsedPrefix + ": line"); 903 return true; 904 } 905 906 // Handle CHECK-DAG/-NOT. 907 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { 908 DagNotMatches.push_back(P); 909 continue; 910 } 911 912 // Okay, add the string we captured to the output vector and move on. 
913 CheckStrings.emplace_back(P, UsedPrefix, PatternLoc); 914 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 915 DagNotMatches = ImplicitNegativeChecks; 916 } 917 918 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first 919 // prefix as a filler for the error message. 920 if (!DagNotMatches.empty()) { 921 CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(), 922 SMLoc::getFromPointer(Buffer.data())); 923 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 924 } 925 926 if (CheckStrings.empty()) { 927 errs() << "error: no check strings found with prefix" 928 << (CheckPrefixes.size() > 1 ? "es " : " "); 929 prefix_iterator I = CheckPrefixes.begin(); 930 prefix_iterator E = CheckPrefixes.end(); 931 if (I != E) { 932 errs() << "\'" << *I << ":'"; 933 ++I; 934 } 935 for (; I != E; ++I) 936 errs() << ", \'" << *I << ":'"; 937 938 errs() << '\n'; 939 return true; 940 } 941 942 return false; 943 } 944 945 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, const Pattern &Pat, 946 StringRef Buffer, 947 StringMap<StringRef> &VariableTable) { 948 // Otherwise, we have an error, emit an error message. 949 SM.PrintMessage(Loc, SourceMgr::DK_Error, 950 "expected string not found in input"); 951 952 // Print the "scanning from here" line. If the current position is at the 953 // end of a line, advance to the start of the next line. 954 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); 955 956 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 957 "scanning from here"); 958 959 // Allow the pattern to print additional information if desired. 
960 Pat.PrintFailureInfo(SM, Buffer, VariableTable); 961 } 962 963 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr, 964 StringRef Buffer, 965 StringMap<StringRef> &VariableTable) { 966 PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable); 967 } 968 969 /// Count the number of newlines in the specified range. 970 static unsigned CountNumNewlinesBetween(StringRef Range, 971 const char *&FirstNewLine) { 972 unsigned NumNewLines = 0; 973 while (1) { 974 // Scan for newline. 975 Range = Range.substr(Range.find_first_of("\n\r")); 976 if (Range.empty()) 977 return NumNewLines; 978 979 ++NumNewLines; 980 981 // Handle \n\r and \r\n as a single newline. 982 if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') && 983 (Range[0] != Range[1])) 984 Range = Range.substr(1); 985 Range = Range.substr(1); 986 987 if (NumNewLines == 1) 988 FirstNewLine = Range.begin(); 989 } 990 } 991 992 /// Match check string and its "not strings" and/or "dag strings". 993 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer, 994 bool IsLabelScanMode, size_t &MatchLen, 995 StringMap<StringRef> &VariableTable) const { 996 size_t LastPos = 0; 997 std::vector<const Pattern *> NotStrings; 998 999 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL 1000 // bounds; we have not processed variable definitions within the bounded block 1001 // yet so cannot handle any final CHECK-DAG yet; this is handled when going 1002 // over the block again (including the last CHECK-LABEL) in normal mode. 1003 if (!IsLabelScanMode) { 1004 // Match "dag strings" (with mixed "not strings" if any). 1005 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable); 1006 if (LastPos == StringRef::npos) 1007 return StringRef::npos; 1008 } 1009 1010 // Match itself from the last position after matching CHECK-DAG. 
1011 StringRef MatchBuffer = Buffer.substr(LastPos); 1012 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1013 if (MatchPos == StringRef::npos) { 1014 PrintCheckFailed(SM, *this, MatchBuffer, VariableTable); 1015 return StringRef::npos; 1016 } 1017 1018 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT 1019 // or CHECK-NOT 1020 if (!IsLabelScanMode) { 1021 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1022 1023 // If this check is a "CHECK-NEXT", verify that the previous match was on 1024 // the previous line (i.e. that there is one newline between them). 1025 if (CheckNext(SM, SkippedRegion)) 1026 return StringRef::npos; 1027 1028 // If this check is a "CHECK-SAME", verify that the previous match was on 1029 // the same line (i.e. that there is no newline between them). 1030 if (CheckSame(SM, SkippedRegion)) 1031 return StringRef::npos; 1032 1033 // If this match had "not strings", verify that they don't exist in the 1034 // skipped region. 1035 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1036 return StringRef::npos; 1037 } 1038 1039 return LastPos + MatchPos; 1040 } 1041 1042 /// Verify there is a single line in the given buffer. 1043 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { 1044 if (Pat.getCheckTy() != Check::CheckNext) 1045 return false; 1046 1047 // Count the number of newlines between the previous match and this one. 
1048 assert(Buffer.data() != 1049 SM.getMemoryBuffer(SM.FindBufferContainingLoc( 1050 SMLoc::getFromPointer(Buffer.data()))) 1051 ->getBufferStart() && 1052 "CHECK-NEXT can't be the first check in a file"); 1053 1054 const char *FirstNewLine = nullptr; 1055 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1056 1057 if (NumNewLines == 0) { 1058 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1059 Prefix + "-NEXT: is on the same line as previous match"); 1060 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1061 "'next' match was here"); 1062 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1063 "previous match ended here"); 1064 return true; 1065 } 1066 1067 if (NumNewLines != 1) { 1068 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1069 Prefix + 1070 "-NEXT: is not on the line after the previous match"); 1071 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1072 "'next' match was here"); 1073 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1074 "previous match ended here"); 1075 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, 1076 "non-matching line after previous match is here"); 1077 return true; 1078 } 1079 1080 return false; 1081 } 1082 1083 /// Verify there is no newline in the given buffer. 1084 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const { 1085 if (Pat.getCheckTy() != Check::CheckSame) 1086 return false; 1087 1088 // Count the number of newlines between the previous match and this one. 
1089 assert(Buffer.data() != 1090 SM.getMemoryBuffer(SM.FindBufferContainingLoc( 1091 SMLoc::getFromPointer(Buffer.data()))) 1092 ->getBufferStart() && 1093 "CHECK-SAME can't be the first check in a file"); 1094 1095 const char *FirstNewLine = nullptr; 1096 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1097 1098 if (NumNewLines != 0) { 1099 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1100 Prefix + 1101 "-SAME: is not on the same line as the previous match"); 1102 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1103 "'next' match was here"); 1104 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1105 "previous match ended here"); 1106 return true; 1107 } 1108 1109 return false; 1110 } 1111 1112 /// Verify there's no "not strings" in the given buffer. 1113 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer, 1114 const std::vector<const Pattern *> &NotStrings, 1115 StringMap<StringRef> &VariableTable) const { 1116 for (const Pattern *Pat : NotStrings) { 1117 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); 1118 1119 size_t MatchLen = 0; 1120 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable); 1121 1122 if (Pos == StringRef::npos) 1123 continue; 1124 1125 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Pos), 1126 SourceMgr::DK_Error, Prefix + "-NOT: string occurred!"); 1127 SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note, 1128 Prefix + "-NOT: pattern specified here"); 1129 return true; 1130 } 1131 1132 return false; 1133 } 1134 1135 /// Match "dag strings" and their mixed "not strings". 
1136 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, 1137 std::vector<const Pattern *> &NotStrings, 1138 StringMap<StringRef> &VariableTable) const { 1139 if (DagNotStrings.empty()) 1140 return 0; 1141 1142 size_t LastPos = 0; 1143 size_t StartPos = LastPos; 1144 1145 for (const Pattern &Pat : DagNotStrings) { 1146 assert((Pat.getCheckTy() == Check::CheckDAG || 1147 Pat.getCheckTy() == Check::CheckNot) && 1148 "Invalid CHECK-DAG or CHECK-NOT!"); 1149 1150 if (Pat.getCheckTy() == Check::CheckNot) { 1151 NotStrings.push_back(&Pat); 1152 continue; 1153 } 1154 1155 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); 1156 1157 size_t MatchLen = 0, MatchPos; 1158 1159 // CHECK-DAG always matches from the start. 1160 StringRef MatchBuffer = Buffer.substr(StartPos); 1161 MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1162 // With a group of CHECK-DAGs, a single mismatching means the match on 1163 // that group of CHECK-DAGs fails immediately. 1164 if (MatchPos == StringRef::npos) { 1165 PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable); 1166 return StringRef::npos; 1167 } 1168 // Re-calc it as the offset relative to the start of the original string. 1169 MatchPos += StartPos; 1170 1171 if (!NotStrings.empty()) { 1172 if (MatchPos < LastPos) { 1173 // Reordered? 
1174 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos), 1175 SourceMgr::DK_Error, 1176 Prefix + "-DAG: found a match of CHECK-DAG" 1177 " reordering across a CHECK-NOT"); 1178 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos), 1179 SourceMgr::DK_Note, 1180 Prefix + "-DAG: the farthest match of CHECK-DAG" 1181 " is found here"); 1182 SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note, 1183 Prefix + "-NOT: the crossed pattern specified" 1184 " here"); 1185 SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note, 1186 Prefix + "-DAG: the reordered pattern specified" 1187 " here"); 1188 return StringRef::npos; 1189 } 1190 // All subsequent CHECK-DAGs should be matched from the farthest 1191 // position of all precedent CHECK-DAGs (including this one.) 1192 StartPos = LastPos; 1193 // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to 1194 // CHECK-DAG, verify that there's no 'not' strings occurred in that 1195 // region. 1196 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1197 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1198 return StringRef::npos; 1199 // Clear "not strings". 1200 NotStrings.clear(); 1201 } 1202 1203 // Update the last position with CHECK-DAG matches. 1204 LastPos = std::max(MatchPos + MatchLen, LastPos); 1205 } 1206 1207 return LastPos; 1208 } 1209 1210 // A check prefix must contain only alphanumeric, hyphens and underscores. 1211 static bool ValidateCheckPrefix(StringRef CheckPrefix) { 1212 Regex Validator("^[a-zA-Z0-9_-]*$"); 1213 return Validator.match(CheckPrefix); 1214 } 1215 1216 static bool ValidateCheckPrefixes() { 1217 StringSet<> PrefixSet; 1218 1219 for (StringRef Prefix : CheckPrefixes) { 1220 // Reject empty prefixes. 
1221 if (Prefix == "") 1222 return false; 1223 1224 if (!PrefixSet.insert(Prefix).second) 1225 return false; 1226 1227 if (!ValidateCheckPrefix(Prefix)) 1228 return false; 1229 } 1230 1231 return true; 1232 } 1233 1234 // Combines the check prefixes into a single regex so that we can efficiently 1235 // scan for any of the set. 1236 // 1237 // The semantics are that the longest-match wins which matches our regex 1238 // library. 1239 static Regex buildCheckPrefixRegex() { 1240 // I don't think there's a way to specify an initial value for cl::list, 1241 // so if nothing was specified, add the default 1242 if (CheckPrefixes.empty()) 1243 CheckPrefixes.push_back("CHECK"); 1244 1245 // We already validated the contents of CheckPrefixes so just concatenate 1246 // them as alternatives. 1247 SmallString<32> PrefixRegexStr; 1248 for (StringRef Prefix : CheckPrefixes) { 1249 if (Prefix != CheckPrefixes.front()) 1250 PrefixRegexStr.push_back('|'); 1251 1252 PrefixRegexStr.append(Prefix); 1253 } 1254 1255 return Regex(PrefixRegexStr); 1256 } 1257 1258 static void DumpCommandLine(int argc, char **argv) { 1259 errs() << "FileCheck command line: "; 1260 for (int I = 0; I < argc; I++) 1261 errs() << " " << argv[I]; 1262 errs() << "\n"; 1263 } 1264 1265 /// Check the input to FileCheck provided in the \p Buffer against the \p 1266 /// CheckStrings read from the check file. 1267 /// 1268 /// Returns false if the input fails to satisfy the checks. 1269 bool CheckInput(SourceMgr &SM, StringRef Buffer, 1270 ArrayRef<CheckString> CheckStrings) { 1271 bool ChecksFailed = false; 1272 1273 /// VariableTable - This holds all the current filecheck variables. 
1274 StringMap<StringRef> VariableTable; 1275 1276 unsigned i = 0, j = 0, e = CheckStrings.size(); 1277 while (true) { 1278 StringRef CheckRegion; 1279 if (j == e) { 1280 CheckRegion = Buffer; 1281 } else { 1282 const CheckString &CheckLabelStr = CheckStrings[j]; 1283 if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) { 1284 ++j; 1285 continue; 1286 } 1287 1288 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG 1289 size_t MatchLabelLen = 0; 1290 size_t MatchLabelPos = 1291 CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable); 1292 if (MatchLabelPos == StringRef::npos) 1293 // Immediately bail of CHECK-LABEL fails, nothing else we can do. 1294 return false; 1295 1296 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); 1297 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); 1298 ++j; 1299 } 1300 1301 for (; i != j; ++i) { 1302 const CheckString &CheckStr = CheckStrings[i]; 1303 1304 // Check each string within the scanned region, including a second check 1305 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) 1306 size_t MatchLen = 0; 1307 size_t MatchPos = 1308 CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable); 1309 1310 if (MatchPos == StringRef::npos) { 1311 ChecksFailed = true; 1312 i = j; 1313 break; 1314 } 1315 1316 CheckRegion = CheckRegion.substr(MatchPos + MatchLen); 1317 } 1318 1319 if (j == e) 1320 break; 1321 } 1322 1323 // Success if no checks failed. 1324 return !ChecksFailed; 1325 } 1326 1327 int main(int argc, char **argv) { 1328 sys::PrintStackTraceOnErrorSignal(argv[0]); 1329 PrettyStackTraceProgram X(argc, argv); 1330 cl::ParseCommandLineOptions(argc, argv); 1331 1332 if (!ValidateCheckPrefixes()) { 1333 errs() << "Supplied check-prefix is invalid! 
Prefixes must be unique and " 1334 "start with a letter and contain only alphanumeric characters, " 1335 "hyphens and underscores\n"; 1336 return 2; 1337 } 1338 1339 Regex PrefixRE = buildCheckPrefixRegex(); 1340 std::string REError; 1341 if (!PrefixRE.isValid(REError)) { 1342 errs() << "Unable to combine check-prefix strings into a prefix regular " 1343 "expression! This is likely a bug in FileCheck's verification of " 1344 "the check-prefix strings. Regular expression parsing failed " 1345 "with the following error: " 1346 << REError << "\n"; 1347 return 2; 1348 } 1349 1350 SourceMgr SM; 1351 1352 // Read the expected strings from the check file. 1353 ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr = 1354 MemoryBuffer::getFileOrSTDIN(CheckFilename); 1355 if (std::error_code EC = CheckFileOrErr.getError()) { 1356 errs() << "Could not open check file '" << CheckFilename 1357 << "': " << EC.message() << '\n'; 1358 return 2; 1359 } 1360 MemoryBuffer &CheckFile = *CheckFileOrErr.get(); 1361 1362 SmallString<4096> CheckFileBuffer; 1363 StringRef CheckFileText = CanonicalizeFile(CheckFile, CheckFileBuffer); 1364 1365 SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( 1366 CheckFileText, CheckFile.getBufferIdentifier()), 1367 SMLoc()); 1368 1369 std::vector<CheckString> CheckStrings; 1370 if (ReadCheckFile(SM, CheckFileText, PrefixRE, CheckStrings)) 1371 return 2; 1372 1373 // Open the file to check and add it to SourceMgr. 
1374 ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr = 1375 MemoryBuffer::getFileOrSTDIN(InputFilename); 1376 if (std::error_code EC = InputFileOrErr.getError()) { 1377 errs() << "Could not open input file '" << InputFilename 1378 << "': " << EC.message() << '\n'; 1379 return 2; 1380 } 1381 MemoryBuffer &InputFile = *InputFileOrErr.get(); 1382 1383 if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) { 1384 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; 1385 DumpCommandLine(argc, argv); 1386 return 2; 1387 } 1388 1389 SmallString<4096> InputFileBuffer; 1390 StringRef InputFileText = CanonicalizeFile(InputFile, InputFileBuffer); 1391 1392 SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( 1393 InputFileText, InputFile.getBufferIdentifier()), 1394 SMLoc()); 1395 1396 return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1; 1397 } 1398