//===- FileCheck.cpp - Check that File's Contents match what is expected --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// FileCheck does a line-by-line check of a file that validates whether it
// contains the expected content. This is useful for regression tests etc.
//
// This program exits with an error status of 2 on error, exit status of 0 if
// the file matched the expected contents, and exit status of 1 if it did not
// contain the expected contents.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cctype>
#include <map>
#include <string>
#include <system_error>
#include <vector>
using namespace llvm;

// Required positional argument: the file containing the check directives.
static cl::opt<std::string>
    CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);

// The file whose contents are verified; "-" (the default) selects stdin.
static cl::opt<std::string>
    InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
                  cl::init("-"), cl::value_desc("filename"));

// Directive prefixes to look for in the check file. The option may be given
// several times; each occurrence adds one prefix.
static cl::list<std::string> CheckPrefixes(
    "check-prefix",
    cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
// Alias of -check-prefix that accepts a comma-separated list in one option.
static cl::alias CheckPrefixesAlias(
    "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated,
    cl::NotHidden,
    cl::desc(
        "Alias for -check-prefix permitting multiple comma separated values"));
53 54 static cl::opt<bool> NoCanonicalizeWhiteSpace( 55 "strict-whitespace", 56 cl::desc("Do not treat all horizontal whitespace as equivalent")); 57 58 static cl::list<std::string> ImplicitCheckNot( 59 "implicit-check-not", 60 cl::desc("Add an implicit negative check with this pattern to every\n" 61 "positive check. This can be used to ensure that no instances of\n" 62 "this pattern occur which are not matched by a positive pattern"), 63 cl::value_desc("pattern")); 64 65 static cl::opt<bool> AllowEmptyInput( 66 "allow-empty", cl::init(false), 67 cl::desc("Allow the input file to be empty. This is useful when making\n" 68 "checks that some error message does not occur, for example.")); 69 70 static cl::opt<bool> MatchFullLines( 71 "match-full-lines", cl::init(false), 72 cl::desc("Require all positive matches to cover an entire input line.\n" 73 "Allows leading and trailing whitespace if --strict-whitespace\n" 74 "is not also passed.")); 75 76 typedef cl::list<std::string>::const_iterator prefix_iterator; 77 78 //===----------------------------------------------------------------------===// 79 // Pattern Handling Code. 80 //===----------------------------------------------------------------------===// 81 82 namespace Check { 83 enum CheckType { 84 CheckNone = 0, 85 CheckPlain, 86 CheckNext, 87 CheckSame, 88 CheckNot, 89 CheckDAG, 90 CheckLabel, 91 92 /// Indicates the pattern only matches the end of file. This is used for 93 /// trailing CHECK-NOTs. 94 CheckEOF, 95 96 /// Marks when parsing found a -NOT check combined with another CHECK suffix. 97 CheckBadNot 98 }; 99 } 100 101 class Pattern { 102 SMLoc PatternLoc; 103 104 /// A fixed string to match as the pattern or empty if this pattern requires 105 /// a regex match. 106 StringRef FixedStr; 107 108 /// A regex string to match as the pattern or empty if this pattern requires 109 /// a fixed string to match. 110 std::string RegExStr; 111 112 /// Entries in this vector map to uses of a variable in the pattern, e.g. 
113 /// "foo[[bar]]baz". In this case, the RegExStr will contain "foobaz" and 114 /// we'll get an entry in this vector that tells us to insert the value of 115 /// bar at offset 3. 116 std::vector<std::pair<StringRef, unsigned>> VariableUses; 117 118 /// Maps definitions of variables to their parenthesized capture numbers. 119 /// 120 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 121 /// 1. 122 std::map<StringRef, unsigned> VariableDefs; 123 124 Check::CheckType CheckTy; 125 126 /// Contains the number of line this pattern is in. 127 unsigned LineNumber; 128 129 public: 130 explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {} 131 132 /// Returns the location in source code. 133 SMLoc getLoc() const { return PatternLoc; } 134 135 bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM, 136 unsigned LineNumber); 137 size_t Match(StringRef Buffer, size_t &MatchLen, 138 StringMap<StringRef> &VariableTable) const; 139 void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 140 const StringMap<StringRef> &VariableTable) const; 141 142 bool hasVariable() const { 143 return !(VariableUses.empty() && VariableDefs.empty()); 144 } 145 146 Check::CheckType getCheckTy() const { return CheckTy; } 147 148 private: 149 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); 150 void AddBackrefToRegEx(unsigned BackrefNum); 151 unsigned 152 ComputeMatchDistance(StringRef Buffer, 153 const StringMap<StringRef> &VariableTable) const; 154 bool EvaluateExpression(StringRef Expr, std::string &Value) const; 155 size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); 156 }; 157 158 /// Parses the given string into the Pattern. 159 /// 160 /// \p Prefix provides which prefix is being matched, \p SM provides the 161 /// SourceMgr used for error reports, and \p LineNumber is the line number in 162 /// the input file from which the pattern string was read. Returns true in 163 /// case of an error, false otherwise. 
164 bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix, 165 SourceMgr &SM, unsigned LineNumber) { 166 bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot; 167 168 this->LineNumber = LineNumber; 169 PatternLoc = SMLoc::getFromPointer(PatternStr.data()); 170 171 // Ignore trailing whitespace. 172 while (!PatternStr.empty() && 173 (PatternStr.back() == ' ' || PatternStr.back() == '\t')) 174 PatternStr = PatternStr.substr(0, PatternStr.size() - 1); 175 176 // Check that there is something on the line. 177 if (PatternStr.empty()) { 178 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error, 179 "found empty check string with prefix '" + Prefix + ":'"); 180 return true; 181 } 182 183 // Check to see if this is a fixed string, or if it has regex pieces. 184 if (!MatchFullLinesHere && 185 (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos && 186 PatternStr.find("[[") == StringRef::npos))) { 187 FixedStr = PatternStr; 188 return false; 189 } 190 191 if (MatchFullLinesHere) { 192 RegExStr += '^'; 193 if (!NoCanonicalizeWhiteSpace) 194 RegExStr += " *"; 195 } 196 197 // Paren value #0 is for the fully matched string. Any new parenthesized 198 // values add from there. 199 unsigned CurParen = 1; 200 201 // Otherwise, there is at least one regex piece. Build up the regex pattern 202 // by escaping scary characters in fixed strings, building up one big regex. 203 while (!PatternStr.empty()) { 204 // RegEx matches. 205 if (PatternStr.startswith("{{")) { 206 // This is the start of a regex match. Scan for the }}. 207 size_t End = PatternStr.find("}}"); 208 if (End == StringRef::npos) { 209 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 210 SourceMgr::DK_Error, 211 "found start of regex string with no end '}}'"); 212 return true; 213 } 214 215 // Enclose {{}} patterns in parens just like [[]] even though we're not 216 // capturing the result for any purpose. 
This is required in case the 217 // expression contains an alternation like: CHECK: abc{{x|z}}def. We 218 // want this to turn into: "abc(x|z)def" not "abcx|zdef". 219 RegExStr += '('; 220 ++CurParen; 221 222 if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM)) 223 return true; 224 RegExStr += ')'; 225 226 PatternStr = PatternStr.substr(End + 2); 227 continue; 228 } 229 230 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .* 231 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The 232 // second form is [[foo]] which is a reference to foo. The variable name 233 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject 234 // it. This is to catch some common errors. 235 if (PatternStr.startswith("[[")) { 236 // Find the closing bracket pair ending the match. End is going to be an 237 // offset relative to the beginning of the match string. 238 size_t End = FindRegexVarEnd(PatternStr.substr(2), SM); 239 240 if (End == StringRef::npos) { 241 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 242 SourceMgr::DK_Error, 243 "invalid named regex reference, no ]] found"); 244 return true; 245 } 246 247 StringRef MatchStr = PatternStr.substr(2, End); 248 PatternStr = PatternStr.substr(End + 4); 249 250 // Get the regex name (e.g. "foo"). 251 size_t NameEnd = MatchStr.find(':'); 252 StringRef Name = MatchStr.substr(0, NameEnd); 253 254 if (Name.empty()) { 255 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 256 "invalid name in named regex: empty name"); 257 return true; 258 } 259 260 // Verify that the name/expression is well formed. FileCheck currently 261 // supports @LINE, @LINE+number, @LINE-number expressions. The check here 262 // is relaxed, more strict check is performed in \c EvaluateExpression. 
263 bool IsExpression = false; 264 for (unsigned i = 0, e = Name.size(); i != e; ++i) { 265 if (i == 0 && Name[i] == '@') { 266 if (NameEnd != StringRef::npos) { 267 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 268 SourceMgr::DK_Error, 269 "invalid name in named regex definition"); 270 return true; 271 } 272 IsExpression = true; 273 continue; 274 } 275 if (Name[i] != '_' && !isalnum(Name[i]) && 276 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) { 277 SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i), 278 SourceMgr::DK_Error, "invalid name in named regex"); 279 return true; 280 } 281 } 282 283 // Name can't start with a digit. 284 if (isdigit(static_cast<unsigned char>(Name[0]))) { 285 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 286 "invalid name in named regex"); 287 return true; 288 } 289 290 // Handle [[foo]]. 291 if (NameEnd == StringRef::npos) { 292 // Handle variables that were defined earlier on the same line by 293 // emitting a backreference. 294 if (VariableDefs.find(Name) != VariableDefs.end()) { 295 unsigned VarParenNum = VariableDefs[Name]; 296 if (VarParenNum < 1 || VarParenNum > 9) { 297 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 298 SourceMgr::DK_Error, 299 "Can't back-reference more than 9 variables"); 300 return true; 301 } 302 AddBackrefToRegEx(VarParenNum); 303 } else { 304 VariableUses.push_back(std::make_pair(Name, RegExStr.size())); 305 } 306 continue; 307 } 308 309 // Handle [[foo:.*]]. 310 VariableDefs[Name] = CurParen; 311 RegExStr += '('; 312 ++CurParen; 313 314 if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM)) 315 return true; 316 317 RegExStr += ')'; 318 } 319 320 // Handle fixed string matches. 321 // Find the end, which is the start of the next regex. 
322 size_t FixedMatchEnd = PatternStr.find("{{"); 323 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); 324 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); 325 PatternStr = PatternStr.substr(FixedMatchEnd); 326 } 327 328 if (MatchFullLinesHere) { 329 if (!NoCanonicalizeWhiteSpace) 330 RegExStr += " *"; 331 RegExStr += '$'; 332 } 333 334 return false; 335 } 336 337 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) { 338 Regex R(RS); 339 std::string Error; 340 if (!R.isValid(Error)) { 341 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, 342 "invalid regex: " + Error); 343 return true; 344 } 345 346 RegExStr += RS.str(); 347 CurParen += R.getNumMatches(); 348 return false; 349 } 350 351 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { 352 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); 353 std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum); 354 RegExStr += Backref; 355 } 356 357 /// Evaluates expression and stores the result to \p Value. 358 /// 359 /// Returns true on success and false when the expression has invalid syntax. 360 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const { 361 // The only supported expression is @LINE([\+-]\d+)? 362 if (!Expr.startswith("@LINE")) 363 return false; 364 Expr = Expr.substr(StringRef("@LINE").size()); 365 int Offset = 0; 366 if (!Expr.empty()) { 367 if (Expr[0] == '+') 368 Expr = Expr.substr(1); 369 else if (Expr[0] != '-') 370 return false; 371 if (Expr.getAsInteger(10, Offset)) 372 return false; 373 } 374 Value = llvm::itostr(LineNumber + Offset); 375 return true; 376 } 377 378 /// Matches the pattern string against the input buffer \p Buffer 379 /// 380 /// This returns the position that is matched or npos if there is no match. If 381 /// there is a match, the size of the matched string is returned in \p 382 /// MatchLen. 
383 /// 384 /// The \p VariableTable StringMap provides the current values of filecheck 385 /// variables and is updated if this match defines new values. 386 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, 387 StringMap<StringRef> &VariableTable) const { 388 // If this is the EOF pattern, match it immediately. 389 if (CheckTy == Check::CheckEOF) { 390 MatchLen = 0; 391 return Buffer.size(); 392 } 393 394 // If this is a fixed string pattern, just match it now. 395 if (!FixedStr.empty()) { 396 MatchLen = FixedStr.size(); 397 return Buffer.find(FixedStr); 398 } 399 400 // Regex match. 401 402 // If there are variable uses, we need to create a temporary string with the 403 // actual value. 404 StringRef RegExToMatch = RegExStr; 405 std::string TmpStr; 406 if (!VariableUses.empty()) { 407 TmpStr = RegExStr; 408 409 unsigned InsertOffset = 0; 410 for (const auto &VariableUse : VariableUses) { 411 std::string Value; 412 413 if (VariableUse.first[0] == '@') { 414 if (!EvaluateExpression(VariableUse.first, Value)) 415 return StringRef::npos; 416 } else { 417 StringMap<StringRef>::iterator it = 418 VariableTable.find(VariableUse.first); 419 // If the variable is undefined, return an error. 420 if (it == VariableTable.end()) 421 return StringRef::npos; 422 423 // Look up the value and escape it so that we can put it into the regex. 424 Value += Regex::escape(it->second); 425 } 426 427 // Plop it into the regex at the adjusted offset. 428 TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset, 429 Value.begin(), Value.end()); 430 InsertOffset += Value.size(); 431 } 432 433 // Match the newly constructed regex. 434 RegExToMatch = TmpStr; 435 } 436 437 SmallVector<StringRef, 4> MatchInfo; 438 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) 439 return StringRef::npos; 440 441 // Successful regex match. 
442 assert(!MatchInfo.empty() && "Didn't get any match"); 443 StringRef FullMatch = MatchInfo[0]; 444 445 // If this defines any variables, remember their values. 446 for (const auto &VariableDef : VariableDefs) { 447 assert(VariableDef.second < MatchInfo.size() && "Internal paren error"); 448 VariableTable[VariableDef.first] = MatchInfo[VariableDef.second]; 449 } 450 451 MatchLen = FullMatch.size(); 452 return FullMatch.data() - Buffer.data(); 453 } 454 455 456 /// Computes an arbitrary estimate for the quality of matching this pattern at 457 /// the start of \p Buffer; a distance of zero should correspond to a perfect 458 /// match. 459 unsigned 460 Pattern::ComputeMatchDistance(StringRef Buffer, 461 const StringMap<StringRef> &VariableTable) const { 462 // Just compute the number of matching characters. For regular expressions, we 463 // just compare against the regex itself and hope for the best. 464 // 465 // FIXME: One easy improvement here is have the regex lib generate a single 466 // example regular expression which matches, and use that as the example 467 // string. 468 StringRef ExampleString(FixedStr); 469 if (ExampleString.empty()) 470 ExampleString = RegExStr; 471 472 // Only compare up to the first line in the buffer, or the string size. 473 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); 474 BufferPrefix = BufferPrefix.split('\n').first; 475 return BufferPrefix.edit_distance(ExampleString); 476 } 477 478 /// Prints additional information about a failure to match involving this 479 /// pattern. 480 void Pattern::PrintFailureInfo( 481 const SourceMgr &SM, StringRef Buffer, 482 const StringMap<StringRef> &VariableTable) const { 483 // If this was a regular expression using variables, print the current 484 // variable values. 
485 if (!VariableUses.empty()) { 486 for (const auto &VariableUse : VariableUses) { 487 SmallString<256> Msg; 488 raw_svector_ostream OS(Msg); 489 StringRef Var = VariableUse.first; 490 if (Var[0] == '@') { 491 std::string Value; 492 if (EvaluateExpression(Var, Value)) { 493 OS << "with expression \""; 494 OS.write_escaped(Var) << "\" equal to \""; 495 OS.write_escaped(Value) << "\""; 496 } else { 497 OS << "uses incorrect expression \""; 498 OS.write_escaped(Var) << "\""; 499 } 500 } else { 501 StringMap<StringRef>::const_iterator it = VariableTable.find(Var); 502 503 // Check for undefined variable references. 504 if (it == VariableTable.end()) { 505 OS << "uses undefined variable \""; 506 OS.write_escaped(Var) << "\""; 507 } else { 508 OS << "with variable \""; 509 OS.write_escaped(Var) << "\" equal to \""; 510 OS.write_escaped(it->second) << "\""; 511 } 512 } 513 514 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 515 OS.str()); 516 } 517 } 518 519 // Attempt to find the closest/best fuzzy match. Usually an error happens 520 // because some string in the output didn't exactly match. In these cases, we 521 // would like to show the user a best guess at what "should have" matched, to 522 // save them having to actually check the input manually. 523 size_t NumLinesForward = 0; 524 size_t Best = StringRef::npos; 525 double BestQuality = 0; 526 527 // Use an arbitrary 4k limit on how far we will search. 528 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { 529 if (Buffer[i] == '\n') 530 ++NumLinesForward; 531 532 // Patterns have leading whitespace stripped, so skip whitespace when 533 // looking for something which looks like a pattern. 534 if (Buffer[i] == ' ' || Buffer[i] == '\t') 535 continue; 536 537 // Compute the "quality" of this match as an arbitrary combination of the 538 // match distance and the number of lines skipped to get to this match. 
539 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable); 540 double Quality = Distance + (NumLinesForward / 100.); 541 542 if (Quality < BestQuality || Best == StringRef::npos) { 543 Best = i; 544 BestQuality = Quality; 545 } 546 } 547 548 // Print the "possible intended match here" line if we found something 549 // reasonable and not equal to what we showed in the "scanning from here" 550 // line. 551 if (Best && Best != StringRef::npos && BestQuality < 50) { 552 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best), 553 SourceMgr::DK_Note, "possible intended match here"); 554 555 // FIXME: If we wanted to be really friendly we would show why the match 556 // failed, as it can be hard to spot simple one character differences. 557 } 558 } 559 560 /// Finds the closing sequence of a regex variable usage or definition. 561 /// 562 /// \p Str has to point in the beginning of the definition (right after the 563 /// opening sequence). Returns the offset of the closing sequence within Str, 564 /// or npos if it was not found. 565 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { 566 // Offset keeps track of the current offset within the input Str 567 size_t Offset = 0; 568 // [...] Nesting depth 569 size_t BracketDepth = 0; 570 571 while (!Str.empty()) { 572 if (Str.startswith("]]") && BracketDepth == 0) 573 return Offset; 574 if (Str[0] == '\\') { 575 // Backslash escapes the next char within regexes, so skip them both. 
576 Str = Str.substr(2); 577 Offset += 2; 578 } else { 579 switch (Str[0]) { 580 default: 581 break; 582 case '[': 583 BracketDepth++; 584 break; 585 case ']': 586 if (BracketDepth == 0) { 587 SM.PrintMessage(SMLoc::getFromPointer(Str.data()), 588 SourceMgr::DK_Error, 589 "missing closing \"]\" for regex variable"); 590 exit(1); 591 } 592 BracketDepth--; 593 break; 594 } 595 Str = Str.substr(1); 596 Offset++; 597 } 598 } 599 600 return StringRef::npos; 601 } 602 603 //===----------------------------------------------------------------------===// 604 // Check Strings. 605 //===----------------------------------------------------------------------===// 606 607 /// A check that we found in the input file. 608 struct CheckString { 609 /// The pattern to match. 610 Pattern Pat; 611 612 /// Which prefix name this check matched. 613 StringRef Prefix; 614 615 /// The location in the match file that the check string was specified. 616 SMLoc Loc; 617 618 /// All of the strings that are disallowed from occurring between this match 619 /// string and the previous one (or start of file). 620 std::vector<Pattern> DagNotStrings; 621 622 CheckString(const Pattern &P, StringRef S, SMLoc L) 623 : Pat(P), Prefix(S), Loc(L) {} 624 625 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, 626 size_t &MatchLen, StringMap<StringRef> &VariableTable) const; 627 628 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; 629 bool CheckSame(const SourceMgr &SM, StringRef Buffer) const; 630 bool CheckNot(const SourceMgr &SM, StringRef Buffer, 631 const std::vector<const Pattern *> &NotStrings, 632 StringMap<StringRef> &VariableTable) const; 633 size_t CheckDag(const SourceMgr &SM, StringRef Buffer, 634 std::vector<const Pattern *> &NotStrings, 635 StringMap<StringRef> &VariableTable) const; 636 }; 637 638 /// Canonicalize whitespaces in the file. Line endings are replaced with 639 /// UNIX-style '\n'. 
640 static StringRef CanonicalizeFile(MemoryBuffer &MB, 641 SmallVectorImpl<char> &OutputBuffer) { 642 OutputBuffer.reserve(MB.getBufferSize()); 643 644 for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd(); 645 Ptr != End; ++Ptr) { 646 // Eliminate trailing dosish \r. 647 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { 648 continue; 649 } 650 651 // If current char is not a horizontal whitespace or if horizontal 652 // whitespace canonicalization is disabled, dump it to output as is. 653 if (NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) { 654 OutputBuffer.push_back(*Ptr); 655 continue; 656 } 657 658 // Otherwise, add one space and advance over neighboring space. 659 OutputBuffer.push_back(' '); 660 while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t')) 661 ++Ptr; 662 } 663 664 // Add a null byte and then return all but that byte. 665 OutputBuffer.push_back('\0'); 666 return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1); 667 } 668 669 static bool IsPartOfWord(char c) { 670 return (isalnum(c) || c == '-' || c == '_'); 671 } 672 673 // Get the size of the prefix extension. 674 static size_t CheckTypeSize(Check::CheckType Ty) { 675 switch (Ty) { 676 case Check::CheckNone: 677 case Check::CheckBadNot: 678 return 0; 679 680 case Check::CheckPlain: 681 return sizeof(":") - 1; 682 683 case Check::CheckNext: 684 return sizeof("-NEXT:") - 1; 685 686 case Check::CheckSame: 687 return sizeof("-SAME:") - 1; 688 689 case Check::CheckNot: 690 return sizeof("-NOT:") - 1; 691 692 case Check::CheckDAG: 693 return sizeof("-DAG:") - 1; 694 695 case Check::CheckLabel: 696 return sizeof("-LABEL:") - 1; 697 698 case Check::CheckEOF: 699 llvm_unreachable("Should not be using EOF size"); 700 } 701 702 llvm_unreachable("Bad check type"); 703 } 704 705 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) { 706 char NextChar = Buffer[Prefix.size()]; 707 708 // Verify that the : is present after the prefix. 
709 if (NextChar == ':') 710 return Check::CheckPlain; 711 712 if (NextChar != '-') 713 return Check::CheckNone; 714 715 StringRef Rest = Buffer.drop_front(Prefix.size() + 1); 716 if (Rest.startswith("NEXT:")) 717 return Check::CheckNext; 718 719 if (Rest.startswith("SAME:")) 720 return Check::CheckSame; 721 722 if (Rest.startswith("NOT:")) 723 return Check::CheckNot; 724 725 if (Rest.startswith("DAG:")) 726 return Check::CheckDAG; 727 728 if (Rest.startswith("LABEL:")) 729 return Check::CheckLabel; 730 731 // You can't combine -NOT with another suffix. 732 if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") || 733 Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") || 734 Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:")) 735 return Check::CheckBadNot; 736 737 return Check::CheckNone; 738 } 739 740 // From the given position, find the next character after the word. 741 static size_t SkipWord(StringRef Str, size_t Loc) { 742 while (Loc < Str.size() && IsPartOfWord(Str[Loc])) 743 ++Loc; 744 return Loc; 745 } 746 747 /// Search the buffer for the first prefix in the prefix regular expression. 748 /// 749 /// This searches the buffer using the provided regular expression, however it 750 /// enforces constraints beyond that: 751 /// 1) The found prefix must not be a suffix of something that looks like 752 /// a valid prefix. 753 /// 2) The found prefix must be followed by a valid check type suffix using \c 754 /// FindCheckType above. 755 /// 756 /// The first match of the regular expression to satisfy these two is returned, 757 /// otherwise an empty StringRef is returned to indicate failure. 758 /// 759 /// If this routine returns a valid prefix, it will also shrink \p Buffer to 760 /// start at the beginning of the returned prefix, increment \p LineNumber for 761 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of 762 /// check found by examining the suffix. 
763 /// 764 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy 765 /// is unspecified. 766 static StringRef FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer, 767 unsigned &LineNumber, 768 Check::CheckType &CheckTy) { 769 SmallVector<StringRef, 2> Matches; 770 771 while (!Buffer.empty()) { 772 // Find the first (longest) match using the RE. 773 if (!PrefixRE.match(Buffer, &Matches)) 774 // No match at all, bail. 775 return StringRef(); 776 777 StringRef Prefix = Matches[0]; 778 Matches.clear(); 779 780 assert(Prefix.data() >= Buffer.data() && 781 Prefix.data() < Buffer.data() + Buffer.size() && 782 "Prefix doesn't start inside of buffer!"); 783 size_t Loc = Prefix.data() - Buffer.data(); 784 StringRef Skipped = Buffer.substr(0, Loc); 785 Buffer = Buffer.drop_front(Loc); 786 LineNumber += Skipped.count('\n'); 787 788 // Check that the matched prefix isn't a suffix of some other check-like 789 // word. 790 // FIXME: This is a very ad-hoc check. it would be better handled in some 791 // other way. Among other things it seems hard to distinguish between 792 // intentional and unintentional uses of this feature. 793 if (Skipped.empty() || !IsPartOfWord(Skipped.back())) { 794 // Now extract the type. 795 CheckTy = FindCheckType(Buffer, Prefix); 796 797 // If we've found a valid check type for this prefix, we're done. 798 if (CheckTy != Check::CheckNone) 799 return Prefix; 800 } 801 802 // If we didn't successfully find a prefix, we need to skip this invalid 803 // prefix and continue scanning. We directly skip the prefix that was 804 // matched and any additional parts of that check-like word. 805 Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size())); 806 } 807 808 // We ran out of buffer while skipping partial matches so give up. 809 return StringRef(); 810 } 811 812 /// Read the check file, which specifies the sequence of expected strings. 813 /// 814 /// The strings are added to the CheckStrings vector. 
Returns true in case of 815 /// an error, false otherwise. 816 static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, 817 std::vector<CheckString> &CheckStrings) { 818 std::vector<Pattern> ImplicitNegativeChecks; 819 for (const auto &PatternString : ImplicitCheckNot) { 820 // Create a buffer with fake command line content in order to display the 821 // command line option responsible for the specific implicit CHECK-NOT. 822 std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str(); 823 std::string Suffix = "'"; 824 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy( 825 Prefix + PatternString + Suffix, "command line"); 826 827 StringRef PatternInBuffer = 828 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); 829 SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); 830 831 ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot)); 832 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer, 833 "IMPLICIT-CHECK", SM, 0); 834 } 835 836 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks; 837 838 // LineNumber keeps track of the line on which CheckPrefix instances are 839 // found. 840 unsigned LineNumber = 1; 841 842 while (1) { 843 Check::CheckType CheckTy; 844 845 // See if a prefix occurs in the memory buffer. 846 StringRef UsedPrefix = FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber, 847 CheckTy); 848 if (UsedPrefix.empty()) 849 break; 850 assert(UsedPrefix.data() == Buffer.data() && 851 "Failed to move Buffer's start forward, or pointed prefix outside " 852 "of the buffer!"); 853 854 // Location to use for error messages. 855 const char *UsedPrefixStart = UsedPrefix.data(); 856 857 // Skip the buffer to the end. 858 Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy)); 859 860 // Complain about useful-looking but unsupported suffixes. 
861 if (CheckTy == Check::CheckBadNot) { 862 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, 863 "unsupported -NOT combo on prefix '" + UsedPrefix + "'"); 864 return true; 865 } 866 867 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore 868 // leading whitespace. 869 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 870 871 // Scan ahead to the end of line. 872 size_t EOL = Buffer.find_first_of("\n\r"); 873 874 // Remember the location of the start of the pattern, for diagnostics. 875 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); 876 877 // Parse the pattern. 878 Pattern P(CheckTy); 879 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber)) 880 return true; 881 882 // Verify that CHECK-LABEL lines do not define or use variables 883 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { 884 SM.PrintMessage( 885 SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error, 886 "found '" + UsedPrefix + "-LABEL:'" 887 " with variable definition or use"); 888 return true; 889 } 890 891 Buffer = Buffer.substr(EOL); 892 893 // Verify that CHECK-NEXT lines have at least one CHECK line before them. 894 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) && 895 CheckStrings.empty()) { 896 StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME"; 897 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 898 SourceMgr::DK_Error, 899 "found '" + UsedPrefix + "-" + Type + 900 "' without previous '" + UsedPrefix + ": line"); 901 return true; 902 } 903 904 // Handle CHECK-DAG/-NOT. 905 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { 906 DagNotMatches.push_back(P); 907 continue; 908 } 909 910 // Okay, add the string we captured to the output vector and move on. 
911 CheckStrings.emplace_back(P, UsedPrefix, PatternLoc); 912 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 913 DagNotMatches = ImplicitNegativeChecks; 914 } 915 916 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first 917 // prefix as a filler for the error message. 918 if (!DagNotMatches.empty()) { 919 CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(), 920 SMLoc::getFromPointer(Buffer.data())); 921 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 922 } 923 924 if (CheckStrings.empty()) { 925 errs() << "error: no check strings found with prefix" 926 << (CheckPrefixes.size() > 1 ? "es " : " "); 927 prefix_iterator I = CheckPrefixes.begin(); 928 prefix_iterator E = CheckPrefixes.end(); 929 if (I != E) { 930 errs() << "\'" << *I << ":'"; 931 ++I; 932 } 933 for (; I != E; ++I) 934 errs() << ", \'" << *I << ":'"; 935 936 errs() << '\n'; 937 return true; 938 } 939 940 return false; 941 } 942 943 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, const Pattern &Pat, 944 StringRef Buffer, 945 StringMap<StringRef> &VariableTable) { 946 // Otherwise, we have an error, emit an error message. 947 SM.PrintMessage(Loc, SourceMgr::DK_Error, 948 "expected string not found in input"); 949 950 // Print the "scanning from here" line. If the current position is at the 951 // end of a line, advance to the start of the next line. 952 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); 953 954 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 955 "scanning from here"); 956 957 // Allow the pattern to print additional information if desired. 
958 Pat.PrintFailureInfo(SM, Buffer, VariableTable); 959 } 960 961 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr, 962 StringRef Buffer, 963 StringMap<StringRef> &VariableTable) { 964 PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable); 965 } 966 967 /// Count the number of newlines in the specified range. 968 static unsigned CountNumNewlinesBetween(StringRef Range, 969 const char *&FirstNewLine) { 970 unsigned NumNewLines = 0; 971 while (1) { 972 // Scan for newline. 973 Range = Range.substr(Range.find_first_of("\n\r")); 974 if (Range.empty()) 975 return NumNewLines; 976 977 ++NumNewLines; 978 979 // Handle \n\r and \r\n as a single newline. 980 if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') && 981 (Range[0] != Range[1])) 982 Range = Range.substr(1); 983 Range = Range.substr(1); 984 985 if (NumNewLines == 1) 986 FirstNewLine = Range.begin(); 987 } 988 } 989 990 /// Match check string and its "not strings" and/or "dag strings". 991 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer, 992 bool IsLabelScanMode, size_t &MatchLen, 993 StringMap<StringRef> &VariableTable) const { 994 size_t LastPos = 0; 995 std::vector<const Pattern *> NotStrings; 996 997 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL 998 // bounds; we have not processed variable definitions within the bounded block 999 // yet so cannot handle any final CHECK-DAG yet; this is handled when going 1000 // over the block again (including the last CHECK-LABEL) in normal mode. 1001 if (!IsLabelScanMode) { 1002 // Match "dag strings" (with mixed "not strings" if any). 1003 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable); 1004 if (LastPos == StringRef::npos) 1005 return StringRef::npos; 1006 } 1007 1008 // Match itself from the last position after matching CHECK-DAG. 
1009 StringRef MatchBuffer = Buffer.substr(LastPos); 1010 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1011 if (MatchPos == StringRef::npos) { 1012 PrintCheckFailed(SM, *this, MatchBuffer, VariableTable); 1013 return StringRef::npos; 1014 } 1015 1016 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT 1017 // or CHECK-NOT 1018 if (!IsLabelScanMode) { 1019 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1020 1021 // If this check is a "CHECK-NEXT", verify that the previous match was on 1022 // the previous line (i.e. that there is one newline between them). 1023 if (CheckNext(SM, SkippedRegion)) 1024 return StringRef::npos; 1025 1026 // If this check is a "CHECK-SAME", verify that the previous match was on 1027 // the same line (i.e. that there is no newline between them). 1028 if (CheckSame(SM, SkippedRegion)) 1029 return StringRef::npos; 1030 1031 // If this match had "not strings", verify that they don't exist in the 1032 // skipped region. 1033 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1034 return StringRef::npos; 1035 } 1036 1037 return LastPos + MatchPos; 1038 } 1039 1040 /// Verify there is a single line in the given buffer. 1041 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { 1042 if (Pat.getCheckTy() != Check::CheckNext) 1043 return false; 1044 1045 // Count the number of newlines between the previous match and this one. 
1046 assert(Buffer.data() != 1047 SM.getMemoryBuffer(SM.FindBufferContainingLoc( 1048 SMLoc::getFromPointer(Buffer.data()))) 1049 ->getBufferStart() && 1050 "CHECK-NEXT can't be the first check in a file"); 1051 1052 const char *FirstNewLine = nullptr; 1053 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1054 1055 if (NumNewLines == 0) { 1056 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1057 Prefix + "-NEXT: is on the same line as previous match"); 1058 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1059 "'next' match was here"); 1060 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1061 "previous match ended here"); 1062 return true; 1063 } 1064 1065 if (NumNewLines != 1) { 1066 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1067 Prefix + 1068 "-NEXT: is not on the line after the previous match"); 1069 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1070 "'next' match was here"); 1071 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1072 "previous match ended here"); 1073 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, 1074 "non-matching line after previous match is here"); 1075 return true; 1076 } 1077 1078 return false; 1079 } 1080 1081 /// Verify there is no newline in the given buffer. 1082 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const { 1083 if (Pat.getCheckTy() != Check::CheckSame) 1084 return false; 1085 1086 // Count the number of newlines between the previous match and this one. 
1087 assert(Buffer.data() != 1088 SM.getMemoryBuffer(SM.FindBufferContainingLoc( 1089 SMLoc::getFromPointer(Buffer.data()))) 1090 ->getBufferStart() && 1091 "CHECK-SAME can't be the first check in a file"); 1092 1093 const char *FirstNewLine = nullptr; 1094 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1095 1096 if (NumNewLines != 0) { 1097 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1098 Prefix + 1099 "-SAME: is not on the same line as the previous match"); 1100 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1101 "'next' match was here"); 1102 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1103 "previous match ended here"); 1104 return true; 1105 } 1106 1107 return false; 1108 } 1109 1110 /// Verify there's no "not strings" in the given buffer. 1111 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer, 1112 const std::vector<const Pattern *> &NotStrings, 1113 StringMap<StringRef> &VariableTable) const { 1114 for (const Pattern *Pat : NotStrings) { 1115 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); 1116 1117 size_t MatchLen = 0; 1118 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable); 1119 1120 if (Pos == StringRef::npos) 1121 continue; 1122 1123 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Pos), 1124 SourceMgr::DK_Error, Prefix + "-NOT: string occurred!"); 1125 SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note, 1126 Prefix + "-NOT: pattern specified here"); 1127 return true; 1128 } 1129 1130 return false; 1131 } 1132 1133 /// Match "dag strings" and their mixed "not strings". 
1134 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, 1135 std::vector<const Pattern *> &NotStrings, 1136 StringMap<StringRef> &VariableTable) const { 1137 if (DagNotStrings.empty()) 1138 return 0; 1139 1140 size_t LastPos = 0; 1141 size_t StartPos = LastPos; 1142 1143 for (const Pattern &Pat : DagNotStrings) { 1144 assert((Pat.getCheckTy() == Check::CheckDAG || 1145 Pat.getCheckTy() == Check::CheckNot) && 1146 "Invalid CHECK-DAG or CHECK-NOT!"); 1147 1148 if (Pat.getCheckTy() == Check::CheckNot) { 1149 NotStrings.push_back(&Pat); 1150 continue; 1151 } 1152 1153 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); 1154 1155 size_t MatchLen = 0, MatchPos; 1156 1157 // CHECK-DAG always matches from the start. 1158 StringRef MatchBuffer = Buffer.substr(StartPos); 1159 MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1160 // With a group of CHECK-DAGs, a single mismatching means the match on 1161 // that group of CHECK-DAGs fails immediately. 1162 if (MatchPos == StringRef::npos) { 1163 PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable); 1164 return StringRef::npos; 1165 } 1166 // Re-calc it as the offset relative to the start of the original string. 1167 MatchPos += StartPos; 1168 1169 if (!NotStrings.empty()) { 1170 if (MatchPos < LastPos) { 1171 // Reordered? 
1172 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos), 1173 SourceMgr::DK_Error, 1174 Prefix + "-DAG: found a match of CHECK-DAG" 1175 " reordering across a CHECK-NOT"); 1176 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos), 1177 SourceMgr::DK_Note, 1178 Prefix + "-DAG: the farthest match of CHECK-DAG" 1179 " is found here"); 1180 SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note, 1181 Prefix + "-NOT: the crossed pattern specified" 1182 " here"); 1183 SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note, 1184 Prefix + "-DAG: the reordered pattern specified" 1185 " here"); 1186 return StringRef::npos; 1187 } 1188 // All subsequent CHECK-DAGs should be matched from the farthest 1189 // position of all precedent CHECK-DAGs (including this one.) 1190 StartPos = LastPos; 1191 // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to 1192 // CHECK-DAG, verify that there's no 'not' strings occurred in that 1193 // region. 1194 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1195 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1196 return StringRef::npos; 1197 // Clear "not strings". 1198 NotStrings.clear(); 1199 } 1200 1201 // Update the last position with CHECK-DAG matches. 1202 LastPos = std::max(MatchPos + MatchLen, LastPos); 1203 } 1204 1205 return LastPos; 1206 } 1207 1208 // A check prefix must contain only alphanumeric, hyphens and underscores. 1209 static bool ValidateCheckPrefix(StringRef CheckPrefix) { 1210 Regex Validator("^[a-zA-Z0-9_-]*$"); 1211 return Validator.match(CheckPrefix); 1212 } 1213 1214 static bool ValidateCheckPrefixes() { 1215 StringSet<> PrefixSet; 1216 1217 for (StringRef Prefix : CheckPrefixes) { 1218 // Reject empty prefixes. 
1219 if (Prefix == "") 1220 return false; 1221 1222 if (!PrefixSet.insert(Prefix).second) 1223 return false; 1224 1225 if (!ValidateCheckPrefix(Prefix)) 1226 return false; 1227 } 1228 1229 return true; 1230 } 1231 1232 // Combines the check prefixes into a single regex so that we can efficiently 1233 // scan for any of the set. 1234 // 1235 // The semantics are that the longest-match wins which matches our regex 1236 // library. 1237 static Regex buildCheckPrefixRegex() { 1238 // I don't think there's a way to specify an initial value for cl::list, 1239 // so if nothing was specified, add the default 1240 if (CheckPrefixes.empty()) 1241 CheckPrefixes.push_back("CHECK"); 1242 1243 // We already validated the contents of CheckPrefixes so just concatenate 1244 // them as alternatives. 1245 SmallString<32> PrefixRegexStr; 1246 for (StringRef Prefix : CheckPrefixes) { 1247 if (Prefix != CheckPrefixes.front()) 1248 PrefixRegexStr.push_back('|'); 1249 1250 PrefixRegexStr.append(Prefix); 1251 } 1252 1253 return Regex(PrefixRegexStr); 1254 } 1255 1256 static void DumpCommandLine(int argc, char **argv) { 1257 errs() << "FileCheck command line: "; 1258 for (int I = 0; I < argc; I++) 1259 errs() << " " << argv[I]; 1260 errs() << "\n"; 1261 } 1262 1263 /// Check the input to FileCheck provided in the \p Buffer against the \p 1264 /// CheckStrings read from the check file. 1265 /// 1266 /// Returns false if the input fails to satisfy the checks. 1267 bool CheckInput(SourceMgr &SM, StringRef Buffer, 1268 ArrayRef<CheckString> CheckStrings) { 1269 bool ChecksFailed = false; 1270 1271 /// VariableTable - This holds all the current filecheck variables. 
1272 StringMap<StringRef> VariableTable; 1273 1274 unsigned i = 0, j = 0, e = CheckStrings.size(); 1275 while (true) { 1276 StringRef CheckRegion; 1277 if (j == e) { 1278 CheckRegion = Buffer; 1279 } else { 1280 const CheckString &CheckLabelStr = CheckStrings[j]; 1281 if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) { 1282 ++j; 1283 continue; 1284 } 1285 1286 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG 1287 size_t MatchLabelLen = 0; 1288 size_t MatchLabelPos = 1289 CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable); 1290 if (MatchLabelPos == StringRef::npos) 1291 // Immediately bail of CHECK-LABEL fails, nothing else we can do. 1292 return false; 1293 1294 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); 1295 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); 1296 ++j; 1297 } 1298 1299 for (; i != j; ++i) { 1300 const CheckString &CheckStr = CheckStrings[i]; 1301 1302 // Check each string within the scanned region, including a second check 1303 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) 1304 size_t MatchLen = 0; 1305 size_t MatchPos = 1306 CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable); 1307 1308 if (MatchPos == StringRef::npos) { 1309 ChecksFailed = true; 1310 i = j; 1311 break; 1312 } 1313 1314 CheckRegion = CheckRegion.substr(MatchPos + MatchLen); 1315 } 1316 1317 if (j == e) 1318 break; 1319 } 1320 1321 // Success if no checks failed. 1322 return !ChecksFailed; 1323 } 1324 1325 int main(int argc, char **argv) { 1326 sys::PrintStackTraceOnErrorSignal(argv[0]); 1327 PrettyStackTraceProgram X(argc, argv); 1328 cl::ParseCommandLineOptions(argc, argv); 1329 1330 if (!ValidateCheckPrefixes()) { 1331 errs() << "Supplied check-prefix is invalid! 
Prefixes must be unique and " 1332 "start with a letter and contain only alphanumeric characters, " 1333 "hyphens and underscores\n"; 1334 return 2; 1335 } 1336 1337 Regex PrefixRE = buildCheckPrefixRegex(); 1338 std::string REError; 1339 if (!PrefixRE.isValid(REError)) { 1340 errs() << "Unable to combine check-prefix strings into a prefix regular " 1341 "expression! This is likely a bug in FileCheck's verification of " 1342 "the check-prefix strings. Regular expression parsing failed " 1343 "with the following error: " 1344 << REError << "\n"; 1345 return 2; 1346 } 1347 1348 SourceMgr SM; 1349 1350 // Read the expected strings from the check file. 1351 ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr = 1352 MemoryBuffer::getFileOrSTDIN(CheckFilename); 1353 if (std::error_code EC = CheckFileOrErr.getError()) { 1354 errs() << "Could not open check file '" << CheckFilename 1355 << "': " << EC.message() << '\n'; 1356 return 2; 1357 } 1358 MemoryBuffer &CheckFile = *CheckFileOrErr.get(); 1359 1360 SmallString<4096> CheckFileBuffer; 1361 StringRef CheckFileText = CanonicalizeFile(CheckFile, CheckFileBuffer); 1362 1363 SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( 1364 CheckFileText, CheckFile.getBufferIdentifier()), 1365 SMLoc()); 1366 1367 std::vector<CheckString> CheckStrings; 1368 if (ReadCheckFile(SM, CheckFileText, PrefixRE, CheckStrings)) 1369 return 2; 1370 1371 // Open the file to check and add it to SourceMgr. 
1372 ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr = 1373 MemoryBuffer::getFileOrSTDIN(InputFilename); 1374 if (std::error_code EC = InputFileOrErr.getError()) { 1375 errs() << "Could not open input file '" << InputFilename 1376 << "': " << EC.message() << '\n'; 1377 return 2; 1378 } 1379 MemoryBuffer &InputFile = *InputFileOrErr.get(); 1380 1381 if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) { 1382 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; 1383 DumpCommandLine(argc, argv); 1384 return 2; 1385 } 1386 1387 SmallString<4096> InputFileBuffer; 1388 StringRef InputFileText = CanonicalizeFile(InputFile, InputFileBuffer); 1389 1390 SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( 1391 InputFileText, InputFile.getBufferIdentifier()), 1392 SMLoc()); 1393 1394 return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1; 1395 } 1396