//===- FileCheck.cpp - Check that File's Contents match what is expected --===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// FileCheck does a line-by-line check of a file that validates whether it
// contains the expected content. This is useful for regression tests etc.
//
// This program exits with an exit status of 2 on error, exit status of 0 if
// the file matched the expected contents, and exit status of 1 if it did not
// contain the expected contents.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cctype>
#include <map>
#include <string>
#include <system_error>
#include <vector>
using namespace llvm;

/// Required positional argument: the file containing the check directives.
static cl::opt<std::string>
    CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);

/// -input-file: the file to verify. The initial value "-" is described by the
/// option text as meaning stdin.
static cl::opt<std::string>
    InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
                  cl::init("-"), cl::value_desc("filename"));

/// -check-prefix: the directive prefixes to look for in the check file. The
/// option may be given several times since it is a list.
static cl::list<std::string> CheckPrefixes(
    "check-prefix",
    cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
/// -check-prefixes: alias of -check-prefix that additionally accepts a
/// comma-separated list of values.
static cl::alias CheckPrefixesAlias(
    "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated,
    cl::NotHidden,
    cl::desc(
        "Alias for -check-prefix permitting multiple comma separated values"));
53 54 static cl::opt<bool> NoCanonicalizeWhiteSpace( 55 "strict-whitespace", 56 cl::desc("Do not treat all horizontal whitespace as equivalent")); 57 58 static cl::list<std::string> ImplicitCheckNot( 59 "implicit-check-not", 60 cl::desc("Add an implicit negative check with this pattern to every\n" 61 "positive check. This can be used to ensure that no instances of\n" 62 "this pattern occur which are not matched by a positive pattern"), 63 cl::value_desc("pattern")); 64 65 static cl::list<std::string> GlobalDefines("D", cl::Prefix, 66 cl::desc("Define a variable to be used in capture patterns."), 67 cl::value_desc("VAR=VALUE")); 68 69 static cl::opt<bool> AllowEmptyInput( 70 "allow-empty", cl::init(false), 71 cl::desc("Allow the input file to be empty. This is useful when making\n" 72 "checks that some error message does not occur, for example.")); 73 74 static cl::opt<bool> MatchFullLines( 75 "match-full-lines", cl::init(false), 76 cl::desc("Require all positive matches to cover an entire input line.\n" 77 "Allows leading and trailing whitespace if --strict-whitespace\n" 78 "is not also passed.")); 79 80 static cl::opt<bool> EnableVarScope( 81 "enable-var-scope", cl::init(false), 82 cl::desc("Enables scope for regex variables. Variables with names that\n" 83 "do not start with '$' will be reset at the beginning of\n" 84 "each CHECK-LABEL block.")); 85 86 typedef cl::list<std::string>::const_iterator prefix_iterator; 87 88 //===----------------------------------------------------------------------===// 89 // Pattern Handling Code. 90 //===----------------------------------------------------------------------===// 91 92 namespace Check { 93 enum CheckType { 94 CheckNone = 0, 95 CheckPlain, 96 CheckNext, 97 CheckSame, 98 CheckNot, 99 CheckDAG, 100 CheckLabel, 101 102 /// Indicates the pattern only matches the end of file. This is used for 103 /// trailing CHECK-NOTs. 104 CheckEOF, 105 106 /// Marks when parsing found a -NOT check combined with another CHECK suffix. 
107 CheckBadNot 108 }; 109 } 110 111 class Pattern { 112 SMLoc PatternLoc; 113 114 /// A fixed string to match as the pattern or empty if this pattern requires 115 /// a regex match. 116 StringRef FixedStr; 117 118 /// A regex string to match as the pattern or empty if this pattern requires 119 /// a fixed string to match. 120 std::string RegExStr; 121 122 /// Entries in this vector map to uses of a variable in the pattern, e.g. 123 /// "foo[[bar]]baz". In this case, the RegExStr will contain "foobaz" and 124 /// we'll get an entry in this vector that tells us to insert the value of 125 /// bar at offset 3. 126 std::vector<std::pair<StringRef, unsigned>> VariableUses; 127 128 /// Maps definitions of variables to their parenthesized capture numbers. 129 /// 130 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 131 /// 1. 132 std::map<StringRef, unsigned> VariableDefs; 133 134 Check::CheckType CheckTy; 135 136 /// Contains the number of line this pattern is in. 137 unsigned LineNumber; 138 139 public: 140 explicit Pattern(Check::CheckType Ty) : CheckTy(Ty) {} 141 142 /// Returns the location in source code. 
143 SMLoc getLoc() const { return PatternLoc; } 144 145 bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM, 146 unsigned LineNumber); 147 size_t Match(StringRef Buffer, size_t &MatchLen, 148 StringMap<StringRef> &VariableTable) const; 149 void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, 150 const StringMap<StringRef> &VariableTable) const; 151 152 bool hasVariable() const { 153 return !(VariableUses.empty() && VariableDefs.empty()); 154 } 155 156 Check::CheckType getCheckTy() const { return CheckTy; } 157 158 private: 159 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); 160 void AddBackrefToRegEx(unsigned BackrefNum); 161 unsigned 162 ComputeMatchDistance(StringRef Buffer, 163 const StringMap<StringRef> &VariableTable) const; 164 bool EvaluateExpression(StringRef Expr, std::string &Value) const; 165 size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); 166 }; 167 168 /// Parses the given string into the Pattern. 169 /// 170 /// \p Prefix provides which prefix is being matched, \p SM provides the 171 /// SourceMgr used for error reports, and \p LineNumber is the line number in 172 /// the input file from which the pattern string was read. Returns true in 173 /// case of an error, false otherwise. 174 bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix, 175 SourceMgr &SM, unsigned LineNumber) { 176 bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot; 177 178 this->LineNumber = LineNumber; 179 PatternLoc = SMLoc::getFromPointer(PatternStr.data()); 180 181 if (!(NoCanonicalizeWhiteSpace && MatchFullLines)) 182 // Ignore trailing whitespace. 183 while (!PatternStr.empty() && 184 (PatternStr.back() == ' ' || PatternStr.back() == '\t')) 185 PatternStr = PatternStr.substr(0, PatternStr.size() - 1); 186 187 // Check that there is something on the line. 
188 if (PatternStr.empty()) { 189 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error, 190 "found empty check string with prefix '" + Prefix + ":'"); 191 return true; 192 } 193 194 // Check to see if this is a fixed string, or if it has regex pieces. 195 if (!MatchFullLinesHere && 196 (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos && 197 PatternStr.find("[[") == StringRef::npos))) { 198 FixedStr = PatternStr; 199 return false; 200 } 201 202 if (MatchFullLinesHere) { 203 RegExStr += '^'; 204 if (!NoCanonicalizeWhiteSpace) 205 RegExStr += " *"; 206 } 207 208 // Paren value #0 is for the fully matched string. Any new parenthesized 209 // values add from there. 210 unsigned CurParen = 1; 211 212 // Otherwise, there is at least one regex piece. Build up the regex pattern 213 // by escaping scary characters in fixed strings, building up one big regex. 214 while (!PatternStr.empty()) { 215 // RegEx matches. 216 if (PatternStr.startswith("{{")) { 217 // This is the start of a regex match. Scan for the }}. 218 size_t End = PatternStr.find("}}"); 219 if (End == StringRef::npos) { 220 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 221 SourceMgr::DK_Error, 222 "found start of regex string with no end '}}'"); 223 return true; 224 } 225 226 // Enclose {{}} patterns in parens just like [[]] even though we're not 227 // capturing the result for any purpose. This is required in case the 228 // expression contains an alternation like: CHECK: abc{{x|z}}def. We 229 // want this to turn into: "abc(x|z)def" not "abcx|zdef". 230 RegExStr += '('; 231 ++CurParen; 232 233 if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM)) 234 return true; 235 RegExStr += ')'; 236 237 PatternStr = PatternStr.substr(End + 2); 238 continue; 239 } 240 241 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .* 242 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The 243 // second form is [[foo]] which is a reference to foo. 
The variable name 244 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject 245 // it. This is to catch some common errors. 246 if (PatternStr.startswith("[[")) { 247 // Find the closing bracket pair ending the match. End is going to be an 248 // offset relative to the beginning of the match string. 249 size_t End = FindRegexVarEnd(PatternStr.substr(2), SM); 250 251 if (End == StringRef::npos) { 252 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 253 SourceMgr::DK_Error, 254 "invalid named regex reference, no ]] found"); 255 return true; 256 } 257 258 StringRef MatchStr = PatternStr.substr(2, End); 259 PatternStr = PatternStr.substr(End + 4); 260 261 // Get the regex name (e.g. "foo"). 262 size_t NameEnd = MatchStr.find(':'); 263 StringRef Name = MatchStr.substr(0, NameEnd); 264 265 if (Name.empty()) { 266 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 267 "invalid name in named regex: empty name"); 268 return true; 269 } 270 271 // Verify that the name/expression is well formed. FileCheck currently 272 // supports @LINE, @LINE+number, @LINE-number expressions. The check here 273 // is relaxed, more strict check is performed in \c EvaluateExpression. 274 bool IsExpression = false; 275 for (unsigned i = 0, e = Name.size(); i != e; ++i) { 276 if (i == 0) { 277 if (Name[i] == '$') // Global vars start with '$' 278 continue; 279 if (Name[i] == '@') { 280 if (NameEnd != StringRef::npos) { 281 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 282 SourceMgr::DK_Error, 283 "invalid name in named regex definition"); 284 return true; 285 } 286 IsExpression = true; 287 continue; 288 } 289 } 290 if (Name[i] != '_' && !isalnum(Name[i]) && 291 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) { 292 SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i), 293 SourceMgr::DK_Error, "invalid name in named regex"); 294 return true; 295 } 296 } 297 298 // Name can't start with a digit. 
299 if (isdigit(static_cast<unsigned char>(Name[0]))) { 300 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 301 "invalid name in named regex"); 302 return true; 303 } 304 305 // Handle [[foo]]. 306 if (NameEnd == StringRef::npos) { 307 // Handle variables that were defined earlier on the same line by 308 // emitting a backreference. 309 if (VariableDefs.find(Name) != VariableDefs.end()) { 310 unsigned VarParenNum = VariableDefs[Name]; 311 if (VarParenNum < 1 || VarParenNum > 9) { 312 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 313 SourceMgr::DK_Error, 314 "Can't back-reference more than 9 variables"); 315 return true; 316 } 317 AddBackrefToRegEx(VarParenNum); 318 } else { 319 VariableUses.push_back(std::make_pair(Name, RegExStr.size())); 320 } 321 continue; 322 } 323 324 // Handle [[foo:.*]]. 325 VariableDefs[Name] = CurParen; 326 RegExStr += '('; 327 ++CurParen; 328 329 if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM)) 330 return true; 331 332 RegExStr += ')'; 333 } 334 335 // Handle fixed string matches. 336 // Find the end, which is the start of the next regex. 
337 size_t FixedMatchEnd = PatternStr.find("{{"); 338 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); 339 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); 340 PatternStr = PatternStr.substr(FixedMatchEnd); 341 } 342 343 if (MatchFullLinesHere) { 344 if (!NoCanonicalizeWhiteSpace) 345 RegExStr += " *"; 346 RegExStr += '$'; 347 } 348 349 return false; 350 } 351 352 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) { 353 Regex R(RS); 354 std::string Error; 355 if (!R.isValid(Error)) { 356 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, 357 "invalid regex: " + Error); 358 return true; 359 } 360 361 RegExStr += RS.str(); 362 CurParen += R.getNumMatches(); 363 return false; 364 } 365 366 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { 367 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); 368 std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum); 369 RegExStr += Backref; 370 } 371 372 /// Evaluates expression and stores the result to \p Value. 373 /// 374 /// Returns true on success and false when the expression has invalid syntax. 375 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const { 376 // The only supported expression is @LINE([\+-]\d+)? 377 if (!Expr.startswith("@LINE")) 378 return false; 379 Expr = Expr.substr(StringRef("@LINE").size()); 380 int Offset = 0; 381 if (!Expr.empty()) { 382 if (Expr[0] == '+') 383 Expr = Expr.substr(1); 384 else if (Expr[0] != '-') 385 return false; 386 if (Expr.getAsInteger(10, Offset)) 387 return false; 388 } 389 Value = llvm::itostr(LineNumber + Offset); 390 return true; 391 } 392 393 /// Matches the pattern string against the input buffer \p Buffer 394 /// 395 /// This returns the position that is matched or npos if there is no match. If 396 /// there is a match, the size of the matched string is returned in \p 397 /// MatchLen. 
398 /// 399 /// The \p VariableTable StringMap provides the current values of filecheck 400 /// variables and is updated if this match defines new values. 401 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, 402 StringMap<StringRef> &VariableTable) const { 403 // If this is the EOF pattern, match it immediately. 404 if (CheckTy == Check::CheckEOF) { 405 MatchLen = 0; 406 return Buffer.size(); 407 } 408 409 // If this is a fixed string pattern, just match it now. 410 if (!FixedStr.empty()) { 411 MatchLen = FixedStr.size(); 412 return Buffer.find(FixedStr); 413 } 414 415 // Regex match. 416 417 // If there are variable uses, we need to create a temporary string with the 418 // actual value. 419 StringRef RegExToMatch = RegExStr; 420 std::string TmpStr; 421 if (!VariableUses.empty()) { 422 TmpStr = RegExStr; 423 424 unsigned InsertOffset = 0; 425 for (const auto &VariableUse : VariableUses) { 426 std::string Value; 427 428 if (VariableUse.first[0] == '@') { 429 if (!EvaluateExpression(VariableUse.first, Value)) 430 return StringRef::npos; 431 } else { 432 StringMap<StringRef>::iterator it = 433 VariableTable.find(VariableUse.first); 434 // If the variable is undefined, return an error. 435 if (it == VariableTable.end()) 436 return StringRef::npos; 437 438 // Look up the value and escape it so that we can put it into the regex. 439 Value += Regex::escape(it->second); 440 } 441 442 // Plop it into the regex at the adjusted offset. 443 TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset, 444 Value.begin(), Value.end()); 445 InsertOffset += Value.size(); 446 } 447 448 // Match the newly constructed regex. 449 RegExToMatch = TmpStr; 450 } 451 452 SmallVector<StringRef, 4> MatchInfo; 453 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) 454 return StringRef::npos; 455 456 // Successful regex match. 
457 assert(!MatchInfo.empty() && "Didn't get any match"); 458 StringRef FullMatch = MatchInfo[0]; 459 460 // If this defines any variables, remember their values. 461 for (const auto &VariableDef : VariableDefs) { 462 assert(VariableDef.second < MatchInfo.size() && "Internal paren error"); 463 VariableTable[VariableDef.first] = MatchInfo[VariableDef.second]; 464 } 465 466 MatchLen = FullMatch.size(); 467 return FullMatch.data() - Buffer.data(); 468 } 469 470 471 /// Computes an arbitrary estimate for the quality of matching this pattern at 472 /// the start of \p Buffer; a distance of zero should correspond to a perfect 473 /// match. 474 unsigned 475 Pattern::ComputeMatchDistance(StringRef Buffer, 476 const StringMap<StringRef> &VariableTable) const { 477 // Just compute the number of matching characters. For regular expressions, we 478 // just compare against the regex itself and hope for the best. 479 // 480 // FIXME: One easy improvement here is have the regex lib generate a single 481 // example regular expression which matches, and use that as the example 482 // string. 483 StringRef ExampleString(FixedStr); 484 if (ExampleString.empty()) 485 ExampleString = RegExStr; 486 487 // Only compare up to the first line in the buffer, or the string size. 488 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); 489 BufferPrefix = BufferPrefix.split('\n').first; 490 return BufferPrefix.edit_distance(ExampleString); 491 } 492 493 /// Prints additional information about a failure to match involving this 494 /// pattern. 495 void Pattern::PrintFailureInfo( 496 const SourceMgr &SM, StringRef Buffer, 497 const StringMap<StringRef> &VariableTable) const { 498 // If this was a regular expression using variables, print the current 499 // variable values. 
500 if (!VariableUses.empty()) { 501 for (const auto &VariableUse : VariableUses) { 502 SmallString<256> Msg; 503 raw_svector_ostream OS(Msg); 504 StringRef Var = VariableUse.first; 505 if (Var[0] == '@') { 506 std::string Value; 507 if (EvaluateExpression(Var, Value)) { 508 OS << "with expression \""; 509 OS.write_escaped(Var) << "\" equal to \""; 510 OS.write_escaped(Value) << "\""; 511 } else { 512 OS << "uses incorrect expression \""; 513 OS.write_escaped(Var) << "\""; 514 } 515 } else { 516 StringMap<StringRef>::const_iterator it = VariableTable.find(Var); 517 518 // Check for undefined variable references. 519 if (it == VariableTable.end()) { 520 OS << "uses undefined variable \""; 521 OS.write_escaped(Var) << "\""; 522 } else { 523 OS << "with variable \""; 524 OS.write_escaped(Var) << "\" equal to \""; 525 OS.write_escaped(it->second) << "\""; 526 } 527 } 528 529 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 530 OS.str()); 531 } 532 } 533 534 // Attempt to find the closest/best fuzzy match. Usually an error happens 535 // because some string in the output didn't exactly match. In these cases, we 536 // would like to show the user a best guess at what "should have" matched, to 537 // save them having to actually check the input manually. 538 size_t NumLinesForward = 0; 539 size_t Best = StringRef::npos; 540 double BestQuality = 0; 541 542 // Use an arbitrary 4k limit on how far we will search. 543 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { 544 if (Buffer[i] == '\n') 545 ++NumLinesForward; 546 547 // Patterns have leading whitespace stripped, so skip whitespace when 548 // looking for something which looks like a pattern. 549 if (Buffer[i] == ' ' || Buffer[i] == '\t') 550 continue; 551 552 // Compute the "quality" of this match as an arbitrary combination of the 553 // match distance and the number of lines skipped to get to this match. 
554 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable); 555 double Quality = Distance + (NumLinesForward / 100.); 556 557 if (Quality < BestQuality || Best == StringRef::npos) { 558 Best = i; 559 BestQuality = Quality; 560 } 561 } 562 563 // Print the "possible intended match here" line if we found something 564 // reasonable and not equal to what we showed in the "scanning from here" 565 // line. 566 if (Best && Best != StringRef::npos && BestQuality < 50) { 567 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best), 568 SourceMgr::DK_Note, "possible intended match here"); 569 570 // FIXME: If we wanted to be really friendly we would show why the match 571 // failed, as it can be hard to spot simple one character differences. 572 } 573 } 574 575 /// Finds the closing sequence of a regex variable usage or definition. 576 /// 577 /// \p Str has to point in the beginning of the definition (right after the 578 /// opening sequence). Returns the offset of the closing sequence within Str, 579 /// or npos if it was not found. 580 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { 581 // Offset keeps track of the current offset within the input Str 582 size_t Offset = 0; 583 // [...] Nesting depth 584 size_t BracketDepth = 0; 585 586 while (!Str.empty()) { 587 if (Str.startswith("]]") && BracketDepth == 0) 588 return Offset; 589 if (Str[0] == '\\') { 590 // Backslash escapes the next char within regexes, so skip them both. 
591 Str = Str.substr(2); 592 Offset += 2; 593 } else { 594 switch (Str[0]) { 595 default: 596 break; 597 case '[': 598 BracketDepth++; 599 break; 600 case ']': 601 if (BracketDepth == 0) { 602 SM.PrintMessage(SMLoc::getFromPointer(Str.data()), 603 SourceMgr::DK_Error, 604 "missing closing \"]\" for regex variable"); 605 exit(1); 606 } 607 BracketDepth--; 608 break; 609 } 610 Str = Str.substr(1); 611 Offset++; 612 } 613 } 614 615 return StringRef::npos; 616 } 617 618 //===----------------------------------------------------------------------===// 619 // Check Strings. 620 //===----------------------------------------------------------------------===// 621 622 /// A check that we found in the input file. 623 struct CheckString { 624 /// The pattern to match. 625 Pattern Pat; 626 627 /// Which prefix name this check matched. 628 StringRef Prefix; 629 630 /// The location in the match file that the check string was specified. 631 SMLoc Loc; 632 633 /// All of the strings that are disallowed from occurring between this match 634 /// string and the previous one (or start of file). 635 std::vector<Pattern> DagNotStrings; 636 637 CheckString(const Pattern &P, StringRef S, SMLoc L) 638 : Pat(P), Prefix(S), Loc(L) {} 639 640 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, 641 size_t &MatchLen, StringMap<StringRef> &VariableTable) const; 642 643 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; 644 bool CheckSame(const SourceMgr &SM, StringRef Buffer) const; 645 bool CheckNot(const SourceMgr &SM, StringRef Buffer, 646 const std::vector<const Pattern *> &NotStrings, 647 StringMap<StringRef> &VariableTable) const; 648 size_t CheckDag(const SourceMgr &SM, StringRef Buffer, 649 std::vector<const Pattern *> &NotStrings, 650 StringMap<StringRef> &VariableTable) const; 651 }; 652 653 /// Canonicalize whitespaces in the file. Line endings are replaced with 654 /// UNIX-style '\n'. 
655 static StringRef CanonicalizeFile(MemoryBuffer &MB, 656 SmallVectorImpl<char> &OutputBuffer) { 657 OutputBuffer.reserve(MB.getBufferSize()); 658 659 for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd(); 660 Ptr != End; ++Ptr) { 661 // Eliminate trailing dosish \r. 662 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { 663 continue; 664 } 665 666 // If current char is not a horizontal whitespace or if horizontal 667 // whitespace canonicalization is disabled, dump it to output as is. 668 if (NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) { 669 OutputBuffer.push_back(*Ptr); 670 continue; 671 } 672 673 // Otherwise, add one space and advance over neighboring space. 674 OutputBuffer.push_back(' '); 675 while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t')) 676 ++Ptr; 677 } 678 679 // Add a null byte and then return all but that byte. 680 OutputBuffer.push_back('\0'); 681 return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1); 682 } 683 684 static bool IsPartOfWord(char c) { 685 return (isalnum(c) || c == '-' || c == '_'); 686 } 687 688 // Get the size of the prefix extension. 689 static size_t CheckTypeSize(Check::CheckType Ty) { 690 switch (Ty) { 691 case Check::CheckNone: 692 case Check::CheckBadNot: 693 return 0; 694 695 case Check::CheckPlain: 696 return sizeof(":") - 1; 697 698 case Check::CheckNext: 699 return sizeof("-NEXT:") - 1; 700 701 case Check::CheckSame: 702 return sizeof("-SAME:") - 1; 703 704 case Check::CheckNot: 705 return sizeof("-NOT:") - 1; 706 707 case Check::CheckDAG: 708 return sizeof("-DAG:") - 1; 709 710 case Check::CheckLabel: 711 return sizeof("-LABEL:") - 1; 712 713 case Check::CheckEOF: 714 llvm_unreachable("Should not be using EOF size"); 715 } 716 717 llvm_unreachable("Bad check type"); 718 } 719 720 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) { 721 char NextChar = Buffer[Prefix.size()]; 722 723 // Verify that the : is present after the prefix. 
724 if (NextChar == ':') 725 return Check::CheckPlain; 726 727 if (NextChar != '-') 728 return Check::CheckNone; 729 730 StringRef Rest = Buffer.drop_front(Prefix.size() + 1); 731 if (Rest.startswith("NEXT:")) 732 return Check::CheckNext; 733 734 if (Rest.startswith("SAME:")) 735 return Check::CheckSame; 736 737 if (Rest.startswith("NOT:")) 738 return Check::CheckNot; 739 740 if (Rest.startswith("DAG:")) 741 return Check::CheckDAG; 742 743 if (Rest.startswith("LABEL:")) 744 return Check::CheckLabel; 745 746 // You can't combine -NOT with another suffix. 747 if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") || 748 Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") || 749 Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:")) 750 return Check::CheckBadNot; 751 752 return Check::CheckNone; 753 } 754 755 // From the given position, find the next character after the word. 756 static size_t SkipWord(StringRef Str, size_t Loc) { 757 while (Loc < Str.size() && IsPartOfWord(Str[Loc])) 758 ++Loc; 759 return Loc; 760 } 761 762 /// Search the buffer for the first prefix in the prefix regular expression. 763 /// 764 /// This searches the buffer using the provided regular expression, however it 765 /// enforces constraints beyond that: 766 /// 1) The found prefix must not be a suffix of something that looks like 767 /// a valid prefix. 768 /// 2) The found prefix must be followed by a valid check type suffix using \c 769 /// FindCheckType above. 770 /// 771 /// The first match of the regular expression to satisfy these two is returned, 772 /// otherwise an empty StringRef is returned to indicate failure. 773 /// 774 /// If this routine returns a valid prefix, it will also shrink \p Buffer to 775 /// start at the beginning of the returned prefix, increment \p LineNumber for 776 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of 777 /// check found by examining the suffix. 
778 /// 779 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy 780 /// is unspecified. 781 static StringRef FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer, 782 unsigned &LineNumber, 783 Check::CheckType &CheckTy) { 784 SmallVector<StringRef, 2> Matches; 785 786 while (!Buffer.empty()) { 787 // Find the first (longest) match using the RE. 788 if (!PrefixRE.match(Buffer, &Matches)) 789 // No match at all, bail. 790 return StringRef(); 791 792 StringRef Prefix = Matches[0]; 793 Matches.clear(); 794 795 assert(Prefix.data() >= Buffer.data() && 796 Prefix.data() < Buffer.data() + Buffer.size() && 797 "Prefix doesn't start inside of buffer!"); 798 size_t Loc = Prefix.data() - Buffer.data(); 799 StringRef Skipped = Buffer.substr(0, Loc); 800 Buffer = Buffer.drop_front(Loc); 801 LineNumber += Skipped.count('\n'); 802 803 // Check that the matched prefix isn't a suffix of some other check-like 804 // word. 805 // FIXME: This is a very ad-hoc check. it would be better handled in some 806 // other way. Among other things it seems hard to distinguish between 807 // intentional and unintentional uses of this feature. 808 if (Skipped.empty() || !IsPartOfWord(Skipped.back())) { 809 // Now extract the type. 810 CheckTy = FindCheckType(Buffer, Prefix); 811 812 // If we've found a valid check type for this prefix, we're done. 813 if (CheckTy != Check::CheckNone) 814 return Prefix; 815 } 816 817 // If we didn't successfully find a prefix, we need to skip this invalid 818 // prefix and continue scanning. We directly skip the prefix that was 819 // matched and any additional parts of that check-like word. 820 Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size())); 821 } 822 823 // We ran out of buffer while skipping partial matches so give up. 824 return StringRef(); 825 } 826 827 /// Read the check file, which specifies the sequence of expected strings. 828 /// 829 /// The strings are added to the CheckStrings vector. 
Returns true in case of 830 /// an error, false otherwise. 831 static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, 832 std::vector<CheckString> &CheckStrings) { 833 std::vector<Pattern> ImplicitNegativeChecks; 834 for (const auto &PatternString : ImplicitCheckNot) { 835 // Create a buffer with fake command line content in order to display the 836 // command line option responsible for the specific implicit CHECK-NOT. 837 std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str(); 838 std::string Suffix = "'"; 839 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy( 840 Prefix + PatternString + Suffix, "command line"); 841 842 StringRef PatternInBuffer = 843 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); 844 SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); 845 846 ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot)); 847 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer, 848 "IMPLICIT-CHECK", SM, 0); 849 } 850 851 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks; 852 853 // LineNumber keeps track of the line on which CheckPrefix instances are 854 // found. 855 unsigned LineNumber = 1; 856 857 while (1) { 858 Check::CheckType CheckTy; 859 860 // See if a prefix occurs in the memory buffer. 861 StringRef UsedPrefix = FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber, 862 CheckTy); 863 if (UsedPrefix.empty()) 864 break; 865 assert(UsedPrefix.data() == Buffer.data() && 866 "Failed to move Buffer's start forward, or pointed prefix outside " 867 "of the buffer!"); 868 869 // Location to use for error messages. 870 const char *UsedPrefixStart = UsedPrefix.data(); 871 872 // Skip the buffer to the end. 873 Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy)); 874 875 // Complain about useful-looking but unsupported suffixes. 
876 if (CheckTy == Check::CheckBadNot) { 877 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, 878 "unsupported -NOT combo on prefix '" + UsedPrefix + "'"); 879 return true; 880 } 881 882 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore 883 // leading whitespace. 884 if (!(NoCanonicalizeWhiteSpace && MatchFullLines)) 885 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 886 887 // Scan ahead to the end of line. 888 size_t EOL = Buffer.find_first_of("\n\r"); 889 890 // Remember the location of the start of the pattern, for diagnostics. 891 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); 892 893 // Parse the pattern. 894 Pattern P(CheckTy); 895 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber)) 896 return true; 897 898 // Verify that CHECK-LABEL lines do not define or use variables 899 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { 900 SM.PrintMessage( 901 SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error, 902 "found '" + UsedPrefix + "-LABEL:'" 903 " with variable definition or use"); 904 return true; 905 } 906 907 Buffer = Buffer.substr(EOL); 908 909 // Verify that CHECK-NEXT lines have at least one CHECK line before them. 910 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) && 911 CheckStrings.empty()) { 912 StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME"; 913 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 914 SourceMgr::DK_Error, 915 "found '" + UsedPrefix + "-" + Type + 916 "' without previous '" + UsedPrefix + ": line"); 917 return true; 918 } 919 920 // Handle CHECK-DAG/-NOT. 921 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { 922 DagNotMatches.push_back(P); 923 continue; 924 } 925 926 // Okay, add the string we captured to the output vector and move on. 
927 CheckStrings.emplace_back(P, UsedPrefix, PatternLoc); 928 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 929 DagNotMatches = ImplicitNegativeChecks; 930 } 931 932 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first 933 // prefix as a filler for the error message. 934 if (!DagNotMatches.empty()) { 935 CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(), 936 SMLoc::getFromPointer(Buffer.data())); 937 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); 938 } 939 940 if (CheckStrings.empty()) { 941 errs() << "error: no check strings found with prefix" 942 << (CheckPrefixes.size() > 1 ? "es " : " "); 943 prefix_iterator I = CheckPrefixes.begin(); 944 prefix_iterator E = CheckPrefixes.end(); 945 if (I != E) { 946 errs() << "\'" << *I << ":'"; 947 ++I; 948 } 949 for (; I != E; ++I) 950 errs() << ", \'" << *I << ":'"; 951 952 errs() << '\n'; 953 return true; 954 } 955 956 return false; 957 } 958 959 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, const Pattern &Pat, 960 StringRef Buffer, 961 StringMap<StringRef> &VariableTable) { 962 // Otherwise, we have an error, emit an error message. 963 SM.PrintMessage(Loc, SourceMgr::DK_Error, 964 "expected string not found in input"); 965 966 // Print the "scanning from here" line. If the current position is at the 967 // end of a line, advance to the start of the next line. 968 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); 969 970 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 971 "scanning from here"); 972 973 // Allow the pattern to print additional information if desired. 
974 Pat.PrintFailureInfo(SM, Buffer, VariableTable); 975 } 976 977 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr, 978 StringRef Buffer, 979 StringMap<StringRef> &VariableTable) { 980 PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable); 981 } 982 983 /// Count the number of newlines in the specified range. 984 static unsigned CountNumNewlinesBetween(StringRef Range, 985 const char *&FirstNewLine) { 986 unsigned NumNewLines = 0; 987 while (1) { 988 // Scan for newline. 989 Range = Range.substr(Range.find_first_of("\n\r")); 990 if (Range.empty()) 991 return NumNewLines; 992 993 ++NumNewLines; 994 995 // Handle \n\r and \r\n as a single newline. 996 if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') && 997 (Range[0] != Range[1])) 998 Range = Range.substr(1); 999 Range = Range.substr(1); 1000 1001 if (NumNewLines == 1) 1002 FirstNewLine = Range.begin(); 1003 } 1004 } 1005 1006 /// Match check string and its "not strings" and/or "dag strings". 1007 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer, 1008 bool IsLabelScanMode, size_t &MatchLen, 1009 StringMap<StringRef> &VariableTable) const { 1010 size_t LastPos = 0; 1011 std::vector<const Pattern *> NotStrings; 1012 1013 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL 1014 // bounds; we have not processed variable definitions within the bounded block 1015 // yet so cannot handle any final CHECK-DAG yet; this is handled when going 1016 // over the block again (including the last CHECK-LABEL) in normal mode. 1017 if (!IsLabelScanMode) { 1018 // Match "dag strings" (with mixed "not strings" if any). 1019 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable); 1020 if (LastPos == StringRef::npos) 1021 return StringRef::npos; 1022 } 1023 1024 // Match itself from the last position after matching CHECK-DAG. 
1025 StringRef MatchBuffer = Buffer.substr(LastPos); 1026 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1027 if (MatchPos == StringRef::npos) { 1028 PrintCheckFailed(SM, *this, MatchBuffer, VariableTable); 1029 return StringRef::npos; 1030 } 1031 1032 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT 1033 // or CHECK-NOT 1034 if (!IsLabelScanMode) { 1035 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 1036 1037 // If this check is a "CHECK-NEXT", verify that the previous match was on 1038 // the previous line (i.e. that there is one newline between them). 1039 if (CheckNext(SM, SkippedRegion)) 1040 return StringRef::npos; 1041 1042 // If this check is a "CHECK-SAME", verify that the previous match was on 1043 // the same line (i.e. that there is no newline between them). 1044 if (CheckSame(SM, SkippedRegion)) 1045 return StringRef::npos; 1046 1047 // If this match had "not strings", verify that they don't exist in the 1048 // skipped region. 1049 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1050 return StringRef::npos; 1051 } 1052 1053 return LastPos + MatchPos; 1054 } 1055 1056 /// Verify there is a single line in the given buffer. 1057 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { 1058 if (Pat.getCheckTy() != Check::CheckNext) 1059 return false; 1060 1061 // Count the number of newlines between the previous match and this one. 
1062 assert(Buffer.data() != 1063 SM.getMemoryBuffer(SM.FindBufferContainingLoc( 1064 SMLoc::getFromPointer(Buffer.data()))) 1065 ->getBufferStart() && 1066 "CHECK-NEXT can't be the first check in a file"); 1067 1068 const char *FirstNewLine = nullptr; 1069 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1070 1071 if (NumNewLines == 0) { 1072 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1073 Prefix + "-NEXT: is on the same line as previous match"); 1074 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1075 "'next' match was here"); 1076 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1077 "previous match ended here"); 1078 return true; 1079 } 1080 1081 if (NumNewLines != 1) { 1082 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1083 Prefix + 1084 "-NEXT: is not on the line after the previous match"); 1085 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1086 "'next' match was here"); 1087 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1088 "previous match ended here"); 1089 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, 1090 "non-matching line after previous match is here"); 1091 return true; 1092 } 1093 1094 return false; 1095 } 1096 1097 /// Verify there is no newline in the given buffer. 1098 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const { 1099 if (Pat.getCheckTy() != Check::CheckSame) 1100 return false; 1101 1102 // Count the number of newlines between the previous match and this one. 
1103 assert(Buffer.data() != 1104 SM.getMemoryBuffer(SM.FindBufferContainingLoc( 1105 SMLoc::getFromPointer(Buffer.data()))) 1106 ->getBufferStart() && 1107 "CHECK-SAME can't be the first check in a file"); 1108 1109 const char *FirstNewLine = nullptr; 1110 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 1111 1112 if (NumNewLines != 0) { 1113 SM.PrintMessage(Loc, SourceMgr::DK_Error, 1114 Prefix + 1115 "-SAME: is not on the same line as the previous match"); 1116 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 1117 "'next' match was here"); 1118 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 1119 "previous match ended here"); 1120 return true; 1121 } 1122 1123 return false; 1124 } 1125 1126 /// Verify there's no "not strings" in the given buffer. 1127 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer, 1128 const std::vector<const Pattern *> &NotStrings, 1129 StringMap<StringRef> &VariableTable) const { 1130 for (const Pattern *Pat : NotStrings) { 1131 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); 1132 1133 size_t MatchLen = 0; 1134 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable); 1135 1136 if (Pos == StringRef::npos) 1137 continue; 1138 1139 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Pos), 1140 SourceMgr::DK_Error, Prefix + "-NOT: string occurred!"); 1141 SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note, 1142 Prefix + "-NOT: pattern specified here"); 1143 return true; 1144 } 1145 1146 return false; 1147 } 1148 1149 /// Match "dag strings" and their mixed "not strings". 
1150 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, 1151 std::vector<const Pattern *> &NotStrings, 1152 StringMap<StringRef> &VariableTable) const { 1153 if (DagNotStrings.empty()) 1154 return 0; 1155 1156 size_t LastPos = 0; 1157 size_t StartPos = LastPos; 1158 1159 for (const Pattern &Pat : DagNotStrings) { 1160 assert((Pat.getCheckTy() == Check::CheckDAG || 1161 Pat.getCheckTy() == Check::CheckNot) && 1162 "Invalid CHECK-DAG or CHECK-NOT!"); 1163 1164 if (Pat.getCheckTy() == Check::CheckNot) { 1165 NotStrings.push_back(&Pat); 1166 continue; 1167 } 1168 1169 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); 1170 1171 size_t MatchLen = 0, MatchPos; 1172 1173 // CHECK-DAG always matches from the start. 1174 StringRef MatchBuffer = Buffer.substr(StartPos); 1175 MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); 1176 // With a group of CHECK-DAGs, a single mismatching means the match on 1177 // that group of CHECK-DAGs fails immediately. 1178 if (MatchPos == StringRef::npos) { 1179 PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable); 1180 return StringRef::npos; 1181 } 1182 // Re-calc it as the offset relative to the start of the original string. 1183 MatchPos += StartPos; 1184 1185 if (!NotStrings.empty()) { 1186 if (MatchPos < LastPos) { 1187 // Reordered? 
1188 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos), 1189 SourceMgr::DK_Error, 1190 Prefix + "-DAG: found a match of CHECK-DAG" 1191 " reordering across a CHECK-NOT"); 1192 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos), 1193 SourceMgr::DK_Note, 1194 Prefix + "-DAG: the farthest match of CHECK-DAG" 1195 " is found here"); 1196 SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note, 1197 Prefix + "-NOT: the crossed pattern specified" 1198 " here"); 1199 SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note, 1200 Prefix + "-DAG: the reordered pattern specified" 1201 " here"); 1202 return StringRef::npos; 1203 } 1204 // All subsequent CHECK-DAGs should be matched from the farthest 1205 // position of all precedent CHECK-DAGs (including this one.) 1206 StartPos = LastPos; 1207 // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to 1208 // CHECK-DAG, verify that there's no 'not' strings occurred in that 1209 // region. 1210 StringRef SkippedRegion = Buffer.slice(LastPos, MatchPos); 1211 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) 1212 return StringRef::npos; 1213 // Clear "not strings". 1214 NotStrings.clear(); 1215 } 1216 1217 // Update the last position with CHECK-DAG matches. 1218 LastPos = std::max(MatchPos + MatchLen, LastPos); 1219 } 1220 1221 return LastPos; 1222 } 1223 1224 // A check prefix must contain only alphanumeric, hyphens and underscores. 1225 static bool ValidateCheckPrefix(StringRef CheckPrefix) { 1226 Regex Validator("^[a-zA-Z0-9_-]*$"); 1227 return Validator.match(CheckPrefix); 1228 } 1229 1230 static bool ValidateCheckPrefixes() { 1231 StringSet<> PrefixSet; 1232 1233 for (StringRef Prefix : CheckPrefixes) { 1234 // Reject empty prefixes. 
1235 if (Prefix == "") 1236 return false; 1237 1238 if (!PrefixSet.insert(Prefix).second) 1239 return false; 1240 1241 if (!ValidateCheckPrefix(Prefix)) 1242 return false; 1243 } 1244 1245 return true; 1246 } 1247 1248 // Combines the check prefixes into a single regex so that we can efficiently 1249 // scan for any of the set. 1250 // 1251 // The semantics are that the longest-match wins which matches our regex 1252 // library. 1253 static Regex buildCheckPrefixRegex() { 1254 // I don't think there's a way to specify an initial value for cl::list, 1255 // so if nothing was specified, add the default 1256 if (CheckPrefixes.empty()) 1257 CheckPrefixes.push_back("CHECK"); 1258 1259 // We already validated the contents of CheckPrefixes so just concatenate 1260 // them as alternatives. 1261 SmallString<32> PrefixRegexStr; 1262 for (StringRef Prefix : CheckPrefixes) { 1263 if (Prefix != CheckPrefixes.front()) 1264 PrefixRegexStr.push_back('|'); 1265 1266 PrefixRegexStr.append(Prefix); 1267 } 1268 1269 return Regex(PrefixRegexStr); 1270 } 1271 1272 static void DumpCommandLine(int argc, char **argv) { 1273 errs() << "FileCheck command line: "; 1274 for (int I = 0; I < argc; I++) 1275 errs() << " " << argv[I]; 1276 errs() << "\n"; 1277 } 1278 1279 // Remove local variables from \p VariableTable. Global variables 1280 // (start with '$') are preserved. 1281 static void ClearLocalVars(StringMap<StringRef> &VariableTable) { 1282 SmallVector<StringRef, 16> LocalVars; 1283 for (const auto &Var : VariableTable) 1284 if (Var.first()[0] != '$') 1285 LocalVars.push_back(Var.first()); 1286 1287 for (const auto &Var : LocalVars) 1288 VariableTable.erase(Var); 1289 } 1290 1291 /// Check the input to FileCheck provided in the \p Buffer against the \p 1292 /// CheckStrings read from the check file. 1293 /// 1294 /// Returns false if the input fails to satisfy the checks. 
1295 bool CheckInput(SourceMgr &SM, StringRef Buffer, 1296 ArrayRef<CheckString> CheckStrings) { 1297 bool ChecksFailed = false; 1298 1299 /// VariableTable - This holds all the current filecheck variables. 1300 StringMap<StringRef> VariableTable; 1301 1302 for (const auto& Def : GlobalDefines) 1303 VariableTable.insert(StringRef(Def).split('=')); 1304 1305 unsigned i = 0, j = 0, e = CheckStrings.size(); 1306 while (true) { 1307 StringRef CheckRegion; 1308 if (j == e) { 1309 CheckRegion = Buffer; 1310 } else { 1311 const CheckString &CheckLabelStr = CheckStrings[j]; 1312 if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) { 1313 ++j; 1314 continue; 1315 } 1316 1317 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG 1318 size_t MatchLabelLen = 0; 1319 size_t MatchLabelPos = 1320 CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, VariableTable); 1321 if (MatchLabelPos == StringRef::npos) 1322 // Immediately bail of CHECK-LABEL fails, nothing else we can do. 1323 return false; 1324 1325 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); 1326 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); 1327 ++j; 1328 } 1329 1330 if (EnableVarScope) 1331 ClearLocalVars(VariableTable); 1332 1333 for (; i != j; ++i) { 1334 const CheckString &CheckStr = CheckStrings[i]; 1335 1336 // Check each string within the scanned region, including a second check 1337 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) 1338 size_t MatchLen = 0; 1339 size_t MatchPos = 1340 CheckStr.Check(SM, CheckRegion, false, MatchLen, VariableTable); 1341 1342 if (MatchPos == StringRef::npos) { 1343 ChecksFailed = true; 1344 i = j; 1345 break; 1346 } 1347 1348 CheckRegion = CheckRegion.substr(MatchPos + MatchLen); 1349 } 1350 1351 if (j == e) 1352 break; 1353 } 1354 1355 // Success if no checks failed. 
1356 return !ChecksFailed; 1357 } 1358 1359 int main(int argc, char **argv) { 1360 sys::PrintStackTraceOnErrorSignal(argv[0]); 1361 PrettyStackTraceProgram X(argc, argv); 1362 cl::ParseCommandLineOptions(argc, argv); 1363 1364 if (!ValidateCheckPrefixes()) { 1365 errs() << "Supplied check-prefix is invalid! Prefixes must be unique and " 1366 "start with a letter and contain only alphanumeric characters, " 1367 "hyphens and underscores\n"; 1368 return 2; 1369 } 1370 1371 Regex PrefixRE = buildCheckPrefixRegex(); 1372 std::string REError; 1373 if (!PrefixRE.isValid(REError)) { 1374 errs() << "Unable to combine check-prefix strings into a prefix regular " 1375 "expression! This is likely a bug in FileCheck's verification of " 1376 "the check-prefix strings. Regular expression parsing failed " 1377 "with the following error: " 1378 << REError << "\n"; 1379 return 2; 1380 } 1381 1382 SourceMgr SM; 1383 1384 // Read the expected strings from the check file. 1385 ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr = 1386 MemoryBuffer::getFileOrSTDIN(CheckFilename); 1387 if (std::error_code EC = CheckFileOrErr.getError()) { 1388 errs() << "Could not open check file '" << CheckFilename 1389 << "': " << EC.message() << '\n'; 1390 return 2; 1391 } 1392 MemoryBuffer &CheckFile = *CheckFileOrErr.get(); 1393 1394 SmallString<4096> CheckFileBuffer; 1395 StringRef CheckFileText = CanonicalizeFile(CheckFile, CheckFileBuffer); 1396 1397 SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( 1398 CheckFileText, CheckFile.getBufferIdentifier()), 1399 SMLoc()); 1400 1401 std::vector<CheckString> CheckStrings; 1402 if (ReadCheckFile(SM, CheckFileText, PrefixRE, CheckStrings)) 1403 return 2; 1404 1405 // Open the file to check and add it to SourceMgr. 
1406 ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr = 1407 MemoryBuffer::getFileOrSTDIN(InputFilename); 1408 if (std::error_code EC = InputFileOrErr.getError()) { 1409 errs() << "Could not open input file '" << InputFilename 1410 << "': " << EC.message() << '\n'; 1411 return 2; 1412 } 1413 MemoryBuffer &InputFile = *InputFileOrErr.get(); 1414 1415 if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) { 1416 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; 1417 DumpCommandLine(argc, argv); 1418 return 2; 1419 } 1420 1421 SmallString<4096> InputFileBuffer; 1422 StringRef InputFileText = CanonicalizeFile(InputFile, InputFileBuffer); 1423 1424 SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( 1425 InputFileText, InputFile.getBufferIdentifier()), 1426 SMLoc()); 1427 1428 return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1; 1429 } 1430