1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // FileCheck does a line-by line check of a file that validates whether it 11 // contains the expected content. This is useful for regression tests etc. 12 // 13 // This program exits with an error status of 2 on error, exit status of 0 if 14 // the file matched the expected contents, and exit status of 1 if it did not 15 // contain the expected contents. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #include "llvm/Support/CommandLine.h" 20 #include "llvm/Support/MemoryBuffer.h" 21 #include "llvm/Support/PrettyStackTrace.h" 22 #include "llvm/Support/Regex.h" 23 #include "llvm/Support/SourceMgr.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include "llvm/System/Signals.h" 26 #include "llvm/ADT/StringMap.h" 27 #include <algorithm> 28 using namespace llvm; 29 30 static cl::opt<std::string> 31 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required); 32 33 static cl::opt<std::string> 34 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"), 35 cl::init("-"), cl::value_desc("filename")); 36 37 static cl::opt<std::string> 38 CheckPrefix("check-prefix", cl::init("CHECK"), 39 cl::desc("Prefix to use from check file (defaults to 'CHECK')")); 40 41 static cl::opt<bool> 42 NoCanonicalizeWhiteSpace("strict-whitespace", 43 cl::desc("Do not treat all horizontal whitespace as equivalent")); 44 45 //===----------------------------------------------------------------------===// 46 // Pattern Handling Code. 47 //===----------------------------------------------------------------------===// 48 49 class Pattern { 50 SMLoc PatternLoc; 51 52 /// FixedStr - If non-empty, this pattern is a fixed string match with the 53 /// specified fixed string. 54 StringRef FixedStr; 55 56 /// RegEx - If non-empty, this is a regex pattern. 57 std::string RegExStr; 58 59 /// VariableUses - Entries in this vector map to uses of a variable in the 60 /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain 61 /// "foobaz" and we'll get an entry in this vector that tells us to insert the 62 /// value of bar at offset 3. 63 std::vector<std::pair<StringRef, unsigned> > VariableUses; 64 65 /// VariableDefs - Entries in this vector map to definitions of a variable in 66 /// the pattern, e.g. "foo[[bar:.*]]baz". In this case, the RegExStr will 67 /// contain "foo(.*)baz" and VariableDefs will contain the pair "bar",1. The 68 /// index indicates what parenthesized value captures the variable value. 69 std::vector<std::pair<StringRef, unsigned> > VariableDefs; 70 71 public: 72 73 Pattern() { } 74 75 bool ParsePattern(StringRef PatternStr, SourceMgr &SM); 76 77 /// Match - Match the pattern string against the input buffer Buffer. This 78 /// returns the position that is matched or npos if there is no match. If 79 /// there is a match, the size of the matched string is returned in MatchLen. 80 /// 81 /// The VariableTable StringMap provides the current values of filecheck 82 /// variables and is updated if this match defines new values. 83 size_t Match(StringRef Buffer, size_t &MatchLen, 84 StringMap<StringRef> &VariableTable) const; 85 86 private: 87 static void AddFixedStringToRegEx(StringRef FixedStr, std::string &TheStr); 88 bool AddRegExToRegEx(StringRef RegExStr, unsigned &CurParen, SourceMgr &SM); 89 }; 90 91 92 bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) { 93 PatternLoc = SMLoc::getFromPointer(PatternStr.data()); 94 95 // Ignore trailing whitespace. 96 while (!PatternStr.empty() && 97 (PatternStr.back() == ' ' || PatternStr.back() == '\t')) 98 PatternStr = PatternStr.substr(0, PatternStr.size()-1); 99 100 // Check that there is something on the line. 101 if (PatternStr.empty()) { 102 SM.PrintMessage(PatternLoc, "found empty check string with prefix '" + 103 CheckPrefix+":'", "error"); 104 return true; 105 } 106 107 // Check to see if this is a fixed string, or if it has regex pieces. 108 if (PatternStr.size() < 2 || 109 (PatternStr.find("{{") == StringRef::npos && 110 PatternStr.find("[[") == StringRef::npos)) { 111 FixedStr = PatternStr; 112 return false; 113 } 114 115 // Paren value #0 is for the fully matched string. Any new parenthesized 116 // values add from their. 117 unsigned CurParen = 1; 118 119 // Otherwise, there is at least one regex piece. Build up the regex pattern 120 // by escaping scary characters in fixed strings, building up one big regex. 121 while (!PatternStr.empty()) { 122 // RegEx matches. 123 if (PatternStr.size() >= 2 && 124 PatternStr[0] == '{' && PatternStr[1] == '{') { 125 126 // Otherwise, this is the start of a regex match. Scan for the }}. 127 size_t End = PatternStr.find("}}"); 128 if (End == StringRef::npos) { 129 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 130 "found start of regex string with no end '}}'", "error"); 131 return true; 132 } 133 134 if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM)) 135 return true; 136 PatternStr = PatternStr.substr(End+2); 137 continue; 138 } 139 140 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .* 141 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The 142 // second form is [[foo]] which is a reference to foo. The variable name 143 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject 144 // it. This is to catch some common errors. 145 if (PatternStr.size() >= 2 && 146 PatternStr[0] == '[' && PatternStr[1] == '[') { 147 // Verify that it is terminated properly. 148 size_t End = PatternStr.find("]]"); 149 if (End == StringRef::npos) { 150 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 151 "invalid named regex reference, no ]] found", "error"); 152 return true; 153 } 154 155 StringRef MatchStr = PatternStr.substr(2, End-2); 156 PatternStr = PatternStr.substr(End+2); 157 158 // Get the regex name (e.g. "foo"). 159 size_t NameEnd = MatchStr.find(':'); 160 StringRef Name = MatchStr.substr(0, NameEnd); 161 162 if (Name.empty()) { 163 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 164 "invalid name in named regex: empty name", "error"); 165 return true; 166 } 167 168 // Verify that the name is well formed. 169 for (unsigned i = 0, e = Name.size(); i != e; ++i) 170 if (Name[i] != '_' && 171 (Name[i] < 'a' || Name[i] > 'z') && 172 (Name[i] < 'A' || Name[i] > 'Z') && 173 (Name[i] < '0' || Name[i] > '9')) { 174 SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i), 175 "invalid name in named regex", "error"); 176 return true; 177 } 178 179 // Name can't start with a digit. 180 if (isdigit(Name[0])) { 181 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 182 "invalid name in named regex", "error"); 183 return true; 184 } 185 186 // Handle [[foo]]. 187 if (NameEnd == StringRef::npos) { 188 VariableUses.push_back(std::make_pair(Name, RegExStr.size())); 189 continue; 190 } 191 192 // Handle [[foo:.*]]. 193 VariableDefs.push_back(std::make_pair(Name, CurParen)); 194 RegExStr += '('; 195 ++CurParen; 196 197 if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM)) 198 return true; 199 200 RegExStr += ')'; 201 } 202 203 // Handle fixed string matches. 204 // Find the end, which is the start of the next regex. 205 size_t FixedMatchEnd = PatternStr.find("{{"); 206 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); 207 AddFixedStringToRegEx(PatternStr.substr(0, FixedMatchEnd), RegExStr); 208 PatternStr = PatternStr.substr(FixedMatchEnd); 209 continue; 210 } 211 212 return false; 213 } 214 215 void Pattern::AddFixedStringToRegEx(StringRef FixedStr, std::string &TheStr) { 216 // Add the characters from FixedStr to the regex, escaping as needed. This 217 // avoids "leaning toothpicks" in common patterns. 218 for (unsigned i = 0, e = FixedStr.size(); i != e; ++i) { 219 switch (FixedStr[i]) { 220 // These are the special characters matched in "p_ere_exp". 221 case '(': 222 case ')': 223 case '^': 224 case '$': 225 case '|': 226 case '*': 227 case '+': 228 case '?': 229 case '.': 230 case '[': 231 case '\\': 232 case '{': 233 TheStr += '\\'; 234 // FALL THROUGH. 235 default: 236 TheStr += FixedStr[i]; 237 break; 238 } 239 } 240 } 241 242 bool Pattern::AddRegExToRegEx(StringRef RegexStr, unsigned &CurParen, 243 SourceMgr &SM) { 244 Regex R(RegexStr); 245 std::string Error; 246 if (!R.isValid(Error)) { 247 SM.PrintMessage(SMLoc::getFromPointer(RegexStr.data()), 248 "invalid regex: " + Error, "error"); 249 return true; 250 } 251 252 RegExStr += RegexStr.str(); 253 CurParen += R.getNumMatches(); 254 return false; 255 } 256 257 /// Match - Match the pattern string against the input buffer Buffer. This 258 /// returns the position that is matched or npos if there is no match. If 259 /// there is a match, the size of the matched string is returned in MatchLen. 260 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, 261 StringMap<StringRef> &VariableTable) const { 262 // If this is a fixed string pattern, just match it now. 263 if (!FixedStr.empty()) { 264 MatchLen = FixedStr.size(); 265 return Buffer.find(FixedStr); 266 } 267 268 // Regex match. 269 270 // If there are variable uses, we need to create a temporary string with the 271 // actual value. 272 StringRef RegExToMatch = RegExStr; 273 std::string TmpStr; 274 if (!VariableUses.empty()) { 275 TmpStr = RegExStr; 276 277 unsigned InsertOffset = 0; 278 for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) { 279 // Look up the value and escape it so that we can plop it into the regex. 280 std::string Value; 281 AddFixedStringToRegEx(VariableTable[VariableUses[i].first], Value); 282 283 // Plop it into the regex at the adjusted offset. 284 TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset, 285 Value.begin(), Value.end()); 286 InsertOffset += Value.size(); 287 } 288 289 // Match the newly constructed regex. 290 RegExToMatch = TmpStr; 291 } 292 293 294 SmallVector<StringRef, 4> MatchInfo; 295 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) 296 return StringRef::npos; 297 298 // Successful regex match. 299 assert(!MatchInfo.empty() && "Didn't get any match"); 300 StringRef FullMatch = MatchInfo[0]; 301 302 // If this defines any variables, remember their values. 303 for (unsigned i = 0, e = VariableDefs.size(); i != e; ++i) { 304 assert(VariableDefs[i].second < MatchInfo.size() && 305 "Internal paren error"); 306 VariableTable[VariableDefs[i].first] = MatchInfo[VariableDefs[i].second]; 307 } 308 309 MatchLen = FullMatch.size(); 310 return FullMatch.data()-Buffer.data(); 311 } 312 313 314 //===----------------------------------------------------------------------===// 315 // Check Strings. 316 //===----------------------------------------------------------------------===// 317 318 /// CheckString - This is a check that we found in the input file. 319 struct CheckString { 320 /// Pat - The pattern to match. 321 Pattern Pat; 322 323 /// Loc - The location in the match file that the check string was specified. 324 SMLoc Loc; 325 326 /// IsCheckNext - This is true if this is a CHECK-NEXT: directive (as opposed 327 /// to a CHECK: directive. 328 bool IsCheckNext; 329 330 /// NotStrings - These are all of the strings that are disallowed from 331 /// occurring between this match string and the previous one (or start of 332 /// file). 333 std::vector<std::pair<SMLoc, Pattern> > NotStrings; 334 335 CheckString(const Pattern &P, SMLoc L, bool isCheckNext) 336 : Pat(P), Loc(L), IsCheckNext(isCheckNext) {} 337 }; 338 339 /// CanonicalizeInputFile - Remove duplicate horizontal space from the specified 340 /// memory buffer, free it, and return a new one. 341 static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB) { 342 SmallVector<char, 16> NewFile; 343 NewFile.reserve(MB->getBufferSize()); 344 345 for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd(); 346 Ptr != End; ++Ptr) { 347 // If C is not a horizontal whitespace, skip it. 348 if (*Ptr != ' ' && *Ptr != '\t') { 349 NewFile.push_back(*Ptr); 350 continue; 351 } 352 353 // Otherwise, add one space and advance over neighboring space. 354 NewFile.push_back(' '); 355 while (Ptr+1 != End && 356 (Ptr[1] == ' ' || Ptr[1] == '\t')) 357 ++Ptr; 358 } 359 360 // Free the old buffer and return a new one. 361 MemoryBuffer *MB2 = 362 MemoryBuffer::getMemBufferCopy(NewFile.data(), 363 NewFile.data() + NewFile.size(), 364 MB->getBufferIdentifier()); 365 366 delete MB; 367 return MB2; 368 } 369 370 371 /// ReadCheckFile - Read the check file, which specifies the sequence of 372 /// expected strings. The strings are added to the CheckStrings vector. 373 static bool ReadCheckFile(SourceMgr &SM, 374 std::vector<CheckString> &CheckStrings) { 375 // Open the check file, and tell SourceMgr about it. 376 std::string ErrorStr; 377 MemoryBuffer *F = 378 MemoryBuffer::getFileOrSTDIN(CheckFilename.c_str(), &ErrorStr); 379 if (F == 0) { 380 errs() << "Could not open check file '" << CheckFilename << "': " 381 << ErrorStr << '\n'; 382 return true; 383 } 384 385 // If we want to canonicalize whitespace, strip excess whitespace from the 386 // buffer containing the CHECK lines. 387 if (!NoCanonicalizeWhiteSpace) 388 F = CanonicalizeInputFile(F); 389 390 SM.AddNewSourceBuffer(F, SMLoc()); 391 392 // Find all instances of CheckPrefix followed by : in the file. 393 StringRef Buffer = F->getBuffer(); 394 395 std::vector<std::pair<SMLoc, Pattern> > NotMatches; 396 397 while (1) { 398 // See if Prefix occurs in the memory buffer. 399 Buffer = Buffer.substr(Buffer.find(CheckPrefix)); 400 401 // If we didn't find a match, we're done. 402 if (Buffer.empty()) 403 break; 404 405 const char *CheckPrefixStart = Buffer.data(); 406 407 // When we find a check prefix, keep track of whether we find CHECK: or 408 // CHECK-NEXT: 409 bool IsCheckNext = false, IsCheckNot = false; 410 411 // Verify that the : is present after the prefix. 412 if (Buffer[CheckPrefix.size()] == ':') { 413 Buffer = Buffer.substr(CheckPrefix.size()+1); 414 } else if (Buffer.size() > CheckPrefix.size()+6 && 415 memcmp(Buffer.data()+CheckPrefix.size(), "-NEXT:", 6) == 0) { 416 Buffer = Buffer.substr(CheckPrefix.size()+7); 417 IsCheckNext = true; 418 } else if (Buffer.size() > CheckPrefix.size()+5 && 419 memcmp(Buffer.data()+CheckPrefix.size(), "-NOT:", 5) == 0) { 420 Buffer = Buffer.substr(CheckPrefix.size()+6); 421 IsCheckNot = true; 422 } else { 423 Buffer = Buffer.substr(1); 424 continue; 425 } 426 427 // Okay, we found the prefix, yay. Remember the rest of the line, but 428 // ignore leading and trailing whitespace. 429 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 430 431 // Scan ahead to the end of line. 432 size_t EOL = Buffer.find_first_of("\n\r"); 433 434 // Parse the pattern. 435 Pattern P; 436 if (P.ParsePattern(Buffer.substr(0, EOL), SM)) 437 return true; 438 439 Buffer = Buffer.substr(EOL); 440 441 442 // Verify that CHECK-NEXT lines have at least one CHECK line before them. 443 if (IsCheckNext && CheckStrings.empty()) { 444 SM.PrintMessage(SMLoc::getFromPointer(CheckPrefixStart), 445 "found '"+CheckPrefix+"-NEXT:' without previous '"+ 446 CheckPrefix+ ": line", "error"); 447 return true; 448 } 449 450 // Handle CHECK-NOT. 451 if (IsCheckNot) { 452 NotMatches.push_back(std::make_pair(SMLoc::getFromPointer(Buffer.data()), 453 P)); 454 continue; 455 } 456 457 458 // Okay, add the string we captured to the output vector and move on. 459 CheckStrings.push_back(CheckString(P, 460 SMLoc::getFromPointer(Buffer.data()), 461 IsCheckNext)); 462 std::swap(NotMatches, CheckStrings.back().NotStrings); 463 } 464 465 if (CheckStrings.empty()) { 466 errs() << "error: no check strings found with prefix '" << CheckPrefix 467 << ":'\n"; 468 return true; 469 } 470 471 if (!NotMatches.empty()) { 472 errs() << "error: '" << CheckPrefix 473 << "-NOT:' not supported after last check line.\n"; 474 return true; 475 } 476 477 return false; 478 } 479 480 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr, 481 StringRef Buffer) { 482 // Otherwise, we have an error, emit an error message. 483 SM.PrintMessage(CheckStr.Loc, "expected string not found in input", 484 "error"); 485 486 // Print the "scanning from here" line. If the current position is at the 487 // end of a line, advance to the start of the next line. 488 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); 489 490 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), "scanning from here", 491 "note"); 492 } 493 494 /// CountNumNewlinesBetween - Count the number of newlines in the specified 495 /// range. 496 static unsigned CountNumNewlinesBetween(StringRef Range) { 497 unsigned NumNewLines = 0; 498 while (1) { 499 // Scan for newline. 500 Range = Range.substr(Range.find_first_of("\n\r")); 501 if (Range.empty()) return NumNewLines; 502 503 ++NumNewLines; 504 505 // Handle \n\r and \r\n as a single newline. 506 if (Range.size() > 1 && 507 (Range[1] == '\n' || Range[1] == '\r') && 508 (Range[0] != Range[1])) 509 Range = Range.substr(1); 510 Range = Range.substr(1); 511 } 512 } 513 514 int main(int argc, char **argv) { 515 sys::PrintStackTraceOnErrorSignal(); 516 PrettyStackTraceProgram X(argc, argv); 517 cl::ParseCommandLineOptions(argc, argv); 518 519 SourceMgr SM; 520 521 // Read the expected strings from the check file. 522 std::vector<CheckString> CheckStrings; 523 if (ReadCheckFile(SM, CheckStrings)) 524 return 2; 525 526 // Open the file to check and add it to SourceMgr. 527 std::string ErrorStr; 528 MemoryBuffer *F = 529 MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), &ErrorStr); 530 if (F == 0) { 531 errs() << "Could not open input file '" << InputFilename << "': " 532 << ErrorStr << '\n'; 533 return true; 534 } 535 536 // Remove duplicate spaces in the input file if requested. 537 if (!NoCanonicalizeWhiteSpace) 538 F = CanonicalizeInputFile(F); 539 540 SM.AddNewSourceBuffer(F, SMLoc()); 541 542 /// VariableTable - This holds all the current filecheck variables. 543 StringMap<StringRef> VariableTable; 544 545 // Check that we have all of the expected strings, in order, in the input 546 // file. 547 StringRef Buffer = F->getBuffer(); 548 549 const char *LastMatch = Buffer.data(); 550 551 for (unsigned StrNo = 0, e = CheckStrings.size(); StrNo != e; ++StrNo) { 552 const CheckString &CheckStr = CheckStrings[StrNo]; 553 554 StringRef SearchFrom = Buffer; 555 556 // Find StrNo in the file. 557 size_t MatchLen = 0; 558 Buffer = Buffer.substr(CheckStr.Pat.Match(Buffer, MatchLen, VariableTable)); 559 560 // If we didn't find a match, reject the input. 561 if (Buffer.empty()) { 562 PrintCheckFailed(SM, CheckStr, SearchFrom); 563 return 1; 564 } 565 566 StringRef SkippedRegion(LastMatch, Buffer.data()-LastMatch); 567 568 // If this check is a "CHECK-NEXT", verify that the previous match was on 569 // the previous line (i.e. that there is one newline between them). 570 if (CheckStr.IsCheckNext) { 571 // Count the number of newlines between the previous match and this one. 572 assert(LastMatch != F->getBufferStart() && 573 "CHECK-NEXT can't be the first check in a file"); 574 575 unsigned NumNewLines = CountNumNewlinesBetween(SkippedRegion); 576 if (NumNewLines == 0) { 577 SM.PrintMessage(CheckStr.Loc, 578 CheckPrefix+"-NEXT: is on the same line as previous match", 579 "error"); 580 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), 581 "'next' match was here", "note"); 582 SM.PrintMessage(SMLoc::getFromPointer(LastMatch), 583 "previous match was here", "note"); 584 return 1; 585 } 586 587 if (NumNewLines != 1) { 588 SM.PrintMessage(CheckStr.Loc, 589 CheckPrefix+ 590 "-NEXT: is not on the line after the previous match", 591 "error"); 592 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), 593 "'next' match was here", "note"); 594 SM.PrintMessage(SMLoc::getFromPointer(LastMatch), 595 "previous match was here", "note"); 596 return 1; 597 } 598 } 599 600 // If this match had "not strings", verify that they don't exist in the 601 // skipped region. 602 for (unsigned ChunkNo = 0, e = CheckStr.NotStrings.size(); 603 ChunkNo != e; ++ChunkNo) { 604 size_t MatchLen = 0; 605 size_t Pos = CheckStr.NotStrings[ChunkNo].second.Match(SkippedRegion, 606 MatchLen, 607 VariableTable); 608 if (Pos == StringRef::npos) continue; 609 610 SM.PrintMessage(SMLoc::getFromPointer(LastMatch+Pos), 611 CheckPrefix+"-NOT: string occurred!", "error"); 612 SM.PrintMessage(CheckStr.NotStrings[ChunkNo].first, 613 CheckPrefix+"-NOT: pattern specified here", "note"); 614 return 1; 615 } 616 617 618 // Otherwise, everything is good. Step over the matched text and remember 619 // the position after the match as the end of the last match. 620 Buffer = Buffer.substr(MatchLen); 621 LastMatch = Buffer.data(); 622 } 623 624 return 0; 625 } 626