//===- FileCheck.cpp - Check that File's Contents match what is expected --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// FileCheck does a line-by line check of a file that validates whether it
// contains the expected content.  This is useful for regression tests etc.
//
// This program exits with an error status of 2 on error, exit status of 0 if
// the file matched the expected contents, and exit status of 1 if it did not
// contain the expected contents.
//
//===----------------------------------------------------------------------===//

#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Signals.h"
#include <algorithm>
#include <cassert>
#include <cstring>
#include <string>
#include <utility>
#include <vector>
using namespace llvm;

static cl::opt<std::string>
CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);

static cl::opt<std::string>
InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
              cl::init("-"), cl::value_desc("filename"));

static cl::opt<std::string>
CheckPrefix("check-prefix", cl::init("CHECK"),
            cl::desc("Prefix to use from check file (defaults to 'CHECK')"));

static cl::opt<bool>
NoCanonicalizeWhiteSpace("strict-whitespace",
              cl::desc("Do not treat all horizontal whitespace as equivalent"));

//===----------------------------------------------------------------------===//
// Pattern Handling Code.
45 //===----------------------------------------------------------------------===// 46 47 class PatternChunk { 48 StringRef Str; 49 bool isRegEx; 50 public: 51 PatternChunk(StringRef S, bool isRE) : Str(S), isRegEx(isRE) {} 52 53 size_t Match(StringRef Buffer, size_t &MatchLen) const { 54 if (!isRegEx) { 55 // Fixed string match. 56 MatchLen = Str.size(); 57 return Buffer.find(Str); 58 } 59 60 // Regex match. 61 SmallVector<StringRef, 4> MatchInfo; 62 if (!Regex(Str, Regex::Sub|Regex::Newline).match(Buffer, &MatchInfo)) 63 return StringRef::npos; 64 65 // Successful regex match. 66 assert(!MatchInfo.empty() && "Didn't get any match"); 67 StringRef FullMatch = MatchInfo[0]; 68 69 MatchLen = FullMatch.size(); 70 return FullMatch.data()-Buffer.data(); 71 } 72 }; 73 74 class Pattern { 75 /// Chunks - The pattern chunks to match. If the bool is false, it is a fixed 76 /// string match, if it is true, it is a regex match. 77 SmallVector<PatternChunk, 4> Chunks; 78 79 StringRef FixedStr; 80 public: 81 82 Pattern() { } 83 84 bool ParsePattern(StringRef PatternStr, SourceMgr &SM); 85 86 /// Match - Match the pattern string against the input buffer Buffer. This 87 /// returns the position that is matched or npos if there is no match. If 88 /// there is a match, the size of the matched string is returned in MatchLen. 89 size_t Match(StringRef Buffer, size_t &MatchLen) const; 90 }; 91 92 bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) { 93 // Ignore trailing whitespace. 94 while (!PatternStr.empty() && 95 (PatternStr.back() == ' ' || PatternStr.back() == '\t')) 96 PatternStr = PatternStr.substr(0, PatternStr.size()-1); 97 98 // Check that there is something on the line. 99 if (PatternStr.empty()) { 100 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 101 "found empty check string with prefix '"+CheckPrefix+":'", 102 "error"); 103 return true; 104 } 105 106 // Check to see if this is a fixed string, or if it has regex pieces. 
107 if (PatternStr.size() < 2 || PatternStr.find("{{") == StringRef::npos) { 108 FixedStr = PatternStr; 109 return false; 110 } 111 112 // Otherwise, there is at least one regex piece. 113 114 // Scan the pattern to break it into regex and non-regex pieces. 115 while (!PatternStr.empty()) { 116 // Handle fixed string matches. 117 if (PatternStr.size() < 2 || 118 PatternStr[0] != '{' || PatternStr[1] != '{') { 119 // Find the end, which is the start of the next regex. 120 size_t FixedMatchEnd = PatternStr.find("{{"); 121 122 Chunks.push_back(PatternChunk(PatternStr.substr(0, FixedMatchEnd),false)); 123 PatternStr = PatternStr.substr(FixedMatchEnd); 124 continue; 125 } 126 127 // Otherwise, this is the start of a regex match. Scan for the }}. 128 size_t End = PatternStr.find("}}"); 129 if (End == StringRef::npos) { 130 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 131 "found start of regex string with no end '}}'", "error"); 132 return true; 133 } 134 135 Regex R(PatternStr.substr(2, End-2)); 136 std::string Error; 137 if (!R.isValid(Error)) { 138 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()+2), 139 "invalid regex: " + Error, "error"); 140 return true; 141 } 142 143 Chunks.push_back(PatternChunk(PatternStr.substr(2, End-2), true)); 144 PatternStr = PatternStr.substr(End+2); 145 } 146 147 return false; 148 } 149 150 /// Match - Match the pattern string against the input buffer Buffer. This 151 /// returns the position that is matched or npos if there is no match. If 152 /// there is a match, the size of the matched string is returned in MatchLen. 153 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen) const { 154 // If this is a fixed string pattern, just match it now. 
155 if (!FixedStr.empty()) { 156 MatchLen = FixedStr.size(); 157 return Buffer.find(FixedStr); 158 } 159 160 size_t FirstMatch = StringRef::npos; 161 MatchLen = 0; 162 163 while (!Buffer.empty()) { 164 StringRef MatchAttempt = Buffer; 165 166 unsigned ChunkNo = 0, e = Chunks.size(); 167 for (; ChunkNo != e; ++ChunkNo) { 168 size_t ThisMatch, ThisLength = StringRef::npos; 169 ThisMatch = Chunks[ChunkNo].Match(MatchAttempt, ThisLength); 170 171 // Otherwise, what we do depends on if this is the first match or not. If 172 // this is the first match, it doesn't match to match at the start of 173 // MatchAttempt. 174 if (ChunkNo == 0) { 175 // If the first match fails then this pattern will never match in 176 // Buffer. 177 if (ThisMatch == StringRef::npos) 178 return ThisMatch; 179 180 FirstMatch = ThisMatch; 181 MatchAttempt = MatchAttempt.substr(FirstMatch); 182 ThisMatch = 0; 183 } 184 185 // If this chunk didn't match, then the entire pattern didn't match from 186 // FirstMatch, try later in the buffer. 187 if (ThisMatch == StringRef::npos) 188 break; 189 190 // Ok, if the match didn't match at the beginning of MatchAttempt, then we 191 // have something like "ABC{{DEF}} and something was in-between. Reject 192 // the match. 193 if (ThisMatch != 0) 194 break; 195 196 // Otherwise, match the string and move to the next chunk. 197 MatchLen += ThisLength; 198 MatchAttempt = MatchAttempt.substr(ThisLength); 199 } 200 201 // If the whole thing matched, we win. 202 if (ChunkNo == e) 203 return FirstMatch; 204 205 // Otherwise, try matching again after FirstMatch to see if this pattern 206 // matches later in the buffer. 207 Buffer = Buffer.substr(FirstMatch+1); 208 } 209 210 // If we ran out of stuff to scan, then we didn't match. 211 return StringRef::npos; 212 } 213 214 215 //===----------------------------------------------------------------------===// 216 // Check Strings. 
217 //===----------------------------------------------------------------------===// 218 219 /// CheckString - This is a check that we found in the input file. 220 struct CheckString { 221 /// Pat - The pattern to match. 222 Pattern Pat; 223 224 /// Loc - The location in the match file that the check string was specified. 225 SMLoc Loc; 226 227 /// IsCheckNext - This is true if this is a CHECK-NEXT: directive (as opposed 228 /// to a CHECK: directive. 229 bool IsCheckNext; 230 231 /// NotStrings - These are all of the strings that are disallowed from 232 /// occurring between this match string and the previous one (or start of 233 /// file). 234 std::vector<std::pair<SMLoc, Pattern> > NotStrings; 235 236 CheckString(const Pattern &P, SMLoc L, bool isCheckNext) 237 : Pat(P), Loc(L), IsCheckNext(isCheckNext) {} 238 }; 239 240 /// CanonicalizeInputFile - Remove duplicate horizontal space from the specified 241 /// memory buffer, free it, and return a new one. 242 static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB) { 243 SmallVector<char, 16> NewFile; 244 NewFile.reserve(MB->getBufferSize()); 245 246 for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd(); 247 Ptr != End; ++Ptr) { 248 // If C is not a horizontal whitespace, skip it. 249 if (*Ptr != ' ' && *Ptr != '\t') { 250 NewFile.push_back(*Ptr); 251 continue; 252 } 253 254 // Otherwise, add one space and advance over neighboring space. 255 NewFile.push_back(' '); 256 while (Ptr+1 != End && 257 (Ptr[1] == ' ' || Ptr[1] == '\t')) 258 ++Ptr; 259 } 260 261 // Free the old buffer and return a new one. 262 MemoryBuffer *MB2 = 263 MemoryBuffer::getMemBufferCopy(NewFile.data(), 264 NewFile.data() + NewFile.size(), 265 MB->getBufferIdentifier()); 266 267 delete MB; 268 return MB2; 269 } 270 271 272 /// ReadCheckFile - Read the check file, which specifies the sequence of 273 /// expected strings. The strings are added to the CheckStrings vector. 
274 static bool ReadCheckFile(SourceMgr &SM, 275 std::vector<CheckString> &CheckStrings) { 276 // Open the check file, and tell SourceMgr about it. 277 std::string ErrorStr; 278 MemoryBuffer *F = 279 MemoryBuffer::getFileOrSTDIN(CheckFilename.c_str(), &ErrorStr); 280 if (F == 0) { 281 errs() << "Could not open check file '" << CheckFilename << "': " 282 << ErrorStr << '\n'; 283 return true; 284 } 285 286 // If we want to canonicalize whitespace, strip excess whitespace from the 287 // buffer containing the CHECK lines. 288 if (!NoCanonicalizeWhiteSpace) 289 F = CanonicalizeInputFile(F); 290 291 SM.AddNewSourceBuffer(F, SMLoc()); 292 293 // Find all instances of CheckPrefix followed by : in the file. 294 StringRef Buffer = F->getBuffer(); 295 296 std::vector<std::pair<SMLoc, Pattern> > NotMatches; 297 298 while (1) { 299 // See if Prefix occurs in the memory buffer. 300 Buffer = Buffer.substr(Buffer.find(CheckPrefix)); 301 302 // If we didn't find a match, we're done. 303 if (Buffer.empty()) 304 break; 305 306 const char *CheckPrefixStart = Buffer.data(); 307 308 // When we find a check prefix, keep track of whether we find CHECK: or 309 // CHECK-NEXT: 310 bool IsCheckNext = false, IsCheckNot = false; 311 312 // Verify that the : is present after the prefix. 313 if (Buffer[CheckPrefix.size()] == ':') { 314 Buffer = Buffer.substr(CheckPrefix.size()+1); 315 } else if (Buffer.size() > CheckPrefix.size()+6 && 316 memcmp(Buffer.data()+CheckPrefix.size(), "-NEXT:", 6) == 0) { 317 Buffer = Buffer.substr(CheckPrefix.size()+7); 318 IsCheckNext = true; 319 } else if (Buffer.size() > CheckPrefix.size()+5 && 320 memcmp(Buffer.data()+CheckPrefix.size(), "-NOT:", 5) == 0) { 321 Buffer = Buffer.substr(CheckPrefix.size()+6); 322 IsCheckNot = true; 323 } else { 324 Buffer = Buffer.substr(1); 325 continue; 326 } 327 328 // Okay, we found the prefix, yay. Remember the rest of the line, but 329 // ignore leading and trailing whitespace. 
330 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 331 332 // Scan ahead to the end of line. 333 size_t EOL = Buffer.find_first_of("\n\r"); 334 335 // Parse the pattern. 336 Pattern P; 337 if (P.ParsePattern(Buffer.substr(0, EOL), SM)) 338 return true; 339 340 Buffer = Buffer.substr(EOL); 341 342 343 // Verify that CHECK-NEXT lines have at least one CHECK line before them. 344 if (IsCheckNext && CheckStrings.empty()) { 345 SM.PrintMessage(SMLoc::getFromPointer(CheckPrefixStart), 346 "found '"+CheckPrefix+"-NEXT:' without previous '"+ 347 CheckPrefix+ ": line", "error"); 348 return true; 349 } 350 351 // Handle CHECK-NOT. 352 if (IsCheckNot) { 353 NotMatches.push_back(std::make_pair(SMLoc::getFromPointer(Buffer.data()), 354 P)); 355 continue; 356 } 357 358 359 // Okay, add the string we captured to the output vector and move on. 360 CheckStrings.push_back(CheckString(P, 361 SMLoc::getFromPointer(Buffer.data()), 362 IsCheckNext)); 363 std::swap(NotMatches, CheckStrings.back().NotStrings); 364 } 365 366 if (CheckStrings.empty()) { 367 errs() << "error: no check strings found with prefix '" << CheckPrefix 368 << ":'\n"; 369 return true; 370 } 371 372 if (!NotMatches.empty()) { 373 errs() << "error: '" << CheckPrefix 374 << "-NOT:' not supported after last check line.\n"; 375 return true; 376 } 377 378 return false; 379 } 380 381 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr, 382 StringRef Buffer) { 383 // Otherwise, we have an error, emit an error message. 384 SM.PrintMessage(CheckStr.Loc, "expected string not found in input", 385 "error"); 386 387 // Print the "scanning from here" line. If the current position is at the 388 // end of a line, advance to the start of the next line. 
389 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); 390 391 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), "scanning from here", 392 "note"); 393 } 394 395 /// CountNumNewlinesBetween - Count the number of newlines in the specified 396 /// range. 397 static unsigned CountNumNewlinesBetween(StringRef Range) { 398 unsigned NumNewLines = 0; 399 while (1) { 400 // Scan for newline. 401 Range = Range.substr(Range.find_first_of("\n\r")); 402 if (Range.empty()) return NumNewLines; 403 404 ++NumNewLines; 405 406 // Handle \n\r and \r\n as a single newline. 407 if (Range.size() > 1 && 408 (Range[1] == '\n' || Range[1] == '\r') && 409 (Range[0] != Range[1])) 410 Range = Range.substr(1); 411 Range = Range.substr(1); 412 } 413 } 414 415 int main(int argc, char **argv) { 416 sys::PrintStackTraceOnErrorSignal(); 417 PrettyStackTraceProgram X(argc, argv); 418 cl::ParseCommandLineOptions(argc, argv); 419 420 SourceMgr SM; 421 422 // Read the expected strings from the check file. 423 std::vector<CheckString> CheckStrings; 424 if (ReadCheckFile(SM, CheckStrings)) 425 return 2; 426 427 // Open the file to check and add it to SourceMgr. 428 std::string ErrorStr; 429 MemoryBuffer *F = 430 MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), &ErrorStr); 431 if (F == 0) { 432 errs() << "Could not open input file '" << InputFilename << "': " 433 << ErrorStr << '\n'; 434 return true; 435 } 436 437 // Remove duplicate spaces in the input file if requested. 438 if (!NoCanonicalizeWhiteSpace) 439 F = CanonicalizeInputFile(F); 440 441 SM.AddNewSourceBuffer(F, SMLoc()); 442 443 // Check that we have all of the expected strings, in order, in the input 444 // file. 445 StringRef Buffer = F->getBuffer(); 446 447 const char *LastMatch = Buffer.data(); 448 449 for (unsigned StrNo = 0, e = CheckStrings.size(); StrNo != e; ++StrNo) { 450 const CheckString &CheckStr = CheckStrings[StrNo]; 451 452 StringRef SearchFrom = Buffer; 453 454 // Find StrNo in the file. 
455 size_t MatchLen = 0; 456 Buffer = Buffer.substr(CheckStr.Pat.Match(Buffer, MatchLen)); 457 458 // If we didn't find a match, reject the input. 459 if (Buffer.empty()) { 460 PrintCheckFailed(SM, CheckStr, SearchFrom); 461 return 1; 462 } 463 464 StringRef SkippedRegion(LastMatch, Buffer.data()-LastMatch); 465 466 // If this check is a "CHECK-NEXT", verify that the previous match was on 467 // the previous line (i.e. that there is one newline between them). 468 if (CheckStr.IsCheckNext) { 469 // Count the number of newlines between the previous match and this one. 470 assert(LastMatch != F->getBufferStart() && 471 "CHECK-NEXT can't be the first check in a file"); 472 473 unsigned NumNewLines = CountNumNewlinesBetween(SkippedRegion); 474 if (NumNewLines == 0) { 475 SM.PrintMessage(CheckStr.Loc, 476 CheckPrefix+"-NEXT: is on the same line as previous match", 477 "error"); 478 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), 479 "'next' match was here", "note"); 480 SM.PrintMessage(SMLoc::getFromPointer(LastMatch), 481 "previous match was here", "note"); 482 return 1; 483 } 484 485 if (NumNewLines != 1) { 486 SM.PrintMessage(CheckStr.Loc, 487 CheckPrefix+ 488 "-NEXT: is not on the line after the previous match", 489 "error"); 490 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), 491 "'next' match was here", "note"); 492 SM.PrintMessage(SMLoc::getFromPointer(LastMatch), 493 "previous match was here", "note"); 494 return 1; 495 } 496 } 497 498 // If this match had "not strings", verify that they don't exist in the 499 // skipped region. 
500 for (unsigned ChunkNo = 0, e = CheckStr.NotStrings.size(); ChunkNo != e; ++ChunkNo) { 501 size_t MatchLen = 0; 502 size_t Pos = CheckStr.NotStrings[ChunkNo].second.Match(SkippedRegion, MatchLen); 503 if (Pos == StringRef::npos) continue; 504 505 SM.PrintMessage(SMLoc::getFromPointer(LastMatch+Pos), 506 CheckPrefix+"-NOT: string occurred!", "error"); 507 SM.PrintMessage(CheckStr.NotStrings[ChunkNo].first, 508 CheckPrefix+"-NOT: pattern specified here", "note"); 509 return 1; 510 } 511 512 513 // Otherwise, everything is good. Step over the matched text and remember 514 // the position after the match as the end of the last match. 515 Buffer = Buffer.substr(MatchLen); 516 LastMatch = Buffer.data(); 517 } 518 519 return 0; 520 } 521