xref: /llvm-project/llvm/utils/FileCheck/FileCheck.cpp (revision 56ccdbbd292e0066dff318e16fc3d4b3c22a3b2f)
1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // FileCheck does a line-by-line check of a file that validates whether it
11 // contains the expected content.  This is useful for regression tests etc.
12 //
13 // This program exits with an error status of 2 on error, exit status of 0 if
14 // the file matched the expected contents, and exit status of 1 if it did not
15 // contain the expected contents.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/PrettyStackTrace.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <algorithm>
31 #include <cctype>
32 #include <map>
33 #include <string>
34 #include <system_error>
35 #include <vector>
36 using namespace llvm;
37 
38 static cl::opt<std::string>
39 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
40 
41 static cl::opt<std::string>
42 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43               cl::init("-"), cl::value_desc("filename"));
44 
45 static cl::list<std::string>
46 CheckPrefixes("check-prefix",
47               cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
48 
49 static cl::opt<bool>
50 NoCanonicalizeWhiteSpace("strict-whitespace",
51               cl::desc("Do not treat all horizontal whitespace as equivalent"));
52 
53 static cl::list<std::string> ImplicitCheckNot(
54     "implicit-check-not",
55     cl::desc("Add an implicit negative check with this pattern to every\n"
56              "positive check. This can be used to ensure that no instances of\n"
57              "this pattern occur which are not matched by a positive pattern"),
58     cl::value_desc("pattern"));
59 
60 typedef cl::list<std::string>::const_iterator prefix_iterator;
61 
62 //===----------------------------------------------------------------------===//
63 // Pattern Handling Code.
64 //===----------------------------------------------------------------------===//
65 
namespace Check {
  /// The kind of directive attached to a check prefix.
  enum CheckType {
    /// No valid directive was recognised.
    CheckNone,

    /// "PREFIX:" directive.
    CheckPlain,

    /// "PREFIX-NEXT:" directive.
    CheckNext,

    /// "PREFIX-NOT:" directive.
    CheckNot,

    /// "PREFIX-DAG:" directive.
    CheckDAG,

    /// "PREFIX-LABEL:" directive.
    CheckLabel,

    /// CheckEOF - When set, this pattern only matches the end of file. This is
    /// used for trailing CHECK-NOTs.
    CheckEOF
  };
}
80 
81 class Pattern {
82   SMLoc PatternLoc;
83 
84   Check::CheckType CheckTy;
85 
86   /// FixedStr - If non-empty, this pattern is a fixed string match with the
87   /// specified fixed string.
88   StringRef FixedStr;
89 
90   /// RegEx - If non-empty, this is a regex pattern.
91   std::string RegExStr;
92 
93   /// \brief Contains the number of line this pattern is in.
94   unsigned LineNumber;
95 
96   /// VariableUses - Entries in this vector map to uses of a variable in the
97   /// pattern, e.g. "foo[[bar]]baz".  In this case, the RegExStr will contain
98   /// "foobaz" and we'll get an entry in this vector that tells us to insert the
99   /// value of bar at offset 3.
100   std::vector<std::pair<StringRef, unsigned> > VariableUses;
101 
102   /// VariableDefs - Maps definitions of variables to their parenthesized
103   /// capture numbers.
104   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
105   std::map<StringRef, unsigned> VariableDefs;
106 
107 public:
108 
109   Pattern(Check::CheckType Ty)
110     : CheckTy(Ty) { }
111 
112   /// getLoc - Return the location in source code.
113   SMLoc getLoc() const { return PatternLoc; }
114 
115   /// ParsePattern - Parse the given string into the Pattern. Prefix provides
116   /// which prefix is being matched, SM provides the SourceMgr used for error
117   /// reports, and LineNumber is the line number in the input file from which
118   /// the pattern string was read.  Returns true in case of an error, false
119   /// otherwise.
120   bool ParsePattern(StringRef PatternStr,
121                     StringRef Prefix,
122                     SourceMgr &SM,
123                     unsigned LineNumber);
124 
125   /// Match - Match the pattern string against the input buffer Buffer.  This
126   /// returns the position that is matched or npos if there is no match.  If
127   /// there is a match, the size of the matched string is returned in MatchLen.
128   ///
129   /// The VariableTable StringMap provides the current values of filecheck
130   /// variables and is updated if this match defines new values.
131   size_t Match(StringRef Buffer, size_t &MatchLen,
132                StringMap<StringRef> &VariableTable) const;
133 
134   /// PrintFailureInfo - Print additional information about a failure to match
135   /// involving this pattern.
136   void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
137                         const StringMap<StringRef> &VariableTable) const;
138 
139   bool hasVariable() const { return !(VariableUses.empty() &&
140                                       VariableDefs.empty()); }
141 
142   Check::CheckType getCheckTy() const { return CheckTy; }
143 
144 private:
145   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
146   void AddBackrefToRegEx(unsigned BackrefNum);
147 
148   /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
149   /// matching this pattern at the start of \arg Buffer; a distance of zero
150   /// should correspond to a perfect match.
151   unsigned ComputeMatchDistance(StringRef Buffer,
152                                const StringMap<StringRef> &VariableTable) const;
153 
154   /// \brief Evaluates expression and stores the result to \p Value.
155   /// \return true on success. false when the expression has invalid syntax.
156   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
157 
158   /// \brief Finds the closing sequence of a regex variable usage or
159   /// definition. Str has to point in the beginning of the definition
160   /// (right after the opening sequence).
161   /// \return offset of the closing sequence within Str, or npos if it was not
162   /// found.
163   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
164 };
165 
166 
167 bool Pattern::ParsePattern(StringRef PatternStr,
168                            StringRef Prefix,
169                            SourceMgr &SM,
170                            unsigned LineNumber) {
171   this->LineNumber = LineNumber;
172   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
173 
174   // Ignore trailing whitespace.
175   while (!PatternStr.empty() &&
176          (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
177     PatternStr = PatternStr.substr(0, PatternStr.size()-1);
178 
179   // Check that there is something on the line.
180   if (PatternStr.empty()) {
181     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
182                     "found empty check string with prefix '" +
183                     Prefix + ":'");
184     return true;
185   }
186 
187   // Check to see if this is a fixed string, or if it has regex pieces.
188   if (PatternStr.size() < 2 ||
189       (PatternStr.find("{{") == StringRef::npos &&
190        PatternStr.find("[[") == StringRef::npos)) {
191     FixedStr = PatternStr;
192     return false;
193   }
194 
195   // Paren value #0 is for the fully matched string.  Any new parenthesized
196   // values add from there.
197   unsigned CurParen = 1;
198 
199   // Otherwise, there is at least one regex piece.  Build up the regex pattern
200   // by escaping scary characters in fixed strings, building up one big regex.
201   while (!PatternStr.empty()) {
202     // RegEx matches.
203     if (PatternStr.startswith("{{")) {
204       // This is the start of a regex match.  Scan for the }}.
205       size_t End = PatternStr.find("}}");
206       if (End == StringRef::npos) {
207         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
208                         SourceMgr::DK_Error,
209                         "found start of regex string with no end '}}'");
210         return true;
211       }
212 
213       // Enclose {{}} patterns in parens just like [[]] even though we're not
214       // capturing the result for any purpose.  This is required in case the
215       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
216       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
217       RegExStr += '(';
218       ++CurParen;
219 
220       if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
221         return true;
222       RegExStr += ')';
223 
224       PatternStr = PatternStr.substr(End+2);
225       continue;
226     }
227 
228     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
229     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
230     // second form is [[foo]] which is a reference to foo.  The variable name
231     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
232     // it.  This is to catch some common errors.
233     if (PatternStr.startswith("[[")) {
234       // Find the closing bracket pair ending the match.  End is going to be an
235       // offset relative to the beginning of the match string.
236       size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
237 
238       if (End == StringRef::npos) {
239         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
240                         SourceMgr::DK_Error,
241                         "invalid named regex reference, no ]] found");
242         return true;
243       }
244 
245       StringRef MatchStr = PatternStr.substr(2, End);
246       PatternStr = PatternStr.substr(End+4);
247 
248       // Get the regex name (e.g. "foo").
249       size_t NameEnd = MatchStr.find(':');
250       StringRef Name = MatchStr.substr(0, NameEnd);
251 
252       if (Name.empty()) {
253         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
254                         "invalid name in named regex: empty name");
255         return true;
256       }
257 
258       // Verify that the name/expression is well formed. FileCheck currently
259       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
260       // is relaxed, more strict check is performed in \c EvaluateExpression.
261       bool IsExpression = false;
262       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
263         if (i == 0 && Name[i] == '@') {
264           if (NameEnd != StringRef::npos) {
265             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
266                             SourceMgr::DK_Error,
267                             "invalid name in named regex definition");
268             return true;
269           }
270           IsExpression = true;
271           continue;
272         }
273         if (Name[i] != '_' && !isalnum(Name[i]) &&
274             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
275           SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
276                           SourceMgr::DK_Error, "invalid name in named regex");
277           return true;
278         }
279       }
280 
281       // Name can't start with a digit.
282       if (isdigit(static_cast<unsigned char>(Name[0]))) {
283         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
284                         "invalid name in named regex");
285         return true;
286       }
287 
288       // Handle [[foo]].
289       if (NameEnd == StringRef::npos) {
290         // Handle variables that were defined earlier on the same line by
291         // emitting a backreference.
292         if (VariableDefs.find(Name) != VariableDefs.end()) {
293           unsigned VarParenNum = VariableDefs[Name];
294           if (VarParenNum < 1 || VarParenNum > 9) {
295             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
296                             SourceMgr::DK_Error,
297                             "Can't back-reference more than 9 variables");
298             return true;
299           }
300           AddBackrefToRegEx(VarParenNum);
301         } else {
302           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
303         }
304         continue;
305       }
306 
307       // Handle [[foo:.*]].
308       VariableDefs[Name] = CurParen;
309       RegExStr += '(';
310       ++CurParen;
311 
312       if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
313         return true;
314 
315       RegExStr += ')';
316     }
317 
318     // Handle fixed string matches.
319     // Find the end, which is the start of the next regex.
320     size_t FixedMatchEnd = PatternStr.find("{{");
321     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
322     RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
323     PatternStr = PatternStr.substr(FixedMatchEnd);
324   }
325 
326   return false;
327 }
328 
329 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
330                               SourceMgr &SM) {
331   Regex R(RS);
332   std::string Error;
333   if (!R.isValid(Error)) {
334     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
335                     "invalid regex: " + Error);
336     return true;
337   }
338 
339   RegExStr += RS.str();
340   CurParen += R.getNumMatches();
341   return false;
342 }
343 
344 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
345   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
346   std::string Backref = std::string("\\") +
347                         std::string(1, '0' + BackrefNum);
348   RegExStr += Backref;
349 }
350 
351 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
352   // The only supported expression is @LINE([\+-]\d+)?
353   if (!Expr.startswith("@LINE"))
354     return false;
355   Expr = Expr.substr(StringRef("@LINE").size());
356   int Offset = 0;
357   if (!Expr.empty()) {
358     if (Expr[0] == '+')
359       Expr = Expr.substr(1);
360     else if (Expr[0] != '-')
361       return false;
362     if (Expr.getAsInteger(10, Offset))
363       return false;
364   }
365   Value = llvm::itostr(LineNumber + Offset);
366   return true;
367 }
368 
369 /// Match - Match the pattern string against the input buffer Buffer.  This
370 /// returns the position that is matched or npos if there is no match.  If
371 /// there is a match, the size of the matched string is returned in MatchLen.
372 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
373                       StringMap<StringRef> &VariableTable) const {
374   // If this is the EOF pattern, match it immediately.
375   if (CheckTy == Check::CheckEOF) {
376     MatchLen = 0;
377     return Buffer.size();
378   }
379 
380   // If this is a fixed string pattern, just match it now.
381   if (!FixedStr.empty()) {
382     MatchLen = FixedStr.size();
383     return Buffer.find(FixedStr);
384   }
385 
386   // Regex match.
387 
388   // If there are variable uses, we need to create a temporary string with the
389   // actual value.
390   StringRef RegExToMatch = RegExStr;
391   std::string TmpStr;
392   if (!VariableUses.empty()) {
393     TmpStr = RegExStr;
394 
395     unsigned InsertOffset = 0;
396     for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
397       std::string Value;
398 
399       if (VariableUses[i].first[0] == '@') {
400         if (!EvaluateExpression(VariableUses[i].first, Value))
401           return StringRef::npos;
402       } else {
403         StringMap<StringRef>::iterator it =
404           VariableTable.find(VariableUses[i].first);
405         // If the variable is undefined, return an error.
406         if (it == VariableTable.end())
407           return StringRef::npos;
408 
409         // Look up the value and escape it so that we can put it into the regex.
410         Value += Regex::escape(it->second);
411       }
412 
413       // Plop it into the regex at the adjusted offset.
414       TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset,
415                     Value.begin(), Value.end());
416       InsertOffset += Value.size();
417     }
418 
419     // Match the newly constructed regex.
420     RegExToMatch = TmpStr;
421   }
422 
423 
424   SmallVector<StringRef, 4> MatchInfo;
425   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
426     return StringRef::npos;
427 
428   // Successful regex match.
429   assert(!MatchInfo.empty() && "Didn't get any match");
430   StringRef FullMatch = MatchInfo[0];
431 
432   // If this defines any variables, remember their values.
433   for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(),
434                                                      E = VariableDefs.end();
435        I != E; ++I) {
436     assert(I->second < MatchInfo.size() && "Internal paren error");
437     VariableTable[I->first] = MatchInfo[I->second];
438   }
439 
440   MatchLen = FullMatch.size();
441   return FullMatch.data()-Buffer.data();
442 }
443 
444 unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
445                               const StringMap<StringRef> &VariableTable) const {
446   // Just compute the number of matching characters. For regular expressions, we
447   // just compare against the regex itself and hope for the best.
448   //
449   // FIXME: One easy improvement here is have the regex lib generate a single
450   // example regular expression which matches, and use that as the example
451   // string.
452   StringRef ExampleString(FixedStr);
453   if (ExampleString.empty())
454     ExampleString = RegExStr;
455 
456   // Only compare up to the first line in the buffer, or the string size.
457   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
458   BufferPrefix = BufferPrefix.split('\n').first;
459   return BufferPrefix.edit_distance(ExampleString);
460 }
461 
462 void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
463                                const StringMap<StringRef> &VariableTable) const{
464   // If this was a regular expression using variables, print the current
465   // variable values.
466   if (!VariableUses.empty()) {
467     for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
468       SmallString<256> Msg;
469       raw_svector_ostream OS(Msg);
470       StringRef Var = VariableUses[i].first;
471       if (Var[0] == '@') {
472         std::string Value;
473         if (EvaluateExpression(Var, Value)) {
474           OS << "with expression \"";
475           OS.write_escaped(Var) << "\" equal to \"";
476           OS.write_escaped(Value) << "\"";
477         } else {
478           OS << "uses incorrect expression \"";
479           OS.write_escaped(Var) << "\"";
480         }
481       } else {
482         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
483 
484         // Check for undefined variable references.
485         if (it == VariableTable.end()) {
486           OS << "uses undefined variable \"";
487           OS.write_escaped(Var) << "\"";
488         } else {
489           OS << "with variable \"";
490           OS.write_escaped(Var) << "\" equal to \"";
491           OS.write_escaped(it->second) << "\"";
492         }
493       }
494 
495       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
496                       OS.str());
497     }
498   }
499 
500   // Attempt to find the closest/best fuzzy match.  Usually an error happens
501   // because some string in the output didn't exactly match. In these cases, we
502   // would like to show the user a best guess at what "should have" matched, to
503   // save them having to actually check the input manually.
504   size_t NumLinesForward = 0;
505   size_t Best = StringRef::npos;
506   double BestQuality = 0;
507 
508   // Use an arbitrary 4k limit on how far we will search.
509   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
510     if (Buffer[i] == '\n')
511       ++NumLinesForward;
512 
513     // Patterns have leading whitespace stripped, so skip whitespace when
514     // looking for something which looks like a pattern.
515     if (Buffer[i] == ' ' || Buffer[i] == '\t')
516       continue;
517 
518     // Compute the "quality" of this match as an arbitrary combination of the
519     // match distance and the number of lines skipped to get to this match.
520     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
521     double Quality = Distance + (NumLinesForward / 100.);
522 
523     if (Quality < BestQuality || Best == StringRef::npos) {
524       Best = i;
525       BestQuality = Quality;
526     }
527   }
528 
529   // Print the "possible intended match here" line if we found something
530   // reasonable and not equal to what we showed in the "scanning from here"
531   // line.
532   if (Best && Best != StringRef::npos && BestQuality < 50) {
533       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
534                       SourceMgr::DK_Note, "possible intended match here");
535 
536     // FIXME: If we wanted to be really friendly we would show why the match
537     // failed, as it can be hard to spot simple one character differences.
538   }
539 }
540 
541 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
542   // Offset keeps track of the current offset within the input Str
543   size_t Offset = 0;
544   // [...] Nesting depth
545   size_t BracketDepth = 0;
546 
547   while (!Str.empty()) {
548     if (Str.startswith("]]") && BracketDepth == 0)
549       return Offset;
550     if (Str[0] == '\\') {
551       // Backslash escapes the next char within regexes, so skip them both.
552       Str = Str.substr(2);
553       Offset += 2;
554     } else {
555       switch (Str[0]) {
556         default:
557           break;
558         case '[':
559           BracketDepth++;
560           break;
561         case ']':
562           if (BracketDepth == 0) {
563             SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
564                             SourceMgr::DK_Error,
565                             "missing closing \"]\" for regex variable");
566             exit(1);
567           }
568           BracketDepth--;
569           break;
570       }
571       Str = Str.substr(1);
572       Offset++;
573     }
574   }
575 
576   return StringRef::npos;
577 }
578 
579 
580 //===----------------------------------------------------------------------===//
581 // Check Strings.
582 //===----------------------------------------------------------------------===//
583 
584 /// CheckString - This is a check that we found in the input file.
585 struct CheckString {
586   /// Pat - The pattern to match.
587   Pattern Pat;
588 
589   /// Prefix - Which prefix name this check matched.
590   StringRef Prefix;
591 
592   /// Loc - The location in the match file that the check string was specified.
593   SMLoc Loc;
594 
595   /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive,
596   /// as opposed to a CHECK: directive.
597   Check::CheckType CheckTy;
598 
599   /// DagNotStrings - These are all of the strings that are disallowed from
600   /// occurring between this match string and the previous one (or start of
601   /// file).
602   std::vector<Pattern> DagNotStrings;
603 
604 
605   CheckString(const Pattern &P,
606               StringRef S,
607               SMLoc L,
608               Check::CheckType Ty)
609     : Pat(P), Prefix(S), Loc(L), CheckTy(Ty) {}
610 
611   /// Check - Match check string and its "not strings" and/or "dag strings".
612   size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
613                size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
614 
615   /// CheckNext - Verify there is a single line in the given buffer.
616   bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
617 
618   /// CheckNot - Verify there's no "not strings" in the given buffer.
619   bool CheckNot(const SourceMgr &SM, StringRef Buffer,
620                 const std::vector<const Pattern *> &NotStrings,
621                 StringMap<StringRef> &VariableTable) const;
622 
623   /// CheckDag - Match "dag strings" and their mixed "not strings".
624   size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
625                   std::vector<const Pattern *> &NotStrings,
626                   StringMap<StringRef> &VariableTable) const;
627 };
628 
629 /// Canonicalize whitespaces in the input file. Line endings are replaced
630 /// with UNIX-style '\n'.
631 ///
632 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
633 /// characters to a single space.
634 static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB,
635                                            bool PreserveHorizontal) {
636   SmallString<128> NewFile;
637   NewFile.reserve(MB->getBufferSize());
638 
639   for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
640        Ptr != End; ++Ptr) {
641     // Eliminate trailing dosish \r.
642     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
643       continue;
644     }
645 
646     // If current char is not a horizontal whitespace or if horizontal
647     // whitespace canonicalization is disabled, dump it to output as is.
648     if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
649       NewFile.push_back(*Ptr);
650       continue;
651     }
652 
653     // Otherwise, add one space and advance over neighboring space.
654     NewFile.push_back(' ');
655     while (Ptr+1 != End &&
656            (Ptr[1] == ' ' || Ptr[1] == '\t'))
657       ++Ptr;
658   }
659 
660   // Free the old buffer and return a new one.
661   MemoryBuffer *MB2 =
662     MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier());
663 
664   delete MB;
665   return MB2;
666 }
667 
/// Return true if \p c can be part of a word: an alphanumeric character, '-'
/// or '_'.
static bool IsPartOfWord(char c) {
  // The <cctype> classifiers are only defined for values representable as
  // unsigned char (or EOF); a plain char holding a byte >= 0x80 may be
  // negative, so convert first.
  return isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_';
}
671 
672 // Get the size of the prefix extension.
673 static size_t CheckTypeSize(Check::CheckType Ty) {
674   switch (Ty) {
675   case Check::CheckNone:
676     return 0;
677 
678   case Check::CheckPlain:
679     return sizeof(":") - 1;
680 
681   case Check::CheckNext:
682     return sizeof("-NEXT:") - 1;
683 
684   case Check::CheckNot:
685     return sizeof("-NOT:") - 1;
686 
687   case Check::CheckDAG:
688     return sizeof("-DAG:") - 1;
689 
690   case Check::CheckLabel:
691     return sizeof("-LABEL:") - 1;
692 
693   case Check::CheckEOF:
694     llvm_unreachable("Should not be using EOF size");
695   }
696 
697   llvm_unreachable("Bad check type");
698 }
699 
700 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
701   char NextChar = Buffer[Prefix.size()];
702 
703   // Verify that the : is present after the prefix.
704   if (NextChar == ':')
705     return Check::CheckPlain;
706 
707   if (NextChar != '-')
708     return Check::CheckNone;
709 
710   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
711   if (Rest.startswith("NEXT:"))
712     return Check::CheckNext;
713 
714   if (Rest.startswith("NOT:"))
715     return Check::CheckNot;
716 
717   if (Rest.startswith("DAG:"))
718     return Check::CheckDAG;
719 
720   if (Rest.startswith("LABEL:"))
721     return Check::CheckLabel;
722 
723   return Check::CheckNone;
724 }
725 
726 // From the given position, find the next character after the word.
727 static size_t SkipWord(StringRef Str, size_t Loc) {
728   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
729     ++Loc;
730   return Loc;
731 }
732 
733 // Try to find the first match in buffer for any prefix. If a valid match is
734 // found, return that prefix and set its type and location.  If there are almost
735 // matches (e.g. the actual prefix string is found, but is not an actual check
736 // string), but no valid match, return an empty string and set the position to
737 // resume searching from. If no partial matches are found, return an empty
738 // string and the location will be StringRef::npos. If one prefix is a substring
739 // of another, the maximal match should be found. e.g. if "A" and "AA" are
740 // prefixes then AA-CHECK: should match the second one.
741 static StringRef FindFirstCandidateMatch(StringRef &Buffer,
742                                          Check::CheckType &CheckTy,
743                                          size_t &CheckLoc) {
744   StringRef FirstPrefix;
745   size_t FirstLoc = StringRef::npos;
746   size_t SearchLoc = StringRef::npos;
747   Check::CheckType FirstTy = Check::CheckNone;
748 
749   CheckTy = Check::CheckNone;
750   CheckLoc = StringRef::npos;
751 
752   for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
753        I != E; ++I) {
754     StringRef Prefix(*I);
755     size_t PrefixLoc = Buffer.find(Prefix);
756 
757     if (PrefixLoc == StringRef::npos)
758       continue;
759 
760     // Track where we are searching for invalid prefixes that look almost right.
761     // We need to only advance to the first partial match on the next attempt
762     // since a partial match could be a substring of a later, valid prefix.
763     // Need to skip to the end of the word, otherwise we could end up
764     // matching a prefix in a substring later.
765     if (PrefixLoc < SearchLoc)
766       SearchLoc = SkipWord(Buffer, PrefixLoc);
767 
768     // We only want to find the first match to avoid skipping some.
769     if (PrefixLoc > FirstLoc)
770       continue;
771     // If one matching check-prefix is a prefix of another, choose the
772     // longer one.
773     if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size())
774       continue;
775 
776     StringRef Rest = Buffer.drop_front(PrefixLoc);
777     // Make sure we have actually found the prefix, and not a word containing
778     // it. This should also prevent matching the wrong prefix when one is a
779     // substring of another.
780     if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1]))
781       FirstTy = Check::CheckNone;
782     else
783       FirstTy = FindCheckType(Rest, Prefix);
784 
785     FirstLoc = PrefixLoc;
786     FirstPrefix = Prefix;
787   }
788 
789   // If the first prefix is invalid, we should continue the search after it.
790   if (FirstTy == Check::CheckNone) {
791     CheckLoc = SearchLoc;
792     return "";
793   }
794 
795   CheckTy = FirstTy;
796   CheckLoc = FirstLoc;
797   return FirstPrefix;
798 }
799 
800 static StringRef FindFirstMatchingPrefix(StringRef &Buffer,
801                                          unsigned &LineNumber,
802                                          Check::CheckType &CheckTy,
803                                          size_t &CheckLoc) {
804   while (!Buffer.empty()) {
805     StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc);
806     // If we found a real match, we are done.
807     if (!Prefix.empty()) {
808       LineNumber += Buffer.substr(0, CheckLoc).count('\n');
809       return Prefix;
810     }
811 
812     // We didn't find any almost matches either, we are also done.
813     if (CheckLoc == StringRef::npos)
814       return StringRef();
815 
816     LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n');
817 
818     // Advance to the last possible match we found and try again.
819     Buffer = Buffer.drop_front(CheckLoc + 1);
820   }
821 
822   return StringRef();
823 }
824 
825 /// ReadCheckFile - Read the check file, which specifies the sequence of
826 /// expected strings.  The strings are added to the CheckStrings vector.
827 /// Returns true in case of an error, false otherwise.
828 static bool ReadCheckFile(SourceMgr &SM,
829                           std::vector<CheckString> &CheckStrings) {
830   ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
831       MemoryBuffer::getFileOrSTDIN(CheckFilename);
832   if (std::error_code EC = FileOrErr.getError()) {
833     errs() << "Could not open check file '" << CheckFilename
834            << "': " << EC.message() << '\n';
835     return true;
836   }
837 
838   // If we want to canonicalize whitespace, strip excess whitespace from the
839   // buffer containing the CHECK lines. Remove DOS style line endings.
840   MemoryBuffer *F = CanonicalizeInputFile(FileOrErr.get().release(),
841                                           NoCanonicalizeWhiteSpace);
842 
843   SM.AddNewSourceBuffer(F, SMLoc());
844 
845   // Find all instances of CheckPrefix followed by : in the file.
846   StringRef Buffer = F->getBuffer();
847 
848   std::vector<Pattern> ImplicitNegativeChecks;
849   for (const auto &PatternString : ImplicitCheckNot) {
850     // Create a buffer with fake command line content in order to display the
851     // command line option responsible for the specific implicit CHECK-NOT.
852     std::string Prefix = std::string("-") + ImplicitCheckNot.ArgStr + "='";
853     std::string Suffix = "'";
854     MemoryBuffer *CmdLine = MemoryBuffer::getMemBufferCopy(
855         Prefix + PatternString + Suffix, "command line");
856     StringRef PatternInBuffer =
857         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
858     SM.AddNewSourceBuffer(CmdLine, SMLoc());
859 
860     ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
861     ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
862                                                "IMPLICIT-CHECK", SM, 0);
863   }
864 
865 
866   std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
867 
868   // LineNumber keeps track of the line on which CheckPrefix instances are
869   // found.
870   unsigned LineNumber = 1;
871 
872   while (1) {
873     Check::CheckType CheckTy;
874     size_t PrefixLoc;
875 
876     // See if a prefix occurs in the memory buffer.
877     StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer,
878                                                    LineNumber,
879                                                    CheckTy,
880                                                    PrefixLoc);
881     if (UsedPrefix.empty())
882       break;
883 
884     Buffer = Buffer.drop_front(PrefixLoc);
885 
886     // Location to use for error messages.
887     const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
888 
889     // PrefixLoc is to the start of the prefix. Skip to the end.
890     Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
891 
892     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
893     // leading and trailing whitespace.
894     Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
895 
896     // Scan ahead to the end of line.
897     size_t EOL = Buffer.find_first_of("\n\r");
898 
899     // Remember the location of the start of the pattern, for diagnostics.
900     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
901 
902     // Parse the pattern.
903     Pattern P(CheckTy);
904     if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
905       return true;
906 
907     // Verify that CHECK-LABEL lines do not define or use variables
908     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
909       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
910                       SourceMgr::DK_Error,
911                       "found '" + UsedPrefix + "-LABEL:'"
912                       " with variable definition or use");
913       return true;
914     }
915 
916     Buffer = Buffer.substr(EOL);
917 
918     // Verify that CHECK-NEXT lines have at least one CHECK line before them.
919     if ((CheckTy == Check::CheckNext) && CheckStrings.empty()) {
920       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
921                       SourceMgr::DK_Error,
922                       "found '" + UsedPrefix + "-NEXT:' without previous '"
923                       + UsedPrefix + ": line");
924       return true;
925     }
926 
927     // Handle CHECK-DAG/-NOT.
928     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
929       DagNotMatches.push_back(P);
930       continue;
931     }
932 
933     // Okay, add the string we captured to the output vector and move on.
934     CheckStrings.push_back(CheckString(P,
935                                        UsedPrefix,
936                                        PatternLoc,
937                                        CheckTy));
938     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
939     DagNotMatches = ImplicitNegativeChecks;
940   }
941 
942   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
943   // prefix as a filler for the error message.
944   if (!DagNotMatches.empty()) {
945     CheckStrings.push_back(CheckString(Pattern(Check::CheckEOF),
946                                        CheckPrefixes[0],
947                                        SMLoc::getFromPointer(Buffer.data()),
948                                        Check::CheckEOF));
949     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
950   }
951 
952   if (CheckStrings.empty()) {
953     errs() << "error: no check strings found with prefix"
954            << (CheckPrefixes.size() > 1 ? "es " : " ");
955     for (size_t I = 0, N = CheckPrefixes.size(); I != N; ++I) {
956       StringRef Prefix(CheckPrefixes[I]);
957       errs() << '\'' << Prefix << ":'";
958       if (I != N - 1)
959         errs() << ", ";
960     }
961 
962     errs() << '\n';
963     return true;
964   }
965 
966   return false;
967 }
968 
969 static void PrintCheckFailed(const SourceMgr &SM, const SMLoc &Loc,
970                              const Pattern &Pat, StringRef Buffer,
971                              StringMap<StringRef> &VariableTable) {
972   // Otherwise, we have an error, emit an error message.
973   SM.PrintMessage(Loc, SourceMgr::DK_Error,
974                   "expected string not found in input");
975 
976   // Print the "scanning from here" line.  If the current position is at the
977   // end of a line, advance to the start of the next line.
978   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
979 
980   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
981                   "scanning from here");
982 
983   // Allow the pattern to print additional information if desired.
984   Pat.PrintFailureInfo(SM, Buffer, VariableTable);
985 }
986 
987 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
988                              StringRef Buffer,
989                              StringMap<StringRef> &VariableTable) {
990   PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
991 }
992 
993 /// CountNumNewlinesBetween - Count the number of newlines in the specified
994 /// range.
995 static unsigned CountNumNewlinesBetween(StringRef Range,
996                                         const char *&FirstNewLine) {
997   unsigned NumNewLines = 0;
998   while (1) {
999     // Scan for newline.
1000     Range = Range.substr(Range.find_first_of("\n\r"));
1001     if (Range.empty()) return NumNewLines;
1002 
1003     ++NumNewLines;
1004 
1005     // Handle \n\r and \r\n as a single newline.
1006     if (Range.size() > 1 &&
1007         (Range[1] == '\n' || Range[1] == '\r') &&
1008         (Range[0] != Range[1]))
1009       Range = Range.substr(1);
1010     Range = Range.substr(1);
1011 
1012     if (NumNewLines == 1)
1013       FirstNewLine = Range.begin();
1014   }
1015 }
1016 
1017 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1018                           bool IsLabelScanMode, size_t &MatchLen,
1019                           StringMap<StringRef> &VariableTable) const {
1020   size_t LastPos = 0;
1021   std::vector<const Pattern *> NotStrings;
1022 
1023   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1024   // bounds; we have not processed variable definitions within the bounded block
1025   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1026   // over the block again (including the last CHECK-LABEL) in normal mode.
1027   if (!IsLabelScanMode) {
1028     // Match "dag strings" (with mixed "not strings" if any).
1029     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1030     if (LastPos == StringRef::npos)
1031       return StringRef::npos;
1032   }
1033 
1034   // Match itself from the last position after matching CHECK-DAG.
1035   StringRef MatchBuffer = Buffer.substr(LastPos);
1036   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1037   if (MatchPos == StringRef::npos) {
1038     PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1039     return StringRef::npos;
1040   }
1041   MatchPos += LastPos;
1042 
1043   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1044   // or CHECK-NOT
1045   if (!IsLabelScanMode) {
1046     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1047 
1048     // If this check is a "CHECK-NEXT", verify that the previous match was on
1049     // the previous line (i.e. that there is one newline between them).
1050     if (CheckNext(SM, SkippedRegion))
1051       return StringRef::npos;
1052 
1053     // If this match had "not strings", verify that they don't exist in the
1054     // skipped region.
1055     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1056       return StringRef::npos;
1057   }
1058 
1059   return MatchPos;
1060 }
1061 
1062 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1063   if (CheckTy != Check::CheckNext)
1064     return false;
1065 
1066   // Count the number of newlines between the previous match and this one.
1067   assert(Buffer.data() !=
1068          SM.getMemoryBuffer(
1069            SM.FindBufferContainingLoc(
1070              SMLoc::getFromPointer(Buffer.data())))->getBufferStart() &&
1071          "CHECK-NEXT can't be the first check in a file");
1072 
1073   const char *FirstNewLine = nullptr;
1074   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1075 
1076   if (NumNewLines == 0) {
1077     SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1078                     "-NEXT: is on the same line as previous match");
1079     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1080                     SourceMgr::DK_Note, "'next' match was here");
1081     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1082                     "previous match ended here");
1083     return true;
1084   }
1085 
1086   if (NumNewLines != 1) {
1087     SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1088                     "-NEXT: is not on the line after the previous match");
1089     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1090                     SourceMgr::DK_Note, "'next' match was here");
1091     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1092                     "previous match ended here");
1093     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1094                     "non-matching line after previous match is here");
1095     return true;
1096   }
1097 
1098   return false;
1099 }
1100 
1101 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1102                            const std::vector<const Pattern *> &NotStrings,
1103                            StringMap<StringRef> &VariableTable) const {
1104   for (unsigned ChunkNo = 0, e = NotStrings.size();
1105        ChunkNo != e; ++ChunkNo) {
1106     const Pattern *Pat = NotStrings[ChunkNo];
1107     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1108 
1109     size_t MatchLen = 0;
1110     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1111 
1112     if (Pos == StringRef::npos) continue;
1113 
1114     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos),
1115                     SourceMgr::DK_Error,
1116                     Prefix + "-NOT: string occurred!");
1117     SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1118                     Prefix + "-NOT: pattern specified here");
1119     return true;
1120   }
1121 
1122   return false;
1123 }
1124 
1125 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1126                              std::vector<const Pattern *> &NotStrings,
1127                              StringMap<StringRef> &VariableTable) const {
1128   if (DagNotStrings.empty())
1129     return 0;
1130 
1131   size_t LastPos = 0;
1132   size_t StartPos = LastPos;
1133 
1134   for (unsigned ChunkNo = 0, e = DagNotStrings.size();
1135        ChunkNo != e; ++ChunkNo) {
1136     const Pattern &Pat = DagNotStrings[ChunkNo];
1137 
1138     assert((Pat.getCheckTy() == Check::CheckDAG ||
1139             Pat.getCheckTy() == Check::CheckNot) &&
1140            "Invalid CHECK-DAG or CHECK-NOT!");
1141 
1142     if (Pat.getCheckTy() == Check::CheckNot) {
1143       NotStrings.push_back(&Pat);
1144       continue;
1145     }
1146 
1147     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1148 
1149     size_t MatchLen = 0, MatchPos;
1150 
1151     // CHECK-DAG always matches from the start.
1152     StringRef MatchBuffer = Buffer.substr(StartPos);
1153     MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1154     // With a group of CHECK-DAGs, a single mismatching means the match on
1155     // that group of CHECK-DAGs fails immediately.
1156     if (MatchPos == StringRef::npos) {
1157       PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1158       return StringRef::npos;
1159     }
1160     // Re-calc it as the offset relative to the start of the original string.
1161     MatchPos += StartPos;
1162 
1163     if (!NotStrings.empty()) {
1164       if (MatchPos < LastPos) {
1165         // Reordered?
1166         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1167                         SourceMgr::DK_Error,
1168                         Prefix + "-DAG: found a match of CHECK-DAG"
1169                         " reordering across a CHECK-NOT");
1170         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1171                         SourceMgr::DK_Note,
1172                         Prefix + "-DAG: the farthest match of CHECK-DAG"
1173                         " is found here");
1174         SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1175                         Prefix + "-NOT: the crossed pattern specified"
1176                         " here");
1177         SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1178                         Prefix + "-DAG: the reordered pattern specified"
1179                         " here");
1180         return StringRef::npos;
1181       }
1182       // All subsequent CHECK-DAGs should be matched from the farthest
1183       // position of all precedent CHECK-DAGs (including this one.)
1184       StartPos = LastPos;
1185       // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1186       // CHECK-DAG, verify that there's no 'not' strings occurred in that
1187       // region.
1188       StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1189       if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1190         return StringRef::npos;
1191       // Clear "not strings".
1192       NotStrings.clear();
1193     }
1194 
1195     // Update the last position with CHECK-DAG matches.
1196     LastPos = std::max(MatchPos + MatchLen, LastPos);
1197   }
1198 
1199   return LastPos;
1200 }
1201 
1202 // A check prefix must contain only alphanumeric, hyphens and underscores.
1203 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1204   Regex Validator("^[a-zA-Z0-9_-]*$");
1205   return Validator.match(CheckPrefix);
1206 }
1207 
1208 static bool ValidateCheckPrefixes() {
1209   StringSet<> PrefixSet;
1210 
1211   for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
1212        I != E; ++I) {
1213     StringRef Prefix(*I);
1214 
1215     if (!PrefixSet.insert(Prefix))
1216       return false;
1217 
1218     if (!ValidateCheckPrefix(Prefix))
1219       return false;
1220   }
1221 
1222   return true;
1223 }
1224 
1225 // I don't think there's a way to specify an initial value for cl::list,
1226 // so if nothing was specified, add the default
1227 static void AddCheckPrefixIfNeeded() {
1228   if (CheckPrefixes.empty())
1229     CheckPrefixes.push_back("CHECK");
1230 }
1231 
1232 int main(int argc, char **argv) {
1233   sys::PrintStackTraceOnErrorSignal();
1234   PrettyStackTraceProgram X(argc, argv);
1235   cl::ParseCommandLineOptions(argc, argv);
1236 
1237   if (!ValidateCheckPrefixes()) {
1238     errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1239               "start with a letter and contain only alphanumeric characters, "
1240               "hyphens and underscores\n";
1241     return 2;
1242   }
1243 
1244   AddCheckPrefixIfNeeded();
1245 
1246   SourceMgr SM;
1247 
1248   // Read the expected strings from the check file.
1249   std::vector<CheckString> CheckStrings;
1250   if (ReadCheckFile(SM, CheckStrings))
1251     return 2;
1252 
1253   // Open the file to check and add it to SourceMgr.
1254   ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
1255       MemoryBuffer::getFileOrSTDIN(InputFilename);
1256   if (std::error_code EC = FileOrErr.getError()) {
1257     errs() << "Could not open input file '" << InputFilename
1258            << "': " << EC.message() << '\n';
1259     return 2;
1260   }
1261   std::unique_ptr<MemoryBuffer> File = std::move(FileOrErr.get());
1262 
1263   if (File->getBufferSize() == 0) {
1264     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1265     return 2;
1266   }
1267 
1268   // Remove duplicate spaces in the input file if requested.
1269   // Remove DOS style line endings.
1270   MemoryBuffer *F =
1271     CanonicalizeInputFile(File.release(), NoCanonicalizeWhiteSpace);
1272 
1273   SM.AddNewSourceBuffer(F, SMLoc());
1274 
1275   /// VariableTable - This holds all the current filecheck variables.
1276   StringMap<StringRef> VariableTable;
1277 
1278   // Check that we have all of the expected strings, in order, in the input
1279   // file.
1280   StringRef Buffer = F->getBuffer();
1281 
1282   bool hasError = false;
1283 
1284   unsigned i = 0, j = 0, e = CheckStrings.size();
1285 
1286   while (true) {
1287     StringRef CheckRegion;
1288     if (j == e) {
1289       CheckRegion = Buffer;
1290     } else {
1291       const CheckString &CheckLabelStr = CheckStrings[j];
1292       if (CheckLabelStr.CheckTy != Check::CheckLabel) {
1293         ++j;
1294         continue;
1295       }
1296 
1297       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1298       size_t MatchLabelLen = 0;
1299       size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
1300                                                  MatchLabelLen, VariableTable);
1301       if (MatchLabelPos == StringRef::npos) {
1302         hasError = true;
1303         break;
1304       }
1305 
1306       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1307       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1308       ++j;
1309     }
1310 
1311     for ( ; i != j; ++i) {
1312       const CheckString &CheckStr = CheckStrings[i];
1313 
1314       // Check each string within the scanned region, including a second check
1315       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1316       size_t MatchLen = 0;
1317       size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
1318                                        VariableTable);
1319 
1320       if (MatchPos == StringRef::npos) {
1321         hasError = true;
1322         i = j;
1323         break;
1324       }
1325 
1326       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1327     }
1328 
1329     if (j == e)
1330       break;
1331   }
1332 
1333   return hasError ? 1 : 0;
1334 }
1335