xref: /llvm-project/llvm/utils/FileCheck/FileCheck.cpp (revision 1b9f936f91d9fe8700e47cea3347b2607fc5b506)
1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// FileCheck does a line-by-line check of a file that validates whether it
11 // contains the expected content.  This is useful for regression tests etc.
12 //
13 // This program exits with an error status of 2 on error, exit status of 0 if
14 // the file matched the expected contents, and exit status of 1 if it did not
15 // contain the expected contents.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/PrettyStackTrace.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <algorithm>
31 #include <cctype>
32 #include <map>
33 #include <string>
34 #include <system_error>
35 #include <vector>
36 using namespace llvm;
37 
38 static cl::opt<std::string>
39 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
40 
41 static cl::opt<std::string>
42 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43               cl::init("-"), cl::value_desc("filename"));
44 
45 static cl::list<std::string>
46 CheckPrefixes("check-prefix",
47               cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
48 
49 static cl::opt<bool>
50 NoCanonicalizeWhiteSpace("strict-whitespace",
51               cl::desc("Do not treat all horizontal whitespace as equivalent"));
52 
53 static cl::list<std::string> ImplicitCheckNot(
54     "implicit-check-not",
55     cl::desc("Add an implicit negative check with this pattern to every\n"
56              "positive check. This can be used to ensure that no instances of\n"
57              "this pattern occur which are not matched by a positive pattern"),
58     cl::value_desc("pattern"));
59 
60 static cl::opt<bool> AllowEmptyInput(
61     "allow-empty", cl::init(false),
62     cl::desc("Allow the input file to be empty. This is useful when making\n"
63              "checks that some error message does not occur, for example."));
64 
65 typedef cl::list<std::string>::const_iterator prefix_iterator;
66 
67 //===----------------------------------------------------------------------===//
68 // Pattern Handling Code.
69 //===----------------------------------------------------------------------===//
70 
namespace Check {
  /// CheckType - The kind of directive that follows a check prefix.
  enum CheckType {
    /// CheckNone - No valid directive was recognized.
    CheckNone = 0,

    /// CheckPlain - "PREFIX:".
    CheckPlain,

    /// CheckNext - "PREFIX-NEXT:".
    CheckNext,

    /// CheckNot - "PREFIX-NOT:".
    CheckNot,

    /// CheckDAG - "PREFIX-DAG:".
    CheckDAG,

    /// CheckLabel - "PREFIX-LABEL:".
    CheckLabel,

    /// CheckEOF - When set, this pattern only matches the end of file. This is
    /// used for trailing CHECK-NOTs.
    CheckEOF
  };
}
85 
86 class Pattern {
87   SMLoc PatternLoc;
88 
89   Check::CheckType CheckTy;
90 
91   /// FixedStr - If non-empty, this pattern is a fixed string match with the
92   /// specified fixed string.
93   StringRef FixedStr;
94 
95   /// RegEx - If non-empty, this is a regex pattern.
96   std::string RegExStr;
97 
98   /// \brief Contains the number of line this pattern is in.
99   unsigned LineNumber;
100 
101   /// VariableUses - Entries in this vector map to uses of a variable in the
102   /// pattern, e.g. "foo[[bar]]baz".  In this case, the RegExStr will contain
103   /// "foobaz" and we'll get an entry in this vector that tells us to insert the
104   /// value of bar at offset 3.
105   std::vector<std::pair<StringRef, unsigned> > VariableUses;
106 
107   /// VariableDefs - Maps definitions of variables to their parenthesized
108   /// capture numbers.
109   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
110   std::map<StringRef, unsigned> VariableDefs;
111 
112 public:
113 
114   Pattern(Check::CheckType Ty)
115     : CheckTy(Ty) { }
116 
117   /// getLoc - Return the location in source code.
118   SMLoc getLoc() const { return PatternLoc; }
119 
120   /// ParsePattern - Parse the given string into the Pattern. Prefix provides
121   /// which prefix is being matched, SM provides the SourceMgr used for error
122   /// reports, and LineNumber is the line number in the input file from which
123   /// the pattern string was read.  Returns true in case of an error, false
124   /// otherwise.
125   bool ParsePattern(StringRef PatternStr,
126                     StringRef Prefix,
127                     SourceMgr &SM,
128                     unsigned LineNumber);
129 
130   /// Match - Match the pattern string against the input buffer Buffer.  This
131   /// returns the position that is matched or npos if there is no match.  If
132   /// there is a match, the size of the matched string is returned in MatchLen.
133   ///
134   /// The VariableTable StringMap provides the current values of filecheck
135   /// variables and is updated if this match defines new values.
136   size_t Match(StringRef Buffer, size_t &MatchLen,
137                StringMap<StringRef> &VariableTable) const;
138 
139   /// PrintFailureInfo - Print additional information about a failure to match
140   /// involving this pattern.
141   void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
142                         const StringMap<StringRef> &VariableTable) const;
143 
144   bool hasVariable() const { return !(VariableUses.empty() &&
145                                       VariableDefs.empty()); }
146 
147   Check::CheckType getCheckTy() const { return CheckTy; }
148 
149 private:
150   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
151   void AddBackrefToRegEx(unsigned BackrefNum);
152 
153   /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
154   /// matching this pattern at the start of \arg Buffer; a distance of zero
155   /// should correspond to a perfect match.
156   unsigned ComputeMatchDistance(StringRef Buffer,
157                                const StringMap<StringRef> &VariableTable) const;
158 
159   /// \brief Evaluates expression and stores the result to \p Value.
160   /// \return true on success. false when the expression has invalid syntax.
161   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
162 
163   /// \brief Finds the closing sequence of a regex variable usage or
164   /// definition. Str has to point in the beginning of the definition
165   /// (right after the opening sequence).
166   /// \return offset of the closing sequence within Str, or npos if it was not
167   /// found.
168   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
169 };
170 
171 
172 bool Pattern::ParsePattern(StringRef PatternStr,
173                            StringRef Prefix,
174                            SourceMgr &SM,
175                            unsigned LineNumber) {
176   this->LineNumber = LineNumber;
177   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
178 
179   // Ignore trailing whitespace.
180   while (!PatternStr.empty() &&
181          (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
182     PatternStr = PatternStr.substr(0, PatternStr.size()-1);
183 
184   // Check that there is something on the line.
185   if (PatternStr.empty()) {
186     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
187                     "found empty check string with prefix '" +
188                     Prefix + ":'");
189     return true;
190   }
191 
192   // Check to see if this is a fixed string, or if it has regex pieces.
193   if (PatternStr.size() < 2 ||
194       (PatternStr.find("{{") == StringRef::npos &&
195        PatternStr.find("[[") == StringRef::npos)) {
196     FixedStr = PatternStr;
197     return false;
198   }
199 
200   // Paren value #0 is for the fully matched string.  Any new parenthesized
201   // values add from there.
202   unsigned CurParen = 1;
203 
204   // Otherwise, there is at least one regex piece.  Build up the regex pattern
205   // by escaping scary characters in fixed strings, building up one big regex.
206   while (!PatternStr.empty()) {
207     // RegEx matches.
208     if (PatternStr.startswith("{{")) {
209       // This is the start of a regex match.  Scan for the }}.
210       size_t End = PatternStr.find("}}");
211       if (End == StringRef::npos) {
212         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
213                         SourceMgr::DK_Error,
214                         "found start of regex string with no end '}}'");
215         return true;
216       }
217 
218       // Enclose {{}} patterns in parens just like [[]] even though we're not
219       // capturing the result for any purpose.  This is required in case the
220       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
221       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
222       RegExStr += '(';
223       ++CurParen;
224 
225       if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
226         return true;
227       RegExStr += ')';
228 
229       PatternStr = PatternStr.substr(End+2);
230       continue;
231     }
232 
233     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
234     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
235     // second form is [[foo]] which is a reference to foo.  The variable name
236     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
237     // it.  This is to catch some common errors.
238     if (PatternStr.startswith("[[")) {
239       // Find the closing bracket pair ending the match.  End is going to be an
240       // offset relative to the beginning of the match string.
241       size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
242 
243       if (End == StringRef::npos) {
244         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
245                         SourceMgr::DK_Error,
246                         "invalid named regex reference, no ]] found");
247         return true;
248       }
249 
250       StringRef MatchStr = PatternStr.substr(2, End);
251       PatternStr = PatternStr.substr(End+4);
252 
253       // Get the regex name (e.g. "foo").
254       size_t NameEnd = MatchStr.find(':');
255       StringRef Name = MatchStr.substr(0, NameEnd);
256 
257       if (Name.empty()) {
258         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
259                         "invalid name in named regex: empty name");
260         return true;
261       }
262 
263       // Verify that the name/expression is well formed. FileCheck currently
264       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
265       // is relaxed, more strict check is performed in \c EvaluateExpression.
266       bool IsExpression = false;
267       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
268         if (i == 0 && Name[i] == '@') {
269           if (NameEnd != StringRef::npos) {
270             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
271                             SourceMgr::DK_Error,
272                             "invalid name in named regex definition");
273             return true;
274           }
275           IsExpression = true;
276           continue;
277         }
278         if (Name[i] != '_' && !isalnum(Name[i]) &&
279             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
280           SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
281                           SourceMgr::DK_Error, "invalid name in named regex");
282           return true;
283         }
284       }
285 
286       // Name can't start with a digit.
287       if (isdigit(static_cast<unsigned char>(Name[0]))) {
288         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
289                         "invalid name in named regex");
290         return true;
291       }
292 
293       // Handle [[foo]].
294       if (NameEnd == StringRef::npos) {
295         // Handle variables that were defined earlier on the same line by
296         // emitting a backreference.
297         if (VariableDefs.find(Name) != VariableDefs.end()) {
298           unsigned VarParenNum = VariableDefs[Name];
299           if (VarParenNum < 1 || VarParenNum > 9) {
300             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
301                             SourceMgr::DK_Error,
302                             "Can't back-reference more than 9 variables");
303             return true;
304           }
305           AddBackrefToRegEx(VarParenNum);
306         } else {
307           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
308         }
309         continue;
310       }
311 
312       // Handle [[foo:.*]].
313       VariableDefs[Name] = CurParen;
314       RegExStr += '(';
315       ++CurParen;
316 
317       if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
318         return true;
319 
320       RegExStr += ')';
321     }
322 
323     // Handle fixed string matches.
324     // Find the end, which is the start of the next regex.
325     size_t FixedMatchEnd = PatternStr.find("{{");
326     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
327     RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
328     PatternStr = PatternStr.substr(FixedMatchEnd);
329   }
330 
331   return false;
332 }
333 
334 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
335                               SourceMgr &SM) {
336   Regex R(RS);
337   std::string Error;
338   if (!R.isValid(Error)) {
339     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
340                     "invalid regex: " + Error);
341     return true;
342   }
343 
344   RegExStr += RS.str();
345   CurParen += R.getNumMatches();
346   return false;
347 }
348 
349 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
350   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
351   std::string Backref = std::string("\\") +
352                         std::string(1, '0' + BackrefNum);
353   RegExStr += Backref;
354 }
355 
356 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
357   // The only supported expression is @LINE([\+-]\d+)?
358   if (!Expr.startswith("@LINE"))
359     return false;
360   Expr = Expr.substr(StringRef("@LINE").size());
361   int Offset = 0;
362   if (!Expr.empty()) {
363     if (Expr[0] == '+')
364       Expr = Expr.substr(1);
365     else if (Expr[0] != '-')
366       return false;
367     if (Expr.getAsInteger(10, Offset))
368       return false;
369   }
370   Value = llvm::itostr(LineNumber + Offset);
371   return true;
372 }
373 
374 /// Match - Match the pattern string against the input buffer Buffer.  This
375 /// returns the position that is matched or npos if there is no match.  If
376 /// there is a match, the size of the matched string is returned in MatchLen.
377 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
378                       StringMap<StringRef> &VariableTable) const {
379   // If this is the EOF pattern, match it immediately.
380   if (CheckTy == Check::CheckEOF) {
381     MatchLen = 0;
382     return Buffer.size();
383   }
384 
385   // If this is a fixed string pattern, just match it now.
386   if (!FixedStr.empty()) {
387     MatchLen = FixedStr.size();
388     return Buffer.find(FixedStr);
389   }
390 
391   // Regex match.
392 
393   // If there are variable uses, we need to create a temporary string with the
394   // actual value.
395   StringRef RegExToMatch = RegExStr;
396   std::string TmpStr;
397   if (!VariableUses.empty()) {
398     TmpStr = RegExStr;
399 
400     unsigned InsertOffset = 0;
401     for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
402       std::string Value;
403 
404       if (VariableUses[i].first[0] == '@') {
405         if (!EvaluateExpression(VariableUses[i].first, Value))
406           return StringRef::npos;
407       } else {
408         StringMap<StringRef>::iterator it =
409           VariableTable.find(VariableUses[i].first);
410         // If the variable is undefined, return an error.
411         if (it == VariableTable.end())
412           return StringRef::npos;
413 
414         // Look up the value and escape it so that we can put it into the regex.
415         Value += Regex::escape(it->second);
416       }
417 
418       // Plop it into the regex at the adjusted offset.
419       TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset,
420                     Value.begin(), Value.end());
421       InsertOffset += Value.size();
422     }
423 
424     // Match the newly constructed regex.
425     RegExToMatch = TmpStr;
426   }
427 
428 
429   SmallVector<StringRef, 4> MatchInfo;
430   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
431     return StringRef::npos;
432 
433   // Successful regex match.
434   assert(!MatchInfo.empty() && "Didn't get any match");
435   StringRef FullMatch = MatchInfo[0];
436 
437   // If this defines any variables, remember their values.
438   for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(),
439                                                      E = VariableDefs.end();
440        I != E; ++I) {
441     assert(I->second < MatchInfo.size() && "Internal paren error");
442     VariableTable[I->first] = MatchInfo[I->second];
443   }
444 
445   MatchLen = FullMatch.size();
446   return FullMatch.data()-Buffer.data();
447 }
448 
449 unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
450                               const StringMap<StringRef> &VariableTable) const {
451   // Just compute the number of matching characters. For regular expressions, we
452   // just compare against the regex itself and hope for the best.
453   //
454   // FIXME: One easy improvement here is have the regex lib generate a single
455   // example regular expression which matches, and use that as the example
456   // string.
457   StringRef ExampleString(FixedStr);
458   if (ExampleString.empty())
459     ExampleString = RegExStr;
460 
461   // Only compare up to the first line in the buffer, or the string size.
462   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
463   BufferPrefix = BufferPrefix.split('\n').first;
464   return BufferPrefix.edit_distance(ExampleString);
465 }
466 
467 void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
468                                const StringMap<StringRef> &VariableTable) const{
469   // If this was a regular expression using variables, print the current
470   // variable values.
471   if (!VariableUses.empty()) {
472     for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
473       SmallString<256> Msg;
474       raw_svector_ostream OS(Msg);
475       StringRef Var = VariableUses[i].first;
476       if (Var[0] == '@') {
477         std::string Value;
478         if (EvaluateExpression(Var, Value)) {
479           OS << "with expression \"";
480           OS.write_escaped(Var) << "\" equal to \"";
481           OS.write_escaped(Value) << "\"";
482         } else {
483           OS << "uses incorrect expression \"";
484           OS.write_escaped(Var) << "\"";
485         }
486       } else {
487         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
488 
489         // Check for undefined variable references.
490         if (it == VariableTable.end()) {
491           OS << "uses undefined variable \"";
492           OS.write_escaped(Var) << "\"";
493         } else {
494           OS << "with variable \"";
495           OS.write_escaped(Var) << "\" equal to \"";
496           OS.write_escaped(it->second) << "\"";
497         }
498       }
499 
500       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
501                       OS.str());
502     }
503   }
504 
505   // Attempt to find the closest/best fuzzy match.  Usually an error happens
506   // because some string in the output didn't exactly match. In these cases, we
507   // would like to show the user a best guess at what "should have" matched, to
508   // save them having to actually check the input manually.
509   size_t NumLinesForward = 0;
510   size_t Best = StringRef::npos;
511   double BestQuality = 0;
512 
513   // Use an arbitrary 4k limit on how far we will search.
514   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
515     if (Buffer[i] == '\n')
516       ++NumLinesForward;
517 
518     // Patterns have leading whitespace stripped, so skip whitespace when
519     // looking for something which looks like a pattern.
520     if (Buffer[i] == ' ' || Buffer[i] == '\t')
521       continue;
522 
523     // Compute the "quality" of this match as an arbitrary combination of the
524     // match distance and the number of lines skipped to get to this match.
525     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
526     double Quality = Distance + (NumLinesForward / 100.);
527 
528     if (Quality < BestQuality || Best == StringRef::npos) {
529       Best = i;
530       BestQuality = Quality;
531     }
532   }
533 
534   // Print the "possible intended match here" line if we found something
535   // reasonable and not equal to what we showed in the "scanning from here"
536   // line.
537   if (Best && Best != StringRef::npos && BestQuality < 50) {
538       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
539                       SourceMgr::DK_Note, "possible intended match here");
540 
541     // FIXME: If we wanted to be really friendly we would show why the match
542     // failed, as it can be hard to spot simple one character differences.
543   }
544 }
545 
546 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
547   // Offset keeps track of the current offset within the input Str
548   size_t Offset = 0;
549   // [...] Nesting depth
550   size_t BracketDepth = 0;
551 
552   while (!Str.empty()) {
553     if (Str.startswith("]]") && BracketDepth == 0)
554       return Offset;
555     if (Str[0] == '\\') {
556       // Backslash escapes the next char within regexes, so skip them both.
557       Str = Str.substr(2);
558       Offset += 2;
559     } else {
560       switch (Str[0]) {
561         default:
562           break;
563         case '[':
564           BracketDepth++;
565           break;
566         case ']':
567           if (BracketDepth == 0) {
568             SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
569                             SourceMgr::DK_Error,
570                             "missing closing \"]\" for regex variable");
571             exit(1);
572           }
573           BracketDepth--;
574           break;
575       }
576       Str = Str.substr(1);
577       Offset++;
578     }
579   }
580 
581   return StringRef::npos;
582 }
583 
584 
585 //===----------------------------------------------------------------------===//
586 // Check Strings.
587 //===----------------------------------------------------------------------===//
588 
589 /// CheckString - This is a check that we found in the input file.
590 struct CheckString {
591   /// Pat - The pattern to match.
592   Pattern Pat;
593 
594   /// Prefix - Which prefix name this check matched.
595   StringRef Prefix;
596 
597   /// Loc - The location in the match file that the check string was specified.
598   SMLoc Loc;
599 
600   /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive,
601   /// as opposed to a CHECK: directive.
602   Check::CheckType CheckTy;
603 
604   /// DagNotStrings - These are all of the strings that are disallowed from
605   /// occurring between this match string and the previous one (or start of
606   /// file).
607   std::vector<Pattern> DagNotStrings;
608 
609 
610   CheckString(const Pattern &P,
611               StringRef S,
612               SMLoc L,
613               Check::CheckType Ty)
614     : Pat(P), Prefix(S), Loc(L), CheckTy(Ty) {}
615 
616   /// Check - Match check string and its "not strings" and/or "dag strings".
617   size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
618                size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
619 
620   /// CheckNext - Verify there is a single line in the given buffer.
621   bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
622 
623   /// CheckNot - Verify there's no "not strings" in the given buffer.
624   bool CheckNot(const SourceMgr &SM, StringRef Buffer,
625                 const std::vector<const Pattern *> &NotStrings,
626                 StringMap<StringRef> &VariableTable) const;
627 
628   /// CheckDag - Match "dag strings" and their mixed "not strings".
629   size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
630                   std::vector<const Pattern *> &NotStrings,
631                   StringMap<StringRef> &VariableTable) const;
632 };
633 
634 /// Canonicalize whitespaces in the input file. Line endings are replaced
635 /// with UNIX-style '\n'.
636 ///
637 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
638 /// characters to a single space.
639 static MemoryBuffer *CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB,
640                                            bool PreserveHorizontal) {
641   SmallString<128> NewFile;
642   NewFile.reserve(MB->getBufferSize());
643 
644   for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
645        Ptr != End; ++Ptr) {
646     // Eliminate trailing dosish \r.
647     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
648       continue;
649     }
650 
651     // If current char is not a horizontal whitespace or if horizontal
652     // whitespace canonicalization is disabled, dump it to output as is.
653     if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
654       NewFile.push_back(*Ptr);
655       continue;
656     }
657 
658     // Otherwise, add one space and advance over neighboring space.
659     NewFile.push_back(' ');
660     while (Ptr+1 != End &&
661            (Ptr[1] == ' ' || Ptr[1] == '\t'))
662       ++Ptr;
663   }
664 
665   return MemoryBuffer::getMemBufferCopy(NewFile.str(),
666                                         MB->getBufferIdentifier());
667 }
668 
/// IsPartOfWord - Return true if \p c can be part of a word: an alphanumeric
/// character, '-' or '_'.
static bool IsPartOfWord(char c) {
  // The <cctype> classifiers require a value representable as unsigned char;
  // passing a negative plain char is undefined behavior.
  return (isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_');
}
672 
673 // Get the size of the prefix extension.
674 static size_t CheckTypeSize(Check::CheckType Ty) {
675   switch (Ty) {
676   case Check::CheckNone:
677     return 0;
678 
679   case Check::CheckPlain:
680     return sizeof(":") - 1;
681 
682   case Check::CheckNext:
683     return sizeof("-NEXT:") - 1;
684 
685   case Check::CheckNot:
686     return sizeof("-NOT:") - 1;
687 
688   case Check::CheckDAG:
689     return sizeof("-DAG:") - 1;
690 
691   case Check::CheckLabel:
692     return sizeof("-LABEL:") - 1;
693 
694   case Check::CheckEOF:
695     llvm_unreachable("Should not be using EOF size");
696   }
697 
698   llvm_unreachable("Bad check type");
699 }
700 
701 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
702   char NextChar = Buffer[Prefix.size()];
703 
704   // Verify that the : is present after the prefix.
705   if (NextChar == ':')
706     return Check::CheckPlain;
707 
708   if (NextChar != '-')
709     return Check::CheckNone;
710 
711   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
712   if (Rest.startswith("NEXT:"))
713     return Check::CheckNext;
714 
715   if (Rest.startswith("NOT:"))
716     return Check::CheckNot;
717 
718   if (Rest.startswith("DAG:"))
719     return Check::CheckDAG;
720 
721   if (Rest.startswith("LABEL:"))
722     return Check::CheckLabel;
723 
724   return Check::CheckNone;
725 }
726 
727 // From the given position, find the next character after the word.
728 static size_t SkipWord(StringRef Str, size_t Loc) {
729   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
730     ++Loc;
731   return Loc;
732 }
733 
734 // Try to find the first match in buffer for any prefix. If a valid match is
735 // found, return that prefix and set its type and location.  If there are almost
736 // matches (e.g. the actual prefix string is found, but is not an actual check
737 // string), but no valid match, return an empty string and set the position to
738 // resume searching from. If no partial matches are found, return an empty
739 // string and the location will be StringRef::npos. If one prefix is a substring
740 // of another, the maximal match should be found. e.g. if "A" and "AA" are
741 // prefixes then AA-CHECK: should match the second one.
742 static StringRef FindFirstCandidateMatch(StringRef &Buffer,
743                                          Check::CheckType &CheckTy,
744                                          size_t &CheckLoc) {
745   StringRef FirstPrefix;
746   size_t FirstLoc = StringRef::npos;
747   size_t SearchLoc = StringRef::npos;
748   Check::CheckType FirstTy = Check::CheckNone;
749 
750   CheckTy = Check::CheckNone;
751   CheckLoc = StringRef::npos;
752 
753   for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
754        I != E; ++I) {
755     StringRef Prefix(*I);
756     size_t PrefixLoc = Buffer.find(Prefix);
757 
758     if (PrefixLoc == StringRef::npos)
759       continue;
760 
761     // Track where we are searching for invalid prefixes that look almost right.
762     // We need to only advance to the first partial match on the next attempt
763     // since a partial match could be a substring of a later, valid prefix.
764     // Need to skip to the end of the word, otherwise we could end up
765     // matching a prefix in a substring later.
766     if (PrefixLoc < SearchLoc)
767       SearchLoc = SkipWord(Buffer, PrefixLoc);
768 
769     // We only want to find the first match to avoid skipping some.
770     if (PrefixLoc > FirstLoc)
771       continue;
772     // If one matching check-prefix is a prefix of another, choose the
773     // longer one.
774     if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size())
775       continue;
776 
777     StringRef Rest = Buffer.drop_front(PrefixLoc);
778     // Make sure we have actually found the prefix, and not a word containing
779     // it. This should also prevent matching the wrong prefix when one is a
780     // substring of another.
781     if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1]))
782       FirstTy = Check::CheckNone;
783     else
784       FirstTy = FindCheckType(Rest, Prefix);
785 
786     FirstLoc = PrefixLoc;
787     FirstPrefix = Prefix;
788   }
789 
790   // If the first prefix is invalid, we should continue the search after it.
791   if (FirstTy == Check::CheckNone) {
792     CheckLoc = SearchLoc;
793     return "";
794   }
795 
796   CheckTy = FirstTy;
797   CheckLoc = FirstLoc;
798   return FirstPrefix;
799 }
800 
801 static StringRef FindFirstMatchingPrefix(StringRef &Buffer,
802                                          unsigned &LineNumber,
803                                          Check::CheckType &CheckTy,
804                                          size_t &CheckLoc) {
805   while (!Buffer.empty()) {
806     StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc);
807     // If we found a real match, we are done.
808     if (!Prefix.empty()) {
809       LineNumber += Buffer.substr(0, CheckLoc).count('\n');
810       return Prefix;
811     }
812 
813     // We didn't find any almost matches either, we are also done.
814     if (CheckLoc == StringRef::npos)
815       return StringRef();
816 
817     LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n');
818 
819     // Advance to the last possible match we found and try again.
820     Buffer = Buffer.drop_front(CheckLoc + 1);
821   }
822 
823   return StringRef();
824 }
825 
826 /// ReadCheckFile - Read the check file, which specifies the sequence of
827 /// expected strings.  The strings are added to the CheckStrings vector.
828 /// Returns true in case of an error, false otherwise.
829 static bool ReadCheckFile(SourceMgr &SM,
830                           std::vector<CheckString> &CheckStrings) {
831   ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
832       MemoryBuffer::getFileOrSTDIN(CheckFilename);
833   if (std::error_code EC = FileOrErr.getError()) {
834     errs() << "Could not open check file '" << CheckFilename
835            << "': " << EC.message() << '\n';
836     return true;
837   }
838 
839   // If we want to canonicalize whitespace, strip excess whitespace from the
840   // buffer containing the CHECK lines. Remove DOS style line endings.
841   MemoryBuffer *F = CanonicalizeInputFile(std::move(FileOrErr.get()),
842                                           NoCanonicalizeWhiteSpace);
843 
844   SM.AddNewSourceBuffer(F, SMLoc());
845 
846   // Find all instances of CheckPrefix followed by : in the file.
847   StringRef Buffer = F->getBuffer();
848 
849   std::vector<Pattern> ImplicitNegativeChecks;
850   for (const auto &PatternString : ImplicitCheckNot) {
851     // Create a buffer with fake command line content in order to display the
852     // command line option responsible for the specific implicit CHECK-NOT.
853     std::string Prefix = std::string("-") + ImplicitCheckNot.ArgStr + "='";
854     std::string Suffix = "'";
855     MemoryBuffer *CmdLine = MemoryBuffer::getMemBufferCopy(
856         Prefix + PatternString + Suffix, "command line");
857     StringRef PatternInBuffer =
858         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
859     SM.AddNewSourceBuffer(CmdLine, SMLoc());
860 
861     ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
862     ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
863                                                "IMPLICIT-CHECK", SM, 0);
864   }
865 
866 
867   std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
868 
869   // LineNumber keeps track of the line on which CheckPrefix instances are
870   // found.
871   unsigned LineNumber = 1;
872 
873   while (1) {
874     Check::CheckType CheckTy;
875     size_t PrefixLoc;
876 
877     // See if a prefix occurs in the memory buffer.
878     StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer,
879                                                    LineNumber,
880                                                    CheckTy,
881                                                    PrefixLoc);
882     if (UsedPrefix.empty())
883       break;
884 
885     Buffer = Buffer.drop_front(PrefixLoc);
886 
887     // Location to use for error messages.
888     const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
889 
890     // PrefixLoc is to the start of the prefix. Skip to the end.
891     Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
892 
893     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
894     // leading and trailing whitespace.
895     Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
896 
897     // Scan ahead to the end of line.
898     size_t EOL = Buffer.find_first_of("\n\r");
899 
900     // Remember the location of the start of the pattern, for diagnostics.
901     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
902 
903     // Parse the pattern.
904     Pattern P(CheckTy);
905     if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
906       return true;
907 
908     // Verify that CHECK-LABEL lines do not define or use variables
909     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
910       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
911                       SourceMgr::DK_Error,
912                       "found '" + UsedPrefix + "-LABEL:'"
913                       " with variable definition or use");
914       return true;
915     }
916 
917     Buffer = Buffer.substr(EOL);
918 
919     // Verify that CHECK-NEXT lines have at least one CHECK line before them.
920     if ((CheckTy == Check::CheckNext) && CheckStrings.empty()) {
921       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
922                       SourceMgr::DK_Error,
923                       "found '" + UsedPrefix + "-NEXT:' without previous '"
924                       + UsedPrefix + ": line");
925       return true;
926     }
927 
928     // Handle CHECK-DAG/-NOT.
929     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
930       DagNotMatches.push_back(P);
931       continue;
932     }
933 
934     // Okay, add the string we captured to the output vector and move on.
935     CheckStrings.push_back(CheckString(P,
936                                        UsedPrefix,
937                                        PatternLoc,
938                                        CheckTy));
939     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
940     DagNotMatches = ImplicitNegativeChecks;
941   }
942 
943   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
944   // prefix as a filler for the error message.
945   if (!DagNotMatches.empty()) {
946     CheckStrings.push_back(CheckString(Pattern(Check::CheckEOF),
947                                        CheckPrefixes[0],
948                                        SMLoc::getFromPointer(Buffer.data()),
949                                        Check::CheckEOF));
950     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
951   }
952 
953   if (CheckStrings.empty()) {
954     errs() << "error: no check strings found with prefix"
955            << (CheckPrefixes.size() > 1 ? "es " : " ");
956     for (size_t I = 0, N = CheckPrefixes.size(); I != N; ++I) {
957       StringRef Prefix(CheckPrefixes[I]);
958       errs() << '\'' << Prefix << ":'";
959       if (I != N - 1)
960         errs() << ", ";
961     }
962 
963     errs() << '\n';
964     return true;
965   }
966 
967   return false;
968 }
969 
970 static void PrintCheckFailed(const SourceMgr &SM, const SMLoc &Loc,
971                              const Pattern &Pat, StringRef Buffer,
972                              StringMap<StringRef> &VariableTable) {
973   // Otherwise, we have an error, emit an error message.
974   SM.PrintMessage(Loc, SourceMgr::DK_Error,
975                   "expected string not found in input");
976 
977   // Print the "scanning from here" line.  If the current position is at the
978   // end of a line, advance to the start of the next line.
979   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
980 
981   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
982                   "scanning from here");
983 
984   // Allow the pattern to print additional information if desired.
985   Pat.PrintFailureInfo(SM, Buffer, VariableTable);
986 }
987 
988 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
989                              StringRef Buffer,
990                              StringMap<StringRef> &VariableTable) {
991   PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
992 }
993 
994 /// CountNumNewlinesBetween - Count the number of newlines in the specified
995 /// range.
996 static unsigned CountNumNewlinesBetween(StringRef Range,
997                                         const char *&FirstNewLine) {
998   unsigned NumNewLines = 0;
999   while (1) {
1000     // Scan for newline.
1001     Range = Range.substr(Range.find_first_of("\n\r"));
1002     if (Range.empty()) return NumNewLines;
1003 
1004     ++NumNewLines;
1005 
1006     // Handle \n\r and \r\n as a single newline.
1007     if (Range.size() > 1 &&
1008         (Range[1] == '\n' || Range[1] == '\r') &&
1009         (Range[0] != Range[1]))
1010       Range = Range.substr(1);
1011     Range = Range.substr(1);
1012 
1013     if (NumNewLines == 1)
1014       FirstNewLine = Range.begin();
1015   }
1016 }
1017 
1018 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1019                           bool IsLabelScanMode, size_t &MatchLen,
1020                           StringMap<StringRef> &VariableTable) const {
1021   size_t LastPos = 0;
1022   std::vector<const Pattern *> NotStrings;
1023 
1024   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1025   // bounds; we have not processed variable definitions within the bounded block
1026   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1027   // over the block again (including the last CHECK-LABEL) in normal mode.
1028   if (!IsLabelScanMode) {
1029     // Match "dag strings" (with mixed "not strings" if any).
1030     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1031     if (LastPos == StringRef::npos)
1032       return StringRef::npos;
1033   }
1034 
1035   // Match itself from the last position after matching CHECK-DAG.
1036   StringRef MatchBuffer = Buffer.substr(LastPos);
1037   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1038   if (MatchPos == StringRef::npos) {
1039     PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1040     return StringRef::npos;
1041   }
1042   MatchPos += LastPos;
1043 
1044   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1045   // or CHECK-NOT
1046   if (!IsLabelScanMode) {
1047     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1048 
1049     // If this check is a "CHECK-NEXT", verify that the previous match was on
1050     // the previous line (i.e. that there is one newline between them).
1051     if (CheckNext(SM, SkippedRegion))
1052       return StringRef::npos;
1053 
1054     // If this match had "not strings", verify that they don't exist in the
1055     // skipped region.
1056     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1057       return StringRef::npos;
1058   }
1059 
1060   return MatchPos;
1061 }
1062 
1063 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1064   if (CheckTy != Check::CheckNext)
1065     return false;
1066 
1067   // Count the number of newlines between the previous match and this one.
1068   assert(Buffer.data() !=
1069          SM.getMemoryBuffer(
1070            SM.FindBufferContainingLoc(
1071              SMLoc::getFromPointer(Buffer.data())))->getBufferStart() &&
1072          "CHECK-NEXT can't be the first check in a file");
1073 
1074   const char *FirstNewLine = nullptr;
1075   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1076 
1077   if (NumNewLines == 0) {
1078     SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1079                     "-NEXT: is on the same line as previous match");
1080     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1081                     SourceMgr::DK_Note, "'next' match was here");
1082     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1083                     "previous match ended here");
1084     return true;
1085   }
1086 
1087   if (NumNewLines != 1) {
1088     SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1089                     "-NEXT: is not on the line after the previous match");
1090     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1091                     SourceMgr::DK_Note, "'next' match was here");
1092     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1093                     "previous match ended here");
1094     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1095                     "non-matching line after previous match is here");
1096     return true;
1097   }
1098 
1099   return false;
1100 }
1101 
1102 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1103                            const std::vector<const Pattern *> &NotStrings,
1104                            StringMap<StringRef> &VariableTable) const {
1105   for (unsigned ChunkNo = 0, e = NotStrings.size();
1106        ChunkNo != e; ++ChunkNo) {
1107     const Pattern *Pat = NotStrings[ChunkNo];
1108     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1109 
1110     size_t MatchLen = 0;
1111     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1112 
1113     if (Pos == StringRef::npos) continue;
1114 
1115     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos),
1116                     SourceMgr::DK_Error,
1117                     Prefix + "-NOT: string occurred!");
1118     SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1119                     Prefix + "-NOT: pattern specified here");
1120     return true;
1121   }
1122 
1123   return false;
1124 }
1125 
1126 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1127                              std::vector<const Pattern *> &NotStrings,
1128                              StringMap<StringRef> &VariableTable) const {
1129   if (DagNotStrings.empty())
1130     return 0;
1131 
1132   size_t LastPos = 0;
1133   size_t StartPos = LastPos;
1134 
1135   for (unsigned ChunkNo = 0, e = DagNotStrings.size();
1136        ChunkNo != e; ++ChunkNo) {
1137     const Pattern &Pat = DagNotStrings[ChunkNo];
1138 
1139     assert((Pat.getCheckTy() == Check::CheckDAG ||
1140             Pat.getCheckTy() == Check::CheckNot) &&
1141            "Invalid CHECK-DAG or CHECK-NOT!");
1142 
1143     if (Pat.getCheckTy() == Check::CheckNot) {
1144       NotStrings.push_back(&Pat);
1145       continue;
1146     }
1147 
1148     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1149 
1150     size_t MatchLen = 0, MatchPos;
1151 
1152     // CHECK-DAG always matches from the start.
1153     StringRef MatchBuffer = Buffer.substr(StartPos);
1154     MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1155     // With a group of CHECK-DAGs, a single mismatching means the match on
1156     // that group of CHECK-DAGs fails immediately.
1157     if (MatchPos == StringRef::npos) {
1158       PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1159       return StringRef::npos;
1160     }
1161     // Re-calc it as the offset relative to the start of the original string.
1162     MatchPos += StartPos;
1163 
1164     if (!NotStrings.empty()) {
1165       if (MatchPos < LastPos) {
1166         // Reordered?
1167         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1168                         SourceMgr::DK_Error,
1169                         Prefix + "-DAG: found a match of CHECK-DAG"
1170                         " reordering across a CHECK-NOT");
1171         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1172                         SourceMgr::DK_Note,
1173                         Prefix + "-DAG: the farthest match of CHECK-DAG"
1174                         " is found here");
1175         SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1176                         Prefix + "-NOT: the crossed pattern specified"
1177                         " here");
1178         SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1179                         Prefix + "-DAG: the reordered pattern specified"
1180                         " here");
1181         return StringRef::npos;
1182       }
1183       // All subsequent CHECK-DAGs should be matched from the farthest
1184       // position of all precedent CHECK-DAGs (including this one.)
1185       StartPos = LastPos;
1186       // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1187       // CHECK-DAG, verify that there's no 'not' strings occurred in that
1188       // region.
1189       StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1190       if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1191         return StringRef::npos;
1192       // Clear "not strings".
1193       NotStrings.clear();
1194     }
1195 
1196     // Update the last position with CHECK-DAG matches.
1197     LastPos = std::max(MatchPos + MatchLen, LastPos);
1198   }
1199 
1200   return LastPos;
1201 }
1202 
1203 // A check prefix must contain only alphanumeric, hyphens and underscores.
1204 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1205   Regex Validator("^[a-zA-Z0-9_-]*$");
1206   return Validator.match(CheckPrefix);
1207 }
1208 
1209 static bool ValidateCheckPrefixes() {
1210   StringSet<> PrefixSet;
1211 
1212   for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
1213        I != E; ++I) {
1214     StringRef Prefix(*I);
1215 
1216     // Reject empty prefixes.
1217     if (Prefix == "")
1218       return false;
1219 
1220     if (!PrefixSet.insert(Prefix))
1221       return false;
1222 
1223     if (!ValidateCheckPrefix(Prefix))
1224       return false;
1225   }
1226 
1227   return true;
1228 }
1229 
1230 // I don't think there's a way to specify an initial value for cl::list,
1231 // so if nothing was specified, add the default
1232 static void AddCheckPrefixIfNeeded() {
1233   if (CheckPrefixes.empty())
1234     CheckPrefixes.push_back("CHECK");
1235 }
1236 
1237 int main(int argc, char **argv) {
1238   sys::PrintStackTraceOnErrorSignal();
1239   PrettyStackTraceProgram X(argc, argv);
1240   cl::ParseCommandLineOptions(argc, argv);
1241 
1242   if (!ValidateCheckPrefixes()) {
1243     errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1244               "start with a letter and contain only alphanumeric characters, "
1245               "hyphens and underscores\n";
1246     return 2;
1247   }
1248 
1249   AddCheckPrefixIfNeeded();
1250 
1251   SourceMgr SM;
1252 
1253   // Read the expected strings from the check file.
1254   std::vector<CheckString> CheckStrings;
1255   if (ReadCheckFile(SM, CheckStrings))
1256     return 2;
1257 
1258   // Open the file to check and add it to SourceMgr.
1259   ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
1260       MemoryBuffer::getFileOrSTDIN(InputFilename);
1261   if (std::error_code EC = FileOrErr.getError()) {
1262     errs() << "Could not open input file '" << InputFilename
1263            << "': " << EC.message() << '\n';
1264     return 2;
1265   }
1266   std::unique_ptr<MemoryBuffer> &File = FileOrErr.get();
1267 
1268   if (File->getBufferSize() == 0 && !AllowEmptyInput) {
1269     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1270     return 2;
1271   }
1272 
1273   // Remove duplicate spaces in the input file if requested.
1274   // Remove DOS style line endings.
1275   MemoryBuffer *F =
1276     CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace);
1277 
1278   SM.AddNewSourceBuffer(F, SMLoc());
1279 
1280   /// VariableTable - This holds all the current filecheck variables.
1281   StringMap<StringRef> VariableTable;
1282 
1283   // Check that we have all of the expected strings, in order, in the input
1284   // file.
1285   StringRef Buffer = F->getBuffer();
1286 
1287   bool hasError = false;
1288 
1289   unsigned i = 0, j = 0, e = CheckStrings.size();
1290 
1291   while (true) {
1292     StringRef CheckRegion;
1293     if (j == e) {
1294       CheckRegion = Buffer;
1295     } else {
1296       const CheckString &CheckLabelStr = CheckStrings[j];
1297       if (CheckLabelStr.CheckTy != Check::CheckLabel) {
1298         ++j;
1299         continue;
1300       }
1301 
1302       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1303       size_t MatchLabelLen = 0;
1304       size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
1305                                                  MatchLabelLen, VariableTable);
1306       if (MatchLabelPos == StringRef::npos) {
1307         hasError = true;
1308         break;
1309       }
1310 
1311       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1312       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1313       ++j;
1314     }
1315 
1316     for ( ; i != j; ++i) {
1317       const CheckString &CheckStr = CheckStrings[i];
1318 
1319       // Check each string within the scanned region, including a second check
1320       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1321       size_t MatchLen = 0;
1322       size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
1323                                        VariableTable);
1324 
1325       if (MatchPos == StringRef::npos) {
1326         hasError = true;
1327         i = j;
1328         break;
1329       }
1330 
1331       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1332     }
1333 
1334     if (j == e)
1335       break;
1336   }
1337 
1338   return hasError ? 1 : 0;
1339 }
1340